{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9999099301959018, "eval_steps": 500, "global_step": 11102, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00018013960819635217, "grad_norm": 1.0146561861038208, "learning_rate": 1e-05, "loss": 3.4013, "step": 1 }, { "epoch": 0.00036027921639270434, "grad_norm": 1.078662395477295, "learning_rate": 2e-05, "loss": 3.6395, "step": 2 }, { "epoch": 0.0005404188245890565, "grad_norm": 1.1184042692184448, "learning_rate": 3e-05, "loss": 3.638, "step": 3 }, { "epoch": 0.0007205584327854087, "grad_norm": 1.2305233478546143, "learning_rate": 4e-05, "loss": 3.6723, "step": 4 }, { "epoch": 0.0009006980409817609, "grad_norm": 1.2428853511810303, "learning_rate": 5e-05, "loss": 4.0646, "step": 5 }, { "epoch": 0.001080837649178113, "grad_norm": 1.4851632118225098, "learning_rate": 6e-05, "loss": 4.1793, "step": 6 }, { "epoch": 0.0012609772573744652, "grad_norm": 1.4113008975982666, "learning_rate": 7e-05, "loss": 4.257, "step": 7 }, { "epoch": 0.0014411168655708174, "grad_norm": 1.4709892272949219, "learning_rate": 8e-05, "loss": 4.5698, "step": 8 }, { "epoch": 0.0016212564737671695, "grad_norm": 1.5990041494369507, "learning_rate": 9e-05, "loss": 4.3774, "step": 9 }, { "epoch": 0.0018013960819635217, "grad_norm": 1.0094465017318726, "learning_rate": 0.0001, "loss": 3.1328, "step": 10 }, { "epoch": 0.001981535690159874, "grad_norm": 0.9330719709396362, "learning_rate": 9.999999799451219e-05, "loss": 3.2082, "step": 11 }, { "epoch": 0.002161675298356226, "grad_norm": 0.9791777729988098, "learning_rate": 9.999999197804887e-05, "loss": 2.9366, "step": 12 }, { "epoch": 0.0023418149065525785, "grad_norm": 0.9647117853164673, "learning_rate": 9.999998195061056e-05, "loss": 2.8834, "step": 13 }, { "epoch": 0.0025219545147489304, "grad_norm": 1.0648342370986938, "learning_rate": 9.999996791219805e-05, "loss": 3.1131, "step": 14 }, { "epoch": 0.002702094122945283, "grad_norm": 0.9934209585189819, "learning_rate": 9.999994986281247e-05, "loss": 2.5361, "step": 15 }, { "epoch": 0.0028822337311416348, "grad_norm": 1.257798671722412, "learning_rate": 9.999992780245527e-05, "loss": 2.8522, "step": 16 }, { "epoch": 0.003062373339337987, "grad_norm": 1.149506688117981, "learning_rate": 9.99999017311282e-05, "loss": 2.9118, "step": 17 }, { "epoch": 0.003242512947534339, "grad_norm": 1.1489620208740234, "learning_rate": 9.99998716488334e-05, "loss": 2.8234, "step": 18 }, { "epoch": 0.0034226525557306915, "grad_norm": 1.0399941205978394, "learning_rate": 9.999983755557323e-05, "loss": 2.8068, "step": 19 }, { "epoch": 0.0036027921639270434, "grad_norm": 0.8986930847167969, "learning_rate": 9.999979945135043e-05, "loss": 2.8921, "step": 20 }, { "epoch": 0.003782931772123396, "grad_norm": 0.9371277689933777, "learning_rate": 9.999975733616809e-05, "loss": 2.4691, "step": 21 }, { "epoch": 0.003963071380319748, "grad_norm": 0.9938897490501404, "learning_rate": 9.999971121002958e-05, "loss": 2.8946, "step": 22 }, { "epoch": 0.0041432109885161, "grad_norm": 0.8986033201217651, "learning_rate": 9.999966107293857e-05, "loss": 2.5737, "step": 23 }, { "epoch": 0.004323350596712452, "grad_norm": 0.8785312175750732, "learning_rate": 9.999960692489911e-05, "loss": 2.6496, "step": 24 }, { "epoch": 0.004503490204908804, "grad_norm": 0.863671600818634, "learning_rate": 9.999954876591552e-05, "loss": 2.3118, "step": 25 }, { "epoch": 0.004683629813105157, "grad_norm": 0.9351087212562561, "learning_rate": 9.999948659599252e-05, "loss": 2.7481, "step": 26 }, { "epoch": 0.004863769421301509, "grad_norm": 0.9674515724182129, "learning_rate": 9.999942041513504e-05, "loss": 2.7264, "step": 27 }, { "epoch": 0.005043909029497861, "grad_norm": 1.075107455253601, "learning_rate": 9.99993502233484e-05, "loss": 2.7795, "step": 28 }, { "epoch": 0.005224048637694213, "grad_norm": 1.000078797340393, "learning_rate": 9.999927602063825e-05, "loss": 2.7193, "step": 29 }, { "epoch": 0.005404188245890566, "grad_norm": 0.9467408061027527, "learning_rate": 9.999919780701052e-05, "loss": 2.4211, "step": 30 }, { "epoch": 0.0055843278540869176, "grad_norm": 1.0002549886703491, "learning_rate": 9.999911558247151e-05, "loss": 2.4868, "step": 31 }, { "epoch": 0.0057644674622832695, "grad_norm": 0.9504591226577759, "learning_rate": 9.999902934702778e-05, "loss": 2.6444, "step": 32 }, { "epoch": 0.0059446070704796215, "grad_norm": 0.9449986815452576, "learning_rate": 9.99989391006863e-05, "loss": 2.5808, "step": 33 }, { "epoch": 0.006124746678675974, "grad_norm": 0.8339290618896484, "learning_rate": 9.999884484345426e-05, "loss": 2.4875, "step": 34 }, { "epoch": 0.006304886286872326, "grad_norm": 0.829587459564209, "learning_rate": 9.999874657533923e-05, "loss": 2.5427, "step": 35 }, { "epoch": 0.006485025895068678, "grad_norm": 0.7929829359054565, "learning_rate": 9.999864429634913e-05, "loss": 2.1261, "step": 36 }, { "epoch": 0.00666516550326503, "grad_norm": 0.8820804357528687, "learning_rate": 9.999853800649211e-05, "loss": 3.0714, "step": 37 }, { "epoch": 0.006845305111461383, "grad_norm": 0.8300914764404297, "learning_rate": 9.999842770577675e-05, "loss": 2.5948, "step": 38 }, { "epoch": 0.007025444719657735, "grad_norm": 0.8988324403762817, "learning_rate": 9.999831339421187e-05, "loss": 2.6299, "step": 39 }, { "epoch": 0.007205584327854087, "grad_norm": 0.8685336709022522, "learning_rate": 9.999819507180662e-05, "loss": 2.8515, "step": 40 }, { "epoch": 0.007385723936050439, "grad_norm": 0.7915685176849365, "learning_rate": 9.999807273857054e-05, "loss": 2.3942, "step": 41 }, { "epoch": 0.007565863544246792, "grad_norm": 0.8272666335105896, "learning_rate": 9.99979463945134e-05, "loss": 2.669, "step": 42 }, { "epoch": 0.007746003152443144, "grad_norm": 0.845380425453186, "learning_rate": 9.999781603964535e-05, "loss": 2.6562, "step": 43 }, { "epoch": 0.007926142760639496, "grad_norm": 0.8954911231994629, "learning_rate": 9.999768167397687e-05, "loss": 2.9604, "step": 44 }, { "epoch": 0.008106282368835848, "grad_norm": 0.7897361516952515, "learning_rate": 9.999754329751871e-05, "loss": 2.7354, "step": 45 }, { "epoch": 0.0082864219770322, "grad_norm": 0.8153886795043945, "learning_rate": 9.999740091028198e-05, "loss": 2.5302, "step": 46 }, { "epoch": 0.008466561585228552, "grad_norm": 0.8083345293998718, "learning_rate": 9.99972545122781e-05, "loss": 2.6665, "step": 47 }, { "epoch": 0.008646701193424904, "grad_norm": 0.7844825387001038, "learning_rate": 9.999710410351881e-05, "loss": 2.4497, "step": 48 }, { "epoch": 0.008826840801621256, "grad_norm": 0.7885056734085083, "learning_rate": 9.999694968401618e-05, "loss": 2.6281, "step": 49 }, { "epoch": 0.009006980409817608, "grad_norm": 0.9038015007972717, "learning_rate": 9.999679125378262e-05, "loss": 2.4357, "step": 50 }, { "epoch": 0.00918712001801396, "grad_norm": 1.2260198593139648, "learning_rate": 9.999662881283081e-05, "loss": 2.7279, "step": 51 }, { "epoch": 0.009367259626210314, "grad_norm": 1.0754985809326172, "learning_rate": 9.999646236117377e-05, "loss": 2.9244, "step": 52 }, { "epoch": 0.009547399234406666, "grad_norm": 1.1802886724472046, "learning_rate": 9.999629189882489e-05, "loss": 3.1561, "step": 53 }, { "epoch": 0.009727538842603018, "grad_norm": 1.0842028856277466, "learning_rate": 9.999611742579783e-05, "loss": 2.9517, "step": 54 }, { "epoch": 0.00990767845079937, "grad_norm": 1.1068226099014282, "learning_rate": 9.999593894210657e-05, "loss": 3.2007, "step": 55 }, { "epoch": 0.010087818058995722, "grad_norm": 0.9489306211471558, "learning_rate": 9.999575644776544e-05, "loss": 2.9234, "step": 56 }, { "epoch": 0.010267957667192074, "grad_norm": 1.0008602142333984, "learning_rate": 9.99955699427891e-05, "loss": 2.8981, "step": 57 }, { "epoch": 0.010448097275388426, "grad_norm": 1.174797773361206, "learning_rate": 9.999537942719247e-05, "loss": 3.5682, "step": 58 }, { "epoch": 0.010628236883584778, "grad_norm": 1.3938766717910767, "learning_rate": 9.999518490099086e-05, "loss": 3.8545, "step": 59 }, { "epoch": 0.010808376491781131, "grad_norm": 1.3305302858352661, "learning_rate": 9.999498636419988e-05, "loss": 3.0109, "step": 60 }, { "epoch": 0.010988516099977483, "grad_norm": 0.8620555996894836, "learning_rate": 9.999478381683543e-05, "loss": 2.2525, "step": 61 }, { "epoch": 0.011168655708173835, "grad_norm": 0.7407914400100708, "learning_rate": 9.999457725891377e-05, "loss": 2.3131, "step": 62 }, { "epoch": 0.011348795316370187, "grad_norm": 0.9008898138999939, "learning_rate": 9.999436669045147e-05, "loss": 2.252, "step": 63 }, { "epoch": 0.011528934924566539, "grad_norm": 0.8526875972747803, "learning_rate": 9.999415211146545e-05, "loss": 2.3424, "step": 64 }, { "epoch": 0.011709074532762891, "grad_norm": 0.8313794732093811, "learning_rate": 9.999393352197289e-05, "loss": 2.3411, "step": 65 }, { "epoch": 0.011889214140959243, "grad_norm": 0.8745887875556946, "learning_rate": 9.999371092199132e-05, "loss": 2.354, "step": 66 }, { "epoch": 0.012069353749155595, "grad_norm": 0.7536364197731018, "learning_rate": 9.999348431153863e-05, "loss": 2.358, "step": 67 }, { "epoch": 0.012249493357351949, "grad_norm": 0.7970806360244751, "learning_rate": 9.999325369063297e-05, "loss": 2.1937, "step": 68 }, { "epoch": 0.0124296329655483, "grad_norm": 0.8265585899353027, "learning_rate": 9.999301905929286e-05, "loss": 2.4513, "step": 69 }, { "epoch": 0.012609772573744652, "grad_norm": 0.6983076930046082, "learning_rate": 9.99927804175371e-05, "loss": 2.2879, "step": 70 }, { "epoch": 0.012789912181941004, "grad_norm": 0.7495995759963989, "learning_rate": 9.999253776538484e-05, "loss": 2.5069, "step": 71 }, { "epoch": 0.012970051790137356, "grad_norm": 0.776552677154541, "learning_rate": 9.999229110285557e-05, "loss": 2.5313, "step": 72 }, { "epoch": 0.013150191398333708, "grad_norm": 0.7193177938461304, "learning_rate": 9.999204042996904e-05, "loss": 2.268, "step": 73 }, { "epoch": 0.01333033100653006, "grad_norm": 0.7583246231079102, "learning_rate": 9.999178574674539e-05, "loss": 2.2903, "step": 74 }, { "epoch": 0.013510470614726412, "grad_norm": 0.8359841108322144, "learning_rate": 9.999152705320502e-05, "loss": 2.1628, "step": 75 }, { "epoch": 0.013690610222922766, "grad_norm": 0.8961155414581299, "learning_rate": 9.999126434936871e-05, "loss": 2.4106, "step": 76 }, { "epoch": 0.013870749831119118, "grad_norm": 0.814487099647522, "learning_rate": 9.999099763525755e-05, "loss": 2.5886, "step": 77 }, { "epoch": 0.01405088943931547, "grad_norm": 0.8167582750320435, "learning_rate": 9.999072691089287e-05, "loss": 2.3747, "step": 78 }, { "epoch": 0.014231029047511822, "grad_norm": 0.7982463836669922, "learning_rate": 9.999045217629644e-05, "loss": 2.4233, "step": 79 }, { "epoch": 0.014411168655708174, "grad_norm": 0.7659124732017517, "learning_rate": 9.999017343149029e-05, "loss": 2.2224, "step": 80 }, { "epoch": 0.014591308263904526, "grad_norm": 0.8021962642669678, "learning_rate": 9.998989067649678e-05, "loss": 2.418, "step": 81 }, { "epoch": 0.014771447872100878, "grad_norm": 0.8060923218727112, "learning_rate": 9.998960391133857e-05, "loss": 2.3914, "step": 82 }, { "epoch": 0.01495158748029723, "grad_norm": 0.8500896692276001, "learning_rate": 9.998931313603871e-05, "loss": 2.8956, "step": 83 }, { "epoch": 0.015131727088493583, "grad_norm": 0.7829626798629761, "learning_rate": 9.998901835062048e-05, "loss": 2.3679, "step": 84 }, { "epoch": 0.015311866696689935, "grad_norm": 0.9021100401878357, "learning_rate": 9.998871955510756e-05, "loss": 2.712, "step": 85 }, { "epoch": 0.015492006304886287, "grad_norm": 0.8967096209526062, "learning_rate": 9.99884167495239e-05, "loss": 2.6505, "step": 86 }, { "epoch": 0.01567214591308264, "grad_norm": 0.7971676588058472, "learning_rate": 9.99881099338938e-05, "loss": 2.4672, "step": 87 }, { "epoch": 0.015852285521278993, "grad_norm": 0.9014323353767395, "learning_rate": 9.998779910824185e-05, "loss": 2.4235, "step": 88 }, { "epoch": 0.016032425129475343, "grad_norm": 0.8840922117233276, "learning_rate": 9.998748427259302e-05, "loss": 2.4347, "step": 89 }, { "epoch": 0.016212564737671697, "grad_norm": 0.7735182046890259, "learning_rate": 9.998716542697255e-05, "loss": 2.4093, "step": 90 }, { "epoch": 0.016392704345868047, "grad_norm": 0.8273934125900269, "learning_rate": 9.998684257140603e-05, "loss": 2.7378, "step": 91 }, { "epoch": 0.0165728439540644, "grad_norm": 0.8422622680664062, "learning_rate": 9.998651570591932e-05, "loss": 2.367, "step": 92 }, { "epoch": 0.01675298356226075, "grad_norm": 0.780792236328125, "learning_rate": 9.998618483053868e-05, "loss": 2.2097, "step": 93 }, { "epoch": 0.016933123170457105, "grad_norm": 0.8539138436317444, "learning_rate": 9.998584994529063e-05, "loss": 2.5079, "step": 94 }, { "epoch": 0.017113262778653455, "grad_norm": 0.8521108627319336, "learning_rate": 9.998551105020206e-05, "loss": 2.1245, "step": 95 }, { "epoch": 0.01729340238684981, "grad_norm": 0.9224622845649719, "learning_rate": 9.998516814530015e-05, "loss": 2.2187, "step": 96 }, { "epoch": 0.017473541995046162, "grad_norm": 0.9476681351661682, "learning_rate": 9.998482123061237e-05, "loss": 2.5253, "step": 97 }, { "epoch": 0.017653681603242512, "grad_norm": 1.0359331369400024, "learning_rate": 9.99844703061666e-05, "loss": 2.7389, "step": 98 }, { "epoch": 0.017833821211438866, "grad_norm": 0.9494062662124634, "learning_rate": 9.998411537199096e-05, "loss": 2.0257, "step": 99 }, { "epoch": 0.018013960819635216, "grad_norm": 0.9466662406921387, "learning_rate": 9.998375642811393e-05, "loss": 2.555, "step": 100 }, { "epoch": 0.01819410042783157, "grad_norm": 1.4059162139892578, "learning_rate": 9.99833934745643e-05, "loss": 3.1052, "step": 101 }, { "epoch": 0.01837424003602792, "grad_norm": 1.4918389320373535, "learning_rate": 9.99830265113712e-05, "loss": 3.09, "step": 102 }, { "epoch": 0.018554379644224274, "grad_norm": 1.5020196437835693, "learning_rate": 9.998265553856406e-05, "loss": 3.1729, "step": 103 }, { "epoch": 0.018734519252420628, "grad_norm": 1.1906907558441162, "learning_rate": 9.998228055617263e-05, "loss": 2.8921, "step": 104 }, { "epoch": 0.018914658860616978, "grad_norm": 1.053817868232727, "learning_rate": 9.9981901564227e-05, "loss": 2.9665, "step": 105 }, { "epoch": 0.01909479846881333, "grad_norm": 0.9323416352272034, "learning_rate": 9.99815185627576e-05, "loss": 2.8324, "step": 106 }, { "epoch": 0.019274938077009682, "grad_norm": 1.0798654556274414, "learning_rate": 9.99811315517951e-05, "loss": 3.2274, "step": 107 }, { "epoch": 0.019455077685206035, "grad_norm": 1.1817671060562134, "learning_rate": 9.998074053137056e-05, "loss": 3.2837, "step": 108 }, { "epoch": 0.019635217293402386, "grad_norm": 1.2873262166976929, "learning_rate": 9.998034550151538e-05, "loss": 3.337, "step": 109 }, { "epoch": 0.01981535690159874, "grad_norm": 1.780294418334961, "learning_rate": 9.997994646226122e-05, "loss": 3.371, "step": 110 }, { "epoch": 0.01999549650979509, "grad_norm": 0.9122216105461121, "learning_rate": 9.997954341364011e-05, "loss": 2.2638, "step": 111 }, { "epoch": 0.020175636117991443, "grad_norm": 0.8674176335334778, "learning_rate": 9.997913635568435e-05, "loss": 2.2437, "step": 112 }, { "epoch": 0.020355775726187797, "grad_norm": 0.8664303421974182, "learning_rate": 9.997872528842663e-05, "loss": 2.2444, "step": 113 }, { "epoch": 0.020535915334384147, "grad_norm": 0.890316903591156, "learning_rate": 9.997831021189992e-05, "loss": 2.3717, "step": 114 }, { "epoch": 0.0207160549425805, "grad_norm": 0.776483952999115, "learning_rate": 9.997789112613749e-05, "loss": 2.1567, "step": 115 }, { "epoch": 0.02089619455077685, "grad_norm": 0.9575796127319336, "learning_rate": 9.997746803117298e-05, "loss": 2.2566, "step": 116 }, { "epoch": 0.021076334158973205, "grad_norm": 0.8641085624694824, "learning_rate": 9.997704092704034e-05, "loss": 2.1461, "step": 117 }, { "epoch": 0.021256473767169555, "grad_norm": 0.9085921049118042, "learning_rate": 9.997660981377381e-05, "loss": 2.3909, "step": 118 }, { "epoch": 0.02143661337536591, "grad_norm": 0.8731765747070312, "learning_rate": 9.997617469140799e-05, "loss": 2.0962, "step": 119 }, { "epoch": 0.021616752983562262, "grad_norm": 0.7569247484207153, "learning_rate": 9.997573555997777e-05, "loss": 1.9514, "step": 120 }, { "epoch": 0.021796892591758613, "grad_norm": 0.8122623562812805, "learning_rate": 9.99752924195184e-05, "loss": 2.2195, "step": 121 }, { "epoch": 0.021977032199954966, "grad_norm": 0.7952985763549805, "learning_rate": 9.99748452700654e-05, "loss": 2.2251, "step": 122 }, { "epoch": 0.022157171808151317, "grad_norm": 0.889175295829773, "learning_rate": 9.997439411165466e-05, "loss": 2.0404, "step": 123 }, { "epoch": 0.02233731141634767, "grad_norm": 0.9200299978256226, "learning_rate": 9.997393894432237e-05, "loss": 2.3201, "step": 124 }, { "epoch": 0.02251745102454402, "grad_norm": 0.8736909627914429, "learning_rate": 9.997347976810503e-05, "loss": 1.9624, "step": 125 }, { "epoch": 0.022697590632740374, "grad_norm": 0.8899530172348022, "learning_rate": 9.997301658303948e-05, "loss": 2.2782, "step": 126 }, { "epoch": 0.022877730240936724, "grad_norm": 0.9963721036911011, "learning_rate": 9.997254938916289e-05, "loss": 2.2359, "step": 127 }, { "epoch": 0.023057869849133078, "grad_norm": 0.8973841071128845, "learning_rate": 9.997207818651274e-05, "loss": 2.2099, "step": 128 }, { "epoch": 0.02323800945732943, "grad_norm": 0.9693061113357544, "learning_rate": 9.99716029751268e-05, "loss": 2.0216, "step": 129 }, { "epoch": 0.023418149065525782, "grad_norm": 0.8820716142654419, "learning_rate": 9.997112375504322e-05, "loss": 2.2054, "step": 130 }, { "epoch": 0.023598288673722136, "grad_norm": 1.0860403776168823, "learning_rate": 9.997064052630044e-05, "loss": 2.4929, "step": 131 }, { "epoch": 0.023778428281918486, "grad_norm": 0.9099593758583069, "learning_rate": 9.99701532889372e-05, "loss": 2.0855, "step": 132 }, { "epoch": 0.02395856789011484, "grad_norm": 1.0118968486785889, "learning_rate": 9.99696620429926e-05, "loss": 2.5576, "step": 133 }, { "epoch": 0.02413870749831119, "grad_norm": 0.9647431373596191, "learning_rate": 9.996916678850605e-05, "loss": 2.2156, "step": 134 }, { "epoch": 0.024318847106507543, "grad_norm": 0.9351106882095337, "learning_rate": 9.99686675255173e-05, "loss": 2.5907, "step": 135 }, { "epoch": 0.024498986714703897, "grad_norm": 0.9223719835281372, "learning_rate": 9.996816425406636e-05, "loss": 2.4192, "step": 136 }, { "epoch": 0.024679126322900247, "grad_norm": 0.9520527720451355, "learning_rate": 9.996765697419363e-05, "loss": 2.3176, "step": 137 }, { "epoch": 0.0248592659310966, "grad_norm": 0.8900474309921265, "learning_rate": 9.996714568593981e-05, "loss": 2.3632, "step": 138 }, { "epoch": 0.02503940553929295, "grad_norm": 1.0412280559539795, "learning_rate": 9.996663038934588e-05, "loss": 2.6637, "step": 139 }, { "epoch": 0.025219545147489305, "grad_norm": 0.9756955504417419, "learning_rate": 9.99661110844532e-05, "loss": 2.4556, "step": 140 }, { "epoch": 0.025399684755685655, "grad_norm": 0.9872828125953674, "learning_rate": 9.996558777130344e-05, "loss": 2.5443, "step": 141 }, { "epoch": 0.02557982436388201, "grad_norm": 0.8600561022758484, "learning_rate": 9.996506044993855e-05, "loss": 2.1805, "step": 142 }, { "epoch": 0.02575996397207836, "grad_norm": 0.8384175896644592, "learning_rate": 9.996452912040086e-05, "loss": 2.0727, "step": 143 }, { "epoch": 0.025940103580274713, "grad_norm": 1.015053629875183, "learning_rate": 9.996399378273298e-05, "loss": 2.5225, "step": 144 }, { "epoch": 0.026120243188471066, "grad_norm": 1.0503270626068115, "learning_rate": 9.996345443697785e-05, "loss": 2.3433, "step": 145 }, { "epoch": 0.026300382796667417, "grad_norm": 0.904880940914154, "learning_rate": 9.996291108317874e-05, "loss": 2.1061, "step": 146 }, { "epoch": 0.02648052240486377, "grad_norm": 0.9697627425193787, "learning_rate": 9.996236372137924e-05, "loss": 2.4878, "step": 147 }, { "epoch": 0.02666066201306012, "grad_norm": 0.8747907280921936, "learning_rate": 9.996181235162327e-05, "loss": 1.8756, "step": 148 }, { "epoch": 0.026840801621256474, "grad_norm": 0.9519774913787842, "learning_rate": 9.996125697395503e-05, "loss": 2.2033, "step": 149 }, { "epoch": 0.027020941229452825, "grad_norm": 0.9820895195007324, "learning_rate": 9.99606975884191e-05, "loss": 2.2792, "step": 150 }, { "epoch": 0.027201080837649178, "grad_norm": 1.1771305799484253, "learning_rate": 9.996013419506034e-05, "loss": 2.8351, "step": 151 }, { "epoch": 0.027381220445845532, "grad_norm": 1.1806966066360474, "learning_rate": 9.995956679392395e-05, "loss": 2.8213, "step": 152 }, { "epoch": 0.027561360054041882, "grad_norm": 1.113235354423523, "learning_rate": 9.995899538505546e-05, "loss": 2.5992, "step": 153 }, { "epoch": 0.027741499662238236, "grad_norm": 1.0962517261505127, "learning_rate": 9.995841996850068e-05, "loss": 3.0989, "step": 154 }, { "epoch": 0.027921639270434586, "grad_norm": 1.1463273763656616, "learning_rate": 9.99578405443058e-05, "loss": 2.9798, "step": 155 }, { "epoch": 0.02810177887863094, "grad_norm": 1.1385432481765747, "learning_rate": 9.995725711251726e-05, "loss": 2.9343, "step": 156 }, { "epoch": 0.02828191848682729, "grad_norm": 1.2719049453735352, "learning_rate": 9.995666967318191e-05, "loss": 2.9438, "step": 157 }, { "epoch": 0.028462058095023644, "grad_norm": 1.303350806236267, "learning_rate": 9.995607822634684e-05, "loss": 3.0166, "step": 158 }, { "epoch": 0.028642197703219994, "grad_norm": 1.7089077234268188, "learning_rate": 9.99554827720595e-05, "loss": 3.5575, "step": 159 }, { "epoch": 0.028822337311416348, "grad_norm": 0.9399608969688416, "learning_rate": 9.995488331036769e-05, "loss": 2.5122, "step": 160 }, { "epoch": 0.0290024769196127, "grad_norm": 0.9248188734054565, "learning_rate": 9.995427984131945e-05, "loss": 2.0853, "step": 161 }, { "epoch": 0.02918261652780905, "grad_norm": 1.037304401397705, "learning_rate": 9.995367236496322e-05, "loss": 2.085, "step": 162 }, { "epoch": 0.029362756136005405, "grad_norm": 1.0212739706039429, "learning_rate": 9.995306088134773e-05, "loss": 2.2672, "step": 163 }, { "epoch": 0.029542895744201755, "grad_norm": 0.9299241900444031, "learning_rate": 9.995244539052202e-05, "loss": 2.2869, "step": 164 }, { "epoch": 0.02972303535239811, "grad_norm": 1.0138578414916992, "learning_rate": 9.995182589253548e-05, "loss": 2.0205, "step": 165 }, { "epoch": 0.02990317496059446, "grad_norm": 0.8964521884918213, "learning_rate": 9.995120238743779e-05, "loss": 1.9681, "step": 166 }, { "epoch": 0.030083314568790813, "grad_norm": 0.9747411012649536, "learning_rate": 9.995057487527898e-05, "loss": 2.3657, "step": 167 }, { "epoch": 0.030263454176987167, "grad_norm": 0.9115554690361023, "learning_rate": 9.994994335610939e-05, "loss": 2.0457, "step": 168 }, { "epoch": 0.030443593785183517, "grad_norm": 0.920690655708313, "learning_rate": 9.994930782997966e-05, "loss": 2.2838, "step": 169 }, { "epoch": 0.03062373339337987, "grad_norm": 0.935954213142395, "learning_rate": 9.994866829694079e-05, "loss": 2.2176, "step": 170 }, { "epoch": 0.03080387300157622, "grad_norm": 0.8992125391960144, "learning_rate": 9.994802475704408e-05, "loss": 2.0229, "step": 171 }, { "epoch": 0.030984012609772574, "grad_norm": 0.9858156442642212, "learning_rate": 9.994737721034114e-05, "loss": 2.0928, "step": 172 }, { "epoch": 0.031164152217968925, "grad_norm": 0.8882651925086975, "learning_rate": 9.994672565688394e-05, "loss": 2.1093, "step": 173 }, { "epoch": 0.03134429182616528, "grad_norm": 1.0130804777145386, "learning_rate": 9.994607009672474e-05, "loss": 2.412, "step": 174 }, { "epoch": 0.03152443143436163, "grad_norm": 1.0048799514770508, "learning_rate": 9.994541052991611e-05, "loss": 2.0407, "step": 175 }, { "epoch": 0.031704571042557986, "grad_norm": 0.9575945138931274, "learning_rate": 9.9944746956511e-05, "loss": 2.1829, "step": 176 }, { "epoch": 0.031884710650754336, "grad_norm": 0.9418357610702515, "learning_rate": 9.994407937656259e-05, "loss": 2.2905, "step": 177 }, { "epoch": 0.032064850258950686, "grad_norm": 0.9341018795967102, "learning_rate": 9.994340779012448e-05, "loss": 2.1959, "step": 178 }, { "epoch": 0.032244989867147036, "grad_norm": 0.9014979004859924, "learning_rate": 9.994273219725052e-05, "loss": 1.8595, "step": 179 }, { "epoch": 0.032425129475343394, "grad_norm": 0.9666259288787842, "learning_rate": 9.99420525979949e-05, "loss": 2.1748, "step": 180 }, { "epoch": 0.032605269083539744, "grad_norm": 0.9988476037979126, "learning_rate": 9.994136899241214e-05, "loss": 2.3861, "step": 181 }, { "epoch": 0.032785408691736094, "grad_norm": 0.9823298454284668, "learning_rate": 9.99406813805571e-05, "loss": 2.338, "step": 182 }, { "epoch": 0.032965548299932444, "grad_norm": 1.0125535726547241, "learning_rate": 9.993998976248491e-05, "loss": 2.151, "step": 183 }, { "epoch": 0.0331456879081288, "grad_norm": 0.9595600366592407, "learning_rate": 9.993929413825109e-05, "loss": 2.0875, "step": 184 }, { "epoch": 0.03332582751632515, "grad_norm": 0.9910955429077148, "learning_rate": 9.99385945079114e-05, "loss": 2.2156, "step": 185 }, { "epoch": 0.0335059671245215, "grad_norm": 0.9795382618904114, "learning_rate": 9.9937890871522e-05, "loss": 2.3917, "step": 186 }, { "epoch": 0.03368610673271786, "grad_norm": 1.0755410194396973, "learning_rate": 9.99371832291393e-05, "loss": 2.3763, "step": 187 }, { "epoch": 0.03386624634091421, "grad_norm": 0.968889594078064, "learning_rate": 9.993647158082009e-05, "loss": 2.2037, "step": 188 }, { "epoch": 0.03404638594911056, "grad_norm": 1.0878701210021973, "learning_rate": 9.993575592662145e-05, "loss": 2.425, "step": 189 }, { "epoch": 0.03422652555730691, "grad_norm": 1.120072364807129, "learning_rate": 9.993503626660079e-05, "loss": 2.3524, "step": 190 }, { "epoch": 0.03440666516550327, "grad_norm": 1.0771347284317017, "learning_rate": 9.993431260081585e-05, "loss": 2.4268, "step": 191 }, { "epoch": 0.03458680477369962, "grad_norm": 0.9888401627540588, "learning_rate": 9.993358492932467e-05, "loss": 1.8791, "step": 192 }, { "epoch": 0.03476694438189597, "grad_norm": 1.0292469263076782, "learning_rate": 9.993285325218564e-05, "loss": 2.4085, "step": 193 }, { "epoch": 0.034947083990092324, "grad_norm": 0.9969762563705444, "learning_rate": 9.993211756945744e-05, "loss": 2.3485, "step": 194 }, { "epoch": 0.035127223598288675, "grad_norm": 1.1246824264526367, "learning_rate": 9.993137788119909e-05, "loss": 2.4197, "step": 195 }, { "epoch": 0.035307363206485025, "grad_norm": 1.1985913515090942, "learning_rate": 9.993063418746991e-05, "loss": 2.3737, "step": 196 }, { "epoch": 0.035487502814681375, "grad_norm": 1.054831862449646, "learning_rate": 9.99298864883296e-05, "loss": 2.4413, "step": 197 }, { "epoch": 0.03566764242287773, "grad_norm": 1.0391515493392944, "learning_rate": 9.99291347838381e-05, "loss": 2.2235, "step": 198 }, { "epoch": 0.03584778203107408, "grad_norm": 1.234522819519043, "learning_rate": 9.992837907405574e-05, "loss": 2.5434, "step": 199 }, { "epoch": 0.03602792163927043, "grad_norm": 1.055237054824829, "learning_rate": 9.992761935904312e-05, "loss": 2.2512, "step": 200 }, { "epoch": 0.03620806124746679, "grad_norm": 1.484744906425476, "learning_rate": 9.99268556388612e-05, "loss": 3.1837, "step": 201 }, { "epoch": 0.03638820085566314, "grad_norm": 1.3243892192840576, "learning_rate": 9.992608791357123e-05, "loss": 2.682, "step": 202 }, { "epoch": 0.03656834046385949, "grad_norm": 1.2395107746124268, "learning_rate": 9.992531618323482e-05, "loss": 2.5428, "step": 203 }, { "epoch": 0.03674848007205584, "grad_norm": 1.220672607421875, "learning_rate": 9.992454044791384e-05, "loss": 2.7916, "step": 204 }, { "epoch": 0.0369286196802522, "grad_norm": 1.2384480237960815, "learning_rate": 9.992376070767056e-05, "loss": 2.9702, "step": 205 }, { "epoch": 0.03710875928844855, "grad_norm": 1.176816463470459, "learning_rate": 9.992297696256752e-05, "loss": 2.7499, "step": 206 }, { "epoch": 0.0372888988966449, "grad_norm": 1.1946957111358643, "learning_rate": 9.992218921266756e-05, "loss": 2.638, "step": 207 }, { "epoch": 0.037469038504841255, "grad_norm": 1.4310799837112427, "learning_rate": 9.992139745803393e-05, "loss": 2.5702, "step": 208 }, { "epoch": 0.037649178113037605, "grad_norm": 1.6133946180343628, "learning_rate": 9.992060169873008e-05, "loss": 3.4747, "step": 209 }, { "epoch": 0.037829317721233956, "grad_norm": 1.2044761180877686, "learning_rate": 9.991980193481989e-05, "loss": 2.7726, "step": 210 }, { "epoch": 0.038009457329430306, "grad_norm": 1.0784586668014526, "learning_rate": 9.991899816636751e-05, "loss": 2.0399, "step": 211 }, { "epoch": 0.03818959693762666, "grad_norm": 1.1064428091049194, "learning_rate": 9.991819039343741e-05, "loss": 2.3068, "step": 212 }, { "epoch": 0.03836973654582301, "grad_norm": 1.1013522148132324, "learning_rate": 9.991737861609438e-05, "loss": 2.0892, "step": 213 }, { "epoch": 0.038549876154019364, "grad_norm": 1.0337615013122559, "learning_rate": 9.991656283440356e-05, "loss": 2.2709, "step": 214 }, { "epoch": 0.038730015762215714, "grad_norm": 1.0415656566619873, "learning_rate": 9.991574304843038e-05, "loss": 2.0989, "step": 215 }, { "epoch": 0.03891015537041207, "grad_norm": 0.9581682085990906, "learning_rate": 9.991491925824062e-05, "loss": 2.2555, "step": 216 }, { "epoch": 0.03909029497860842, "grad_norm": 1.040091872215271, "learning_rate": 9.991409146390034e-05, "loss": 1.9537, "step": 217 }, { "epoch": 0.03927043458680477, "grad_norm": 0.9496813416481018, "learning_rate": 9.991325966547596e-05, "loss": 2.1856, "step": 218 }, { "epoch": 0.03945057419500113, "grad_norm": 0.9699931740760803, "learning_rate": 9.991242386303419e-05, "loss": 2.1698, "step": 219 }, { "epoch": 0.03963071380319748, "grad_norm": 1.062461018562317, "learning_rate": 9.991158405664209e-05, "loss": 2.158, "step": 220 }, { "epoch": 0.03981085341139383, "grad_norm": 0.9707486629486084, "learning_rate": 9.991074024636704e-05, "loss": 2.0502, "step": 221 }, { "epoch": 0.03999099301959018, "grad_norm": 1.0586658716201782, "learning_rate": 9.99098924322767e-05, "loss": 2.2018, "step": 222 }, { "epoch": 0.040171132627786536, "grad_norm": 1.1378344297409058, "learning_rate": 9.990904061443911e-05, "loss": 2.4697, "step": 223 }, { "epoch": 0.04035127223598289, "grad_norm": 1.0444320440292358, "learning_rate": 9.990818479292259e-05, "loss": 1.8717, "step": 224 }, { "epoch": 0.04053141184417924, "grad_norm": 1.0241833925247192, "learning_rate": 9.99073249677958e-05, "loss": 2.3334, "step": 225 }, { "epoch": 0.040711551452375594, "grad_norm": 1.0246652364730835, "learning_rate": 9.99064611391277e-05, "loss": 2.0989, "step": 226 }, { "epoch": 0.040891691060571944, "grad_norm": 1.0386786460876465, "learning_rate": 9.99055933069876e-05, "loss": 2.1809, "step": 227 }, { "epoch": 0.041071830668768294, "grad_norm": 1.0378458499908447, "learning_rate": 9.990472147144511e-05, "loss": 2.292, "step": 228 }, { "epoch": 0.041251970276964645, "grad_norm": 1.0621331930160522, "learning_rate": 9.990384563257018e-05, "loss": 2.1973, "step": 229 }, { "epoch": 0.041432109885161, "grad_norm": 1.0714662075042725, "learning_rate": 9.990296579043305e-05, "loss": 2.1582, "step": 230 }, { "epoch": 0.04161224949335735, "grad_norm": 1.0897587537765503, "learning_rate": 9.990208194510433e-05, "loss": 2.2179, "step": 231 }, { "epoch": 0.0417923891015537, "grad_norm": 1.0470248460769653, "learning_rate": 9.990119409665489e-05, "loss": 2.3186, "step": 232 }, { "epoch": 0.04197252870975006, "grad_norm": 1.0715101957321167, "learning_rate": 9.990030224515596e-05, "loss": 2.2274, "step": 233 }, { "epoch": 0.04215266831794641, "grad_norm": 1.1052443981170654, "learning_rate": 9.989940639067911e-05, "loss": 2.3154, "step": 234 }, { "epoch": 0.04233280792614276, "grad_norm": 1.107999563217163, "learning_rate": 9.989850653329617e-05, "loss": 2.3555, "step": 235 }, { "epoch": 0.04251294753433911, "grad_norm": 1.1312811374664307, "learning_rate": 9.989760267307934e-05, "loss": 2.5028, "step": 236 }, { "epoch": 0.04269308714253547, "grad_norm": 1.0960156917572021, "learning_rate": 9.989669481010113e-05, "loss": 2.4164, "step": 237 }, { "epoch": 0.04287322675073182, "grad_norm": 1.0567772388458252, "learning_rate": 9.989578294443436e-05, "loss": 2.0846, "step": 238 }, { "epoch": 0.04305336635892817, "grad_norm": 1.0982437133789062, "learning_rate": 9.989486707615219e-05, "loss": 2.322, "step": 239 }, { "epoch": 0.043233505967124525, "grad_norm": 1.1053307056427002, "learning_rate": 9.98939472053281e-05, "loss": 2.4231, "step": 240 }, { "epoch": 0.043413645575320875, "grad_norm": 1.0721138715744019, "learning_rate": 9.989302333203585e-05, "loss": 2.3685, "step": 241 }, { "epoch": 0.043593785183517225, "grad_norm": 1.0529550313949585, "learning_rate": 9.989209545634957e-05, "loss": 1.8775, "step": 242 }, { "epoch": 0.043773924791713575, "grad_norm": 1.1814721822738647, "learning_rate": 9.98911635783437e-05, "loss": 2.475, "step": 243 }, { "epoch": 0.04395406439990993, "grad_norm": 1.2148950099945068, "learning_rate": 9.989022769809299e-05, "loss": 2.5961, "step": 244 }, { "epoch": 0.04413420400810628, "grad_norm": 1.1676557064056396, "learning_rate": 9.98892878156725e-05, "loss": 2.0822, "step": 245 }, { "epoch": 0.04431434361630263, "grad_norm": 1.2208837270736694, "learning_rate": 9.988834393115767e-05, "loss": 2.3525, "step": 246 }, { "epoch": 0.04449448322449899, "grad_norm": 1.1107949018478394, "learning_rate": 9.988739604462415e-05, "loss": 1.9607, "step": 247 }, { "epoch": 0.04467462283269534, "grad_norm": 1.096728801727295, "learning_rate": 9.988644415614806e-05, "loss": 2.2089, "step": 248 }, { "epoch": 0.04485476244089169, "grad_norm": 1.2097132205963135, "learning_rate": 9.98854882658057e-05, "loss": 2.1281, "step": 249 }, { "epoch": 0.04503490204908804, "grad_norm": 1.3903559446334839, "learning_rate": 9.988452837367377e-05, "loss": 2.2447, "step": 250 }, { "epoch": 0.0452150416572844, "grad_norm": 1.250702142715454, "learning_rate": 9.988356447982927e-05, "loss": 2.3893, "step": 251 }, { "epoch": 0.04539518126548075, "grad_norm": 1.2847386598587036, "learning_rate": 9.988259658434952e-05, "loss": 2.7229, "step": 252 }, { "epoch": 0.0455753208736771, "grad_norm": 1.2485649585723877, "learning_rate": 9.988162468731219e-05, "loss": 2.5685, "step": 253 }, { "epoch": 0.04575546048187345, "grad_norm": 1.2486999034881592, "learning_rate": 9.98806487887952e-05, "loss": 2.6679, "step": 254 }, { "epoch": 0.045935600090069806, "grad_norm": 1.3331091403961182, "learning_rate": 9.987966888887687e-05, "loss": 2.7473, "step": 255 }, { "epoch": 0.046115739698266156, "grad_norm": 1.2322219610214233, "learning_rate": 9.987868498763581e-05, "loss": 2.6721, "step": 256 }, { "epoch": 0.046295879306462506, "grad_norm": 1.2584573030471802, "learning_rate": 9.987769708515091e-05, "loss": 2.5292, "step": 257 }, { "epoch": 0.04647601891465886, "grad_norm": 1.2774226665496826, "learning_rate": 9.987670518150147e-05, "loss": 2.6983, "step": 258 }, { "epoch": 0.046656158522855214, "grad_norm": 1.4696308374404907, "learning_rate": 9.987570927676701e-05, "loss": 2.7173, "step": 259 }, { "epoch": 0.046836298131051564, "grad_norm": 1.8001126050949097, "learning_rate": 9.987470937102747e-05, "loss": 3.094, "step": 260 }, { "epoch": 0.047016437739247914, "grad_norm": 1.1973381042480469, "learning_rate": 9.9873705464363e-05, "loss": 2.3736, "step": 261 }, { "epoch": 0.04719657734744427, "grad_norm": 1.4078965187072754, "learning_rate": 9.98726975568542e-05, "loss": 2.316, "step": 262 }, { "epoch": 0.04737671695564062, "grad_norm": 1.417547583580017, "learning_rate": 9.98716856485819e-05, "loss": 2.7383, "step": 263 }, { "epoch": 0.04755685656383697, "grad_norm": 1.435121774673462, "learning_rate": 9.987066973962726e-05, "loss": 2.0731, "step": 264 }, { "epoch": 0.04773699617203333, "grad_norm": 1.2506507635116577, "learning_rate": 9.98696498300718e-05, "loss": 2.1467, "step": 265 }, { "epoch": 0.04791713578022968, "grad_norm": 1.2805836200714111, "learning_rate": 9.986862591999728e-05, "loss": 2.2571, "step": 266 }, { "epoch": 0.04809727538842603, "grad_norm": 1.1107738018035889, "learning_rate": 9.986759800948591e-05, "loss": 1.9794, "step": 267 }, { "epoch": 0.04827741499662238, "grad_norm": 1.0665452480316162, "learning_rate": 9.986656609862011e-05, "loss": 1.9837, "step": 268 }, { "epoch": 0.04845755460481874, "grad_norm": 0.9659749269485474, "learning_rate": 9.986553018748266e-05, "loss": 2.0036, "step": 269 }, { "epoch": 0.04863769421301509, "grad_norm": 0.935899555683136, "learning_rate": 9.986449027615667e-05, "loss": 1.9917, "step": 270 }, { "epoch": 0.04881783382121144, "grad_norm": 1.054198145866394, "learning_rate": 9.986344636472558e-05, "loss": 1.9455, "step": 271 }, { "epoch": 0.048997973429407794, "grad_norm": 1.116992473602295, "learning_rate": 9.986239845327308e-05, "loss": 2.13, "step": 272 }, { "epoch": 0.049178113037604144, "grad_norm": 1.214431643486023, "learning_rate": 9.986134654188328e-05, "loss": 2.0619, "step": 273 }, { "epoch": 0.049358252645800495, "grad_norm": 1.1717039346694946, "learning_rate": 9.986029063064053e-05, "loss": 1.9885, "step": 274 }, { "epoch": 0.049538392253996845, "grad_norm": 1.2670177221298218, "learning_rate": 9.985923071962957e-05, "loss": 2.1736, "step": 275 }, { "epoch": 0.0497185318621932, "grad_norm": 1.127020239830017, "learning_rate": 9.98581668089354e-05, "loss": 2.1314, "step": 276 }, { "epoch": 0.04989867147038955, "grad_norm": 1.0725072622299194, "learning_rate": 9.985709889864337e-05, "loss": 2.0381, "step": 277 }, { "epoch": 0.0500788110785859, "grad_norm": 1.148308277130127, "learning_rate": 9.985602698883916e-05, "loss": 2.1644, "step": 278 }, { "epoch": 0.05025895068678226, "grad_norm": 1.0978087186813354, "learning_rate": 9.985495107960876e-05, "loss": 2.0789, "step": 279 }, { "epoch": 0.05043909029497861, "grad_norm": 1.1935482025146484, "learning_rate": 9.985387117103843e-05, "loss": 2.2864, "step": 280 }, { "epoch": 0.05061922990317496, "grad_norm": 1.1536046266555786, "learning_rate": 9.985278726321487e-05, "loss": 2.2444, "step": 281 }, { "epoch": 0.05079936951137131, "grad_norm": 1.1859569549560547, "learning_rate": 9.985169935622498e-05, "loss": 2.1967, "step": 282 }, { "epoch": 0.05097950911956767, "grad_norm": 1.2108591794967651, "learning_rate": 9.985060745015607e-05, "loss": 2.309, "step": 283 }, { "epoch": 0.05115964872776402, "grad_norm": 1.1978607177734375, "learning_rate": 9.98495115450957e-05, "loss": 2.4844, "step": 284 }, { "epoch": 0.05133978833596037, "grad_norm": 1.105621337890625, "learning_rate": 9.98484116411318e-05, "loss": 2.3288, "step": 285 }, { "epoch": 0.05151992794415672, "grad_norm": 1.1028847694396973, "learning_rate": 9.984730773835258e-05, "loss": 2.1256, "step": 286 }, { "epoch": 0.051700067552353075, "grad_norm": 1.182299017906189, "learning_rate": 9.984619983684662e-05, "loss": 2.4203, "step": 287 }, { "epoch": 0.051880207160549426, "grad_norm": 1.120054006576538, "learning_rate": 9.984508793670279e-05, "loss": 2.3129, "step": 288 }, { "epoch": 0.052060346768745776, "grad_norm": 1.0880308151245117, "learning_rate": 9.98439720380103e-05, "loss": 2.2993, "step": 289 }, { "epoch": 0.05224048637694213, "grad_norm": 1.1723957061767578, "learning_rate": 9.984285214085865e-05, "loss": 2.4264, "step": 290 }, { "epoch": 0.05242062598513848, "grad_norm": 1.0767836570739746, "learning_rate": 9.984172824533766e-05, "loss": 1.9417, "step": 291 }, { "epoch": 0.05260076559333483, "grad_norm": 1.090864658355713, "learning_rate": 9.98406003515375e-05, "loss": 2.2895, "step": 292 }, { "epoch": 0.052780905201531184, "grad_norm": 1.0966577529907227, "learning_rate": 9.983946845954868e-05, "loss": 2.2194, "step": 293 }, { "epoch": 0.05296104480972754, "grad_norm": 1.108288049697876, "learning_rate": 9.983833256946198e-05, "loss": 2.0902, "step": 294 }, { "epoch": 0.05314118441792389, "grad_norm": 1.1045280694961548, "learning_rate": 9.98371926813685e-05, "loss": 2.382, "step": 295 }, { "epoch": 0.05332132402612024, "grad_norm": 1.0808862447738647, "learning_rate": 9.98360487953597e-05, "loss": 2.1628, "step": 296 }, { "epoch": 0.0535014636343166, "grad_norm": 1.1434890031814575, "learning_rate": 9.983490091152734e-05, "loss": 2.1914, "step": 297 }, { "epoch": 0.05368160324251295, "grad_norm": 1.0393102169036865, "learning_rate": 9.98337490299635e-05, "loss": 2.069, "step": 298 }, { "epoch": 0.0538617428507093, "grad_norm": 1.1679807901382446, "learning_rate": 9.98325931507606e-05, "loss": 2.1102, "step": 299 }, { "epoch": 0.05404188245890565, "grad_norm": 1.3268619775772095, "learning_rate": 9.983143327401134e-05, "loss": 2.2969, "step": 300 }, { "epoch": 0.054222022067102006, "grad_norm": 1.3637564182281494, "learning_rate": 9.983026939980877e-05, "loss": 2.6549, "step": 301 }, { "epoch": 0.054402161675298356, "grad_norm": 1.292595386505127, "learning_rate": 9.982910152824627e-05, "loss": 2.7285, "step": 302 }, { "epoch": 0.05458230128349471, "grad_norm": 1.1744555234909058, "learning_rate": 9.982792965941751e-05, "loss": 2.5136, "step": 303 }, { "epoch": 0.054762440891691064, "grad_norm": 1.2590298652648926, "learning_rate": 9.982675379341651e-05, "loss": 2.6739, "step": 304 }, { "epoch": 0.054942580499887414, "grad_norm": 1.2690422534942627, "learning_rate": 9.982557393033758e-05, "loss": 2.4736, "step": 305 }, { "epoch": 0.055122720108083764, "grad_norm": 1.2975382804870605, "learning_rate": 9.982439007027538e-05, "loss": 2.6993, "step": 306 }, { "epoch": 0.055302859716280114, "grad_norm": 1.5365309715270996, "learning_rate": 9.982320221332488e-05, "loss": 3.1287, "step": 307 }, { "epoch": 0.05548299932447647, "grad_norm": 1.339965581893921, "learning_rate": 9.982201035958138e-05, "loss": 2.6284, "step": 308 }, { "epoch": 0.05566313893267282, "grad_norm": 1.4245731830596924, "learning_rate": 9.982081450914046e-05, "loss": 2.4764, "step": 309 }, { "epoch": 0.05584327854086917, "grad_norm": 1.81796133518219, "learning_rate": 9.981961466209808e-05, "loss": 3.0644, "step": 310 }, { "epoch": 0.05602341814906553, "grad_norm": 1.4154448509216309, "learning_rate": 9.981841081855046e-05, "loss": 2.5412, "step": 311 }, { "epoch": 0.05620355775726188, "grad_norm": 1.485663890838623, "learning_rate": 9.98172029785942e-05, "loss": 2.1247, "step": 312 }, { "epoch": 0.05638369736545823, "grad_norm": 1.418679118156433, "learning_rate": 9.981599114232618e-05, "loss": 2.0103, "step": 313 }, { "epoch": 0.05656383697365458, "grad_norm": 1.4113951921463013, "learning_rate": 9.981477530984362e-05, "loss": 2.2682, "step": 314 }, { "epoch": 0.05674397658185094, "grad_norm": 1.078120231628418, "learning_rate": 9.981355548124405e-05, "loss": 1.8057, "step": 315 }, { "epoch": 0.05692411619004729, "grad_norm": 1.241740345954895, "learning_rate": 9.981233165662531e-05, "loss": 2.1231, "step": 316 }, { "epoch": 0.05710425579824364, "grad_norm": 1.234967827796936, "learning_rate": 9.98111038360856e-05, "loss": 1.99, "step": 317 }, { "epoch": 0.05728439540643999, "grad_norm": 1.0713143348693848, "learning_rate": 9.980987201972338e-05, "loss": 1.9586, "step": 318 }, { "epoch": 0.057464535014636345, "grad_norm": 1.058562159538269, "learning_rate": 9.980863620763752e-05, "loss": 1.966, "step": 319 }, { "epoch": 0.057644674622832695, "grad_norm": 1.1675026416778564, "learning_rate": 9.980739639992711e-05, "loss": 2.3292, "step": 320 }, { "epoch": 0.057824814231029045, "grad_norm": 1.0847362279891968, "learning_rate": 9.980615259669162e-05, "loss": 2.0149, "step": 321 }, { "epoch": 0.0580049538392254, "grad_norm": 1.1505693197250366, "learning_rate": 9.980490479803082e-05, "loss": 2.0683, "step": 322 }, { "epoch": 0.05818509344742175, "grad_norm": 1.2553009986877441, "learning_rate": 9.980365300404483e-05, "loss": 1.9848, "step": 323 }, { "epoch": 0.0583652330556181, "grad_norm": 1.2744611501693726, "learning_rate": 9.980239721483404e-05, "loss": 2.1334, "step": 324 }, { "epoch": 0.05854537266381445, "grad_norm": 1.248488187789917, "learning_rate": 9.980113743049922e-05, "loss": 2.1967, "step": 325 }, { "epoch": 0.05872551227201081, "grad_norm": 1.2903262376785278, "learning_rate": 9.97998736511414e-05, "loss": 2.1005, "step": 326 }, { "epoch": 0.05890565188020716, "grad_norm": 1.1605138778686523, "learning_rate": 9.979860587686198e-05, "loss": 2.2164, "step": 327 }, { "epoch": 0.05908579148840351, "grad_norm": 1.053455114364624, "learning_rate": 9.979733410776266e-05, "loss": 1.8878, "step": 328 }, { "epoch": 0.05926593109659987, "grad_norm": 1.1907683610916138, "learning_rate": 9.979605834394544e-05, "loss": 2.0901, "step": 329 }, { "epoch": 0.05944607070479622, "grad_norm": 1.1980634927749634, "learning_rate": 9.979477858551267e-05, "loss": 2.1962, "step": 330 }, { "epoch": 0.05962621031299257, "grad_norm": 1.1212539672851562, "learning_rate": 9.979349483256704e-05, "loss": 1.8947, "step": 331 }, { "epoch": 0.05980634992118892, "grad_norm": 1.1518847942352295, "learning_rate": 9.979220708521148e-05, "loss": 2.1186, "step": 332 }, { "epoch": 0.059986489529385276, "grad_norm": 1.1192442178726196, "learning_rate": 9.979091534354933e-05, "loss": 1.8505, "step": 333 }, { "epoch": 0.060166629137581626, "grad_norm": 1.2988682985305786, "learning_rate": 9.978961960768421e-05, "loss": 2.2051, "step": 334 }, { "epoch": 0.060346768745777976, "grad_norm": 1.1861850023269653, "learning_rate": 9.978831987772006e-05, "loss": 2.1588, "step": 335 }, { "epoch": 0.06052690835397433, "grad_norm": 1.2539981603622437, "learning_rate": 9.978701615376113e-05, "loss": 2.17, "step": 336 }, { "epoch": 0.060707047962170683, "grad_norm": 1.2473936080932617, "learning_rate": 9.978570843591202e-05, "loss": 2.4724, "step": 337 }, { "epoch": 0.060887187570367034, "grad_norm": 1.2099696397781372, "learning_rate": 9.978439672427762e-05, "loss": 1.9898, "step": 338 }, { "epoch": 0.061067327178563384, "grad_norm": 1.1351348161697388, "learning_rate": 9.978308101896318e-05, "loss": 2.1373, "step": 339 }, { "epoch": 0.06124746678675974, "grad_norm": 1.206483006477356, "learning_rate": 9.978176132007422e-05, "loss": 2.0748, "step": 340 }, { "epoch": 0.06142760639495609, "grad_norm": 1.1359015703201294, "learning_rate": 9.97804376277166e-05, "loss": 1.9103, "step": 341 }, { "epoch": 0.06160774600315244, "grad_norm": 1.3475091457366943, "learning_rate": 9.977910994199655e-05, "loss": 2.2672, "step": 342 }, { "epoch": 0.0617878856113488, "grad_norm": 1.226010799407959, "learning_rate": 9.977777826302053e-05, "loss": 2.0521, "step": 343 }, { "epoch": 0.06196802521954515, "grad_norm": 1.1900585889816284, "learning_rate": 9.977644259089539e-05, "loss": 2.1845, "step": 344 }, { "epoch": 0.0621481648277415, "grad_norm": 1.2761820554733276, "learning_rate": 9.977510292572826e-05, "loss": 2.2941, "step": 345 }, { "epoch": 0.06232830443593785, "grad_norm": 1.282155990600586, "learning_rate": 9.977375926762662e-05, "loss": 2.4086, "step": 346 }, { "epoch": 0.0625084440441342, "grad_norm": 1.2463797330856323, "learning_rate": 9.977241161669825e-05, "loss": 2.3348, "step": 347 }, { "epoch": 0.06268858365233056, "grad_norm": 1.20982825756073, "learning_rate": 9.97710599730513e-05, "loss": 1.9883, "step": 348 }, { "epoch": 0.06286872326052691, "grad_norm": 1.2739825248718262, "learning_rate": 9.976970433679413e-05, "loss": 2.2029, "step": 349 }, { "epoch": 0.06304886286872326, "grad_norm": 1.2550883293151855, "learning_rate": 9.976834470803552e-05, "loss": 2.2877, "step": 350 }, { "epoch": 0.06322900247691961, "grad_norm": 1.5917472839355469, "learning_rate": 9.976698108688454e-05, "loss": 3.0127, "step": 351 }, { "epoch": 0.06340914208511597, "grad_norm": 1.3110361099243164, "learning_rate": 9.976561347345059e-05, "loss": 2.3605, "step": 352 }, { "epoch": 0.06358928169331232, "grad_norm": 1.384914755821228, "learning_rate": 9.976424186784336e-05, "loss": 2.8826, "step": 353 }, { "epoch": 0.06376942130150867, "grad_norm": 1.401537299156189, "learning_rate": 9.976286627017289e-05, "loss": 2.9413, "step": 354 }, { "epoch": 0.06394956090970502, "grad_norm": 1.298903465270996, "learning_rate": 9.976148668054951e-05, "loss": 2.6467, "step": 355 }, { "epoch": 0.06412970051790137, "grad_norm": 1.4152841567993164, "learning_rate": 9.976010309908392e-05, "loss": 2.3555, "step": 356 }, { "epoch": 0.06430984012609772, "grad_norm": 1.5927162170410156, "learning_rate": 9.975871552588709e-05, "loss": 3.0816, "step": 357 }, { "epoch": 0.06448997973429407, "grad_norm": 1.6605229377746582, "learning_rate": 9.975732396107035e-05, "loss": 3.0563, "step": 358 }, { "epoch": 0.06467011934249044, "grad_norm": 1.9879968166351318, "learning_rate": 9.97559284047453e-05, "loss": 2.9739, "step": 359 }, { "epoch": 0.06485025895068679, "grad_norm": 1.1902703046798706, "learning_rate": 9.975452885702392e-05, "loss": 2.1506, "step": 360 }, { "epoch": 0.06503039855888314, "grad_norm": 1.2667981386184692, "learning_rate": 9.975312531801849e-05, "loss": 1.9611, "step": 361 }, { "epoch": 0.06521053816707949, "grad_norm": 1.1608258485794067, "learning_rate": 9.975171778784154e-05, "loss": 1.9572, "step": 362 }, { "epoch": 0.06539067777527584, "grad_norm": 1.2240402698516846, "learning_rate": 9.975030626660604e-05, "loss": 2.0274, "step": 363 }, { "epoch": 0.06557081738347219, "grad_norm": 1.2387436628341675, "learning_rate": 9.974889075442521e-05, "loss": 2.0846, "step": 364 }, { "epoch": 0.06575095699166854, "grad_norm": 1.1588557958602905, "learning_rate": 9.97474712514126e-05, "loss": 2.1116, "step": 365 }, { "epoch": 0.06593109659986489, "grad_norm": 1.1860038042068481, "learning_rate": 9.974604775768206e-05, "loss": 2.069, "step": 366 }, { "epoch": 0.06611123620806125, "grad_norm": 1.1692122220993042, "learning_rate": 9.97446202733478e-05, "loss": 2.1168, "step": 367 }, { "epoch": 0.0662913758162576, "grad_norm": 1.1010984182357788, "learning_rate": 9.974318879852435e-05, "loss": 1.8769, "step": 368 }, { "epoch": 0.06647151542445395, "grad_norm": 1.1738924980163574, "learning_rate": 9.97417533333265e-05, "loss": 2.0203, "step": 369 }, { "epoch": 0.0666516550326503, "grad_norm": 1.1776379346847534, "learning_rate": 9.974031387786945e-05, "loss": 2.0117, "step": 370 }, { "epoch": 0.06683179464084665, "grad_norm": 1.176415205001831, "learning_rate": 9.973887043226863e-05, "loss": 1.9938, "step": 371 }, { "epoch": 0.067011934249043, "grad_norm": 1.03434157371521, "learning_rate": 9.973742299663986e-05, "loss": 1.9566, "step": 372 }, { "epoch": 0.06719207385723935, "grad_norm": 1.062901258468628, "learning_rate": 9.973597157109924e-05, "loss": 1.6733, "step": 373 }, { "epoch": 0.06737221346543572, "grad_norm": 1.09894859790802, "learning_rate": 9.973451615576321e-05, "loss": 1.6909, "step": 374 }, { "epoch": 0.06755235307363207, "grad_norm": 1.1642646789550781, "learning_rate": 9.973305675074851e-05, "loss": 2.1543, "step": 375 }, { "epoch": 0.06773249268182842, "grad_norm": 1.2013028860092163, "learning_rate": 9.973159335617223e-05, "loss": 2.0974, "step": 376 }, { "epoch": 0.06791263229002477, "grad_norm": 1.1819990873336792, "learning_rate": 9.973012597215175e-05, "loss": 2.1242, "step": 377 }, { "epoch": 0.06809277189822112, "grad_norm": 1.2293394804000854, "learning_rate": 9.972865459880479e-05, "loss": 2.0918, "step": 378 }, { "epoch": 0.06827291150641747, "grad_norm": 1.1342253684997559, "learning_rate": 9.972717923624938e-05, "loss": 2.0022, "step": 379 }, { "epoch": 0.06845305111461382, "grad_norm": 1.1995207071304321, "learning_rate": 9.972569988460387e-05, "loss": 2.3192, "step": 380 }, { "epoch": 0.06863319072281018, "grad_norm": 1.2198859453201294, "learning_rate": 9.972421654398693e-05, "loss": 2.1539, "step": 381 }, { "epoch": 0.06881333033100653, "grad_norm": 1.200929880142212, "learning_rate": 9.972272921451758e-05, "loss": 2.1581, "step": 382 }, { "epoch": 0.06899346993920288, "grad_norm": 1.1735786199569702, "learning_rate": 9.972123789631509e-05, "loss": 1.9714, "step": 383 }, { "epoch": 0.06917360954739923, "grad_norm": 1.1791009902954102, "learning_rate": 9.971974258949913e-05, "loss": 2.3169, "step": 384 }, { "epoch": 0.06935374915559558, "grad_norm": 1.164649248123169, "learning_rate": 9.971824329418961e-05, "loss": 2.0622, "step": 385 }, { "epoch": 0.06953388876379193, "grad_norm": 1.1468734741210938, "learning_rate": 9.971674001050686e-05, "loss": 1.9853, "step": 386 }, { "epoch": 0.06971402837198828, "grad_norm": 1.3305964469909668, "learning_rate": 9.971523273857145e-05, "loss": 2.1466, "step": 387 }, { "epoch": 0.06989416798018465, "grad_norm": 1.1789056062698364, "learning_rate": 9.971372147850426e-05, "loss": 2.2511, "step": 388 }, { "epoch": 0.070074307588381, "grad_norm": 1.2829426527023315, "learning_rate": 9.971220623042655e-05, "loss": 2.2993, "step": 389 }, { "epoch": 0.07025444719657735, "grad_norm": 1.1618152856826782, "learning_rate": 9.971068699445987e-05, "loss": 1.9169, "step": 390 }, { "epoch": 0.0704345868047737, "grad_norm": 1.3183369636535645, "learning_rate": 9.97091637707261e-05, "loss": 2.2636, "step": 391 }, { "epoch": 0.07061472641297005, "grad_norm": 1.3134520053863525, "learning_rate": 9.970763655934741e-05, "loss": 2.1004, "step": 392 }, { "epoch": 0.0707948660211664, "grad_norm": 1.2930353879928589, "learning_rate": 9.970610536044634e-05, "loss": 2.2754, "step": 393 }, { "epoch": 0.07097500562936275, "grad_norm": 1.212755799293518, "learning_rate": 9.970457017414572e-05, "loss": 2.0809, "step": 394 }, { "epoch": 0.07115514523755911, "grad_norm": 1.222426414489746, "learning_rate": 9.970303100056867e-05, "loss": 2.0221, "step": 395 }, { "epoch": 0.07133528484575546, "grad_norm": 1.2207961082458496, "learning_rate": 9.970148783983869e-05, "loss": 1.9381, "step": 396 }, { "epoch": 0.07151542445395181, "grad_norm": 1.205553412437439, "learning_rate": 9.969994069207958e-05, "loss": 2.045, "step": 397 }, { "epoch": 0.07169556406214816, "grad_norm": 1.2328301668167114, "learning_rate": 9.969838955741541e-05, "loss": 1.9693, "step": 398 }, { "epoch": 0.07187570367034452, "grad_norm": 1.2753703594207764, "learning_rate": 9.969683443597064e-05, "loss": 1.8625, "step": 399 }, { "epoch": 0.07205584327854087, "grad_norm": 1.1866049766540527, "learning_rate": 9.969527532787003e-05, "loss": 2.0762, "step": 400 }, { "epoch": 0.07223598288673722, "grad_norm": 1.3906890153884888, "learning_rate": 9.969371223323865e-05, "loss": 2.7344, "step": 401 }, { "epoch": 0.07241612249493358, "grad_norm": 1.3052465915679932, "learning_rate": 9.969214515220186e-05, "loss": 2.5495, "step": 402 }, { "epoch": 0.07259626210312993, "grad_norm": 1.3904844522476196, "learning_rate": 9.969057408488539e-05, "loss": 2.8292, "step": 403 }, { "epoch": 0.07277640171132628, "grad_norm": 1.2776987552642822, "learning_rate": 9.968899903141526e-05, "loss": 2.4234, "step": 404 }, { "epoch": 0.07295654131952263, "grad_norm": 1.3141162395477295, "learning_rate": 9.968741999191787e-05, "loss": 2.4546, "step": 405 }, { "epoch": 0.07313668092771898, "grad_norm": 1.2972636222839355, "learning_rate": 9.96858369665198e-05, "loss": 2.4995, "step": 406 }, { "epoch": 0.07331682053591533, "grad_norm": 1.5233914852142334, "learning_rate": 9.968424995534813e-05, "loss": 2.5775, "step": 407 }, { "epoch": 0.07349696014411168, "grad_norm": 1.6031200885772705, "learning_rate": 9.96826589585301e-05, "loss": 2.622, "step": 408 }, { "epoch": 0.07367709975230805, "grad_norm": 2.019437789916992, "learning_rate": 9.968106397619339e-05, "loss": 3.0999, "step": 409 }, { "epoch": 0.0738572393605044, "grad_norm": 1.4470237493515015, "learning_rate": 9.967946500846592e-05, "loss": 2.7362, "step": 410 }, { "epoch": 0.07403737896870075, "grad_norm": 1.2874963283538818, "learning_rate": 9.967786205547595e-05, "loss": 2.2517, "step": 411 }, { "epoch": 0.0742175185768971, "grad_norm": 1.3089977502822876, "learning_rate": 9.96762551173521e-05, "loss": 2.1858, "step": 412 }, { "epoch": 0.07439765818509345, "grad_norm": 1.2105944156646729, "learning_rate": 9.967464419422327e-05, "loss": 1.9773, "step": 413 }, { "epoch": 0.0745777977932898, "grad_norm": 1.3345972299575806, "learning_rate": 9.967302928621867e-05, "loss": 2.2248, "step": 414 }, { "epoch": 0.07475793740148615, "grad_norm": 1.153934359550476, "learning_rate": 9.967141039346784e-05, "loss": 2.1041, "step": 415 }, { "epoch": 0.07493807700968251, "grad_norm": 1.1744452714920044, "learning_rate": 9.966978751610068e-05, "loss": 2.0261, "step": 416 }, { "epoch": 0.07511821661787886, "grad_norm": 1.134575366973877, "learning_rate": 9.966816065424736e-05, "loss": 1.9197, "step": 417 }, { "epoch": 0.07529835622607521, "grad_norm": 1.2032464742660522, "learning_rate": 9.966652980803836e-05, "loss": 1.8453, "step": 418 }, { "epoch": 0.07547849583427156, "grad_norm": 1.1819320917129517, "learning_rate": 9.966489497760456e-05, "loss": 1.9449, "step": 419 }, { "epoch": 0.07565863544246791, "grad_norm": 1.150172472000122, "learning_rate": 9.966325616307706e-05, "loss": 2.0224, "step": 420 }, { "epoch": 0.07583877505066426, "grad_norm": 1.143376111984253, "learning_rate": 9.966161336458734e-05, "loss": 1.9967, "step": 421 }, { "epoch": 0.07601891465886061, "grad_norm": 1.153085470199585, "learning_rate": 9.965996658226719e-05, "loss": 1.816, "step": 422 }, { "epoch": 0.07619905426705698, "grad_norm": 1.2253894805908203, "learning_rate": 9.965831581624871e-05, "loss": 1.9349, "step": 423 }, { "epoch": 0.07637919387525333, "grad_norm": 1.3817367553710938, "learning_rate": 9.965666106666432e-05, "loss": 1.996, "step": 424 }, { "epoch": 0.07655933348344968, "grad_norm": 1.1546779870986938, "learning_rate": 9.965500233364678e-05, "loss": 2.0808, "step": 425 }, { "epoch": 0.07673947309164603, "grad_norm": 1.1899226903915405, "learning_rate": 9.965333961732912e-05, "loss": 1.8567, "step": 426 }, { "epoch": 0.07691961269984238, "grad_norm": 1.3877899646759033, "learning_rate": 9.965167291784474e-05, "loss": 1.7732, "step": 427 }, { "epoch": 0.07709975230803873, "grad_norm": 1.4410600662231445, "learning_rate": 9.965000223532736e-05, "loss": 2.4459, "step": 428 }, { "epoch": 0.07727989191623508, "grad_norm": 1.2743172645568848, "learning_rate": 9.964832756991097e-05, "loss": 2.0879, "step": 429 }, { "epoch": 0.07746003152443143, "grad_norm": 1.1992241144180298, "learning_rate": 9.964664892172992e-05, "loss": 2.0258, "step": 430 }, { "epoch": 0.07764017113262779, "grad_norm": 1.101402759552002, "learning_rate": 9.96449662909189e-05, "loss": 2.0255, "step": 431 }, { "epoch": 0.07782031074082414, "grad_norm": 1.2350220680236816, "learning_rate": 9.964327967761285e-05, "loss": 2.0689, "step": 432 }, { "epoch": 0.07800045034902049, "grad_norm": 1.2699592113494873, "learning_rate": 9.964158908194708e-05, "loss": 2.0516, "step": 433 }, { "epoch": 0.07818058995721684, "grad_norm": 1.3503687381744385, "learning_rate": 9.963989450405722e-05, "loss": 2.1425, "step": 434 }, { "epoch": 0.07836072956541319, "grad_norm": 1.3755441904067993, "learning_rate": 9.963819594407921e-05, "loss": 2.328, "step": 435 }, { "epoch": 0.07854086917360954, "grad_norm": 1.378748893737793, "learning_rate": 9.963649340214929e-05, "loss": 2.4802, "step": 436 }, { "epoch": 0.07872100878180589, "grad_norm": 1.2597651481628418, "learning_rate": 9.963478687840404e-05, "loss": 2.3289, "step": 437 }, { "epoch": 0.07890114839000226, "grad_norm": 1.2250365018844604, "learning_rate": 9.963307637298037e-05, "loss": 2.1271, "step": 438 }, { "epoch": 0.07908128799819861, "grad_norm": 1.2458264827728271, "learning_rate": 9.96313618860155e-05, "loss": 1.952, "step": 439 }, { "epoch": 0.07926142760639496, "grad_norm": 1.347388744354248, "learning_rate": 9.962964341764694e-05, "loss": 2.0552, "step": 440 }, { "epoch": 0.07944156721459131, "grad_norm": 1.1496285200119019, "learning_rate": 9.962792096801255e-05, "loss": 1.7487, "step": 441 }, { "epoch": 0.07962170682278766, "grad_norm": 1.318347692489624, "learning_rate": 9.962619453725052e-05, "loss": 2.0939, "step": 442 }, { "epoch": 0.07980184643098401, "grad_norm": 1.3763155937194824, "learning_rate": 9.962446412549935e-05, "loss": 2.0468, "step": 443 }, { "epoch": 0.07998198603918036, "grad_norm": 1.2991896867752075, "learning_rate": 9.962272973289782e-05, "loss": 2.0533, "step": 444 }, { "epoch": 0.08016212564737672, "grad_norm": 1.1254770755767822, "learning_rate": 9.962099135958509e-05, "loss": 1.5903, "step": 445 }, { "epoch": 0.08034226525557307, "grad_norm": 1.3234833478927612, "learning_rate": 9.96192490057006e-05, "loss": 2.2038, "step": 446 }, { "epoch": 0.08052240486376942, "grad_norm": 1.536211609840393, "learning_rate": 9.961750267138414e-05, "loss": 2.2408, "step": 447 }, { "epoch": 0.08070254447196577, "grad_norm": 1.2849133014678955, "learning_rate": 9.961575235677576e-05, "loss": 1.6949, "step": 448 }, { "epoch": 0.08088268408016212, "grad_norm": 1.3605488538742065, "learning_rate": 9.96139980620159e-05, "loss": 2.1481, "step": 449 }, { "epoch": 0.08106282368835847, "grad_norm": 1.335553526878357, "learning_rate": 9.961223978724528e-05, "loss": 2.2895, "step": 450 }, { "epoch": 0.08124296329655482, "grad_norm": 1.488923192024231, "learning_rate": 9.961047753260496e-05, "loss": 2.6168, "step": 451 }, { "epoch": 0.08142310290475119, "grad_norm": 1.4191235303878784, "learning_rate": 9.960871129823631e-05, "loss": 2.5639, "step": 452 }, { "epoch": 0.08160324251294754, "grad_norm": 1.596569538116455, "learning_rate": 9.960694108428097e-05, "loss": 2.497, "step": 453 }, { "epoch": 0.08178338212114389, "grad_norm": 1.3627617359161377, "learning_rate": 9.960516689088099e-05, "loss": 2.6283, "step": 454 }, { "epoch": 0.08196352172934024, "grad_norm": 1.267029881477356, "learning_rate": 9.960338871817869e-05, "loss": 2.5063, "step": 455 }, { "epoch": 0.08214366133753659, "grad_norm": 1.4305446147918701, "learning_rate": 9.96016065663167e-05, "loss": 2.5778, "step": 456 }, { "epoch": 0.08232380094573294, "grad_norm": 1.4641321897506714, "learning_rate": 9.959982043543799e-05, "loss": 2.3723, "step": 457 }, { "epoch": 0.08250394055392929, "grad_norm": 1.4689418077468872, "learning_rate": 9.959803032568587e-05, "loss": 2.2516, "step": 458 }, { "epoch": 0.08268408016212565, "grad_norm": 1.5636807680130005, "learning_rate": 9.959623623720388e-05, "loss": 2.3476, "step": 459 }, { "epoch": 0.082864219770322, "grad_norm": 1.840383768081665, "learning_rate": 9.9594438170136e-05, "loss": 2.8303, "step": 460 }, { "epoch": 0.08304435937851835, "grad_norm": 1.9453020095825195, "learning_rate": 9.959263612462644e-05, "loss": 2.7647, "step": 461 }, { "epoch": 0.0832244989867147, "grad_norm": 1.5214613676071167, "learning_rate": 9.959083010081976e-05, "loss": 2.1241, "step": 462 }, { "epoch": 0.08340463859491105, "grad_norm": 1.2775464057922363, "learning_rate": 9.958902009886085e-05, "loss": 1.8959, "step": 463 }, { "epoch": 0.0835847782031074, "grad_norm": 1.4920369386672974, "learning_rate": 9.95872061188949e-05, "loss": 2.1968, "step": 464 }, { "epoch": 0.08376491781130375, "grad_norm": 1.3108103275299072, "learning_rate": 9.958538816106742e-05, "loss": 2.1947, "step": 465 }, { "epoch": 0.08394505741950012, "grad_norm": 1.2641619443893433, "learning_rate": 9.958356622552427e-05, "loss": 2.1109, "step": 466 }, { "epoch": 0.08412519702769647, "grad_norm": 1.1450035572052002, "learning_rate": 9.958174031241157e-05, "loss": 1.6667, "step": 467 }, { "epoch": 0.08430533663589282, "grad_norm": 1.2238646745681763, "learning_rate": 9.957991042187582e-05, "loss": 2.1029, "step": 468 }, { "epoch": 0.08448547624408917, "grad_norm": 1.1442573070526123, "learning_rate": 9.957807655406382e-05, "loss": 1.9957, "step": 469 }, { "epoch": 0.08466561585228552, "grad_norm": 1.2300710678100586, "learning_rate": 9.957623870912266e-05, "loss": 1.908, "step": 470 }, { "epoch": 0.08484575546048187, "grad_norm": 1.249601125717163, "learning_rate": 9.957439688719976e-05, "loss": 2.1251, "step": 471 }, { "epoch": 0.08502589506867822, "grad_norm": 1.2331570386886597, "learning_rate": 9.95725510884429e-05, "loss": 2.3607, "step": 472 }, { "epoch": 0.08520603467687458, "grad_norm": 1.259178638458252, "learning_rate": 9.957070131300014e-05, "loss": 2.2836, "step": 473 }, { "epoch": 0.08538617428507093, "grad_norm": 1.2483408451080322, "learning_rate": 9.956884756101987e-05, "loss": 2.0932, "step": 474 }, { "epoch": 0.08556631389326728, "grad_norm": 1.187480092048645, "learning_rate": 9.956698983265081e-05, "loss": 1.9143, "step": 475 }, { "epoch": 0.08574645350146363, "grad_norm": 1.2999680042266846, "learning_rate": 9.956512812804194e-05, "loss": 2.1693, "step": 476 }, { "epoch": 0.08592659310965998, "grad_norm": 1.2362725734710693, "learning_rate": 9.956326244734265e-05, "loss": 2.0334, "step": 477 }, { "epoch": 0.08610673271785634, "grad_norm": 1.1153935194015503, "learning_rate": 9.956139279070257e-05, "loss": 1.8978, "step": 478 }, { "epoch": 0.08628687232605269, "grad_norm": 1.1800506114959717, "learning_rate": 9.955951915827171e-05, "loss": 1.941, "step": 479 }, { "epoch": 0.08646701193424905, "grad_norm": 1.2130142450332642, "learning_rate": 9.955764155020037e-05, "loss": 1.9348, "step": 480 }, { "epoch": 0.0866471515424454, "grad_norm": 1.2380461692810059, "learning_rate": 9.955575996663916e-05, "loss": 2.0916, "step": 481 }, { "epoch": 0.08682729115064175, "grad_norm": 1.1962411403656006, "learning_rate": 9.9553874407739e-05, "loss": 2.089, "step": 482 }, { "epoch": 0.0870074307588381, "grad_norm": 1.161903738975525, "learning_rate": 9.95519848736512e-05, "loss": 1.883, "step": 483 }, { "epoch": 0.08718757036703445, "grad_norm": 1.2464240789413452, "learning_rate": 9.95500913645273e-05, "loss": 2.0199, "step": 484 }, { "epoch": 0.0873677099752308, "grad_norm": 1.1132943630218506, "learning_rate": 9.954819388051921e-05, "loss": 1.6719, "step": 485 }, { "epoch": 0.08754784958342715, "grad_norm": 1.287757396697998, "learning_rate": 9.954629242177913e-05, "loss": 1.8318, "step": 486 }, { "epoch": 0.08772798919162351, "grad_norm": 1.2616487741470337, "learning_rate": 9.954438698845962e-05, "loss": 2.0988, "step": 487 }, { "epoch": 0.08790812879981987, "grad_norm": 1.2310196161270142, "learning_rate": 9.95424775807135e-05, "loss": 2.1358, "step": 488 }, { "epoch": 0.08808826840801622, "grad_norm": 1.3051484823226929, "learning_rate": 9.954056419869398e-05, "loss": 2.051, "step": 489 }, { "epoch": 0.08826840801621257, "grad_norm": 1.2223469018936157, "learning_rate": 9.953864684255452e-05, "loss": 2.0419, "step": 490 }, { "epoch": 0.08844854762440892, "grad_norm": 1.3044649362564087, "learning_rate": 9.953672551244895e-05, "loss": 2.2188, "step": 491 }, { "epoch": 0.08862868723260527, "grad_norm": 1.2248224020004272, "learning_rate": 9.953480020853138e-05, "loss": 1.961, "step": 492 }, { "epoch": 0.08880882684080162, "grad_norm": 1.324702262878418, "learning_rate": 9.953287093095626e-05, "loss": 2.4466, "step": 493 }, { "epoch": 0.08898896644899798, "grad_norm": 1.3229643106460571, "learning_rate": 9.953093767987835e-05, "loss": 1.9756, "step": 494 }, { "epoch": 0.08916910605719433, "grad_norm": 1.4614118337631226, "learning_rate": 9.952900045545278e-05, "loss": 2.2114, "step": 495 }, { "epoch": 0.08934924566539068, "grad_norm": 1.5767461061477661, "learning_rate": 9.952705925783488e-05, "loss": 2.1117, "step": 496 }, { "epoch": 0.08952938527358703, "grad_norm": 1.3316841125488281, "learning_rate": 9.952511408718044e-05, "loss": 1.978, "step": 497 }, { "epoch": 0.08970952488178338, "grad_norm": 1.2290759086608887, "learning_rate": 9.952316494364546e-05, "loss": 2.0191, "step": 498 }, { "epoch": 0.08988966448997973, "grad_norm": 1.276228904724121, "learning_rate": 9.952121182738629e-05, "loss": 1.6621, "step": 499 }, { "epoch": 0.09006980409817608, "grad_norm": 1.3325002193450928, "learning_rate": 9.951925473855965e-05, "loss": 2.2832, "step": 500 }, { "epoch": 0.09024994370637243, "grad_norm": 1.5747864246368408, "learning_rate": 9.951729367732252e-05, "loss": 2.6645, "step": 501 }, { "epoch": 0.0904300833145688, "grad_norm": 1.3309484720230103, "learning_rate": 9.95153286438322e-05, "loss": 2.5603, "step": 502 }, { "epoch": 0.09061022292276515, "grad_norm": 1.4444763660430908, "learning_rate": 9.951335963824633e-05, "loss": 2.565, "step": 503 }, { "epoch": 0.0907903625309615, "grad_norm": 1.4311542510986328, "learning_rate": 9.951138666072288e-05, "loss": 2.4572, "step": 504 }, { "epoch": 0.09097050213915785, "grad_norm": 1.3777457475662231, "learning_rate": 9.95094097114201e-05, "loss": 2.713, "step": 505 }, { "epoch": 0.0911506417473542, "grad_norm": 1.3411471843719482, "learning_rate": 9.95074287904966e-05, "loss": 2.2585, "step": 506 }, { "epoch": 0.09133078135555055, "grad_norm": 1.5575672388076782, "learning_rate": 9.950544389811125e-05, "loss": 2.9659, "step": 507 }, { "epoch": 0.0915109209637469, "grad_norm": 1.4797890186309814, "learning_rate": 9.950345503442332e-05, "loss": 2.6608, "step": 508 }, { "epoch": 0.09169106057194326, "grad_norm": 1.6868726015090942, "learning_rate": 9.950146219959233e-05, "loss": 2.6066, "step": 509 }, { "epoch": 0.09187120018013961, "grad_norm": 1.8538469076156616, "learning_rate": 9.949946539377817e-05, "loss": 2.7629, "step": 510 }, { "epoch": 0.09205133978833596, "grad_norm": 1.512001872062683, "learning_rate": 9.9497464617141e-05, "loss": 2.3833, "step": 511 }, { "epoch": 0.09223147939653231, "grad_norm": 1.3509187698364258, "learning_rate": 9.949545986984133e-05, "loss": 2.2007, "step": 512 }, { "epoch": 0.09241161900472866, "grad_norm": 1.2238082885742188, "learning_rate": 9.949345115203998e-05, "loss": 2.1222, "step": 513 }, { "epoch": 0.09259175861292501, "grad_norm": 1.3921561241149902, "learning_rate": 9.949143846389808e-05, "loss": 2.0566, "step": 514 }, { "epoch": 0.09277189822112136, "grad_norm": 1.2336784601211548, "learning_rate": 9.948942180557708e-05, "loss": 2.0548, "step": 515 }, { "epoch": 0.09295203782931773, "grad_norm": 1.3000797033309937, "learning_rate": 9.948740117723879e-05, "loss": 2.065, "step": 516 }, { "epoch": 0.09313217743751408, "grad_norm": 1.1365313529968262, "learning_rate": 9.948537657904526e-05, "loss": 1.8607, "step": 517 }, { "epoch": 0.09331231704571043, "grad_norm": 1.190427541732788, "learning_rate": 9.948334801115895e-05, "loss": 1.8012, "step": 518 }, { "epoch": 0.09349245665390678, "grad_norm": 1.1424087285995483, "learning_rate": 9.948131547374255e-05, "loss": 1.9257, "step": 519 }, { "epoch": 0.09367259626210313, "grad_norm": 1.2099260091781616, "learning_rate": 9.947927896695913e-05, "loss": 2.039, "step": 520 }, { "epoch": 0.09385273587029948, "grad_norm": 1.2156577110290527, "learning_rate": 9.947723849097203e-05, "loss": 2.0548, "step": 521 }, { "epoch": 0.09403287547849583, "grad_norm": 1.2186594009399414, "learning_rate": 9.947519404594498e-05, "loss": 2.0695, "step": 522 }, { "epoch": 0.09421301508669219, "grad_norm": 1.2354419231414795, "learning_rate": 9.947314563204198e-05, "loss": 2.1092, "step": 523 }, { "epoch": 0.09439315469488854, "grad_norm": 1.2534072399139404, "learning_rate": 9.947109324942729e-05, "loss": 1.9764, "step": 524 }, { "epoch": 0.09457329430308489, "grad_norm": 1.1973639726638794, "learning_rate": 9.946903689826563e-05, "loss": 1.754, "step": 525 }, { "epoch": 0.09475343391128124, "grad_norm": 1.3052338361740112, "learning_rate": 9.946697657872191e-05, "loss": 2.1353, "step": 526 }, { "epoch": 0.0949335735194776, "grad_norm": 1.2879843711853027, "learning_rate": 9.946491229096144e-05, "loss": 1.889, "step": 527 }, { "epoch": 0.09511371312767394, "grad_norm": 1.2169817686080933, "learning_rate": 9.946284403514979e-05, "loss": 2.0355, "step": 528 }, { "epoch": 0.0952938527358703, "grad_norm": 1.2379153966903687, "learning_rate": 9.946077181145287e-05, "loss": 2.125, "step": 529 }, { "epoch": 0.09547399234406666, "grad_norm": 1.3447768688201904, "learning_rate": 9.945869562003694e-05, "loss": 2.2768, "step": 530 }, { "epoch": 0.09565413195226301, "grad_norm": 1.1286956071853638, "learning_rate": 9.945661546106853e-05, "loss": 1.8762, "step": 531 }, { "epoch": 0.09583427156045936, "grad_norm": 1.4641311168670654, "learning_rate": 9.945453133471453e-05, "loss": 2.4861, "step": 532 }, { "epoch": 0.09601441116865571, "grad_norm": 1.2305301427841187, "learning_rate": 9.945244324114211e-05, "loss": 1.9035, "step": 533 }, { "epoch": 0.09619455077685206, "grad_norm": 1.40278160572052, "learning_rate": 9.945035118051878e-05, "loss": 2.3066, "step": 534 }, { "epoch": 0.09637469038504841, "grad_norm": 1.3533518314361572, "learning_rate": 9.944825515301235e-05, "loss": 2.1885, "step": 535 }, { "epoch": 0.09655482999324476, "grad_norm": 1.3937537670135498, "learning_rate": 9.9446155158791e-05, "loss": 2.252, "step": 536 }, { "epoch": 0.09673496960144112, "grad_norm": 1.3002718687057495, "learning_rate": 9.944405119802316e-05, "loss": 1.9532, "step": 537 }, { "epoch": 0.09691510920963747, "grad_norm": 1.3723276853561401, "learning_rate": 9.944194327087761e-05, "loss": 2.2575, "step": 538 }, { "epoch": 0.09709524881783382, "grad_norm": 1.3745765686035156, "learning_rate": 9.943983137752346e-05, "loss": 2.1827, "step": 539 }, { "epoch": 0.09727538842603017, "grad_norm": 1.3019641637802124, "learning_rate": 9.943771551813011e-05, "loss": 2.205, "step": 540 }, { "epoch": 0.09745552803422652, "grad_norm": 1.3320200443267822, "learning_rate": 9.94355956928673e-05, "loss": 2.1291, "step": 541 }, { "epoch": 0.09763566764242287, "grad_norm": 1.3012250661849976, "learning_rate": 9.943347190190511e-05, "loss": 2.005, "step": 542 }, { "epoch": 0.09781580725061922, "grad_norm": 1.3021081686019897, "learning_rate": 9.943134414541386e-05, "loss": 2.0511, "step": 543 }, { "epoch": 0.09799594685881559, "grad_norm": 1.3133217096328735, "learning_rate": 9.942921242356425e-05, "loss": 2.1142, "step": 544 }, { "epoch": 0.09817608646701194, "grad_norm": 1.343146562576294, "learning_rate": 9.942707673652731e-05, "loss": 1.8386, "step": 545 }, { "epoch": 0.09835622607520829, "grad_norm": 1.2855020761489868, "learning_rate": 9.942493708447435e-05, "loss": 1.7687, "step": 546 }, { "epoch": 0.09853636568340464, "grad_norm": 1.188632845878601, "learning_rate": 9.9422793467577e-05, "loss": 1.8733, "step": 547 }, { "epoch": 0.09871650529160099, "grad_norm": 1.3299243450164795, "learning_rate": 9.942064588600724e-05, "loss": 2.0229, "step": 548 }, { "epoch": 0.09889664489979734, "grad_norm": 1.2409086227416992, "learning_rate": 9.941849433993734e-05, "loss": 1.8813, "step": 549 }, { "epoch": 0.09907678450799369, "grad_norm": 1.4136215448379517, "learning_rate": 9.941633882953989e-05, "loss": 2.5996, "step": 550 }, { "epoch": 0.09925692411619005, "grad_norm": 1.5979270935058594, "learning_rate": 9.941417935498782e-05, "loss": 2.3478, "step": 551 }, { "epoch": 0.0994370637243864, "grad_norm": 1.4183084964752197, "learning_rate": 9.941201591645434e-05, "loss": 2.5067, "step": 552 }, { "epoch": 0.09961720333258275, "grad_norm": 1.3027455806732178, "learning_rate": 9.940984851411301e-05, "loss": 2.3411, "step": 553 }, { "epoch": 0.0997973429407791, "grad_norm": 1.8705165386199951, "learning_rate": 9.94076771481377e-05, "loss": 2.3631, "step": 554 }, { "epoch": 0.09997748254897545, "grad_norm": 1.4732381105422974, "learning_rate": 9.94055018187026e-05, "loss": 2.5289, "step": 555 }, { "epoch": 0.1001576221571718, "grad_norm": 1.4549360275268555, "learning_rate": 9.94033225259822e-05, "loss": 2.1177, "step": 556 }, { "epoch": 0.10033776176536816, "grad_norm": 1.69441556930542, "learning_rate": 9.940113927015134e-05, "loss": 2.4761, "step": 557 }, { "epoch": 0.10051790137356452, "grad_norm": 1.782146692276001, "learning_rate": 9.939895205138514e-05, "loss": 2.1732, "step": 558 }, { "epoch": 0.10069804098176087, "grad_norm": 1.762495756149292, "learning_rate": 9.939676086985907e-05, "loss": 2.7288, "step": 559 }, { "epoch": 0.10087818058995722, "grad_norm": 2.0288562774658203, "learning_rate": 9.939456572574892e-05, "loss": 2.1508, "step": 560 }, { "epoch": 0.10105832019815357, "grad_norm": 1.2310386896133423, "learning_rate": 9.939236661923074e-05, "loss": 2.1423, "step": 561 }, { "epoch": 0.10123845980634992, "grad_norm": 1.186572551727295, "learning_rate": 9.939016355048099e-05, "loss": 1.9507, "step": 562 }, { "epoch": 0.10141859941454627, "grad_norm": 1.2622029781341553, "learning_rate": 9.938795651967637e-05, "loss": 1.7998, "step": 563 }, { "epoch": 0.10159873902274262, "grad_norm": 1.3796643018722534, "learning_rate": 9.938574552699394e-05, "loss": 1.9648, "step": 564 }, { "epoch": 0.10177887863093897, "grad_norm": 1.236175537109375, "learning_rate": 9.938353057261105e-05, "loss": 1.9793, "step": 565 }, { "epoch": 0.10195901823913534, "grad_norm": 1.7577669620513916, "learning_rate": 9.938131165670538e-05, "loss": 1.9435, "step": 566 }, { "epoch": 0.10213915784733169, "grad_norm": 1.2309681177139282, "learning_rate": 9.937908877945496e-05, "loss": 2.1315, "step": 567 }, { "epoch": 0.10231929745552804, "grad_norm": 1.2463933229446411, "learning_rate": 9.93768619410381e-05, "loss": 2.2162, "step": 568 }, { "epoch": 0.10249943706372439, "grad_norm": 1.2483500242233276, "learning_rate": 9.93746311416334e-05, "loss": 1.7632, "step": 569 }, { "epoch": 0.10267957667192074, "grad_norm": 1.1438194513320923, "learning_rate": 9.937239638141984e-05, "loss": 1.8644, "step": 570 }, { "epoch": 0.10285971628011709, "grad_norm": 1.1547142267227173, "learning_rate": 9.937015766057671e-05, "loss": 1.985, "step": 571 }, { "epoch": 0.10303985588831344, "grad_norm": 1.1651068925857544, "learning_rate": 9.936791497928357e-05, "loss": 1.932, "step": 572 }, { "epoch": 0.1032199954965098, "grad_norm": 1.2023049592971802, "learning_rate": 9.936566833772034e-05, "loss": 2.0256, "step": 573 }, { "epoch": 0.10340013510470615, "grad_norm": 1.1891133785247803, "learning_rate": 9.936341773606723e-05, "loss": 2.0444, "step": 574 }, { "epoch": 0.1035802747129025, "grad_norm": 1.382871150970459, "learning_rate": 9.936116317450482e-05, "loss": 2.0966, "step": 575 }, { "epoch": 0.10376041432109885, "grad_norm": 1.181156039237976, "learning_rate": 9.935890465321392e-05, "loss": 1.7597, "step": 576 }, { "epoch": 0.1039405539292952, "grad_norm": 1.1654242277145386, "learning_rate": 9.935664217237573e-05, "loss": 1.6947, "step": 577 }, { "epoch": 0.10412069353749155, "grad_norm": 1.2307331562042236, "learning_rate": 9.935437573217175e-05, "loss": 1.9475, "step": 578 }, { "epoch": 0.1043008331456879, "grad_norm": 1.3564519882202148, "learning_rate": 9.935210533278379e-05, "loss": 2.3476, "step": 579 }, { "epoch": 0.10448097275388427, "grad_norm": 1.1362614631652832, "learning_rate": 9.934983097439396e-05, "loss": 1.9735, "step": 580 }, { "epoch": 0.10466111236208062, "grad_norm": 1.245951533317566, "learning_rate": 9.934755265718476e-05, "loss": 2.2328, "step": 581 }, { "epoch": 0.10484125197027697, "grad_norm": 1.2103431224822998, "learning_rate": 9.934527038133889e-05, "loss": 2.2116, "step": 582 }, { "epoch": 0.10502139157847332, "grad_norm": 1.2476712465286255, "learning_rate": 9.934298414703948e-05, "loss": 2.0962, "step": 583 }, { "epoch": 0.10520153118666967, "grad_norm": 1.24763822555542, "learning_rate": 9.934069395446993e-05, "loss": 1.9395, "step": 584 }, { "epoch": 0.10538167079486602, "grad_norm": 1.3773177862167358, "learning_rate": 9.933839980381393e-05, "loss": 2.2344, "step": 585 }, { "epoch": 0.10556181040306237, "grad_norm": 1.348358392715454, "learning_rate": 9.933610169525554e-05, "loss": 2.0153, "step": 586 }, { "epoch": 0.10574195001125873, "grad_norm": 1.327081561088562, "learning_rate": 9.933379962897909e-05, "loss": 2.0961, "step": 587 }, { "epoch": 0.10592208961945508, "grad_norm": 1.2437586784362793, "learning_rate": 9.933149360516927e-05, "loss": 1.9906, "step": 588 }, { "epoch": 0.10610222922765143, "grad_norm": 1.3158669471740723, "learning_rate": 9.932918362401104e-05, "loss": 2.0608, "step": 589 }, { "epoch": 0.10628236883584778, "grad_norm": 1.361436128616333, "learning_rate": 9.932686968568976e-05, "loss": 2.0768, "step": 590 }, { "epoch": 0.10646250844404413, "grad_norm": 1.3792731761932373, "learning_rate": 9.932455179039101e-05, "loss": 1.9328, "step": 591 }, { "epoch": 0.10664264805224048, "grad_norm": 1.528085708618164, "learning_rate": 9.932222993830074e-05, "loss": 2.3766, "step": 592 }, { "epoch": 0.10682278766043683, "grad_norm": 1.2961821556091309, "learning_rate": 9.931990412960518e-05, "loss": 1.9681, "step": 593 }, { "epoch": 0.1070029272686332, "grad_norm": 1.3054449558258057, "learning_rate": 9.931757436449097e-05, "loss": 1.9601, "step": 594 }, { "epoch": 0.10718306687682955, "grad_norm": 1.3940820693969727, "learning_rate": 9.931524064314496e-05, "loss": 2.2602, "step": 595 }, { "epoch": 0.1073632064850259, "grad_norm": 1.380031943321228, "learning_rate": 9.931290296575436e-05, "loss": 2.4307, "step": 596 }, { "epoch": 0.10754334609322225, "grad_norm": 1.3479726314544678, "learning_rate": 9.93105613325067e-05, "loss": 2.1546, "step": 597 }, { "epoch": 0.1077234857014186, "grad_norm": 1.179189682006836, "learning_rate": 9.930821574358984e-05, "loss": 1.6836, "step": 598 }, { "epoch": 0.10790362530961495, "grad_norm": 1.339950680732727, "learning_rate": 9.930586619919192e-05, "loss": 2.2317, "step": 599 }, { "epoch": 0.1080837649178113, "grad_norm": 1.2645717859268188, "learning_rate": 9.930351269950143e-05, "loss": 1.8206, "step": 600 }, { "epoch": 0.10826390452600766, "grad_norm": 1.6206364631652832, "learning_rate": 9.930115524470718e-05, "loss": 2.6552, "step": 601 }, { "epoch": 0.10844404413420401, "grad_norm": 1.2960243225097656, "learning_rate": 9.929879383499825e-05, "loss": 2.2248, "step": 602 }, { "epoch": 0.10862418374240036, "grad_norm": 1.2325005531311035, "learning_rate": 9.92964284705641e-05, "loss": 2.3235, "step": 603 }, { "epoch": 0.10880432335059671, "grad_norm": 1.3556349277496338, "learning_rate": 9.929405915159446e-05, "loss": 2.3627, "step": 604 }, { "epoch": 0.10898446295879306, "grad_norm": 1.3524086475372314, "learning_rate": 9.929168587827942e-05, "loss": 2.5927, "step": 605 }, { "epoch": 0.10916460256698941, "grad_norm": 1.334237813949585, "learning_rate": 9.928930865080934e-05, "loss": 2.1805, "step": 606 }, { "epoch": 0.10934474217518576, "grad_norm": 1.5597326755523682, "learning_rate": 9.928692746937494e-05, "loss": 2.5088, "step": 607 }, { "epoch": 0.10952488178338213, "grad_norm": 1.8527363538742065, "learning_rate": 9.92845423341672e-05, "loss": 2.6455, "step": 608 }, { "epoch": 0.10970502139157848, "grad_norm": 1.8527334928512573, "learning_rate": 9.92821532453775e-05, "loss": 2.71, "step": 609 }, { "epoch": 0.10988516099977483, "grad_norm": 1.2585922479629517, "learning_rate": 9.927976020319747e-05, "loss": 2.1503, "step": 610 }, { "epoch": 0.11006530060797118, "grad_norm": 1.2161167860031128, "learning_rate": 9.927736320781908e-05, "loss": 1.7714, "step": 611 }, { "epoch": 0.11024544021616753, "grad_norm": 1.2532986402511597, "learning_rate": 9.92749622594346e-05, "loss": 2.0686, "step": 612 }, { "epoch": 0.11042557982436388, "grad_norm": 1.2780698537826538, "learning_rate": 9.927255735823667e-05, "loss": 2.0918, "step": 613 }, { "epoch": 0.11060571943256023, "grad_norm": 1.2372409105300903, "learning_rate": 9.927014850441818e-05, "loss": 1.9126, "step": 614 }, { "epoch": 0.11078585904075659, "grad_norm": 1.3066140413284302, "learning_rate": 9.926773569817237e-05, "loss": 1.9936, "step": 615 }, { "epoch": 0.11096599864895294, "grad_norm": 1.2493231296539307, "learning_rate": 9.926531893969279e-05, "loss": 1.9355, "step": 616 }, { "epoch": 0.1111461382571493, "grad_norm": 1.3704819679260254, "learning_rate": 9.926289822917335e-05, "loss": 2.0541, "step": 617 }, { "epoch": 0.11132627786534564, "grad_norm": 1.2036651372909546, "learning_rate": 9.926047356680819e-05, "loss": 2.0361, "step": 618 }, { "epoch": 0.111506417473542, "grad_norm": 1.1734963655471802, "learning_rate": 9.925804495279185e-05, "loss": 2.0089, "step": 619 }, { "epoch": 0.11168655708173834, "grad_norm": 1.3296834230422974, "learning_rate": 9.925561238731912e-05, "loss": 2.1309, "step": 620 }, { "epoch": 0.1118666966899347, "grad_norm": 1.1076639890670776, "learning_rate": 9.925317587058516e-05, "loss": 1.974, "step": 621 }, { "epoch": 0.11204683629813106, "grad_norm": 1.2039434909820557, "learning_rate": 9.925073540278542e-05, "loss": 1.9346, "step": 622 }, { "epoch": 0.11222697590632741, "grad_norm": 1.2502390146255493, "learning_rate": 9.924829098411568e-05, "loss": 1.81, "step": 623 }, { "epoch": 0.11240711551452376, "grad_norm": 1.2662842273712158, "learning_rate": 9.924584261477203e-05, "loss": 1.9783, "step": 624 }, { "epoch": 0.11258725512272011, "grad_norm": 1.18838632106781, "learning_rate": 9.924339029495086e-05, "loss": 1.7056, "step": 625 }, { "epoch": 0.11276739473091646, "grad_norm": 1.202418565750122, "learning_rate": 9.924093402484891e-05, "loss": 1.9405, "step": 626 }, { "epoch": 0.11294753433911281, "grad_norm": 1.2927911281585693, "learning_rate": 9.923847380466323e-05, "loss": 1.7644, "step": 627 }, { "epoch": 0.11312767394730916, "grad_norm": 1.2021912336349487, "learning_rate": 9.923600963459115e-05, "loss": 1.7018, "step": 628 }, { "epoch": 0.11330781355550551, "grad_norm": 1.2121084928512573, "learning_rate": 9.923354151483035e-05, "loss": 2.0384, "step": 629 }, { "epoch": 0.11348795316370187, "grad_norm": 1.2217655181884766, "learning_rate": 9.923106944557884e-05, "loss": 1.9361, "step": 630 }, { "epoch": 0.11366809277189822, "grad_norm": 1.297149658203125, "learning_rate": 9.922859342703492e-05, "loss": 2.1438, "step": 631 }, { "epoch": 0.11384823238009457, "grad_norm": 1.2163119316101074, "learning_rate": 9.922611345939723e-05, "loss": 1.8925, "step": 632 }, { "epoch": 0.11402837198829092, "grad_norm": 1.2779686450958252, "learning_rate": 9.922362954286468e-05, "loss": 1.9334, "step": 633 }, { "epoch": 0.11420851159648727, "grad_norm": 1.3244131803512573, "learning_rate": 9.922114167763655e-05, "loss": 2.0773, "step": 634 }, { "epoch": 0.11438865120468363, "grad_norm": 1.2796010971069336, "learning_rate": 9.92186498639124e-05, "loss": 1.8922, "step": 635 }, { "epoch": 0.11456879081287998, "grad_norm": 1.3653994798660278, "learning_rate": 9.921615410189213e-05, "loss": 1.9424, "step": 636 }, { "epoch": 0.11474893042107634, "grad_norm": 1.3693561553955078, "learning_rate": 9.921365439177596e-05, "loss": 2.1637, "step": 637 }, { "epoch": 0.11492907002927269, "grad_norm": 1.5115869045257568, "learning_rate": 9.921115073376439e-05, "loss": 1.9723, "step": 638 }, { "epoch": 0.11510920963746904, "grad_norm": 1.4102026224136353, "learning_rate": 9.920864312805828e-05, "loss": 2.0927, "step": 639 }, { "epoch": 0.11528934924566539, "grad_norm": 1.5333706140518188, "learning_rate": 9.920613157485881e-05, "loss": 2.269, "step": 640 }, { "epoch": 0.11546948885386174, "grad_norm": 1.3293869495391846, "learning_rate": 9.920361607436742e-05, "loss": 1.9739, "step": 641 }, { "epoch": 0.11564962846205809, "grad_norm": 1.318129062652588, "learning_rate": 9.920109662678592e-05, "loss": 1.8895, "step": 642 }, { "epoch": 0.11582976807025444, "grad_norm": 1.3061244487762451, "learning_rate": 9.91985732323164e-05, "loss": 2.176, "step": 643 }, { "epoch": 0.1160099076784508, "grad_norm": 1.2987688779830933, "learning_rate": 9.919604589116132e-05, "loss": 2.0634, "step": 644 }, { "epoch": 0.11619004728664716, "grad_norm": 1.2750600576400757, "learning_rate": 9.919351460352338e-05, "loss": 1.9271, "step": 645 }, { "epoch": 0.1163701868948435, "grad_norm": 1.3042123317718506, "learning_rate": 9.919097936960567e-05, "loss": 1.8597, "step": 646 }, { "epoch": 0.11655032650303986, "grad_norm": 1.2460150718688965, "learning_rate": 9.918844018961155e-05, "loss": 1.8877, "step": 647 }, { "epoch": 0.1167304661112362, "grad_norm": 1.3470635414123535, "learning_rate": 9.918589706374472e-05, "loss": 2.1862, "step": 648 }, { "epoch": 0.11691060571943256, "grad_norm": 1.4191900491714478, "learning_rate": 9.918334999220917e-05, "loss": 1.7834, "step": 649 }, { "epoch": 0.1170907453276289, "grad_norm": 1.1623694896697998, "learning_rate": 9.918079897520927e-05, "loss": 1.8351, "step": 650 }, { "epoch": 0.11727088493582527, "grad_norm": 1.44710373878479, "learning_rate": 9.917824401294962e-05, "loss": 2.5077, "step": 651 }, { "epoch": 0.11745102454402162, "grad_norm": 1.5147384405136108, "learning_rate": 9.917568510563517e-05, "loss": 2.2504, "step": 652 }, { "epoch": 0.11763116415221797, "grad_norm": 1.3511815071105957, "learning_rate": 9.917312225347124e-05, "loss": 2.2882, "step": 653 }, { "epoch": 0.11781130376041432, "grad_norm": 1.301245093345642, "learning_rate": 9.917055545666339e-05, "loss": 2.2157, "step": 654 }, { "epoch": 0.11799144336861067, "grad_norm": 1.642171025276184, "learning_rate": 9.91679847154175e-05, "loss": 2.3195, "step": 655 }, { "epoch": 0.11817158297680702, "grad_norm": 1.4102668762207031, "learning_rate": 9.916541002993987e-05, "loss": 2.32, "step": 656 }, { "epoch": 0.11835172258500337, "grad_norm": 1.4176762104034424, "learning_rate": 9.916283140043696e-05, "loss": 2.4767, "step": 657 }, { "epoch": 0.11853186219319974, "grad_norm": 1.7280687093734741, "learning_rate": 9.916024882711567e-05, "loss": 2.3133, "step": 658 }, { "epoch": 0.11871200180139609, "grad_norm": 1.9185752868652344, "learning_rate": 9.915766231018318e-05, "loss": 2.5846, "step": 659 }, { "epoch": 0.11889214140959244, "grad_norm": 1.6732910871505737, "learning_rate": 9.915507184984695e-05, "loss": 2.8271, "step": 660 }, { "epoch": 0.11907228101778879, "grad_norm": 1.233290195465088, "learning_rate": 9.91524774463148e-05, "loss": 1.9402, "step": 661 }, { "epoch": 0.11925242062598514, "grad_norm": 1.248557209968567, "learning_rate": 9.914987909979485e-05, "loss": 1.8964, "step": 662 }, { "epoch": 0.11943256023418149, "grad_norm": 1.296522617340088, "learning_rate": 9.914727681049554e-05, "loss": 2.2381, "step": 663 }, { "epoch": 0.11961269984237784, "grad_norm": 1.274907112121582, "learning_rate": 9.914467057862563e-05, "loss": 2.1219, "step": 664 }, { "epoch": 0.1197928394505742, "grad_norm": 1.119384765625, "learning_rate": 9.914206040439418e-05, "loss": 1.8136, "step": 665 }, { "epoch": 0.11997297905877055, "grad_norm": 1.323623538017273, "learning_rate": 9.913944628801058e-05, "loss": 1.9493, "step": 666 }, { "epoch": 0.1201531186669669, "grad_norm": 1.13485848903656, "learning_rate": 9.913682822968454e-05, "loss": 1.9245, "step": 667 }, { "epoch": 0.12033325827516325, "grad_norm": 1.34122633934021, "learning_rate": 9.913420622962606e-05, "loss": 2.3114, "step": 668 }, { "epoch": 0.1205133978833596, "grad_norm": 1.2279052734375, "learning_rate": 9.913158028804548e-05, "loss": 1.9898, "step": 669 }, { "epoch": 0.12069353749155595, "grad_norm": 1.147538185119629, "learning_rate": 9.912895040515349e-05, "loss": 1.8738, "step": 670 }, { "epoch": 0.1208736770997523, "grad_norm": 1.3366378545761108, "learning_rate": 9.912631658116101e-05, "loss": 1.8985, "step": 671 }, { "epoch": 0.12105381670794867, "grad_norm": 1.2011362314224243, "learning_rate": 9.912367881627933e-05, "loss": 2.0348, "step": 672 }, { "epoch": 0.12123395631614502, "grad_norm": 1.2105849981307983, "learning_rate": 9.912103711072008e-05, "loss": 2.0519, "step": 673 }, { "epoch": 0.12141409592434137, "grad_norm": 1.245194435119629, "learning_rate": 9.911839146469515e-05, "loss": 1.9079, "step": 674 }, { "epoch": 0.12159423553253772, "grad_norm": 1.2677587270736694, "learning_rate": 9.91157418784168e-05, "loss": 2.1401, "step": 675 }, { "epoch": 0.12177437514073407, "grad_norm": 1.2442809343338013, "learning_rate": 9.911308835209755e-05, "loss": 1.7792, "step": 676 }, { "epoch": 0.12195451474893042, "grad_norm": 1.4197555780410767, "learning_rate": 9.911043088595027e-05, "loss": 1.9049, "step": 677 }, { "epoch": 0.12213465435712677, "grad_norm": 1.268538475036621, "learning_rate": 9.910776948018814e-05, "loss": 1.8835, "step": 678 }, { "epoch": 0.12231479396532313, "grad_norm": 1.2134947776794434, "learning_rate": 9.910510413502467e-05, "loss": 1.8627, "step": 679 }, { "epoch": 0.12249493357351948, "grad_norm": 1.3159133195877075, "learning_rate": 9.910243485067367e-05, "loss": 2.1532, "step": 680 }, { "epoch": 0.12267507318171583, "grad_norm": 1.3118300437927246, "learning_rate": 9.909976162734926e-05, "loss": 2.0355, "step": 681 }, { "epoch": 0.12285521278991218, "grad_norm": 1.230215311050415, "learning_rate": 9.90970844652659e-05, "loss": 1.8318, "step": 682 }, { "epoch": 0.12303535239810853, "grad_norm": 1.428345799446106, "learning_rate": 9.90944033646383e-05, "loss": 2.1659, "step": 683 }, { "epoch": 0.12321549200630488, "grad_norm": 1.2812304496765137, "learning_rate": 9.90917183256816e-05, "loss": 1.9663, "step": 684 }, { "epoch": 0.12339563161450123, "grad_norm": 1.4184229373931885, "learning_rate": 9.908902934861117e-05, "loss": 1.7655, "step": 685 }, { "epoch": 0.1235757712226976, "grad_norm": 1.2999763488769531, "learning_rate": 9.908633643364271e-05, "loss": 1.885, "step": 686 }, { "epoch": 0.12375591083089395, "grad_norm": 1.1869958639144897, "learning_rate": 9.908363958099225e-05, "loss": 1.866, "step": 687 }, { "epoch": 0.1239360504390903, "grad_norm": 1.1575794219970703, "learning_rate": 9.908093879087614e-05, "loss": 1.6819, "step": 688 }, { "epoch": 0.12411619004728665, "grad_norm": 1.4000781774520874, "learning_rate": 9.907823406351102e-05, "loss": 2.1885, "step": 689 }, { "epoch": 0.124296329655483, "grad_norm": 1.44357430934906, "learning_rate": 9.907552539911387e-05, "loss": 2.0515, "step": 690 }, { "epoch": 0.12447646926367935, "grad_norm": 1.3052406311035156, "learning_rate": 9.907281279790197e-05, "loss": 1.822, "step": 691 }, { "epoch": 0.1246566088718757, "grad_norm": 1.3275206089019775, "learning_rate": 9.907009626009294e-05, "loss": 2.0095, "step": 692 }, { "epoch": 0.12483674848007205, "grad_norm": 1.3867100477218628, "learning_rate": 9.906737578590468e-05, "loss": 2.0162, "step": 693 }, { "epoch": 0.1250168880882684, "grad_norm": 1.2267770767211914, "learning_rate": 9.906465137555543e-05, "loss": 1.8557, "step": 694 }, { "epoch": 0.12519702769646476, "grad_norm": 1.3638699054718018, "learning_rate": 9.906192302926376e-05, "loss": 2.0727, "step": 695 }, { "epoch": 0.1253771673046611, "grad_norm": 1.5085831880569458, "learning_rate": 9.905919074724852e-05, "loss": 1.7744, "step": 696 }, { "epoch": 0.12555730691285746, "grad_norm": 1.4602805376052856, "learning_rate": 9.905645452972889e-05, "loss": 2.3713, "step": 697 }, { "epoch": 0.12573744652105381, "grad_norm": 1.391291856765747, "learning_rate": 9.905371437692437e-05, "loss": 2.0513, "step": 698 }, { "epoch": 0.12591758612925016, "grad_norm": 1.3481264114379883, "learning_rate": 9.905097028905477e-05, "loss": 1.8174, "step": 699 }, { "epoch": 0.12609772573744651, "grad_norm": 1.2525372505187988, "learning_rate": 9.904822226634024e-05, "loss": 2.0188, "step": 700 }, { "epoch": 0.12627786534564286, "grad_norm": 1.3850762844085693, "learning_rate": 9.90454703090012e-05, "loss": 2.1517, "step": 701 }, { "epoch": 0.12645800495383921, "grad_norm": 1.3955851793289185, "learning_rate": 9.904271441725843e-05, "loss": 2.3126, "step": 702 }, { "epoch": 0.12663814456203557, "grad_norm": 1.3821715116500854, "learning_rate": 9.9039954591333e-05, "loss": 2.3915, "step": 703 }, { "epoch": 0.12681828417023194, "grad_norm": 1.4405173063278198, "learning_rate": 9.903719083144627e-05, "loss": 2.0821, "step": 704 }, { "epoch": 0.1269984237784283, "grad_norm": 1.4351264238357544, "learning_rate": 9.903442313782e-05, "loss": 2.5997, "step": 705 }, { "epoch": 0.12717856338662464, "grad_norm": 1.5886861085891724, "learning_rate": 9.90316515106762e-05, "loss": 2.6303, "step": 706 }, { "epoch": 0.127358702994821, "grad_norm": 1.8986220359802246, "learning_rate": 9.902887595023718e-05, "loss": 2.2789, "step": 707 }, { "epoch": 0.12753884260301734, "grad_norm": 1.804407000541687, "learning_rate": 9.902609645672562e-05, "loss": 2.6275, "step": 708 }, { "epoch": 0.1277189822112137, "grad_norm": 1.7212653160095215, "learning_rate": 9.902331303036448e-05, "loss": 2.4336, "step": 709 }, { "epoch": 0.12789912181941004, "grad_norm": 2.3265233039855957, "learning_rate": 9.902052567137706e-05, "loss": 2.7222, "step": 710 }, { "epoch": 0.1280792614276064, "grad_norm": 1.2806710004806519, "learning_rate": 9.901773437998692e-05, "loss": 1.8323, "step": 711 }, { "epoch": 0.12825940103580274, "grad_norm": 1.2179723978042603, "learning_rate": 9.901493915641802e-05, "loss": 2.0353, "step": 712 }, { "epoch": 0.1284395406439991, "grad_norm": 1.2533454895019531, "learning_rate": 9.901214000089456e-05, "loss": 1.9294, "step": 713 }, { "epoch": 0.12861968025219545, "grad_norm": 1.3815363645553589, "learning_rate": 9.900933691364112e-05, "loss": 2.107, "step": 714 }, { "epoch": 0.1287998198603918, "grad_norm": 1.126511573791504, "learning_rate": 9.900652989488255e-05, "loss": 1.7513, "step": 715 }, { "epoch": 0.12897995946858815, "grad_norm": 1.1627007722854614, "learning_rate": 9.900371894484402e-05, "loss": 1.6158, "step": 716 }, { "epoch": 0.1291600990767845, "grad_norm": 1.2627477645874023, "learning_rate": 9.9000904063751e-05, "loss": 1.9606, "step": 717 }, { "epoch": 0.12934023868498087, "grad_norm": 1.299331545829773, "learning_rate": 9.899808525182935e-05, "loss": 1.8576, "step": 718 }, { "epoch": 0.12952037829317722, "grad_norm": 1.1861523389816284, "learning_rate": 9.899526250930516e-05, "loss": 1.8284, "step": 719 }, { "epoch": 0.12970051790137357, "grad_norm": 1.1307034492492676, "learning_rate": 9.899243583640488e-05, "loss": 1.786, "step": 720 }, { "epoch": 0.12988065750956992, "grad_norm": 1.1576093435287476, "learning_rate": 9.898960523335524e-05, "loss": 1.7738, "step": 721 }, { "epoch": 0.13006079711776627, "grad_norm": 1.2430235147476196, "learning_rate": 9.898677070038334e-05, "loss": 1.8535, "step": 722 }, { "epoch": 0.13024093672596262, "grad_norm": 1.2052149772644043, "learning_rate": 9.898393223771656e-05, "loss": 1.624, "step": 723 }, { "epoch": 0.13042107633415898, "grad_norm": 1.35765540599823, "learning_rate": 9.898108984558261e-05, "loss": 1.9485, "step": 724 }, { "epoch": 0.13060121594235533, "grad_norm": 1.1707983016967773, "learning_rate": 9.897824352420946e-05, "loss": 1.8295, "step": 725 }, { "epoch": 0.13078135555055168, "grad_norm": 1.2256286144256592, "learning_rate": 9.89753932738255e-05, "loss": 1.8786, "step": 726 }, { "epoch": 0.13096149515874803, "grad_norm": 1.2813303470611572, "learning_rate": 9.897253909465934e-05, "loss": 2.0545, "step": 727 }, { "epoch": 0.13114163476694438, "grad_norm": 1.2567967176437378, "learning_rate": 9.896968098693993e-05, "loss": 2.0479, "step": 728 }, { "epoch": 0.13132177437514073, "grad_norm": 1.2023850679397583, "learning_rate": 9.896681895089657e-05, "loss": 1.6083, "step": 729 }, { "epoch": 0.13150191398333708, "grad_norm": 1.270164966583252, "learning_rate": 9.896395298675885e-05, "loss": 2.1973, "step": 730 }, { "epoch": 0.13168205359153343, "grad_norm": 1.2565652132034302, "learning_rate": 9.896108309475666e-05, "loss": 1.7755, "step": 731 }, { "epoch": 0.13186219319972978, "grad_norm": 1.235974907875061, "learning_rate": 9.895820927512025e-05, "loss": 1.8644, "step": 732 }, { "epoch": 0.13204233280792615, "grad_norm": 1.3333117961883545, "learning_rate": 9.895533152808013e-05, "loss": 2.1768, "step": 733 }, { "epoch": 0.1322224724161225, "grad_norm": 1.1892369985580444, "learning_rate": 9.895244985386715e-05, "loss": 1.8334, "step": 734 }, { "epoch": 0.13240261202431886, "grad_norm": 1.1675918102264404, "learning_rate": 9.894956425271251e-05, "loss": 1.8035, "step": 735 }, { "epoch": 0.1325827516325152, "grad_norm": 1.21050226688385, "learning_rate": 9.894667472484765e-05, "loss": 1.8119, "step": 736 }, { "epoch": 0.13276289124071156, "grad_norm": 1.3085192441940308, "learning_rate": 9.894378127050439e-05, "loss": 2.0245, "step": 737 }, { "epoch": 0.1329430308489079, "grad_norm": 1.241247296333313, "learning_rate": 9.894088388991483e-05, "loss": 1.7271, "step": 738 }, { "epoch": 0.13312317045710426, "grad_norm": 1.2851693630218506, "learning_rate": 9.893798258331142e-05, "loss": 2.0219, "step": 739 }, { "epoch": 0.1333033100653006, "grad_norm": 1.2805635929107666, "learning_rate": 9.893507735092687e-05, "loss": 1.9879, "step": 740 }, { "epoch": 0.13348344967349696, "grad_norm": 1.2987934350967407, "learning_rate": 9.893216819299425e-05, "loss": 2.0181, "step": 741 }, { "epoch": 0.1336635892816933, "grad_norm": 1.425493597984314, "learning_rate": 9.892925510974693e-05, "loss": 1.9566, "step": 742 }, { "epoch": 0.13384372888988966, "grad_norm": 1.3339426517486572, "learning_rate": 9.89263381014186e-05, "loss": 1.8115, "step": 743 }, { "epoch": 0.134023868498086, "grad_norm": 1.229731559753418, "learning_rate": 9.892341716824327e-05, "loss": 1.7261, "step": 744 }, { "epoch": 0.13420400810628236, "grad_norm": 1.5051815509796143, "learning_rate": 9.892049231045524e-05, "loss": 2.0755, "step": 745 }, { "epoch": 0.1343841477144787, "grad_norm": 1.2894476652145386, "learning_rate": 9.891756352828914e-05, "loss": 1.954, "step": 746 }, { "epoch": 0.13456428732267509, "grad_norm": 1.5340901613235474, "learning_rate": 9.891463082197992e-05, "loss": 1.9708, "step": 747 }, { "epoch": 0.13474442693087144, "grad_norm": 1.2935574054718018, "learning_rate": 9.891169419176283e-05, "loss": 1.7517, "step": 748 }, { "epoch": 0.1349245665390678, "grad_norm": 1.2900742292404175, "learning_rate": 9.890875363787347e-05, "loss": 1.7476, "step": 749 }, { "epoch": 0.13510470614726414, "grad_norm": 1.5172815322875977, "learning_rate": 9.890580916054772e-05, "loss": 1.905, "step": 750 }, { "epoch": 0.1352848457554605, "grad_norm": 1.405549168586731, "learning_rate": 9.890286076002178e-05, "loss": 2.2225, "step": 751 }, { "epoch": 0.13546498536365684, "grad_norm": 1.4486700296401978, "learning_rate": 9.889990843653216e-05, "loss": 2.3053, "step": 752 }, { "epoch": 0.1356451249718532, "grad_norm": 1.4098986387252808, "learning_rate": 9.88969521903157e-05, "loss": 2.1699, "step": 753 }, { "epoch": 0.13582526458004954, "grad_norm": 1.4319884777069092, "learning_rate": 9.889399202160956e-05, "loss": 2.6816, "step": 754 }, { "epoch": 0.1360054041882459, "grad_norm": 1.3512017726898193, "learning_rate": 9.889102793065119e-05, "loss": 2.3135, "step": 755 }, { "epoch": 0.13618554379644224, "grad_norm": 1.548256278038025, "learning_rate": 9.888805991767838e-05, "loss": 2.1343, "step": 756 }, { "epoch": 0.1363656834046386, "grad_norm": 1.5729053020477295, "learning_rate": 9.888508798292921e-05, "loss": 2.3763, "step": 757 }, { "epoch": 0.13654582301283494, "grad_norm": 1.7666906118392944, "learning_rate": 9.888211212664211e-05, "loss": 2.0297, "step": 758 }, { "epoch": 0.1367259626210313, "grad_norm": 2.469851493835449, "learning_rate": 9.887913234905577e-05, "loss": 2.4429, "step": 759 }, { "epoch": 0.13690610222922764, "grad_norm": 1.450822353363037, "learning_rate": 9.887614865040924e-05, "loss": 2.1066, "step": 760 }, { "epoch": 0.13708624183742402, "grad_norm": 1.3207193613052368, "learning_rate": 9.887316103094188e-05, "loss": 1.7213, "step": 761 }, { "epoch": 0.13726638144562037, "grad_norm": 1.3421610593795776, "learning_rate": 9.887016949089333e-05, "loss": 1.8533, "step": 762 }, { "epoch": 0.13744652105381672, "grad_norm": 1.3048672676086426, "learning_rate": 9.88671740305036e-05, "loss": 1.7998, "step": 763 }, { "epoch": 0.13762666066201307, "grad_norm": 1.3105459213256836, "learning_rate": 9.886417465001299e-05, "loss": 1.6609, "step": 764 }, { "epoch": 0.13780680027020942, "grad_norm": 1.2985025644302368, "learning_rate": 9.886117134966207e-05, "loss": 1.628, "step": 765 }, { "epoch": 0.13798693987840577, "grad_norm": 1.3812878131866455, "learning_rate": 9.885816412969178e-05, "loss": 1.8243, "step": 766 }, { "epoch": 0.13816707948660212, "grad_norm": 1.3445358276367188, "learning_rate": 9.885515299034336e-05, "loss": 1.9206, "step": 767 }, { "epoch": 0.13834721909479847, "grad_norm": 1.2129161357879639, "learning_rate": 9.885213793185837e-05, "loss": 1.8118, "step": 768 }, { "epoch": 0.13852735870299482, "grad_norm": 1.299033761024475, "learning_rate": 9.884911895447867e-05, "loss": 1.8389, "step": 769 }, { "epoch": 0.13870749831119117, "grad_norm": 1.2037889957427979, "learning_rate": 9.884609605844644e-05, "loss": 1.6472, "step": 770 }, { "epoch": 0.13888763791938752, "grad_norm": 1.268749713897705, "learning_rate": 9.884306924400418e-05, "loss": 1.923, "step": 771 }, { "epoch": 0.13906777752758387, "grad_norm": 1.2923638820648193, "learning_rate": 9.88400385113947e-05, "loss": 1.9507, "step": 772 }, { "epoch": 0.13924791713578022, "grad_norm": 1.2170230150222778, "learning_rate": 9.883700386086111e-05, "loss": 2.1415, "step": 773 }, { "epoch": 0.13942805674397657, "grad_norm": 1.2914255857467651, "learning_rate": 9.883396529264686e-05, "loss": 1.8496, "step": 774 }, { "epoch": 0.13960819635217295, "grad_norm": 1.294809103012085, "learning_rate": 9.883092280699571e-05, "loss": 2.0246, "step": 775 }, { "epoch": 0.1397883359603693, "grad_norm": 1.315855622291565, "learning_rate": 9.882787640415169e-05, "loss": 1.7098, "step": 776 }, { "epoch": 0.13996847556856565, "grad_norm": 1.3406991958618164, "learning_rate": 9.882482608435923e-05, "loss": 1.9858, "step": 777 }, { "epoch": 0.140148615176762, "grad_norm": 1.2104747295379639, "learning_rate": 9.882177184786302e-05, "loss": 1.9567, "step": 778 }, { "epoch": 0.14032875478495835, "grad_norm": 1.2148669958114624, "learning_rate": 9.881871369490802e-05, "loss": 2.0177, "step": 779 }, { "epoch": 0.1405088943931547, "grad_norm": 1.4571545124053955, "learning_rate": 9.881565162573961e-05, "loss": 1.8855, "step": 780 }, { "epoch": 0.14068903400135105, "grad_norm": 1.2827662229537964, "learning_rate": 9.881258564060338e-05, "loss": 1.7891, "step": 781 }, { "epoch": 0.1408691736095474, "grad_norm": 1.3293724060058594, "learning_rate": 9.880951573974533e-05, "loss": 2.0578, "step": 782 }, { "epoch": 0.14104931321774375, "grad_norm": 1.3720998764038086, "learning_rate": 9.880644192341169e-05, "loss": 2.0085, "step": 783 }, { "epoch": 0.1412294528259401, "grad_norm": 1.2281889915466309, "learning_rate": 9.880336419184905e-05, "loss": 1.8419, "step": 784 }, { "epoch": 0.14140959243413645, "grad_norm": 1.2522205114364624, "learning_rate": 9.88002825453043e-05, "loss": 2.0256, "step": 785 }, { "epoch": 0.1415897320423328, "grad_norm": 1.2879754304885864, "learning_rate": 9.879719698402467e-05, "loss": 1.8484, "step": 786 }, { "epoch": 0.14176987165052915, "grad_norm": 1.3683329820632935, "learning_rate": 9.879410750825766e-05, "loss": 2.2531, "step": 787 }, { "epoch": 0.1419500112587255, "grad_norm": 1.1800748109817505, "learning_rate": 9.879101411825111e-05, "loss": 1.6993, "step": 788 }, { "epoch": 0.14213015086692188, "grad_norm": 1.282247543334961, "learning_rate": 9.878791681425317e-05, "loss": 1.7951, "step": 789 }, { "epoch": 0.14231029047511823, "grad_norm": 1.316503882408142, "learning_rate": 9.87848155965123e-05, "loss": 1.7466, "step": 790 }, { "epoch": 0.14249043008331458, "grad_norm": 1.4377599954605103, "learning_rate": 9.87817104652773e-05, "loss": 2.1601, "step": 791 }, { "epoch": 0.14267056969151093, "grad_norm": 1.289066195487976, "learning_rate": 9.877860142079725e-05, "loss": 2.1348, "step": 792 }, { "epoch": 0.14285070929970728, "grad_norm": 1.2391852140426636, "learning_rate": 9.877548846332154e-05, "loss": 1.8587, "step": 793 }, { "epoch": 0.14303084890790363, "grad_norm": 1.2917535305023193, "learning_rate": 9.877237159309991e-05, "loss": 1.9722, "step": 794 }, { "epoch": 0.14321098851609998, "grad_norm": 1.6067614555358887, "learning_rate": 9.876925081038238e-05, "loss": 2.2133, "step": 795 }, { "epoch": 0.14339112812429633, "grad_norm": 1.5059887170791626, "learning_rate": 9.87661261154193e-05, "loss": 1.9933, "step": 796 }, { "epoch": 0.14357126773249268, "grad_norm": 1.3656240701675415, "learning_rate": 9.876299750846136e-05, "loss": 1.7474, "step": 797 }, { "epoch": 0.14375140734068903, "grad_norm": 1.4455832242965698, "learning_rate": 9.87598649897595e-05, "loss": 1.7519, "step": 798 }, { "epoch": 0.14393154694888538, "grad_norm": 1.288589596748352, "learning_rate": 9.875672855956501e-05, "loss": 1.7428, "step": 799 }, { "epoch": 0.14411168655708173, "grad_norm": 1.4480016231536865, "learning_rate": 9.875358821812953e-05, "loss": 1.7862, "step": 800 }, { "epoch": 0.14429182616527808, "grad_norm": 1.372703194618225, "learning_rate": 9.875044396570493e-05, "loss": 2.5034, "step": 801 }, { "epoch": 0.14447196577347443, "grad_norm": 1.4766262769699097, "learning_rate": 9.874729580254345e-05, "loss": 2.696, "step": 802 }, { "epoch": 0.14465210538167078, "grad_norm": 1.2882176637649536, "learning_rate": 9.874414372889767e-05, "loss": 2.3181, "step": 803 }, { "epoch": 0.14483224498986716, "grad_norm": 1.3298519849777222, "learning_rate": 9.87409877450204e-05, "loss": 2.3199, "step": 804 }, { "epoch": 0.1450123845980635, "grad_norm": 1.3550612926483154, "learning_rate": 9.873782785116484e-05, "loss": 2.4582, "step": 805 }, { "epoch": 0.14519252420625986, "grad_norm": 1.4804792404174805, "learning_rate": 9.873466404758446e-05, "loss": 2.2796, "step": 806 }, { "epoch": 0.1453726638144562, "grad_norm": 1.3525737524032593, "learning_rate": 9.873149633453308e-05, "loss": 2.0421, "step": 807 }, { "epoch": 0.14555280342265256, "grad_norm": 1.615544319152832, "learning_rate": 9.872832471226482e-05, "loss": 2.4053, "step": 808 }, { "epoch": 0.1457329430308489, "grad_norm": 1.6876574754714966, "learning_rate": 9.872514918103406e-05, "loss": 2.3352, "step": 809 }, { "epoch": 0.14591308263904526, "grad_norm": 2.0150530338287354, "learning_rate": 9.872196974109558e-05, "loss": 2.4234, "step": 810 }, { "epoch": 0.1460932222472416, "grad_norm": 1.2573726177215576, "learning_rate": 9.871878639270441e-05, "loss": 1.7109, "step": 811 }, { "epoch": 0.14627336185543796, "grad_norm": 1.3346951007843018, "learning_rate": 9.871559913611592e-05, "loss": 1.9011, "step": 812 }, { "epoch": 0.1464535014636343, "grad_norm": 1.3603603839874268, "learning_rate": 9.871240797158581e-05, "loss": 2.0636, "step": 813 }, { "epoch": 0.14663364107183066, "grad_norm": 1.3373414278030396, "learning_rate": 9.870921289937006e-05, "loss": 1.9809, "step": 814 }, { "epoch": 0.146813780680027, "grad_norm": 1.360762357711792, "learning_rate": 9.870601391972496e-05, "loss": 2.0151, "step": 815 }, { "epoch": 0.14699392028822336, "grad_norm": 1.4189239740371704, "learning_rate": 9.870281103290717e-05, "loss": 1.9082, "step": 816 }, { "epoch": 0.1471740598964197, "grad_norm": 1.1659568548202515, "learning_rate": 9.869960423917357e-05, "loss": 1.7785, "step": 817 }, { "epoch": 0.1473541995046161, "grad_norm": 1.305368423461914, "learning_rate": 9.869639353878146e-05, "loss": 1.9978, "step": 818 }, { "epoch": 0.14753433911281244, "grad_norm": 1.2636337280273438, "learning_rate": 9.869317893198838e-05, "loss": 1.7232, "step": 819 }, { "epoch": 0.1477144787210088, "grad_norm": 1.2035678625106812, "learning_rate": 9.86899604190522e-05, "loss": 1.7567, "step": 820 }, { "epoch": 0.14789461832920514, "grad_norm": 1.2082552909851074, "learning_rate": 9.868673800023111e-05, "loss": 2.0055, "step": 821 }, { "epoch": 0.1480747579374015, "grad_norm": 1.3273311853408813, "learning_rate": 9.868351167578363e-05, "loss": 1.9704, "step": 822 }, { "epoch": 0.14825489754559784, "grad_norm": 1.3554540872573853, "learning_rate": 9.868028144596852e-05, "loss": 2.0078, "step": 823 }, { "epoch": 0.1484350371537942, "grad_norm": 1.3239822387695312, "learning_rate": 9.867704731104496e-05, "loss": 1.8729, "step": 824 }, { "epoch": 0.14861517676199054, "grad_norm": 1.246198296546936, "learning_rate": 9.867380927127239e-05, "loss": 1.8706, "step": 825 }, { "epoch": 0.1487953163701869, "grad_norm": 1.2795337438583374, "learning_rate": 9.867056732691054e-05, "loss": 2.0535, "step": 826 }, { "epoch": 0.14897545597838324, "grad_norm": 1.2475967407226562, "learning_rate": 9.866732147821947e-05, "loss": 1.7275, "step": 827 }, { "epoch": 0.1491555955865796, "grad_norm": 1.3331667184829712, "learning_rate": 9.86640717254596e-05, "loss": 1.8053, "step": 828 }, { "epoch": 0.14933573519477594, "grad_norm": 1.2305011749267578, "learning_rate": 9.866081806889158e-05, "loss": 1.7696, "step": 829 }, { "epoch": 0.1495158748029723, "grad_norm": 1.368841528892517, "learning_rate": 9.865756050877645e-05, "loss": 2.1196, "step": 830 }, { "epoch": 0.14969601441116864, "grad_norm": 1.2340091466903687, "learning_rate": 9.86542990453755e-05, "loss": 1.7208, "step": 831 }, { "epoch": 0.14987615401936502, "grad_norm": 1.3266440629959106, "learning_rate": 9.865103367895039e-05, "loss": 2.0684, "step": 832 }, { "epoch": 0.15005629362756137, "grad_norm": 1.3821064233779907, "learning_rate": 9.864776440976306e-05, "loss": 2.1309, "step": 833 }, { "epoch": 0.15023643323575772, "grad_norm": 1.1727502346038818, "learning_rate": 9.864449123807576e-05, "loss": 1.6972, "step": 834 }, { "epoch": 0.15041657284395407, "grad_norm": 1.2775639295578003, "learning_rate": 9.864121416415106e-05, "loss": 1.8604, "step": 835 }, { "epoch": 0.15059671245215042, "grad_norm": 1.3789913654327393, "learning_rate": 9.863793318825186e-05, "loss": 1.9378, "step": 836 }, { "epoch": 0.15077685206034677, "grad_norm": 1.3935092687606812, "learning_rate": 9.863464831064135e-05, "loss": 1.8925, "step": 837 }, { "epoch": 0.15095699166854312, "grad_norm": 1.3984404802322388, "learning_rate": 9.863135953158305e-05, "loss": 1.9013, "step": 838 }, { "epoch": 0.15113713127673947, "grad_norm": 1.3366034030914307, "learning_rate": 9.862806685134077e-05, "loss": 2.0315, "step": 839 }, { "epoch": 0.15131727088493582, "grad_norm": 1.4392826557159424, "learning_rate": 9.862477027017864e-05, "loss": 2.0174, "step": 840 }, { "epoch": 0.15149741049313217, "grad_norm": 1.4083927869796753, "learning_rate": 9.862146978836113e-05, "loss": 2.0956, "step": 841 }, { "epoch": 0.15167755010132852, "grad_norm": 1.296589970588684, "learning_rate": 9.861816540615299e-05, "loss": 1.9268, "step": 842 }, { "epoch": 0.15185768970952487, "grad_norm": 1.2528111934661865, "learning_rate": 9.861485712381931e-05, "loss": 1.923, "step": 843 }, { "epoch": 0.15203782931772122, "grad_norm": 1.3625739812850952, "learning_rate": 9.861154494162548e-05, "loss": 2.1309, "step": 844 }, { "epoch": 0.15221796892591757, "grad_norm": 1.4396804571151733, "learning_rate": 9.860822885983718e-05, "loss": 1.8609, "step": 845 }, { "epoch": 0.15239810853411395, "grad_norm": 1.4618682861328125, "learning_rate": 9.860490887872045e-05, "loss": 2.0644, "step": 846 }, { "epoch": 0.1525782481423103, "grad_norm": 1.2852728366851807, "learning_rate": 9.860158499854159e-05, "loss": 1.8212, "step": 847 }, { "epoch": 0.15275838775050665, "grad_norm": 1.3814423084259033, "learning_rate": 9.859825721956725e-05, "loss": 1.9965, "step": 848 }, { "epoch": 0.152938527358703, "grad_norm": 1.4789983034133911, "learning_rate": 9.85949255420644e-05, "loss": 2.0927, "step": 849 }, { "epoch": 0.15311866696689935, "grad_norm": 1.369570255279541, "learning_rate": 9.859158996630029e-05, "loss": 1.9551, "step": 850 }, { "epoch": 0.1532988065750957, "grad_norm": 1.4670217037200928, "learning_rate": 9.85882504925425e-05, "loss": 2.3491, "step": 851 }, { "epoch": 0.15347894618329205, "grad_norm": 1.451915979385376, "learning_rate": 9.858490712105892e-05, "loss": 2.3814, "step": 852 }, { "epoch": 0.1536590857914884, "grad_norm": 1.3465337753295898, "learning_rate": 9.858155985211776e-05, "loss": 2.3684, "step": 853 }, { "epoch": 0.15383922539968475, "grad_norm": 1.309025526046753, "learning_rate": 9.857820868598753e-05, "loss": 2.166, "step": 854 }, { "epoch": 0.1540193650078811, "grad_norm": 1.430438756942749, "learning_rate": 9.857485362293707e-05, "loss": 2.3894, "step": 855 }, { "epoch": 0.15419950461607745, "grad_norm": 1.4471404552459717, "learning_rate": 9.857149466323549e-05, "loss": 2.2494, "step": 856 }, { "epoch": 0.1543796442242738, "grad_norm": 1.853070855140686, "learning_rate": 9.856813180715229e-05, "loss": 2.4151, "step": 857 }, { "epoch": 0.15455978383247015, "grad_norm": 1.7586486339569092, "learning_rate": 9.856476505495721e-05, "loss": 2.3734, "step": 858 }, { "epoch": 0.1547399234406665, "grad_norm": 5.4407501220703125, "learning_rate": 9.856139440692031e-05, "loss": 2.6805, "step": 859 }, { "epoch": 0.15492006304886285, "grad_norm": 1.71763014793396, "learning_rate": 9.855801986331203e-05, "loss": 2.3634, "step": 860 }, { "epoch": 0.15510020265705923, "grad_norm": 1.3074826002120972, "learning_rate": 9.855464142440303e-05, "loss": 2.038, "step": 861 }, { "epoch": 0.15528034226525558, "grad_norm": 1.149901032447815, "learning_rate": 9.855125909046436e-05, "loss": 1.601, "step": 862 }, { "epoch": 0.15546048187345193, "grad_norm": 1.2345138788223267, "learning_rate": 9.854787286176732e-05, "loss": 1.832, "step": 863 }, { "epoch": 0.15564062148164828, "grad_norm": 1.199136734008789, "learning_rate": 9.854448273858359e-05, "loss": 1.7501, "step": 864 }, { "epoch": 0.15582076108984463, "grad_norm": 1.298190951347351, "learning_rate": 9.854108872118508e-05, "loss": 1.8705, "step": 865 }, { "epoch": 0.15600090069804098, "grad_norm": 1.2769882678985596, "learning_rate": 9.853769080984407e-05, "loss": 1.9791, "step": 866 }, { "epoch": 0.15618104030623733, "grad_norm": 1.2338829040527344, "learning_rate": 9.853428900483316e-05, "loss": 1.9113, "step": 867 }, { "epoch": 0.15636117991443368, "grad_norm": 1.3071916103363037, "learning_rate": 9.853088330642522e-05, "loss": 1.719, "step": 868 }, { "epoch": 0.15654131952263003, "grad_norm": 1.2592660188674927, "learning_rate": 9.852747371489346e-05, "loss": 1.7682, "step": 869 }, { "epoch": 0.15672145913082638, "grad_norm": 1.2901333570480347, "learning_rate": 9.85240602305114e-05, "loss": 2.0187, "step": 870 }, { "epoch": 0.15690159873902274, "grad_norm": 1.2896544933319092, "learning_rate": 9.852064285355285e-05, "loss": 1.8367, "step": 871 }, { "epoch": 0.15708173834721909, "grad_norm": 1.3103440999984741, "learning_rate": 9.851722158429198e-05, "loss": 1.879, "step": 872 }, { "epoch": 0.15726187795541544, "grad_norm": 1.1968687772750854, "learning_rate": 9.851379642300323e-05, "loss": 1.9501, "step": 873 }, { "epoch": 0.15744201756361179, "grad_norm": 1.1210064888000488, "learning_rate": 9.851036736996133e-05, "loss": 1.5667, "step": 874 }, { "epoch": 0.15762215717180816, "grad_norm": 1.279301404953003, "learning_rate": 9.850693442544141e-05, "loss": 2.0346, "step": 875 }, { "epoch": 0.15780229678000451, "grad_norm": 1.2971493005752563, "learning_rate": 9.850349758971883e-05, "loss": 1.7209, "step": 876 }, { "epoch": 0.15798243638820086, "grad_norm": 1.4926193952560425, "learning_rate": 9.850005686306932e-05, "loss": 2.3681, "step": 877 }, { "epoch": 0.15816257599639721, "grad_norm": 1.486655831336975, "learning_rate": 9.849661224576885e-05, "loss": 2.2133, "step": 878 }, { "epoch": 0.15834271560459356, "grad_norm": 1.5211142301559448, "learning_rate": 9.849316373809378e-05, "loss": 2.1244, "step": 879 }, { "epoch": 0.15852285521278991, "grad_norm": 1.3286242485046387, "learning_rate": 9.848971134032072e-05, "loss": 2.1348, "step": 880 }, { "epoch": 0.15870299482098627, "grad_norm": 1.3326750993728638, "learning_rate": 9.848625505272665e-05, "loss": 2.2662, "step": 881 }, { "epoch": 0.15888313442918262, "grad_norm": 1.3165205717086792, "learning_rate": 9.848279487558881e-05, "loss": 1.9833, "step": 882 }, { "epoch": 0.15906327403737897, "grad_norm": 1.3390347957611084, "learning_rate": 9.847933080918479e-05, "loss": 1.8351, "step": 883 }, { "epoch": 0.15924341364557532, "grad_norm": 1.303579568862915, "learning_rate": 9.847586285379244e-05, "loss": 1.9421, "step": 884 }, { "epoch": 0.15942355325377167, "grad_norm": 1.2818201780319214, "learning_rate": 9.847239100969001e-05, "loss": 1.9231, "step": 885 }, { "epoch": 0.15960369286196802, "grad_norm": 1.3924510478973389, "learning_rate": 9.846891527715597e-05, "loss": 2.1783, "step": 886 }, { "epoch": 0.15978383247016437, "grad_norm": 1.353502869606018, "learning_rate": 9.846543565646915e-05, "loss": 2.0035, "step": 887 }, { "epoch": 0.15996397207836072, "grad_norm": 1.3508769273757935, "learning_rate": 9.84619521479087e-05, "loss": 2.0585, "step": 888 }, { "epoch": 0.1601441116865571, "grad_norm": 1.3455395698547363, "learning_rate": 9.845846475175406e-05, "loss": 1.915, "step": 889 }, { "epoch": 0.16032425129475344, "grad_norm": 1.3258179426193237, "learning_rate": 9.845497346828496e-05, "loss": 1.9067, "step": 890 }, { "epoch": 0.1605043909029498, "grad_norm": 1.2612024545669556, "learning_rate": 9.84514782977815e-05, "loss": 1.8582, "step": 891 }, { "epoch": 0.16068453051114615, "grad_norm": 1.455338478088379, "learning_rate": 9.844797924052406e-05, "loss": 2.2347, "step": 892 }, { "epoch": 0.1608646701193425, "grad_norm": 1.5008792877197266, "learning_rate": 9.844447629679331e-05, "loss": 2.1904, "step": 893 }, { "epoch": 0.16104480972753885, "grad_norm": 1.4137682914733887, "learning_rate": 9.844096946687028e-05, "loss": 1.9771, "step": 894 }, { "epoch": 0.1612249493357352, "grad_norm": 1.2500028610229492, "learning_rate": 9.843745875103627e-05, "loss": 2.134, "step": 895 }, { "epoch": 0.16140508894393155, "grad_norm": 1.375946283340454, "learning_rate": 9.843394414957289e-05, "loss": 1.8825, "step": 896 }, { "epoch": 0.1615852285521279, "grad_norm": 1.4914226531982422, "learning_rate": 9.843042566276214e-05, "loss": 2.0495, "step": 897 }, { "epoch": 0.16176536816032425, "grad_norm": 1.3528846502304077, "learning_rate": 9.842690329088622e-05, "loss": 1.857, "step": 898 }, { "epoch": 0.1619455077685206, "grad_norm": 1.4280825853347778, "learning_rate": 9.84233770342277e-05, "loss": 1.7073, "step": 899 }, { "epoch": 0.16212564737671695, "grad_norm": 1.6007810831069946, "learning_rate": 9.841984689306945e-05, "loss": 1.743, "step": 900 }, { "epoch": 0.1623057869849133, "grad_norm": 1.26369309425354, "learning_rate": 9.841631286769468e-05, "loss": 2.4067, "step": 901 }, { "epoch": 0.16248592659310965, "grad_norm": 1.3648098707199097, "learning_rate": 9.841277495838688e-05, "loss": 2.3882, "step": 902 }, { "epoch": 0.16266606620130603, "grad_norm": 1.3654829263687134, "learning_rate": 9.840923316542983e-05, "loss": 2.2245, "step": 903 }, { "epoch": 0.16284620580950238, "grad_norm": 1.4620901346206665, "learning_rate": 9.84056874891077e-05, "loss": 2.1419, "step": 904 }, { "epoch": 0.16302634541769873, "grad_norm": 1.4209767580032349, "learning_rate": 9.840213792970489e-05, "loss": 2.4339, "step": 905 }, { "epoch": 0.16320648502589508, "grad_norm": 1.3212268352508545, "learning_rate": 9.839858448750614e-05, "loss": 2.2154, "step": 906 }, { "epoch": 0.16338662463409143, "grad_norm": 1.3348406553268433, "learning_rate": 9.839502716279653e-05, "loss": 2.0729, "step": 907 }, { "epoch": 0.16356676424228778, "grad_norm": 1.3997691869735718, "learning_rate": 9.83914659558614e-05, "loss": 2.1104, "step": 908 }, { "epoch": 0.16374690385048413, "grad_norm": 1.744264006614685, "learning_rate": 9.838790086698646e-05, "loss": 2.2759, "step": 909 }, { "epoch": 0.16392704345868048, "grad_norm": 2.3655107021331787, "learning_rate": 9.838433189645768e-05, "loss": 2.2623, "step": 910 }, { "epoch": 0.16410718306687683, "grad_norm": 1.341048240661621, "learning_rate": 9.838075904456134e-05, "loss": 2.057, "step": 911 }, { "epoch": 0.16428732267507318, "grad_norm": 1.2010446786880493, "learning_rate": 9.83771823115841e-05, "loss": 1.8994, "step": 912 }, { "epoch": 0.16446746228326953, "grad_norm": 1.3203390836715698, "learning_rate": 9.837360169781285e-05, "loss": 1.8322, "step": 913 }, { "epoch": 0.16464760189146588, "grad_norm": 1.4009171724319458, "learning_rate": 9.837001720353483e-05, "loss": 2.0131, "step": 914 }, { "epoch": 0.16482774149966223, "grad_norm": 1.2876688241958618, "learning_rate": 9.836642882903759e-05, "loss": 1.807, "step": 915 }, { "epoch": 0.16500788110785858, "grad_norm": 1.2223199605941772, "learning_rate": 9.836283657460899e-05, "loss": 1.7567, "step": 916 }, { "epoch": 0.16518802071605496, "grad_norm": 1.208533525466919, "learning_rate": 9.835924044053719e-05, "loss": 1.8141, "step": 917 }, { "epoch": 0.1653681603242513, "grad_norm": 1.410413384437561, "learning_rate": 9.835564042711068e-05, "loss": 2.05, "step": 918 }, { "epoch": 0.16554829993244766, "grad_norm": 1.1603425741195679, "learning_rate": 9.835203653461824e-05, "loss": 1.6845, "step": 919 }, { "epoch": 0.165728439540644, "grad_norm": 1.264150857925415, "learning_rate": 9.8348428763349e-05, "loss": 2.1106, "step": 920 }, { "epoch": 0.16590857914884036, "grad_norm": 1.314612865447998, "learning_rate": 9.834481711359234e-05, "loss": 1.8999, "step": 921 }, { "epoch": 0.1660887187570367, "grad_norm": 1.2794193029403687, "learning_rate": 9.8341201585638e-05, "loss": 1.7304, "step": 922 }, { "epoch": 0.16626885836523306, "grad_norm": 1.303610920906067, "learning_rate": 9.833758217977603e-05, "loss": 1.8447, "step": 923 }, { "epoch": 0.1664489979734294, "grad_norm": 1.4079903364181519, "learning_rate": 9.833395889629675e-05, "loss": 1.9543, "step": 924 }, { "epoch": 0.16662913758162576, "grad_norm": 1.2935470342636108, "learning_rate": 9.833033173549083e-05, "loss": 1.7859, "step": 925 }, { "epoch": 0.1668092771898221, "grad_norm": 1.25351083278656, "learning_rate": 9.832670069764924e-05, "loss": 2.0095, "step": 926 }, { "epoch": 0.16698941679801846, "grad_norm": 1.3944593667984009, "learning_rate": 9.832306578306326e-05, "loss": 2.0233, "step": 927 }, { "epoch": 0.1671695564062148, "grad_norm": 1.2526030540466309, "learning_rate": 9.831942699202448e-05, "loss": 1.677, "step": 928 }, { "epoch": 0.16734969601441116, "grad_norm": 1.5346907377243042, "learning_rate": 9.83157843248248e-05, "loss": 1.9569, "step": 929 }, { "epoch": 0.1675298356226075, "grad_norm": 1.342502236366272, "learning_rate": 9.831213778175644e-05, "loss": 2.0435, "step": 930 }, { "epoch": 0.16770997523080386, "grad_norm": 1.2255526781082153, "learning_rate": 9.830848736311193e-05, "loss": 1.7563, "step": 931 }, { "epoch": 0.16789011483900024, "grad_norm": 1.268193006515503, "learning_rate": 9.830483306918407e-05, "loss": 1.54, "step": 932 }, { "epoch": 0.1680702544471966, "grad_norm": 1.3726140260696411, "learning_rate": 9.830117490026604e-05, "loss": 1.7818, "step": 933 }, { "epoch": 0.16825039405539294, "grad_norm": 1.3046773672103882, "learning_rate": 9.829751285665128e-05, "loss": 2.1845, "step": 934 }, { "epoch": 0.1684305336635893, "grad_norm": 1.2928993701934814, "learning_rate": 9.829384693863357e-05, "loss": 1.8105, "step": 935 }, { "epoch": 0.16861067327178564, "grad_norm": 1.4520810842514038, "learning_rate": 9.829017714650699e-05, "loss": 2.1978, "step": 936 }, { "epoch": 0.168790812879982, "grad_norm": 1.5034133195877075, "learning_rate": 9.828650348056591e-05, "loss": 2.1587, "step": 937 }, { "epoch": 0.16897095248817834, "grad_norm": 1.3858444690704346, "learning_rate": 9.828282594110505e-05, "loss": 1.9391, "step": 938 }, { "epoch": 0.1691510920963747, "grad_norm": 1.4061371088027954, "learning_rate": 9.82791445284194e-05, "loss": 1.8593, "step": 939 }, { "epoch": 0.16933123170457104, "grad_norm": 1.2276051044464111, "learning_rate": 9.82754592428043e-05, "loss": 1.6239, "step": 940 }, { "epoch": 0.1695113713127674, "grad_norm": 1.2832549810409546, "learning_rate": 9.827177008455536e-05, "loss": 1.6908, "step": 941 }, { "epoch": 0.16969151092096374, "grad_norm": 1.3262428045272827, "learning_rate": 9.826807705396854e-05, "loss": 1.8058, "step": 942 }, { "epoch": 0.1698716505291601, "grad_norm": 1.3945311307907104, "learning_rate": 9.82643801513401e-05, "loss": 2.1552, "step": 943 }, { "epoch": 0.17005179013735644, "grad_norm": 1.315379023551941, "learning_rate": 9.826067937696659e-05, "loss": 1.9155, "step": 944 }, { "epoch": 0.1702319297455528, "grad_norm": 1.521011233329773, "learning_rate": 9.825697473114488e-05, "loss": 2.152, "step": 945 }, { "epoch": 0.17041206935374917, "grad_norm": 1.483184814453125, "learning_rate": 9.825326621417216e-05, "loss": 2.0287, "step": 946 }, { "epoch": 0.17059220896194552, "grad_norm": 1.3443012237548828, "learning_rate": 9.824955382634594e-05, "loss": 1.6047, "step": 947 }, { "epoch": 0.17077234857014187, "grad_norm": 1.398842453956604, "learning_rate": 9.824583756796402e-05, "loss": 1.9133, "step": 948 }, { "epoch": 0.17095248817833822, "grad_norm": 1.4118281602859497, "learning_rate": 9.824211743932449e-05, "loss": 1.9895, "step": 949 }, { "epoch": 0.17113262778653457, "grad_norm": 1.3446736335754395, "learning_rate": 9.82383934407258e-05, "loss": 1.7331, "step": 950 }, { "epoch": 0.17131276739473092, "grad_norm": 1.295842170715332, "learning_rate": 9.823466557246668e-05, "loss": 2.3869, "step": 951 }, { "epoch": 0.17149290700292727, "grad_norm": 1.4812614917755127, "learning_rate": 9.82309338348462e-05, "loss": 2.3976, "step": 952 }, { "epoch": 0.17167304661112362, "grad_norm": 1.270684838294983, "learning_rate": 9.822719822816368e-05, "loss": 2.3213, "step": 953 }, { "epoch": 0.17185318621931997, "grad_norm": 1.2606884241104126, "learning_rate": 9.822345875271883e-05, "loss": 2.0548, "step": 954 }, { "epoch": 0.17203332582751632, "grad_norm": 1.3441598415374756, "learning_rate": 9.821971540881161e-05, "loss": 2.329, "step": 955 }, { "epoch": 0.17221346543571267, "grad_norm": 1.5862243175506592, "learning_rate": 9.821596819674228e-05, "loss": 2.4331, "step": 956 }, { "epoch": 0.17239360504390902, "grad_norm": 1.4265897274017334, "learning_rate": 9.821221711681148e-05, "loss": 2.3035, "step": 957 }, { "epoch": 0.17257374465210537, "grad_norm": 1.9258174896240234, "learning_rate": 9.820846216932011e-05, "loss": 2.3316, "step": 958 }, { "epoch": 0.17275388426030172, "grad_norm": 1.8160074949264526, "learning_rate": 9.82047033545694e-05, "loss": 2.6953, "step": 959 }, { "epoch": 0.1729340238684981, "grad_norm": 2.8916215896606445, "learning_rate": 9.820094067286083e-05, "loss": 2.4196, "step": 960 }, { "epoch": 0.17311416347669445, "grad_norm": 1.324589490890503, "learning_rate": 9.819717412449631e-05, "loss": 1.8192, "step": 961 }, { "epoch": 0.1732943030848908, "grad_norm": 1.2401609420776367, "learning_rate": 9.819340370977794e-05, "loss": 1.7033, "step": 962 }, { "epoch": 0.17347444269308715, "grad_norm": 1.2908941507339478, "learning_rate": 9.81896294290082e-05, "loss": 1.9744, "step": 963 }, { "epoch": 0.1736545823012835, "grad_norm": 1.2396693229675293, "learning_rate": 9.818585128248988e-05, "loss": 1.8415, "step": 964 }, { "epoch": 0.17383472190947985, "grad_norm": 1.175499439239502, "learning_rate": 9.818206927052602e-05, "loss": 1.6938, "step": 965 }, { "epoch": 0.1740148615176762, "grad_norm": 1.2146129608154297, "learning_rate": 9.817828339342003e-05, "loss": 1.8406, "step": 966 }, { "epoch": 0.17419500112587255, "grad_norm": 1.224776029586792, "learning_rate": 9.817449365147561e-05, "loss": 1.7464, "step": 967 }, { "epoch": 0.1743751407340689, "grad_norm": 1.324190616607666, "learning_rate": 9.817070004499677e-05, "loss": 1.9447, "step": 968 }, { "epoch": 0.17455528034226525, "grad_norm": 1.2665823698043823, "learning_rate": 9.816690257428787e-05, "loss": 1.9, "step": 969 }, { "epoch": 0.1747354199504616, "grad_norm": 1.2253963947296143, "learning_rate": 9.816310123965348e-05, "loss": 1.7283, "step": 970 }, { "epoch": 0.17491555955865795, "grad_norm": 1.2518510818481445, "learning_rate": 9.815929604139857e-05, "loss": 1.9728, "step": 971 }, { "epoch": 0.1750956991668543, "grad_norm": 1.2972339391708374, "learning_rate": 9.81554869798284e-05, "loss": 1.8105, "step": 972 }, { "epoch": 0.17527583877505065, "grad_norm": 1.2224445343017578, "learning_rate": 9.81516740552485e-05, "loss": 1.7032, "step": 973 }, { "epoch": 0.17545597838324703, "grad_norm": 1.270284652709961, "learning_rate": 9.814785726796478e-05, "loss": 1.9332, "step": 974 }, { "epoch": 0.17563611799144338, "grad_norm": 1.159845232963562, "learning_rate": 9.81440366182834e-05, "loss": 1.7398, "step": 975 }, { "epoch": 0.17581625759963973, "grad_norm": 1.171785831451416, "learning_rate": 9.814021210651084e-05, "loss": 1.9278, "step": 976 }, { "epoch": 0.17599639720783608, "grad_norm": 1.3585643768310547, "learning_rate": 9.813638373295392e-05, "loss": 2.0651, "step": 977 }, { "epoch": 0.17617653681603243, "grad_norm": 1.1806076765060425, "learning_rate": 9.813255149791975e-05, "loss": 1.6853, "step": 978 }, { "epoch": 0.17635667642422878, "grad_norm": 1.3248120546340942, "learning_rate": 9.812871540171574e-05, "loss": 1.8361, "step": 979 }, { "epoch": 0.17653681603242513, "grad_norm": 1.4118931293487549, "learning_rate": 9.81248754446496e-05, "loss": 2.1368, "step": 980 }, { "epoch": 0.17671695564062148, "grad_norm": 1.1947461366653442, "learning_rate": 9.812103162702942e-05, "loss": 1.5551, "step": 981 }, { "epoch": 0.17689709524881783, "grad_norm": 1.2409764528274536, "learning_rate": 9.811718394916351e-05, "loss": 1.7858, "step": 982 }, { "epoch": 0.17707723485701418, "grad_norm": 1.4271025657653809, "learning_rate": 9.811333241136056e-05, "loss": 2.1236, "step": 983 }, { "epoch": 0.17725737446521053, "grad_norm": 1.3088583946228027, "learning_rate": 9.81094770139295e-05, "loss": 1.7706, "step": 984 }, { "epoch": 0.17743751407340688, "grad_norm": 1.5288505554199219, "learning_rate": 9.810561775717964e-05, "loss": 2.3629, "step": 985 }, { "epoch": 0.17761765368160323, "grad_norm": 1.3391587734222412, "learning_rate": 9.810175464142053e-05, "loss": 2.1055, "step": 986 }, { "epoch": 0.17779779328979958, "grad_norm": 1.3647550344467163, "learning_rate": 9.809788766696213e-05, "loss": 1.8956, "step": 987 }, { "epoch": 0.17797793289799596, "grad_norm": 1.4144048690795898, "learning_rate": 9.809401683411457e-05, "loss": 1.9614, "step": 988 }, { "epoch": 0.1781580725061923, "grad_norm": 1.405476689338684, "learning_rate": 9.809014214318843e-05, "loss": 1.829, "step": 989 }, { "epoch": 0.17833821211438866, "grad_norm": 1.3766098022460938, "learning_rate": 9.808626359449451e-05, "loss": 1.8353, "step": 990 }, { "epoch": 0.178518351722585, "grad_norm": 1.3525934219360352, "learning_rate": 9.808238118834394e-05, "loss": 1.72, "step": 991 }, { "epoch": 0.17869849133078136, "grad_norm": 1.4221618175506592, "learning_rate": 9.807849492504817e-05, "loss": 2.0379, "step": 992 }, { "epoch": 0.1788786309389777, "grad_norm": 1.3941259384155273, "learning_rate": 9.807460480491895e-05, "loss": 2.0559, "step": 993 }, { "epoch": 0.17905877054717406, "grad_norm": 1.195777177810669, "learning_rate": 9.807071082826836e-05, "loss": 1.6323, "step": 994 }, { "epoch": 0.1792389101553704, "grad_norm": 1.3851172924041748, "learning_rate": 9.806681299540877e-05, "loss": 1.7501, "step": 995 }, { "epoch": 0.17941904976356676, "grad_norm": 1.3659976720809937, "learning_rate": 9.806291130665284e-05, "loss": 1.8423, "step": 996 }, { "epoch": 0.1795991893717631, "grad_norm": 1.4213128089904785, "learning_rate": 9.805900576231358e-05, "loss": 1.9137, "step": 997 }, { "epoch": 0.17977932897995946, "grad_norm": 1.3705589771270752, "learning_rate": 9.805509636270427e-05, "loss": 1.9406, "step": 998 }, { "epoch": 0.1799594685881558, "grad_norm": 1.6332011222839355, "learning_rate": 9.805118310813856e-05, "loss": 2.113, "step": 999 }, { "epoch": 0.18013960819635216, "grad_norm": 1.3458186388015747, "learning_rate": 9.804726599893033e-05, "loss": 1.694, "step": 1000 }, { "epoch": 0.1803197478045485, "grad_norm": 1.216593623161316, "learning_rate": 9.804334503539383e-05, "loss": 2.0628, "step": 1001 }, { "epoch": 0.18049988741274486, "grad_norm": 1.2084629535675049, "learning_rate": 9.803942021784361e-05, "loss": 2.1192, "step": 1002 }, { "epoch": 0.18068002702094124, "grad_norm": 1.3153703212738037, "learning_rate": 9.803549154659448e-05, "loss": 2.2023, "step": 1003 }, { "epoch": 0.1808601666291376, "grad_norm": 1.354103446006775, "learning_rate": 9.803155902196161e-05, "loss": 2.3206, "step": 1004 }, { "epoch": 0.18104030623733394, "grad_norm": 1.3447707891464233, "learning_rate": 9.802762264426048e-05, "loss": 2.3337, "step": 1005 }, { "epoch": 0.1812204458455303, "grad_norm": 1.8964850902557373, "learning_rate": 9.802368241380685e-05, "loss": 2.29, "step": 1006 }, { "epoch": 0.18140058545372664, "grad_norm": 1.4371596574783325, "learning_rate": 9.801973833091682e-05, "loss": 2.2448, "step": 1007 }, { "epoch": 0.181580725061923, "grad_norm": 1.5542829036712646, "learning_rate": 9.801579039590675e-05, "loss": 2.1635, "step": 1008 }, { "epoch": 0.18176086467011934, "grad_norm": 1.7047879695892334, "learning_rate": 9.801183860909338e-05, "loss": 2.541, "step": 1009 }, { "epoch": 0.1819410042783157, "grad_norm": 1.702187180519104, "learning_rate": 9.80078829707937e-05, "loss": 2.3559, "step": 1010 }, { "epoch": 0.18212114388651204, "grad_norm": 1.2553833723068237, "learning_rate": 9.800392348132504e-05, "loss": 1.978, "step": 1011 }, { "epoch": 0.1823012834947084, "grad_norm": 1.3444942235946655, "learning_rate": 9.7999960141005e-05, "loss": 2.0664, "step": 1012 }, { "epoch": 0.18248142310290474, "grad_norm": 1.2549513578414917, "learning_rate": 9.799599295015154e-05, "loss": 2.0013, "step": 1013 }, { "epoch": 0.1826615627111011, "grad_norm": 1.3910187482833862, "learning_rate": 9.799202190908293e-05, "loss": 1.9403, "step": 1014 }, { "epoch": 0.18284170231929744, "grad_norm": 1.1417717933654785, "learning_rate": 9.798804701811765e-05, "loss": 1.5851, "step": 1015 }, { "epoch": 0.1830218419274938, "grad_norm": 1.232528567314148, "learning_rate": 9.798406827757465e-05, "loss": 1.9015, "step": 1016 }, { "epoch": 0.18320198153569017, "grad_norm": 1.3617945909500122, "learning_rate": 9.798008568777306e-05, "loss": 1.8823, "step": 1017 }, { "epoch": 0.18338212114388652, "grad_norm": 1.2282837629318237, "learning_rate": 9.797609924903237e-05, "loss": 1.8085, "step": 1018 }, { "epoch": 0.18356226075208287, "grad_norm": 1.1524983644485474, "learning_rate": 9.797210896167234e-05, "loss": 1.8068, "step": 1019 }, { "epoch": 0.18374240036027922, "grad_norm": 1.2424622774124146, "learning_rate": 9.796811482601314e-05, "loss": 1.9911, "step": 1020 }, { "epoch": 0.18392253996847557, "grad_norm": 1.1579972505569458, "learning_rate": 9.79641168423751e-05, "loss": 1.7695, "step": 1021 }, { "epoch": 0.18410267957667192, "grad_norm": 1.2499160766601562, "learning_rate": 9.796011501107898e-05, "loss": 1.675, "step": 1022 }, { "epoch": 0.18428281918486827, "grad_norm": 1.3865952491760254, "learning_rate": 9.795610933244579e-05, "loss": 1.841, "step": 1023 }, { "epoch": 0.18446295879306462, "grad_norm": 1.2480659484863281, "learning_rate": 9.795209980679687e-05, "loss": 1.9389, "step": 1024 }, { "epoch": 0.18464309840126097, "grad_norm": 1.3107167482376099, "learning_rate": 9.794808643445385e-05, "loss": 1.9855, "step": 1025 }, { "epoch": 0.18482323800945732, "grad_norm": 1.2713347673416138, "learning_rate": 9.794406921573871e-05, "loss": 1.7889, "step": 1026 }, { "epoch": 0.18500337761765367, "grad_norm": 1.401381254196167, "learning_rate": 9.794004815097367e-05, "loss": 2.0242, "step": 1027 }, { "epoch": 0.18518351722585003, "grad_norm": 1.3585182428359985, "learning_rate": 9.793602324048134e-05, "loss": 2.1322, "step": 1028 }, { "epoch": 0.18536365683404638, "grad_norm": 1.3229870796203613, "learning_rate": 9.793199448458456e-05, "loss": 1.9269, "step": 1029 }, { "epoch": 0.18554379644224273, "grad_norm": 1.3098610639572144, "learning_rate": 9.792796188360654e-05, "loss": 2.0335, "step": 1030 }, { "epoch": 0.1857239360504391, "grad_norm": 1.2868019342422485, "learning_rate": 9.792392543787075e-05, "loss": 1.9837, "step": 1031 }, { "epoch": 0.18590407565863545, "grad_norm": 1.2790473699569702, "learning_rate": 9.791988514770101e-05, "loss": 1.8837, "step": 1032 }, { "epoch": 0.1860842152668318, "grad_norm": 1.3025767803192139, "learning_rate": 9.791584101342142e-05, "loss": 1.7508, "step": 1033 }, { "epoch": 0.18626435487502815, "grad_norm": 1.2877564430236816, "learning_rate": 9.79117930353564e-05, "loss": 1.6398, "step": 1034 }, { "epoch": 0.1864444944832245, "grad_norm": 1.1584537029266357, "learning_rate": 9.790774121383068e-05, "loss": 1.6753, "step": 1035 }, { "epoch": 0.18662463409142085, "grad_norm": 1.5099139213562012, "learning_rate": 9.790368554916931e-05, "loss": 2.2535, "step": 1036 }, { "epoch": 0.1868047736996172, "grad_norm": 1.408600926399231, "learning_rate": 9.789962604169761e-05, "loss": 1.8433, "step": 1037 }, { "epoch": 0.18698491330781356, "grad_norm": 1.3460115194320679, "learning_rate": 9.789556269174123e-05, "loss": 1.8245, "step": 1038 }, { "epoch": 0.1871650529160099, "grad_norm": 1.4837392568588257, "learning_rate": 9.789149549962617e-05, "loss": 1.9556, "step": 1039 }, { "epoch": 0.18734519252420626, "grad_norm": 1.480675220489502, "learning_rate": 9.788742446567865e-05, "loss": 1.8344, "step": 1040 }, { "epoch": 0.1875253321324026, "grad_norm": 1.3863763809204102, "learning_rate": 9.788334959022524e-05, "loss": 1.6853, "step": 1041 }, { "epoch": 0.18770547174059896, "grad_norm": 1.4679621458053589, "learning_rate": 9.787927087359287e-05, "loss": 1.9679, "step": 1042 }, { "epoch": 0.1878856113487953, "grad_norm": 1.2725722789764404, "learning_rate": 9.787518831610873e-05, "loss": 1.8375, "step": 1043 }, { "epoch": 0.18806575095699166, "grad_norm": 1.3111138343811035, "learning_rate": 9.787110191810027e-05, "loss": 1.6635, "step": 1044 }, { "epoch": 0.18824589056518803, "grad_norm": 1.3817356824874878, "learning_rate": 9.786701167989536e-05, "loss": 2.0924, "step": 1045 }, { "epoch": 0.18842603017338438, "grad_norm": 1.47190260887146, "learning_rate": 9.786291760182207e-05, "loss": 1.7857, "step": 1046 }, { "epoch": 0.18860616978158073, "grad_norm": 1.297044277191162, "learning_rate": 9.785881968420885e-05, "loss": 1.8661, "step": 1047 }, { "epoch": 0.18878630938977708, "grad_norm": 1.4383862018585205, "learning_rate": 9.785471792738443e-05, "loss": 1.8995, "step": 1048 }, { "epoch": 0.18896644899797344, "grad_norm": 1.407568335533142, "learning_rate": 9.785061233167783e-05, "loss": 1.557, "step": 1049 }, { "epoch": 0.18914658860616979, "grad_norm": 1.3079746961593628, "learning_rate": 9.784650289741844e-05, "loss": 1.7306, "step": 1050 }, { "epoch": 0.18932672821436614, "grad_norm": 1.3035038709640503, "learning_rate": 9.784238962493587e-05, "loss": 2.2453, "step": 1051 }, { "epoch": 0.18950686782256249, "grad_norm": 1.310741662979126, "learning_rate": 9.783827251456012e-05, "loss": 2.6441, "step": 1052 }, { "epoch": 0.18968700743075884, "grad_norm": 1.2539050579071045, "learning_rate": 9.783415156662145e-05, "loss": 2.1461, "step": 1053 }, { "epoch": 0.1898671470389552, "grad_norm": 1.2706090211868286, "learning_rate": 9.783002678145043e-05, "loss": 1.9084, "step": 1054 }, { "epoch": 0.19004728664715154, "grad_norm": 1.1846846342086792, "learning_rate": 9.782589815937797e-05, "loss": 1.988, "step": 1055 }, { "epoch": 0.1902274262553479, "grad_norm": 1.547398567199707, "learning_rate": 9.782176570073525e-05, "loss": 2.3427, "step": 1056 }, { "epoch": 0.19040756586354424, "grad_norm": 1.5276302099227905, "learning_rate": 9.781762940585379e-05, "loss": 2.3789, "step": 1057 }, { "epoch": 0.1905877054717406, "grad_norm": 1.664921522140503, "learning_rate": 9.781348927506538e-05, "loss": 2.2988, "step": 1058 }, { "epoch": 0.19076784507993694, "grad_norm": 1.8396921157836914, "learning_rate": 9.780934530870214e-05, "loss": 2.0596, "step": 1059 }, { "epoch": 0.19094798468813332, "grad_norm": 1.892615795135498, "learning_rate": 9.780519750709651e-05, "loss": 2.5788, "step": 1060 }, { "epoch": 0.19112812429632967, "grad_norm": 1.3459168672561646, "learning_rate": 9.780104587058123e-05, "loss": 1.8824, "step": 1061 }, { "epoch": 0.19130826390452602, "grad_norm": 1.4003496170043945, "learning_rate": 9.779689039948934e-05, "loss": 2.008, "step": 1062 }, { "epoch": 0.19148840351272237, "grad_norm": 1.4113889932632446, "learning_rate": 9.779273109415417e-05, "loss": 2.186, "step": 1063 }, { "epoch": 0.19166854312091872, "grad_norm": 1.4023385047912598, "learning_rate": 9.778856795490939e-05, "loss": 1.9166, "step": 1064 }, { "epoch": 0.19184868272911507, "grad_norm": 1.434439778327942, "learning_rate": 9.778440098208897e-05, "loss": 2.1106, "step": 1065 }, { "epoch": 0.19202882233731142, "grad_norm": 1.31666100025177, "learning_rate": 9.778023017602718e-05, "loss": 1.8378, "step": 1066 }, { "epoch": 0.19220896194550777, "grad_norm": 1.1839903593063354, "learning_rate": 9.77760555370586e-05, "loss": 1.6132, "step": 1067 }, { "epoch": 0.19238910155370412, "grad_norm": 1.455063819885254, "learning_rate": 9.777187706551811e-05, "loss": 2.036, "step": 1068 }, { "epoch": 0.19256924116190047, "grad_norm": 1.2490276098251343, "learning_rate": 9.776769476174091e-05, "loss": 1.8584, "step": 1069 }, { "epoch": 0.19274938077009682, "grad_norm": 1.2353384494781494, "learning_rate": 9.776350862606251e-05, "loss": 1.73, "step": 1070 }, { "epoch": 0.19292952037829317, "grad_norm": 1.334887146949768, "learning_rate": 9.775931865881872e-05, "loss": 1.7689, "step": 1071 }, { "epoch": 0.19310965998648952, "grad_norm": 1.2303911447525024, "learning_rate": 9.775512486034563e-05, "loss": 1.6101, "step": 1072 }, { "epoch": 0.19328979959468587, "grad_norm": 1.2655179500579834, "learning_rate": 9.775092723097969e-05, "loss": 1.8952, "step": 1073 }, { "epoch": 0.19346993920288225, "grad_norm": 1.2273050546646118, "learning_rate": 9.774672577105763e-05, "loss": 1.8603, "step": 1074 }, { "epoch": 0.1936500788110786, "grad_norm": 1.1224472522735596, "learning_rate": 9.774252048091648e-05, "loss": 1.63, "step": 1075 }, { "epoch": 0.19383021841927495, "grad_norm": 1.3293113708496094, "learning_rate": 9.77383113608936e-05, "loss": 2.0011, "step": 1076 }, { "epoch": 0.1940103580274713, "grad_norm": 1.2797737121582031, "learning_rate": 9.773409841132664e-05, "loss": 1.9221, "step": 1077 }, { "epoch": 0.19419049763566765, "grad_norm": 1.392388105392456, "learning_rate": 9.772988163255354e-05, "loss": 1.9118, "step": 1078 }, { "epoch": 0.194370637243864, "grad_norm": 1.3774945735931396, "learning_rate": 9.772566102491259e-05, "loss": 1.7821, "step": 1079 }, { "epoch": 0.19455077685206035, "grad_norm": 1.4170559644699097, "learning_rate": 9.772143658874235e-05, "loss": 2.0068, "step": 1080 }, { "epoch": 0.1947309164602567, "grad_norm": 1.3845994472503662, "learning_rate": 9.771720832438172e-05, "loss": 2.0581, "step": 1081 }, { "epoch": 0.19491105606845305, "grad_norm": 1.238254427909851, "learning_rate": 9.771297623216988e-05, "loss": 1.6188, "step": 1082 }, { "epoch": 0.1950911956766494, "grad_norm": 1.5110121965408325, "learning_rate": 9.770874031244634e-05, "loss": 2.0153, "step": 1083 }, { "epoch": 0.19527133528484575, "grad_norm": 1.2862738370895386, "learning_rate": 9.770450056555087e-05, "loss": 1.6687, "step": 1084 }, { "epoch": 0.1954514748930421, "grad_norm": 1.4597505331039429, "learning_rate": 9.770025699182362e-05, "loss": 1.7867, "step": 1085 }, { "epoch": 0.19563161450123845, "grad_norm": 1.3708995580673218, "learning_rate": 9.769600959160497e-05, "loss": 1.9031, "step": 1086 }, { "epoch": 0.1958117541094348, "grad_norm": 1.2739589214324951, "learning_rate": 9.769175836523568e-05, "loss": 1.8369, "step": 1087 }, { "epoch": 0.19599189371763118, "grad_norm": 1.3774598836898804, "learning_rate": 9.768750331305673e-05, "loss": 1.9478, "step": 1088 }, { "epoch": 0.19617203332582753, "grad_norm": 1.405414342880249, "learning_rate": 9.768324443540953e-05, "loss": 1.9954, "step": 1089 }, { "epoch": 0.19635217293402388, "grad_norm": 1.3449652194976807, "learning_rate": 9.767898173263568e-05, "loss": 1.8365, "step": 1090 }, { "epoch": 0.19653231254222023, "grad_norm": 1.4840807914733887, "learning_rate": 9.767471520507711e-05, "loss": 2.1271, "step": 1091 }, { "epoch": 0.19671245215041658, "grad_norm": 1.4266517162322998, "learning_rate": 9.767044485307615e-05, "loss": 1.8355, "step": 1092 }, { "epoch": 0.19689259175861293, "grad_norm": 1.3680309057235718, "learning_rate": 9.76661706769753e-05, "loss": 1.5311, "step": 1093 }, { "epoch": 0.19707273136680928, "grad_norm": 1.4910508394241333, "learning_rate": 9.766189267711745e-05, "loss": 2.1129, "step": 1094 }, { "epoch": 0.19725287097500563, "grad_norm": 1.3345222473144531, "learning_rate": 9.76576108538458e-05, "loss": 1.9193, "step": 1095 }, { "epoch": 0.19743301058320198, "grad_norm": 1.3052685260772705, "learning_rate": 9.765332520750381e-05, "loss": 1.7407, "step": 1096 }, { "epoch": 0.19761315019139833, "grad_norm": 1.4958308935165405, "learning_rate": 9.76490357384353e-05, "loss": 1.9617, "step": 1097 }, { "epoch": 0.19779328979959468, "grad_norm": 1.4788193702697754, "learning_rate": 9.764474244698432e-05, "loss": 1.8313, "step": 1098 }, { "epoch": 0.19797342940779103, "grad_norm": 1.4373680353164673, "learning_rate": 9.764044533349532e-05, "loss": 1.9792, "step": 1099 }, { "epoch": 0.19815356901598738, "grad_norm": 1.3391166925430298, "learning_rate": 9.763614439831301e-05, "loss": 1.794, "step": 1100 }, { "epoch": 0.19833370862418373, "grad_norm": 1.3338404893875122, "learning_rate": 9.76318396417824e-05, "loss": 2.3145, "step": 1101 }, { "epoch": 0.1985138482323801, "grad_norm": 1.3932822942733765, "learning_rate": 9.762753106424882e-05, "loss": 2.3292, "step": 1102 }, { "epoch": 0.19869398784057646, "grad_norm": 1.2400325536727905, "learning_rate": 9.762321866605789e-05, "loss": 2.212, "step": 1103 }, { "epoch": 0.1988741274487728, "grad_norm": 1.3097000122070312, "learning_rate": 9.761890244755554e-05, "loss": 2.3767, "step": 1104 }, { "epoch": 0.19905426705696916, "grad_norm": 1.9655861854553223, "learning_rate": 9.761458240908804e-05, "loss": 2.4407, "step": 1105 }, { "epoch": 0.1992344066651655, "grad_norm": 1.4316792488098145, "learning_rate": 9.761025855100193e-05, "loss": 1.8224, "step": 1106 }, { "epoch": 0.19941454627336186, "grad_norm": 1.503657579421997, "learning_rate": 9.760593087364408e-05, "loss": 2.0246, "step": 1107 }, { "epoch": 0.1995946858815582, "grad_norm": 1.9716205596923828, "learning_rate": 9.760159937736163e-05, "loss": 2.5355, "step": 1108 }, { "epoch": 0.19977482548975456, "grad_norm": 1.720569133758545, "learning_rate": 9.759726406250207e-05, "loss": 2.3835, "step": 1109 }, { "epoch": 0.1999549650979509, "grad_norm": 1.3845270872116089, "learning_rate": 9.759292492941317e-05, "loss": 1.8727, "step": 1110 }, { "epoch": 0.20013510470614726, "grad_norm": 1.1699467897415161, "learning_rate": 9.758858197844302e-05, "loss": 1.791, "step": 1111 }, { "epoch": 0.2003152443143436, "grad_norm": 1.3896262645721436, "learning_rate": 9.758423520993999e-05, "loss": 1.9561, "step": 1112 }, { "epoch": 0.20049538392253996, "grad_norm": 1.3396421670913696, "learning_rate": 9.75798846242528e-05, "loss": 1.7734, "step": 1113 }, { "epoch": 0.2006755235307363, "grad_norm": 1.2620508670806885, "learning_rate": 9.757553022173043e-05, "loss": 1.9857, "step": 1114 }, { "epoch": 0.20085566313893266, "grad_norm": 1.3318549394607544, "learning_rate": 9.757117200272221e-05, "loss": 1.9463, "step": 1115 }, { "epoch": 0.20103580274712904, "grad_norm": 2.266291856765747, "learning_rate": 9.756680996757774e-05, "loss": 2.0614, "step": 1116 }, { "epoch": 0.2012159423553254, "grad_norm": 1.1756788492202759, "learning_rate": 9.756244411664694e-05, "loss": 1.5352, "step": 1117 }, { "epoch": 0.20139608196352174, "grad_norm": 1.4018486738204956, "learning_rate": 9.755807445028005e-05, "loss": 2.0813, "step": 1118 }, { "epoch": 0.2015762215717181, "grad_norm": 1.1505703926086426, "learning_rate": 9.755370096882757e-05, "loss": 1.614, "step": 1119 }, { "epoch": 0.20175636117991444, "grad_norm": 1.2846778631210327, "learning_rate": 9.754932367264038e-05, "loss": 1.9285, "step": 1120 }, { "epoch": 0.2019365007881108, "grad_norm": 1.3055049180984497, "learning_rate": 9.754494256206961e-05, "loss": 1.9963, "step": 1121 }, { "epoch": 0.20211664039630714, "grad_norm": 1.2917466163635254, "learning_rate": 9.754055763746668e-05, "loss": 1.7182, "step": 1122 }, { "epoch": 0.2022967800045035, "grad_norm": 1.3191360235214233, "learning_rate": 9.75361688991834e-05, "loss": 2.0444, "step": 1123 }, { "epoch": 0.20247691961269984, "grad_norm": 1.312503695487976, "learning_rate": 9.753177634757179e-05, "loss": 1.8112, "step": 1124 }, { "epoch": 0.2026570592208962, "grad_norm": 1.2230874300003052, "learning_rate": 9.752737998298425e-05, "loss": 1.8059, "step": 1125 }, { "epoch": 0.20283719882909254, "grad_norm": 1.3253501653671265, "learning_rate": 9.75229798057734e-05, "loss": 1.8656, "step": 1126 }, { "epoch": 0.2030173384372889, "grad_norm": 1.2441682815551758, "learning_rate": 9.75185758162923e-05, "loss": 1.7846, "step": 1127 }, { "epoch": 0.20319747804548524, "grad_norm": 1.2234008312225342, "learning_rate": 9.751416801489417e-05, "loss": 1.9963, "step": 1128 }, { "epoch": 0.2033776176536816, "grad_norm": 1.2510011196136475, "learning_rate": 9.750975640193263e-05, "loss": 1.971, "step": 1129 }, { "epoch": 0.20355775726187794, "grad_norm": 1.238528847694397, "learning_rate": 9.750534097776158e-05, "loss": 1.7935, "step": 1130 }, { "epoch": 0.20373789687007432, "grad_norm": 1.2961379289627075, "learning_rate": 9.750092174273521e-05, "loss": 2.0679, "step": 1131 }, { "epoch": 0.20391803647827067, "grad_norm": 1.3668330907821655, "learning_rate": 9.749649869720803e-05, "loss": 1.8844, "step": 1132 }, { "epoch": 0.20409817608646702, "grad_norm": 1.3781975507736206, "learning_rate": 9.749207184153486e-05, "loss": 2.0807, "step": 1133 }, { "epoch": 0.20427831569466337, "grad_norm": 1.3022228479385376, "learning_rate": 9.748764117607084e-05, "loss": 2.0386, "step": 1134 }, { "epoch": 0.20445845530285972, "grad_norm": 1.3531427383422852, "learning_rate": 9.748320670117134e-05, "loss": 1.9187, "step": 1135 }, { "epoch": 0.20463859491105607, "grad_norm": 1.3155783414840698, "learning_rate": 9.747876841719214e-05, "loss": 1.9167, "step": 1136 }, { "epoch": 0.20481873451925242, "grad_norm": 1.29123055934906, "learning_rate": 9.747432632448928e-05, "loss": 1.8631, "step": 1137 }, { "epoch": 0.20499887412744877, "grad_norm": 1.2773045301437378, "learning_rate": 9.746988042341906e-05, "loss": 1.9638, "step": 1138 }, { "epoch": 0.20517901373564512, "grad_norm": 1.3281618356704712, "learning_rate": 9.746543071433818e-05, "loss": 1.9104, "step": 1139 }, { "epoch": 0.20535915334384147, "grad_norm": 1.2960795164108276, "learning_rate": 9.746097719760355e-05, "loss": 1.6743, "step": 1140 }, { "epoch": 0.20553929295203782, "grad_norm": 1.339294195175171, "learning_rate": 9.745651987357245e-05, "loss": 1.7139, "step": 1141 }, { "epoch": 0.20571943256023417, "grad_norm": 1.2710645198822021, "learning_rate": 9.745205874260246e-05, "loss": 1.8765, "step": 1142 }, { "epoch": 0.20589957216843052, "grad_norm": 1.3489642143249512, "learning_rate": 9.744759380505141e-05, "loss": 1.8905, "step": 1143 }, { "epoch": 0.20607971177662687, "grad_norm": 1.3232749700546265, "learning_rate": 9.744312506127749e-05, "loss": 1.913, "step": 1144 }, { "epoch": 0.20625985138482325, "grad_norm": 1.3831884860992432, "learning_rate": 9.74386525116392e-05, "loss": 1.8077, "step": 1145 }, { "epoch": 0.2064399909930196, "grad_norm": 1.3234407901763916, "learning_rate": 9.74341761564953e-05, "loss": 1.7466, "step": 1146 }, { "epoch": 0.20662013060121595, "grad_norm": 1.478615403175354, "learning_rate": 9.74296959962049e-05, "loss": 1.8893, "step": 1147 }, { "epoch": 0.2068002702094123, "grad_norm": 1.304890751838684, "learning_rate": 9.74252120311274e-05, "loss": 1.7499, "step": 1148 }, { "epoch": 0.20698040981760865, "grad_norm": 1.286595106124878, "learning_rate": 9.742072426162249e-05, "loss": 1.7273, "step": 1149 }, { "epoch": 0.207160549425805, "grad_norm": 1.437442421913147, "learning_rate": 9.741623268805017e-05, "loss": 1.9995, "step": 1150 }, { "epoch": 0.20734068903400135, "grad_norm": 1.3560956716537476, "learning_rate": 9.741173731077075e-05, "loss": 2.2523, "step": 1151 }, { "epoch": 0.2075208286421977, "grad_norm": 1.4598274230957031, "learning_rate": 9.740723813014487e-05, "loss": 2.3113, "step": 1152 }, { "epoch": 0.20770096825039405, "grad_norm": 1.3802415132522583, "learning_rate": 9.740273514653344e-05, "loss": 2.3313, "step": 1153 }, { "epoch": 0.2078811078585904, "grad_norm": 1.4327166080474854, "learning_rate": 9.739822836029768e-05, "loss": 2.3032, "step": 1154 }, { "epoch": 0.20806124746678675, "grad_norm": 1.2819279432296753, "learning_rate": 9.739371777179912e-05, "loss": 2.3655, "step": 1155 }, { "epoch": 0.2082413870749831, "grad_norm": 1.4347268342971802, "learning_rate": 9.738920338139961e-05, "loss": 2.2786, "step": 1156 }, { "epoch": 0.20842152668317945, "grad_norm": 1.5550678968429565, "learning_rate": 9.738468518946128e-05, "loss": 2.435, "step": 1157 }, { "epoch": 0.2086016662913758, "grad_norm": 1.6036711931228638, "learning_rate": 9.73801631963466e-05, "loss": 2.441, "step": 1158 }, { "epoch": 0.20878180589957218, "grad_norm": 2.26991605758667, "learning_rate": 9.73756374024183e-05, "loss": 2.1703, "step": 1159 }, { "epoch": 0.20896194550776853, "grad_norm": 1.752753734588623, "learning_rate": 9.737110780803945e-05, "loss": 2.2458, "step": 1160 }, { "epoch": 0.20914208511596488, "grad_norm": 1.2708410024642944, "learning_rate": 9.736657441357339e-05, "loss": 1.7728, "step": 1161 }, { "epoch": 0.20932222472416123, "grad_norm": 1.3556185960769653, "learning_rate": 9.736203721938382e-05, "loss": 1.9593, "step": 1162 }, { "epoch": 0.20950236433235758, "grad_norm": 1.2849525213241577, "learning_rate": 9.735749622583468e-05, "loss": 2.0225, "step": 1163 }, { "epoch": 0.20968250394055393, "grad_norm": 1.2545552253723145, "learning_rate": 9.735295143329027e-05, "loss": 1.6616, "step": 1164 }, { "epoch": 0.20986264354875028, "grad_norm": 1.3480777740478516, "learning_rate": 9.734840284211515e-05, "loss": 1.7657, "step": 1165 }, { "epoch": 0.21004278315694663, "grad_norm": 1.4599077701568604, "learning_rate": 9.734385045267422e-05, "loss": 1.8052, "step": 1166 }, { "epoch": 0.21022292276514298, "grad_norm": 1.4238650798797607, "learning_rate": 9.733929426533268e-05, "loss": 1.9854, "step": 1167 }, { "epoch": 0.21040306237333933, "grad_norm": 1.3731627464294434, "learning_rate": 9.733473428045599e-05, "loss": 1.872, "step": 1168 }, { "epoch": 0.21058320198153568, "grad_norm": 1.2766082286834717, "learning_rate": 9.733017049840999e-05, "loss": 1.8957, "step": 1169 }, { "epoch": 0.21076334158973203, "grad_norm": 1.3454746007919312, "learning_rate": 9.732560291956076e-05, "loss": 2.0359, "step": 1170 }, { "epoch": 0.21094348119792838, "grad_norm": 1.3586333990097046, "learning_rate": 9.73210315442747e-05, "loss": 2.0876, "step": 1171 }, { "epoch": 0.21112362080612473, "grad_norm": 1.3223870992660522, "learning_rate": 9.731645637291857e-05, "loss": 1.8035, "step": 1172 }, { "epoch": 0.2113037604143211, "grad_norm": 1.6285736560821533, "learning_rate": 9.731187740585934e-05, "loss": 1.8659, "step": 1173 }, { "epoch": 0.21148390002251746, "grad_norm": 1.1811076402664185, "learning_rate": 9.730729464346435e-05, "loss": 1.7004, "step": 1174 }, { "epoch": 0.2116640396307138, "grad_norm": 1.2941184043884277, "learning_rate": 9.730270808610123e-05, "loss": 2.1722, "step": 1175 }, { "epoch": 0.21184417923891016, "grad_norm": 1.2476855516433716, "learning_rate": 9.72981177341379e-05, "loss": 1.9381, "step": 1176 }, { "epoch": 0.2120243188471065, "grad_norm": 1.3430800437927246, "learning_rate": 9.729352358794259e-05, "loss": 1.826, "step": 1177 }, { "epoch": 0.21220445845530286, "grad_norm": 1.2400137186050415, "learning_rate": 9.728892564788387e-05, "loss": 1.5523, "step": 1178 }, { "epoch": 0.2123845980634992, "grad_norm": 1.167266607284546, "learning_rate": 9.728432391433058e-05, "loss": 1.7809, "step": 1179 }, { "epoch": 0.21256473767169556, "grad_norm": 1.388493537902832, "learning_rate": 9.727971838765184e-05, "loss": 1.8449, "step": 1180 }, { "epoch": 0.21274487727989191, "grad_norm": 1.307047963142395, "learning_rate": 9.727510906821712e-05, "loss": 1.5885, "step": 1181 }, { "epoch": 0.21292501688808826, "grad_norm": 1.3642369508743286, "learning_rate": 9.727049595639619e-05, "loss": 1.8891, "step": 1182 }, { "epoch": 0.21310515649628461, "grad_norm": 1.2851828336715698, "learning_rate": 9.72658790525591e-05, "loss": 1.9321, "step": 1183 }, { "epoch": 0.21328529610448096, "grad_norm": 1.4932687282562256, "learning_rate": 9.72612583570762e-05, "loss": 2.1038, "step": 1184 }, { "epoch": 0.21346543571267731, "grad_norm": 1.402328372001648, "learning_rate": 9.725663387031818e-05, "loss": 1.8707, "step": 1185 }, { "epoch": 0.21364557532087367, "grad_norm": 1.4498121738433838, "learning_rate": 9.725200559265601e-05, "loss": 2.0383, "step": 1186 }, { "epoch": 0.21382571492907002, "grad_norm": 1.4633519649505615, "learning_rate": 9.724737352446097e-05, "loss": 1.9944, "step": 1187 }, { "epoch": 0.2140058545372664, "grad_norm": 1.463475227355957, "learning_rate": 9.724273766610463e-05, "loss": 2.1831, "step": 1188 }, { "epoch": 0.21418599414546274, "grad_norm": 1.3918044567108154, "learning_rate": 9.72380980179589e-05, "loss": 2.0299, "step": 1189 }, { "epoch": 0.2143661337536591, "grad_norm": 1.3234190940856934, "learning_rate": 9.723345458039594e-05, "loss": 1.8762, "step": 1190 }, { "epoch": 0.21454627336185544, "grad_norm": 1.3141950368881226, "learning_rate": 9.722880735378827e-05, "loss": 1.8001, "step": 1191 }, { "epoch": 0.2147264129700518, "grad_norm": 1.2665444612503052, "learning_rate": 9.722415633850867e-05, "loss": 1.84, "step": 1192 }, { "epoch": 0.21490655257824814, "grad_norm": 1.313765287399292, "learning_rate": 9.721950153493026e-05, "loss": 1.6348, "step": 1193 }, { "epoch": 0.2150866921864445, "grad_norm": 1.4050348997116089, "learning_rate": 9.721484294342644e-05, "loss": 1.8332, "step": 1194 }, { "epoch": 0.21526683179464084, "grad_norm": 1.3703303337097168, "learning_rate": 9.721018056437091e-05, "loss": 1.9306, "step": 1195 }, { "epoch": 0.2154469714028372, "grad_norm": 1.4321315288543701, "learning_rate": 9.72055143981377e-05, "loss": 1.9754, "step": 1196 }, { "epoch": 0.21562711101103355, "grad_norm": 1.4123356342315674, "learning_rate": 9.720084444510109e-05, "loss": 1.8759, "step": 1197 }, { "epoch": 0.2158072506192299, "grad_norm": 1.4252748489379883, "learning_rate": 9.719617070563575e-05, "loss": 1.8527, "step": 1198 }, { "epoch": 0.21598739022742625, "grad_norm": 1.3909029960632324, "learning_rate": 9.719149318011659e-05, "loss": 1.6909, "step": 1199 }, { "epoch": 0.2161675298356226, "grad_norm": 1.535224437713623, "learning_rate": 9.718681186891883e-05, "loss": 2.043, "step": 1200 }, { "epoch": 0.21634766944381895, "grad_norm": 1.3515214920043945, "learning_rate": 9.718212677241801e-05, "loss": 2.3651, "step": 1201 }, { "epoch": 0.21652780905201532, "grad_norm": 1.2250727415084839, "learning_rate": 9.717743789098994e-05, "loss": 2.0236, "step": 1202 }, { "epoch": 0.21670794866021167, "grad_norm": 1.2858576774597168, "learning_rate": 9.717274522501078e-05, "loss": 2.1827, "step": 1203 }, { "epoch": 0.21688808826840802, "grad_norm": 1.3357107639312744, "learning_rate": 9.7168048774857e-05, "loss": 2.0166, "step": 1204 }, { "epoch": 0.21706822787660437, "grad_norm": 1.3120057582855225, "learning_rate": 9.716334854090532e-05, "loss": 2.1984, "step": 1205 }, { "epoch": 0.21724836748480073, "grad_norm": 2.0808050632476807, "learning_rate": 9.715864452353278e-05, "loss": 2.2997, "step": 1206 }, { "epoch": 0.21742850709299708, "grad_norm": 1.8010993003845215, "learning_rate": 9.715393672311674e-05, "loss": 2.0266, "step": 1207 }, { "epoch": 0.21760864670119343, "grad_norm": 1.7874611616134644, "learning_rate": 9.714922514003489e-05, "loss": 2.7595, "step": 1208 }, { "epoch": 0.21778878630938978, "grad_norm": 1.9151959419250488, "learning_rate": 9.714450977466514e-05, "loss": 2.819, "step": 1209 }, { "epoch": 0.21796892591758613, "grad_norm": 2.263363838195801, "learning_rate": 9.713979062738579e-05, "loss": 2.839, "step": 1210 }, { "epoch": 0.21814906552578248, "grad_norm": 1.2915294170379639, "learning_rate": 9.71350676985754e-05, "loss": 1.9654, "step": 1211 }, { "epoch": 0.21832920513397883, "grad_norm": 1.2647360563278198, "learning_rate": 9.713034098861283e-05, "loss": 2.0295, "step": 1212 }, { "epoch": 0.21850934474217518, "grad_norm": 1.2217167615890503, "learning_rate": 9.712561049787727e-05, "loss": 1.8334, "step": 1213 }, { "epoch": 0.21868948435037153, "grad_norm": 1.156005620956421, "learning_rate": 9.712087622674819e-05, "loss": 1.6766, "step": 1214 }, { "epoch": 0.21886962395856788, "grad_norm": 1.2325598001480103, "learning_rate": 9.711613817560537e-05, "loss": 1.6672, "step": 1215 }, { "epoch": 0.21904976356676426, "grad_norm": 1.2970725297927856, "learning_rate": 9.711139634482889e-05, "loss": 1.6545, "step": 1216 }, { "epoch": 0.2192299031749606, "grad_norm": 1.331709384918213, "learning_rate": 9.710665073479914e-05, "loss": 1.7171, "step": 1217 }, { "epoch": 0.21941004278315696, "grad_norm": 1.2705309391021729, "learning_rate": 9.710190134589681e-05, "loss": 1.8049, "step": 1218 }, { "epoch": 0.2195901823913533, "grad_norm": 1.3399676084518433, "learning_rate": 9.709714817850291e-05, "loss": 1.8443, "step": 1219 }, { "epoch": 0.21977032199954966, "grad_norm": 1.283126950263977, "learning_rate": 9.709239123299871e-05, "loss": 1.6588, "step": 1220 }, { "epoch": 0.219950461607746, "grad_norm": 1.600463628768921, "learning_rate": 9.708763050976583e-05, "loss": 2.2678, "step": 1221 }, { "epoch": 0.22013060121594236, "grad_norm": 1.2552045583724976, "learning_rate": 9.708286600918617e-05, "loss": 1.7007, "step": 1222 }, { "epoch": 0.2203107408241387, "grad_norm": 1.3178097009658813, "learning_rate": 9.707809773164192e-05, "loss": 1.8726, "step": 1223 }, { "epoch": 0.22049088043233506, "grad_norm": 1.349375605583191, "learning_rate": 9.707332567751562e-05, "loss": 1.9585, "step": 1224 }, { "epoch": 0.2206710200405314, "grad_norm": 1.3276695013046265, "learning_rate": 9.706854984719005e-05, "loss": 2.1645, "step": 1225 }, { "epoch": 0.22085115964872776, "grad_norm": 1.1302896738052368, "learning_rate": 9.706377024104834e-05, "loss": 1.628, "step": 1226 }, { "epoch": 0.2210312992569241, "grad_norm": 1.2439773082733154, "learning_rate": 9.70589868594739e-05, "loss": 1.8528, "step": 1227 }, { "epoch": 0.22121143886512046, "grad_norm": 1.3040409088134766, "learning_rate": 9.705419970285046e-05, "loss": 1.7976, "step": 1228 }, { "epoch": 0.2213915784733168, "grad_norm": 1.3220973014831543, "learning_rate": 9.704940877156204e-05, "loss": 2.0121, "step": 1229 }, { "epoch": 0.22157171808151319, "grad_norm": 1.348577618598938, "learning_rate": 9.704461406599296e-05, "loss": 1.9989, "step": 1230 }, { "epoch": 0.22175185768970954, "grad_norm": 1.2426236867904663, "learning_rate": 9.703981558652786e-05, "loss": 1.8465, "step": 1231 }, { "epoch": 0.2219319972979059, "grad_norm": 1.3763753175735474, "learning_rate": 9.703501333355168e-05, "loss": 2.0208, "step": 1232 }, { "epoch": 0.22211213690610224, "grad_norm": 1.3459206819534302, "learning_rate": 9.70302073074496e-05, "loss": 1.8847, "step": 1233 }, { "epoch": 0.2222922765142986, "grad_norm": 1.2877014875411987, "learning_rate": 9.702539750860723e-05, "loss": 1.747, "step": 1234 }, { "epoch": 0.22247241612249494, "grad_norm": 1.3653275966644287, "learning_rate": 9.702058393741038e-05, "loss": 2.045, "step": 1235 }, { "epoch": 0.2226525557306913, "grad_norm": 1.3644788265228271, "learning_rate": 9.701576659424517e-05, "loss": 2.1115, "step": 1236 }, { "epoch": 0.22283269533888764, "grad_norm": 1.3194355964660645, "learning_rate": 9.701094547949807e-05, "loss": 1.9783, "step": 1237 }, { "epoch": 0.223012834947084, "grad_norm": 1.2422782182693481, "learning_rate": 9.700612059355582e-05, "loss": 1.8706, "step": 1238 }, { "epoch": 0.22319297455528034, "grad_norm": 1.231799602508545, "learning_rate": 9.700129193680549e-05, "loss": 1.629, "step": 1239 }, { "epoch": 0.2233731141634767, "grad_norm": 1.4027040004730225, "learning_rate": 9.699645950963441e-05, "loss": 1.8705, "step": 1240 }, { "epoch": 0.22355325377167304, "grad_norm": 1.355323076248169, "learning_rate": 9.699162331243021e-05, "loss": 1.8316, "step": 1241 }, { "epoch": 0.2237333933798694, "grad_norm": 1.4202133417129517, "learning_rate": 9.698678334558092e-05, "loss": 2.1323, "step": 1242 }, { "epoch": 0.22391353298806574, "grad_norm": 1.334398865699768, "learning_rate": 9.698193960947472e-05, "loss": 1.6633, "step": 1243 }, { "epoch": 0.22409367259626212, "grad_norm": 1.416087031364441, "learning_rate": 9.697709210450022e-05, "loss": 1.9239, "step": 1244 }, { "epoch": 0.22427381220445847, "grad_norm": 1.3756368160247803, "learning_rate": 9.697224083104627e-05, "loss": 1.8295, "step": 1245 }, { "epoch": 0.22445395181265482, "grad_norm": 1.3327268362045288, "learning_rate": 9.696738578950206e-05, "loss": 1.8389, "step": 1246 }, { "epoch": 0.22463409142085117, "grad_norm": 1.5033972263336182, "learning_rate": 9.696252698025702e-05, "loss": 1.5961, "step": 1247 }, { "epoch": 0.22481423102904752, "grad_norm": 1.4197916984558105, "learning_rate": 9.695766440370095e-05, "loss": 1.8635, "step": 1248 }, { "epoch": 0.22499437063724387, "grad_norm": 1.3862701654434204, "learning_rate": 9.69527980602239e-05, "loss": 1.6812, "step": 1249 }, { "epoch": 0.22517451024544022, "grad_norm": 1.514758586883545, "learning_rate": 9.694792795021628e-05, "loss": 2.1818, "step": 1250 }, { "epoch": 0.22535464985363657, "grad_norm": 1.2962361574172974, "learning_rate": 9.694305407406873e-05, "loss": 2.2989, "step": 1251 }, { "epoch": 0.22553478946183292, "grad_norm": 1.4761441946029663, "learning_rate": 9.693817643217224e-05, "loss": 2.233, "step": 1252 }, { "epoch": 0.22571492907002927, "grad_norm": 1.2174078226089478, "learning_rate": 9.693329502491813e-05, "loss": 2.134, "step": 1253 }, { "epoch": 0.22589506867822562, "grad_norm": 1.3961092233657837, "learning_rate": 9.692840985269793e-05, "loss": 2.3063, "step": 1254 }, { "epoch": 0.22607520828642197, "grad_norm": 1.3094801902770996, "learning_rate": 9.692352091590356e-05, "loss": 2.1216, "step": 1255 }, { "epoch": 0.22625534789461832, "grad_norm": 1.478244423866272, "learning_rate": 9.691862821492719e-05, "loss": 2.1102, "step": 1256 }, { "epoch": 0.22643548750281467, "grad_norm": 1.435144305229187, "learning_rate": 9.691373175016132e-05, "loss": 2.1021, "step": 1257 }, { "epoch": 0.22661562711101102, "grad_norm": 1.5235644578933716, "learning_rate": 9.690883152199873e-05, "loss": 2.337, "step": 1258 }, { "epoch": 0.2267957667192074, "grad_norm": 1.8211218118667603, "learning_rate": 9.690392753083254e-05, "loss": 2.2778, "step": 1259 }, { "epoch": 0.22697590632740375, "grad_norm": 1.7948099374771118, "learning_rate": 9.689901977705613e-05, "loss": 2.2129, "step": 1260 }, { "epoch": 0.2271560459356001, "grad_norm": 1.3149867057800293, "learning_rate": 9.689410826106317e-05, "loss": 1.8688, "step": 1261 }, { "epoch": 0.22733618554379645, "grad_norm": 1.3334577083587646, "learning_rate": 9.688919298324772e-05, "loss": 1.8792, "step": 1262 }, { "epoch": 0.2275163251519928, "grad_norm": 1.3534876108169556, "learning_rate": 9.688427394400404e-05, "loss": 1.7669, "step": 1263 }, { "epoch": 0.22769646476018915, "grad_norm": 1.2005510330200195, "learning_rate": 9.687935114372673e-05, "loss": 1.7123, "step": 1264 }, { "epoch": 0.2278766043683855, "grad_norm": 1.2224311828613281, "learning_rate": 9.68744245828107e-05, "loss": 1.5392, "step": 1265 }, { "epoch": 0.22805674397658185, "grad_norm": 1.3176820278167725, "learning_rate": 9.686949426165119e-05, "loss": 1.8028, "step": 1266 }, { "epoch": 0.2282368835847782, "grad_norm": 1.3573458194732666, "learning_rate": 9.686456018064367e-05, "loss": 1.7664, "step": 1267 }, { "epoch": 0.22841702319297455, "grad_norm": 1.3003135919570923, "learning_rate": 9.685962234018395e-05, "loss": 1.9275, "step": 1268 }, { "epoch": 0.2285971628011709, "grad_norm": 1.3143268823623657, "learning_rate": 9.685468074066815e-05, "loss": 1.9996, "step": 1269 }, { "epoch": 0.22877730240936725, "grad_norm": 1.291113257408142, "learning_rate": 9.684973538249267e-05, "loss": 1.7173, "step": 1270 }, { "epoch": 0.2289574420175636, "grad_norm": 1.305135726928711, "learning_rate": 9.684478626605427e-05, "loss": 2.1342, "step": 1271 }, { "epoch": 0.22913758162575995, "grad_norm": 1.3201669454574585, "learning_rate": 9.683983339174991e-05, "loss": 2.1349, "step": 1272 }, { "epoch": 0.22931772123395633, "grad_norm": 1.1403651237487793, "learning_rate": 9.683487675997693e-05, "loss": 1.5152, "step": 1273 }, { "epoch": 0.22949786084215268, "grad_norm": 1.1839333772659302, "learning_rate": 9.682991637113296e-05, "loss": 1.7999, "step": 1274 }, { "epoch": 0.22967800045034903, "grad_norm": 1.2619744539260864, "learning_rate": 9.68249522256159e-05, "loss": 1.8163, "step": 1275 }, { "epoch": 0.22985814005854538, "grad_norm": 1.321630835533142, "learning_rate": 9.681998432382398e-05, "loss": 1.9633, "step": 1276 }, { "epoch": 0.23003827966674173, "grad_norm": 1.3500498533248901, "learning_rate": 9.681501266615574e-05, "loss": 1.8286, "step": 1277 }, { "epoch": 0.23021841927493808, "grad_norm": 1.1742342710494995, "learning_rate": 9.681003725300997e-05, "loss": 1.732, "step": 1278 }, { "epoch": 0.23039855888313443, "grad_norm": 1.2814358472824097, "learning_rate": 9.680505808478582e-05, "loss": 1.8687, "step": 1279 }, { "epoch": 0.23057869849133078, "grad_norm": 1.2051386833190918, "learning_rate": 9.68000751618827e-05, "loss": 1.7608, "step": 1280 }, { "epoch": 0.23075883809952713, "grad_norm": 1.4166581630706787, "learning_rate": 9.679508848470033e-05, "loss": 1.7312, "step": 1281 }, { "epoch": 0.23093897770772348, "grad_norm": 1.256353497505188, "learning_rate": 9.679009805363879e-05, "loss": 1.8639, "step": 1282 }, { "epoch": 0.23111911731591983, "grad_norm": 1.284873366355896, "learning_rate": 9.678510386909836e-05, "loss": 1.8801, "step": 1283 }, { "epoch": 0.23129925692411618, "grad_norm": 1.3541619777679443, "learning_rate": 9.678010593147968e-05, "loss": 1.5903, "step": 1284 }, { "epoch": 0.23147939653231253, "grad_norm": 1.3045016527175903, "learning_rate": 9.67751042411837e-05, "loss": 1.9373, "step": 1285 }, { "epoch": 0.23165953614050888, "grad_norm": 1.3718358278274536, "learning_rate": 9.677009879861164e-05, "loss": 1.7672, "step": 1286 }, { "epoch": 0.23183967574870526, "grad_norm": 1.2938722372055054, "learning_rate": 9.676508960416502e-05, "loss": 1.9419, "step": 1287 }, { "epoch": 0.2320198153569016, "grad_norm": 1.3558599948883057, "learning_rate": 9.676007665824568e-05, "loss": 2.0485, "step": 1288 }, { "epoch": 0.23219995496509796, "grad_norm": 1.4920417070388794, "learning_rate": 9.67550599612558e-05, "loss": 2.2428, "step": 1289 }, { "epoch": 0.2323800945732943, "grad_norm": 1.3960587978363037, "learning_rate": 9.675003951359778e-05, "loss": 2.025, "step": 1290 }, { "epoch": 0.23256023418149066, "grad_norm": 1.442877173423767, "learning_rate": 9.674501531567434e-05, "loss": 2.0415, "step": 1291 }, { "epoch": 0.232740373789687, "grad_norm": 1.3591831922531128, "learning_rate": 9.673998736788855e-05, "loss": 2.0451, "step": 1292 }, { "epoch": 0.23292051339788336, "grad_norm": 1.408906102180481, "learning_rate": 9.673495567064375e-05, "loss": 1.8846, "step": 1293 }, { "epoch": 0.2331006530060797, "grad_norm": 1.3663188219070435, "learning_rate": 9.672992022434355e-05, "loss": 1.8281, "step": 1294 }, { "epoch": 0.23328079261427606, "grad_norm": 1.3005244731903076, "learning_rate": 9.672488102939193e-05, "loss": 1.7782, "step": 1295 }, { "epoch": 0.2334609322224724, "grad_norm": 1.3708949089050293, "learning_rate": 9.671983808619309e-05, "loss": 1.7935, "step": 1296 }, { "epoch": 0.23364107183066876, "grad_norm": 1.3792146444320679, "learning_rate": 9.671479139515162e-05, "loss": 1.9796, "step": 1297 }, { "epoch": 0.2338212114388651, "grad_norm": 1.336287260055542, "learning_rate": 9.670974095667232e-05, "loss": 1.8319, "step": 1298 }, { "epoch": 0.23400135104706146, "grad_norm": 1.4283175468444824, "learning_rate": 9.670468677116035e-05, "loss": 1.6735, "step": 1299 }, { "epoch": 0.2341814906552578, "grad_norm": 1.498521327972412, "learning_rate": 9.669962883902118e-05, "loss": 2.2208, "step": 1300 }, { "epoch": 0.2343616302634542, "grad_norm": 1.418279767036438, "learning_rate": 9.669456716066052e-05, "loss": 2.3285, "step": 1301 }, { "epoch": 0.23454176987165054, "grad_norm": 1.287266731262207, "learning_rate": 9.668950173648443e-05, "loss": 2.114, "step": 1302 }, { "epoch": 0.2347219094798469, "grad_norm": 1.5345252752304077, "learning_rate": 9.668443256689924e-05, "loss": 2.0366, "step": 1303 }, { "epoch": 0.23490204908804324, "grad_norm": 1.3359532356262207, "learning_rate": 9.667935965231161e-05, "loss": 2.1441, "step": 1304 }, { "epoch": 0.2350821886962396, "grad_norm": 1.2325955629348755, "learning_rate": 9.667428299312849e-05, "loss": 2.0052, "step": 1305 }, { "epoch": 0.23526232830443594, "grad_norm": 1.2899880409240723, "learning_rate": 9.666920258975712e-05, "loss": 1.7652, "step": 1306 }, { "epoch": 0.2354424679126323, "grad_norm": 1.4716989994049072, "learning_rate": 9.666411844260506e-05, "loss": 2.1174, "step": 1307 }, { "epoch": 0.23562260752082864, "grad_norm": 1.713647484779358, "learning_rate": 9.665903055208014e-05, "loss": 2.3615, "step": 1308 }, { "epoch": 0.235802747129025, "grad_norm": 1.6967394351959229, "learning_rate": 9.665393891859051e-05, "loss": 1.9691, "step": 1309 }, { "epoch": 0.23598288673722134, "grad_norm": 2.2882301807403564, "learning_rate": 9.664884354254463e-05, "loss": 2.504, "step": 1310 }, { "epoch": 0.2361630263454177, "grad_norm": 1.6432644128799438, "learning_rate": 9.664374442435124e-05, "loss": 2.1923, "step": 1311 }, { "epoch": 0.23634316595361404, "grad_norm": 1.2447680234909058, "learning_rate": 9.66386415644194e-05, "loss": 1.7736, "step": 1312 }, { "epoch": 0.2365233055618104, "grad_norm": 1.3229706287384033, "learning_rate": 9.663353496315844e-05, "loss": 1.9034, "step": 1313 }, { "epoch": 0.23670344517000674, "grad_norm": 1.295579195022583, "learning_rate": 9.662842462097802e-05, "loss": 1.9027, "step": 1314 }, { "epoch": 0.23688358477820312, "grad_norm": 1.2668462991714478, "learning_rate": 9.66233105382881e-05, "loss": 1.8819, "step": 1315 }, { "epoch": 0.23706372438639947, "grad_norm": 1.2516711950302124, "learning_rate": 9.661819271549892e-05, "loss": 1.7095, "step": 1316 }, { "epoch": 0.23724386399459582, "grad_norm": 1.3345041275024414, "learning_rate": 9.661307115302101e-05, "loss": 2.007, "step": 1317 }, { "epoch": 0.23742400360279217, "grad_norm": 1.4656058549880981, "learning_rate": 9.660794585126525e-05, "loss": 1.7817, "step": 1318 }, { "epoch": 0.23760414321098852, "grad_norm": 1.3824762105941772, "learning_rate": 9.660281681064276e-05, "loss": 2.1394, "step": 1319 }, { "epoch": 0.23778428281918487, "grad_norm": 1.351065993309021, "learning_rate": 9.659768403156502e-05, "loss": 1.7363, "step": 1320 }, { "epoch": 0.23796442242738122, "grad_norm": 1.3922151327133179, "learning_rate": 9.659254751444376e-05, "loss": 1.9992, "step": 1321 }, { "epoch": 0.23814456203557757, "grad_norm": 1.269749402999878, "learning_rate": 9.658740725969105e-05, "loss": 1.9123, "step": 1322 }, { "epoch": 0.23832470164377392, "grad_norm": 1.2521382570266724, "learning_rate": 9.65822632677192e-05, "loss": 1.833, "step": 1323 }, { "epoch": 0.23850484125197027, "grad_norm": 1.2525455951690674, "learning_rate": 9.657711553894088e-05, "loss": 1.871, "step": 1324 }, { "epoch": 0.23868498086016662, "grad_norm": 1.3701800107955933, "learning_rate": 9.657196407376904e-05, "loss": 2.071, "step": 1325 }, { "epoch": 0.23886512046836297, "grad_norm": 1.2621268033981323, "learning_rate": 9.656680887261693e-05, "loss": 1.9383, "step": 1326 }, { "epoch": 0.23904526007655932, "grad_norm": 1.344577670097351, "learning_rate": 9.65616499358981e-05, "loss": 1.7889, "step": 1327 }, { "epoch": 0.23922539968475567, "grad_norm": 1.2507952451705933, "learning_rate": 9.655648726402637e-05, "loss": 1.9257, "step": 1328 }, { "epoch": 0.23940553929295202, "grad_norm": 1.2649699449539185, "learning_rate": 9.655132085741593e-05, "loss": 1.8827, "step": 1329 }, { "epoch": 0.2395856789011484, "grad_norm": 1.362486481666565, "learning_rate": 9.65461507164812e-05, "loss": 1.5458, "step": 1330 }, { "epoch": 0.23976581850934475, "grad_norm": 1.2885550260543823, "learning_rate": 9.654097684163692e-05, "loss": 1.8555, "step": 1331 }, { "epoch": 0.2399459581175411, "grad_norm": 1.217755675315857, "learning_rate": 9.653579923329816e-05, "loss": 1.718, "step": 1332 }, { "epoch": 0.24012609772573745, "grad_norm": 1.3939847946166992, "learning_rate": 9.653061789188025e-05, "loss": 1.8666, "step": 1333 }, { "epoch": 0.2403062373339338, "grad_norm": 1.2871462106704712, "learning_rate": 9.652543281779884e-05, "loss": 1.7333, "step": 1334 }, { "epoch": 0.24048637694213015, "grad_norm": 1.3200628757476807, "learning_rate": 9.652024401146986e-05, "loss": 1.7965, "step": 1335 }, { "epoch": 0.2406665165503265, "grad_norm": 1.3936206102371216, "learning_rate": 9.651505147330958e-05, "loss": 1.6797, "step": 1336 }, { "epoch": 0.24084665615852285, "grad_norm": 1.3318467140197754, "learning_rate": 9.650985520373452e-05, "loss": 1.7061, "step": 1337 }, { "epoch": 0.2410267957667192, "grad_norm": 1.389526128768921, "learning_rate": 9.650465520316152e-05, "loss": 1.9818, "step": 1338 }, { "epoch": 0.24120693537491555, "grad_norm": 1.3966667652130127, "learning_rate": 9.649945147200774e-05, "loss": 1.714, "step": 1339 }, { "epoch": 0.2413870749831119, "grad_norm": 1.4819436073303223, "learning_rate": 9.649424401069062e-05, "loss": 1.9911, "step": 1340 }, { "epoch": 0.24156721459130825, "grad_norm": 1.4604023694992065, "learning_rate": 9.648903281962789e-05, "loss": 1.6126, "step": 1341 }, { "epoch": 0.2417473541995046, "grad_norm": 1.5306700468063354, "learning_rate": 9.648381789923757e-05, "loss": 1.8541, "step": 1342 }, { "epoch": 0.24192749380770096, "grad_norm": 1.387073278427124, "learning_rate": 9.647859924993804e-05, "loss": 1.662, "step": 1343 }, { "epoch": 0.24210763341589733, "grad_norm": 1.1959772109985352, "learning_rate": 9.647337687214792e-05, "loss": 1.4285, "step": 1344 }, { "epoch": 0.24228777302409368, "grad_norm": 1.3606319427490234, "learning_rate": 9.646815076628612e-05, "loss": 1.7306, "step": 1345 }, { "epoch": 0.24246791263229003, "grad_norm": 1.376552939414978, "learning_rate": 9.646292093277192e-05, "loss": 1.6686, "step": 1346 }, { "epoch": 0.24264805224048638, "grad_norm": 1.4229816198349, "learning_rate": 9.645768737202483e-05, "loss": 1.8728, "step": 1347 }, { "epoch": 0.24282819184868273, "grad_norm": 1.5853666067123413, "learning_rate": 9.645245008446468e-05, "loss": 2.1374, "step": 1348 }, { "epoch": 0.24300833145687908, "grad_norm": 1.2157371044158936, "learning_rate": 9.644720907051163e-05, "loss": 1.2887, "step": 1349 }, { "epoch": 0.24318847106507543, "grad_norm": 1.4207323789596558, "learning_rate": 9.644196433058608e-05, "loss": 1.7471, "step": 1350 }, { "epoch": 0.24336861067327178, "grad_norm": 1.4169080257415771, "learning_rate": 9.643671586510876e-05, "loss": 2.2114, "step": 1351 }, { "epoch": 0.24354875028146813, "grad_norm": 1.8168373107910156, "learning_rate": 9.643146367450073e-05, "loss": 2.3965, "step": 1352 }, { "epoch": 0.24372888988966449, "grad_norm": 1.3238023519515991, "learning_rate": 9.64262077591833e-05, "loss": 2.2433, "step": 1353 }, { "epoch": 0.24390902949786084, "grad_norm": 1.3712788820266724, "learning_rate": 9.642094811957809e-05, "loss": 2.2986, "step": 1354 }, { "epoch": 0.24408916910605719, "grad_norm": 1.4252575635910034, "learning_rate": 9.641568475610703e-05, "loss": 2.3612, "step": 1355 }, { "epoch": 0.24426930871425354, "grad_norm": 1.229488492012024, "learning_rate": 9.641041766919234e-05, "loss": 1.9304, "step": 1356 }, { "epoch": 0.24444944832244989, "grad_norm": 1.4757345914840698, "learning_rate": 9.640514685925656e-05, "loss": 2.0317, "step": 1357 }, { "epoch": 0.24462958793064626, "grad_norm": 1.4099195003509521, "learning_rate": 9.639987232672252e-05, "loss": 2.2138, "step": 1358 }, { "epoch": 0.24480972753884261, "grad_norm": 1.7490659952163696, "learning_rate": 9.63945940720133e-05, "loss": 2.1144, "step": 1359 }, { "epoch": 0.24498986714703896, "grad_norm": 2.476339101791382, "learning_rate": 9.638931209555234e-05, "loss": 2.7539, "step": 1360 }, { "epoch": 0.24517000675523531, "grad_norm": 1.3424603939056396, "learning_rate": 9.638402639776337e-05, "loss": 1.8887, "step": 1361 }, { "epoch": 0.24535014636343166, "grad_norm": 1.328654170036316, "learning_rate": 9.637873697907039e-05, "loss": 1.6789, "step": 1362 }, { "epoch": 0.24553028597162802, "grad_norm": 1.3418844938278198, "learning_rate": 9.637344383989773e-05, "loss": 2.1178, "step": 1363 }, { "epoch": 0.24571042557982437, "grad_norm": 1.2037616968154907, "learning_rate": 9.636814698066998e-05, "loss": 1.7504, "step": 1364 }, { "epoch": 0.24589056518802072, "grad_norm": 1.2933865785598755, "learning_rate": 9.636284640181205e-05, "loss": 1.7595, "step": 1365 }, { "epoch": 0.24607070479621707, "grad_norm": 2.4373340606689453, "learning_rate": 9.635754210374919e-05, "loss": 1.9394, "step": 1366 }, { "epoch": 0.24625084440441342, "grad_norm": 1.310178279876709, "learning_rate": 9.635223408690688e-05, "loss": 1.8679, "step": 1367 }, { "epoch": 0.24643098401260977, "grad_norm": 1.2312427759170532, "learning_rate": 9.634692235171092e-05, "loss": 1.704, "step": 1368 }, { "epoch": 0.24661112362080612, "grad_norm": 1.3104429244995117, "learning_rate": 9.634160689858743e-05, "loss": 1.6453, "step": 1369 }, { "epoch": 0.24679126322900247, "grad_norm": 1.2865937948226929, "learning_rate": 9.63362877279628e-05, "loss": 1.8056, "step": 1370 }, { "epoch": 0.24697140283719882, "grad_norm": 1.2945168018341064, "learning_rate": 9.633096484026375e-05, "loss": 1.9038, "step": 1371 }, { "epoch": 0.2471515424453952, "grad_norm": 1.4774664640426636, "learning_rate": 9.632563823591727e-05, "loss": 1.9085, "step": 1372 }, { "epoch": 0.24733168205359155, "grad_norm": 1.2102530002593994, "learning_rate": 9.632030791535063e-05, "loss": 1.7314, "step": 1373 }, { "epoch": 0.2475118216617879, "grad_norm": 1.210584282875061, "learning_rate": 9.631497387899146e-05, "loss": 1.6257, "step": 1374 }, { "epoch": 0.24769196126998425, "grad_norm": 1.19646418094635, "learning_rate": 9.630963612726766e-05, "loss": 1.7282, "step": 1375 }, { "epoch": 0.2478721008781806, "grad_norm": 1.3051313161849976, "learning_rate": 9.63042946606074e-05, "loss": 1.9294, "step": 1376 }, { "epoch": 0.24805224048637695, "grad_norm": 1.1502336263656616, "learning_rate": 9.629894947943916e-05, "loss": 1.8746, "step": 1377 }, { "epoch": 0.2482323800945733, "grad_norm": 1.210191011428833, "learning_rate": 9.629360058419176e-05, "loss": 1.7942, "step": 1378 }, { "epoch": 0.24841251970276965, "grad_norm": 1.3472601175308228, "learning_rate": 9.628824797529428e-05, "loss": 1.9549, "step": 1379 }, { "epoch": 0.248592659310966, "grad_norm": 1.316145420074463, "learning_rate": 9.628289165317608e-05, "loss": 1.6018, "step": 1380 }, { "epoch": 0.24877279891916235, "grad_norm": 1.2660845518112183, "learning_rate": 9.627753161826685e-05, "loss": 1.738, "step": 1381 }, { "epoch": 0.2489529385273587, "grad_norm": 1.3599636554718018, "learning_rate": 9.627216787099658e-05, "loss": 1.9489, "step": 1382 }, { "epoch": 0.24913307813555505, "grad_norm": 1.2624542713165283, "learning_rate": 9.626680041179555e-05, "loss": 1.7027, "step": 1383 }, { "epoch": 0.2493132177437514, "grad_norm": 1.419079303741455, "learning_rate": 9.626142924109432e-05, "loss": 1.6361, "step": 1384 }, { "epoch": 0.24949335735194775, "grad_norm": 1.394989013671875, "learning_rate": 9.625605435932377e-05, "loss": 1.8748, "step": 1385 }, { "epoch": 0.2496734969601441, "grad_norm": 1.3571964502334595, "learning_rate": 9.625067576691505e-05, "loss": 1.6741, "step": 1386 }, { "epoch": 0.24985363656834048, "grad_norm": 1.4658979177474976, "learning_rate": 9.624529346429967e-05, "loss": 1.8919, "step": 1387 }, { "epoch": 0.2500337761765368, "grad_norm": 1.3335614204406738, "learning_rate": 9.623990745190938e-05, "loss": 1.8162, "step": 1388 }, { "epoch": 0.2502139157847332, "grad_norm": 1.2806657552719116, "learning_rate": 9.623451773017622e-05, "loss": 1.7118, "step": 1389 }, { "epoch": 0.2503940553929295, "grad_norm": 1.41310453414917, "learning_rate": 9.622912429953257e-05, "loss": 1.9767, "step": 1390 }, { "epoch": 0.2505741950011259, "grad_norm": 1.4300659894943237, "learning_rate": 9.62237271604111e-05, "loss": 1.9149, "step": 1391 }, { "epoch": 0.2507543346093222, "grad_norm": 1.3532480001449585, "learning_rate": 9.621832631324474e-05, "loss": 1.7847, "step": 1392 }, { "epoch": 0.2509344742175186, "grad_norm": 1.4936203956604004, "learning_rate": 9.621292175846675e-05, "loss": 2.0031, "step": 1393 }, { "epoch": 0.2511146138257149, "grad_norm": 1.3195195198059082, "learning_rate": 9.620751349651069e-05, "loss": 1.6065, "step": 1394 }, { "epoch": 0.2512947534339113, "grad_norm": 1.4474780559539795, "learning_rate": 9.620210152781042e-05, "loss": 1.9245, "step": 1395 }, { "epoch": 0.25147489304210763, "grad_norm": 1.5137771368026733, "learning_rate": 9.619668585280005e-05, "loss": 2.1028, "step": 1396 }, { "epoch": 0.251655032650304, "grad_norm": 1.3129740953445435, "learning_rate": 9.619126647191405e-05, "loss": 1.7898, "step": 1397 }, { "epoch": 0.25183517225850033, "grad_norm": 1.4312471151351929, "learning_rate": 9.618584338558716e-05, "loss": 2.0074, "step": 1398 }, { "epoch": 0.2520153118666967, "grad_norm": 1.2129664421081543, "learning_rate": 9.618041659425439e-05, "loss": 1.6612, "step": 1399 }, { "epoch": 0.25219545147489303, "grad_norm": 1.4841365814208984, "learning_rate": 9.617498609835112e-05, "loss": 1.9451, "step": 1400 }, { "epoch": 0.2523755910830894, "grad_norm": 1.3020541667938232, "learning_rate": 9.616955189831293e-05, "loss": 2.1399, "step": 1401 }, { "epoch": 0.25255573069128573, "grad_norm": 1.1937135457992554, "learning_rate": 9.616411399457579e-05, "loss": 2.135, "step": 1402 }, { "epoch": 0.2527358702994821, "grad_norm": 1.26156485080719, "learning_rate": 9.615867238757593e-05, "loss": 2.1286, "step": 1403 }, { "epoch": 0.25291600990767843, "grad_norm": 1.1347700357437134, "learning_rate": 9.615322707774982e-05, "loss": 2.0889, "step": 1404 }, { "epoch": 0.2530961495158748, "grad_norm": 1.3031673431396484, "learning_rate": 9.614777806553432e-05, "loss": 1.6973, "step": 1405 }, { "epoch": 0.25327628912407113, "grad_norm": 1.3179755210876465, "learning_rate": 9.614232535136656e-05, "loss": 1.8961, "step": 1406 }, { "epoch": 0.2534564287322675, "grad_norm": 1.5497852563858032, "learning_rate": 9.613686893568392e-05, "loss": 2.3143, "step": 1407 }, { "epoch": 0.2536365683404639, "grad_norm": 1.6290380954742432, "learning_rate": 9.613140881892413e-05, "loss": 2.3828, "step": 1408 }, { "epoch": 0.25381670794866024, "grad_norm": 1.9030406475067139, "learning_rate": 9.61259450015252e-05, "loss": 2.4538, "step": 1409 }, { "epoch": 0.2539968475568566, "grad_norm": 2.0345888137817383, "learning_rate": 9.612047748392543e-05, "loss": 2.5833, "step": 1410 }, { "epoch": 0.25417698716505294, "grad_norm": 1.375148892402649, "learning_rate": 9.61150062665634e-05, "loss": 1.782, "step": 1411 }, { "epoch": 0.2543571267732493, "grad_norm": 1.2778311967849731, "learning_rate": 9.610953134987806e-05, "loss": 1.7237, "step": 1412 }, { "epoch": 0.25453726638144564, "grad_norm": 1.3053090572357178, "learning_rate": 9.610405273430855e-05, "loss": 1.9499, "step": 1413 }, { "epoch": 0.254717405989642, "grad_norm": 1.39927339553833, "learning_rate": 9.60985704202944e-05, "loss": 1.8895, "step": 1414 }, { "epoch": 0.25489754559783834, "grad_norm": 1.3640800714492798, "learning_rate": 9.609308440827538e-05, "loss": 1.936, "step": 1415 }, { "epoch": 0.2550776852060347, "grad_norm": 1.265154480934143, "learning_rate": 9.608759469869158e-05, "loss": 1.7464, "step": 1416 }, { "epoch": 0.25525782481423104, "grad_norm": 1.2529346942901611, "learning_rate": 9.608210129198338e-05, "loss": 1.6161, "step": 1417 }, { "epoch": 0.2554379644224274, "grad_norm": 1.263816237449646, "learning_rate": 9.607660418859146e-05, "loss": 1.851, "step": 1418 }, { "epoch": 0.25561810403062374, "grad_norm": 1.346397876739502, "learning_rate": 9.60711033889568e-05, "loss": 1.6665, "step": 1419 }, { "epoch": 0.2557982436388201, "grad_norm": 1.2287291288375854, "learning_rate": 9.606559889352064e-05, "loss": 1.7784, "step": 1420 }, { "epoch": 0.25597838324701644, "grad_norm": 1.246948480606079, "learning_rate": 9.606009070272461e-05, "loss": 1.6907, "step": 1421 }, { "epoch": 0.2561585228552128, "grad_norm": 1.3155512809753418, "learning_rate": 9.605457881701051e-05, "loss": 1.6913, "step": 1422 }, { "epoch": 0.25633866246340914, "grad_norm": 1.2273707389831543, "learning_rate": 9.604906323682057e-05, "loss": 1.7287, "step": 1423 }, { "epoch": 0.2565188020716055, "grad_norm": 1.3157439231872559, "learning_rate": 9.604354396259718e-05, "loss": 1.7937, "step": 1424 }, { "epoch": 0.25669894167980184, "grad_norm": 1.4950356483459473, "learning_rate": 9.603802099478312e-05, "loss": 2.0248, "step": 1425 }, { "epoch": 0.2568790812879982, "grad_norm": 1.3335496187210083, "learning_rate": 9.603249433382144e-05, "loss": 1.831, "step": 1426 }, { "epoch": 0.25705922089619454, "grad_norm": 1.334462285041809, "learning_rate": 9.60269639801555e-05, "loss": 1.8213, "step": 1427 }, { "epoch": 0.2572393605043909, "grad_norm": 1.2646679878234863, "learning_rate": 9.602142993422892e-05, "loss": 1.7785, "step": 1428 }, { "epoch": 0.25741950011258724, "grad_norm": 1.2021260261535645, "learning_rate": 9.601589219648563e-05, "loss": 1.6934, "step": 1429 }, { "epoch": 0.2575996397207836, "grad_norm": 1.4244736433029175, "learning_rate": 9.60103507673699e-05, "loss": 1.7952, "step": 1430 }, { "epoch": 0.25777977932897994, "grad_norm": 1.3532769680023193, "learning_rate": 9.600480564732623e-05, "loss": 2.0208, "step": 1431 }, { "epoch": 0.2579599189371763, "grad_norm": 1.259838581085205, "learning_rate": 9.599925683679948e-05, "loss": 1.7265, "step": 1432 }, { "epoch": 0.25814005854537264, "grad_norm": 1.320878028869629, "learning_rate": 9.599370433623474e-05, "loss": 1.7461, "step": 1433 }, { "epoch": 0.258320198153569, "grad_norm": 1.3004517555236816, "learning_rate": 9.598814814607744e-05, "loss": 1.7427, "step": 1434 }, { "epoch": 0.25850033776176534, "grad_norm": 1.3136227130889893, "learning_rate": 9.598258826677328e-05, "loss": 1.8261, "step": 1435 }, { "epoch": 0.25868047736996175, "grad_norm": 1.2894428968429565, "learning_rate": 9.597702469876832e-05, "loss": 1.9278, "step": 1436 }, { "epoch": 0.2588606169781581, "grad_norm": 1.3688976764678955, "learning_rate": 9.597145744250882e-05, "loss": 1.6501, "step": 1437 }, { "epoch": 0.25904075658635445, "grad_norm": 1.3156013488769531, "learning_rate": 9.596588649844138e-05, "loss": 1.7856, "step": 1438 }, { "epoch": 0.2592208961945508, "grad_norm": 1.3687058687210083, "learning_rate": 9.596031186701294e-05, "loss": 1.7681, "step": 1439 }, { "epoch": 0.25940103580274715, "grad_norm": 1.341892957687378, "learning_rate": 9.595473354867066e-05, "loss": 2.0313, "step": 1440 }, { "epoch": 0.2595811754109435, "grad_norm": 1.2842971086502075, "learning_rate": 9.594915154386201e-05, "loss": 1.8891, "step": 1441 }, { "epoch": 0.25976131501913985, "grad_norm": 1.5312058925628662, "learning_rate": 9.594356585303484e-05, "loss": 1.9411, "step": 1442 }, { "epoch": 0.2599414546273362, "grad_norm": 1.495235800743103, "learning_rate": 9.593797647663718e-05, "loss": 1.8542, "step": 1443 }, { "epoch": 0.26012159423553255, "grad_norm": 1.4159555435180664, "learning_rate": 9.593238341511743e-05, "loss": 1.5517, "step": 1444 }, { "epoch": 0.2603017338437289, "grad_norm": 1.3420803546905518, "learning_rate": 9.592678666892425e-05, "loss": 2.0079, "step": 1445 }, { "epoch": 0.26048187345192525, "grad_norm": 1.3957349061965942, "learning_rate": 9.592118623850661e-05, "loss": 1.8886, "step": 1446 }, { "epoch": 0.2606620130601216, "grad_norm": 1.4691698551177979, "learning_rate": 9.591558212431378e-05, "loss": 2.0943, "step": 1447 }, { "epoch": 0.26084215266831795, "grad_norm": 1.4037094116210938, "learning_rate": 9.590997432679532e-05, "loss": 1.6345, "step": 1448 }, { "epoch": 0.2610222922765143, "grad_norm": 1.4972895383834839, "learning_rate": 9.590436284640107e-05, "loss": 2.0783, "step": 1449 }, { "epoch": 0.26120243188471065, "grad_norm": 1.3901547193527222, "learning_rate": 9.589874768358121e-05, "loss": 1.767, "step": 1450 }, { "epoch": 0.261382571492907, "grad_norm": 1.3561162948608398, "learning_rate": 9.589312883878615e-05, "loss": 2.0377, "step": 1451 }, { "epoch": 0.26156271110110335, "grad_norm": 1.2409238815307617, "learning_rate": 9.588750631246667e-05, "loss": 2.2851, "step": 1452 }, { "epoch": 0.2617428507092997, "grad_norm": 1.6290526390075684, "learning_rate": 9.588188010507378e-05, "loss": 2.5258, "step": 1453 }, { "epoch": 0.26192299031749605, "grad_norm": 1.3076307773590088, "learning_rate": 9.58762502170588e-05, "loss": 2.4793, "step": 1454 }, { "epoch": 0.2621031299256924, "grad_norm": 1.3256698846817017, "learning_rate": 9.587061664887338e-05, "loss": 2.3607, "step": 1455 }, { "epoch": 0.26228326953388875, "grad_norm": 1.5011816024780273, "learning_rate": 9.586497940096944e-05, "loss": 2.2197, "step": 1456 }, { "epoch": 0.2624634091420851, "grad_norm": 1.44423246383667, "learning_rate": 9.585933847379919e-05, "loss": 2.1764, "step": 1457 }, { "epoch": 0.26264354875028145, "grad_norm": 1.5568348169326782, "learning_rate": 9.585369386781514e-05, "loss": 2.3642, "step": 1458 }, { "epoch": 0.2628236883584778, "grad_norm": 1.6485800743103027, "learning_rate": 9.58480455834701e-05, "loss": 2.1984, "step": 1459 }, { "epoch": 0.26300382796667415, "grad_norm": 2.164614200592041, "learning_rate": 9.584239362121719e-05, "loss": 2.6011, "step": 1460 }, { "epoch": 0.2631839675748705, "grad_norm": 1.3199414014816284, "learning_rate": 9.583673798150979e-05, "loss": 2.0123, "step": 1461 }, { "epoch": 0.26336410718306685, "grad_norm": 1.286777138710022, "learning_rate": 9.583107866480159e-05, "loss": 1.901, "step": 1462 }, { "epoch": 0.2635442467912632, "grad_norm": 1.2382673025131226, "learning_rate": 9.582541567154657e-05, "loss": 1.653, "step": 1463 }, { "epoch": 0.26372438639945955, "grad_norm": 1.1522717475891113, "learning_rate": 9.581974900219905e-05, "loss": 1.4788, "step": 1464 }, { "epoch": 0.26390452600765596, "grad_norm": 1.2540417909622192, "learning_rate": 9.581407865721357e-05, "loss": 1.7519, "step": 1465 }, { "epoch": 0.2640846656158523, "grad_norm": 1.323789119720459, "learning_rate": 9.580840463704501e-05, "loss": 1.9791, "step": 1466 }, { "epoch": 0.26426480522404866, "grad_norm": 1.247562050819397, "learning_rate": 9.580272694214854e-05, "loss": 1.6467, "step": 1467 }, { "epoch": 0.264444944832245, "grad_norm": 1.389323115348816, "learning_rate": 9.579704557297964e-05, "loss": 1.9478, "step": 1468 }, { "epoch": 0.26462508444044136, "grad_norm": 1.429758906364441, "learning_rate": 9.579136052999405e-05, "loss": 2.1681, "step": 1469 }, { "epoch": 0.2648052240486377, "grad_norm": 1.4671255350112915, "learning_rate": 9.578567181364781e-05, "loss": 1.9981, "step": 1470 }, { "epoch": 0.26498536365683406, "grad_norm": 1.3760862350463867, "learning_rate": 9.577997942439729e-05, "loss": 1.976, "step": 1471 }, { "epoch": 0.2651655032650304, "grad_norm": 1.1407766342163086, "learning_rate": 9.577428336269912e-05, "loss": 1.5791, "step": 1472 }, { "epoch": 0.26534564287322676, "grad_norm": 1.1693896055221558, "learning_rate": 9.576858362901023e-05, "loss": 1.7841, "step": 1473 }, { "epoch": 0.2655257824814231, "grad_norm": 1.250060796737671, "learning_rate": 9.576288022378784e-05, "loss": 1.7979, "step": 1474 }, { "epoch": 0.26570592208961946, "grad_norm": 1.1622016429901123, "learning_rate": 9.575717314748951e-05, "loss": 1.8498, "step": 1475 }, { "epoch": 0.2658860616978158, "grad_norm": 1.2893506288528442, "learning_rate": 9.575146240057302e-05, "loss": 1.8537, "step": 1476 }, { "epoch": 0.26606620130601216, "grad_norm": 1.386186957359314, "learning_rate": 9.574574798349651e-05, "loss": 1.8894, "step": 1477 }, { "epoch": 0.2662463409142085, "grad_norm": 1.2083815336227417, "learning_rate": 9.57400298967184e-05, "loss": 1.7881, "step": 1478 }, { "epoch": 0.26642648052240486, "grad_norm": 1.4141194820404053, "learning_rate": 9.573430814069734e-05, "loss": 1.7631, "step": 1479 }, { "epoch": 0.2666066201306012, "grad_norm": 1.270307183265686, "learning_rate": 9.572858271589237e-05, "loss": 2.0846, "step": 1480 }, { "epoch": 0.26678675973879756, "grad_norm": 1.4181755781173706, "learning_rate": 9.572285362276277e-05, "loss": 1.975, "step": 1481 }, { "epoch": 0.2669668993469939, "grad_norm": 1.3155264854431152, "learning_rate": 9.571712086176812e-05, "loss": 1.9158, "step": 1482 }, { "epoch": 0.26714703895519026, "grad_norm": 1.1629223823547363, "learning_rate": 9.57113844333683e-05, "loss": 1.6506, "step": 1483 }, { "epoch": 0.2673271785633866, "grad_norm": 1.3701138496398926, "learning_rate": 9.570564433802348e-05, "loss": 1.6617, "step": 1484 }, { "epoch": 0.26750731817158296, "grad_norm": 1.2416373491287231, "learning_rate": 9.569990057619414e-05, "loss": 1.962, "step": 1485 }, { "epoch": 0.2676874577797793, "grad_norm": 1.343294620513916, "learning_rate": 9.569415314834104e-05, "loss": 1.7463, "step": 1486 }, { "epoch": 0.26786759738797566, "grad_norm": 1.3468525409698486, "learning_rate": 9.568840205492522e-05, "loss": 1.8854, "step": 1487 }, { "epoch": 0.268047736996172, "grad_norm": 1.349214792251587, "learning_rate": 9.568264729640805e-05, "loss": 1.7979, "step": 1488 }, { "epoch": 0.26822787660436836, "grad_norm": 1.4037396907806396, "learning_rate": 9.567688887325116e-05, "loss": 1.9524, "step": 1489 }, { "epoch": 0.2684080162125647, "grad_norm": 1.317078948020935, "learning_rate": 9.567112678591651e-05, "loss": 1.7042, "step": 1490 }, { "epoch": 0.26858815582076107, "grad_norm": 1.3170051574707031, "learning_rate": 9.56653610348663e-05, "loss": 1.4587, "step": 1491 }, { "epoch": 0.2687682954289574, "grad_norm": 1.2764688730239868, "learning_rate": 9.565959162056307e-05, "loss": 1.6758, "step": 1492 }, { "epoch": 0.2689484350371538, "grad_norm": 1.2370458841323853, "learning_rate": 9.565381854346965e-05, "loss": 1.456, "step": 1493 }, { "epoch": 0.26912857464535017, "grad_norm": 1.325675368309021, "learning_rate": 9.564804180404914e-05, "loss": 1.5876, "step": 1494 }, { "epoch": 0.2693087142535465, "grad_norm": 1.4240556955337524, "learning_rate": 9.564226140276493e-05, "loss": 1.6717, "step": 1495 }, { "epoch": 0.26948885386174287, "grad_norm": 1.353955864906311, "learning_rate": 9.563647734008077e-05, "loss": 1.725, "step": 1496 }, { "epoch": 0.2696689934699392, "grad_norm": 1.3728160858154297, "learning_rate": 9.56306896164606e-05, "loss": 1.7114, "step": 1497 }, { "epoch": 0.2698491330781356, "grad_norm": 1.401940941810608, "learning_rate": 9.562489823236877e-05, "loss": 1.8394, "step": 1498 }, { "epoch": 0.2700292726863319, "grad_norm": 1.4119722843170166, "learning_rate": 9.561910318826981e-05, "loss": 1.5947, "step": 1499 }, { "epoch": 0.2702094122945283, "grad_norm": 1.5551724433898926, "learning_rate": 9.56133044846286e-05, "loss": 2.0708, "step": 1500 }, { "epoch": 0.2703895519027246, "grad_norm": 1.380489706993103, "learning_rate": 9.560750212191033e-05, "loss": 2.3114, "step": 1501 }, { "epoch": 0.270569691510921, "grad_norm": 1.2792739868164062, "learning_rate": 9.560169610058045e-05, "loss": 2.0318, "step": 1502 }, { "epoch": 0.2707498311191173, "grad_norm": 2.988199472427368, "learning_rate": 9.559588642110473e-05, "loss": 2.5278, "step": 1503 }, { "epoch": 0.2709299707273137, "grad_norm": 1.3033597469329834, "learning_rate": 9.559007308394921e-05, "loss": 2.0703, "step": 1504 }, { "epoch": 0.27111011033551, "grad_norm": 1.2994680404663086, "learning_rate": 9.558425608958023e-05, "loss": 2.2384, "step": 1505 }, { "epoch": 0.2712902499437064, "grad_norm": 1.3633158206939697, "learning_rate": 9.557843543846443e-05, "loss": 2.1388, "step": 1506 }, { "epoch": 0.2714703895519027, "grad_norm": 1.3668885231018066, "learning_rate": 9.557261113106875e-05, "loss": 1.9701, "step": 1507 }, { "epoch": 0.2716505291600991, "grad_norm": 1.5243639945983887, "learning_rate": 9.556678316786038e-05, "loss": 2.4586, "step": 1508 }, { "epoch": 0.2718306687682954, "grad_norm": 1.534784197807312, "learning_rate": 9.556095154930688e-05, "loss": 2.1406, "step": 1509 }, { "epoch": 0.2720108083764918, "grad_norm": 1.9293186664581299, "learning_rate": 9.555511627587603e-05, "loss": 2.6017, "step": 1510 }, { "epoch": 0.2721909479846881, "grad_norm": 1.9219658374786377, "learning_rate": 9.554927734803594e-05, "loss": 2.334, "step": 1511 }, { "epoch": 0.2723710875928845, "grad_norm": 1.1688010692596436, "learning_rate": 9.5543434766255e-05, "loss": 1.7778, "step": 1512 }, { "epoch": 0.2725512272010808, "grad_norm": 1.214694619178772, "learning_rate": 9.553758853100192e-05, "loss": 1.7873, "step": 1513 }, { "epoch": 0.2727313668092772, "grad_norm": 1.2433154582977295, "learning_rate": 9.553173864274567e-05, "loss": 1.8045, "step": 1514 }, { "epoch": 0.2729115064174735, "grad_norm": 1.2065643072128296, "learning_rate": 9.552588510195552e-05, "loss": 1.7442, "step": 1515 }, { "epoch": 0.2730916460256699, "grad_norm": 1.1479398012161255, "learning_rate": 9.552002790910102e-05, "loss": 1.7399, "step": 1516 }, { "epoch": 0.2732717856338662, "grad_norm": 1.2715672254562378, "learning_rate": 9.551416706465208e-05, "loss": 1.6718, "step": 1517 }, { "epoch": 0.2734519252420626, "grad_norm": 1.1501915454864502, "learning_rate": 9.550830256907884e-05, "loss": 1.6586, "step": 1518 }, { "epoch": 0.2736320648502589, "grad_norm": 1.1713447570800781, "learning_rate": 9.550243442285169e-05, "loss": 1.6865, "step": 1519 }, { "epoch": 0.2738122044584553, "grad_norm": 1.2487322092056274, "learning_rate": 9.549656262644144e-05, "loss": 1.8284, "step": 1520 }, { "epoch": 0.2739923440666516, "grad_norm": 1.3894429206848145, "learning_rate": 9.54906871803191e-05, "loss": 2.1056, "step": 1521 }, { "epoch": 0.27417248367484803, "grad_norm": 1.2422899007797241, "learning_rate": 9.548480808495599e-05, "loss": 1.7347, "step": 1522 }, { "epoch": 0.2743526232830444, "grad_norm": 1.3186671733856201, "learning_rate": 9.547892534082372e-05, "loss": 1.9204, "step": 1523 }, { "epoch": 0.27453276289124073, "grad_norm": 1.209747076034546, "learning_rate": 9.547303894839422e-05, "loss": 1.572, "step": 1524 }, { "epoch": 0.2747129024994371, "grad_norm": 1.217474341392517, "learning_rate": 9.546714890813968e-05, "loss": 1.7755, "step": 1525 }, { "epoch": 0.27489304210763343, "grad_norm": 1.2285915613174438, "learning_rate": 9.546125522053261e-05, "loss": 1.7449, "step": 1526 }, { "epoch": 0.2750731817158298, "grad_norm": 1.1651476621627808, "learning_rate": 9.545535788604578e-05, "loss": 1.4557, "step": 1527 }, { "epoch": 0.27525332132402613, "grad_norm": 1.236716866493225, "learning_rate": 9.544945690515229e-05, "loss": 1.9389, "step": 1528 }, { "epoch": 0.2754334609322225, "grad_norm": 1.240716814994812, "learning_rate": 9.54435522783255e-05, "loss": 1.8538, "step": 1529 }, { "epoch": 0.27561360054041883, "grad_norm": 1.2599238157272339, "learning_rate": 9.543764400603908e-05, "loss": 1.8716, "step": 1530 }, { "epoch": 0.2757937401486152, "grad_norm": 1.4449176788330078, "learning_rate": 9.543173208876698e-05, "loss": 1.9514, "step": 1531 }, { "epoch": 0.27597387975681154, "grad_norm": 1.2154200077056885, "learning_rate": 9.542581652698348e-05, "loss": 1.5626, "step": 1532 }, { "epoch": 0.2761540193650079, "grad_norm": 1.3090604543685913, "learning_rate": 9.54198973211631e-05, "loss": 1.7569, "step": 1533 }, { "epoch": 0.27633415897320424, "grad_norm": 1.328647255897522, "learning_rate": 9.541397447178067e-05, "loss": 1.6839, "step": 1534 }, { "epoch": 0.2765142985814006, "grad_norm": 1.2050557136535645, "learning_rate": 9.540804797931134e-05, "loss": 1.7401, "step": 1535 }, { "epoch": 0.27669443818959694, "grad_norm": 1.3914463520050049, "learning_rate": 9.54021178442305e-05, "loss": 1.8738, "step": 1536 }, { "epoch": 0.2768745777977933, "grad_norm": 1.3764175176620483, "learning_rate": 9.539618406701389e-05, "loss": 1.654, "step": 1537 }, { "epoch": 0.27705471740598964, "grad_norm": 1.413156270980835, "learning_rate": 9.539024664813752e-05, "loss": 1.9631, "step": 1538 }, { "epoch": 0.277234857014186, "grad_norm": 1.3338452577590942, "learning_rate": 9.538430558807766e-05, "loss": 1.7974, "step": 1539 }, { "epoch": 0.27741499662238234, "grad_norm": 1.2033109664916992, "learning_rate": 9.537836088731091e-05, "loss": 1.5907, "step": 1540 }, { "epoch": 0.2775951362305787, "grad_norm": 1.3737608194351196, "learning_rate": 9.537241254631416e-05, "loss": 1.8458, "step": 1541 }, { "epoch": 0.27777527583877504, "grad_norm": 1.5115687847137451, "learning_rate": 9.536646056556458e-05, "loss": 1.9156, "step": 1542 }, { "epoch": 0.2779554154469714, "grad_norm": 1.53287935256958, "learning_rate": 9.536050494553961e-05, "loss": 1.9097, "step": 1543 }, { "epoch": 0.27813555505516774, "grad_norm": 1.5483609437942505, "learning_rate": 9.535454568671704e-05, "loss": 2.058, "step": 1544 }, { "epoch": 0.2783156946633641, "grad_norm": 1.3104422092437744, "learning_rate": 9.534858278957491e-05, "loss": 1.6419, "step": 1545 }, { "epoch": 0.27849583427156044, "grad_norm": 1.3753620386123657, "learning_rate": 9.534261625459156e-05, "loss": 1.581, "step": 1546 }, { "epoch": 0.2786759738797568, "grad_norm": 1.4918774366378784, "learning_rate": 9.53366460822456e-05, "loss": 1.8023, "step": 1547 }, { "epoch": 0.27885611348795314, "grad_norm": 1.5932422876358032, "learning_rate": 9.5330672273016e-05, "loss": 2.1818, "step": 1548 }, { "epoch": 0.2790362530961495, "grad_norm": 1.3778334856033325, "learning_rate": 9.532469482738193e-05, "loss": 1.9875, "step": 1549 }, { "epoch": 0.2792163927043459, "grad_norm": 1.2964246273040771, "learning_rate": 9.531871374582293e-05, "loss": 1.5889, "step": 1550 }, { "epoch": 0.27939653231254225, "grad_norm": 1.4452760219573975, "learning_rate": 9.531272902881878e-05, "loss": 2.2888, "step": 1551 }, { "epoch": 0.2795766719207386, "grad_norm": 1.2659543752670288, "learning_rate": 9.530674067684958e-05, "loss": 1.8517, "step": 1552 }, { "epoch": 0.27975681152893495, "grad_norm": 1.2677006721496582, "learning_rate": 9.530074869039572e-05, "loss": 1.9753, "step": 1553 }, { "epoch": 0.2799369511371313, "grad_norm": 1.2493791580200195, "learning_rate": 9.529475306993784e-05, "loss": 2.1794, "step": 1554 }, { "epoch": 0.28011709074532765, "grad_norm": 1.2145565748214722, "learning_rate": 9.528875381595696e-05, "loss": 1.9815, "step": 1555 }, { "epoch": 0.280297230353524, "grad_norm": 1.2580119371414185, "learning_rate": 9.528275092893428e-05, "loss": 1.8955, "step": 1556 }, { "epoch": 0.28047736996172035, "grad_norm": 1.4347100257873535, "learning_rate": 9.527674440935141e-05, "loss": 2.1757, "step": 1557 }, { "epoch": 0.2806575095699167, "grad_norm": 1.522442102432251, "learning_rate": 9.527073425769013e-05, "loss": 2.2382, "step": 1558 }, { "epoch": 0.28083764917811305, "grad_norm": 1.6864714622497559, "learning_rate": 9.52647204744326e-05, "loss": 2.3198, "step": 1559 }, { "epoch": 0.2810177887863094, "grad_norm": 1.8145338296890259, "learning_rate": 9.525870306006126e-05, "loss": 2.525, "step": 1560 }, { "epoch": 0.28119792839450575, "grad_norm": 2.089353561401367, "learning_rate": 9.52526820150588e-05, "loss": 2.6572, "step": 1561 }, { "epoch": 0.2813780680027021, "grad_norm": 1.4616459608078003, "learning_rate": 9.524665733990822e-05, "loss": 1.9956, "step": 1562 }, { "epoch": 0.28155820761089845, "grad_norm": 1.352818489074707, "learning_rate": 9.524062903509283e-05, "loss": 1.8813, "step": 1563 }, { "epoch": 0.2817383472190948, "grad_norm": 1.2800897359848022, "learning_rate": 9.523459710109622e-05, "loss": 1.9067, "step": 1564 }, { "epoch": 0.28191848682729115, "grad_norm": 1.2382099628448486, "learning_rate": 9.522856153840226e-05, "loss": 1.6283, "step": 1565 }, { "epoch": 0.2820986264354875, "grad_norm": 1.1803381443023682, "learning_rate": 9.522252234749512e-05, "loss": 1.6379, "step": 1566 }, { "epoch": 0.28227876604368385, "grad_norm": 1.1737301349639893, "learning_rate": 9.521647952885927e-05, "loss": 1.7575, "step": 1567 }, { "epoch": 0.2824589056518802, "grad_norm": 1.4332079887390137, "learning_rate": 9.521043308297945e-05, "loss": 1.7779, "step": 1568 }, { "epoch": 0.28263904526007655, "grad_norm": 1.3486615419387817, "learning_rate": 9.520438301034072e-05, "loss": 1.974, "step": 1569 }, { "epoch": 0.2828191848682729, "grad_norm": 1.2072014808654785, "learning_rate": 9.51983293114284e-05, "loss": 1.5324, "step": 1570 }, { "epoch": 0.28299932447646925, "grad_norm": 1.3316431045532227, "learning_rate": 9.519227198672811e-05, "loss": 1.8893, "step": 1571 }, { "epoch": 0.2831794640846656, "grad_norm": 1.3074681758880615, "learning_rate": 9.518621103672578e-05, "loss": 1.8394, "step": 1572 }, { "epoch": 0.28335960369286195, "grad_norm": 1.3992279767990112, "learning_rate": 9.51801464619076e-05, "loss": 1.9081, "step": 1573 }, { "epoch": 0.2835397433010583, "grad_norm": 1.3060131072998047, "learning_rate": 9.51740782627601e-05, "loss": 1.627, "step": 1574 }, { "epoch": 0.28371988290925465, "grad_norm": 1.4052181243896484, "learning_rate": 9.516800643977003e-05, "loss": 1.952, "step": 1575 }, { "epoch": 0.283900022517451, "grad_norm": 1.1823112964630127, "learning_rate": 9.516193099342448e-05, "loss": 1.612, "step": 1576 }, { "epoch": 0.28408016212564735, "grad_norm": 1.3419382572174072, "learning_rate": 9.515585192421084e-05, "loss": 2.0059, "step": 1577 }, { "epoch": 0.28426030173384376, "grad_norm": 1.2367485761642456, "learning_rate": 9.514976923261674e-05, "loss": 1.7249, "step": 1578 }, { "epoch": 0.2844404413420401, "grad_norm": 1.3635226488113403, "learning_rate": 9.514368291913013e-05, "loss": 1.8586, "step": 1579 }, { "epoch": 0.28462058095023646, "grad_norm": 1.2817710638046265, "learning_rate": 9.513759298423929e-05, "loss": 2.049, "step": 1580 }, { "epoch": 0.2848007205584328, "grad_norm": 1.2486647367477417, "learning_rate": 9.513149942843271e-05, "loss": 1.5934, "step": 1581 }, { "epoch": 0.28498086016662916, "grad_norm": 1.3252346515655518, "learning_rate": 9.512540225219925e-05, "loss": 2.1179, "step": 1582 }, { "epoch": 0.2851609997748255, "grad_norm": 1.2106174230575562, "learning_rate": 9.511930145602798e-05, "loss": 1.9192, "step": 1583 }, { "epoch": 0.28534113938302186, "grad_norm": 1.3011929988861084, "learning_rate": 9.511319704040833e-05, "loss": 1.8247, "step": 1584 }, { "epoch": 0.2855212789912182, "grad_norm": 1.2832356691360474, "learning_rate": 9.510708900582999e-05, "loss": 1.8141, "step": 1585 }, { "epoch": 0.28570141859941456, "grad_norm": 1.4250215291976929, "learning_rate": 9.510097735278294e-05, "loss": 1.8305, "step": 1586 }, { "epoch": 0.2858815582076109, "grad_norm": 1.4618161916732788, "learning_rate": 9.509486208175746e-05, "loss": 2.0115, "step": 1587 }, { "epoch": 0.28606169781580726, "grad_norm": 1.4392646551132202, "learning_rate": 9.50887431932441e-05, "loss": 1.8492, "step": 1588 }, { "epoch": 0.2862418374240036, "grad_norm": 1.4258909225463867, "learning_rate": 9.508262068773372e-05, "loss": 1.7784, "step": 1589 }, { "epoch": 0.28642197703219996, "grad_norm": 1.2192386388778687, "learning_rate": 9.507649456571748e-05, "loss": 1.6844, "step": 1590 }, { "epoch": 0.2866021166403963, "grad_norm": 1.3005166053771973, "learning_rate": 9.507036482768679e-05, "loss": 1.6884, "step": 1591 }, { "epoch": 0.28678225624859266, "grad_norm": 1.3351885080337524, "learning_rate": 9.50642314741334e-05, "loss": 1.6317, "step": 1592 }, { "epoch": 0.286962395856789, "grad_norm": 1.4735064506530762, "learning_rate": 9.505809450554928e-05, "loss": 1.7504, "step": 1593 }, { "epoch": 0.28714253546498536, "grad_norm": 1.5064492225646973, "learning_rate": 9.50519539224268e-05, "loss": 1.8461, "step": 1594 }, { "epoch": 0.2873226750731817, "grad_norm": 1.577451229095459, "learning_rate": 9.50458097252585e-05, "loss": 1.8405, "step": 1595 }, { "epoch": 0.28750281468137806, "grad_norm": 1.4107273817062378, "learning_rate": 9.503966191453731e-05, "loss": 1.543, "step": 1596 }, { "epoch": 0.2876829542895744, "grad_norm": 1.431510329246521, "learning_rate": 9.503351049075635e-05, "loss": 1.6636, "step": 1597 }, { "epoch": 0.28786309389777076, "grad_norm": 1.4518580436706543, "learning_rate": 9.502735545440912e-05, "loss": 1.7394, "step": 1598 }, { "epoch": 0.2880432335059671, "grad_norm": 1.3683980703353882, "learning_rate": 9.502119680598939e-05, "loss": 1.6494, "step": 1599 }, { "epoch": 0.28822337311416346, "grad_norm": 1.3465979099273682, "learning_rate": 9.501503454599116e-05, "loss": 1.7304, "step": 1600 }, { "epoch": 0.2884035127223598, "grad_norm": 2.3761870861053467, "learning_rate": 9.500886867490879e-05, "loss": 2.4285, "step": 1601 }, { "epoch": 0.28858365233055616, "grad_norm": 1.8500492572784424, "learning_rate": 9.50026991932369e-05, "loss": 2.3104, "step": 1602 }, { "epoch": 0.2887637919387525, "grad_norm": 1.3993734121322632, "learning_rate": 9.49965261014704e-05, "loss": 2.1481, "step": 1603 }, { "epoch": 0.28894393154694886, "grad_norm": 1.2521560192108154, "learning_rate": 9.49903494001045e-05, "loss": 1.9191, "step": 1604 }, { "epoch": 0.2891240711551452, "grad_norm": 1.3501262664794922, "learning_rate": 9.498416908963465e-05, "loss": 2.0639, "step": 1605 }, { "epoch": 0.28930421076334156, "grad_norm": 1.3352537155151367, "learning_rate": 9.49779851705567e-05, "loss": 2.2062, "step": 1606 }, { "epoch": 0.28948435037153797, "grad_norm": 1.6244553327560425, "learning_rate": 9.497179764336669e-05, "loss": 2.2306, "step": 1607 }, { "epoch": 0.2896644899797343, "grad_norm": 1.4581143856048584, "learning_rate": 9.496560650856097e-05, "loss": 2.0957, "step": 1608 }, { "epoch": 0.28984462958793067, "grad_norm": 1.7605836391448975, "learning_rate": 9.495941176663619e-05, "loss": 2.2289, "step": 1609 }, { "epoch": 0.290024769196127, "grad_norm": 2.2980453968048096, "learning_rate": 9.495321341808931e-05, "loss": 2.5397, "step": 1610 }, { "epoch": 0.29020490880432337, "grad_norm": 1.374963641166687, "learning_rate": 9.494701146341753e-05, "loss": 1.6998, "step": 1611 }, { "epoch": 0.2903850484125197, "grad_norm": 1.308967113494873, "learning_rate": 9.49408059031184e-05, "loss": 1.8211, "step": 1612 }, { "epoch": 0.29056518802071607, "grad_norm": 1.3360024690628052, "learning_rate": 9.49345967376897e-05, "loss": 2.0419, "step": 1613 }, { "epoch": 0.2907453276289124, "grad_norm": 1.4949694871902466, "learning_rate": 9.492838396762954e-05, "loss": 1.8544, "step": 1614 }, { "epoch": 0.29092546723710877, "grad_norm": 1.2362403869628906, "learning_rate": 9.49221675934363e-05, "loss": 1.7997, "step": 1615 }, { "epoch": 0.2911056068453051, "grad_norm": 1.343570351600647, "learning_rate": 9.491594761560866e-05, "loss": 1.8882, "step": 1616 }, { "epoch": 0.29128574645350147, "grad_norm": 1.3654160499572754, "learning_rate": 9.490972403464557e-05, "loss": 1.8538, "step": 1617 }, { "epoch": 0.2914658860616978, "grad_norm": 1.1694066524505615, "learning_rate": 9.490349685104632e-05, "loss": 1.7157, "step": 1618 }, { "epoch": 0.29164602566989417, "grad_norm": 1.2941679954528809, "learning_rate": 9.489726606531039e-05, "loss": 1.6227, "step": 1619 }, { "epoch": 0.2918261652780905, "grad_norm": 1.3347948789596558, "learning_rate": 9.489103167793767e-05, "loss": 1.6148, "step": 1620 }, { "epoch": 0.29200630488628687, "grad_norm": 1.274156928062439, "learning_rate": 9.488479368942825e-05, "loss": 1.6794, "step": 1621 }, { "epoch": 0.2921864444944832, "grad_norm": 1.1951353549957275, "learning_rate": 9.487855210028252e-05, "loss": 1.7496, "step": 1622 }, { "epoch": 0.29236658410267957, "grad_norm": 1.2419942617416382, "learning_rate": 9.487230691100122e-05, "loss": 1.7391, "step": 1623 }, { "epoch": 0.2925467237108759, "grad_norm": 1.2361663579940796, "learning_rate": 9.48660581220853e-05, "loss": 1.8085, "step": 1624 }, { "epoch": 0.2927268633190723, "grad_norm": 1.2734806537628174, "learning_rate": 9.485980573403607e-05, "loss": 1.7154, "step": 1625 }, { "epoch": 0.2929070029272686, "grad_norm": 1.2362664937973022, "learning_rate": 9.485354974735507e-05, "loss": 1.8179, "step": 1626 }, { "epoch": 0.293087142535465, "grad_norm": 1.2693657875061035, "learning_rate": 9.484729016254416e-05, "loss": 1.751, "step": 1627 }, { "epoch": 0.2932672821436613, "grad_norm": 1.3136745691299438, "learning_rate": 9.484102698010548e-05, "loss": 1.7883, "step": 1628 }, { "epoch": 0.2934474217518577, "grad_norm": 1.2337639331817627, "learning_rate": 9.483476020054145e-05, "loss": 1.6342, "step": 1629 }, { "epoch": 0.293627561360054, "grad_norm": 1.240913987159729, "learning_rate": 9.482848982435479e-05, "loss": 1.8566, "step": 1630 }, { "epoch": 0.2938077009682504, "grad_norm": 1.2837787866592407, "learning_rate": 9.482221585204851e-05, "loss": 1.8245, "step": 1631 }, { "epoch": 0.2939878405764467, "grad_norm": 1.2207119464874268, "learning_rate": 9.481593828412591e-05, "loss": 1.5952, "step": 1632 }, { "epoch": 0.2941679801846431, "grad_norm": 1.2471157312393188, "learning_rate": 9.480965712109058e-05, "loss": 1.6466, "step": 1633 }, { "epoch": 0.2943481197928394, "grad_norm": 1.2496901750564575, "learning_rate": 9.480337236344639e-05, "loss": 1.6434, "step": 1634 }, { "epoch": 0.29452825940103583, "grad_norm": 1.2381601333618164, "learning_rate": 9.479708401169747e-05, "loss": 1.6075, "step": 1635 }, { "epoch": 0.2947083990092322, "grad_norm": 1.3611130714416504, "learning_rate": 9.479079206634829e-05, "loss": 1.9765, "step": 1636 }, { "epoch": 0.29488853861742853, "grad_norm": 1.3782380819320679, "learning_rate": 9.47844965279036e-05, "loss": 1.7475, "step": 1637 }, { "epoch": 0.2950686782256249, "grad_norm": 1.4967594146728516, "learning_rate": 9.47781973968684e-05, "loss": 1.8748, "step": 1638 }, { "epoch": 0.29524881783382123, "grad_norm": 1.3652241230010986, "learning_rate": 9.477189467374803e-05, "loss": 1.8639, "step": 1639 }, { "epoch": 0.2954289574420176, "grad_norm": 1.3141024112701416, "learning_rate": 9.476558835904806e-05, "loss": 2.0563, "step": 1640 }, { "epoch": 0.29560909705021393, "grad_norm": 1.3570927381515503, "learning_rate": 9.475927845327441e-05, "loss": 1.8514, "step": 1641 }, { "epoch": 0.2957892366584103, "grad_norm": 1.3338172435760498, "learning_rate": 9.475296495693325e-05, "loss": 1.7215, "step": 1642 }, { "epoch": 0.29596937626660663, "grad_norm": 1.4011904001235962, "learning_rate": 9.474664787053102e-05, "loss": 1.8308, "step": 1643 }, { "epoch": 0.296149515874803, "grad_norm": 1.2640546560287476, "learning_rate": 9.47403271945745e-05, "loss": 1.4881, "step": 1644 }, { "epoch": 0.29632965548299933, "grad_norm": 1.3274166584014893, "learning_rate": 9.473400292957072e-05, "loss": 1.8128, "step": 1645 }, { "epoch": 0.2965097950911957, "grad_norm": 1.2972867488861084, "learning_rate": 9.472767507602702e-05, "loss": 1.6003, "step": 1646 }, { "epoch": 0.29668993469939203, "grad_norm": 1.3326082229614258, "learning_rate": 9.472134363445102e-05, "loss": 1.7415, "step": 1647 }, { "epoch": 0.2968700743075884, "grad_norm": 1.4534722566604614, "learning_rate": 9.471500860535061e-05, "loss": 1.9167, "step": 1648 }, { "epoch": 0.29705021391578473, "grad_norm": 1.3006587028503418, "learning_rate": 9.470866998923398e-05, "loss": 1.6788, "step": 1649 }, { "epoch": 0.2972303535239811, "grad_norm": 1.4854328632354736, "learning_rate": 9.470232778660964e-05, "loss": 1.8662, "step": 1650 }, { "epoch": 0.29741049313217743, "grad_norm": 1.7819095849990845, "learning_rate": 9.469598199798632e-05, "loss": 2.3337, "step": 1651 }, { "epoch": 0.2975906327403738, "grad_norm": 1.4240829944610596, "learning_rate": 9.468963262387309e-05, "loss": 2.2743, "step": 1652 }, { "epoch": 0.29777077234857013, "grad_norm": 1.9591439962387085, "learning_rate": 9.468327966477932e-05, "loss": 2.3144, "step": 1653 }, { "epoch": 0.2979509119567665, "grad_norm": 1.3398984670639038, "learning_rate": 9.467692312121461e-05, "loss": 2.1823, "step": 1654 }, { "epoch": 0.29813105156496283, "grad_norm": 1.3944469690322876, "learning_rate": 9.467056299368889e-05, "loss": 2.1413, "step": 1655 }, { "epoch": 0.2983111911731592, "grad_norm": 1.399717926979065, "learning_rate": 9.466419928271236e-05, "loss": 2.0515, "step": 1656 }, { "epoch": 0.29849133078135553, "grad_norm": 1.5255688428878784, "learning_rate": 9.465783198879552e-05, "loss": 2.4554, "step": 1657 }, { "epoch": 0.2986714703895519, "grad_norm": 1.4890297651290894, "learning_rate": 9.465146111244915e-05, "loss": 2.1461, "step": 1658 }, { "epoch": 0.29885160999774824, "grad_norm": 1.5976670980453491, "learning_rate": 9.464508665418433e-05, "loss": 2.0651, "step": 1659 }, { "epoch": 0.2990317496059446, "grad_norm": 1.8347368240356445, "learning_rate": 9.463870861451239e-05, "loss": 2.177, "step": 1660 }, { "epoch": 0.29921188921414094, "grad_norm": 1.11990487575531, "learning_rate": 9.4632326993945e-05, "loss": 1.6319, "step": 1661 }, { "epoch": 0.2993920288223373, "grad_norm": 1.2626057863235474, "learning_rate": 9.462594179299406e-05, "loss": 1.7326, "step": 1662 }, { "epoch": 0.29957216843053364, "grad_norm": 1.4078112840652466, "learning_rate": 9.461955301217182e-05, "loss": 1.948, "step": 1663 }, { "epoch": 0.29975230803873004, "grad_norm": 1.2782460451126099, "learning_rate": 9.461316065199077e-05, "loss": 1.7655, "step": 1664 }, { "epoch": 0.2999324476469264, "grad_norm": 1.3490705490112305, "learning_rate": 9.46067647129637e-05, "loss": 1.7881, "step": 1665 }, { "epoch": 0.30011258725512274, "grad_norm": 1.3608444929122925, "learning_rate": 9.460036519560369e-05, "loss": 1.8756, "step": 1666 }, { "epoch": 0.3002927268633191, "grad_norm": 1.2374787330627441, "learning_rate": 9.45939621004241e-05, "loss": 1.5984, "step": 1667 }, { "epoch": 0.30047286647151544, "grad_norm": 1.29380202293396, "learning_rate": 9.458755542793862e-05, "loss": 1.7261, "step": 1668 }, { "epoch": 0.3006530060797118, "grad_norm": 1.2572122812271118, "learning_rate": 9.458114517866115e-05, "loss": 1.698, "step": 1669 }, { "epoch": 0.30083314568790814, "grad_norm": 1.1971521377563477, "learning_rate": 9.45747313531059e-05, "loss": 1.5534, "step": 1670 }, { "epoch": 0.3010132852961045, "grad_norm": 1.2243391275405884, "learning_rate": 9.456831395178745e-05, "loss": 1.8837, "step": 1671 }, { "epoch": 0.30119342490430084, "grad_norm": 1.3460075855255127, "learning_rate": 9.456189297522053e-05, "loss": 1.7708, "step": 1672 }, { "epoch": 0.3013735645124972, "grad_norm": 1.1608455181121826, "learning_rate": 9.455546842392029e-05, "loss": 1.5252, "step": 1673 }, { "epoch": 0.30155370412069354, "grad_norm": 1.2588447332382202, "learning_rate": 9.454904029840206e-05, "loss": 1.7134, "step": 1674 }, { "epoch": 0.3017338437288899, "grad_norm": 1.3225351572036743, "learning_rate": 9.454260859918153e-05, "loss": 1.8432, "step": 1675 }, { "epoch": 0.30191398333708624, "grad_norm": 1.3037313222885132, "learning_rate": 9.453617332677461e-05, "loss": 1.9354, "step": 1676 }, { "epoch": 0.3020941229452826, "grad_norm": 1.3181267976760864, "learning_rate": 9.452973448169758e-05, "loss": 2.0047, "step": 1677 }, { "epoch": 0.30227426255347895, "grad_norm": 1.3081953525543213, "learning_rate": 9.452329206446693e-05, "loss": 1.9375, "step": 1678 }, { "epoch": 0.3024544021616753, "grad_norm": 1.3248902559280396, "learning_rate": 9.451684607559947e-05, "loss": 1.7659, "step": 1679 }, { "epoch": 0.30263454176987165, "grad_norm": 1.4065247774124146, "learning_rate": 9.451039651561231e-05, "loss": 2.1117, "step": 1680 }, { "epoch": 0.302814681378068, "grad_norm": 1.2488824129104614, "learning_rate": 9.450394338502284e-05, "loss": 1.9707, "step": 1681 }, { "epoch": 0.30299482098626435, "grad_norm": 1.3439654111862183, "learning_rate": 9.449748668434868e-05, "loss": 1.976, "step": 1682 }, { "epoch": 0.3031749605944607, "grad_norm": 1.2611677646636963, "learning_rate": 9.449102641410783e-05, "loss": 1.8581, "step": 1683 }, { "epoch": 0.30335510020265705, "grad_norm": 1.3079493045806885, "learning_rate": 9.44845625748185e-05, "loss": 1.8, "step": 1684 }, { "epoch": 0.3035352398108534, "grad_norm": 1.2610013484954834, "learning_rate": 9.447809516699924e-05, "loss": 1.9368, "step": 1685 }, { "epoch": 0.30371537941904975, "grad_norm": 1.4763059616088867, "learning_rate": 9.447162419116884e-05, "loss": 2.096, "step": 1686 }, { "epoch": 0.3038955190272461, "grad_norm": 1.380873680114746, "learning_rate": 9.446514964784641e-05, "loss": 1.8453, "step": 1687 }, { "epoch": 0.30407565863544245, "grad_norm": 1.2984628677368164, "learning_rate": 9.445867153755132e-05, "loss": 1.7129, "step": 1688 }, { "epoch": 0.3042557982436388, "grad_norm": 1.2476941347122192, "learning_rate": 9.445218986080329e-05, "loss": 1.7545, "step": 1689 }, { "epoch": 0.30443593785183515, "grad_norm": 1.246397614479065, "learning_rate": 9.444570461812221e-05, "loss": 1.5413, "step": 1690 }, { "epoch": 0.3046160774600315, "grad_norm": 1.2983617782592773, "learning_rate": 9.443921581002837e-05, "loss": 1.8814, "step": 1691 }, { "epoch": 0.3047962170682279, "grad_norm": 1.3920047283172607, "learning_rate": 9.443272343704227e-05, "loss": 1.768, "step": 1692 }, { "epoch": 0.30497635667642425, "grad_norm": 1.2787226438522339, "learning_rate": 9.442622749968475e-05, "loss": 1.6886, "step": 1693 }, { "epoch": 0.3051564962846206, "grad_norm": 1.3687013387680054, "learning_rate": 9.44197279984769e-05, "loss": 1.8526, "step": 1694 }, { "epoch": 0.30533663589281695, "grad_norm": 1.2241677045822144, "learning_rate": 9.441322493394009e-05, "loss": 1.5742, "step": 1695 }, { "epoch": 0.3055167755010133, "grad_norm": 1.394643783569336, "learning_rate": 9.440671830659602e-05, "loss": 1.8381, "step": 1696 }, { "epoch": 0.30569691510920965, "grad_norm": 1.514260172843933, "learning_rate": 9.440020811696664e-05, "loss": 1.9431, "step": 1697 }, { "epoch": 0.305877054717406, "grad_norm": 1.326114296913147, "learning_rate": 9.43936943655742e-05, "loss": 1.7265, "step": 1698 }, { "epoch": 0.30605719432560236, "grad_norm": 1.2670493125915527, "learning_rate": 9.43871770529412e-05, "loss": 1.4803, "step": 1699 }, { "epoch": 0.3062373339337987, "grad_norm": 1.274014949798584, "learning_rate": 9.438065617959048e-05, "loss": 1.5406, "step": 1700 }, { "epoch": 0.30641747354199506, "grad_norm": 1.5541276931762695, "learning_rate": 9.437413174604515e-05, "loss": 2.3882, "step": 1701 }, { "epoch": 0.3065976131501914, "grad_norm": 1.4880536794662476, "learning_rate": 9.436760375282859e-05, "loss": 2.1364, "step": 1702 }, { "epoch": 0.30677775275838776, "grad_norm": 1.2172539234161377, "learning_rate": 9.436107220046445e-05, "loss": 1.9677, "step": 1703 }, { "epoch": 0.3069578923665841, "grad_norm": 1.3161697387695312, "learning_rate": 9.435453708947672e-05, "loss": 2.2429, "step": 1704 }, { "epoch": 0.30713803197478046, "grad_norm": 1.3544458150863647, "learning_rate": 9.434799842038961e-05, "loss": 2.1697, "step": 1705 }, { "epoch": 0.3073181715829768, "grad_norm": 1.2765963077545166, "learning_rate": 9.43414561937277e-05, "loss": 2.1297, "step": 1706 }, { "epoch": 0.30749831119117316, "grad_norm": 1.4520622491836548, "learning_rate": 9.433491041001575e-05, "loss": 2.2196, "step": 1707 }, { "epoch": 0.3076784507993695, "grad_norm": 1.6171400547027588, "learning_rate": 9.432836106977888e-05, "loss": 2.1238, "step": 1708 }, { "epoch": 0.30785859040756586, "grad_norm": 1.802198886871338, "learning_rate": 9.432180817354249e-05, "loss": 2.0303, "step": 1709 }, { "epoch": 0.3080387300157622, "grad_norm": 1.7998108863830566, "learning_rate": 9.431525172183222e-05, "loss": 1.9403, "step": 1710 }, { "epoch": 0.30821886962395856, "grad_norm": 1.5913798809051514, "learning_rate": 9.430869171517405e-05, "loss": 2.0044, "step": 1711 }, { "epoch": 0.3083990092321549, "grad_norm": 1.3948698043823242, "learning_rate": 9.430212815409421e-05, "loss": 1.7397, "step": 1712 }, { "epoch": 0.30857914884035126, "grad_norm": 1.3813555240631104, "learning_rate": 9.429556103911923e-05, "loss": 1.7361, "step": 1713 }, { "epoch": 0.3087592884485476, "grad_norm": 1.2981288433074951, "learning_rate": 9.428899037077592e-05, "loss": 1.6294, "step": 1714 }, { "epoch": 0.30893942805674396, "grad_norm": 1.3228983879089355, "learning_rate": 9.428241614959137e-05, "loss": 1.8087, "step": 1715 }, { "epoch": 0.3091195676649403, "grad_norm": 1.376579761505127, "learning_rate": 9.427583837609297e-05, "loss": 1.8035, "step": 1716 }, { "epoch": 0.30929970727313666, "grad_norm": 1.0601602792739868, "learning_rate": 9.426925705080837e-05, "loss": 1.4258, "step": 1717 }, { "epoch": 0.309479846881333, "grad_norm": 1.2452287673950195, "learning_rate": 9.426267217426556e-05, "loss": 1.6219, "step": 1718 }, { "epoch": 0.30965998648952936, "grad_norm": 1.1422358751296997, "learning_rate": 9.425608374699273e-05, "loss": 1.6694, "step": 1719 }, { "epoch": 0.3098401260977257, "grad_norm": 1.3704848289489746, "learning_rate": 9.424949176951843e-05, "loss": 1.9035, "step": 1720 }, { "epoch": 0.3100202657059221, "grad_norm": 1.2918815612792969, "learning_rate": 9.424289624237144e-05, "loss": 1.8343, "step": 1721 }, { "epoch": 0.31020040531411847, "grad_norm": 1.3253055810928345, "learning_rate": 9.423629716608087e-05, "loss": 1.853, "step": 1722 }, { "epoch": 0.3103805449223148, "grad_norm": 1.381380319595337, "learning_rate": 9.422969454117607e-05, "loss": 2.0804, "step": 1723 }, { "epoch": 0.31056068453051117, "grad_norm": 1.270503282546997, "learning_rate": 9.422308836818675e-05, "loss": 1.6293, "step": 1724 }, { "epoch": 0.3107408241387075, "grad_norm": 1.3035050630569458, "learning_rate": 9.42164786476428e-05, "loss": 1.8483, "step": 1725 }, { "epoch": 0.31092096374690387, "grad_norm": 1.42676842212677, "learning_rate": 9.420986538007446e-05, "loss": 1.8387, "step": 1726 }, { "epoch": 0.3111011033551002, "grad_norm": 1.2072376012802124, "learning_rate": 9.420324856601227e-05, "loss": 1.9353, "step": 1727 }, { "epoch": 0.31128124296329657, "grad_norm": 1.205321192741394, "learning_rate": 9.4196628205987e-05, "loss": 1.7275, "step": 1728 }, { "epoch": 0.3114613825714929, "grad_norm": 1.257953405380249, "learning_rate": 9.419000430052976e-05, "loss": 1.7968, "step": 1729 }, { "epoch": 0.31164152217968927, "grad_norm": 1.274452805519104, "learning_rate": 9.41833768501719e-05, "loss": 1.8045, "step": 1730 }, { "epoch": 0.3118216617878856, "grad_norm": 1.2059699296951294, "learning_rate": 9.417674585544505e-05, "loss": 1.8465, "step": 1731 }, { "epoch": 0.31200180139608197, "grad_norm": 1.16840660572052, "learning_rate": 9.417011131688117e-05, "loss": 1.4997, "step": 1732 }, { "epoch": 0.3121819410042783, "grad_norm": 1.2271968126296997, "learning_rate": 9.416347323501248e-05, "loss": 1.7907, "step": 1733 }, { "epoch": 0.31236208061247467, "grad_norm": 1.349408745765686, "learning_rate": 9.415683161037148e-05, "loss": 1.7231, "step": 1734 }, { "epoch": 0.312542220220671, "grad_norm": 1.4745874404907227, "learning_rate": 9.415018644349095e-05, "loss": 2.1468, "step": 1735 }, { "epoch": 0.31272235982886737, "grad_norm": 1.3756881952285767, "learning_rate": 9.414353773490398e-05, "loss": 1.7441, "step": 1736 }, { "epoch": 0.3129024994370637, "grad_norm": 1.3152180910110474, "learning_rate": 9.41368854851439e-05, "loss": 1.6037, "step": 1737 }, { "epoch": 0.31308263904526007, "grad_norm": 1.4125536680221558, "learning_rate": 9.413022969474437e-05, "loss": 1.8707, "step": 1738 }, { "epoch": 0.3132627786534564, "grad_norm": 1.378162145614624, "learning_rate": 9.412357036423931e-05, "loss": 1.8994, "step": 1739 }, { "epoch": 0.31344291826165277, "grad_norm": 1.3343230485916138, "learning_rate": 9.411690749416294e-05, "loss": 1.8816, "step": 1740 }, { "epoch": 0.3136230578698491, "grad_norm": 1.3739832639694214, "learning_rate": 9.411024108504972e-05, "loss": 1.7611, "step": 1741 }, { "epoch": 0.31380319747804547, "grad_norm": 1.2668706178665161, "learning_rate": 9.410357113743444e-05, "loss": 1.7515, "step": 1742 }, { "epoch": 0.3139833370862418, "grad_norm": 1.3751226663589478, "learning_rate": 9.409689765185218e-05, "loss": 1.7097, "step": 1743 }, { "epoch": 0.31416347669443817, "grad_norm": 1.5417399406433105, "learning_rate": 9.409022062883828e-05, "loss": 1.9759, "step": 1744 }, { "epoch": 0.3143436163026345, "grad_norm": 1.4709678888320923, "learning_rate": 9.408354006892833e-05, "loss": 1.9505, "step": 1745 }, { "epoch": 0.31452375591083087, "grad_norm": 1.3311420679092407, "learning_rate": 9.407685597265828e-05, "loss": 1.6615, "step": 1746 }, { "epoch": 0.3147038955190272, "grad_norm": 1.3597052097320557, "learning_rate": 9.407016834056432e-05, "loss": 1.6673, "step": 1747 }, { "epoch": 0.31488403512722357, "grad_norm": 1.3709936141967773, "learning_rate": 9.406347717318291e-05, "loss": 1.5944, "step": 1748 }, { "epoch": 0.31506417473542, "grad_norm": 1.5791631937026978, "learning_rate": 9.405678247105083e-05, "loss": 1.9664, "step": 1749 }, { "epoch": 0.3152443143436163, "grad_norm": 1.2927601337432861, "learning_rate": 9.405008423470513e-05, "loss": 1.6538, "step": 1750 }, { "epoch": 0.3154244539518127, "grad_norm": 1.4359164237976074, "learning_rate": 9.404338246468311e-05, "loss": 2.0641, "step": 1751 }, { "epoch": 0.31560459356000903, "grad_norm": 1.5348880290985107, "learning_rate": 9.403667716152242e-05, "loss": 2.4473, "step": 1752 }, { "epoch": 0.3157847331682054, "grad_norm": 1.9629950523376465, "learning_rate": 9.402996832576091e-05, "loss": 2.0529, "step": 1753 }, { "epoch": 0.31596487277640173, "grad_norm": 1.2311313152313232, "learning_rate": 9.402325595793682e-05, "loss": 1.8946, "step": 1754 }, { "epoch": 0.3161450123845981, "grad_norm": 1.2952783107757568, "learning_rate": 9.401654005858855e-05, "loss": 1.957, "step": 1755 }, { "epoch": 0.31632515199279443, "grad_norm": 1.3070710897445679, "learning_rate": 9.40098206282549e-05, "loss": 2.223, "step": 1756 }, { "epoch": 0.3165052916009908, "grad_norm": 1.441473126411438, "learning_rate": 9.400309766747486e-05, "loss": 2.2275, "step": 1757 }, { "epoch": 0.31668543120918713, "grad_norm": 1.5374830961227417, "learning_rate": 9.399637117678776e-05, "loss": 2.2977, "step": 1758 }, { "epoch": 0.3168655708173835, "grad_norm": 1.617869257926941, "learning_rate": 9.398964115673321e-05, "loss": 2.3844, "step": 1759 }, { "epoch": 0.31704571042557983, "grad_norm": 1.7544715404510498, "learning_rate": 9.398290760785105e-05, "loss": 2.1444, "step": 1760 }, { "epoch": 0.3172258500337762, "grad_norm": 1.6874605417251587, "learning_rate": 9.397617053068149e-05, "loss": 2.0249, "step": 1761 }, { "epoch": 0.31740598964197253, "grad_norm": 1.2498189210891724, "learning_rate": 9.396942992576493e-05, "loss": 1.7133, "step": 1762 }, { "epoch": 0.3175861292501689, "grad_norm": 1.309888243675232, "learning_rate": 9.396268579364212e-05, "loss": 1.7252, "step": 1763 }, { "epoch": 0.31776626885836523, "grad_norm": 1.2649097442626953, "learning_rate": 9.395593813485408e-05, "loss": 1.5637, "step": 1764 }, { "epoch": 0.3179464084665616, "grad_norm": 1.283597707748413, "learning_rate": 9.394918694994208e-05, "loss": 1.5893, "step": 1765 }, { "epoch": 0.31812654807475793, "grad_norm": 1.2372602224349976, "learning_rate": 9.394243223944771e-05, "loss": 1.7177, "step": 1766 }, { "epoch": 0.3183066876829543, "grad_norm": 1.2657066583633423, "learning_rate": 9.393567400391285e-05, "loss": 1.6995, "step": 1767 }, { "epoch": 0.31848682729115063, "grad_norm": 1.3661051988601685, "learning_rate": 9.392891224387961e-05, "loss": 1.678, "step": 1768 }, { "epoch": 0.318666966899347, "grad_norm": 1.1339210271835327, "learning_rate": 9.392214695989041e-05, "loss": 1.6526, "step": 1769 }, { "epoch": 0.31884710650754333, "grad_norm": 1.2592577934265137, "learning_rate": 9.391537815248799e-05, "loss": 1.618, "step": 1770 }, { "epoch": 0.3190272461157397, "grad_norm": 1.4403939247131348, "learning_rate": 9.390860582221532e-05, "loss": 1.7451, "step": 1771 }, { "epoch": 0.31920738572393603, "grad_norm": 1.1700098514556885, "learning_rate": 9.390182996961569e-05, "loss": 1.5627, "step": 1772 }, { "epoch": 0.3193875253321324, "grad_norm": 1.1356571912765503, "learning_rate": 9.389505059523263e-05, "loss": 1.5484, "step": 1773 }, { "epoch": 0.31956766494032873, "grad_norm": 1.2463253736495972, "learning_rate": 9.388826769961e-05, "loss": 1.6142, "step": 1774 }, { "epoch": 0.3197478045485251, "grad_norm": 1.32034170627594, "learning_rate": 9.388148128329191e-05, "loss": 1.6295, "step": 1775 }, { "epoch": 0.31992794415672143, "grad_norm": 1.277449131011963, "learning_rate": 9.387469134682277e-05, "loss": 1.7325, "step": 1776 }, { "epoch": 0.32010808376491784, "grad_norm": 1.25298273563385, "learning_rate": 9.386789789074726e-05, "loss": 1.6039, "step": 1777 }, { "epoch": 0.3202882233731142, "grad_norm": 1.286400318145752, "learning_rate": 9.386110091561033e-05, "loss": 1.8384, "step": 1778 }, { "epoch": 0.32046836298131054, "grad_norm": 1.3443750143051147, "learning_rate": 9.385430042195726e-05, "loss": 1.8617, "step": 1779 }, { "epoch": 0.3206485025895069, "grad_norm": 1.327980637550354, "learning_rate": 9.384749641033359e-05, "loss": 2.0242, "step": 1780 }, { "epoch": 0.32082864219770324, "grad_norm": 1.197792887687683, "learning_rate": 9.384068888128509e-05, "loss": 1.6825, "step": 1781 }, { "epoch": 0.3210087818058996, "grad_norm": 1.3268731832504272, "learning_rate": 9.383387783535789e-05, "loss": 1.8954, "step": 1782 }, { "epoch": 0.32118892141409594, "grad_norm": 1.2550289630889893, "learning_rate": 9.382706327309836e-05, "loss": 1.8276, "step": 1783 }, { "epoch": 0.3213690610222923, "grad_norm": 1.3409785032272339, "learning_rate": 9.382024519505316e-05, "loss": 1.8141, "step": 1784 }, { "epoch": 0.32154920063048864, "grad_norm": 1.304237961769104, "learning_rate": 9.381342360176924e-05, "loss": 1.5877, "step": 1785 }, { "epoch": 0.321729340238685, "grad_norm": 1.364743709564209, "learning_rate": 9.380659849379382e-05, "loss": 1.7072, "step": 1786 }, { "epoch": 0.32190947984688134, "grad_norm": 1.255950927734375, "learning_rate": 9.37997698716744e-05, "loss": 1.7769, "step": 1787 }, { "epoch": 0.3220896194550777, "grad_norm": 1.3073413372039795, "learning_rate": 9.379293773595879e-05, "loss": 1.7811, "step": 1788 }, { "epoch": 0.32226975906327404, "grad_norm": 1.4029983282089233, "learning_rate": 9.378610208719502e-05, "loss": 1.8239, "step": 1789 }, { "epoch": 0.3224498986714704, "grad_norm": 1.5238877534866333, "learning_rate": 9.377926292593149e-05, "loss": 1.854, "step": 1790 }, { "epoch": 0.32263003827966674, "grad_norm": 1.1692688465118408, "learning_rate": 9.377242025271679e-05, "loss": 1.4625, "step": 1791 }, { "epoch": 0.3228101778878631, "grad_norm": 1.5369685888290405, "learning_rate": 9.376557406809988e-05, "loss": 2.0254, "step": 1792 }, { "epoch": 0.32299031749605944, "grad_norm": 1.4477719068527222, "learning_rate": 9.375872437262993e-05, "loss": 2.0603, "step": 1793 }, { "epoch": 0.3231704571042558, "grad_norm": 1.3892316818237305, "learning_rate": 9.375187116685642e-05, "loss": 1.8796, "step": 1794 }, { "epoch": 0.32335059671245214, "grad_norm": 1.3259357213974, "learning_rate": 9.374501445132912e-05, "loss": 1.5953, "step": 1795 }, { "epoch": 0.3235307363206485, "grad_norm": 1.4816466569900513, "learning_rate": 9.373815422659806e-05, "loss": 2.0084, "step": 1796 }, { "epoch": 0.32371087592884484, "grad_norm": 1.2615822553634644, "learning_rate": 9.373129049321359e-05, "loss": 1.4401, "step": 1797 }, { "epoch": 0.3238910155370412, "grad_norm": 1.6288646459579468, "learning_rate": 9.372442325172628e-05, "loss": 1.8927, "step": 1798 }, { "epoch": 0.32407115514523754, "grad_norm": 1.5668270587921143, "learning_rate": 9.371755250268704e-05, "loss": 1.6474, "step": 1799 }, { "epoch": 0.3242512947534339, "grad_norm": 1.380558729171753, "learning_rate": 9.371067824664703e-05, "loss": 1.4893, "step": 1800 }, { "epoch": 0.32443143436163024, "grad_norm": 1.5281416177749634, "learning_rate": 9.370380048415771e-05, "loss": 2.2178, "step": 1801 }, { "epoch": 0.3246115739698266, "grad_norm": 1.3303380012512207, "learning_rate": 9.36969192157708e-05, "loss": 2.0225, "step": 1802 }, { "epoch": 0.32479171357802294, "grad_norm": 1.4043996334075928, "learning_rate": 9.369003444203831e-05, "loss": 2.2893, "step": 1803 }, { "epoch": 0.3249718531862193, "grad_norm": 1.7341221570968628, "learning_rate": 9.368314616351253e-05, "loss": 2.1961, "step": 1804 }, { "epoch": 0.32515199279441565, "grad_norm": 1.37443208694458, "learning_rate": 9.367625438074606e-05, "loss": 1.9917, "step": 1805 }, { "epoch": 0.32533213240261205, "grad_norm": 1.9039502143859863, "learning_rate": 9.366935909429174e-05, "loss": 2.2042, "step": 1806 }, { "epoch": 0.3255122720108084, "grad_norm": 1.275850534439087, "learning_rate": 9.366246030470268e-05, "loss": 1.9785, "step": 1807 }, { "epoch": 0.32569241161900475, "grad_norm": 1.369605302810669, "learning_rate": 9.365555801253234e-05, "loss": 1.8925, "step": 1808 }, { "epoch": 0.3258725512272011, "grad_norm": 1.7239036560058594, "learning_rate": 9.364865221833439e-05, "loss": 2.5742, "step": 1809 }, { "epoch": 0.32605269083539745, "grad_norm": 2.1848268508911133, "learning_rate": 9.364174292266284e-05, "loss": 2.4709, "step": 1810 }, { "epoch": 0.3262328304435938, "grad_norm": 1.3390096426010132, "learning_rate": 9.36348301260719e-05, "loss": 2.0305, "step": 1811 }, { "epoch": 0.32641297005179015, "grad_norm": 1.4194066524505615, "learning_rate": 9.362791382911615e-05, "loss": 1.9321, "step": 1812 }, { "epoch": 0.3265931096599865, "grad_norm": 1.1762449741363525, "learning_rate": 9.362099403235041e-05, "loss": 1.7828, "step": 1813 }, { "epoch": 0.32677324926818285, "grad_norm": 1.2199952602386475, "learning_rate": 9.361407073632977e-05, "loss": 1.4752, "step": 1814 }, { "epoch": 0.3269533888763792, "grad_norm": 1.3213188648223877, "learning_rate": 9.360714394160964e-05, "loss": 1.8439, "step": 1815 }, { "epoch": 0.32713352848457555, "grad_norm": 1.196629524230957, "learning_rate": 9.360021364874563e-05, "loss": 1.4599, "step": 1816 }, { "epoch": 0.3273136680927719, "grad_norm": 1.333306074142456, "learning_rate": 9.359327985829374e-05, "loss": 1.82, "step": 1817 }, { "epoch": 0.32749380770096825, "grad_norm": 1.2444415092468262, "learning_rate": 9.358634257081016e-05, "loss": 1.8003, "step": 1818 }, { "epoch": 0.3276739473091646, "grad_norm": 1.317900538444519, "learning_rate": 9.357940178685142e-05, "loss": 1.5071, "step": 1819 }, { "epoch": 0.32785408691736095, "grad_norm": 1.3687241077423096, "learning_rate": 9.35724575069743e-05, "loss": 1.8539, "step": 1820 }, { "epoch": 0.3280342265255573, "grad_norm": 1.2784526348114014, "learning_rate": 9.356550973173585e-05, "loss": 1.7341, "step": 1821 }, { "epoch": 0.32821436613375365, "grad_norm": 1.607740044593811, "learning_rate": 9.355855846169344e-05, "loss": 1.7791, "step": 1822 }, { "epoch": 0.32839450574195, "grad_norm": 1.2505072355270386, "learning_rate": 9.355160369740467e-05, "loss": 1.5663, "step": 1823 }, { "epoch": 0.32857464535014635, "grad_norm": 1.281872272491455, "learning_rate": 9.354464543942747e-05, "loss": 1.7267, "step": 1824 }, { "epoch": 0.3287547849583427, "grad_norm": 1.2124817371368408, "learning_rate": 9.353768368832004e-05, "loss": 1.4913, "step": 1825 }, { "epoch": 0.32893492456653906, "grad_norm": 1.2875710725784302, "learning_rate": 9.353071844464081e-05, "loss": 1.7825, "step": 1826 }, { "epoch": 0.3291150641747354, "grad_norm": 1.3107181787490845, "learning_rate": 9.352374970894856e-05, "loss": 1.62, "step": 1827 }, { "epoch": 0.32929520378293176, "grad_norm": 1.2903443574905396, "learning_rate": 9.35167774818023e-05, "loss": 1.8159, "step": 1828 }, { "epoch": 0.3294753433911281, "grad_norm": 1.3100007772445679, "learning_rate": 9.350980176376137e-05, "loss": 1.5669, "step": 1829 }, { "epoch": 0.32965548299932446, "grad_norm": 1.2323390245437622, "learning_rate": 9.350282255538531e-05, "loss": 1.7937, "step": 1830 }, { "epoch": 0.3298356226075208, "grad_norm": 1.3646736145019531, "learning_rate": 9.349583985723402e-05, "loss": 2.0431, "step": 1831 }, { "epoch": 0.33001576221571716, "grad_norm": 1.346835970878601, "learning_rate": 9.348885366986765e-05, "loss": 1.8195, "step": 1832 }, { "epoch": 0.3301959018239135, "grad_norm": 1.4719043970108032, "learning_rate": 9.348186399384662e-05, "loss": 1.7148, "step": 1833 }, { "epoch": 0.3303760414321099, "grad_norm": 1.3289631605148315, "learning_rate": 9.347487082973163e-05, "loss": 1.8976, "step": 1834 }, { "epoch": 0.33055618104030626, "grad_norm": 1.4127905368804932, "learning_rate": 9.346787417808369e-05, "loss": 1.9273, "step": 1835 }, { "epoch": 0.3307363206485026, "grad_norm": 1.272700548171997, "learning_rate": 9.346087403946404e-05, "loss": 1.8676, "step": 1836 }, { "epoch": 0.33091646025669896, "grad_norm": 1.1171737909317017, "learning_rate": 9.345387041443426e-05, "loss": 1.5226, "step": 1837 }, { "epoch": 0.3310965998648953, "grad_norm": 1.3365083932876587, "learning_rate": 9.344686330355614e-05, "loss": 1.8689, "step": 1838 }, { "epoch": 0.33127673947309166, "grad_norm": 1.411765217781067, "learning_rate": 9.343985270739182e-05, "loss": 1.8712, "step": 1839 }, { "epoch": 0.331456879081288, "grad_norm": 1.3648669719696045, "learning_rate": 9.34328386265037e-05, "loss": 1.9321, "step": 1840 }, { "epoch": 0.33163701868948436, "grad_norm": 1.2787798643112183, "learning_rate": 9.342582106145439e-05, "loss": 1.5218, "step": 1841 }, { "epoch": 0.3318171582976807, "grad_norm": 1.4488646984100342, "learning_rate": 9.341880001280687e-05, "loss": 1.9806, "step": 1842 }, { "epoch": 0.33199729790587706, "grad_norm": 1.433493971824646, "learning_rate": 9.341177548112436e-05, "loss": 1.9, "step": 1843 }, { "epoch": 0.3321774375140734, "grad_norm": 1.4048950672149658, "learning_rate": 9.340474746697036e-05, "loss": 1.6446, "step": 1844 }, { "epoch": 0.33235757712226977, "grad_norm": 1.245510220527649, "learning_rate": 9.339771597090869e-05, "loss": 1.6488, "step": 1845 }, { "epoch": 0.3325377167304661, "grad_norm": 1.2737582921981812, "learning_rate": 9.339068099350337e-05, "loss": 1.4383, "step": 1846 }, { "epoch": 0.33271785633866247, "grad_norm": 1.3664889335632324, "learning_rate": 9.338364253531874e-05, "loss": 1.8731, "step": 1847 }, { "epoch": 0.3328979959468588, "grad_norm": 1.336047887802124, "learning_rate": 9.337660059691946e-05, "loss": 1.63, "step": 1848 }, { "epoch": 0.33307813555505517, "grad_norm": 1.3905073404312134, "learning_rate": 9.336955517887039e-05, "loss": 1.7776, "step": 1849 }, { "epoch": 0.3332582751632515, "grad_norm": 1.3520543575286865, "learning_rate": 9.336250628173673e-05, "loss": 1.5627, "step": 1850 }, { "epoch": 0.33343841477144787, "grad_norm": 1.3104815483093262, "learning_rate": 9.335545390608393e-05, "loss": 2.0544, "step": 1851 }, { "epoch": 0.3336185543796442, "grad_norm": 1.2332983016967773, "learning_rate": 9.334839805247775e-05, "loss": 2.0492, "step": 1852 }, { "epoch": 0.33379869398784057, "grad_norm": 1.460875153541565, "learning_rate": 9.334133872148418e-05, "loss": 2.1039, "step": 1853 }, { "epoch": 0.3339788335960369, "grad_norm": 1.4161427021026611, "learning_rate": 9.333427591366954e-05, "loss": 2.0569, "step": 1854 }, { "epoch": 0.33415897320423327, "grad_norm": 1.276246428489685, "learning_rate": 9.33272096296004e-05, "loss": 2.0297, "step": 1855 }, { "epoch": 0.3343391128124296, "grad_norm": 1.3764469623565674, "learning_rate": 9.33201398698436e-05, "loss": 2.4107, "step": 1856 }, { "epoch": 0.33451925242062597, "grad_norm": 1.3868846893310547, "learning_rate": 9.331306663496627e-05, "loss": 1.7107, "step": 1857 }, { "epoch": 0.3346993920288223, "grad_norm": 2.7034528255462646, "learning_rate": 9.330598992553585e-05, "loss": 2.3245, "step": 1858 }, { "epoch": 0.33487953163701867, "grad_norm": 1.936898112297058, "learning_rate": 9.329890974211999e-05, "loss": 2.3913, "step": 1859 }, { "epoch": 0.335059671245215, "grad_norm": 1.9338799715042114, "learning_rate": 9.329182608528671e-05, "loss": 2.6362, "step": 1860 }, { "epoch": 0.33523981085341137, "grad_norm": 1.5080702304840088, "learning_rate": 9.32847389556042e-05, "loss": 2.0042, "step": 1861 }, { "epoch": 0.3354199504616077, "grad_norm": 1.262425422668457, "learning_rate": 9.327764835364102e-05, "loss": 1.7909, "step": 1862 }, { "epoch": 0.3356000900698041, "grad_norm": 1.2286747694015503, "learning_rate": 9.327055427996598e-05, "loss": 1.5188, "step": 1863 }, { "epoch": 0.3357802296780005, "grad_norm": 1.3243101835250854, "learning_rate": 9.326345673514813e-05, "loss": 1.6541, "step": 1864 }, { "epoch": 0.3359603692861968, "grad_norm": 1.3640878200531006, "learning_rate": 9.325635571975688e-05, "loss": 1.9318, "step": 1865 }, { "epoch": 0.3361405088943932, "grad_norm": 1.3129050731658936, "learning_rate": 9.324925123436183e-05, "loss": 1.8943, "step": 1866 }, { "epoch": 0.3363206485025895, "grad_norm": 1.4156001806259155, "learning_rate": 9.324214327953292e-05, "loss": 1.8235, "step": 1867 }, { "epoch": 0.3365007881107859, "grad_norm": 1.2496826648712158, "learning_rate": 9.323503185584032e-05, "loss": 1.6156, "step": 1868 }, { "epoch": 0.3366809277189822, "grad_norm": 1.4432305097579956, "learning_rate": 9.322791696385454e-05, "loss": 1.9378, "step": 1869 }, { "epoch": 0.3368610673271786, "grad_norm": 1.3068046569824219, "learning_rate": 9.322079860414632e-05, "loss": 1.8379, "step": 1870 }, { "epoch": 0.3370412069353749, "grad_norm": 1.2956987619400024, "learning_rate": 9.321367677728667e-05, "loss": 1.862, "step": 1871 }, { "epoch": 0.3372213465435713, "grad_norm": 1.2337764501571655, "learning_rate": 9.320655148384694e-05, "loss": 1.6232, "step": 1872 }, { "epoch": 0.3374014861517676, "grad_norm": 1.186627745628357, "learning_rate": 9.319942272439867e-05, "loss": 1.6559, "step": 1873 }, { "epoch": 0.337581625759964, "grad_norm": 1.2476738691329956, "learning_rate": 9.319229049951378e-05, "loss": 1.8412, "step": 1874 }, { "epoch": 0.3377617653681603, "grad_norm": 1.2603083848953247, "learning_rate": 9.318515480976437e-05, "loss": 1.6934, "step": 1875 }, { "epoch": 0.3379419049763567, "grad_norm": 1.255574345588684, "learning_rate": 9.317801565572287e-05, "loss": 1.615, "step": 1876 }, { "epoch": 0.338122044584553, "grad_norm": 1.4196699857711792, "learning_rate": 9.3170873037962e-05, "loss": 1.872, "step": 1877 }, { "epoch": 0.3383021841927494, "grad_norm": 1.3192800283432007, "learning_rate": 9.316372695705471e-05, "loss": 1.8323, "step": 1878 }, { "epoch": 0.33848232380094573, "grad_norm": 1.3001998662948608, "learning_rate": 9.315657741357427e-05, "loss": 1.6802, "step": 1879 }, { "epoch": 0.3386624634091421, "grad_norm": 1.3281223773956299, "learning_rate": 9.314942440809421e-05, "loss": 1.702, "step": 1880 }, { "epoch": 0.33884260301733843, "grad_norm": 1.3211722373962402, "learning_rate": 9.314226794118834e-05, "loss": 1.7073, "step": 1881 }, { "epoch": 0.3390227426255348, "grad_norm": 1.2838819026947021, "learning_rate": 9.313510801343075e-05, "loss": 1.7984, "step": 1882 }, { "epoch": 0.33920288223373113, "grad_norm": 1.1914340257644653, "learning_rate": 9.312794462539581e-05, "loss": 1.7057, "step": 1883 }, { "epoch": 0.3393830218419275, "grad_norm": 1.2363718748092651, "learning_rate": 9.312077777765816e-05, "loss": 1.531, "step": 1884 }, { "epoch": 0.33956316145012383, "grad_norm": 1.5167639255523682, "learning_rate": 9.311360747079272e-05, "loss": 1.8029, "step": 1885 }, { "epoch": 0.3397433010583202, "grad_norm": 1.523821473121643, "learning_rate": 9.310643370537469e-05, "loss": 1.8245, "step": 1886 }, { "epoch": 0.33992344066651653, "grad_norm": 1.2223461866378784, "learning_rate": 9.309925648197952e-05, "loss": 1.5311, "step": 1887 }, { "epoch": 0.3401035802747129, "grad_norm": 1.393572211265564, "learning_rate": 9.309207580118302e-05, "loss": 1.9235, "step": 1888 }, { "epoch": 0.34028371988290923, "grad_norm": 1.4057350158691406, "learning_rate": 9.308489166356117e-05, "loss": 1.8884, "step": 1889 }, { "epoch": 0.3404638594911056, "grad_norm": 1.3276938199996948, "learning_rate": 9.30777040696903e-05, "loss": 1.6955, "step": 1890 }, { "epoch": 0.340643999099302, "grad_norm": 1.316297173500061, "learning_rate": 9.307051302014701e-05, "loss": 1.7297, "step": 1891 }, { "epoch": 0.34082413870749834, "grad_norm": 1.4817277193069458, "learning_rate": 9.306331851550813e-05, "loss": 1.9366, "step": 1892 }, { "epoch": 0.3410042783156947, "grad_norm": 1.260643720626831, "learning_rate": 9.30561205563508e-05, "loss": 1.7532, "step": 1893 }, { "epoch": 0.34118441792389104, "grad_norm": 1.4127812385559082, "learning_rate": 9.304891914325247e-05, "loss": 1.69, "step": 1894 }, { "epoch": 0.3413645575320874, "grad_norm": 1.4184190034866333, "learning_rate": 9.304171427679081e-05, "loss": 1.8321, "step": 1895 }, { "epoch": 0.34154469714028374, "grad_norm": 1.4236093759536743, "learning_rate": 9.30345059575438e-05, "loss": 1.825, "step": 1896 }, { "epoch": 0.3417248367484801, "grad_norm": 1.3687212467193604, "learning_rate": 9.302729418608969e-05, "loss": 1.6191, "step": 1897 }, { "epoch": 0.34190497635667644, "grad_norm": 1.453348994255066, "learning_rate": 9.302007896300698e-05, "loss": 1.6804, "step": 1898 }, { "epoch": 0.3420851159648728, "grad_norm": 1.274766445159912, "learning_rate": 9.301286028887449e-05, "loss": 1.4069, "step": 1899 }, { "epoch": 0.34226525557306914, "grad_norm": 1.2411935329437256, "learning_rate": 9.30056381642713e-05, "loss": 1.8631, "step": 1900 }, { "epoch": 0.3424453951812655, "grad_norm": 1.3289551734924316, "learning_rate": 9.299841258977678e-05, "loss": 2.3693, "step": 1901 }, { "epoch": 0.34262553478946184, "grad_norm": 1.3542392253875732, "learning_rate": 9.299118356597053e-05, "loss": 2.1098, "step": 1902 }, { "epoch": 0.3428056743976582, "grad_norm": 1.5168741941452026, "learning_rate": 9.298395109343247e-05, "loss": 2.1701, "step": 1903 }, { "epoch": 0.34298581400585454, "grad_norm": 1.4525636434555054, "learning_rate": 9.29767151727428e-05, "loss": 2.2253, "step": 1904 }, { "epoch": 0.3431659536140509, "grad_norm": 1.313673496246338, "learning_rate": 9.296947580448199e-05, "loss": 2.1184, "step": 1905 }, { "epoch": 0.34334609322224724, "grad_norm": 1.2006787061691284, "learning_rate": 9.296223298923073e-05, "loss": 1.9584, "step": 1906 }, { "epoch": 0.3435262328304436, "grad_norm": 1.3107112646102905, "learning_rate": 9.295498672757009e-05, "loss": 2.2213, "step": 1907 }, { "epoch": 0.34370637243863994, "grad_norm": 2.0840630531311035, "learning_rate": 9.294773702008131e-05, "loss": 2.2139, "step": 1908 }, { "epoch": 0.3438865120468363, "grad_norm": 1.8646247386932373, "learning_rate": 9.2940483867346e-05, "loss": 1.9821, "step": 1909 }, { "epoch": 0.34406665165503264, "grad_norm": 1.9690399169921875, "learning_rate": 9.2933227269946e-05, "loss": 2.6646, "step": 1910 }, { "epoch": 0.344246791263229, "grad_norm": 1.2502071857452393, "learning_rate": 9.29259672284634e-05, "loss": 1.6817, "step": 1911 }, { "epoch": 0.34442693087142534, "grad_norm": 1.3751085996627808, "learning_rate": 9.291870374348063e-05, "loss": 1.8343, "step": 1912 }, { "epoch": 0.3446070704796217, "grad_norm": 1.3243052959442139, "learning_rate": 9.291143681558035e-05, "loss": 1.788, "step": 1913 }, { "epoch": 0.34478721008781804, "grad_norm": 1.3268070220947266, "learning_rate": 9.29041664453455e-05, "loss": 1.4985, "step": 1914 }, { "epoch": 0.3449673496960144, "grad_norm": 1.3265726566314697, "learning_rate": 9.289689263335933e-05, "loss": 1.8567, "step": 1915 }, { "epoch": 0.34514748930421074, "grad_norm": 1.3508665561676025, "learning_rate": 9.288961538020533e-05, "loss": 1.6855, "step": 1916 }, { "epoch": 0.3453276289124071, "grad_norm": 1.24409818649292, "learning_rate": 9.288233468646726e-05, "loss": 1.6284, "step": 1917 }, { "epoch": 0.34550776852060344, "grad_norm": 1.3220150470733643, "learning_rate": 9.28750505527292e-05, "loss": 1.7913, "step": 1918 }, { "epoch": 0.3456879081287998, "grad_norm": 1.2090907096862793, "learning_rate": 9.286776297957546e-05, "loss": 1.5898, "step": 1919 }, { "epoch": 0.3458680477369962, "grad_norm": 1.1936092376708984, "learning_rate": 9.286047196759066e-05, "loss": 1.6078, "step": 1920 }, { "epoch": 0.34604818734519255, "grad_norm": 1.2637219429016113, "learning_rate": 9.285317751735967e-05, "loss": 1.7801, "step": 1921 }, { "epoch": 0.3462283269533889, "grad_norm": 1.2839415073394775, "learning_rate": 9.284587962946765e-05, "loss": 1.9403, "step": 1922 }, { "epoch": 0.34640846656158525, "grad_norm": 1.4117120504379272, "learning_rate": 9.283857830450004e-05, "loss": 2.1124, "step": 1923 }, { "epoch": 0.3465886061697816, "grad_norm": 1.3012604713439941, "learning_rate": 9.283127354304255e-05, "loss": 1.6945, "step": 1924 }, { "epoch": 0.34676874577797795, "grad_norm": 1.2102127075195312, "learning_rate": 9.282396534568116e-05, "loss": 1.7112, "step": 1925 }, { "epoch": 0.3469488853861743, "grad_norm": 1.2092266082763672, "learning_rate": 9.281665371300212e-05, "loss": 1.8522, "step": 1926 }, { "epoch": 0.34712902499437065, "grad_norm": 1.4207414388656616, "learning_rate": 9.280933864559198e-05, "loss": 1.9325, "step": 1927 }, { "epoch": 0.347309164602567, "grad_norm": 1.3066776990890503, "learning_rate": 9.280202014403754e-05, "loss": 1.8754, "step": 1928 }, { "epoch": 0.34748930421076335, "grad_norm": 1.2406049966812134, "learning_rate": 9.27946982089259e-05, "loss": 1.8758, "step": 1929 }, { "epoch": 0.3476694438189597, "grad_norm": 1.3070012331008911, "learning_rate": 9.27873728408444e-05, "loss": 1.9121, "step": 1930 }, { "epoch": 0.34784958342715605, "grad_norm": 1.199819803237915, "learning_rate": 9.278004404038069e-05, "loss": 1.8932, "step": 1931 }, { "epoch": 0.3480297230353524, "grad_norm": 1.3043848276138306, "learning_rate": 9.277271180812271e-05, "loss": 1.668, "step": 1932 }, { "epoch": 0.34820986264354875, "grad_norm": 1.2169181108474731, "learning_rate": 9.276537614465862e-05, "loss": 1.8083, "step": 1933 }, { "epoch": 0.3483900022517451, "grad_norm": 1.2651370763778687, "learning_rate": 9.275803705057686e-05, "loss": 1.565, "step": 1934 }, { "epoch": 0.34857014185994145, "grad_norm": 1.3813928365707397, "learning_rate": 9.275069452646621e-05, "loss": 1.6281, "step": 1935 }, { "epoch": 0.3487502814681378, "grad_norm": 1.2649669647216797, "learning_rate": 9.274334857291567e-05, "loss": 1.5485, "step": 1936 }, { "epoch": 0.34893042107633415, "grad_norm": 1.406266212463379, "learning_rate": 9.273599919051452e-05, "loss": 1.7461, "step": 1937 }, { "epoch": 0.3491105606845305, "grad_norm": 1.3586225509643555, "learning_rate": 9.272864637985234e-05, "loss": 2.2013, "step": 1938 }, { "epoch": 0.34929070029272685, "grad_norm": 1.3642197847366333, "learning_rate": 9.272129014151896e-05, "loss": 1.9002, "step": 1939 }, { "epoch": 0.3494708399009232, "grad_norm": 1.472520112991333, "learning_rate": 9.271393047610448e-05, "loss": 1.9047, "step": 1940 }, { "epoch": 0.34965097950911955, "grad_norm": 1.470763921737671, "learning_rate": 9.27065673841993e-05, "loss": 1.9646, "step": 1941 }, { "epoch": 0.3498311191173159, "grad_norm": 1.3430933952331543, "learning_rate": 9.26992008663941e-05, "loss": 1.6854, "step": 1942 }, { "epoch": 0.35001125872551225, "grad_norm": 1.2034293413162231, "learning_rate": 9.26918309232798e-05, "loss": 1.5191, "step": 1943 }, { "epoch": 0.3501913983337086, "grad_norm": 1.3794699907302856, "learning_rate": 9.268445755544763e-05, "loss": 1.7684, "step": 1944 }, { "epoch": 0.35037153794190495, "grad_norm": 1.440076470375061, "learning_rate": 9.267708076348904e-05, "loss": 1.6696, "step": 1945 }, { "epoch": 0.3505516775501013, "grad_norm": 1.313086748123169, "learning_rate": 9.266970054799584e-05, "loss": 1.4966, "step": 1946 }, { "epoch": 0.35073181715829765, "grad_norm": 1.2346845865249634, "learning_rate": 9.266231690956004e-05, "loss": 1.6228, "step": 1947 }, { "epoch": 0.35091195676649406, "grad_norm": 1.6065713167190552, "learning_rate": 9.265492984877396e-05, "loss": 1.9179, "step": 1948 }, { "epoch": 0.3510920963746904, "grad_norm": 1.4420204162597656, "learning_rate": 9.264753936623017e-05, "loss": 1.6355, "step": 1949 }, { "epoch": 0.35127223598288676, "grad_norm": 1.321545958518982, "learning_rate": 9.264014546252155e-05, "loss": 1.6243, "step": 1950 }, { "epoch": 0.3514523755910831, "grad_norm": 1.5253962278366089, "learning_rate": 9.263274813824125e-05, "loss": 2.0982, "step": 1951 }, { "epoch": 0.35163251519927946, "grad_norm": 2.1106340885162354, "learning_rate": 9.262534739398264e-05, "loss": 2.1145, "step": 1952 }, { "epoch": 0.3518126548074758, "grad_norm": 1.6434231996536255, "learning_rate": 9.261794323033943e-05, "loss": 2.0523, "step": 1953 }, { "epoch": 0.35199279441567216, "grad_norm": 1.1787556409835815, "learning_rate": 9.261053564790558e-05, "loss": 2.0348, "step": 1954 }, { "epoch": 0.3521729340238685, "grad_norm": 1.3213731050491333, "learning_rate": 9.26031246472753e-05, "loss": 1.8265, "step": 1955 }, { "epoch": 0.35235307363206486, "grad_norm": 1.4476991891860962, "learning_rate": 9.259571022904312e-05, "loss": 2.2012, "step": 1956 }, { "epoch": 0.3525332132402612, "grad_norm": 1.560680627822876, "learning_rate": 9.25882923938038e-05, "loss": 2.1016, "step": 1957 }, { "epoch": 0.35271335284845756, "grad_norm": 1.5593386888504028, "learning_rate": 9.258087114215243e-05, "loss": 2.3905, "step": 1958 }, { "epoch": 0.3528934924566539, "grad_norm": 2.4309866428375244, "learning_rate": 9.257344647468431e-05, "loss": 2.441, "step": 1959 }, { "epoch": 0.35307363206485026, "grad_norm": 1.4332079887390137, "learning_rate": 9.256601839199506e-05, "loss": 2.1198, "step": 1960 }, { "epoch": 0.3532537716730466, "grad_norm": 1.2350066900253296, "learning_rate": 9.255858689468054e-05, "loss": 1.687, "step": 1961 }, { "epoch": 0.35343391128124296, "grad_norm": 1.3483506441116333, "learning_rate": 9.255115198333689e-05, "loss": 1.9088, "step": 1962 }, { "epoch": 0.3536140508894393, "grad_norm": 1.2272214889526367, "learning_rate": 9.254371365856058e-05, "loss": 1.7149, "step": 1963 }, { "epoch": 0.35379419049763566, "grad_norm": 1.189282774925232, "learning_rate": 9.253627192094827e-05, "loss": 1.6502, "step": 1964 }, { "epoch": 0.353974330105832, "grad_norm": 1.2303739786148071, "learning_rate": 9.252882677109693e-05, "loss": 1.7893, "step": 1965 }, { "epoch": 0.35415446971402836, "grad_norm": 1.2383229732513428, "learning_rate": 9.252137820960384e-05, "loss": 1.7523, "step": 1966 }, { "epoch": 0.3543346093222247, "grad_norm": 1.2676115036010742, "learning_rate": 9.251392623706651e-05, "loss": 1.675, "step": 1967 }, { "epoch": 0.35451474893042106, "grad_norm": 1.1664024591445923, "learning_rate": 9.25064708540827e-05, "loss": 1.5168, "step": 1968 }, { "epoch": 0.3546948885386174, "grad_norm": 1.1627534627914429, "learning_rate": 9.249901206125053e-05, "loss": 1.4811, "step": 1969 }, { "epoch": 0.35487502814681376, "grad_norm": 1.2342137098312378, "learning_rate": 9.249154985916831e-05, "loss": 1.5882, "step": 1970 }, { "epoch": 0.3550551677550101, "grad_norm": 1.2362819910049438, "learning_rate": 9.248408424843464e-05, "loss": 1.7096, "step": 1971 }, { "epoch": 0.35523530736320647, "grad_norm": 1.2362438440322876, "learning_rate": 9.247661522964844e-05, "loss": 1.6364, "step": 1972 }, { "epoch": 0.3554154469714028, "grad_norm": 1.391287922859192, "learning_rate": 9.246914280340884e-05, "loss": 1.9177, "step": 1973 }, { "epoch": 0.35559558657959917, "grad_norm": 1.316846489906311, "learning_rate": 9.246166697031529e-05, "loss": 1.6523, "step": 1974 }, { "epoch": 0.3557757261877955, "grad_norm": 1.258087158203125, "learning_rate": 9.245418773096751e-05, "loss": 1.6798, "step": 1975 }, { "epoch": 0.3559558657959919, "grad_norm": 1.675063133239746, "learning_rate": 9.244670508596547e-05, "loss": 2.1753, "step": 1976 }, { "epoch": 0.35613600540418827, "grad_norm": 1.3191180229187012, "learning_rate": 9.243921903590942e-05, "loss": 1.6388, "step": 1977 }, { "epoch": 0.3563161450123846, "grad_norm": 1.2297879457473755, "learning_rate": 9.243172958139989e-05, "loss": 1.6644, "step": 1978 }, { "epoch": 0.35649628462058097, "grad_norm": 1.2635867595672607, "learning_rate": 9.242423672303768e-05, "loss": 1.7228, "step": 1979 }, { "epoch": 0.3566764242287773, "grad_norm": 1.3461564779281616, "learning_rate": 9.241674046142386e-05, "loss": 1.9923, "step": 1980 }, { "epoch": 0.3568565638369737, "grad_norm": 1.2456141710281372, "learning_rate": 9.240924079715977e-05, "loss": 1.7294, "step": 1981 }, { "epoch": 0.35703670344517, "grad_norm": 1.3183419704437256, "learning_rate": 9.240173773084706e-05, "loss": 1.747, "step": 1982 }, { "epoch": 0.3572168430533664, "grad_norm": 1.2270807027816772, "learning_rate": 9.239423126308759e-05, "loss": 1.6177, "step": 1983 }, { "epoch": 0.3573969826615627, "grad_norm": 1.2759424448013306, "learning_rate": 9.238672139448354e-05, "loss": 1.7872, "step": 1984 }, { "epoch": 0.3575771222697591, "grad_norm": 1.3207776546478271, "learning_rate": 9.237920812563736e-05, "loss": 1.7999, "step": 1985 }, { "epoch": 0.3577572618779554, "grad_norm": 1.4106075763702393, "learning_rate": 9.237169145715175e-05, "loss": 1.7686, "step": 1986 }, { "epoch": 0.3579374014861518, "grad_norm": 1.3376574516296387, "learning_rate": 9.236417138962966e-05, "loss": 1.6492, "step": 1987 }, { "epoch": 0.3581175410943481, "grad_norm": 1.3242768049240112, "learning_rate": 9.23566479236744e-05, "loss": 1.6214, "step": 1988 }, { "epoch": 0.3582976807025445, "grad_norm": 1.199057936668396, "learning_rate": 9.234912105988947e-05, "loss": 1.7488, "step": 1989 }, { "epoch": 0.3584778203107408, "grad_norm": 1.4324913024902344, "learning_rate": 9.234159079887867e-05, "loss": 1.5512, "step": 1990 }, { "epoch": 0.3586579599189372, "grad_norm": 1.341431975364685, "learning_rate": 9.233405714124608e-05, "loss": 1.699, "step": 1991 }, { "epoch": 0.3588380995271335, "grad_norm": 1.3021507263183594, "learning_rate": 9.232652008759605e-05, "loss": 1.8533, "step": 1992 }, { "epoch": 0.3590182391353299, "grad_norm": 1.3398795127868652, "learning_rate": 9.231897963853318e-05, "loss": 1.7793, "step": 1993 }, { "epoch": 0.3591983787435262, "grad_norm": 1.301111102104187, "learning_rate": 9.231143579466239e-05, "loss": 1.6081, "step": 1994 }, { "epoch": 0.3593785183517226, "grad_norm": 1.285243034362793, "learning_rate": 9.230388855658884e-05, "loss": 1.6836, "step": 1995 }, { "epoch": 0.3595586579599189, "grad_norm": 1.3567155599594116, "learning_rate": 9.229633792491795e-05, "loss": 1.6616, "step": 1996 }, { "epoch": 0.3597387975681153, "grad_norm": 1.3073797225952148, "learning_rate": 9.228878390025543e-05, "loss": 1.7619, "step": 1997 }, { "epoch": 0.3599189371763116, "grad_norm": 1.5632058382034302, "learning_rate": 9.228122648320725e-05, "loss": 2.1415, "step": 1998 }, { "epoch": 0.360099076784508, "grad_norm": 1.3863821029663086, "learning_rate": 9.227366567437969e-05, "loss": 1.6922, "step": 1999 }, { "epoch": 0.3602792163927043, "grad_norm": 1.2061693668365479, "learning_rate": 9.226610147437926e-05, "loss": 1.3197, "step": 2000 }, { "epoch": 0.3604593560009007, "grad_norm": 1.421900749206543, "learning_rate": 9.225853388381275e-05, "loss": 2.2009, "step": 2001 }, { "epoch": 0.360639495609097, "grad_norm": 1.434679627418518, "learning_rate": 9.225096290328725e-05, "loss": 2.0606, "step": 2002 }, { "epoch": 0.3608196352172934, "grad_norm": 1.2855474948883057, "learning_rate": 9.224338853341008e-05, "loss": 2.006, "step": 2003 }, { "epoch": 0.3609997748254897, "grad_norm": 1.268730640411377, "learning_rate": 9.223581077478885e-05, "loss": 1.8865, "step": 2004 }, { "epoch": 0.36117991443368613, "grad_norm": 1.4431568384170532, "learning_rate": 9.222822962803146e-05, "loss": 2.3008, "step": 2005 }, { "epoch": 0.3613600540418825, "grad_norm": 1.2382725477218628, "learning_rate": 9.222064509374605e-05, "loss": 1.6778, "step": 2006 }, { "epoch": 0.36154019365007883, "grad_norm": 1.622361183166504, "learning_rate": 9.221305717254105e-05, "loss": 2.029, "step": 2007 }, { "epoch": 0.3617203332582752, "grad_norm": 1.5106502771377563, "learning_rate": 9.220546586502519e-05, "loss": 2.3058, "step": 2008 }, { "epoch": 0.36190047286647153, "grad_norm": 1.5847405195236206, "learning_rate": 9.21978711718074e-05, "loss": 2.3521, "step": 2009 }, { "epoch": 0.3620806124746679, "grad_norm": 1.8725775480270386, "learning_rate": 9.219027309349692e-05, "loss": 2.6225, "step": 2010 }, { "epoch": 0.36226075208286423, "grad_norm": 1.914020299911499, "learning_rate": 9.218267163070332e-05, "loss": 2.2776, "step": 2011 }, { "epoch": 0.3624408916910606, "grad_norm": 1.284096598625183, "learning_rate": 9.217506678403633e-05, "loss": 1.7909, "step": 2012 }, { "epoch": 0.36262103129925694, "grad_norm": 1.2840335369110107, "learning_rate": 9.216745855410602e-05, "loss": 1.6491, "step": 2013 }, { "epoch": 0.3628011709074533, "grad_norm": 1.3496419191360474, "learning_rate": 9.215984694152273e-05, "loss": 1.9515, "step": 2014 }, { "epoch": 0.36298131051564964, "grad_norm": 1.2450848817825317, "learning_rate": 9.215223194689705e-05, "loss": 1.6875, "step": 2015 }, { "epoch": 0.363161450123846, "grad_norm": 1.1996676921844482, "learning_rate": 9.214461357083985e-05, "loss": 1.6659, "step": 2016 }, { "epoch": 0.36334158973204234, "grad_norm": 1.3575389385223389, "learning_rate": 9.21369918139623e-05, "loss": 1.622, "step": 2017 }, { "epoch": 0.3635217293402387, "grad_norm": 1.3158403635025024, "learning_rate": 9.212936667687578e-05, "loss": 1.7217, "step": 2018 }, { "epoch": 0.36370186894843504, "grad_norm": 1.3368136882781982, "learning_rate": 9.212173816019197e-05, "loss": 1.7732, "step": 2019 }, { "epoch": 0.3638820085566314, "grad_norm": 1.40514075756073, "learning_rate": 9.211410626452286e-05, "loss": 1.9252, "step": 2020 }, { "epoch": 0.36406214816482774, "grad_norm": 1.1953845024108887, "learning_rate": 9.210647099048064e-05, "loss": 1.6109, "step": 2021 }, { "epoch": 0.3642422877730241, "grad_norm": 1.245178461074829, "learning_rate": 9.209883233867784e-05, "loss": 1.693, "step": 2022 }, { "epoch": 0.36442242738122044, "grad_norm": 1.197798252105713, "learning_rate": 9.209119030972722e-05, "loss": 1.703, "step": 2023 }, { "epoch": 0.3646025669894168, "grad_norm": 1.398685336112976, "learning_rate": 9.20835449042418e-05, "loss": 1.9915, "step": 2024 }, { "epoch": 0.36478270659761314, "grad_norm": 1.2733819484710693, "learning_rate": 9.207589612283492e-05, "loss": 1.7829, "step": 2025 }, { "epoch": 0.3649628462058095, "grad_norm": 1.304255485534668, "learning_rate": 9.206824396612013e-05, "loss": 1.8843, "step": 2026 }, { "epoch": 0.36514298581400584, "grad_norm": 1.1876626014709473, "learning_rate": 9.206058843471133e-05, "loss": 1.7658, "step": 2027 }, { "epoch": 0.3653231254222022, "grad_norm": 1.302866816520691, "learning_rate": 9.205292952922257e-05, "loss": 1.9191, "step": 2028 }, { "epoch": 0.36550326503039854, "grad_norm": 1.3426549434661865, "learning_rate": 9.204526725026832e-05, "loss": 1.7502, "step": 2029 }, { "epoch": 0.3656834046385949, "grad_norm": 1.3348923921585083, "learning_rate": 9.20376015984632e-05, "loss": 1.835, "step": 2030 }, { "epoch": 0.36586354424679124, "grad_norm": 1.2460559606552124, "learning_rate": 9.202993257442217e-05, "loss": 1.6431, "step": 2031 }, { "epoch": 0.3660436838549876, "grad_norm": 1.2204155921936035, "learning_rate": 9.20222601787604e-05, "loss": 1.7716, "step": 2032 }, { "epoch": 0.366223823463184, "grad_norm": 1.249332070350647, "learning_rate": 9.20145844120934e-05, "loss": 1.6243, "step": 2033 }, { "epoch": 0.36640396307138035, "grad_norm": 1.2118375301361084, "learning_rate": 9.200690527503689e-05, "loss": 1.7388, "step": 2034 }, { "epoch": 0.3665841026795767, "grad_norm": 1.2627040147781372, "learning_rate": 9.199922276820691e-05, "loss": 1.5562, "step": 2035 }, { "epoch": 0.36676424228777305, "grad_norm": 1.2564849853515625, "learning_rate": 9.199153689221974e-05, "loss": 1.6902, "step": 2036 }, { "epoch": 0.3669443818959694, "grad_norm": 1.3405301570892334, "learning_rate": 9.198384764769192e-05, "loss": 1.7691, "step": 2037 }, { "epoch": 0.36712452150416575, "grad_norm": 1.261576533317566, "learning_rate": 9.19761550352403e-05, "loss": 1.595, "step": 2038 }, { "epoch": 0.3673046611123621, "grad_norm": 1.299034595489502, "learning_rate": 9.196845905548197e-05, "loss": 1.6305, "step": 2039 }, { "epoch": 0.36748480072055845, "grad_norm": 1.407024621963501, "learning_rate": 9.19607597090343e-05, "loss": 1.7265, "step": 2040 }, { "epoch": 0.3676649403287548, "grad_norm": 1.4784094095230103, "learning_rate": 9.195305699651491e-05, "loss": 1.9685, "step": 2041 }, { "epoch": 0.36784507993695115, "grad_norm": 1.4655539989471436, "learning_rate": 9.194535091854173e-05, "loss": 1.8251, "step": 2042 }, { "epoch": 0.3680252195451475, "grad_norm": 1.6981803178787231, "learning_rate": 9.193764147573294e-05, "loss": 2.1593, "step": 2043 }, { "epoch": 0.36820535915334385, "grad_norm": 1.373761534690857, "learning_rate": 9.192992866870695e-05, "loss": 1.7459, "step": 2044 }, { "epoch": 0.3683854987615402, "grad_norm": 1.385209083557129, "learning_rate": 9.192221249808254e-05, "loss": 1.5596, "step": 2045 }, { "epoch": 0.36856563836973655, "grad_norm": 1.4608234167099, "learning_rate": 9.191449296447865e-05, "loss": 1.6597, "step": 2046 }, { "epoch": 0.3687457779779329, "grad_norm": 1.4122939109802246, "learning_rate": 9.190677006851454e-05, "loss": 1.5083, "step": 2047 }, { "epoch": 0.36892591758612925, "grad_norm": 1.3078893423080444, "learning_rate": 9.189904381080977e-05, "loss": 1.5587, "step": 2048 }, { "epoch": 0.3691060571943256, "grad_norm": 1.304000973701477, "learning_rate": 9.18913141919841e-05, "loss": 1.3948, "step": 2049 }, { "epoch": 0.36928619680252195, "grad_norm": 1.4559122323989868, "learning_rate": 9.188358121265759e-05, "loss": 1.8596, "step": 2050 }, { "epoch": 0.3694663364107183, "grad_norm": 1.5265893936157227, "learning_rate": 9.187584487345062e-05, "loss": 2.3415, "step": 2051 }, { "epoch": 0.36964647601891465, "grad_norm": 1.9684908390045166, "learning_rate": 9.186810517498376e-05, "loss": 2.2039, "step": 2052 }, { "epoch": 0.369826615627111, "grad_norm": 2.2677884101867676, "learning_rate": 9.186036211787792e-05, "loss": 2.1073, "step": 2053 }, { "epoch": 0.37000675523530735, "grad_norm": 1.3882420063018799, "learning_rate": 9.18526157027542e-05, "loss": 2.0612, "step": 2054 }, { "epoch": 0.3701868948435037, "grad_norm": 1.5191304683685303, "learning_rate": 9.184486593023402e-05, "loss": 2.1988, "step": 2055 }, { "epoch": 0.37036703445170005, "grad_norm": 1.4260764122009277, "learning_rate": 9.18371128009391e-05, "loss": 2.2295, "step": 2056 }, { "epoch": 0.3705471740598964, "grad_norm": 1.4145503044128418, "learning_rate": 9.182935631549137e-05, "loss": 2.2642, "step": 2057 }, { "epoch": 0.37072731366809275, "grad_norm": 1.5801762342453003, "learning_rate": 9.182159647451305e-05, "loss": 2.1425, "step": 2058 }, { "epoch": 0.3709074532762891, "grad_norm": 1.551311731338501, "learning_rate": 9.181383327862662e-05, "loss": 2.1386, "step": 2059 }, { "epoch": 0.37108759288448545, "grad_norm": 1.894332766532898, "learning_rate": 9.180606672845486e-05, "loss": 2.6294, "step": 2060 }, { "epoch": 0.3712677324926818, "grad_norm": 1.365515112876892, "learning_rate": 9.179829682462078e-05, "loss": 2.0323, "step": 2061 }, { "epoch": 0.3714478721008782, "grad_norm": 1.3314865827560425, "learning_rate": 9.179052356774768e-05, "loss": 2.0089, "step": 2062 }, { "epoch": 0.37162801170907456, "grad_norm": 1.2352780103683472, "learning_rate": 9.178274695845917e-05, "loss": 1.7733, "step": 2063 }, { "epoch": 0.3718081513172709, "grad_norm": 1.2196687459945679, "learning_rate": 9.177496699737901e-05, "loss": 1.7665, "step": 2064 }, { "epoch": 0.37198829092546726, "grad_norm": 1.2358906269073486, "learning_rate": 9.176718368513137e-05, "loss": 1.7103, "step": 2065 }, { "epoch": 0.3721684305336636, "grad_norm": 1.3376888036727905, "learning_rate": 9.17593970223406e-05, "loss": 1.9167, "step": 2066 }, { "epoch": 0.37234857014185996, "grad_norm": 1.2779468297958374, "learning_rate": 9.175160700963134e-05, "loss": 1.8454, "step": 2067 }, { "epoch": 0.3725287097500563, "grad_norm": 1.2241846323013306, "learning_rate": 9.17438136476285e-05, "loss": 1.649, "step": 2068 }, { "epoch": 0.37270884935825266, "grad_norm": 1.262999176979065, "learning_rate": 9.173601693695725e-05, "loss": 1.6635, "step": 2069 }, { "epoch": 0.372888988966449, "grad_norm": 1.2626407146453857, "learning_rate": 9.172821687824307e-05, "loss": 1.7404, "step": 2070 }, { "epoch": 0.37306912857464536, "grad_norm": 1.2384082078933716, "learning_rate": 9.172041347211165e-05, "loss": 1.7461, "step": 2071 }, { "epoch": 0.3732492681828417, "grad_norm": 1.2913539409637451, "learning_rate": 9.171260671918897e-05, "loss": 1.5681, "step": 2072 }, { "epoch": 0.37342940779103806, "grad_norm": 1.1793841123580933, "learning_rate": 9.170479662010129e-05, "loss": 1.6358, "step": 2073 }, { "epoch": 0.3736095473992344, "grad_norm": 1.239508032798767, "learning_rate": 9.169698317547517e-05, "loss": 1.8285, "step": 2074 }, { "epoch": 0.37378968700743076, "grad_norm": 1.3518551588058472, "learning_rate": 9.168916638593736e-05, "loss": 1.8222, "step": 2075 }, { "epoch": 0.3739698266156271, "grad_norm": 1.2116771936416626, "learning_rate": 9.168134625211492e-05, "loss": 1.4637, "step": 2076 }, { "epoch": 0.37414996622382346, "grad_norm": 1.2727361917495728, "learning_rate": 9.167352277463519e-05, "loss": 1.6008, "step": 2077 }, { "epoch": 0.3743301058320198, "grad_norm": 1.3380126953125, "learning_rate": 9.166569595412575e-05, "loss": 1.8369, "step": 2078 }, { "epoch": 0.37451024544021616, "grad_norm": 1.2954001426696777, "learning_rate": 9.16578657912145e-05, "loss": 1.6829, "step": 2079 }, { "epoch": 0.3746903850484125, "grad_norm": 1.3621466159820557, "learning_rate": 9.165003228652952e-05, "loss": 1.9403, "step": 2080 }, { "epoch": 0.37487052465660886, "grad_norm": 1.5151996612548828, "learning_rate": 9.164219544069926e-05, "loss": 1.935, "step": 2081 }, { "epoch": 0.3750506642648052, "grad_norm": 1.4557955265045166, "learning_rate": 9.163435525435234e-05, "loss": 1.8357, "step": 2082 }, { "epoch": 0.37523080387300156, "grad_norm": 1.3402683734893799, "learning_rate": 9.162651172811773e-05, "loss": 1.7679, "step": 2083 }, { "epoch": 0.3754109434811979, "grad_norm": 1.3840535879135132, "learning_rate": 9.161866486262463e-05, "loss": 1.6648, "step": 2084 }, { "epoch": 0.37559108308939426, "grad_norm": 1.25978422164917, "learning_rate": 9.161081465850252e-05, "loss": 1.8722, "step": 2085 }, { "epoch": 0.3757712226975906, "grad_norm": 1.357959270477295, "learning_rate": 9.160296111638111e-05, "loss": 1.871, "step": 2086 }, { "epoch": 0.37595136230578696, "grad_norm": 1.4812606573104858, "learning_rate": 9.159510423689042e-05, "loss": 1.9384, "step": 2087 }, { "epoch": 0.3761315019139833, "grad_norm": 1.4350230693817139, "learning_rate": 9.158724402066074e-05, "loss": 1.9554, "step": 2088 }, { "epoch": 0.37631164152217966, "grad_norm": 1.4070546627044678, "learning_rate": 9.157938046832259e-05, "loss": 1.685, "step": 2089 }, { "epoch": 0.37649178113037607, "grad_norm": 1.4612318277359009, "learning_rate": 9.15715135805068e-05, "loss": 1.7132, "step": 2090 }, { "epoch": 0.3766719207385724, "grad_norm": 1.3602055311203003, "learning_rate": 9.156364335784443e-05, "loss": 1.6161, "step": 2091 }, { "epoch": 0.37685206034676877, "grad_norm": 1.3037409782409668, "learning_rate": 9.155576980096685e-05, "loss": 1.782, "step": 2092 }, { "epoch": 0.3770321999549651, "grad_norm": 1.3262662887573242, "learning_rate": 9.154789291050564e-05, "loss": 1.8636, "step": 2093 }, { "epoch": 0.37721233956316147, "grad_norm": 1.504162311553955, "learning_rate": 9.154001268709272e-05, "loss": 1.8956, "step": 2094 }, { "epoch": 0.3773924791713578, "grad_norm": 1.4728641510009766, "learning_rate": 9.153212913136018e-05, "loss": 1.9784, "step": 2095 }, { "epoch": 0.37757261877955417, "grad_norm": 1.2264498472213745, "learning_rate": 9.15242422439405e-05, "loss": 1.3746, "step": 2096 }, { "epoch": 0.3777527583877505, "grad_norm": 1.3264819383621216, "learning_rate": 9.151635202546634e-05, "loss": 1.7244, "step": 2097 }, { "epoch": 0.37793289799594687, "grad_norm": 1.2909444570541382, "learning_rate": 9.150845847657061e-05, "loss": 1.4397, "step": 2098 }, { "epoch": 0.3781130376041432, "grad_norm": 1.2654004096984863, "learning_rate": 9.150056159788658e-05, "loss": 1.5312, "step": 2099 }, { "epoch": 0.37829317721233957, "grad_norm": 1.4643536806106567, "learning_rate": 9.149266139004771e-05, "loss": 1.4035, "step": 2100 }, { "epoch": 0.3784733168205359, "grad_norm": 1.5059345960617065, "learning_rate": 9.148475785368776e-05, "loss": 2.4415, "step": 2101 }, { "epoch": 0.37865345642873227, "grad_norm": 1.2910499572753906, "learning_rate": 9.147685098944072e-05, "loss": 1.8699, "step": 2102 }, { "epoch": 0.3788335960369286, "grad_norm": 1.306274175643921, "learning_rate": 9.146894079794092e-05, "loss": 2.0729, "step": 2103 }, { "epoch": 0.37901373564512497, "grad_norm": 1.284865379333496, "learning_rate": 9.146102727982287e-05, "loss": 2.1571, "step": 2104 }, { "epoch": 0.3791938752533213, "grad_norm": 1.2388386726379395, "learning_rate": 9.145311043572142e-05, "loss": 1.8669, "step": 2105 }, { "epoch": 0.37937401486151767, "grad_norm": 1.7818042039871216, "learning_rate": 9.144519026627164e-05, "loss": 2.1748, "step": 2106 }, { "epoch": 0.379554154469714, "grad_norm": 1.6017647981643677, "learning_rate": 9.143726677210888e-05, "loss": 2.1431, "step": 2107 }, { "epoch": 0.3797342940779104, "grad_norm": 1.6454085111618042, "learning_rate": 9.142933995386876e-05, "loss": 2.3816, "step": 2108 }, { "epoch": 0.3799144336861067, "grad_norm": 1.982255458831787, "learning_rate": 9.142140981218717e-05, "loss": 2.4602, "step": 2109 }, { "epoch": 0.3800945732943031, "grad_norm": 1.2942668199539185, "learning_rate": 9.141347634770027e-05, "loss": 1.6573, "step": 2110 }, { "epoch": 0.3802747129024994, "grad_norm": 1.2739670276641846, "learning_rate": 9.140553956104447e-05, "loss": 1.7545, "step": 2111 }, { "epoch": 0.3804548525106958, "grad_norm": 1.1825495958328247, "learning_rate": 9.139759945285645e-05, "loss": 1.6799, "step": 2112 }, { "epoch": 0.3806349921188921, "grad_norm": 1.2051327228546143, "learning_rate": 9.138965602377315e-05, "loss": 1.4395, "step": 2113 }, { "epoch": 0.3808151317270885, "grad_norm": 1.3557251691818237, "learning_rate": 9.138170927443181e-05, "loss": 1.7576, "step": 2114 }, { "epoch": 0.3809952713352848, "grad_norm": 1.453099250793457, "learning_rate": 9.137375920546992e-05, "loss": 2.0789, "step": 2115 }, { "epoch": 0.3811754109434812, "grad_norm": 1.2727937698364258, "learning_rate": 9.136580581752522e-05, "loss": 1.9015, "step": 2116 }, { "epoch": 0.3813555505516775, "grad_norm": 1.2283263206481934, "learning_rate": 9.135784911123571e-05, "loss": 1.6213, "step": 2117 }, { "epoch": 0.3815356901598739, "grad_norm": 1.3137099742889404, "learning_rate": 9.13498890872397e-05, "loss": 1.7475, "step": 2118 }, { "epoch": 0.3817158297680703, "grad_norm": 1.197319746017456, "learning_rate": 9.134192574617573e-05, "loss": 1.4516, "step": 2119 }, { "epoch": 0.38189596937626663, "grad_norm": 1.2807143926620483, "learning_rate": 9.13339590886826e-05, "loss": 1.5196, "step": 2120 }, { "epoch": 0.382076108984463, "grad_norm": 1.1984219551086426, "learning_rate": 9.132598911539942e-05, "loss": 1.6206, "step": 2121 }, { "epoch": 0.38225624859265933, "grad_norm": 1.1745525598526, "learning_rate": 9.131801582696553e-05, "loss": 1.4356, "step": 2122 }, { "epoch": 0.3824363882008557, "grad_norm": 1.289275050163269, "learning_rate": 9.131003922402052e-05, "loss": 1.8689, "step": 2123 }, { "epoch": 0.38261652780905203, "grad_norm": 1.4010155200958252, "learning_rate": 9.130205930720429e-05, "loss": 1.8454, "step": 2124 }, { "epoch": 0.3827966674172484, "grad_norm": 1.3265488147735596, "learning_rate": 9.129407607715698e-05, "loss": 1.656, "step": 2125 }, { "epoch": 0.38297680702544473, "grad_norm": 1.2692373991012573, "learning_rate": 9.128608953451899e-05, "loss": 1.618, "step": 2126 }, { "epoch": 0.3831569466336411, "grad_norm": 1.3315633535385132, "learning_rate": 9.127809967993102e-05, "loss": 1.6082, "step": 2127 }, { "epoch": 0.38333708624183743, "grad_norm": 1.2551548480987549, "learning_rate": 9.1270106514034e-05, "loss": 1.5285, "step": 2128 }, { "epoch": 0.3835172258500338, "grad_norm": 1.3189990520477295, "learning_rate": 9.126211003746913e-05, "loss": 1.6632, "step": 2129 }, { "epoch": 0.38369736545823013, "grad_norm": 1.2506258487701416, "learning_rate": 9.125411025087789e-05, "loss": 1.4936, "step": 2130 }, { "epoch": 0.3838775050664265, "grad_norm": 1.243418574333191, "learning_rate": 9.124610715490203e-05, "loss": 1.8767, "step": 2131 }, { "epoch": 0.38405764467462283, "grad_norm": 1.2705342769622803, "learning_rate": 9.123810075018354e-05, "loss": 1.6632, "step": 2132 }, { "epoch": 0.3842377842828192, "grad_norm": 1.2149455547332764, "learning_rate": 9.123009103736468e-05, "loss": 1.7818, "step": 2133 }, { "epoch": 0.38441792389101553, "grad_norm": 1.465367317199707, "learning_rate": 9.122207801708802e-05, "loss": 1.6318, "step": 2134 }, { "epoch": 0.3845980634992119, "grad_norm": 1.3344510793685913, "learning_rate": 9.121406168999633e-05, "loss": 1.7328, "step": 2135 }, { "epoch": 0.38477820310740823, "grad_norm": 1.2665818929672241, "learning_rate": 9.120604205673268e-05, "loss": 1.6465, "step": 2136 }, { "epoch": 0.3849583427156046, "grad_norm": 1.3166686296463013, "learning_rate": 9.119801911794043e-05, "loss": 1.4931, "step": 2137 }, { "epoch": 0.38513848232380093, "grad_norm": 1.3134535551071167, "learning_rate": 9.118999287426314e-05, "loss": 1.7463, "step": 2138 }, { "epoch": 0.3853186219319973, "grad_norm": 1.2184779644012451, "learning_rate": 9.118196332634467e-05, "loss": 1.4493, "step": 2139 }, { "epoch": 0.38549876154019364, "grad_norm": 1.2748123407363892, "learning_rate": 9.117393047482919e-05, "loss": 1.7025, "step": 2140 }, { "epoch": 0.38567890114839, "grad_norm": 1.312063217163086, "learning_rate": 9.116589432036104e-05, "loss": 1.6307, "step": 2141 }, { "epoch": 0.38585904075658634, "grad_norm": 1.442418098449707, "learning_rate": 9.11578548635849e-05, "loss": 1.8966, "step": 2142 }, { "epoch": 0.3860391803647827, "grad_norm": 1.4317374229431152, "learning_rate": 9.11498121051457e-05, "loss": 1.7603, "step": 2143 }, { "epoch": 0.38621931997297904, "grad_norm": 1.4041088819503784, "learning_rate": 9.114176604568861e-05, "loss": 1.8934, "step": 2144 }, { "epoch": 0.3863994595811754, "grad_norm": 1.3317939043045044, "learning_rate": 9.113371668585908e-05, "loss": 1.6703, "step": 2145 }, { "epoch": 0.38657959918937174, "grad_norm": 1.3929203748703003, "learning_rate": 9.112566402630285e-05, "loss": 1.7624, "step": 2146 }, { "epoch": 0.38675973879756814, "grad_norm": 1.4137219190597534, "learning_rate": 9.111760806766587e-05, "loss": 1.7475, "step": 2147 }, { "epoch": 0.3869398784057645, "grad_norm": 1.4739933013916016, "learning_rate": 9.110954881059439e-05, "loss": 1.7192, "step": 2148 }, { "epoch": 0.38712001801396084, "grad_norm": 1.3204176425933838, "learning_rate": 9.110148625573496e-05, "loss": 1.6008, "step": 2149 }, { "epoch": 0.3873001576221572, "grad_norm": 1.271134614944458, "learning_rate": 9.10934204037343e-05, "loss": 1.5157, "step": 2150 }, { "epoch": 0.38748029723035354, "grad_norm": 1.3271753787994385, "learning_rate": 9.108535125523947e-05, "loss": 2.2178, "step": 2151 }, { "epoch": 0.3876604368385499, "grad_norm": 3.78749680519104, "learning_rate": 9.107727881089779e-05, "loss": 2.047, "step": 2152 }, { "epoch": 0.38784057644674624, "grad_norm": 1.2255138158798218, "learning_rate": 9.106920307135679e-05, "loss": 2.1348, "step": 2153 }, { "epoch": 0.3880207160549426, "grad_norm": 1.3508459329605103, "learning_rate": 9.106112403726435e-05, "loss": 2.129, "step": 2154 }, { "epoch": 0.38820085566313894, "grad_norm": 1.3681482076644897, "learning_rate": 9.105304170926851e-05, "loss": 2.1737, "step": 2155 }, { "epoch": 0.3883809952713353, "grad_norm": 1.2568097114562988, "learning_rate": 9.104495608801768e-05, "loss": 2.089, "step": 2156 }, { "epoch": 0.38856113487953164, "grad_norm": 1.3417366743087769, "learning_rate": 9.103686717416046e-05, "loss": 2.0041, "step": 2157 }, { "epoch": 0.388741274487728, "grad_norm": 1.3027949333190918, "learning_rate": 9.102877496834575e-05, "loss": 1.9694, "step": 2158 }, { "epoch": 0.38892141409592434, "grad_norm": 1.4856226444244385, "learning_rate": 9.102067947122269e-05, "loss": 2.1128, "step": 2159 }, { "epoch": 0.3891015537041207, "grad_norm": 1.7984919548034668, "learning_rate": 9.101258068344069e-05, "loss": 2.2167, "step": 2160 }, { "epoch": 0.38928169331231705, "grad_norm": 3.168741464614868, "learning_rate": 9.100447860564945e-05, "loss": 2.1743, "step": 2161 }, { "epoch": 0.3894618329205134, "grad_norm": 1.22146737575531, "learning_rate": 9.099637323849893e-05, "loss": 1.6083, "step": 2162 }, { "epoch": 0.38964197252870975, "grad_norm": 1.1351044178009033, "learning_rate": 9.098826458263929e-05, "loss": 1.5818, "step": 2163 }, { "epoch": 0.3898221121369061, "grad_norm": 1.2672655582427979, "learning_rate": 9.098015263872104e-05, "loss": 1.7528, "step": 2164 }, { "epoch": 0.39000225174510245, "grad_norm": 1.3483572006225586, "learning_rate": 9.097203740739491e-05, "loss": 1.7113, "step": 2165 }, { "epoch": 0.3901823913532988, "grad_norm": 1.204759955406189, "learning_rate": 9.09639188893119e-05, "loss": 1.5575, "step": 2166 }, { "epoch": 0.39036253096149515, "grad_norm": 1.3749037981033325, "learning_rate": 9.095579708512327e-05, "loss": 1.6415, "step": 2167 }, { "epoch": 0.3905426705696915, "grad_norm": 1.3593950271606445, "learning_rate": 9.094767199548054e-05, "loss": 1.7345, "step": 2168 }, { "epoch": 0.39072281017788785, "grad_norm": 1.252854824066162, "learning_rate": 9.093954362103551e-05, "loss": 1.6888, "step": 2169 }, { "epoch": 0.3909029497860842, "grad_norm": 1.2474712133407593, "learning_rate": 9.093141196244022e-05, "loss": 1.4746, "step": 2170 }, { "epoch": 0.39108308939428055, "grad_norm": 1.3356012105941772, "learning_rate": 9.092327702034702e-05, "loss": 1.7455, "step": 2171 }, { "epoch": 0.3912632290024769, "grad_norm": 1.4254230260849, "learning_rate": 9.091513879540845e-05, "loss": 1.6512, "step": 2172 }, { "epoch": 0.39144336861067325, "grad_norm": 1.3403728008270264, "learning_rate": 9.090699728827738e-05, "loss": 1.7819, "step": 2173 }, { "epoch": 0.3916235082188696, "grad_norm": 1.3594458103179932, "learning_rate": 9.089885249960693e-05, "loss": 1.8249, "step": 2174 }, { "epoch": 0.39180364782706595, "grad_norm": 1.326656460762024, "learning_rate": 9.089070443005044e-05, "loss": 1.9626, "step": 2175 }, { "epoch": 0.39198378743526235, "grad_norm": 1.3090012073516846, "learning_rate": 9.088255308026156e-05, "loss": 1.7413, "step": 2176 }, { "epoch": 0.3921639270434587, "grad_norm": 1.1989049911499023, "learning_rate": 9.087439845089418e-05, "loss": 1.5912, "step": 2177 }, { "epoch": 0.39234406665165505, "grad_norm": 1.2044060230255127, "learning_rate": 9.086624054260247e-05, "loss": 1.6615, "step": 2178 }, { "epoch": 0.3925242062598514, "grad_norm": 1.2591339349746704, "learning_rate": 9.085807935604085e-05, "loss": 1.5324, "step": 2179 }, { "epoch": 0.39270434586804776, "grad_norm": 1.2562506198883057, "learning_rate": 9.0849914891864e-05, "loss": 1.6469, "step": 2180 }, { "epoch": 0.3928844854762441, "grad_norm": 1.2304260730743408, "learning_rate": 9.084174715072689e-05, "loss": 1.7989, "step": 2181 }, { "epoch": 0.39306462508444046, "grad_norm": 1.3507219552993774, "learning_rate": 9.08335761332847e-05, "loss": 1.7477, "step": 2182 }, { "epoch": 0.3932447646926368, "grad_norm": 1.3541796207427979, "learning_rate": 9.082540184019293e-05, "loss": 1.6694, "step": 2183 }, { "epoch": 0.39342490430083316, "grad_norm": 1.3877840042114258, "learning_rate": 9.081722427210731e-05, "loss": 1.787, "step": 2184 }, { "epoch": 0.3936050439090295, "grad_norm": 1.205797791481018, "learning_rate": 9.080904342968384e-05, "loss": 1.6819, "step": 2185 }, { "epoch": 0.39378518351722586, "grad_norm": 1.3570153713226318, "learning_rate": 9.080085931357877e-05, "loss": 1.7425, "step": 2186 }, { "epoch": 0.3939653231254222, "grad_norm": 1.3105854988098145, "learning_rate": 9.079267192444865e-05, "loss": 1.8101, "step": 2187 }, { "epoch": 0.39414546273361856, "grad_norm": 1.3009496927261353, "learning_rate": 9.078448126295026e-05, "loss": 1.5877, "step": 2188 }, { "epoch": 0.3943256023418149, "grad_norm": 1.3271325826644897, "learning_rate": 9.077628732974065e-05, "loss": 1.9116, "step": 2189 }, { "epoch": 0.39450574195001126, "grad_norm": 1.250184178352356, "learning_rate": 9.076809012547712e-05, "loss": 1.6684, "step": 2190 }, { "epoch": 0.3946858815582076, "grad_norm": 1.4337053298950195, "learning_rate": 9.075988965081728e-05, "loss": 2.0014, "step": 2191 }, { "epoch": 0.39486602116640396, "grad_norm": 1.456337332725525, "learning_rate": 9.075168590641891e-05, "loss": 1.8061, "step": 2192 }, { "epoch": 0.3950461607746003, "grad_norm": 1.2794229984283447, "learning_rate": 9.074347889294016e-05, "loss": 1.6618, "step": 2193 }, { "epoch": 0.39522630038279666, "grad_norm": 1.2905025482177734, "learning_rate": 9.073526861103941e-05, "loss": 1.6935, "step": 2194 }, { "epoch": 0.395406439990993, "grad_norm": 1.2809932231903076, "learning_rate": 9.072705506137522e-05, "loss": 1.6264, "step": 2195 }, { "epoch": 0.39558657959918936, "grad_norm": 1.3347852230072021, "learning_rate": 9.071883824460652e-05, "loss": 1.6993, "step": 2196 }, { "epoch": 0.3957667192073857, "grad_norm": 1.4605298042297363, "learning_rate": 9.071061816139245e-05, "loss": 1.7853, "step": 2197 }, { "epoch": 0.39594685881558206, "grad_norm": 1.5141929388046265, "learning_rate": 9.070239481239243e-05, "loss": 1.9662, "step": 2198 }, { "epoch": 0.3961269984237784, "grad_norm": 1.4083324670791626, "learning_rate": 9.06941681982661e-05, "loss": 1.5646, "step": 2199 }, { "epoch": 0.39630713803197476, "grad_norm": 1.2718425989151, "learning_rate": 9.068593831967344e-05, "loss": 1.4153, "step": 2200 }, { "epoch": 0.3964872776401711, "grad_norm": 1.4546648263931274, "learning_rate": 9.06777051772746e-05, "loss": 2.1496, "step": 2201 }, { "epoch": 0.39666741724836746, "grad_norm": 1.33214271068573, "learning_rate": 9.066946877173009e-05, "loss": 2.3049, "step": 2202 }, { "epoch": 0.3968475568565638, "grad_norm": 1.179665207862854, "learning_rate": 9.06612291037006e-05, "loss": 1.8641, "step": 2203 }, { "epoch": 0.3970276964647602, "grad_norm": 1.496773362159729, "learning_rate": 9.065298617384712e-05, "loss": 2.2468, "step": 2204 }, { "epoch": 0.39720783607295657, "grad_norm": 1.2146224975585938, "learning_rate": 9.064473998283088e-05, "loss": 1.9496, "step": 2205 }, { "epoch": 0.3973879756811529, "grad_norm": 1.3933171033859253, "learning_rate": 9.06364905313134e-05, "loss": 1.9137, "step": 2206 }, { "epoch": 0.39756811528934927, "grad_norm": 1.5050772428512573, "learning_rate": 9.062823781995644e-05, "loss": 2.2607, "step": 2207 }, { "epoch": 0.3977482548975456, "grad_norm": 2.351269006729126, "learning_rate": 9.061998184942203e-05, "loss": 2.3468, "step": 2208 }, { "epoch": 0.39792839450574197, "grad_norm": 1.691042423248291, "learning_rate": 9.061172262037247e-05, "loss": 2.0413, "step": 2209 }, { "epoch": 0.3981085341139383, "grad_norm": 1.6214369535446167, "learning_rate": 9.060346013347029e-05, "loss": 1.9874, "step": 2210 }, { "epoch": 0.39828867372213467, "grad_norm": 1.6569087505340576, "learning_rate": 9.059519438937833e-05, "loss": 2.0923, "step": 2211 }, { "epoch": 0.398468813330331, "grad_norm": 1.254485011100769, "learning_rate": 9.058692538875964e-05, "loss": 1.564, "step": 2212 }, { "epoch": 0.39864895293852737, "grad_norm": 1.2168151140213013, "learning_rate": 9.057865313227756e-05, "loss": 1.6159, "step": 2213 }, { "epoch": 0.3988290925467237, "grad_norm": 1.2943649291992188, "learning_rate": 9.05703776205957e-05, "loss": 1.8458, "step": 2214 }, { "epoch": 0.39900923215492007, "grad_norm": 1.3149343729019165, "learning_rate": 9.05620988543779e-05, "loss": 1.6536, "step": 2215 }, { "epoch": 0.3991893717631164, "grad_norm": 1.247033953666687, "learning_rate": 9.055381683428829e-05, "loss": 1.6111, "step": 2216 }, { "epoch": 0.39936951137131277, "grad_norm": 1.3952230215072632, "learning_rate": 9.054553156099126e-05, "loss": 1.7637, "step": 2217 }, { "epoch": 0.3995496509795091, "grad_norm": 1.394584059715271, "learning_rate": 9.053724303515142e-05, "loss": 1.9503, "step": 2218 }, { "epoch": 0.39972979058770547, "grad_norm": 1.1841211318969727, "learning_rate": 9.05289512574337e-05, "loss": 1.6442, "step": 2219 }, { "epoch": 0.3999099301959018, "grad_norm": 1.3564656972885132, "learning_rate": 9.052065622850324e-05, "loss": 1.6907, "step": 2220 }, { "epoch": 0.40009006980409817, "grad_norm": 1.2043156623840332, "learning_rate": 9.051235794902549e-05, "loss": 1.4155, "step": 2221 }, { "epoch": 0.4002702094122945, "grad_norm": 1.3666579723358154, "learning_rate": 9.050405641966611e-05, "loss": 1.757, "step": 2222 }, { "epoch": 0.40045034902049087, "grad_norm": 1.2404276132583618, "learning_rate": 9.049575164109104e-05, "loss": 1.7984, "step": 2223 }, { "epoch": 0.4006304886286872, "grad_norm": 1.263825535774231, "learning_rate": 9.04874436139665e-05, "loss": 1.8548, "step": 2224 }, { "epoch": 0.40081062823688357, "grad_norm": 1.2468438148498535, "learning_rate": 9.047913233895898e-05, "loss": 1.7792, "step": 2225 }, { "epoch": 0.4009907678450799, "grad_norm": 1.3407175540924072, "learning_rate": 9.047081781673517e-05, "loss": 1.9603, "step": 2226 }, { "epoch": 0.40117090745327627, "grad_norm": 1.2416698932647705, "learning_rate": 9.046250004796205e-05, "loss": 1.5941, "step": 2227 }, { "epoch": 0.4013510470614726, "grad_norm": 1.1872090101242065, "learning_rate": 9.04541790333069e-05, "loss": 1.4569, "step": 2228 }, { "epoch": 0.40153118666966897, "grad_norm": 1.309704303741455, "learning_rate": 9.044585477343722e-05, "loss": 1.8411, "step": 2229 }, { "epoch": 0.4017113262778653, "grad_norm": 1.2670727968215942, "learning_rate": 9.043752726902075e-05, "loss": 1.6368, "step": 2230 }, { "epoch": 0.40189146588606167, "grad_norm": 1.3420205116271973, "learning_rate": 9.042919652072554e-05, "loss": 1.8833, "step": 2231 }, { "epoch": 0.4020716054942581, "grad_norm": 1.4256051778793335, "learning_rate": 9.042086252921989e-05, "loss": 1.791, "step": 2232 }, { "epoch": 0.4022517451024544, "grad_norm": 1.324985384941101, "learning_rate": 9.041252529517232e-05, "loss": 1.8372, "step": 2233 }, { "epoch": 0.4024318847106508, "grad_norm": 1.19207763671875, "learning_rate": 9.040418481925168e-05, "loss": 1.464, "step": 2234 }, { "epoch": 0.40261202431884713, "grad_norm": 1.420929193496704, "learning_rate": 9.0395841102127e-05, "loss": 1.8441, "step": 2235 }, { "epoch": 0.4027921639270435, "grad_norm": 1.3216580152511597, "learning_rate": 9.038749414446763e-05, "loss": 1.7486, "step": 2236 }, { "epoch": 0.40297230353523983, "grad_norm": 1.4530539512634277, "learning_rate": 9.037914394694313e-05, "loss": 1.7013, "step": 2237 }, { "epoch": 0.4031524431434362, "grad_norm": 1.3988749980926514, "learning_rate": 9.037079051022339e-05, "loss": 1.6117, "step": 2238 }, { "epoch": 0.40333258275163253, "grad_norm": 1.1622987985610962, "learning_rate": 9.03624338349785e-05, "loss": 1.5733, "step": 2239 }, { "epoch": 0.4035127223598289, "grad_norm": 1.2403991222381592, "learning_rate": 9.035407392187883e-05, "loss": 1.7356, "step": 2240 }, { "epoch": 0.40369286196802523, "grad_norm": 1.3406028747558594, "learning_rate": 9.0345710771595e-05, "loss": 1.6973, "step": 2241 }, { "epoch": 0.4038730015762216, "grad_norm": 1.459892988204956, "learning_rate": 9.033734438479789e-05, "loss": 1.7797, "step": 2242 }, { "epoch": 0.40405314118441793, "grad_norm": 1.3988192081451416, "learning_rate": 9.032897476215866e-05, "loss": 1.9643, "step": 2243 }, { "epoch": 0.4042332807926143, "grad_norm": 1.5672204494476318, "learning_rate": 9.032060190434874e-05, "loss": 1.7598, "step": 2244 }, { "epoch": 0.40441342040081063, "grad_norm": 1.5195800065994263, "learning_rate": 9.031222581203977e-05, "loss": 2.0137, "step": 2245 }, { "epoch": 0.404593560009007, "grad_norm": 1.3430835008621216, "learning_rate": 9.030384648590369e-05, "loss": 1.6662, "step": 2246 }, { "epoch": 0.40477369961720333, "grad_norm": 1.395531415939331, "learning_rate": 9.029546392661265e-05, "loss": 1.619, "step": 2247 }, { "epoch": 0.4049538392253997, "grad_norm": 1.475770115852356, "learning_rate": 9.028707813483913e-05, "loss": 1.7312, "step": 2248 }, { "epoch": 0.40513397883359603, "grad_norm": 1.3561451435089111, "learning_rate": 9.027868911125583e-05, "loss": 1.6794, "step": 2249 }, { "epoch": 0.4053141184417924, "grad_norm": 1.49905526638031, "learning_rate": 9.02702968565357e-05, "loss": 1.5964, "step": 2250 }, { "epoch": 0.40549425804998873, "grad_norm": 1.259384036064148, "learning_rate": 9.0261901371352e-05, "loss": 1.9782, "step": 2251 }, { "epoch": 0.4056743976581851, "grad_norm": 1.2289361953735352, "learning_rate": 9.025350265637815e-05, "loss": 2.0043, "step": 2252 }, { "epoch": 0.40585453726638143, "grad_norm": 1.1787949800491333, "learning_rate": 9.024510071228793e-05, "loss": 1.962, "step": 2253 }, { "epoch": 0.4060346768745778, "grad_norm": 1.4196516275405884, "learning_rate": 9.023669553975534e-05, "loss": 2.4036, "step": 2254 }, { "epoch": 0.40621481648277413, "grad_norm": 1.3713552951812744, "learning_rate": 9.022828713945463e-05, "loss": 2.1738, "step": 2255 }, { "epoch": 0.4063949560909705, "grad_norm": 1.5663354396820068, "learning_rate": 9.021987551206032e-05, "loss": 2.2402, "step": 2256 }, { "epoch": 0.40657509569916683, "grad_norm": 1.4110584259033203, "learning_rate": 9.021146065824719e-05, "loss": 2.1178, "step": 2257 }, { "epoch": 0.4067552353073632, "grad_norm": 1.68704092502594, "learning_rate": 9.020304257869028e-05, "loss": 2.3081, "step": 2258 }, { "epoch": 0.40693537491555953, "grad_norm": 1.7063883543014526, "learning_rate": 9.019462127406487e-05, "loss": 2.3804, "step": 2259 }, { "epoch": 0.4071155145237559, "grad_norm": 1.3695518970489502, "learning_rate": 9.018619674504652e-05, "loss": 1.8772, "step": 2260 }, { "epoch": 0.4072956541319523, "grad_norm": 1.1992802619934082, "learning_rate": 9.017776899231103e-05, "loss": 1.5364, "step": 2261 }, { "epoch": 0.40747579374014864, "grad_norm": 1.2105752229690552, "learning_rate": 9.016933801653449e-05, "loss": 1.5297, "step": 2262 }, { "epoch": 0.407655933348345, "grad_norm": 1.304094672203064, "learning_rate": 9.016090381839322e-05, "loss": 1.5659, "step": 2263 }, { "epoch": 0.40783607295654134, "grad_norm": 1.4258983135223389, "learning_rate": 9.015246639856381e-05, "loss": 2.0664, "step": 2264 }, { "epoch": 0.4080162125647377, "grad_norm": 1.2939250469207764, "learning_rate": 9.01440257577231e-05, "loss": 1.6481, "step": 2265 }, { "epoch": 0.40819635217293404, "grad_norm": 1.3380907773971558, "learning_rate": 9.013558189654819e-05, "loss": 1.7788, "step": 2266 }, { "epoch": 0.4083764917811304, "grad_norm": 1.2485278844833374, "learning_rate": 9.012713481571645e-05, "loss": 1.5974, "step": 2267 }, { "epoch": 0.40855663138932674, "grad_norm": 1.2398656606674194, "learning_rate": 9.01186845159055e-05, "loss": 1.6576, "step": 2268 }, { "epoch": 0.4087367709975231, "grad_norm": 1.2962778806686401, "learning_rate": 9.011023099779323e-05, "loss": 1.6924, "step": 2269 }, { "epoch": 0.40891691060571944, "grad_norm": 1.2225233316421509, "learning_rate": 9.010177426205777e-05, "loss": 1.5986, "step": 2270 }, { "epoch": 0.4090970502139158, "grad_norm": 1.1218363046646118, "learning_rate": 9.009331430937751e-05, "loss": 1.4091, "step": 2271 }, { "epoch": 0.40927718982211214, "grad_norm": 1.1273895502090454, "learning_rate": 9.008485114043108e-05, "loss": 1.475, "step": 2272 }, { "epoch": 0.4094573294303085, "grad_norm": 1.3381619453430176, "learning_rate": 9.007638475589745e-05, "loss": 1.6634, "step": 2273 }, { "epoch": 0.40963746903850484, "grad_norm": 1.352177619934082, "learning_rate": 9.006791515645573e-05, "loss": 1.7489, "step": 2274 }, { "epoch": 0.4098176086467012, "grad_norm": 1.398384690284729, "learning_rate": 9.005944234278538e-05, "loss": 1.7822, "step": 2275 }, { "epoch": 0.40999774825489754, "grad_norm": 1.241804599761963, "learning_rate": 9.005096631556609e-05, "loss": 1.4318, "step": 2276 }, { "epoch": 0.4101778878630939, "grad_norm": 1.1617920398712158, "learning_rate": 9.004248707547777e-05, "loss": 1.4568, "step": 2277 }, { "epoch": 0.41035802747129024, "grad_norm": 1.2524787187576294, "learning_rate": 9.003400462320067e-05, "loss": 1.358, "step": 2278 }, { "epoch": 0.4105381670794866, "grad_norm": 1.269258737564087, "learning_rate": 9.002551895941519e-05, "loss": 1.8369, "step": 2279 }, { "epoch": 0.41071830668768294, "grad_norm": 1.2870339155197144, "learning_rate": 9.001703008480208e-05, "loss": 1.7499, "step": 2280 }, { "epoch": 0.4108984462958793, "grad_norm": 1.3752247095108032, "learning_rate": 9.000853800004233e-05, "loss": 1.6058, "step": 2281 }, { "epoch": 0.41107858590407564, "grad_norm": 1.4035537242889404, "learning_rate": 9.000004270581712e-05, "loss": 1.8562, "step": 2282 }, { "epoch": 0.411258725512272, "grad_norm": 1.258719563484192, "learning_rate": 8.999154420280798e-05, "loss": 1.6019, "step": 2283 }, { "epoch": 0.41143886512046834, "grad_norm": 1.2502412796020508, "learning_rate": 8.998304249169664e-05, "loss": 1.4756, "step": 2284 }, { "epoch": 0.4116190047286647, "grad_norm": 1.4311153888702393, "learning_rate": 8.997453757316512e-05, "loss": 1.6999, "step": 2285 }, { "epoch": 0.41179914433686104, "grad_norm": 1.5594477653503418, "learning_rate": 8.996602944789565e-05, "loss": 2.0204, "step": 2286 }, { "epoch": 0.4119792839450574, "grad_norm": 1.323693871498108, "learning_rate": 8.995751811657077e-05, "loss": 1.7253, "step": 2287 }, { "epoch": 0.41215942355325375, "grad_norm": 1.3957284688949585, "learning_rate": 8.994900357987326e-05, "loss": 1.6376, "step": 2288 }, { "epoch": 0.41233956316145015, "grad_norm": 1.3958390951156616, "learning_rate": 8.994048583848614e-05, "loss": 1.6135, "step": 2289 }, { "epoch": 0.4125197027696465, "grad_norm": 1.339706540107727, "learning_rate": 8.993196489309267e-05, "loss": 1.4103, "step": 2290 }, { "epoch": 0.41269984237784285, "grad_norm": 1.302673578262329, "learning_rate": 8.992344074437645e-05, "loss": 1.6156, "step": 2291 }, { "epoch": 0.4128799819860392, "grad_norm": 1.3750134706497192, "learning_rate": 8.991491339302126e-05, "loss": 1.8009, "step": 2292 }, { "epoch": 0.41306012159423555, "grad_norm": 1.3975346088409424, "learning_rate": 8.990638283971116e-05, "loss": 1.7812, "step": 2293 }, { "epoch": 0.4132402612024319, "grad_norm": 1.4291768074035645, "learning_rate": 8.989784908513046e-05, "loss": 1.701, "step": 2294 }, { "epoch": 0.41342040081062825, "grad_norm": 1.4330488443374634, "learning_rate": 8.988931212996374e-05, "loss": 1.9773, "step": 2295 }, { "epoch": 0.4136005404188246, "grad_norm": 1.3990591764450073, "learning_rate": 8.988077197489583e-05, "loss": 1.6276, "step": 2296 }, { "epoch": 0.41378068002702095, "grad_norm": 1.2557814121246338, "learning_rate": 8.987222862061182e-05, "loss": 1.3885, "step": 2297 }, { "epoch": 0.4139608196352173, "grad_norm": 1.2447737455368042, "learning_rate": 8.986368206779706e-05, "loss": 1.4814, "step": 2298 }, { "epoch": 0.41414095924341365, "grad_norm": 1.497323989868164, "learning_rate": 8.985513231713713e-05, "loss": 1.9769, "step": 2299 }, { "epoch": 0.41432109885161, "grad_norm": 1.3988481760025024, "learning_rate": 8.984657936931791e-05, "loss": 2.0068, "step": 2300 }, { "epoch": 0.41450123845980635, "grad_norm": 1.3377095460891724, "learning_rate": 8.983802322502548e-05, "loss": 2.2395, "step": 2301 }, { "epoch": 0.4146813780680027, "grad_norm": 1.3252151012420654, "learning_rate": 8.982946388494625e-05, "loss": 2.2211, "step": 2302 }, { "epoch": 0.41486151767619905, "grad_norm": 1.776610016822815, "learning_rate": 8.982090134976683e-05, "loss": 2.2165, "step": 2303 }, { "epoch": 0.4150416572843954, "grad_norm": 1.233462929725647, "learning_rate": 8.981233562017408e-05, "loss": 1.9882, "step": 2304 }, { "epoch": 0.41522179689259175, "grad_norm": 1.1811257600784302, "learning_rate": 8.980376669685517e-05, "loss": 1.8871, "step": 2305 }, { "epoch": 0.4154019365007881, "grad_norm": 2.043971538543701, "learning_rate": 8.97951945804975e-05, "loss": 2.0592, "step": 2306 }, { "epoch": 0.41558207610898446, "grad_norm": 1.7642455101013184, "learning_rate": 8.978661927178867e-05, "loss": 1.9924, "step": 2307 }, { "epoch": 0.4157622157171808, "grad_norm": 1.3262183666229248, "learning_rate": 8.977804077141665e-05, "loss": 2.1365, "step": 2308 }, { "epoch": 0.41594235532537716, "grad_norm": 1.703677773475647, "learning_rate": 8.976945908006956e-05, "loss": 1.981, "step": 2309 }, { "epoch": 0.4161224949335735, "grad_norm": 1.7459417581558228, "learning_rate": 8.976087419843584e-05, "loss": 2.5611, "step": 2310 }, { "epoch": 0.41630263454176986, "grad_norm": 1.5708870887756348, "learning_rate": 8.975228612720416e-05, "loss": 1.8458, "step": 2311 }, { "epoch": 0.4164827741499662, "grad_norm": 1.2064553499221802, "learning_rate": 8.974369486706345e-05, "loss": 1.7064, "step": 2312 }, { "epoch": 0.41666291375816256, "grad_norm": 1.3049978017807007, "learning_rate": 8.973510041870287e-05, "loss": 1.6698, "step": 2313 }, { "epoch": 0.4168430533663589, "grad_norm": 1.2189924716949463, "learning_rate": 8.972650278281191e-05, "loss": 1.7325, "step": 2314 }, { "epoch": 0.41702319297455526, "grad_norm": 1.329236626625061, "learning_rate": 8.971790196008025e-05, "loss": 1.5717, "step": 2315 }, { "epoch": 0.4172033325827516, "grad_norm": 1.280090093612671, "learning_rate": 8.970929795119784e-05, "loss": 1.868, "step": 2316 }, { "epoch": 0.41738347219094796, "grad_norm": 1.1924283504486084, "learning_rate": 8.97006907568549e-05, "loss": 1.5763, "step": 2317 }, { "epoch": 0.41756361179914436, "grad_norm": 1.3362699747085571, "learning_rate": 8.969208037774186e-05, "loss": 1.7364, "step": 2318 }, { "epoch": 0.4177437514073407, "grad_norm": 1.326858401298523, "learning_rate": 8.968346681454949e-05, "loss": 1.6928, "step": 2319 }, { "epoch": 0.41792389101553706, "grad_norm": 1.1357793807983398, "learning_rate": 8.967485006796872e-05, "loss": 1.6156, "step": 2320 }, { "epoch": 0.4181040306237334, "grad_norm": 1.3107483386993408, "learning_rate": 8.966623013869082e-05, "loss": 1.7734, "step": 2321 }, { "epoch": 0.41828417023192976, "grad_norm": 1.5365502834320068, "learning_rate": 8.965760702740725e-05, "loss": 1.71, "step": 2322 }, { "epoch": 0.4184643098401261, "grad_norm": 1.3306937217712402, "learning_rate": 8.964898073480976e-05, "loss": 1.4933, "step": 2323 }, { "epoch": 0.41864444944832246, "grad_norm": 1.3033932447433472, "learning_rate": 8.964035126159035e-05, "loss": 1.7984, "step": 2324 }, { "epoch": 0.4188245890565188, "grad_norm": 1.354055643081665, "learning_rate": 8.963171860844128e-05, "loss": 1.8457, "step": 2325 }, { "epoch": 0.41900472866471516, "grad_norm": 1.2634997367858887, "learning_rate": 8.962308277605502e-05, "loss": 1.8136, "step": 2326 }, { "epoch": 0.4191848682729115, "grad_norm": 1.3568263053894043, "learning_rate": 8.961444376512438e-05, "loss": 1.6783, "step": 2327 }, { "epoch": 0.41936500788110787, "grad_norm": 1.4491347074508667, "learning_rate": 8.960580157634235e-05, "loss": 1.9684, "step": 2328 }, { "epoch": 0.4195451474893042, "grad_norm": 1.267566442489624, "learning_rate": 8.959715621040221e-05, "loss": 1.5467, "step": 2329 }, { "epoch": 0.41972528709750057, "grad_norm": 1.183724284172058, "learning_rate": 8.95885076679975e-05, "loss": 1.7789, "step": 2330 }, { "epoch": 0.4199054267056969, "grad_norm": 1.364312767982483, "learning_rate": 8.957985594982197e-05, "loss": 1.7984, "step": 2331 }, { "epoch": 0.42008556631389327, "grad_norm": 1.3236796855926514, "learning_rate": 8.957120105656968e-05, "loss": 1.8618, "step": 2332 }, { "epoch": 0.4202657059220896, "grad_norm": 1.254014492034912, "learning_rate": 8.956254298893491e-05, "loss": 1.7109, "step": 2333 }, { "epoch": 0.42044584553028597, "grad_norm": 1.1174708604812622, "learning_rate": 8.955388174761222e-05, "loss": 1.3256, "step": 2334 }, { "epoch": 0.4206259851384823, "grad_norm": 1.2135961055755615, "learning_rate": 8.95452173332964e-05, "loss": 1.6955, "step": 2335 }, { "epoch": 0.42080612474667867, "grad_norm": 1.3413887023925781, "learning_rate": 8.953654974668251e-05, "loss": 1.7351, "step": 2336 }, { "epoch": 0.420986264354875, "grad_norm": 1.1912155151367188, "learning_rate": 8.952787898846585e-05, "loss": 1.495, "step": 2337 }, { "epoch": 0.42116640396307137, "grad_norm": 1.3948345184326172, "learning_rate": 8.951920505934199e-05, "loss": 1.6379, "step": 2338 }, { "epoch": 0.4213465435712677, "grad_norm": 1.446315884590149, "learning_rate": 8.951052796000676e-05, "loss": 1.8504, "step": 2339 }, { "epoch": 0.42152668317946407, "grad_norm": 1.2895922660827637, "learning_rate": 8.950184769115621e-05, "loss": 1.6966, "step": 2340 }, { "epoch": 0.4217068227876604, "grad_norm": 1.405571460723877, "learning_rate": 8.94931642534867e-05, "loss": 1.5646, "step": 2341 }, { "epoch": 0.42188696239585677, "grad_norm": 1.4101899862289429, "learning_rate": 8.948447764769477e-05, "loss": 1.679, "step": 2342 }, { "epoch": 0.4220671020040531, "grad_norm": 1.411247968673706, "learning_rate": 8.947578787447728e-05, "loss": 1.8322, "step": 2343 }, { "epoch": 0.42224724161224947, "grad_norm": 1.4264168739318848, "learning_rate": 8.946709493453132e-05, "loss": 1.7271, "step": 2344 }, { "epoch": 0.4224273812204458, "grad_norm": 1.330457329750061, "learning_rate": 8.945839882855422e-05, "loss": 1.5499, "step": 2345 }, { "epoch": 0.4226075208286422, "grad_norm": 1.3925718069076538, "learning_rate": 8.944969955724358e-05, "loss": 1.6385, "step": 2346 }, { "epoch": 0.4227876604368386, "grad_norm": 1.4525631666183472, "learning_rate": 8.944099712129727e-05, "loss": 1.7932, "step": 2347 }, { "epoch": 0.4229678000450349, "grad_norm": 1.4268920421600342, "learning_rate": 8.943229152141337e-05, "loss": 1.6502, "step": 2348 }, { "epoch": 0.4231479396532313, "grad_norm": 1.4260139465332031, "learning_rate": 8.942358275829026e-05, "loss": 1.4499, "step": 2349 }, { "epoch": 0.4233280792614276, "grad_norm": 1.4666041135787964, "learning_rate": 8.941487083262654e-05, "loss": 1.8307, "step": 2350 }, { "epoch": 0.423508218869624, "grad_norm": 1.4299721717834473, "learning_rate": 8.940615574512108e-05, "loss": 2.2927, "step": 2351 }, { "epoch": 0.4236883584778203, "grad_norm": 1.2465479373931885, "learning_rate": 8.939743749647301e-05, "loss": 1.8718, "step": 2352 }, { "epoch": 0.4238684980860167, "grad_norm": 1.1579290628433228, "learning_rate": 8.938871608738168e-05, "loss": 1.7701, "step": 2353 }, { "epoch": 0.424048637694213, "grad_norm": 1.1931153535842896, "learning_rate": 8.937999151854674e-05, "loss": 2.0909, "step": 2354 }, { "epoch": 0.4242287773024094, "grad_norm": 1.3782215118408203, "learning_rate": 8.937126379066804e-05, "loss": 2.0862, "step": 2355 }, { "epoch": 0.4244089169106057, "grad_norm": 1.3637079000473022, "learning_rate": 8.936253290444577e-05, "loss": 2.0026, "step": 2356 }, { "epoch": 0.4245890565188021, "grad_norm": 2.3987016677856445, "learning_rate": 8.935379886058026e-05, "loss": 2.5389, "step": 2357 }, { "epoch": 0.4247691961269984, "grad_norm": 1.3973356485366821, "learning_rate": 8.934506165977218e-05, "loss": 2.1329, "step": 2358 }, { "epoch": 0.4249493357351948, "grad_norm": 1.603434443473816, "learning_rate": 8.933632130272242e-05, "loss": 2.1958, "step": 2359 }, { "epoch": 0.42512947534339113, "grad_norm": 1.675324559211731, "learning_rate": 8.932757779013214e-05, "loss": 2.3248, "step": 2360 }, { "epoch": 0.4253096149515875, "grad_norm": 1.3479998111724854, "learning_rate": 8.93188311227027e-05, "loss": 1.4798, "step": 2361 }, { "epoch": 0.42548975455978383, "grad_norm": 1.265716791152954, "learning_rate": 8.931008130113578e-05, "loss": 1.8728, "step": 2362 }, { "epoch": 0.4256698941679802, "grad_norm": 1.2824552059173584, "learning_rate": 8.93013283261333e-05, "loss": 1.8325, "step": 2363 }, { "epoch": 0.42585003377617653, "grad_norm": 1.2346662282943726, "learning_rate": 8.92925721983974e-05, "loss": 1.511, "step": 2364 }, { "epoch": 0.4260301733843729, "grad_norm": 1.2194435596466064, "learning_rate": 8.928381291863049e-05, "loss": 1.6183, "step": 2365 }, { "epoch": 0.42621031299256923, "grad_norm": 1.2850176095962524, "learning_rate": 8.927505048753526e-05, "loss": 1.8856, "step": 2366 }, { "epoch": 0.4263904526007656, "grad_norm": 1.2203431129455566, "learning_rate": 8.92662849058146e-05, "loss": 1.7079, "step": 2367 }, { "epoch": 0.42657059220896193, "grad_norm": 1.2614035606384277, "learning_rate": 8.925751617417169e-05, "loss": 1.5353, "step": 2368 }, { "epoch": 0.4267507318171583, "grad_norm": 1.1229199171066284, "learning_rate": 8.924874429330994e-05, "loss": 1.2559, "step": 2369 }, { "epoch": 0.42693087142535463, "grad_norm": 1.325992226600647, "learning_rate": 8.923996926393305e-05, "loss": 1.9604, "step": 2370 }, { "epoch": 0.427111011033551, "grad_norm": 1.2546918392181396, "learning_rate": 8.923119108674494e-05, "loss": 1.5735, "step": 2371 }, { "epoch": 0.42729115064174733, "grad_norm": 1.375550627708435, "learning_rate": 8.92224097624498e-05, "loss": 1.7835, "step": 2372 }, { "epoch": 0.4274712902499437, "grad_norm": 1.4299111366271973, "learning_rate": 8.921362529175204e-05, "loss": 1.9749, "step": 2373 }, { "epoch": 0.42765142985814003, "grad_norm": 1.3563504219055176, "learning_rate": 8.920483767535636e-05, "loss": 1.5832, "step": 2374 }, { "epoch": 0.42783156946633644, "grad_norm": 1.3628417253494263, "learning_rate": 8.91960469139677e-05, "loss": 1.7148, "step": 2375 }, { "epoch": 0.4280117090745328, "grad_norm": 1.388820767402649, "learning_rate": 8.918725300829126e-05, "loss": 2.0205, "step": 2376 }, { "epoch": 0.42819184868272914, "grad_norm": 1.3120652437210083, "learning_rate": 8.917845595903246e-05, "loss": 1.6295, "step": 2377 }, { "epoch": 0.4283719882909255, "grad_norm": 1.3631128072738647, "learning_rate": 8.9169655766897e-05, "loss": 1.7328, "step": 2378 }, { "epoch": 0.42855212789912184, "grad_norm": 1.178661584854126, "learning_rate": 8.916085243259084e-05, "loss": 1.4739, "step": 2379 }, { "epoch": 0.4287322675073182, "grad_norm": 1.2599750757217407, "learning_rate": 8.915204595682018e-05, "loss": 1.5898, "step": 2380 }, { "epoch": 0.42891240711551454, "grad_norm": 1.2569741010665894, "learning_rate": 8.914323634029145e-05, "loss": 1.568, "step": 2381 }, { "epoch": 0.4290925467237109, "grad_norm": 1.245320439338684, "learning_rate": 8.913442358371138e-05, "loss": 1.6056, "step": 2382 }, { "epoch": 0.42927268633190724, "grad_norm": 1.3373866081237793, "learning_rate": 8.91256076877869e-05, "loss": 1.7986, "step": 2383 }, { "epoch": 0.4294528259401036, "grad_norm": 1.4051498174667358, "learning_rate": 8.911678865322524e-05, "loss": 1.8037, "step": 2384 }, { "epoch": 0.42963296554829994, "grad_norm": 1.3707317113876343, "learning_rate": 8.910796648073384e-05, "loss": 1.7514, "step": 2385 }, { "epoch": 0.4298131051564963, "grad_norm": 1.3537299633026123, "learning_rate": 8.909914117102041e-05, "loss": 1.6818, "step": 2386 }, { "epoch": 0.42999324476469264, "grad_norm": 1.2826321125030518, "learning_rate": 8.909031272479294e-05, "loss": 1.6717, "step": 2387 }, { "epoch": 0.430173384372889, "grad_norm": 1.5074782371520996, "learning_rate": 8.908148114275961e-05, "loss": 2.0749, "step": 2388 }, { "epoch": 0.43035352398108534, "grad_norm": 1.5770882368087769, "learning_rate": 8.907264642562889e-05, "loss": 1.8662, "step": 2389 }, { "epoch": 0.4305336635892817, "grad_norm": 1.3784534931182861, "learning_rate": 8.906380857410952e-05, "loss": 1.7692, "step": 2390 }, { "epoch": 0.43071380319747804, "grad_norm": 1.253674864768982, "learning_rate": 8.905496758891045e-05, "loss": 1.7131, "step": 2391 }, { "epoch": 0.4308939428056744, "grad_norm": 1.551912546157837, "learning_rate": 8.90461234707409e-05, "loss": 1.7812, "step": 2392 }, { "epoch": 0.43107408241387074, "grad_norm": 1.2576667070388794, "learning_rate": 8.903727622031035e-05, "loss": 1.5691, "step": 2393 }, { "epoch": 0.4312542220220671, "grad_norm": 1.4048371315002441, "learning_rate": 8.902842583832851e-05, "loss": 1.7556, "step": 2394 }, { "epoch": 0.43143436163026344, "grad_norm": 1.3117891550064087, "learning_rate": 8.901957232550537e-05, "loss": 1.752, "step": 2395 }, { "epoch": 0.4316145012384598, "grad_norm": 1.3268016576766968, "learning_rate": 8.901071568255112e-05, "loss": 1.708, "step": 2396 }, { "epoch": 0.43179464084665614, "grad_norm": 1.4855804443359375, "learning_rate": 8.90018559101763e-05, "loss": 1.6548, "step": 2397 }, { "epoch": 0.4319747804548525, "grad_norm": 1.2910501956939697, "learning_rate": 8.899299300909156e-05, "loss": 1.5835, "step": 2398 }, { "epoch": 0.43215492006304884, "grad_norm": 1.2075519561767578, "learning_rate": 8.898412698000792e-05, "loss": 1.5984, "step": 2399 }, { "epoch": 0.4323350596712452, "grad_norm": 1.361940860748291, "learning_rate": 8.897525782363661e-05, "loss": 1.6462, "step": 2400 }, { "epoch": 0.43251519927944154, "grad_norm": 1.417803406715393, "learning_rate": 8.89663855406891e-05, "loss": 2.45, "step": 2401 }, { "epoch": 0.4326953388876379, "grad_norm": 1.3750723600387573, "learning_rate": 8.895751013187713e-05, "loss": 2.2356, "step": 2402 }, { "epoch": 0.4328754784958343, "grad_norm": 1.3236726522445679, "learning_rate": 8.894863159791265e-05, "loss": 2.0514, "step": 2403 }, { "epoch": 0.43305561810403065, "grad_norm": 1.2322497367858887, "learning_rate": 8.893974993950794e-05, "loss": 1.9954, "step": 2404 }, { "epoch": 0.433235757712227, "grad_norm": 1.177254319190979, "learning_rate": 8.893086515737543e-05, "loss": 2.1942, "step": 2405 }, { "epoch": 0.43341589732042335, "grad_norm": 1.2370191812515259, "learning_rate": 8.89219772522279e-05, "loss": 1.884, "step": 2406 }, { "epoch": 0.4335960369286197, "grad_norm": 1.7734014987945557, "learning_rate": 8.89130862247783e-05, "loss": 2.4715, "step": 2407 }, { "epoch": 0.43377617653681605, "grad_norm": 1.4193809032440186, "learning_rate": 8.890419207573988e-05, "loss": 2.1092, "step": 2408 }, { "epoch": 0.4339563161450124, "grad_norm": 1.8293203115463257, "learning_rate": 8.889529480582612e-05, "loss": 2.215, "step": 2409 }, { "epoch": 0.43413645575320875, "grad_norm": 1.8702088594436646, "learning_rate": 8.888639441575077e-05, "loss": 2.294, "step": 2410 }, { "epoch": 0.4343165953614051, "grad_norm": 1.2626774311065674, "learning_rate": 8.887749090622779e-05, "loss": 1.6463, "step": 2411 }, { "epoch": 0.43449673496960145, "grad_norm": 1.2881629467010498, "learning_rate": 8.886858427797143e-05, "loss": 1.665, "step": 2412 }, { "epoch": 0.4346768745777978, "grad_norm": 1.1848063468933105, "learning_rate": 8.885967453169618e-05, "loss": 1.5145, "step": 2413 }, { "epoch": 0.43485701418599415, "grad_norm": 1.3114615678787231, "learning_rate": 8.885076166811674e-05, "loss": 1.7421, "step": 2414 }, { "epoch": 0.4350371537941905, "grad_norm": 1.295041799545288, "learning_rate": 8.884184568794814e-05, "loss": 1.7812, "step": 2415 }, { "epoch": 0.43521729340238685, "grad_norm": 1.3562287092208862, "learning_rate": 8.88329265919056e-05, "loss": 1.8488, "step": 2416 }, { "epoch": 0.4353974330105832, "grad_norm": 1.2571269273757935, "learning_rate": 8.88240043807046e-05, "loss": 1.6278, "step": 2417 }, { "epoch": 0.43557757261877955, "grad_norm": 1.1439836025238037, "learning_rate": 8.881507905506087e-05, "loss": 1.4527, "step": 2418 }, { "epoch": 0.4357577122269759, "grad_norm": 1.3284558057785034, "learning_rate": 8.880615061569041e-05, "loss": 1.8035, "step": 2419 }, { "epoch": 0.43593785183517225, "grad_norm": 1.3843868970870972, "learning_rate": 8.879721906330947e-05, "loss": 1.6351, "step": 2420 }, { "epoch": 0.4361179914433686, "grad_norm": 1.2090736627578735, "learning_rate": 8.878828439863449e-05, "loss": 1.5604, "step": 2421 }, { "epoch": 0.43629813105156495, "grad_norm": 1.3676234483718872, "learning_rate": 8.877934662238223e-05, "loss": 1.5865, "step": 2422 }, { "epoch": 0.4364782706597613, "grad_norm": 1.2687270641326904, "learning_rate": 8.877040573526969e-05, "loss": 1.5578, "step": 2423 }, { "epoch": 0.43665841026795765, "grad_norm": 1.3585126399993896, "learning_rate": 8.876146173801407e-05, "loss": 1.6335, "step": 2424 }, { "epoch": 0.436838549876154, "grad_norm": 1.355819821357727, "learning_rate": 8.875251463133288e-05, "loss": 1.6906, "step": 2425 }, { "epoch": 0.43701868948435035, "grad_norm": 1.2871942520141602, "learning_rate": 8.874356441594382e-05, "loss": 1.7136, "step": 2426 }, { "epoch": 0.4371988290925467, "grad_norm": 1.287707805633545, "learning_rate": 8.873461109256492e-05, "loss": 1.7524, "step": 2427 }, { "epoch": 0.43737896870074305, "grad_norm": 1.2620363235473633, "learning_rate": 8.872565466191438e-05, "loss": 1.7398, "step": 2428 }, { "epoch": 0.4375591083089394, "grad_norm": 1.3467801809310913, "learning_rate": 8.871669512471068e-05, "loss": 1.6144, "step": 2429 }, { "epoch": 0.43773924791713575, "grad_norm": 1.2558860778808594, "learning_rate": 8.870773248167254e-05, "loss": 1.5698, "step": 2430 }, { "epoch": 0.43791938752533216, "grad_norm": 1.2626712322235107, "learning_rate": 8.869876673351898e-05, "loss": 1.6514, "step": 2431 }, { "epoch": 0.4380995271335285, "grad_norm": 1.370428204536438, "learning_rate": 8.86897978809692e-05, "loss": 1.831, "step": 2432 }, { "epoch": 0.43827966674172486, "grad_norm": 1.2651333808898926, "learning_rate": 8.868082592474267e-05, "loss": 1.6784, "step": 2433 }, { "epoch": 0.4384598063499212, "grad_norm": 1.1817269325256348, "learning_rate": 8.867185086555913e-05, "loss": 1.489, "step": 2434 }, { "epoch": 0.43863994595811756, "grad_norm": 1.3302593231201172, "learning_rate": 8.866287270413854e-05, "loss": 1.713, "step": 2435 }, { "epoch": 0.4388200855663139, "grad_norm": 1.2751165628433228, "learning_rate": 8.865389144120114e-05, "loss": 1.4732, "step": 2436 }, { "epoch": 0.43900022517451026, "grad_norm": 1.3313978910446167, "learning_rate": 8.864490707746739e-05, "loss": 1.8413, "step": 2437 }, { "epoch": 0.4391803647827066, "grad_norm": 1.3491672277450562, "learning_rate": 8.863591961365803e-05, "loss": 1.5525, "step": 2438 }, { "epoch": 0.43936050439090296, "grad_norm": 1.1956068277359009, "learning_rate": 8.862692905049401e-05, "loss": 1.2878, "step": 2439 }, { "epoch": 0.4395406439990993, "grad_norm": 1.2738324403762817, "learning_rate": 8.861793538869656e-05, "loss": 1.5003, "step": 2440 }, { "epoch": 0.43972078360729566, "grad_norm": 1.3016011714935303, "learning_rate": 8.860893862898713e-05, "loss": 1.5044, "step": 2441 }, { "epoch": 0.439900923215492, "grad_norm": 1.393778920173645, "learning_rate": 8.859993877208747e-05, "loss": 1.6511, "step": 2442 }, { "epoch": 0.44008106282368836, "grad_norm": 1.3200653791427612, "learning_rate": 8.859093581871952e-05, "loss": 1.4509, "step": 2443 }, { "epoch": 0.4402612024318847, "grad_norm": 1.5782687664031982, "learning_rate": 8.858192976960549e-05, "loss": 1.5456, "step": 2444 }, { "epoch": 0.44044134204008106, "grad_norm": 1.5603652000427246, "learning_rate": 8.857292062546783e-05, "loss": 1.8388, "step": 2445 }, { "epoch": 0.4406214816482774, "grad_norm": 1.380617618560791, "learning_rate": 8.856390838702928e-05, "loss": 1.692, "step": 2446 }, { "epoch": 0.44080162125647376, "grad_norm": 1.5715570449829102, "learning_rate": 8.855489305501279e-05, "loss": 1.5373, "step": 2447 }, { "epoch": 0.4409817608646701, "grad_norm": 1.4420361518859863, "learning_rate": 8.854587463014155e-05, "loss": 1.7806, "step": 2448 }, { "epoch": 0.44116190047286646, "grad_norm": 1.3811311721801758, "learning_rate": 8.853685311313903e-05, "loss": 1.5826, "step": 2449 }, { "epoch": 0.4413420400810628, "grad_norm": 1.3375993967056274, "learning_rate": 8.852782850472892e-05, "loss": 1.8287, "step": 2450 }, { "epoch": 0.44152217968925916, "grad_norm": 1.2993003129959106, "learning_rate": 8.851880080563516e-05, "loss": 2.1843, "step": 2451 }, { "epoch": 0.4417023192974555, "grad_norm": 1.493749976158142, "learning_rate": 8.850977001658198e-05, "loss": 1.9254, "step": 2452 }, { "epoch": 0.44188245890565186, "grad_norm": 1.3017258644104004, "learning_rate": 8.850073613829379e-05, "loss": 1.9634, "step": 2453 }, { "epoch": 0.4420625985138482, "grad_norm": 1.6557326316833496, "learning_rate": 8.849169917149531e-05, "loss": 2.0286, "step": 2454 }, { "epoch": 0.44224273812204457, "grad_norm": 1.2409926652908325, "learning_rate": 8.848265911691147e-05, "loss": 1.9505, "step": 2455 }, { "epoch": 0.4424228777302409, "grad_norm": 1.5028578042984009, "learning_rate": 8.847361597526746e-05, "loss": 1.8375, "step": 2456 }, { "epoch": 0.44260301733843727, "grad_norm": 1.4043923616409302, "learning_rate": 8.846456974728873e-05, "loss": 1.982, "step": 2457 }, { "epoch": 0.4427831569466336, "grad_norm": 1.4624378681182861, "learning_rate": 8.845552043370093e-05, "loss": 2.3653, "step": 2458 }, { "epoch": 0.44296329655482997, "grad_norm": 1.6124484539031982, "learning_rate": 8.844646803523002e-05, "loss": 2.1662, "step": 2459 }, { "epoch": 0.44314343616302637, "grad_norm": 1.7372701168060303, "learning_rate": 8.843741255260217e-05, "loss": 2.0882, "step": 2460 }, { "epoch": 0.4433235757712227, "grad_norm": 1.8878483772277832, "learning_rate": 8.842835398654381e-05, "loss": 1.8989, "step": 2461 }, { "epoch": 0.4435037153794191, "grad_norm": 1.1851613521575928, "learning_rate": 8.841929233778161e-05, "loss": 1.6491, "step": 2462 }, { "epoch": 0.4436838549876154, "grad_norm": 1.2387031316757202, "learning_rate": 8.841022760704248e-05, "loss": 1.9347, "step": 2463 }, { "epoch": 0.4438639945958118, "grad_norm": 1.2441588640213013, "learning_rate": 8.840115979505361e-05, "loss": 1.855, "step": 2464 }, { "epoch": 0.4440441342040081, "grad_norm": 1.280192494392395, "learning_rate": 8.839208890254241e-05, "loss": 1.6512, "step": 2465 }, { "epoch": 0.4442242738122045, "grad_norm": 1.2847806215286255, "learning_rate": 8.838301493023654e-05, "loss": 1.6675, "step": 2466 }, { "epoch": 0.4444044134204008, "grad_norm": 1.3067476749420166, "learning_rate": 8.83739378788639e-05, "loss": 1.661, "step": 2467 }, { "epoch": 0.4445845530285972, "grad_norm": 1.2300266027450562, "learning_rate": 8.836485774915267e-05, "loss": 1.6697, "step": 2468 }, { "epoch": 0.4447646926367935, "grad_norm": 1.2237114906311035, "learning_rate": 8.835577454183122e-05, "loss": 1.6357, "step": 2469 }, { "epoch": 0.4449448322449899, "grad_norm": 1.2065696716308594, "learning_rate": 8.834668825762821e-05, "loss": 1.694, "step": 2470 }, { "epoch": 0.4451249718531862, "grad_norm": 1.2156944274902344, "learning_rate": 8.833759889727257e-05, "loss": 1.6118, "step": 2471 }, { "epoch": 0.4453051114613826, "grad_norm": 1.3187237977981567, "learning_rate": 8.83285064614934e-05, "loss": 1.6126, "step": 2472 }, { "epoch": 0.4454852510695789, "grad_norm": 1.2941577434539795, "learning_rate": 8.831941095102013e-05, "loss": 1.6511, "step": 2473 }, { "epoch": 0.4456653906777753, "grad_norm": 1.389487624168396, "learning_rate": 8.831031236658236e-05, "loss": 1.9561, "step": 2474 }, { "epoch": 0.4458455302859716, "grad_norm": 1.3510010242462158, "learning_rate": 8.830121070891e-05, "loss": 1.8267, "step": 2475 }, { "epoch": 0.446025669894168, "grad_norm": 1.2735846042633057, "learning_rate": 8.829210597873318e-05, "loss": 1.777, "step": 2476 }, { "epoch": 0.4462058095023643, "grad_norm": 1.5049409866333008, "learning_rate": 8.828299817678226e-05, "loss": 2.0296, "step": 2477 }, { "epoch": 0.4463859491105607, "grad_norm": 1.2562586069107056, "learning_rate": 8.827388730378787e-05, "loss": 1.649, "step": 2478 }, { "epoch": 0.446566088718757, "grad_norm": 1.282714605331421, "learning_rate": 8.82647733604809e-05, "loss": 1.8501, "step": 2479 }, { "epoch": 0.4467462283269534, "grad_norm": 1.325710415840149, "learning_rate": 8.825565634759243e-05, "loss": 1.8766, "step": 2480 }, { "epoch": 0.4469263679351497, "grad_norm": 1.2347376346588135, "learning_rate": 8.824653626585386e-05, "loss": 1.5214, "step": 2481 }, { "epoch": 0.4471065075433461, "grad_norm": 1.355336308479309, "learning_rate": 8.823741311599678e-05, "loss": 1.8545, "step": 2482 }, { "epoch": 0.4472866471515424, "grad_norm": 1.1751785278320312, "learning_rate": 8.822828689875301e-05, "loss": 1.3873, "step": 2483 }, { "epoch": 0.4474667867597388, "grad_norm": 1.2634822130203247, "learning_rate": 8.821915761485472e-05, "loss": 1.6765, "step": 2484 }, { "epoch": 0.4476469263679351, "grad_norm": 1.3524913787841797, "learning_rate": 8.821002526503422e-05, "loss": 1.7428, "step": 2485 }, { "epoch": 0.4478270659761315, "grad_norm": 1.4607428312301636, "learning_rate": 8.82008898500241e-05, "loss": 1.5735, "step": 2486 }, { "epoch": 0.44800720558432783, "grad_norm": 1.215192198753357, "learning_rate": 8.81917513705572e-05, "loss": 1.6297, "step": 2487 }, { "epoch": 0.44818734519252423, "grad_norm": 1.2971477508544922, "learning_rate": 8.818260982736661e-05, "loss": 1.6451, "step": 2488 }, { "epoch": 0.4483674848007206, "grad_norm": 1.378607988357544, "learning_rate": 8.817346522118566e-05, "loss": 1.878, "step": 2489 }, { "epoch": 0.44854762440891693, "grad_norm": 1.387888789176941, "learning_rate": 8.816431755274791e-05, "loss": 1.6493, "step": 2490 }, { "epoch": 0.4487277640171133, "grad_norm": 1.3238481283187866, "learning_rate": 8.81551668227872e-05, "loss": 1.6445, "step": 2491 }, { "epoch": 0.44890790362530963, "grad_norm": 1.3662227392196655, "learning_rate": 8.81460130320376e-05, "loss": 1.5854, "step": 2492 }, { "epoch": 0.449088043233506, "grad_norm": 1.3880327939987183, "learning_rate": 8.813685618123341e-05, "loss": 1.7137, "step": 2493 }, { "epoch": 0.44926818284170233, "grad_norm": 1.4108017683029175, "learning_rate": 8.81276962711092e-05, "loss": 1.8754, "step": 2494 }, { "epoch": 0.4494483224498987, "grad_norm": 1.4106652736663818, "learning_rate": 8.811853330239976e-05, "loss": 1.476, "step": 2495 }, { "epoch": 0.44962846205809504, "grad_norm": 1.3727869987487793, "learning_rate": 8.810936727584015e-05, "loss": 1.6171, "step": 2496 }, { "epoch": 0.4498086016662914, "grad_norm": 1.503373384475708, "learning_rate": 8.810019819216566e-05, "loss": 1.7728, "step": 2497 }, { "epoch": 0.44998874127448774, "grad_norm": 1.345131754875183, "learning_rate": 8.809102605211181e-05, "loss": 1.6648, "step": 2498 }, { "epoch": 0.4501688808826841, "grad_norm": 1.2526774406433105, "learning_rate": 8.808185085641443e-05, "loss": 1.5028, "step": 2499 }, { "epoch": 0.45034902049088044, "grad_norm": 1.2435864210128784, "learning_rate": 8.807267260580952e-05, "loss": 1.4284, "step": 2500 }, { "epoch": 0.4505291600990768, "grad_norm": 1.3523681163787842, "learning_rate": 8.806349130103333e-05, "loss": 2.2496, "step": 2501 }, { "epoch": 0.45070929970727314, "grad_norm": 1.1527364253997803, "learning_rate": 8.805430694282244e-05, "loss": 1.9082, "step": 2502 }, { "epoch": 0.4508894393154695, "grad_norm": 1.2345834970474243, "learning_rate": 8.804511953191357e-05, "loss": 2.1592, "step": 2503 }, { "epoch": 0.45106957892366584, "grad_norm": 1.2807146310806274, "learning_rate": 8.803592906904374e-05, "loss": 2.3721, "step": 2504 }, { "epoch": 0.4512497185318622, "grad_norm": 1.3325930833816528, "learning_rate": 8.80267355549502e-05, "loss": 2.0378, "step": 2505 }, { "epoch": 0.45142985814005854, "grad_norm": 1.3501343727111816, "learning_rate": 8.801753899037048e-05, "loss": 2.243, "step": 2506 }, { "epoch": 0.4516099977482549, "grad_norm": 1.3663647174835205, "learning_rate": 8.800833937604227e-05, "loss": 1.8671, "step": 2507 }, { "epoch": 0.45179013735645124, "grad_norm": 1.3206526041030884, "learning_rate": 8.799913671270361e-05, "loss": 1.9298, "step": 2508 }, { "epoch": 0.4519702769646476, "grad_norm": 1.6166900396347046, "learning_rate": 8.798993100109271e-05, "loss": 2.3065, "step": 2509 }, { "epoch": 0.45215041657284394, "grad_norm": 1.8101261854171753, "learning_rate": 8.798072224194805e-05, "loss": 2.2197, "step": 2510 }, { "epoch": 0.4523305561810403, "grad_norm": 1.2505074739456177, "learning_rate": 8.797151043600834e-05, "loss": 1.493, "step": 2511 }, { "epoch": 0.45251069578923664, "grad_norm": 1.354472279548645, "learning_rate": 8.796229558401257e-05, "loss": 1.7866, "step": 2512 }, { "epoch": 0.452690835397433, "grad_norm": 1.2613309621810913, "learning_rate": 8.795307768669992e-05, "loss": 1.8656, "step": 2513 }, { "epoch": 0.45287097500562934, "grad_norm": 1.1571128368377686, "learning_rate": 8.794385674480988e-05, "loss": 1.5235, "step": 2514 }, { "epoch": 0.4530511146138257, "grad_norm": 1.310507893562317, "learning_rate": 8.793463275908214e-05, "loss": 1.6617, "step": 2515 }, { "epoch": 0.45323125422202204, "grad_norm": 1.1342276334762573, "learning_rate": 8.792540573025663e-05, "loss": 1.4005, "step": 2516 }, { "epoch": 0.45341139383021845, "grad_norm": 1.0309433937072754, "learning_rate": 8.791617565907353e-05, "loss": 1.3134, "step": 2517 }, { "epoch": 0.4535915334384148, "grad_norm": 1.2996028661727905, "learning_rate": 8.79069425462733e-05, "loss": 1.7006, "step": 2518 }, { "epoch": 0.45377167304661115, "grad_norm": 1.231593370437622, "learning_rate": 8.78977063925966e-05, "loss": 1.6629, "step": 2519 }, { "epoch": 0.4539518126548075, "grad_norm": 1.2369225025177002, "learning_rate": 8.788846719878436e-05, "loss": 1.6645, "step": 2520 }, { "epoch": 0.45413195226300385, "grad_norm": 1.2013051509857178, "learning_rate": 8.787922496557773e-05, "loss": 1.7033, "step": 2521 }, { "epoch": 0.4543120918712002, "grad_norm": 1.249882698059082, "learning_rate": 8.786997969371813e-05, "loss": 1.8146, "step": 2522 }, { "epoch": 0.45449223147939655, "grad_norm": 1.301825761795044, "learning_rate": 8.786073138394721e-05, "loss": 1.6801, "step": 2523 }, { "epoch": 0.4546723710875929, "grad_norm": 1.1768606901168823, "learning_rate": 8.785148003700685e-05, "loss": 1.5777, "step": 2524 }, { "epoch": 0.45485251069578925, "grad_norm": 1.1592276096343994, "learning_rate": 8.784222565363918e-05, "loss": 1.4041, "step": 2525 }, { "epoch": 0.4550326503039856, "grad_norm": 1.1781882047653198, "learning_rate": 8.783296823458662e-05, "loss": 1.5239, "step": 2526 }, { "epoch": 0.45521278991218195, "grad_norm": 1.2495073080062866, "learning_rate": 8.782370778059177e-05, "loss": 1.6619, "step": 2527 }, { "epoch": 0.4553929295203783, "grad_norm": 1.3477360010147095, "learning_rate": 8.781444429239752e-05, "loss": 2.0455, "step": 2528 }, { "epoch": 0.45557306912857465, "grad_norm": 1.3403608798980713, "learning_rate": 8.780517777074695e-05, "loss": 1.6951, "step": 2529 }, { "epoch": 0.455753208736771, "grad_norm": 1.1876440048217773, "learning_rate": 8.779590821638344e-05, "loss": 1.3576, "step": 2530 }, { "epoch": 0.45593334834496735, "grad_norm": 1.2472617626190186, "learning_rate": 8.778663563005059e-05, "loss": 1.6684, "step": 2531 }, { "epoch": 0.4561134879531637, "grad_norm": 1.2174952030181885, "learning_rate": 8.777736001249224e-05, "loss": 1.5275, "step": 2532 }, { "epoch": 0.45629362756136005, "grad_norm": 1.2705187797546387, "learning_rate": 8.776808136445245e-05, "loss": 1.5641, "step": 2533 }, { "epoch": 0.4564737671695564, "grad_norm": 1.1995683908462524, "learning_rate": 8.775879968667557e-05, "loss": 1.5224, "step": 2534 }, { "epoch": 0.45665390677775275, "grad_norm": 1.3762696981430054, "learning_rate": 8.774951497990617e-05, "loss": 1.8633, "step": 2535 }, { "epoch": 0.4568340463859491, "grad_norm": 1.2934454679489136, "learning_rate": 8.774022724488908e-05, "loss": 1.7836, "step": 2536 }, { "epoch": 0.45701418599414545, "grad_norm": 1.3924291133880615, "learning_rate": 8.773093648236934e-05, "loss": 1.7315, "step": 2537 }, { "epoch": 0.4571943256023418, "grad_norm": 1.357987880706787, "learning_rate": 8.772164269309225e-05, "loss": 1.7143, "step": 2538 }, { "epoch": 0.45737446521053815, "grad_norm": 1.3550989627838135, "learning_rate": 8.771234587780337e-05, "loss": 1.5903, "step": 2539 }, { "epoch": 0.4575546048187345, "grad_norm": 1.3092279434204102, "learning_rate": 8.770304603724846e-05, "loss": 1.7098, "step": 2540 }, { "epoch": 0.45773474442693085, "grad_norm": 1.2539315223693848, "learning_rate": 8.769374317217356e-05, "loss": 1.6217, "step": 2541 }, { "epoch": 0.4579148840351272, "grad_norm": 1.490655541419983, "learning_rate": 8.768443728332494e-05, "loss": 1.7979, "step": 2542 }, { "epoch": 0.45809502364332355, "grad_norm": 1.288400411605835, "learning_rate": 8.767512837144911e-05, "loss": 1.7497, "step": 2543 }, { "epoch": 0.4582751632515199, "grad_norm": 1.5470545291900635, "learning_rate": 8.766581643729286e-05, "loss": 1.8006, "step": 2544 }, { "epoch": 0.4584553028597163, "grad_norm": 1.3264930248260498, "learning_rate": 8.765650148160314e-05, "loss": 1.6516, "step": 2545 }, { "epoch": 0.45863544246791266, "grad_norm": 1.4090070724487305, "learning_rate": 8.764718350512721e-05, "loss": 1.6554, "step": 2546 }, { "epoch": 0.458815582076109, "grad_norm": 1.4938548803329468, "learning_rate": 8.763786250861256e-05, "loss": 1.7768, "step": 2547 }, { "epoch": 0.45899572168430536, "grad_norm": 1.2691638469696045, "learning_rate": 8.762853849280693e-05, "loss": 1.4626, "step": 2548 }, { "epoch": 0.4591758612925017, "grad_norm": 1.5443065166473389, "learning_rate": 8.761921145845826e-05, "loss": 1.6673, "step": 2549 }, { "epoch": 0.45935600090069806, "grad_norm": 1.3302720785140991, "learning_rate": 8.760988140631476e-05, "loss": 1.506, "step": 2550 }, { "epoch": 0.4595361405088944, "grad_norm": 1.1516814231872559, "learning_rate": 8.76005483371249e-05, "loss": 2.1592, "step": 2551 }, { "epoch": 0.45971628011709076, "grad_norm": 1.2670003175735474, "learning_rate": 8.759121225163738e-05, "loss": 1.9908, "step": 2552 }, { "epoch": 0.4598964197252871, "grad_norm": 1.3853363990783691, "learning_rate": 8.758187315060111e-05, "loss": 2.1705, "step": 2553 }, { "epoch": 0.46007655933348346, "grad_norm": 1.2478830814361572, "learning_rate": 8.757253103476528e-05, "loss": 1.9577, "step": 2554 }, { "epoch": 0.4602566989416798, "grad_norm": 1.3179986476898193, "learning_rate": 8.75631859048793e-05, "loss": 2.2253, "step": 2555 }, { "epoch": 0.46043683854987616, "grad_norm": 1.29220449924469, "learning_rate": 8.755383776169286e-05, "loss": 1.7994, "step": 2556 }, { "epoch": 0.4606169781580725, "grad_norm": 1.5291500091552734, "learning_rate": 8.754448660595585e-05, "loss": 1.9425, "step": 2557 }, { "epoch": 0.46079711776626886, "grad_norm": 1.4553630352020264, "learning_rate": 8.75351324384184e-05, "loss": 2.0551, "step": 2558 }, { "epoch": 0.4609772573744652, "grad_norm": 1.6553764343261719, "learning_rate": 8.752577525983093e-05, "loss": 2.1199, "step": 2559 }, { "epoch": 0.46115739698266156, "grad_norm": 1.1331473588943481, "learning_rate": 8.751641507094401e-05, "loss": 1.5915, "step": 2560 }, { "epoch": 0.4613375365908579, "grad_norm": 1.4233940839767456, "learning_rate": 8.750705187250858e-05, "loss": 2.0584, "step": 2561 }, { "epoch": 0.46151767619905426, "grad_norm": 1.2926568984985352, "learning_rate": 8.74976856652757e-05, "loss": 1.5486, "step": 2562 }, { "epoch": 0.4616978158072506, "grad_norm": 1.3534621000289917, "learning_rate": 8.748831644999674e-05, "loss": 1.6501, "step": 2563 }, { "epoch": 0.46187795541544696, "grad_norm": 1.1807377338409424, "learning_rate": 8.74789442274233e-05, "loss": 1.526, "step": 2564 }, { "epoch": 0.4620580950236433, "grad_norm": 1.4298193454742432, "learning_rate": 8.746956899830721e-05, "loss": 1.4228, "step": 2565 }, { "epoch": 0.46223823463183966, "grad_norm": 1.1970449686050415, "learning_rate": 8.746019076340055e-05, "loss": 1.6464, "step": 2566 }, { "epoch": 0.462418374240036, "grad_norm": 1.2095990180969238, "learning_rate": 8.745080952345562e-05, "loss": 1.4759, "step": 2567 }, { "epoch": 0.46259851384823236, "grad_norm": 1.303605079650879, "learning_rate": 8.744142527922501e-05, "loss": 1.8078, "step": 2568 }, { "epoch": 0.4627786534564287, "grad_norm": 1.3396903276443481, "learning_rate": 8.743203803146148e-05, "loss": 1.5931, "step": 2569 }, { "epoch": 0.46295879306462506, "grad_norm": 1.4176502227783203, "learning_rate": 8.742264778091812e-05, "loss": 1.8256, "step": 2570 }, { "epoch": 0.4631389326728214, "grad_norm": 1.3381545543670654, "learning_rate": 8.741325452834814e-05, "loss": 1.7019, "step": 2571 }, { "epoch": 0.46331907228101776, "grad_norm": 1.2189345359802246, "learning_rate": 8.740385827450514e-05, "loss": 1.4363, "step": 2572 }, { "epoch": 0.4634992118892141, "grad_norm": 1.2670601606369019, "learning_rate": 8.739445902014284e-05, "loss": 1.6716, "step": 2573 }, { "epoch": 0.4636793514974105, "grad_norm": 1.3506355285644531, "learning_rate": 8.738505676601525e-05, "loss": 1.6444, "step": 2574 }, { "epoch": 0.46385949110560687, "grad_norm": 1.1819381713867188, "learning_rate": 8.737565151287662e-05, "loss": 1.346, "step": 2575 }, { "epoch": 0.4640396307138032, "grad_norm": 1.245508074760437, "learning_rate": 8.736624326148142e-05, "loss": 1.6375, "step": 2576 }, { "epoch": 0.46421977032199957, "grad_norm": 1.2968839406967163, "learning_rate": 8.73568320125844e-05, "loss": 1.6468, "step": 2577 }, { "epoch": 0.4643999099301959, "grad_norm": 1.289516806602478, "learning_rate": 8.734741776694048e-05, "loss": 1.7434, "step": 2578 }, { "epoch": 0.46458004953839227, "grad_norm": 1.2348601818084717, "learning_rate": 8.733800052530495e-05, "loss": 1.3735, "step": 2579 }, { "epoch": 0.4647601891465886, "grad_norm": 1.2062407732009888, "learning_rate": 8.732858028843316e-05, "loss": 1.6988, "step": 2580 }, { "epoch": 0.46494032875478497, "grad_norm": 1.302638053894043, "learning_rate": 8.731915705708086e-05, "loss": 1.7326, "step": 2581 }, { "epoch": 0.4651204683629813, "grad_norm": 1.3212897777557373, "learning_rate": 8.730973083200395e-05, "loss": 1.8311, "step": 2582 }, { "epoch": 0.46530060797117767, "grad_norm": 1.2713528871536255, "learning_rate": 8.730030161395862e-05, "loss": 1.6469, "step": 2583 }, { "epoch": 0.465480747579374, "grad_norm": 1.217058539390564, "learning_rate": 8.729086940370125e-05, "loss": 1.6528, "step": 2584 }, { "epoch": 0.46566088718757037, "grad_norm": 1.425415277481079, "learning_rate": 8.728143420198851e-05, "loss": 1.8322, "step": 2585 }, { "epoch": 0.4658410267957667, "grad_norm": 1.35520601272583, "learning_rate": 8.727199600957728e-05, "loss": 1.7416, "step": 2586 }, { "epoch": 0.46602116640396307, "grad_norm": 1.2844891548156738, "learning_rate": 8.726255482722467e-05, "loss": 1.6008, "step": 2587 }, { "epoch": 0.4662013060121594, "grad_norm": 1.2533422708511353, "learning_rate": 8.725311065568806e-05, "loss": 1.5579, "step": 2588 }, { "epoch": 0.4663814456203558, "grad_norm": 1.4437865018844604, "learning_rate": 8.724366349572508e-05, "loss": 1.6766, "step": 2589 }, { "epoch": 0.4665615852285521, "grad_norm": 1.2465699911117554, "learning_rate": 8.723421334809354e-05, "loss": 1.4116, "step": 2590 }, { "epoch": 0.4667417248367485, "grad_norm": 1.3845593929290771, "learning_rate": 8.722476021355155e-05, "loss": 1.9713, "step": 2591 }, { "epoch": 0.4669218644449448, "grad_norm": 1.3377214670181274, "learning_rate": 8.72153040928574e-05, "loss": 1.5961, "step": 2592 }, { "epoch": 0.4671020040531412, "grad_norm": 1.4602549076080322, "learning_rate": 8.720584498676971e-05, "loss": 2.0016, "step": 2593 }, { "epoch": 0.4672821436613375, "grad_norm": 1.2804954051971436, "learning_rate": 8.719638289604726e-05, "loss": 1.5078, "step": 2594 }, { "epoch": 0.4674622832695339, "grad_norm": 1.462478518486023, "learning_rate": 8.718691782144908e-05, "loss": 1.6792, "step": 2595 }, { "epoch": 0.4676424228777302, "grad_norm": 1.4092472791671753, "learning_rate": 8.717744976373447e-05, "loss": 1.6286, "step": 2596 }, { "epoch": 0.4678225624859266, "grad_norm": 1.367961049079895, "learning_rate": 8.716797872366293e-05, "loss": 1.3877, "step": 2597 }, { "epoch": 0.4680027020941229, "grad_norm": 1.4596394300460815, "learning_rate": 8.715850470199428e-05, "loss": 1.8017, "step": 2598 }, { "epoch": 0.4681828417023193, "grad_norm": 1.36800217628479, "learning_rate": 8.714902769948846e-05, "loss": 1.4744, "step": 2599 }, { "epoch": 0.4683629813105156, "grad_norm": 1.3347901105880737, "learning_rate": 8.713954771690572e-05, "loss": 1.4369, "step": 2600 }, { "epoch": 0.468543120918712, "grad_norm": 2.5180611610412598, "learning_rate": 8.713006475500657e-05, "loss": 2.0333, "step": 2601 }, { "epoch": 0.4687232605269084, "grad_norm": 1.3184151649475098, "learning_rate": 8.71205788145517e-05, "loss": 2.2133, "step": 2602 }, { "epoch": 0.46890340013510473, "grad_norm": 1.5204957723617554, "learning_rate": 8.711108989630207e-05, "loss": 2.0981, "step": 2603 }, { "epoch": 0.4690835397433011, "grad_norm": 1.3316802978515625, "learning_rate": 8.710159800101891e-05, "loss": 2.0305, "step": 2604 }, { "epoch": 0.46926367935149743, "grad_norm": 1.493296504020691, "learning_rate": 8.709210312946363e-05, "loss": 2.19, "step": 2605 }, { "epoch": 0.4694438189596938, "grad_norm": 1.3044030666351318, "learning_rate": 8.708260528239788e-05, "loss": 1.9925, "step": 2606 }, { "epoch": 0.46962395856789013, "grad_norm": 1.5854933261871338, "learning_rate": 8.707310446058361e-05, "loss": 2.0451, "step": 2607 }, { "epoch": 0.4698040981760865, "grad_norm": 1.5118924379348755, "learning_rate": 8.706360066478296e-05, "loss": 2.2421, "step": 2608 }, { "epoch": 0.46998423778428283, "grad_norm": 1.749561071395874, "learning_rate": 8.705409389575831e-05, "loss": 2.4843, "step": 2609 }, { "epoch": 0.4701643773924792, "grad_norm": 1.6904290914535522, "learning_rate": 8.70445841542723e-05, "loss": 2.1574, "step": 2610 }, { "epoch": 0.47034451700067553, "grad_norm": 1.1131564378738403, "learning_rate": 8.70350714410878e-05, "loss": 1.6179, "step": 2611 }, { "epoch": 0.4705246566088719, "grad_norm": 1.2533875703811646, "learning_rate": 8.702555575696789e-05, "loss": 1.786, "step": 2612 }, { "epoch": 0.47070479621706823, "grad_norm": 1.1966012716293335, "learning_rate": 8.701603710267593e-05, "loss": 1.7227, "step": 2613 }, { "epoch": 0.4708849358252646, "grad_norm": 1.196203589439392, "learning_rate": 8.700651547897553e-05, "loss": 1.8332, "step": 2614 }, { "epoch": 0.47106507543346093, "grad_norm": 1.352143406867981, "learning_rate": 8.699699088663047e-05, "loss": 1.8101, "step": 2615 }, { "epoch": 0.4712452150416573, "grad_norm": 1.2338167428970337, "learning_rate": 8.698746332640482e-05, "loss": 1.5321, "step": 2616 }, { "epoch": 0.47142535464985363, "grad_norm": 1.1382373571395874, "learning_rate": 8.697793279906288e-05, "loss": 1.5785, "step": 2617 }, { "epoch": 0.47160549425805, "grad_norm": 1.2017070055007935, "learning_rate": 8.696839930536919e-05, "loss": 1.4744, "step": 2618 }, { "epoch": 0.47178563386624633, "grad_norm": 1.2269024848937988, "learning_rate": 8.69588628460885e-05, "loss": 1.4611, "step": 2619 }, { "epoch": 0.4719657734744427, "grad_norm": 1.2656941413879395, "learning_rate": 8.694932342198585e-05, "loss": 1.5712, "step": 2620 }, { "epoch": 0.47214591308263903, "grad_norm": 1.6248953342437744, "learning_rate": 8.693978103382646e-05, "loss": 1.6579, "step": 2621 }, { "epoch": 0.4723260526908354, "grad_norm": 1.3158235549926758, "learning_rate": 8.693023568237582e-05, "loss": 1.7402, "step": 2622 }, { "epoch": 0.47250619229903174, "grad_norm": 1.175398826599121, "learning_rate": 8.692068736839968e-05, "loss": 1.4395, "step": 2623 }, { "epoch": 0.4726863319072281, "grad_norm": 1.2811686992645264, "learning_rate": 8.691113609266398e-05, "loss": 1.6473, "step": 2624 }, { "epoch": 0.47286647151542444, "grad_norm": 1.2921103239059448, "learning_rate": 8.690158185593491e-05, "loss": 1.8152, "step": 2625 }, { "epoch": 0.4730466111236208, "grad_norm": 1.3153115510940552, "learning_rate": 8.689202465897893e-05, "loss": 1.728, "step": 2626 }, { "epoch": 0.47322675073181714, "grad_norm": 1.215086579322815, "learning_rate": 8.688246450256267e-05, "loss": 1.4166, "step": 2627 }, { "epoch": 0.4734068903400135, "grad_norm": 1.4470634460449219, "learning_rate": 8.687290138745313e-05, "loss": 1.6833, "step": 2628 }, { "epoch": 0.47358702994820984, "grad_norm": 1.2941168546676636, "learning_rate": 8.686333531441734e-05, "loss": 1.6226, "step": 2629 }, { "epoch": 0.47376716955640624, "grad_norm": 1.4168493747711182, "learning_rate": 8.685376628422279e-05, "loss": 1.8653, "step": 2630 }, { "epoch": 0.4739473091646026, "grad_norm": 1.3069216012954712, "learning_rate": 8.684419429763703e-05, "loss": 1.6843, "step": 2631 }, { "epoch": 0.47412744877279894, "grad_norm": 1.3069379329681396, "learning_rate": 8.683461935542798e-05, "loss": 1.4671, "step": 2632 }, { "epoch": 0.4743075883809953, "grad_norm": 1.3075499534606934, "learning_rate": 8.682504145836369e-05, "loss": 1.7295, "step": 2633 }, { "epoch": 0.47448772798919164, "grad_norm": 1.2161720991134644, "learning_rate": 8.68154606072125e-05, "loss": 1.4187, "step": 2634 }, { "epoch": 0.474667867597388, "grad_norm": 1.4876856803894043, "learning_rate": 8.680587680274302e-05, "loss": 2.0348, "step": 2635 }, { "epoch": 0.47484800720558434, "grad_norm": 1.3415518999099731, "learning_rate": 8.679629004572401e-05, "loss": 1.6867, "step": 2636 }, { "epoch": 0.4750281468137807, "grad_norm": 1.288272500038147, "learning_rate": 8.678670033692454e-05, "loss": 1.4819, "step": 2637 }, { "epoch": 0.47520828642197704, "grad_norm": 1.5106487274169922, "learning_rate": 8.677710767711389e-05, "loss": 2.0366, "step": 2638 }, { "epoch": 0.4753884260301734, "grad_norm": 1.5960053205490112, "learning_rate": 8.676751206706157e-05, "loss": 1.6012, "step": 2639 }, { "epoch": 0.47556856563836974, "grad_norm": 1.4855772256851196, "learning_rate": 8.675791350753735e-05, "loss": 1.988, "step": 2640 }, { "epoch": 0.4757487052465661, "grad_norm": 1.5577479600906372, "learning_rate": 8.674831199931121e-05, "loss": 1.7417, "step": 2641 }, { "epoch": 0.47592884485476245, "grad_norm": 1.4745912551879883, "learning_rate": 8.673870754315336e-05, "loss": 1.6599, "step": 2642 }, { "epoch": 0.4761089844629588, "grad_norm": 1.434037685394287, "learning_rate": 8.672910013983431e-05, "loss": 1.7926, "step": 2643 }, { "epoch": 0.47628912407115515, "grad_norm": 1.387356162071228, "learning_rate": 8.671948979012474e-05, "loss": 1.703, "step": 2644 }, { "epoch": 0.4764692636793515, "grad_norm": 1.4359241724014282, "learning_rate": 8.670987649479557e-05, "loss": 1.6394, "step": 2645 }, { "epoch": 0.47664940328754785, "grad_norm": 1.3842597007751465, "learning_rate": 8.6700260254618e-05, "loss": 1.3689, "step": 2646 }, { "epoch": 0.4768295428957442, "grad_norm": 1.463671088218689, "learning_rate": 8.66906410703634e-05, "loss": 1.5767, "step": 2647 }, { "epoch": 0.47700968250394055, "grad_norm": 1.48291015625, "learning_rate": 8.668101894280347e-05, "loss": 1.6378, "step": 2648 }, { "epoch": 0.4771898221121369, "grad_norm": 1.493817925453186, "learning_rate": 8.667139387271007e-05, "loss": 1.5672, "step": 2649 }, { "epoch": 0.47736996172033325, "grad_norm": 1.350455641746521, "learning_rate": 8.66617658608553e-05, "loss": 1.3829, "step": 2650 }, { "epoch": 0.4775501013285296, "grad_norm": 1.316884994506836, "learning_rate": 8.665213490801154e-05, "loss": 2.2399, "step": 2651 }, { "epoch": 0.47773024093672595, "grad_norm": 1.3907532691955566, "learning_rate": 8.664250101495137e-05, "loss": 2.1095, "step": 2652 }, { "epoch": 0.4779103805449223, "grad_norm": 2.0396082401275635, "learning_rate": 8.663286418244763e-05, "loss": 2.266, "step": 2653 }, { "epoch": 0.47809052015311865, "grad_norm": 1.4065428972244263, "learning_rate": 8.662322441127334e-05, "loss": 2.2639, "step": 2654 }, { "epoch": 0.478270659761315, "grad_norm": 1.420646071434021, "learning_rate": 8.661358170220183e-05, "loss": 2.092, "step": 2655 }, { "epoch": 0.47845079936951135, "grad_norm": 1.272267460823059, "learning_rate": 8.660393605600666e-05, "loss": 1.9187, "step": 2656 }, { "epoch": 0.4786309389777077, "grad_norm": 1.359609842300415, "learning_rate": 8.659428747346154e-05, "loss": 1.9691, "step": 2657 }, { "epoch": 0.47881107858590405, "grad_norm": 1.4266998767852783, "learning_rate": 8.658463595534051e-05, "loss": 1.9072, "step": 2658 }, { "epoch": 0.47899121819410045, "grad_norm": 1.6890134811401367, "learning_rate": 8.657498150241781e-05, "loss": 2.1801, "step": 2659 }, { "epoch": 0.4791713578022968, "grad_norm": 1.6149895191192627, "learning_rate": 8.65653241154679e-05, "loss": 2.0892, "step": 2660 }, { "epoch": 0.47935149741049315, "grad_norm": 1.251991629600525, "learning_rate": 8.655566379526551e-05, "loss": 1.8521, "step": 2661 }, { "epoch": 0.4795316370186895, "grad_norm": 1.3477108478546143, "learning_rate": 8.654600054258557e-05, "loss": 1.8777, "step": 2662 }, { "epoch": 0.47971177662688586, "grad_norm": 1.3829909563064575, "learning_rate": 8.653633435820326e-05, "loss": 1.7965, "step": 2663 }, { "epoch": 0.4798919162350822, "grad_norm": 1.1786986589431763, "learning_rate": 8.6526665242894e-05, "loss": 1.5584, "step": 2664 }, { "epoch": 0.48007205584327856, "grad_norm": 1.1727650165557861, "learning_rate": 8.651699319743347e-05, "loss": 1.4325, "step": 2665 }, { "epoch": 0.4802521954514749, "grad_norm": 1.2339653968811035, "learning_rate": 8.650731822259753e-05, "loss": 1.6374, "step": 2666 }, { "epoch": 0.48043233505967126, "grad_norm": 1.2392654418945312, "learning_rate": 8.649764031916227e-05, "loss": 1.6147, "step": 2667 }, { "epoch": 0.4806124746678676, "grad_norm": 1.35507333278656, "learning_rate": 8.648795948790411e-05, "loss": 1.6223, "step": 2668 }, { "epoch": 0.48079261427606396, "grad_norm": 1.3900197744369507, "learning_rate": 8.647827572959962e-05, "loss": 1.8806, "step": 2669 }, { "epoch": 0.4809727538842603, "grad_norm": 1.4021410942077637, "learning_rate": 8.646858904502559e-05, "loss": 1.9184, "step": 2670 }, { "epoch": 0.48115289349245666, "grad_norm": 1.2401812076568604, "learning_rate": 8.645889943495913e-05, "loss": 1.6602, "step": 2671 }, { "epoch": 0.481333033100653, "grad_norm": 1.418326497077942, "learning_rate": 8.644920690017752e-05, "loss": 1.5734, "step": 2672 }, { "epoch": 0.48151317270884936, "grad_norm": 1.2083072662353516, "learning_rate": 8.643951144145828e-05, "loss": 1.5713, "step": 2673 }, { "epoch": 0.4816933123170457, "grad_norm": 1.2850606441497803, "learning_rate": 8.642981305957919e-05, "loss": 1.7279, "step": 2674 }, { "epoch": 0.48187345192524206, "grad_norm": 1.326866626739502, "learning_rate": 8.642011175531822e-05, "loss": 1.7855, "step": 2675 }, { "epoch": 0.4820535915334384, "grad_norm": 1.4067503213882446, "learning_rate": 8.641040752945363e-05, "loss": 1.7365, "step": 2676 }, { "epoch": 0.48223373114163476, "grad_norm": 1.415247917175293, "learning_rate": 8.640070038276388e-05, "loss": 1.5787, "step": 2677 }, { "epoch": 0.4824138707498311, "grad_norm": 1.2646092176437378, "learning_rate": 8.639099031602769e-05, "loss": 1.4971, "step": 2678 }, { "epoch": 0.48259401035802746, "grad_norm": 1.294352412223816, "learning_rate": 8.638127733002397e-05, "loss": 1.5892, "step": 2679 }, { "epoch": 0.4827741499662238, "grad_norm": 1.2876085042953491, "learning_rate": 8.637156142553191e-05, "loss": 1.5463, "step": 2680 }, { "epoch": 0.48295428957442016, "grad_norm": 1.224370002746582, "learning_rate": 8.636184260333091e-05, "loss": 1.651, "step": 2681 }, { "epoch": 0.4831344291826165, "grad_norm": 1.3199914693832397, "learning_rate": 8.635212086420061e-05, "loss": 1.6555, "step": 2682 }, { "epoch": 0.48331456879081286, "grad_norm": 1.2267916202545166, "learning_rate": 8.634239620892088e-05, "loss": 1.5521, "step": 2683 }, { "epoch": 0.4834947083990092, "grad_norm": 1.3754695653915405, "learning_rate": 8.633266863827181e-05, "loss": 1.6656, "step": 2684 }, { "epoch": 0.48367484800720556, "grad_norm": 1.2926530838012695, "learning_rate": 8.632293815303378e-05, "loss": 1.566, "step": 2685 }, { "epoch": 0.4838549876154019, "grad_norm": 1.33713960647583, "learning_rate": 8.631320475398734e-05, "loss": 1.727, "step": 2686 }, { "epoch": 0.4840351272235983, "grad_norm": 1.3771313428878784, "learning_rate": 8.630346844191329e-05, "loss": 1.509, "step": 2687 }, { "epoch": 0.48421526683179467, "grad_norm": 1.4455842971801758, "learning_rate": 8.629372921759269e-05, "loss": 1.7759, "step": 2688 }, { "epoch": 0.484395406439991, "grad_norm": 1.2864117622375488, "learning_rate": 8.628398708180681e-05, "loss": 1.6044, "step": 2689 }, { "epoch": 0.48457554604818737, "grad_norm": 1.3856821060180664, "learning_rate": 8.627424203533716e-05, "loss": 1.9426, "step": 2690 }, { "epoch": 0.4847556856563837, "grad_norm": 1.5106086730957031, "learning_rate": 8.626449407896548e-05, "loss": 1.7021, "step": 2691 }, { "epoch": 0.48493582526458007, "grad_norm": 1.3311948776245117, "learning_rate": 8.625474321347376e-05, "loss": 1.8208, "step": 2692 }, { "epoch": 0.4851159648727764, "grad_norm": 1.3760781288146973, "learning_rate": 8.624498943964419e-05, "loss": 1.6965, "step": 2693 }, { "epoch": 0.48529610448097277, "grad_norm": 1.385847806930542, "learning_rate": 8.623523275825922e-05, "loss": 1.5341, "step": 2694 }, { "epoch": 0.4854762440891691, "grad_norm": 1.301340937614441, "learning_rate": 8.622547317010154e-05, "loss": 1.5803, "step": 2695 }, { "epoch": 0.48565638369736547, "grad_norm": 1.405910611152649, "learning_rate": 8.621571067595404e-05, "loss": 1.7003, "step": 2696 }, { "epoch": 0.4858365233055618, "grad_norm": 1.2262637615203857, "learning_rate": 8.620594527659987e-05, "loss": 1.3236, "step": 2697 }, { "epoch": 0.48601666291375817, "grad_norm": 1.442098617553711, "learning_rate": 8.61961769728224e-05, "loss": 1.5305, "step": 2698 }, { "epoch": 0.4861968025219545, "grad_norm": 1.2316488027572632, "learning_rate": 8.618640576540525e-05, "loss": 1.3083, "step": 2699 }, { "epoch": 0.48637694213015087, "grad_norm": 1.3544647693634033, "learning_rate": 8.617663165513223e-05, "loss": 1.6582, "step": 2700 }, { "epoch": 0.4865570817383472, "grad_norm": 1.1886539459228516, "learning_rate": 8.616685464278748e-05, "loss": 2.0696, "step": 2701 }, { "epoch": 0.48673722134654357, "grad_norm": 1.2951563596725464, "learning_rate": 8.615707472915525e-05, "loss": 2.2145, "step": 2702 }, { "epoch": 0.4869173609547399, "grad_norm": 1.3158202171325684, "learning_rate": 8.614729191502008e-05, "loss": 2.1534, "step": 2703 }, { "epoch": 0.48709750056293627, "grad_norm": 1.250584602355957, "learning_rate": 8.613750620116679e-05, "loss": 1.9095, "step": 2704 }, { "epoch": 0.4872776401711326, "grad_norm": 1.3280251026153564, "learning_rate": 8.612771758838033e-05, "loss": 2.0795, "step": 2705 }, { "epoch": 0.48745777977932897, "grad_norm": 1.3332096338272095, "learning_rate": 8.611792607744597e-05, "loss": 1.896, "step": 2706 }, { "epoch": 0.4876379193875253, "grad_norm": 1.3256202936172485, "learning_rate": 8.610813166914917e-05, "loss": 1.8772, "step": 2707 }, { "epoch": 0.48781805899572167, "grad_norm": 1.649614930152893, "learning_rate": 8.609833436427563e-05, "loss": 2.2148, "step": 2708 }, { "epoch": 0.487998198603918, "grad_norm": 1.8630517721176147, "learning_rate": 8.608853416361131e-05, "loss": 2.4517, "step": 2709 }, { "epoch": 0.48817833821211437, "grad_norm": 1.3139984607696533, "learning_rate": 8.607873106794235e-05, "loss": 1.7884, "step": 2710 }, { "epoch": 0.4883584778203107, "grad_norm": 1.335869550704956, "learning_rate": 8.606892507805515e-05, "loss": 1.7496, "step": 2711 }, { "epoch": 0.48853861742850707, "grad_norm": 1.2606089115142822, "learning_rate": 8.605911619473635e-05, "loss": 1.597, "step": 2712 }, { "epoch": 0.4887187570367034, "grad_norm": 1.2731069326400757, "learning_rate": 8.604930441877281e-05, "loss": 1.4671, "step": 2713 }, { "epoch": 0.48889889664489977, "grad_norm": 1.2373595237731934, "learning_rate": 8.603948975095165e-05, "loss": 1.5107, "step": 2714 }, { "epoch": 0.4890790362530961, "grad_norm": 1.2658288478851318, "learning_rate": 8.602967219206015e-05, "loss": 1.7882, "step": 2715 }, { "epoch": 0.48925917586129253, "grad_norm": 1.2807252407073975, "learning_rate": 8.60198517428859e-05, "loss": 1.5173, "step": 2716 }, { "epoch": 0.4894393154694889, "grad_norm": 1.3331763744354248, "learning_rate": 8.601002840421671e-05, "loss": 1.6888, "step": 2717 }, { "epoch": 0.48961945507768523, "grad_norm": 1.3163950443267822, "learning_rate": 8.600020217684055e-05, "loss": 1.7147, "step": 2718 }, { "epoch": 0.4897995946858816, "grad_norm": 1.1635547876358032, "learning_rate": 8.599037306154572e-05, "loss": 1.5312, "step": 2719 }, { "epoch": 0.48997973429407793, "grad_norm": 1.3463550806045532, "learning_rate": 8.59805410591207e-05, "loss": 1.7128, "step": 2720 }, { "epoch": 0.4901598739022743, "grad_norm": 1.1776913404464722, "learning_rate": 8.597070617035419e-05, "loss": 1.4958, "step": 2721 }, { "epoch": 0.49034001351047063, "grad_norm": 1.2490227222442627, "learning_rate": 8.596086839603517e-05, "loss": 1.5679, "step": 2722 }, { "epoch": 0.490520153118667, "grad_norm": 1.2558070421218872, "learning_rate": 8.595102773695278e-05, "loss": 1.5767, "step": 2723 }, { "epoch": 0.49070029272686333, "grad_norm": 1.3319237232208252, "learning_rate": 8.594118419389647e-05, "loss": 1.9236, "step": 2724 }, { "epoch": 0.4908804323350597, "grad_norm": 1.2993637323379517, "learning_rate": 8.593133776765587e-05, "loss": 1.8092, "step": 2725 }, { "epoch": 0.49106057194325603, "grad_norm": 1.3291493654251099, "learning_rate": 8.592148845902086e-05, "loss": 1.677, "step": 2726 }, { "epoch": 0.4912407115514524, "grad_norm": 1.222584843635559, "learning_rate": 8.591163626878153e-05, "loss": 1.6296, "step": 2727 }, { "epoch": 0.49142085115964873, "grad_norm": 1.2929731607437134, "learning_rate": 8.590178119772824e-05, "loss": 1.5407, "step": 2728 }, { "epoch": 0.4916009907678451, "grad_norm": 1.325379729270935, "learning_rate": 8.589192324665154e-05, "loss": 1.7361, "step": 2729 }, { "epoch": 0.49178113037604143, "grad_norm": 1.2696601152420044, "learning_rate": 8.588206241634225e-05, "loss": 1.5384, "step": 2730 }, { "epoch": 0.4919612699842378, "grad_norm": 1.283871054649353, "learning_rate": 8.587219870759138e-05, "loss": 1.8387, "step": 2731 }, { "epoch": 0.49214140959243413, "grad_norm": 1.4020068645477295, "learning_rate": 8.586233212119021e-05, "loss": 1.8382, "step": 2732 }, { "epoch": 0.4923215492006305, "grad_norm": 1.1911858320236206, "learning_rate": 8.585246265793023e-05, "loss": 1.4332, "step": 2733 }, { "epoch": 0.49250168880882683, "grad_norm": 1.401396632194519, "learning_rate": 8.584259031860315e-05, "loss": 1.7361, "step": 2734 }, { "epoch": 0.4926818284170232, "grad_norm": 1.230306625366211, "learning_rate": 8.583271510400093e-05, "loss": 1.5654, "step": 2735 }, { "epoch": 0.49286196802521953, "grad_norm": 1.318452000617981, "learning_rate": 8.582283701491576e-05, "loss": 1.8634, "step": 2736 }, { "epoch": 0.4930421076334159, "grad_norm": 1.3202909231185913, "learning_rate": 8.581295605214005e-05, "loss": 1.5214, "step": 2737 }, { "epoch": 0.49322224724161223, "grad_norm": 1.329823613166809, "learning_rate": 8.580307221646647e-05, "loss": 1.5694, "step": 2738 }, { "epoch": 0.4934023868498086, "grad_norm": 1.308545708656311, "learning_rate": 8.579318550868786e-05, "loss": 1.5803, "step": 2739 }, { "epoch": 0.49358252645800493, "grad_norm": 1.420293927192688, "learning_rate": 8.578329592959734e-05, "loss": 1.9692, "step": 2740 }, { "epoch": 0.4937626660662013, "grad_norm": 1.4217355251312256, "learning_rate": 8.577340347998825e-05, "loss": 1.4416, "step": 2741 }, { "epoch": 0.49394280567439763, "grad_norm": 1.490926742553711, "learning_rate": 8.576350816065417e-05, "loss": 1.7305, "step": 2742 }, { "epoch": 0.494122945282594, "grad_norm": 1.429819941520691, "learning_rate": 8.575360997238887e-05, "loss": 1.5678, "step": 2743 }, { "epoch": 0.4943030848907904, "grad_norm": 1.4071948528289795, "learning_rate": 8.57437089159864e-05, "loss": 1.5747, "step": 2744 }, { "epoch": 0.49448322449898674, "grad_norm": 1.3541555404663086, "learning_rate": 8.573380499224101e-05, "loss": 1.5497, "step": 2745 }, { "epoch": 0.4946633641071831, "grad_norm": 1.4640523195266724, "learning_rate": 8.572389820194719e-05, "loss": 1.8185, "step": 2746 }, { "epoch": 0.49484350371537944, "grad_norm": 1.3823490142822266, "learning_rate": 8.571398854589966e-05, "loss": 1.6214, "step": 2747 }, { "epoch": 0.4950236433235758, "grad_norm": 1.3801867961883545, "learning_rate": 8.570407602489335e-05, "loss": 1.4755, "step": 2748 }, { "epoch": 0.49520378293177214, "grad_norm": 1.57958984375, "learning_rate": 8.569416063972347e-05, "loss": 1.7264, "step": 2749 }, { "epoch": 0.4953839225399685, "grad_norm": 1.2653738260269165, "learning_rate": 8.56842423911854e-05, "loss": 1.3777, "step": 2750 }, { "epoch": 0.49556406214816484, "grad_norm": 1.2235174179077148, "learning_rate": 8.56743212800748e-05, "loss": 2.1362, "step": 2751 }, { "epoch": 0.4957442017563612, "grad_norm": 1.1713038682937622, "learning_rate": 8.566439730718749e-05, "loss": 1.9403, "step": 2752 }, { "epoch": 0.49592434136455754, "grad_norm": 1.2549755573272705, "learning_rate": 8.565447047331962e-05, "loss": 2.2198, "step": 2753 }, { "epoch": 0.4961044809727539, "grad_norm": 1.2807815074920654, "learning_rate": 8.564454077926751e-05, "loss": 2.0376, "step": 2754 }, { "epoch": 0.49628462058095024, "grad_norm": 1.4784092903137207, "learning_rate": 8.56346082258277e-05, "loss": 2.4307, "step": 2755 }, { "epoch": 0.4964647601891466, "grad_norm": 1.3252829313278198, "learning_rate": 8.562467281379695e-05, "loss": 2.0048, "step": 2756 }, { "epoch": 0.49664489979734294, "grad_norm": 1.576277494430542, "learning_rate": 8.56147345439723e-05, "loss": 2.0626, "step": 2757 }, { "epoch": 0.4968250394055393, "grad_norm": 1.568812608718872, "learning_rate": 8.560479341715102e-05, "loss": 1.934, "step": 2758 }, { "epoch": 0.49700517901373564, "grad_norm": 1.515198826789856, "learning_rate": 8.559484943413053e-05, "loss": 1.9284, "step": 2759 }, { "epoch": 0.497185318621932, "grad_norm": 1.9978443384170532, "learning_rate": 8.558490259570857e-05, "loss": 2.2414, "step": 2760 }, { "epoch": 0.49736545823012834, "grad_norm": 1.7489478588104248, "learning_rate": 8.557495290268305e-05, "loss": 2.1514, "step": 2761 }, { "epoch": 0.4975455978383247, "grad_norm": 1.2738151550292969, "learning_rate": 8.556500035585213e-05, "loss": 1.8158, "step": 2762 }, { "epoch": 0.49772573744652104, "grad_norm": 1.302860140800476, "learning_rate": 8.55550449560142e-05, "loss": 1.7584, "step": 2763 }, { "epoch": 0.4979058770547174, "grad_norm": 1.2008062601089478, "learning_rate": 8.554508670396791e-05, "loss": 1.6243, "step": 2764 }, { "epoch": 0.49808601666291374, "grad_norm": 1.253883957862854, "learning_rate": 8.553512560051206e-05, "loss": 1.4351, "step": 2765 }, { "epoch": 0.4982661562711101, "grad_norm": 1.179694652557373, "learning_rate": 8.552516164644575e-05, "loss": 1.4304, "step": 2766 }, { "epoch": 0.49844629587930644, "grad_norm": 1.204978585243225, "learning_rate": 8.551519484256826e-05, "loss": 1.4948, "step": 2767 }, { "epoch": 0.4986264354875028, "grad_norm": 1.367863655090332, "learning_rate": 8.550522518967915e-05, "loss": 1.715, "step": 2768 }, { "epoch": 0.49880657509569915, "grad_norm": 1.2354141473770142, "learning_rate": 8.549525268857816e-05, "loss": 1.4642, "step": 2769 }, { "epoch": 0.4989867147038955, "grad_norm": 1.155656099319458, "learning_rate": 8.54852773400653e-05, "loss": 1.3461, "step": 2770 }, { "epoch": 0.49916685431209185, "grad_norm": 1.3603318929672241, "learning_rate": 8.547529914494078e-05, "loss": 1.9171, "step": 2771 }, { "epoch": 0.4993469939202882, "grad_norm": 1.308967113494873, "learning_rate": 8.546531810400503e-05, "loss": 1.6232, "step": 2772 }, { "epoch": 0.4995271335284846, "grad_norm": 1.3593995571136475, "learning_rate": 8.545533421805875e-05, "loss": 1.8749, "step": 2773 }, { "epoch": 0.49970727313668095, "grad_norm": 1.3304009437561035, "learning_rate": 8.544534748790281e-05, "loss": 1.8702, "step": 2774 }, { "epoch": 0.4998874127448773, "grad_norm": 1.2001197338104248, "learning_rate": 8.543535791433838e-05, "loss": 1.4296, "step": 2775 }, { "epoch": 0.5000675523530737, "grad_norm": 1.204925537109375, "learning_rate": 8.542536549816678e-05, "loss": 1.5712, "step": 2776 }, { "epoch": 0.50024769196127, "grad_norm": 1.2322618961334229, "learning_rate": 8.541537024018962e-05, "loss": 1.4776, "step": 2777 }, { "epoch": 0.5004278315694664, "grad_norm": 1.3218423128128052, "learning_rate": 8.540537214120872e-05, "loss": 1.6754, "step": 2778 }, { "epoch": 0.5006079711776626, "grad_norm": 1.4041993618011475, "learning_rate": 8.53953712020261e-05, "loss": 1.8543, "step": 2779 }, { "epoch": 0.500788110785859, "grad_norm": 1.2976808547973633, "learning_rate": 8.538536742344406e-05, "loss": 1.6144, "step": 2780 }, { "epoch": 0.5009682503940553, "grad_norm": 1.2562553882598877, "learning_rate": 8.537536080626505e-05, "loss": 1.5163, "step": 2781 }, { "epoch": 0.5011483900022518, "grad_norm": 1.319531798362732, "learning_rate": 8.536535135129184e-05, "loss": 1.6357, "step": 2782 }, { "epoch": 0.501328529610448, "grad_norm": 1.371364712715149, "learning_rate": 8.535533905932738e-05, "loss": 1.5351, "step": 2783 }, { "epoch": 0.5015086692186445, "grad_norm": 1.1721785068511963, "learning_rate": 8.534532393117484e-05, "loss": 1.555, "step": 2784 }, { "epoch": 0.5016888088268407, "grad_norm": 1.2747962474822998, "learning_rate": 8.533530596763763e-05, "loss": 1.5807, "step": 2785 }, { "epoch": 0.5018689484350372, "grad_norm": 1.2756599187850952, "learning_rate": 8.532528516951939e-05, "loss": 1.547, "step": 2786 }, { "epoch": 0.5020490880432336, "grad_norm": 1.349286675453186, "learning_rate": 8.531526153762396e-05, "loss": 1.4839, "step": 2787 }, { "epoch": 0.5022292276514299, "grad_norm": 1.324385643005371, "learning_rate": 8.530523507275546e-05, "loss": 1.4951, "step": 2788 }, { "epoch": 0.5024093672596263, "grad_norm": 1.485912799835205, "learning_rate": 8.52952057757182e-05, "loss": 1.6979, "step": 2789 }, { "epoch": 0.5025895068678226, "grad_norm": 1.3031331300735474, "learning_rate": 8.528517364731673e-05, "loss": 1.4363, "step": 2790 }, { "epoch": 0.502769646476019, "grad_norm": 1.3996716737747192, "learning_rate": 8.527513868835582e-05, "loss": 1.7178, "step": 2791 }, { "epoch": 0.5029497860842153, "grad_norm": 1.4848968982696533, "learning_rate": 8.526510089964045e-05, "loss": 1.662, "step": 2792 }, { "epoch": 0.5031299256924117, "grad_norm": 1.3520406484603882, "learning_rate": 8.525506028197589e-05, "loss": 1.7115, "step": 2793 }, { "epoch": 0.503310065300608, "grad_norm": 1.38235604763031, "learning_rate": 8.524501683616755e-05, "loss": 1.6243, "step": 2794 }, { "epoch": 0.5034902049088044, "grad_norm": 1.3430402278900146, "learning_rate": 8.523497056302112e-05, "loss": 1.4513, "step": 2795 }, { "epoch": 0.5036703445170007, "grad_norm": 1.3040351867675781, "learning_rate": 8.522492146334251e-05, "loss": 1.5241, "step": 2796 }, { "epoch": 0.5038504841251971, "grad_norm": 1.4564930200576782, "learning_rate": 8.521486953793787e-05, "loss": 1.8308, "step": 2797 }, { "epoch": 0.5040306237333934, "grad_norm": 1.4538522958755493, "learning_rate": 8.520481478761355e-05, "loss": 1.8094, "step": 2798 }, { "epoch": 0.5042107633415898, "grad_norm": 1.3144654035568237, "learning_rate": 8.519475721317614e-05, "loss": 1.4744, "step": 2799 }, { "epoch": 0.5043909029497861, "grad_norm": 1.4392014741897583, "learning_rate": 8.518469681543244e-05, "loss": 1.5725, "step": 2800 }, { "epoch": 0.5045710425579825, "grad_norm": 1.2421153783798218, "learning_rate": 8.517463359518949e-05, "loss": 2.1194, "step": 2801 }, { "epoch": 0.5047511821661788, "grad_norm": 1.2600113153457642, "learning_rate": 8.516456755325457e-05, "loss": 1.9421, "step": 2802 }, { "epoch": 0.5049313217743752, "grad_norm": 1.2511544227600098, "learning_rate": 8.515449869043518e-05, "loss": 2.0808, "step": 2803 }, { "epoch": 0.5051114613825715, "grad_norm": 1.3157927989959717, "learning_rate": 8.514442700753902e-05, "loss": 1.9794, "step": 2804 }, { "epoch": 0.5052916009907679, "grad_norm": 1.231649398803711, "learning_rate": 8.513435250537405e-05, "loss": 1.9295, "step": 2805 }, { "epoch": 0.5054717405989642, "grad_norm": 1.3225748538970947, "learning_rate": 8.512427518474844e-05, "loss": 1.8283, "step": 2806 }, { "epoch": 0.5056518802071606, "grad_norm": 1.4478939771652222, "learning_rate": 8.511419504647056e-05, "loss": 2.4938, "step": 2807 }, { "epoch": 0.5058320198153569, "grad_norm": 1.4256596565246582, "learning_rate": 8.510411209134908e-05, "loss": 1.9922, "step": 2808 }, { "epoch": 0.5060121594235533, "grad_norm": 1.8600107431411743, "learning_rate": 8.509402632019282e-05, "loss": 2.0058, "step": 2809 }, { "epoch": 0.5061922990317496, "grad_norm": 2.1335132122039795, "learning_rate": 8.508393773381085e-05, "loss": 2.6432, "step": 2810 }, { "epoch": 0.506372438639946, "grad_norm": 1.2174696922302246, "learning_rate": 8.507384633301249e-05, "loss": 1.4564, "step": 2811 }, { "epoch": 0.5065525782481423, "grad_norm": 1.5074141025543213, "learning_rate": 8.506375211860728e-05, "loss": 1.6933, "step": 2812 }, { "epoch": 0.5067327178563387, "grad_norm": 1.2732199430465698, "learning_rate": 8.505365509140494e-05, "loss": 1.6016, "step": 2813 }, { "epoch": 0.506912857464535, "grad_norm": 1.3593729734420776, "learning_rate": 8.504355525221545e-05, "loss": 1.6226, "step": 2814 }, { "epoch": 0.5070929970727314, "grad_norm": 1.4237853288650513, "learning_rate": 8.503345260184905e-05, "loss": 1.5724, "step": 2815 }, { "epoch": 0.5072731366809278, "grad_norm": 1.2254680395126343, "learning_rate": 8.502334714111612e-05, "loss": 1.3983, "step": 2816 }, { "epoch": 0.5074532762891241, "grad_norm": 1.4216575622558594, "learning_rate": 8.501323887082737e-05, "loss": 1.5055, "step": 2817 }, { "epoch": 0.5076334158973205, "grad_norm": 1.3354389667510986, "learning_rate": 8.500312779179365e-05, "loss": 1.7029, "step": 2818 }, { "epoch": 0.5078135555055168, "grad_norm": 1.3727309703826904, "learning_rate": 8.499301390482605e-05, "loss": 1.675, "step": 2819 }, { "epoch": 0.5079936951137132, "grad_norm": 1.2982864379882812, "learning_rate": 8.498289721073593e-05, "loss": 1.6344, "step": 2820 }, { "epoch": 0.5081738347219095, "grad_norm": 1.417824625968933, "learning_rate": 8.497277771033482e-05, "loss": 1.6598, "step": 2821 }, { "epoch": 0.5083539743301059, "grad_norm": 1.2452620267868042, "learning_rate": 8.496265540443453e-05, "loss": 1.4911, "step": 2822 }, { "epoch": 0.5085341139383022, "grad_norm": 1.3113662004470825, "learning_rate": 8.495253029384705e-05, "loss": 1.8271, "step": 2823 }, { "epoch": 0.5087142535464986, "grad_norm": 1.2392932176589966, "learning_rate": 8.494240237938461e-05, "loss": 1.561, "step": 2824 }, { "epoch": 0.5088943931546949, "grad_norm": 1.3330018520355225, "learning_rate": 8.493227166185968e-05, "loss": 1.5807, "step": 2825 }, { "epoch": 0.5090745327628913, "grad_norm": 1.315157175064087, "learning_rate": 8.492213814208494e-05, "loss": 1.5571, "step": 2826 }, { "epoch": 0.5092546723710876, "grad_norm": 1.2412223815917969, "learning_rate": 8.491200182087326e-05, "loss": 1.5673, "step": 2827 }, { "epoch": 0.509434811979284, "grad_norm": 1.2737836837768555, "learning_rate": 8.490186269903782e-05, "loss": 1.6543, "step": 2828 }, { "epoch": 0.5096149515874803, "grad_norm": 1.2240816354751587, "learning_rate": 8.489172077739195e-05, "loss": 1.6392, "step": 2829 }, { "epoch": 0.5097950911956767, "grad_norm": 1.350205659866333, "learning_rate": 8.488157605674925e-05, "loss": 1.7706, "step": 2830 }, { "epoch": 0.509975230803873, "grad_norm": 1.1772468090057373, "learning_rate": 8.487142853792348e-05, "loss": 1.4376, "step": 2831 }, { "epoch": 0.5101553704120694, "grad_norm": 1.2457062005996704, "learning_rate": 8.486127822172871e-05, "loss": 1.5642, "step": 2832 }, { "epoch": 0.5103355100202657, "grad_norm": 1.4030654430389404, "learning_rate": 8.485112510897918e-05, "loss": 1.7351, "step": 2833 }, { "epoch": 0.5105156496284621, "grad_norm": 1.4837732315063477, "learning_rate": 8.484096920048939e-05, "loss": 1.7033, "step": 2834 }, { "epoch": 0.5106957892366584, "grad_norm": 1.4558892250061035, "learning_rate": 8.4830810497074e-05, "loss": 1.9693, "step": 2835 }, { "epoch": 0.5108759288448548, "grad_norm": 1.3075482845306396, "learning_rate": 8.482064899954796e-05, "loss": 1.7349, "step": 2836 }, { "epoch": 0.5110560684530511, "grad_norm": 1.214443325996399, "learning_rate": 8.481048470872641e-05, "loss": 1.5078, "step": 2837 }, { "epoch": 0.5112362080612475, "grad_norm": 1.3043891191482544, "learning_rate": 8.480031762542474e-05, "loss": 1.7372, "step": 2838 }, { "epoch": 0.5114163476694438, "grad_norm": 1.3345760107040405, "learning_rate": 8.479014775045855e-05, "loss": 1.8883, "step": 2839 }, { "epoch": 0.5115964872776402, "grad_norm": 1.4989759922027588, "learning_rate": 8.477997508464364e-05, "loss": 2.0237, "step": 2840 }, { "epoch": 0.5117766268858365, "grad_norm": 1.4189120531082153, "learning_rate": 8.476979962879608e-05, "loss": 1.9285, "step": 2841 }, { "epoch": 0.5119567664940329, "grad_norm": 1.379510521888733, "learning_rate": 8.475962138373213e-05, "loss": 1.5603, "step": 2842 }, { "epoch": 0.5121369061022292, "grad_norm": 1.4271522760391235, "learning_rate": 8.474944035026826e-05, "loss": 1.8155, "step": 2843 }, { "epoch": 0.5123170457104256, "grad_norm": 1.439161777496338, "learning_rate": 8.473925652922124e-05, "loss": 1.6182, "step": 2844 }, { "epoch": 0.512497185318622, "grad_norm": 1.286539077758789, "learning_rate": 8.472906992140797e-05, "loss": 1.6494, "step": 2845 }, { "epoch": 0.5126773249268183, "grad_norm": 1.517162799835205, "learning_rate": 8.471888052764563e-05, "loss": 1.478, "step": 2846 }, { "epoch": 0.5128574645350147, "grad_norm": 1.3631229400634766, "learning_rate": 8.470868834875159e-05, "loss": 1.3688, "step": 2847 }, { "epoch": 0.513037604143211, "grad_norm": 1.3169572353363037, "learning_rate": 8.46984933855435e-05, "loss": 1.4934, "step": 2848 }, { "epoch": 0.5132177437514074, "grad_norm": 1.3168138265609741, "learning_rate": 8.468829563883916e-05, "loss": 1.5146, "step": 2849 }, { "epoch": 0.5133978833596037, "grad_norm": 1.397937297821045, "learning_rate": 8.467809510945664e-05, "loss": 1.5788, "step": 2850 }, { "epoch": 0.5135780229678001, "grad_norm": 1.2191779613494873, "learning_rate": 8.466789179821422e-05, "loss": 2.0042, "step": 2851 }, { "epoch": 0.5137581625759964, "grad_norm": 1.2275811433792114, "learning_rate": 8.46576857059304e-05, "loss": 1.9563, "step": 2852 }, { "epoch": 0.5139383021841928, "grad_norm": 1.231316328048706, "learning_rate": 8.464747683342392e-05, "loss": 2.3349, "step": 2853 }, { "epoch": 0.5141184417923891, "grad_norm": 1.2736597061157227, "learning_rate": 8.463726518151372e-05, "loss": 2.2835, "step": 2854 }, { "epoch": 0.5142985814005855, "grad_norm": 1.3382141590118408, "learning_rate": 8.4627050751019e-05, "loss": 1.8476, "step": 2855 }, { "epoch": 0.5144787210087818, "grad_norm": 1.2106196880340576, "learning_rate": 8.461683354275911e-05, "loss": 2.0188, "step": 2856 }, { "epoch": 0.5146588606169782, "grad_norm": 1.3403714895248413, "learning_rate": 8.46066135575537e-05, "loss": 1.8018, "step": 2857 }, { "epoch": 0.5148390002251745, "grad_norm": 1.5464999675750732, "learning_rate": 8.459639079622261e-05, "loss": 2.2572, "step": 2858 }, { "epoch": 0.5150191398333709, "grad_norm": 1.7590980529785156, "learning_rate": 8.45861652595859e-05, "loss": 2.4232, "step": 2859 }, { "epoch": 0.5151992794415672, "grad_norm": 1.77105712890625, "learning_rate": 8.457593694846386e-05, "loss": 2.22, "step": 2860 }, { "epoch": 0.5153794190497636, "grad_norm": 1.3987563848495483, "learning_rate": 8.4565705863677e-05, "loss": 1.8002, "step": 2861 }, { "epoch": 0.5155595586579599, "grad_norm": 1.375073790550232, "learning_rate": 8.455547200604606e-05, "loss": 1.8871, "step": 2862 }, { "epoch": 0.5157396982661563, "grad_norm": 1.266856074333191, "learning_rate": 8.454523537639199e-05, "loss": 1.6751, "step": 2863 }, { "epoch": 0.5159198378743526, "grad_norm": 1.2251063585281372, "learning_rate": 8.453499597553595e-05, "loss": 1.7471, "step": 2864 }, { "epoch": 0.516099977482549, "grad_norm": 1.2130104303359985, "learning_rate": 8.452475380429935e-05, "loss": 1.519, "step": 2865 }, { "epoch": 0.5162801170907453, "grad_norm": 1.157072901725769, "learning_rate": 8.451450886350382e-05, "loss": 1.3935, "step": 2866 }, { "epoch": 0.5164602566989417, "grad_norm": 1.2801405191421509, "learning_rate": 8.450426115397121e-05, "loss": 1.786, "step": 2867 }, { "epoch": 0.516640396307138, "grad_norm": 1.3811697959899902, "learning_rate": 8.449401067652358e-05, "loss": 1.7417, "step": 2868 }, { "epoch": 0.5168205359153344, "grad_norm": 1.3319814205169678, "learning_rate": 8.44837574319832e-05, "loss": 1.8402, "step": 2869 }, { "epoch": 0.5170006755235307, "grad_norm": 1.2061325311660767, "learning_rate": 8.447350142117261e-05, "loss": 1.61, "step": 2870 }, { "epoch": 0.5171808151317271, "grad_norm": 1.3511368036270142, "learning_rate": 8.446324264491452e-05, "loss": 1.7815, "step": 2871 }, { "epoch": 0.5173609547399235, "grad_norm": 1.3119224309921265, "learning_rate": 8.445298110403188e-05, "loss": 1.6436, "step": 2872 }, { "epoch": 0.5175410943481198, "grad_norm": 1.19091796875, "learning_rate": 8.44427167993479e-05, "loss": 1.4899, "step": 2873 }, { "epoch": 0.5177212339563162, "grad_norm": 1.3423573970794678, "learning_rate": 8.443244973168594e-05, "loss": 1.6115, "step": 2874 }, { "epoch": 0.5179013735645125, "grad_norm": 1.301041603088379, "learning_rate": 8.442217990186962e-05, "loss": 1.4322, "step": 2875 }, { "epoch": 0.5180815131727089, "grad_norm": 1.451704740524292, "learning_rate": 8.441190731072281e-05, "loss": 1.7627, "step": 2876 }, { "epoch": 0.5182616527809052, "grad_norm": 1.2976250648498535, "learning_rate": 8.440163195906958e-05, "loss": 1.7752, "step": 2877 }, { "epoch": 0.5184417923891016, "grad_norm": 1.2227054834365845, "learning_rate": 8.439135384773415e-05, "loss": 1.5108, "step": 2878 }, { "epoch": 0.5186219319972979, "grad_norm": 1.3561028242111206, "learning_rate": 8.43810729775411e-05, "loss": 1.7148, "step": 2879 }, { "epoch": 0.5188020716054943, "grad_norm": 1.4228743314743042, "learning_rate": 8.43707893493151e-05, "loss": 1.9676, "step": 2880 }, { "epoch": 0.5189822112136906, "grad_norm": 1.234188199043274, "learning_rate": 8.436050296388114e-05, "loss": 1.4886, "step": 2881 }, { "epoch": 0.519162350821887, "grad_norm": 1.3529471158981323, "learning_rate": 8.435021382206433e-05, "loss": 1.6212, "step": 2882 }, { "epoch": 0.5193424904300833, "grad_norm": 1.2622463703155518, "learning_rate": 8.433992192469013e-05, "loss": 1.585, "step": 2883 }, { "epoch": 0.5195226300382797, "grad_norm": 1.3103488683700562, "learning_rate": 8.432962727258413e-05, "loss": 1.7917, "step": 2884 }, { "epoch": 0.519702769646476, "grad_norm": 1.2690023183822632, "learning_rate": 8.431932986657214e-05, "loss": 1.6042, "step": 2885 }, { "epoch": 0.5198829092546724, "grad_norm": 1.359317421913147, "learning_rate": 8.430902970748022e-05, "loss": 1.7418, "step": 2886 }, { "epoch": 0.5200630488628687, "grad_norm": 1.2745059728622437, "learning_rate": 8.429872679613464e-05, "loss": 1.7374, "step": 2887 }, { "epoch": 0.5202431884710651, "grad_norm": 1.293550968170166, "learning_rate": 8.428842113336191e-05, "loss": 1.6381, "step": 2888 }, { "epoch": 0.5204233280792614, "grad_norm": 1.4161497354507446, "learning_rate": 8.427811271998873e-05, "loss": 1.7489, "step": 2889 }, { "epoch": 0.5206034676874578, "grad_norm": 1.393950343132019, "learning_rate": 8.426780155684206e-05, "loss": 1.8912, "step": 2890 }, { "epoch": 0.5207836072956541, "grad_norm": 1.380697250366211, "learning_rate": 8.425748764474903e-05, "loss": 1.8741, "step": 2891 }, { "epoch": 0.5209637469038505, "grad_norm": 1.2994813919067383, "learning_rate": 8.424717098453703e-05, "loss": 1.4546, "step": 2892 }, { "epoch": 0.5211438865120468, "grad_norm": 1.4606106281280518, "learning_rate": 8.423685157703366e-05, "loss": 1.828, "step": 2893 }, { "epoch": 0.5213240261202432, "grad_norm": 1.217456340789795, "learning_rate": 8.422652942306672e-05, "loss": 1.695, "step": 2894 }, { "epoch": 0.5215041657284395, "grad_norm": 1.4949731826782227, "learning_rate": 8.421620452346428e-05, "loss": 1.9131, "step": 2895 }, { "epoch": 0.5216843053366359, "grad_norm": 1.394025444984436, "learning_rate": 8.420587687905457e-05, "loss": 1.7087, "step": 2896 }, { "epoch": 0.5218644449448322, "grad_norm": 1.5372185707092285, "learning_rate": 8.419554649066606e-05, "loss": 1.4535, "step": 2897 }, { "epoch": 0.5220445845530286, "grad_norm": 1.3466992378234863, "learning_rate": 8.418521335912749e-05, "loss": 1.4944, "step": 2898 }, { "epoch": 0.5222247241612249, "grad_norm": 1.2983089685440063, "learning_rate": 8.417487748526774e-05, "loss": 1.4455, "step": 2899 }, { "epoch": 0.5224048637694213, "grad_norm": 1.4654840230941772, "learning_rate": 8.416453886991598e-05, "loss": 1.6367, "step": 2900 }, { "epoch": 0.5225850033776177, "grad_norm": 1.2391570806503296, "learning_rate": 8.415419751390155e-05, "loss": 2.1621, "step": 2901 }, { "epoch": 0.522765142985814, "grad_norm": 1.38381826877594, "learning_rate": 8.414385341805402e-05, "loss": 2.0932, "step": 2902 }, { "epoch": 0.5229452825940104, "grad_norm": 1.2105937004089355, "learning_rate": 8.41335065832032e-05, "loss": 2.1338, "step": 2903 }, { "epoch": 0.5231254222022067, "grad_norm": 1.2993297576904297, "learning_rate": 8.412315701017912e-05, "loss": 1.859, "step": 2904 }, { "epoch": 0.5233055618104031, "grad_norm": 1.3384552001953125, "learning_rate": 8.4112804699812e-05, "loss": 2.1587, "step": 2905 }, { "epoch": 0.5234857014185994, "grad_norm": 1.508028507232666, "learning_rate": 8.410244965293231e-05, "loss": 2.4367, "step": 2906 }, { "epoch": 0.5236658410267958, "grad_norm": 1.4038044214248657, "learning_rate": 8.409209187037071e-05, "loss": 1.888, "step": 2907 }, { "epoch": 0.5238459806349921, "grad_norm": 1.602329969406128, "learning_rate": 8.408173135295809e-05, "loss": 2.1134, "step": 2908 }, { "epoch": 0.5240261202431885, "grad_norm": 1.6898146867752075, "learning_rate": 8.407136810152561e-05, "loss": 2.1636, "step": 2909 }, { "epoch": 0.5242062598513848, "grad_norm": 1.287115454673767, "learning_rate": 8.406100211690456e-05, "loss": 1.7853, "step": 2910 }, { "epoch": 0.5243863994595812, "grad_norm": 1.2718130350112915, "learning_rate": 8.405063339992651e-05, "loss": 1.6375, "step": 2911 }, { "epoch": 0.5245665390677775, "grad_norm": 1.2713121175765991, "learning_rate": 8.404026195142326e-05, "loss": 1.6516, "step": 2912 }, { "epoch": 0.5247466786759739, "grad_norm": 1.1569982767105103, "learning_rate": 8.402988777222674e-05, "loss": 1.4313, "step": 2913 }, { "epoch": 0.5249268182841702, "grad_norm": 1.328687310218811, "learning_rate": 8.401951086316921e-05, "loss": 1.7319, "step": 2914 }, { "epoch": 0.5251069578923666, "grad_norm": 1.3176500797271729, "learning_rate": 8.400913122508309e-05, "loss": 1.8114, "step": 2915 }, { "epoch": 0.5252870975005629, "grad_norm": 1.2282111644744873, "learning_rate": 8.399874885880104e-05, "loss": 1.6335, "step": 2916 }, { "epoch": 0.5254672371087593, "grad_norm": 1.2876404523849487, "learning_rate": 8.39883637651559e-05, "loss": 1.4814, "step": 2917 }, { "epoch": 0.5256473767169556, "grad_norm": 1.2772941589355469, "learning_rate": 8.39779759449808e-05, "loss": 1.5264, "step": 2918 }, { "epoch": 0.525827516325152, "grad_norm": 1.4183640480041504, "learning_rate": 8.3967585399109e-05, "loss": 1.6784, "step": 2919 }, { "epoch": 0.5260076559333483, "grad_norm": 1.2522879838943481, "learning_rate": 8.395719212837404e-05, "loss": 1.7072, "step": 2920 }, { "epoch": 0.5261877955415447, "grad_norm": 1.3981165885925293, "learning_rate": 8.394679613360968e-05, "loss": 1.764, "step": 2921 }, { "epoch": 0.526367935149741, "grad_norm": 1.313857078552246, "learning_rate": 8.393639741564984e-05, "loss": 1.7087, "step": 2922 }, { "epoch": 0.5265480747579374, "grad_norm": 1.3553773164749146, "learning_rate": 8.392599597532877e-05, "loss": 1.6911, "step": 2923 }, { "epoch": 0.5267282143661337, "grad_norm": 1.2365988492965698, "learning_rate": 8.391559181348082e-05, "loss": 1.5675, "step": 2924 }, { "epoch": 0.5269083539743301, "grad_norm": 1.2551406621932983, "learning_rate": 8.39051849309406e-05, "loss": 1.3187, "step": 2925 }, { "epoch": 0.5270884935825264, "grad_norm": 1.305245041847229, "learning_rate": 8.389477532854297e-05, "loss": 1.5808, "step": 2926 }, { "epoch": 0.5272686331907228, "grad_norm": 1.2748825550079346, "learning_rate": 8.388436300712295e-05, "loss": 1.46, "step": 2927 }, { "epoch": 0.5274487727989191, "grad_norm": 1.2135010957717896, "learning_rate": 8.387394796751585e-05, "loss": 1.4507, "step": 2928 }, { "epoch": 0.5276289124071155, "grad_norm": 1.290830373764038, "learning_rate": 8.386353021055715e-05, "loss": 1.5783, "step": 2929 }, { "epoch": 0.5278090520153119, "grad_norm": 1.295822024345398, "learning_rate": 8.385310973708254e-05, "loss": 1.522, "step": 2930 }, { "epoch": 0.5279891916235082, "grad_norm": 1.4165290594100952, "learning_rate": 8.384268654792797e-05, "loss": 1.7107, "step": 2931 }, { "epoch": 0.5281693312317046, "grad_norm": 1.2215039730072021, "learning_rate": 8.383226064392956e-05, "loss": 1.4735, "step": 2932 }, { "epoch": 0.5283494708399009, "grad_norm": 1.3790441751480103, "learning_rate": 8.382183202592367e-05, "loss": 1.8254, "step": 2933 }, { "epoch": 0.5285296104480973, "grad_norm": 1.2985281944274902, "learning_rate": 8.381140069474691e-05, "loss": 1.5506, "step": 2934 }, { "epoch": 0.5287097500562936, "grad_norm": 1.2924715280532837, "learning_rate": 8.380096665123604e-05, "loss": 1.5431, "step": 2935 }, { "epoch": 0.52888988966449, "grad_norm": 1.3524894714355469, "learning_rate": 8.379052989622809e-05, "loss": 1.7336, "step": 2936 }, { "epoch": 0.5290700292726863, "grad_norm": 1.2735430002212524, "learning_rate": 8.37800904305603e-05, "loss": 1.5671, "step": 2937 }, { "epoch": 0.5292501688808827, "grad_norm": 1.356387734413147, "learning_rate": 8.37696482550701e-05, "loss": 1.5586, "step": 2938 }, { "epoch": 0.529430308489079, "grad_norm": 1.3299981355667114, "learning_rate": 8.375920337059518e-05, "loss": 1.4931, "step": 2939 }, { "epoch": 0.5296104480972754, "grad_norm": 1.4164447784423828, "learning_rate": 8.374875577797339e-05, "loss": 1.7236, "step": 2940 }, { "epoch": 0.5297905877054717, "grad_norm": 1.3486334085464478, "learning_rate": 8.373830547804285e-05, "loss": 1.7085, "step": 2941 }, { "epoch": 0.5299707273136681, "grad_norm": 1.2500412464141846, "learning_rate": 8.37278524716419e-05, "loss": 1.3937, "step": 2942 }, { "epoch": 0.5301508669218644, "grad_norm": 1.350296139717102, "learning_rate": 8.371739675960904e-05, "loss": 1.759, "step": 2943 }, { "epoch": 0.5303310065300608, "grad_norm": 1.2812758684158325, "learning_rate": 8.370693834278302e-05, "loss": 1.693, "step": 2944 }, { "epoch": 0.5305111461382571, "grad_norm": 1.4055895805358887, "learning_rate": 8.369647722200285e-05, "loss": 1.5049, "step": 2945 }, { "epoch": 0.5306912857464535, "grad_norm": 1.4454180002212524, "learning_rate": 8.368601339810767e-05, "loss": 1.6285, "step": 2946 }, { "epoch": 0.5308714253546498, "grad_norm": 1.3234490156173706, "learning_rate": 8.367554687193691e-05, "loss": 1.5159, "step": 2947 }, { "epoch": 0.5310515649628462, "grad_norm": 1.4226984977722168, "learning_rate": 8.366507764433018e-05, "loss": 1.7194, "step": 2948 }, { "epoch": 0.5312317045710425, "grad_norm": 1.2859829664230347, "learning_rate": 8.365460571612733e-05, "loss": 1.6281, "step": 2949 }, { "epoch": 0.5314118441792389, "grad_norm": 1.2395504713058472, "learning_rate": 8.36441310881684e-05, "loss": 1.5818, "step": 2950 }, { "epoch": 0.5315919837874352, "grad_norm": 1.1794137954711914, "learning_rate": 8.363365376129365e-05, "loss": 2.0086, "step": 2951 }, { "epoch": 0.5317721233956316, "grad_norm": 1.2856978178024292, "learning_rate": 8.362317373634359e-05, "loss": 2.0545, "step": 2952 }, { "epoch": 0.5319522630038279, "grad_norm": 1.2867718935012817, "learning_rate": 8.36126910141589e-05, "loss": 2.0849, "step": 2953 }, { "epoch": 0.5321324026120243, "grad_norm": 1.1966649293899536, "learning_rate": 8.360220559558051e-05, "loss": 2.1832, "step": 2954 }, { "epoch": 0.5323125422202206, "grad_norm": 1.377406358718872, "learning_rate": 8.359171748144956e-05, "loss": 2.394, "step": 2955 }, { "epoch": 0.532492681828417, "grad_norm": 2.2069456577301025, "learning_rate": 8.35812266726074e-05, "loss": 2.1278, "step": 2956 }, { "epoch": 0.5326728214366134, "grad_norm": 1.2921961545944214, "learning_rate": 8.357073316989559e-05, "loss": 2.0947, "step": 2957 }, { "epoch": 0.5328529610448097, "grad_norm": 1.3869844675064087, "learning_rate": 8.356023697415591e-05, "loss": 1.8076, "step": 2958 }, { "epoch": 0.5330331006530061, "grad_norm": 1.7190313339233398, "learning_rate": 8.354973808623037e-05, "loss": 2.0916, "step": 2959 }, { "epoch": 0.5332132402612024, "grad_norm": 1.565635323524475, "learning_rate": 8.353923650696118e-05, "loss": 1.7942, "step": 2960 }, { "epoch": 0.5333933798693988, "grad_norm": 1.1866713762283325, "learning_rate": 8.352873223719078e-05, "loss": 1.5644, "step": 2961 }, { "epoch": 0.5335735194775951, "grad_norm": 1.2483091354370117, "learning_rate": 8.351822527776181e-05, "loss": 1.6697, "step": 2962 }, { "epoch": 0.5337536590857915, "grad_norm": 1.2404404878616333, "learning_rate": 8.350771562951712e-05, "loss": 1.654, "step": 2963 }, { "epoch": 0.5339337986939878, "grad_norm": 1.2751595973968506, "learning_rate": 8.349720329329981e-05, "loss": 1.5721, "step": 2964 }, { "epoch": 0.5341139383021842, "grad_norm": 1.1954346895217896, "learning_rate": 8.348668826995319e-05, "loss": 1.5188, "step": 2965 }, { "epoch": 0.5342940779103805, "grad_norm": 1.3471052646636963, "learning_rate": 8.347617056032072e-05, "loss": 1.6529, "step": 2966 }, { "epoch": 0.5344742175185769, "grad_norm": 1.2993495464324951, "learning_rate": 8.346565016524617e-05, "loss": 1.6368, "step": 2967 }, { "epoch": 0.5346543571267732, "grad_norm": 1.2527159452438354, "learning_rate": 8.345512708557345e-05, "loss": 1.6186, "step": 2968 }, { "epoch": 0.5348344967349696, "grad_norm": 1.088618516921997, "learning_rate": 8.344460132214675e-05, "loss": 1.2741, "step": 2969 }, { "epoch": 0.5350146363431659, "grad_norm": 1.2552549839019775, "learning_rate": 8.34340728758104e-05, "loss": 1.6054, "step": 2970 }, { "epoch": 0.5351947759513623, "grad_norm": 1.1767199039459229, "learning_rate": 8.342354174740902e-05, "loss": 1.3718, "step": 2971 }, { "epoch": 0.5353749155595586, "grad_norm": 1.22327721118927, "learning_rate": 8.34130079377874e-05, "loss": 1.5924, "step": 2972 }, { "epoch": 0.535555055167755, "grad_norm": 1.234145998954773, "learning_rate": 8.340247144779055e-05, "loss": 1.5154, "step": 2973 }, { "epoch": 0.5357351947759513, "grad_norm": 1.227959394454956, "learning_rate": 8.339193227826373e-05, "loss": 1.5425, "step": 2974 }, { "epoch": 0.5359153343841477, "grad_norm": 1.2257956266403198, "learning_rate": 8.338139043005235e-05, "loss": 1.5687, "step": 2975 }, { "epoch": 0.536095473992344, "grad_norm": 1.2666376829147339, "learning_rate": 8.33708459040021e-05, "loss": 1.7125, "step": 2976 }, { "epoch": 0.5362756136005404, "grad_norm": 1.416784644126892, "learning_rate": 8.336029870095885e-05, "loss": 1.6145, "step": 2977 }, { "epoch": 0.5364557532087367, "grad_norm": 1.275352954864502, "learning_rate": 8.334974882176868e-05, "loss": 1.6509, "step": 2978 }, { "epoch": 0.5366358928169331, "grad_norm": 1.3713093996047974, "learning_rate": 8.333919626727788e-05, "loss": 1.7314, "step": 2979 }, { "epoch": 0.5368160324251294, "grad_norm": 1.369529366493225, "learning_rate": 8.332864103833302e-05, "loss": 1.6948, "step": 2980 }, { "epoch": 0.5369961720333258, "grad_norm": 1.2536239624023438, "learning_rate": 8.331808313578082e-05, "loss": 1.6574, "step": 2981 }, { "epoch": 0.5371763116415221, "grad_norm": 1.2864121198654175, "learning_rate": 8.33075225604682e-05, "loss": 1.6983, "step": 2982 }, { "epoch": 0.5373564512497185, "grad_norm": 1.2520129680633545, "learning_rate": 8.329695931324235e-05, "loss": 1.6456, "step": 2983 }, { "epoch": 0.5375365908579148, "grad_norm": 1.2824212312698364, "learning_rate": 8.328639339495065e-05, "loss": 1.6142, "step": 2984 }, { "epoch": 0.5377167304661112, "grad_norm": 1.303770661354065, "learning_rate": 8.327582480644067e-05, "loss": 1.5105, "step": 2985 }, { "epoch": 0.5378968700743076, "grad_norm": 1.3146182298660278, "learning_rate": 8.326525354856025e-05, "loss": 1.6884, "step": 2986 }, { "epoch": 0.5380770096825039, "grad_norm": 1.4487546682357788, "learning_rate": 8.325467962215738e-05, "loss": 1.5976, "step": 2987 }, { "epoch": 0.5382571492907003, "grad_norm": 1.4768322706222534, "learning_rate": 8.32441030280803e-05, "loss": 1.5234, "step": 2988 }, { "epoch": 0.5384372888988966, "grad_norm": 1.3447977304458618, "learning_rate": 8.323352376717748e-05, "loss": 1.7109, "step": 2989 }, { "epoch": 0.538617428507093, "grad_norm": 1.355639100074768, "learning_rate": 8.322294184029758e-05, "loss": 1.5224, "step": 2990 }, { "epoch": 0.5387975681152893, "grad_norm": 1.4150404930114746, "learning_rate": 8.321235724828945e-05, "loss": 1.7687, "step": 2991 }, { "epoch": 0.5389777077234857, "grad_norm": 1.295420527458191, "learning_rate": 8.32017699920022e-05, "loss": 1.5991, "step": 2992 }, { "epoch": 0.539157847331682, "grad_norm": 1.2863636016845703, "learning_rate": 8.319118007228515e-05, "loss": 1.5894, "step": 2993 }, { "epoch": 0.5393379869398784, "grad_norm": 1.288286566734314, "learning_rate": 8.318058748998778e-05, "loss": 1.5178, "step": 2994 }, { "epoch": 0.5395181265480747, "grad_norm": 1.325046181678772, "learning_rate": 8.316999224595986e-05, "loss": 1.7127, "step": 2995 }, { "epoch": 0.5396982661562711, "grad_norm": 1.339909553527832, "learning_rate": 8.315939434105132e-05, "loss": 1.6117, "step": 2996 }, { "epoch": 0.5398784057644674, "grad_norm": 1.467614769935608, "learning_rate": 8.314879377611232e-05, "loss": 1.8017, "step": 2997 }, { "epoch": 0.5400585453726638, "grad_norm": 1.4138083457946777, "learning_rate": 8.313819055199321e-05, "loss": 1.5596, "step": 2998 }, { "epoch": 0.5402386849808601, "grad_norm": 1.4147588014602661, "learning_rate": 8.31275846695446e-05, "loss": 1.5653, "step": 2999 }, { "epoch": 0.5404188245890565, "grad_norm": 1.4119365215301514, "learning_rate": 8.311697612961729e-05, "loss": 1.5692, "step": 3000 }, { "epoch": 0.5405989641972528, "grad_norm": 1.2690507173538208, "learning_rate": 8.310636493306227e-05, "loss": 1.7542, "step": 3001 }, { "epoch": 0.5407791038054492, "grad_norm": 1.212904930114746, "learning_rate": 8.30957510807308e-05, "loss": 1.948, "step": 3002 }, { "epoch": 0.5409592434136455, "grad_norm": 1.2991600036621094, "learning_rate": 8.30851345734743e-05, "loss": 1.9466, "step": 3003 }, { "epoch": 0.541139383021842, "grad_norm": 1.3232090473175049, "learning_rate": 8.307451541214441e-05, "loss": 1.7802, "step": 3004 }, { "epoch": 0.5413195226300382, "grad_norm": 1.348582148551941, "learning_rate": 8.306389359759302e-05, "loss": 1.8812, "step": 3005 }, { "epoch": 0.5414996622382346, "grad_norm": 1.344104528427124, "learning_rate": 8.305326913067215e-05, "loss": 1.9632, "step": 3006 }, { "epoch": 0.5416798018464309, "grad_norm": 1.4267274141311646, "learning_rate": 8.304264201223417e-05, "loss": 2.3278, "step": 3007 }, { "epoch": 0.5418599414546273, "grad_norm": 1.5221933126449585, "learning_rate": 8.303201224313153e-05, "loss": 2.5895, "step": 3008 }, { "epoch": 0.5420400810628236, "grad_norm": 1.5019080638885498, "learning_rate": 8.302137982421696e-05, "loss": 1.8229, "step": 3009 }, { "epoch": 0.54222022067102, "grad_norm": 1.8297483921051025, "learning_rate": 8.301074475634337e-05, "loss": 2.2933, "step": 3010 }, { "epoch": 0.5424003602792163, "grad_norm": 1.3127143383026123, "learning_rate": 8.300010704036393e-05, "loss": 1.71, "step": 3011 }, { "epoch": 0.5425804998874127, "grad_norm": 1.1981914043426514, "learning_rate": 8.298946667713197e-05, "loss": 1.631, "step": 3012 }, { "epoch": 0.542760639495609, "grad_norm": 1.332061529159546, "learning_rate": 8.297882366750107e-05, "loss": 1.9382, "step": 3013 }, { "epoch": 0.5429407791038054, "grad_norm": 1.3686964511871338, "learning_rate": 8.2968178012325e-05, "loss": 2.0074, "step": 3014 }, { "epoch": 0.5431209187120019, "grad_norm": 1.3442156314849854, "learning_rate": 8.295752971245773e-05, "loss": 1.57, "step": 3015 }, { "epoch": 0.5433010583201981, "grad_norm": 1.2571334838867188, "learning_rate": 8.294687876875349e-05, "loss": 1.7784, "step": 3016 }, { "epoch": 0.5434811979283946, "grad_norm": 1.2542588710784912, "learning_rate": 8.293622518206668e-05, "loss": 1.625, "step": 3017 }, { "epoch": 0.5436613375365908, "grad_norm": 1.3364351987838745, "learning_rate": 8.292556895325194e-05, "loss": 1.6184, "step": 3018 }, { "epoch": 0.5438414771447873, "grad_norm": 1.201956033706665, "learning_rate": 8.291491008316409e-05, "loss": 1.5305, "step": 3019 }, { "epoch": 0.5440216167529836, "grad_norm": 1.166782259941101, "learning_rate": 8.290424857265818e-05, "loss": 1.5747, "step": 3020 }, { "epoch": 0.54420175636118, "grad_norm": 1.1946702003479004, "learning_rate": 8.289358442258947e-05, "loss": 1.3544, "step": 3021 }, { "epoch": 0.5443818959693763, "grad_norm": 1.2366557121276855, "learning_rate": 8.288291763381346e-05, "loss": 1.6255, "step": 3022 }, { "epoch": 0.5445620355775727, "grad_norm": 1.291358232498169, "learning_rate": 8.28722482071858e-05, "loss": 1.5542, "step": 3023 }, { "epoch": 0.544742175185769, "grad_norm": 1.306465744972229, "learning_rate": 8.28615761435624e-05, "loss": 1.6861, "step": 3024 }, { "epoch": 0.5449223147939654, "grad_norm": 1.3511592149734497, "learning_rate": 8.285090144379937e-05, "loss": 1.6909, "step": 3025 }, { "epoch": 0.5451024544021617, "grad_norm": 1.2512991428375244, "learning_rate": 8.284022410875305e-05, "loss": 1.6008, "step": 3026 }, { "epoch": 0.5452825940103581, "grad_norm": 1.2254688739776611, "learning_rate": 8.282954413927992e-05, "loss": 1.5745, "step": 3027 }, { "epoch": 0.5454627336185544, "grad_norm": 1.3068994283676147, "learning_rate": 8.281886153623675e-05, "loss": 1.704, "step": 3028 }, { "epoch": 0.5456428732267508, "grad_norm": 1.375957727432251, "learning_rate": 8.28081763004805e-05, "loss": 1.8602, "step": 3029 }, { "epoch": 0.545823012834947, "grad_norm": 1.5064692497253418, "learning_rate": 8.279748843286833e-05, "loss": 1.7836, "step": 3030 }, { "epoch": 0.5460031524431435, "grad_norm": 1.173319935798645, "learning_rate": 8.278679793425761e-05, "loss": 1.4822, "step": 3031 }, { "epoch": 0.5461832920513398, "grad_norm": 1.4948939085006714, "learning_rate": 8.277610480550593e-05, "loss": 1.7825, "step": 3032 }, { "epoch": 0.5463634316595362, "grad_norm": 1.5304152965545654, "learning_rate": 8.27654090474711e-05, "loss": 2.0278, "step": 3033 }, { "epoch": 0.5465435712677325, "grad_norm": 1.337569236755371, "learning_rate": 8.275471066101109e-05, "loss": 1.7703, "step": 3034 }, { "epoch": 0.5467237108759289, "grad_norm": 1.3904756307601929, "learning_rate": 8.274400964698417e-05, "loss": 1.5607, "step": 3035 }, { "epoch": 0.5469038504841252, "grad_norm": 1.4075552225112915, "learning_rate": 8.273330600624872e-05, "loss": 1.8431, "step": 3036 }, { "epoch": 0.5470839900923216, "grad_norm": 1.4117883443832397, "learning_rate": 8.272259973966343e-05, "loss": 1.7731, "step": 3037 }, { "epoch": 0.5472641297005179, "grad_norm": 1.3136579990386963, "learning_rate": 8.27118908480871e-05, "loss": 1.4595, "step": 3038 }, { "epoch": 0.5474442693087143, "grad_norm": 1.3299778699874878, "learning_rate": 8.270117933237884e-05, "loss": 1.7849, "step": 3039 }, { "epoch": 0.5476244089169106, "grad_norm": 1.3349429368972778, "learning_rate": 8.269046519339792e-05, "loss": 1.8017, "step": 3040 }, { "epoch": 0.547804548525107, "grad_norm": 1.4871941804885864, "learning_rate": 8.267974843200376e-05, "loss": 1.8816, "step": 3041 }, { "epoch": 0.5479846881333033, "grad_norm": 1.3881686925888062, "learning_rate": 8.266902904905613e-05, "loss": 1.6137, "step": 3042 }, { "epoch": 0.5481648277414997, "grad_norm": 1.5226101875305176, "learning_rate": 8.26583070454149e-05, "loss": 1.7101, "step": 3043 }, { "epoch": 0.5483449673496961, "grad_norm": 1.45222008228302, "learning_rate": 8.264758242194017e-05, "loss": 1.9807, "step": 3044 }, { "epoch": 0.5485251069578924, "grad_norm": 1.4130432605743408, "learning_rate": 8.263685517949228e-05, "loss": 1.6995, "step": 3045 }, { "epoch": 0.5487052465660888, "grad_norm": 1.3273085355758667, "learning_rate": 8.262612531893179e-05, "loss": 1.5851, "step": 3046 }, { "epoch": 0.5488853861742851, "grad_norm": 1.3767088651657104, "learning_rate": 8.261539284111941e-05, "loss": 1.4744, "step": 3047 }, { "epoch": 0.5490655257824815, "grad_norm": 1.264435052871704, "learning_rate": 8.26046577469161e-05, "loss": 1.5227, "step": 3048 }, { "epoch": 0.5492456653906778, "grad_norm": 1.2981853485107422, "learning_rate": 8.259392003718301e-05, "loss": 1.4457, "step": 3049 }, { "epoch": 0.5494258049988742, "grad_norm": 1.4427740573883057, "learning_rate": 8.258317971278155e-05, "loss": 1.6294, "step": 3050 }, { "epoch": 0.5496059446070705, "grad_norm": 1.6608119010925293, "learning_rate": 8.257243677457328e-05, "loss": 2.1272, "step": 3051 }, { "epoch": 0.5497860842152669, "grad_norm": 1.3499118089675903, "learning_rate": 8.256169122341998e-05, "loss": 2.2511, "step": 3052 }, { "epoch": 0.5499662238234632, "grad_norm": 1.202763319015503, "learning_rate": 8.25509430601837e-05, "loss": 1.9001, "step": 3053 }, { "epoch": 0.5501463634316596, "grad_norm": 1.3304518461227417, "learning_rate": 8.25401922857266e-05, "loss": 2.0743, "step": 3054 }, { "epoch": 0.5503265030398559, "grad_norm": 1.2752487659454346, "learning_rate": 8.252943890091113e-05, "loss": 1.9138, "step": 3055 }, { "epoch": 0.5505066426480523, "grad_norm": 1.3434410095214844, "learning_rate": 8.251868290659993e-05, "loss": 1.9507, "step": 3056 }, { "epoch": 0.5506867822562486, "grad_norm": 1.4837720394134521, "learning_rate": 8.250792430365582e-05, "loss": 2.0902, "step": 3057 }, { "epoch": 0.550866921864445, "grad_norm": 1.5025969743728638, "learning_rate": 8.249716309294184e-05, "loss": 1.8903, "step": 3058 }, { "epoch": 0.5510470614726413, "grad_norm": 1.7507224082946777, "learning_rate": 8.248639927532127e-05, "loss": 2.0086, "step": 3059 }, { "epoch": 0.5512272010808377, "grad_norm": 1.455481767654419, "learning_rate": 8.247563285165759e-05, "loss": 1.6329, "step": 3060 }, { "epoch": 0.551407340689034, "grad_norm": 1.2602249383926392, "learning_rate": 8.246486382281444e-05, "loss": 1.6726, "step": 3061 }, { "epoch": 0.5515874802972304, "grad_norm": 1.2342243194580078, "learning_rate": 8.245409218965575e-05, "loss": 1.6463, "step": 3062 }, { "epoch": 0.5517676199054267, "grad_norm": 1.3046754598617554, "learning_rate": 8.244331795304559e-05, "loss": 1.6362, "step": 3063 }, { "epoch": 0.5519477595136231, "grad_norm": 1.353340744972229, "learning_rate": 8.243254111384825e-05, "loss": 1.6933, "step": 3064 }, { "epoch": 0.5521278991218194, "grad_norm": 1.2738736867904663, "learning_rate": 8.242176167292826e-05, "loss": 1.516, "step": 3065 }, { "epoch": 0.5523080387300158, "grad_norm": 1.4168224334716797, "learning_rate": 8.241097963115035e-05, "loss": 1.5416, "step": 3066 }, { "epoch": 0.5524881783382121, "grad_norm": 1.4942160844802856, "learning_rate": 8.240019498937944e-05, "loss": 1.9458, "step": 3067 }, { "epoch": 0.5526683179464085, "grad_norm": 1.2541728019714355, "learning_rate": 8.238940774848066e-05, "loss": 1.4761, "step": 3068 }, { "epoch": 0.5528484575546048, "grad_norm": 1.240328073501587, "learning_rate": 8.237861790931938e-05, "loss": 1.7644, "step": 3069 }, { "epoch": 0.5530285971628012, "grad_norm": 1.3574550151824951, "learning_rate": 8.236782547276114e-05, "loss": 1.5677, "step": 3070 }, { "epoch": 0.5532087367709976, "grad_norm": 1.2949200868606567, "learning_rate": 8.23570304396717e-05, "loss": 2.0283, "step": 3071 }, { "epoch": 0.5533888763791939, "grad_norm": 1.3146377801895142, "learning_rate": 8.234623281091704e-05, "loss": 1.6428, "step": 3072 }, { "epoch": 0.5535690159873903, "grad_norm": 1.2935655117034912, "learning_rate": 8.233543258736334e-05, "loss": 1.7052, "step": 3073 }, { "epoch": 0.5537491555955866, "grad_norm": 1.128687858581543, "learning_rate": 8.232462976987699e-05, "loss": 1.4706, "step": 3074 }, { "epoch": 0.553929295203783, "grad_norm": 1.203316330909729, "learning_rate": 8.231382435932459e-05, "loss": 1.4951, "step": 3075 }, { "epoch": 0.5541094348119793, "grad_norm": 1.3685472011566162, "learning_rate": 8.230301635657293e-05, "loss": 1.8669, "step": 3076 }, { "epoch": 0.5542895744201757, "grad_norm": 1.301174283027649, "learning_rate": 8.229220576248904e-05, "loss": 1.6304, "step": 3077 }, { "epoch": 0.554469714028372, "grad_norm": 1.19906485080719, "learning_rate": 8.228139257794012e-05, "loss": 1.4816, "step": 3078 }, { "epoch": 0.5546498536365684, "grad_norm": 1.3333052396774292, "learning_rate": 8.227057680379362e-05, "loss": 1.5358, "step": 3079 }, { "epoch": 0.5548299932447647, "grad_norm": 1.2843226194381714, "learning_rate": 8.225975844091717e-05, "loss": 1.5784, "step": 3080 }, { "epoch": 0.5550101328529611, "grad_norm": 1.4443106651306152, "learning_rate": 8.224893749017862e-05, "loss": 1.6562, "step": 3081 }, { "epoch": 0.5551902724611574, "grad_norm": 1.2481725215911865, "learning_rate": 8.223811395244598e-05, "loss": 1.479, "step": 3082 }, { "epoch": 0.5553704120693538, "grad_norm": 1.3599581718444824, "learning_rate": 8.222728782858757e-05, "loss": 1.7141, "step": 3083 }, { "epoch": 0.5555505516775501, "grad_norm": 1.281242847442627, "learning_rate": 8.221645911947182e-05, "loss": 1.4411, "step": 3084 }, { "epoch": 0.5557306912857465, "grad_norm": 1.414431095123291, "learning_rate": 8.220562782596742e-05, "loss": 1.7247, "step": 3085 }, { "epoch": 0.5559108308939428, "grad_norm": 1.418641209602356, "learning_rate": 8.219479394894323e-05, "loss": 1.8093, "step": 3086 }, { "epoch": 0.5560909705021392, "grad_norm": 1.356667160987854, "learning_rate": 8.218395748926834e-05, "loss": 1.6301, "step": 3087 }, { "epoch": 0.5562711101103355, "grad_norm": 1.4201664924621582, "learning_rate": 8.217311844781208e-05, "loss": 1.7891, "step": 3088 }, { "epoch": 0.5564512497185319, "grad_norm": 1.3570770025253296, "learning_rate": 8.21622768254439e-05, "loss": 1.6598, "step": 3089 }, { "epoch": 0.5566313893267282, "grad_norm": 1.3093458414077759, "learning_rate": 8.215143262303358e-05, "loss": 1.5541, "step": 3090 }, { "epoch": 0.5568115289349246, "grad_norm": 1.5533108711242676, "learning_rate": 8.214058584145096e-05, "loss": 2.0327, "step": 3091 }, { "epoch": 0.5569916685431209, "grad_norm": 1.458650827407837, "learning_rate": 8.212973648156622e-05, "loss": 1.9261, "step": 3092 }, { "epoch": 0.5571718081513173, "grad_norm": 1.324681043624878, "learning_rate": 8.211888454424966e-05, "loss": 1.762, "step": 3093 }, { "epoch": 0.5573519477595136, "grad_norm": 1.4270514249801636, "learning_rate": 8.210803003037182e-05, "loss": 1.6771, "step": 3094 }, { "epoch": 0.55753208736771, "grad_norm": 1.2860157489776611, "learning_rate": 8.209717294080345e-05, "loss": 1.503, "step": 3095 }, { "epoch": 0.5577122269759063, "grad_norm": 1.3806312084197998, "learning_rate": 8.208631327641551e-05, "loss": 1.5762, "step": 3096 }, { "epoch": 0.5578923665841027, "grad_norm": 1.3448361158370972, "learning_rate": 8.207545103807916e-05, "loss": 1.7357, "step": 3097 }, { "epoch": 0.558072506192299, "grad_norm": 1.4819183349609375, "learning_rate": 8.206458622666574e-05, "loss": 1.8043, "step": 3098 }, { "epoch": 0.5582526458004954, "grad_norm": 1.247347116470337, "learning_rate": 8.205371884304682e-05, "loss": 1.4419, "step": 3099 }, { "epoch": 0.5584327854086918, "grad_norm": 1.2945557832717896, "learning_rate": 8.20428488880942e-05, "loss": 1.3948, "step": 3100 }, { "epoch": 0.5586129250168881, "grad_norm": 1.2828346490859985, "learning_rate": 8.203197636267985e-05, "loss": 2.2183, "step": 3101 }, { "epoch": 0.5587930646250845, "grad_norm": 1.1839442253112793, "learning_rate": 8.202110126767595e-05, "loss": 2.0172, "step": 3102 }, { "epoch": 0.5589732042332808, "grad_norm": 1.301910638809204, "learning_rate": 8.20102236039549e-05, "loss": 2.1956, "step": 3103 }, { "epoch": 0.5591533438414772, "grad_norm": 1.1977522373199463, "learning_rate": 8.199934337238933e-05, "loss": 1.8359, "step": 3104 }, { "epoch": 0.5593334834496735, "grad_norm": 1.2712671756744385, "learning_rate": 8.1988460573852e-05, "loss": 2.0776, "step": 3105 }, { "epoch": 0.5595136230578699, "grad_norm": 1.2882437705993652, "learning_rate": 8.197757520921594e-05, "loss": 2.1507, "step": 3106 }, { "epoch": 0.5596937626660662, "grad_norm": 1.2889759540557861, "learning_rate": 8.196668727935438e-05, "loss": 1.9329, "step": 3107 }, { "epoch": 0.5598739022742626, "grad_norm": 1.398176908493042, "learning_rate": 8.195579678514074e-05, "loss": 1.919, "step": 3108 }, { "epoch": 0.5600540418824589, "grad_norm": 1.539720058441162, "learning_rate": 8.194490372744863e-05, "loss": 2.1278, "step": 3109 }, { "epoch": 0.5602341814906553, "grad_norm": 1.7101788520812988, "learning_rate": 8.193400810715192e-05, "loss": 2.1057, "step": 3110 }, { "epoch": 0.5604143210988516, "grad_norm": 1.318769097328186, "learning_rate": 8.192310992512463e-05, "loss": 1.738, "step": 3111 }, { "epoch": 0.560594460707048, "grad_norm": 1.2736765146255493, "learning_rate": 8.191220918224101e-05, "loss": 1.8106, "step": 3112 }, { "epoch": 0.5607746003152443, "grad_norm": 1.1861085891723633, "learning_rate": 8.190130587937551e-05, "loss": 1.3742, "step": 3113 }, { "epoch": 0.5609547399234407, "grad_norm": 1.3282548189163208, "learning_rate": 8.18904000174028e-05, "loss": 1.9661, "step": 3114 }, { "epoch": 0.561134879531637, "grad_norm": 1.2139323949813843, "learning_rate": 8.187949159719772e-05, "loss": 1.6599, "step": 3115 }, { "epoch": 0.5613150191398334, "grad_norm": 1.1640039682388306, "learning_rate": 8.186858061963537e-05, "loss": 1.4607, "step": 3116 }, { "epoch": 0.5614951587480297, "grad_norm": 1.270864725112915, "learning_rate": 8.185766708559099e-05, "loss": 1.4509, "step": 3117 }, { "epoch": 0.5616752983562261, "grad_norm": 1.2682238817214966, "learning_rate": 8.184675099594008e-05, "loss": 1.4128, "step": 3118 }, { "epoch": 0.5618554379644224, "grad_norm": 1.3159164190292358, "learning_rate": 8.183583235155832e-05, "loss": 1.6746, "step": 3119 }, { "epoch": 0.5620355775726188, "grad_norm": 1.3651436567306519, "learning_rate": 8.18249111533216e-05, "loss": 1.5824, "step": 3120 }, { "epoch": 0.5622157171808151, "grad_norm": 1.2646636962890625, "learning_rate": 8.1813987402106e-05, "loss": 1.7706, "step": 3121 }, { "epoch": 0.5623958567890115, "grad_norm": 1.222787857055664, "learning_rate": 8.180306109878782e-05, "loss": 1.4056, "step": 3122 }, { "epoch": 0.5625759963972078, "grad_norm": 1.2767640352249146, "learning_rate": 8.179213224424358e-05, "loss": 1.6311, "step": 3123 }, { "epoch": 0.5627561360054042, "grad_norm": 1.295236349105835, "learning_rate": 8.178120083934997e-05, "loss": 1.6952, "step": 3124 }, { "epoch": 0.5629362756136005, "grad_norm": 1.268667221069336, "learning_rate": 8.17702668849839e-05, "loss": 1.6174, "step": 3125 }, { "epoch": 0.5631164152217969, "grad_norm": 1.116782784461975, "learning_rate": 8.175933038202252e-05, "loss": 1.2679, "step": 3126 }, { "epoch": 0.5632965548299932, "grad_norm": 1.4525080919265747, "learning_rate": 8.174839133134311e-05, "loss": 2.047, "step": 3127 }, { "epoch": 0.5634766944381896, "grad_norm": 1.267739176750183, "learning_rate": 8.173744973382323e-05, "loss": 1.7404, "step": 3128 }, { "epoch": 0.563656834046386, "grad_norm": 1.4082084894180298, "learning_rate": 8.172650559034056e-05, "loss": 1.7778, "step": 3129 }, { "epoch": 0.5638369736545823, "grad_norm": 1.1513056755065918, "learning_rate": 8.171555890177309e-05, "loss": 1.4083, "step": 3130 }, { "epoch": 0.5640171132627787, "grad_norm": 1.3695405721664429, "learning_rate": 8.170460966899891e-05, "loss": 1.8081, "step": 3131 }, { "epoch": 0.564197252870975, "grad_norm": 1.3276681900024414, "learning_rate": 8.16936578928964e-05, "loss": 1.7415, "step": 3132 }, { "epoch": 0.5643773924791714, "grad_norm": 1.310463547706604, "learning_rate": 8.16827035743441e-05, "loss": 1.7445, "step": 3133 }, { "epoch": 0.5645575320873677, "grad_norm": 1.3860929012298584, "learning_rate": 8.167174671422074e-05, "loss": 1.7978, "step": 3134 }, { "epoch": 0.5647376716955641, "grad_norm": 1.1992087364196777, "learning_rate": 8.16607873134053e-05, "loss": 1.5431, "step": 3135 }, { "epoch": 0.5649178113037604, "grad_norm": 1.1616803407669067, "learning_rate": 8.16498253727769e-05, "loss": 1.4783, "step": 3136 }, { "epoch": 0.5650979509119568, "grad_norm": 1.2000281810760498, "learning_rate": 8.163886089321493e-05, "loss": 1.4426, "step": 3137 }, { "epoch": 0.5652780905201531, "grad_norm": 1.3088774681091309, "learning_rate": 8.162789387559894e-05, "loss": 1.5736, "step": 3138 }, { "epoch": 0.5654582301283495, "grad_norm": 1.4206700325012207, "learning_rate": 8.161692432080873e-05, "loss": 1.5989, "step": 3139 }, { "epoch": 0.5656383697365458, "grad_norm": 1.4686074256896973, "learning_rate": 8.160595222972426e-05, "loss": 1.7029, "step": 3140 }, { "epoch": 0.5658185093447422, "grad_norm": 1.3710918426513672, "learning_rate": 8.159497760322567e-05, "loss": 1.4908, "step": 3141 }, { "epoch": 0.5659986489529385, "grad_norm": 1.4399117231369019, "learning_rate": 8.158400044219338e-05, "loss": 1.9285, "step": 3142 }, { "epoch": 0.5661787885611349, "grad_norm": 1.6106703281402588, "learning_rate": 8.157302074750797e-05, "loss": 1.9503, "step": 3143 }, { "epoch": 0.5663589281693312, "grad_norm": 1.3452823162078857, "learning_rate": 8.15620385200502e-05, "loss": 1.5154, "step": 3144 }, { "epoch": 0.5665390677775276, "grad_norm": 1.34783136844635, "learning_rate": 8.155105376070107e-05, "loss": 1.598, "step": 3145 }, { "epoch": 0.5667192073857239, "grad_norm": 1.367321252822876, "learning_rate": 8.154006647034179e-05, "loss": 1.5694, "step": 3146 }, { "epoch": 0.5668993469939203, "grad_norm": 1.5204110145568848, "learning_rate": 8.152907664985373e-05, "loss": 1.8233, "step": 3147 }, { "epoch": 0.5670794866021166, "grad_norm": 1.4078739881515503, "learning_rate": 8.151808430011848e-05, "loss": 1.5152, "step": 3148 }, { "epoch": 0.567259626210313, "grad_norm": 1.4590213298797607, "learning_rate": 8.150708942201788e-05, "loss": 1.5588, "step": 3149 }, { "epoch": 0.5674397658185093, "grad_norm": 1.358292818069458, "learning_rate": 8.149609201643391e-05, "loss": 1.3822, "step": 3150 }, { "epoch": 0.5676199054267057, "grad_norm": 1.3277286291122437, "learning_rate": 8.148509208424878e-05, "loss": 2.0926, "step": 3151 }, { "epoch": 0.567800045034902, "grad_norm": 1.2633247375488281, "learning_rate": 8.147408962634488e-05, "loss": 2.0861, "step": 3152 }, { "epoch": 0.5679801846430984, "grad_norm": 1.2951682806015015, "learning_rate": 8.146308464360489e-05, "loss": 2.0527, "step": 3153 }, { "epoch": 0.5681603242512947, "grad_norm": 1.1878705024719238, "learning_rate": 8.145207713691153e-05, "loss": 1.8376, "step": 3154 }, { "epoch": 0.5683404638594911, "grad_norm": 1.253411889076233, "learning_rate": 8.144106710714788e-05, "loss": 1.8045, "step": 3155 }, { "epoch": 0.5685206034676875, "grad_norm": 1.3438165187835693, "learning_rate": 8.143005455519715e-05, "loss": 2.0449, "step": 3156 }, { "epoch": 0.5687007430758838, "grad_norm": 1.3496830463409424, "learning_rate": 8.141903948194271e-05, "loss": 2.1844, "step": 3157 }, { "epoch": 0.5688808826840802, "grad_norm": 1.3739079236984253, "learning_rate": 8.140802188826827e-05, "loss": 1.8802, "step": 3158 }, { "epoch": 0.5690610222922765, "grad_norm": 1.5568959712982178, "learning_rate": 8.13970017750576e-05, "loss": 1.8717, "step": 3159 }, { "epoch": 0.5692411619004729, "grad_norm": 1.7015992403030396, "learning_rate": 8.138597914319474e-05, "loss": 1.9087, "step": 3160 }, { "epoch": 0.5694213015086692, "grad_norm": 1.2822550535202026, "learning_rate": 8.137495399356391e-05, "loss": 1.6845, "step": 3161 }, { "epoch": 0.5696014411168656, "grad_norm": 1.450877070426941, "learning_rate": 8.136392632704957e-05, "loss": 1.8716, "step": 3162 }, { "epoch": 0.5697815807250619, "grad_norm": 1.3381742238998413, "learning_rate": 8.135289614453633e-05, "loss": 1.679, "step": 3163 }, { "epoch": 0.5699617203332583, "grad_norm": 1.353865623474121, "learning_rate": 8.134186344690903e-05, "loss": 1.798, "step": 3164 }, { "epoch": 0.5701418599414546, "grad_norm": 1.2209184169769287, "learning_rate": 8.133082823505271e-05, "loss": 1.61, "step": 3165 }, { "epoch": 0.570321999549651, "grad_norm": 1.2871285676956177, "learning_rate": 8.131979050985262e-05, "loss": 1.4535, "step": 3166 }, { "epoch": 0.5705021391578473, "grad_norm": 1.202979326248169, "learning_rate": 8.130875027219417e-05, "loss": 1.5035, "step": 3167 }, { "epoch": 0.5706822787660437, "grad_norm": 1.2236607074737549, "learning_rate": 8.129770752296305e-05, "loss": 1.5575, "step": 3168 }, { "epoch": 0.57086241837424, "grad_norm": 1.18218994140625, "learning_rate": 8.128666226304507e-05, "loss": 1.5653, "step": 3169 }, { "epoch": 0.5710425579824364, "grad_norm": 1.3156682252883911, "learning_rate": 8.127561449332627e-05, "loss": 1.4856, "step": 3170 }, { "epoch": 0.5712226975906327, "grad_norm": 1.2198225259780884, "learning_rate": 8.126456421469291e-05, "loss": 1.5367, "step": 3171 }, { "epoch": 0.5714028371988291, "grad_norm": 1.2979079484939575, "learning_rate": 8.125351142803144e-05, "loss": 1.7432, "step": 3172 }, { "epoch": 0.5715829768070254, "grad_norm": 1.4458940029144287, "learning_rate": 8.124245613422851e-05, "loss": 1.6537, "step": 3173 }, { "epoch": 0.5717631164152218, "grad_norm": 1.4343434572219849, "learning_rate": 8.123139833417095e-05, "loss": 1.7432, "step": 3174 }, { "epoch": 0.5719432560234181, "grad_norm": 1.2733614444732666, "learning_rate": 8.122033802874585e-05, "loss": 1.8152, "step": 3175 }, { "epoch": 0.5721233956316145, "grad_norm": 1.32044517993927, "learning_rate": 8.120927521884044e-05, "loss": 1.8011, "step": 3176 }, { "epoch": 0.5723035352398108, "grad_norm": 1.2091959714889526, "learning_rate": 8.119820990534218e-05, "loss": 1.7056, "step": 3177 }, { "epoch": 0.5724836748480072, "grad_norm": 1.3584576845169067, "learning_rate": 8.11871420891387e-05, "loss": 1.8433, "step": 3178 }, { "epoch": 0.5726638144562035, "grad_norm": 1.2639418840408325, "learning_rate": 8.117607177111787e-05, "loss": 1.6686, "step": 3179 }, { "epoch": 0.5728439540643999, "grad_norm": 1.385926604270935, "learning_rate": 8.116499895216774e-05, "loss": 1.883, "step": 3180 }, { "epoch": 0.5730240936725962, "grad_norm": 1.3171542882919312, "learning_rate": 8.11539236331766e-05, "loss": 1.851, "step": 3181 }, { "epoch": 0.5732042332807926, "grad_norm": 1.26935875415802, "learning_rate": 8.114284581503287e-05, "loss": 1.5329, "step": 3182 }, { "epoch": 0.5733843728889889, "grad_norm": 1.4185380935668945, "learning_rate": 8.113176549862522e-05, "loss": 1.6581, "step": 3183 }, { "epoch": 0.5735645124971853, "grad_norm": 1.344731092453003, "learning_rate": 8.112068268484249e-05, "loss": 1.5609, "step": 3184 }, { "epoch": 0.5737446521053817, "grad_norm": 1.2733148336410522, "learning_rate": 8.110959737457377e-05, "loss": 1.6929, "step": 3185 }, { "epoch": 0.573924791713578, "grad_norm": 1.4895830154418945, "learning_rate": 8.10985095687083e-05, "loss": 1.8285, "step": 3186 }, { "epoch": 0.5741049313217744, "grad_norm": 1.3092437982559204, "learning_rate": 8.108741926813553e-05, "loss": 1.5719, "step": 3187 }, { "epoch": 0.5742850709299707, "grad_norm": 1.3088788986206055, "learning_rate": 8.107632647374514e-05, "loss": 1.6501, "step": 3188 }, { "epoch": 0.5744652105381671, "grad_norm": 1.3658629655838013, "learning_rate": 8.106523118642697e-05, "loss": 1.6845, "step": 3189 }, { "epoch": 0.5746453501463634, "grad_norm": 1.3632906675338745, "learning_rate": 8.105413340707109e-05, "loss": 1.8674, "step": 3190 }, { "epoch": 0.5748254897545598, "grad_norm": 1.3251127004623413, "learning_rate": 8.104303313656775e-05, "loss": 1.6598, "step": 3191 }, { "epoch": 0.5750056293627561, "grad_norm": 1.3589049577713013, "learning_rate": 8.10319303758074e-05, "loss": 1.6211, "step": 3192 }, { "epoch": 0.5751857689709525, "grad_norm": 1.352040410041809, "learning_rate": 8.102082512568075e-05, "loss": 1.4777, "step": 3193 }, { "epoch": 0.5753659085791488, "grad_norm": 1.4846113920211792, "learning_rate": 8.100971738707857e-05, "loss": 1.9339, "step": 3194 }, { "epoch": 0.5755460481873452, "grad_norm": 1.3877880573272705, "learning_rate": 8.0998607160892e-05, "loss": 1.6246, "step": 3195 }, { "epoch": 0.5757261877955415, "grad_norm": 1.3888198137283325, "learning_rate": 8.098749444801224e-05, "loss": 1.7753, "step": 3196 }, { "epoch": 0.5759063274037379, "grad_norm": 1.5230215787887573, "learning_rate": 8.09763792493308e-05, "loss": 1.6742, "step": 3197 }, { "epoch": 0.5760864670119342, "grad_norm": 1.4500510692596436, "learning_rate": 8.096526156573926e-05, "loss": 1.4329, "step": 3198 }, { "epoch": 0.5762666066201306, "grad_norm": 1.4574159383773804, "learning_rate": 8.095414139812956e-05, "loss": 1.6202, "step": 3199 }, { "epoch": 0.5764467462283269, "grad_norm": 1.290509581565857, "learning_rate": 8.09430187473937e-05, "loss": 1.2, "step": 3200 }, { "epoch": 0.5766268858365233, "grad_norm": 1.2402974367141724, "learning_rate": 8.093189361442394e-05, "loss": 1.7644, "step": 3201 }, { "epoch": 0.5768070254447196, "grad_norm": 1.2125972509384155, "learning_rate": 8.092076600011276e-05, "loss": 2.1467, "step": 3202 }, { "epoch": 0.576987165052916, "grad_norm": 1.2628872394561768, "learning_rate": 8.090963590535277e-05, "loss": 2.2245, "step": 3203 }, { "epoch": 0.5771673046611123, "grad_norm": 1.1404106616973877, "learning_rate": 8.089850333103684e-05, "loss": 1.856, "step": 3204 }, { "epoch": 0.5773474442693087, "grad_norm": 1.3384531736373901, "learning_rate": 8.088736827805805e-05, "loss": 1.935, "step": 3205 }, { "epoch": 0.577527583877505, "grad_norm": 1.2109678983688354, "learning_rate": 8.08762307473096e-05, "loss": 1.5656, "step": 3206 }, { "epoch": 0.5777077234857014, "grad_norm": 1.3816996812820435, "learning_rate": 8.086509073968496e-05, "loss": 1.7886, "step": 3207 }, { "epoch": 0.5778878630938977, "grad_norm": 1.5806138515472412, "learning_rate": 8.085394825607778e-05, "loss": 2.3206, "step": 3208 }, { "epoch": 0.5780680027020941, "grad_norm": 1.739669919013977, "learning_rate": 8.084280329738191e-05, "loss": 2.2176, "step": 3209 }, { "epoch": 0.5782481423102904, "grad_norm": 1.7253975868225098, "learning_rate": 8.083165586449138e-05, "loss": 2.2416, "step": 3210 }, { "epoch": 0.5784282819184868, "grad_norm": 1.3106157779693604, "learning_rate": 8.082050595830042e-05, "loss": 1.811, "step": 3211 }, { "epoch": 0.5786084215266831, "grad_norm": 1.2804299592971802, "learning_rate": 8.080935357970348e-05, "loss": 1.6173, "step": 3212 }, { "epoch": 0.5787885611348795, "grad_norm": 1.19856595993042, "learning_rate": 8.079819872959522e-05, "loss": 1.66, "step": 3213 }, { "epoch": 0.5789687007430759, "grad_norm": 1.1803557872772217, "learning_rate": 8.078704140887046e-05, "loss": 1.4163, "step": 3214 }, { "epoch": 0.5791488403512722, "grad_norm": 1.1865688562393188, "learning_rate": 8.077588161842422e-05, "loss": 1.475, "step": 3215 }, { "epoch": 0.5793289799594686, "grad_norm": 1.3032927513122559, "learning_rate": 8.076471935915176e-05, "loss": 1.6544, "step": 3216 }, { "epoch": 0.5795091195676649, "grad_norm": 1.2634093761444092, "learning_rate": 8.075355463194847e-05, "loss": 1.7275, "step": 3217 }, { "epoch": 0.5796892591758613, "grad_norm": 1.341326355934143, "learning_rate": 8.074238743771003e-05, "loss": 1.7533, "step": 3218 }, { "epoch": 0.5798693987840576, "grad_norm": 1.3142240047454834, "learning_rate": 8.073121777733226e-05, "loss": 1.8086, "step": 3219 }, { "epoch": 0.580049538392254, "grad_norm": 1.1341456174850464, "learning_rate": 8.072004565171114e-05, "loss": 1.4552, "step": 3220 }, { "epoch": 0.5802296780004503, "grad_norm": 1.1689975261688232, "learning_rate": 8.070887106174293e-05, "loss": 1.3858, "step": 3221 }, { "epoch": 0.5804098176086467, "grad_norm": 1.2383785247802734, "learning_rate": 8.069769400832404e-05, "loss": 1.5716, "step": 3222 }, { "epoch": 0.580589957216843, "grad_norm": 1.3554126024246216, "learning_rate": 8.06865144923511e-05, "loss": 1.7538, "step": 3223 }, { "epoch": 0.5807700968250394, "grad_norm": 1.5515005588531494, "learning_rate": 8.06753325147209e-05, "loss": 1.8926, "step": 3224 }, { "epoch": 0.5809502364332357, "grad_norm": 1.3006205558776855, "learning_rate": 8.066414807633047e-05, "loss": 1.5122, "step": 3225 }, { "epoch": 0.5811303760414321, "grad_norm": 1.249218463897705, "learning_rate": 8.065296117807703e-05, "loss": 1.7046, "step": 3226 }, { "epoch": 0.5813105156496284, "grad_norm": 1.3515232801437378, "learning_rate": 8.064177182085795e-05, "loss": 1.6931, "step": 3227 }, { "epoch": 0.5814906552578248, "grad_norm": 1.1837708950042725, "learning_rate": 8.06305800055709e-05, "loss": 1.4618, "step": 3228 }, { "epoch": 0.5816707948660211, "grad_norm": 1.3043564558029175, "learning_rate": 8.061938573311359e-05, "loss": 1.5691, "step": 3229 }, { "epoch": 0.5818509344742175, "grad_norm": 1.4088311195373535, "learning_rate": 8.060818900438411e-05, "loss": 1.4785, "step": 3230 }, { "epoch": 0.5820310740824138, "grad_norm": 1.2701168060302734, "learning_rate": 8.059698982028059e-05, "loss": 1.4091, "step": 3231 }, { "epoch": 0.5822112136906102, "grad_norm": 1.3696273565292358, "learning_rate": 8.058578818170147e-05, "loss": 1.7556, "step": 3232 }, { "epoch": 0.5823913532988065, "grad_norm": 1.3635327816009521, "learning_rate": 8.057458408954532e-05, "loss": 1.8731, "step": 3233 }, { "epoch": 0.5825714929070029, "grad_norm": 1.3212649822235107, "learning_rate": 8.05633775447109e-05, "loss": 1.726, "step": 3234 }, { "epoch": 0.5827516325151992, "grad_norm": 1.2060922384262085, "learning_rate": 8.055216854809726e-05, "loss": 1.4458, "step": 3235 }, { "epoch": 0.5829317721233956, "grad_norm": 1.4185734987258911, "learning_rate": 8.054095710060351e-05, "loss": 1.4539, "step": 3236 }, { "epoch": 0.5831119117315919, "grad_norm": 1.3189666271209717, "learning_rate": 8.052974320312907e-05, "loss": 1.8016, "step": 3237 }, { "epoch": 0.5832920513397883, "grad_norm": 1.191605806350708, "learning_rate": 8.05185268565735e-05, "loss": 1.6538, "step": 3238 }, { "epoch": 0.5834721909479846, "grad_norm": 1.3001261949539185, "learning_rate": 8.050730806183657e-05, "loss": 1.5849, "step": 3239 }, { "epoch": 0.583652330556181, "grad_norm": 1.5563225746154785, "learning_rate": 8.049608681981824e-05, "loss": 1.877, "step": 3240 }, { "epoch": 0.5838324701643773, "grad_norm": 1.3988347053527832, "learning_rate": 8.048486313141868e-05, "loss": 1.7948, "step": 3241 }, { "epoch": 0.5840126097725737, "grad_norm": 1.4530854225158691, "learning_rate": 8.047363699753827e-05, "loss": 1.8618, "step": 3242 }, { "epoch": 0.5841927493807701, "grad_norm": 1.329079270362854, "learning_rate": 8.046240841907752e-05, "loss": 1.717, "step": 3243 }, { "epoch": 0.5843728889889664, "grad_norm": 1.4676613807678223, "learning_rate": 8.045117739693722e-05, "loss": 1.62, "step": 3244 }, { "epoch": 0.5845530285971628, "grad_norm": 1.367106556892395, "learning_rate": 8.043994393201828e-05, "loss": 1.5984, "step": 3245 }, { "epoch": 0.5847331682053591, "grad_norm": 1.3436119556427002, "learning_rate": 8.042870802522189e-05, "loss": 1.5044, "step": 3246 }, { "epoch": 0.5849133078135555, "grad_norm": 1.3227373361587524, "learning_rate": 8.041746967744934e-05, "loss": 1.6565, "step": 3247 }, { "epoch": 0.5850934474217518, "grad_norm": 1.3055144548416138, "learning_rate": 8.040622888960221e-05, "loss": 1.5221, "step": 3248 }, { "epoch": 0.5852735870299483, "grad_norm": 1.421113133430481, "learning_rate": 8.03949856625822e-05, "loss": 1.4749, "step": 3249 }, { "epoch": 0.5854537266381445, "grad_norm": 1.351487159729004, "learning_rate": 8.038373999729123e-05, "loss": 1.4543, "step": 3250 }, { "epoch": 0.585633866246341, "grad_norm": 1.2402830123901367, "learning_rate": 8.037249189463145e-05, "loss": 1.9116, "step": 3251 }, { "epoch": 0.5858140058545372, "grad_norm": 1.2029560804367065, "learning_rate": 8.036124135550516e-05, "loss": 1.8752, "step": 3252 }, { "epoch": 0.5859941454627337, "grad_norm": 1.6283750534057617, "learning_rate": 8.03499883808149e-05, "loss": 2.3594, "step": 3253 }, { "epoch": 0.58617428507093, "grad_norm": 1.185128092765808, "learning_rate": 8.033873297146333e-05, "loss": 1.9623, "step": 3254 }, { "epoch": 0.5863544246791264, "grad_norm": 1.2744601964950562, "learning_rate": 8.032747512835337e-05, "loss": 2.067, "step": 3255 }, { "epoch": 0.5865345642873226, "grad_norm": 1.3466190099716187, "learning_rate": 8.031621485238815e-05, "loss": 2.1384, "step": 3256 }, { "epoch": 0.586714703895519, "grad_norm": 1.4176702499389648, "learning_rate": 8.030495214447093e-05, "loss": 1.8959, "step": 3257 }, { "epoch": 0.5868948435037153, "grad_norm": 1.4075653553009033, "learning_rate": 8.029368700550522e-05, "loss": 2.0826, "step": 3258 }, { "epoch": 0.5870749831119118, "grad_norm": 1.6770472526550293, "learning_rate": 8.028241943639468e-05, "loss": 2.1306, "step": 3259 }, { "epoch": 0.587255122720108, "grad_norm": 1.672384262084961, "learning_rate": 8.027114943804322e-05, "loss": 2.3935, "step": 3260 }, { "epoch": 0.5874352623283045, "grad_norm": 1.516764760017395, "learning_rate": 8.025987701135487e-05, "loss": 1.9128, "step": 3261 }, { "epoch": 0.5876154019365007, "grad_norm": 1.1755174398422241, "learning_rate": 8.024860215723396e-05, "loss": 1.556, "step": 3262 }, { "epoch": 0.5877955415446972, "grad_norm": 1.195533275604248, "learning_rate": 8.023732487658489e-05, "loss": 1.7773, "step": 3263 }, { "epoch": 0.5879756811528934, "grad_norm": 1.2383601665496826, "learning_rate": 8.022604517031236e-05, "loss": 1.6818, "step": 3264 }, { "epoch": 0.5881558207610899, "grad_norm": 1.1856685876846313, "learning_rate": 8.021476303932121e-05, "loss": 1.4957, "step": 3265 }, { "epoch": 0.5883359603692861, "grad_norm": 1.2532113790512085, "learning_rate": 8.020347848451647e-05, "loss": 1.5602, "step": 3266 }, { "epoch": 0.5885160999774826, "grad_norm": 1.2764549255371094, "learning_rate": 8.019219150680341e-05, "loss": 1.6345, "step": 3267 }, { "epoch": 0.5886962395856788, "grad_norm": 1.1813106536865234, "learning_rate": 8.018090210708746e-05, "loss": 1.4156, "step": 3268 }, { "epoch": 0.5888763791938753, "grad_norm": 1.1520346403121948, "learning_rate": 8.016961028627423e-05, "loss": 1.4903, "step": 3269 }, { "epoch": 0.5890565188020717, "grad_norm": 1.2903872728347778, "learning_rate": 8.015831604526957e-05, "loss": 1.553, "step": 3270 }, { "epoch": 0.589236658410268, "grad_norm": 1.2544065713882446, "learning_rate": 8.014701938497947e-05, "loss": 1.6497, "step": 3271 }, { "epoch": 0.5894167980184644, "grad_norm": 1.2569499015808105, "learning_rate": 8.013572030631017e-05, "loss": 1.5701, "step": 3272 }, { "epoch": 0.5895969376266607, "grad_norm": 1.1802705526351929, "learning_rate": 8.012441881016806e-05, "loss": 1.4772, "step": 3273 }, { "epoch": 0.5897770772348571, "grad_norm": 1.3752899169921875, "learning_rate": 8.011311489745976e-05, "loss": 1.5558, "step": 3274 }, { "epoch": 0.5899572168430534, "grad_norm": 1.2956390380859375, "learning_rate": 8.010180856909204e-05, "loss": 1.4747, "step": 3275 }, { "epoch": 0.5901373564512498, "grad_norm": 1.2420748472213745, "learning_rate": 8.00904998259719e-05, "loss": 1.664, "step": 3276 }, { "epoch": 0.5903174960594461, "grad_norm": 1.2409071922302246, "learning_rate": 8.007918866900653e-05, "loss": 1.4659, "step": 3277 }, { "epoch": 0.5904976356676425, "grad_norm": 1.4191685914993286, "learning_rate": 8.006787509910328e-05, "loss": 1.7158, "step": 3278 }, { "epoch": 0.5906777752758388, "grad_norm": 1.2597142457962036, "learning_rate": 8.005655911716974e-05, "loss": 1.5931, "step": 3279 }, { "epoch": 0.5908579148840352, "grad_norm": 1.317113995552063, "learning_rate": 8.004524072411367e-05, "loss": 1.4726, "step": 3280 }, { "epoch": 0.5910380544922315, "grad_norm": 1.3195549249649048, "learning_rate": 8.003391992084302e-05, "loss": 1.7517, "step": 3281 }, { "epoch": 0.5912181941004279, "grad_norm": 1.504854440689087, "learning_rate": 8.002259670826596e-05, "loss": 1.65, "step": 3282 }, { "epoch": 0.5913983337086242, "grad_norm": 1.3624268770217896, "learning_rate": 8.001127108729079e-05, "loss": 1.7472, "step": 3283 }, { "epoch": 0.5915784733168206, "grad_norm": 1.3879119157791138, "learning_rate": 7.999994305882608e-05, "loss": 1.7861, "step": 3284 }, { "epoch": 0.5917586129250169, "grad_norm": 1.6236234903335571, "learning_rate": 7.998861262378054e-05, "loss": 1.9822, "step": 3285 }, { "epoch": 0.5919387525332133, "grad_norm": 1.463417887687683, "learning_rate": 7.997727978306313e-05, "loss": 1.8017, "step": 3286 }, { "epoch": 0.5921188921414096, "grad_norm": 1.429961919784546, "learning_rate": 7.996594453758292e-05, "loss": 1.8323, "step": 3287 }, { "epoch": 0.592299031749606, "grad_norm": 1.2735933065414429, "learning_rate": 7.995460688824924e-05, "loss": 1.6421, "step": 3288 }, { "epoch": 0.5924791713578023, "grad_norm": 1.447253942489624, "learning_rate": 7.994326683597158e-05, "loss": 1.5854, "step": 3289 }, { "epoch": 0.5926593109659987, "grad_norm": 1.3713699579238892, "learning_rate": 7.993192438165964e-05, "loss": 1.5027, "step": 3290 }, { "epoch": 0.592839450574195, "grad_norm": 1.3277535438537598, "learning_rate": 7.992057952622332e-05, "loss": 1.6642, "step": 3291 }, { "epoch": 0.5930195901823914, "grad_norm": 1.32603919506073, "learning_rate": 7.990923227057266e-05, "loss": 1.6168, "step": 3292 }, { "epoch": 0.5931997297905877, "grad_norm": 1.4450396299362183, "learning_rate": 7.989788261561797e-05, "loss": 1.5499, "step": 3293 }, { "epoch": 0.5933798693987841, "grad_norm": 1.3398592472076416, "learning_rate": 7.98865305622697e-05, "loss": 1.6352, "step": 3294 }, { "epoch": 0.5935600090069804, "grad_norm": 1.3279551267623901, "learning_rate": 7.98751761114385e-05, "loss": 1.4219, "step": 3295 }, { "epoch": 0.5937401486151768, "grad_norm": 1.3421669006347656, "learning_rate": 7.986381926403524e-05, "loss": 1.7277, "step": 3296 }, { "epoch": 0.5939202882233731, "grad_norm": 1.4003654718399048, "learning_rate": 7.985246002097092e-05, "loss": 1.6725, "step": 3297 }, { "epoch": 0.5941004278315695, "grad_norm": 1.4394348859786987, "learning_rate": 7.984109838315682e-05, "loss": 1.6146, "step": 3298 }, { "epoch": 0.5942805674397659, "grad_norm": 1.2530721426010132, "learning_rate": 7.982973435150433e-05, "loss": 1.4136, "step": 3299 }, { "epoch": 0.5944607070479622, "grad_norm": 1.3988275527954102, "learning_rate": 7.981836792692508e-05, "loss": 1.3519, "step": 3300 }, { "epoch": 0.5946408466561586, "grad_norm": 1.0750325918197632, "learning_rate": 7.980699911033087e-05, "loss": 2.0638, "step": 3301 }, { "epoch": 0.5948209862643549, "grad_norm": 1.0915641784667969, "learning_rate": 7.979562790263373e-05, "loss": 1.9258, "step": 3302 }, { "epoch": 0.5950011258725513, "grad_norm": 1.232743501663208, "learning_rate": 7.978425430474581e-05, "loss": 2.2529, "step": 3303 }, { "epoch": 0.5951812654807476, "grad_norm": 1.2405080795288086, "learning_rate": 7.977287831757954e-05, "loss": 2.0744, "step": 3304 }, { "epoch": 0.595361405088944, "grad_norm": 1.2133256196975708, "learning_rate": 7.976149994204747e-05, "loss": 1.8397, "step": 3305 }, { "epoch": 0.5955415446971403, "grad_norm": 1.2922027111053467, "learning_rate": 7.975011917906236e-05, "loss": 2.122, "step": 3306 }, { "epoch": 0.5957216843053367, "grad_norm": 1.2730096578598022, "learning_rate": 7.973873602953719e-05, "loss": 1.9076, "step": 3307 }, { "epoch": 0.595901823913533, "grad_norm": 1.3810216188430786, "learning_rate": 7.972735049438509e-05, "loss": 1.6815, "step": 3308 }, { "epoch": 0.5960819635217294, "grad_norm": 1.4814426898956299, "learning_rate": 7.97159625745194e-05, "loss": 2.0047, "step": 3309 }, { "epoch": 0.5962621031299257, "grad_norm": 1.6553380489349365, "learning_rate": 7.97045722708537e-05, "loss": 2.2769, "step": 3310 }, { "epoch": 0.5964422427381221, "grad_norm": 1.4031059741973877, "learning_rate": 7.969317958430166e-05, "loss": 2.0157, "step": 3311 }, { "epoch": 0.5966223823463184, "grad_norm": 1.3077967166900635, "learning_rate": 7.968178451577724e-05, "loss": 1.7834, "step": 3312 }, { "epoch": 0.5968025219545148, "grad_norm": 1.2406649589538574, "learning_rate": 7.96703870661945e-05, "loss": 1.558, "step": 3313 }, { "epoch": 0.5969826615627111, "grad_norm": 1.2428460121154785, "learning_rate": 7.965898723646776e-05, "loss": 1.5469, "step": 3314 }, { "epoch": 0.5971628011709075, "grad_norm": 1.2896435260772705, "learning_rate": 7.964758502751153e-05, "loss": 1.4554, "step": 3315 }, { "epoch": 0.5973429407791038, "grad_norm": 1.2954893112182617, "learning_rate": 7.963618044024046e-05, "loss": 1.5566, "step": 3316 }, { "epoch": 0.5975230803873002, "grad_norm": 1.3373665809631348, "learning_rate": 7.962477347556941e-05, "loss": 1.7136, "step": 3317 }, { "epoch": 0.5977032199954965, "grad_norm": 1.358614206314087, "learning_rate": 7.96133641344135e-05, "loss": 1.5006, "step": 3318 }, { "epoch": 0.5978833596036929, "grad_norm": 1.3425312042236328, "learning_rate": 7.960195241768791e-05, "loss": 1.6973, "step": 3319 }, { "epoch": 0.5980634992118892, "grad_norm": 1.2611958980560303, "learning_rate": 7.959053832630813e-05, "loss": 1.5634, "step": 3320 }, { "epoch": 0.5982436388200856, "grad_norm": 1.2140748500823975, "learning_rate": 7.957912186118979e-05, "loss": 1.2989, "step": 3321 }, { "epoch": 0.5984237784282819, "grad_norm": 1.2744649648666382, "learning_rate": 7.956770302324867e-05, "loss": 1.5584, "step": 3322 }, { "epoch": 0.5986039180364783, "grad_norm": 1.2696504592895508, "learning_rate": 7.955628181340084e-05, "loss": 1.6822, "step": 3323 }, { "epoch": 0.5987840576446746, "grad_norm": 1.260656714439392, "learning_rate": 7.954485823256247e-05, "loss": 1.5254, "step": 3324 }, { "epoch": 0.598964197252871, "grad_norm": 1.2681546211242676, "learning_rate": 7.953343228164998e-05, "loss": 1.6567, "step": 3325 }, { "epoch": 0.5991443368610673, "grad_norm": 1.3946624994277954, "learning_rate": 7.952200396157991e-05, "loss": 1.803, "step": 3326 }, { "epoch": 0.5993244764692637, "grad_norm": 1.3119937181472778, "learning_rate": 7.951057327326908e-05, "loss": 1.772, "step": 3327 }, { "epoch": 0.5995046160774601, "grad_norm": 1.2493733167648315, "learning_rate": 7.949914021763444e-05, "loss": 1.5276, "step": 3328 }, { "epoch": 0.5996847556856564, "grad_norm": 1.3633266687393188, "learning_rate": 7.948770479559314e-05, "loss": 1.6384, "step": 3329 }, { "epoch": 0.5998648952938528, "grad_norm": 1.3602796792984009, "learning_rate": 7.94762670080625e-05, "loss": 1.6157, "step": 3330 }, { "epoch": 0.6000450349020491, "grad_norm": 1.277429461479187, "learning_rate": 7.946482685596011e-05, "loss": 1.4277, "step": 3331 }, { "epoch": 0.6002251745102455, "grad_norm": 1.2905783653259277, "learning_rate": 7.945338434020364e-05, "loss": 1.5763, "step": 3332 }, { "epoch": 0.6004053141184418, "grad_norm": 1.4707800149917603, "learning_rate": 7.944193946171103e-05, "loss": 1.8454, "step": 3333 }, { "epoch": 0.6005854537266382, "grad_norm": 1.331000804901123, "learning_rate": 7.943049222140037e-05, "loss": 1.6466, "step": 3334 }, { "epoch": 0.6007655933348345, "grad_norm": 1.3378723859786987, "learning_rate": 7.941904262018998e-05, "loss": 1.6085, "step": 3335 }, { "epoch": 0.6009457329430309, "grad_norm": 1.4617573022842407, "learning_rate": 7.94075906589983e-05, "loss": 1.7044, "step": 3336 }, { "epoch": 0.6011258725512272, "grad_norm": 1.3141578435897827, "learning_rate": 7.939613633874403e-05, "loss": 1.598, "step": 3337 }, { "epoch": 0.6013060121594236, "grad_norm": 1.4842551946640015, "learning_rate": 7.938467966034601e-05, "loss": 1.8941, "step": 3338 }, { "epoch": 0.6014861517676199, "grad_norm": 1.3281524181365967, "learning_rate": 7.937322062472333e-05, "loss": 1.567, "step": 3339 }, { "epoch": 0.6016662913758163, "grad_norm": 1.2148057222366333, "learning_rate": 7.936175923279519e-05, "loss": 1.4619, "step": 3340 }, { "epoch": 0.6018464309840126, "grad_norm": 1.467674732208252, "learning_rate": 7.935029548548099e-05, "loss": 1.7453, "step": 3341 }, { "epoch": 0.602026570592209, "grad_norm": 1.2933822870254517, "learning_rate": 7.933882938370042e-05, "loss": 1.4134, "step": 3342 }, { "epoch": 0.6022067102004053, "grad_norm": 1.387030839920044, "learning_rate": 7.932736092837323e-05, "loss": 1.4154, "step": 3343 }, { "epoch": 0.6023868498086017, "grad_norm": 1.3232898712158203, "learning_rate": 7.931589012041944e-05, "loss": 1.5633, "step": 3344 }, { "epoch": 0.602566989416798, "grad_norm": 1.2709810733795166, "learning_rate": 7.930441696075921e-05, "loss": 1.4484, "step": 3345 }, { "epoch": 0.6027471290249944, "grad_norm": 1.5326019525527954, "learning_rate": 7.929294145031294e-05, "loss": 1.8422, "step": 3346 }, { "epoch": 0.6029272686331907, "grad_norm": 1.559929370880127, "learning_rate": 7.928146359000117e-05, "loss": 1.6572, "step": 3347 }, { "epoch": 0.6031074082413871, "grad_norm": 1.3563951253890991, "learning_rate": 7.926998338074465e-05, "loss": 1.3689, "step": 3348 }, { "epoch": 0.6032875478495834, "grad_norm": 1.3260586261749268, "learning_rate": 7.925850082346433e-05, "loss": 1.4621, "step": 3349 }, { "epoch": 0.6034676874577798, "grad_norm": 1.1549409627914429, "learning_rate": 7.924701591908132e-05, "loss": 1.1978, "step": 3350 }, { "epoch": 0.6036478270659761, "grad_norm": 1.2072349786758423, "learning_rate": 7.923552866851693e-05, "loss": 1.7823, "step": 3351 }, { "epoch": 0.6038279666741725, "grad_norm": 1.190442442893982, "learning_rate": 7.922403907269267e-05, "loss": 1.893, "step": 3352 }, { "epoch": 0.6040081062823688, "grad_norm": 1.2276901006698608, "learning_rate": 7.921254713253024e-05, "loss": 1.9304, "step": 3353 }, { "epoch": 0.6041882458905652, "grad_norm": 1.2261402606964111, "learning_rate": 7.920105284895152e-05, "loss": 2.0447, "step": 3354 }, { "epoch": 0.6043683854987616, "grad_norm": 1.3613996505737305, "learning_rate": 7.918955622287854e-05, "loss": 1.9871, "step": 3355 }, { "epoch": 0.6045485251069579, "grad_norm": 1.402151346206665, "learning_rate": 7.91780572552336e-05, "loss": 2.2685, "step": 3356 }, { "epoch": 0.6047286647151543, "grad_norm": 1.4284284114837646, "learning_rate": 7.916655594693909e-05, "loss": 1.8568, "step": 3357 }, { "epoch": 0.6049088043233506, "grad_norm": 1.518607258796692, "learning_rate": 7.915505229891769e-05, "loss": 2.0351, "step": 3358 }, { "epoch": 0.605088943931547, "grad_norm": 1.6458548307418823, "learning_rate": 7.914354631209219e-05, "loss": 1.9328, "step": 3359 }, { "epoch": 0.6052690835397433, "grad_norm": 1.7217801809310913, "learning_rate": 7.91320379873856e-05, "loss": 2.081, "step": 3360 }, { "epoch": 0.6054492231479397, "grad_norm": 1.547681450843811, "learning_rate": 7.912052732572111e-05, "loss": 1.6799, "step": 3361 }, { "epoch": 0.605629362756136, "grad_norm": 1.3306622505187988, "learning_rate": 7.910901432802212e-05, "loss": 1.63, "step": 3362 }, { "epoch": 0.6058095023643324, "grad_norm": 1.4279226064682007, "learning_rate": 7.909749899521215e-05, "loss": 1.6464, "step": 3363 }, { "epoch": 0.6059896419725287, "grad_norm": 1.4367555379867554, "learning_rate": 7.9085981328215e-05, "loss": 1.7141, "step": 3364 }, { "epoch": 0.6061697815807251, "grad_norm": 1.1761301755905151, "learning_rate": 7.907446132795459e-05, "loss": 1.4359, "step": 3365 }, { "epoch": 0.6063499211889214, "grad_norm": 1.3685128688812256, "learning_rate": 7.906293899535505e-05, "loss": 1.5781, "step": 3366 }, { "epoch": 0.6065300607971178, "grad_norm": 1.2375048398971558, "learning_rate": 7.905141433134071e-05, "loss": 1.4272, "step": 3367 }, { "epoch": 0.6067102004053141, "grad_norm": 1.3058501482009888, "learning_rate": 7.903988733683606e-05, "loss": 1.582, "step": 3368 }, { "epoch": 0.6068903400135105, "grad_norm": 1.3604326248168945, "learning_rate": 7.902835801276578e-05, "loss": 1.8541, "step": 3369 }, { "epoch": 0.6070704796217068, "grad_norm": 1.1521631479263306, "learning_rate": 7.901682636005478e-05, "loss": 1.485, "step": 3370 }, { "epoch": 0.6072506192299032, "grad_norm": 1.3001232147216797, "learning_rate": 7.90052923796281e-05, "loss": 1.6517, "step": 3371 }, { "epoch": 0.6074307588380995, "grad_norm": 1.4358909130096436, "learning_rate": 7.899375607241097e-05, "loss": 1.7774, "step": 3372 }, { "epoch": 0.6076108984462959, "grad_norm": 1.2707207202911377, "learning_rate": 7.898221743932888e-05, "loss": 1.6063, "step": 3373 }, { "epoch": 0.6077910380544922, "grad_norm": 1.3460183143615723, "learning_rate": 7.89706764813074e-05, "loss": 1.6385, "step": 3374 }, { "epoch": 0.6079711776626886, "grad_norm": 1.3746737241744995, "learning_rate": 7.895913319927239e-05, "loss": 1.9249, "step": 3375 }, { "epoch": 0.6081513172708849, "grad_norm": 1.3329097032546997, "learning_rate": 7.89475875941498e-05, "loss": 1.6834, "step": 3376 }, { "epoch": 0.6083314568790813, "grad_norm": 1.2083162069320679, "learning_rate": 7.893603966686584e-05, "loss": 1.4734, "step": 3377 }, { "epoch": 0.6085115964872776, "grad_norm": 1.1004382371902466, "learning_rate": 7.892448941834687e-05, "loss": 1.4186, "step": 3378 }, { "epoch": 0.608691736095474, "grad_norm": 1.3928534984588623, "learning_rate": 7.891293684951945e-05, "loss": 1.7284, "step": 3379 }, { "epoch": 0.6088718757036703, "grad_norm": 1.2312918901443481, "learning_rate": 7.890138196131031e-05, "loss": 1.6094, "step": 3380 }, { "epoch": 0.6090520153118667, "grad_norm": 1.2159086465835571, "learning_rate": 7.88898247546464e-05, "loss": 1.5268, "step": 3381 }, { "epoch": 0.609232154920063, "grad_norm": 1.2257739305496216, "learning_rate": 7.887826523045481e-05, "loss": 1.665, "step": 3382 }, { "epoch": 0.6094122945282594, "grad_norm": 1.3303282260894775, "learning_rate": 7.886670338966284e-05, "loss": 1.5839, "step": 3383 }, { "epoch": 0.6095924341364558, "grad_norm": 1.3492027521133423, "learning_rate": 7.8855139233198e-05, "loss": 1.6221, "step": 3384 }, { "epoch": 0.6097725737446521, "grad_norm": 1.1684311628341675, "learning_rate": 7.884357276198795e-05, "loss": 1.2783, "step": 3385 }, { "epoch": 0.6099527133528485, "grad_norm": 1.3742403984069824, "learning_rate": 7.883200397696053e-05, "loss": 1.4889, "step": 3386 }, { "epoch": 0.6101328529610448, "grad_norm": 1.3382236957550049, "learning_rate": 7.882043287904379e-05, "loss": 1.6217, "step": 3387 }, { "epoch": 0.6103129925692412, "grad_norm": 1.3607343435287476, "learning_rate": 7.880885946916598e-05, "loss": 1.4183, "step": 3388 }, { "epoch": 0.6104931321774375, "grad_norm": 1.3939534425735474, "learning_rate": 7.879728374825551e-05, "loss": 1.5689, "step": 3389 }, { "epoch": 0.6106732717856339, "grad_norm": 1.3255269527435303, "learning_rate": 7.878570571724092e-05, "loss": 1.7329, "step": 3390 }, { "epoch": 0.6108534113938302, "grad_norm": 1.2737993001937866, "learning_rate": 7.877412537705107e-05, "loss": 1.7746, "step": 3391 }, { "epoch": 0.6110335510020266, "grad_norm": 1.463544487953186, "learning_rate": 7.876254272861487e-05, "loss": 1.7846, "step": 3392 }, { "epoch": 0.6112136906102229, "grad_norm": 1.3744786977767944, "learning_rate": 7.875095777286153e-05, "loss": 1.5589, "step": 3393 }, { "epoch": 0.6113938302184193, "grad_norm": 1.3302916288375854, "learning_rate": 7.873937051072035e-05, "loss": 1.7601, "step": 3394 }, { "epoch": 0.6115739698266156, "grad_norm": 1.4363634586334229, "learning_rate": 7.872778094312086e-05, "loss": 1.6493, "step": 3395 }, { "epoch": 0.611754109434812, "grad_norm": 1.3024040460586548, "learning_rate": 7.871618907099279e-05, "loss": 1.6299, "step": 3396 }, { "epoch": 0.6119342490430083, "grad_norm": 1.340654730796814, "learning_rate": 7.870459489526602e-05, "loss": 1.477, "step": 3397 }, { "epoch": 0.6121143886512047, "grad_norm": 1.3368935585021973, "learning_rate": 7.869299841687061e-05, "loss": 1.2291, "step": 3398 }, { "epoch": 0.612294528259401, "grad_norm": 1.4286428689956665, "learning_rate": 7.868139963673686e-05, "loss": 1.5098, "step": 3399 }, { "epoch": 0.6124746678675974, "grad_norm": 1.2529758214950562, "learning_rate": 7.86697985557952e-05, "loss": 1.5391, "step": 3400 }, { "epoch": 0.6126548074757937, "grad_norm": 1.1250114440917969, "learning_rate": 7.865819517497626e-05, "loss": 2.1686, "step": 3401 }, { "epoch": 0.6128349470839901, "grad_norm": 1.1796998977661133, "learning_rate": 7.864658949521087e-05, "loss": 1.8919, "step": 3402 }, { "epoch": 0.6130150866921864, "grad_norm": 1.1717329025268555, "learning_rate": 7.863498151743e-05, "loss": 1.9769, "step": 3403 }, { "epoch": 0.6131952263003828, "grad_norm": 1.2340620756149292, "learning_rate": 7.862337124256487e-05, "loss": 1.8723, "step": 3404 }, { "epoch": 0.6133753659085791, "grad_norm": 1.4481773376464844, "learning_rate": 7.861175867154686e-05, "loss": 2.3697, "step": 3405 }, { "epoch": 0.6135555055167755, "grad_norm": 1.1795804500579834, "learning_rate": 7.860014380530749e-05, "loss": 1.8402, "step": 3406 }, { "epoch": 0.6137356451249718, "grad_norm": 1.2762295007705688, "learning_rate": 7.858852664477851e-05, "loss": 1.7181, "step": 3407 }, { "epoch": 0.6139157847331682, "grad_norm": 1.367079257965088, "learning_rate": 7.857690719089185e-05, "loss": 2.0054, "step": 3408 }, { "epoch": 0.6140959243413645, "grad_norm": 1.3670467138290405, "learning_rate": 7.856528544457964e-05, "loss": 1.8649, "step": 3409 }, { "epoch": 0.6142760639495609, "grad_norm": 1.7702361345291138, "learning_rate": 7.855366140677412e-05, "loss": 2.2306, "step": 3410 }, { "epoch": 0.6144562035577572, "grad_norm": 1.5775545835494995, "learning_rate": 7.854203507840778e-05, "loss": 2.0586, "step": 3411 }, { "epoch": 0.6146363431659536, "grad_norm": 1.212159514427185, "learning_rate": 7.853040646041332e-05, "loss": 1.6627, "step": 3412 }, { "epoch": 0.61481648277415, "grad_norm": 1.323357343673706, "learning_rate": 7.851877555372353e-05, "loss": 1.7685, "step": 3413 }, { "epoch": 0.6149966223823463, "grad_norm": 1.1844624280929565, "learning_rate": 7.850714235927145e-05, "loss": 1.5377, "step": 3414 }, { "epoch": 0.6151767619905427, "grad_norm": 1.4404406547546387, "learning_rate": 7.84955068779903e-05, "loss": 1.5404, "step": 3415 }, { "epoch": 0.615356901598739, "grad_norm": 1.3945233821868896, "learning_rate": 7.84838691108135e-05, "loss": 2.0254, "step": 3416 }, { "epoch": 0.6155370412069354, "grad_norm": 1.2886253595352173, "learning_rate": 7.847222905867455e-05, "loss": 1.583, "step": 3417 }, { "epoch": 0.6157171808151317, "grad_norm": 1.2599767446517944, "learning_rate": 7.846058672250727e-05, "loss": 1.5903, "step": 3418 }, { "epoch": 0.6158973204233281, "grad_norm": 1.1815823316574097, "learning_rate": 7.844894210324559e-05, "loss": 1.3637, "step": 3419 }, { "epoch": 0.6160774600315244, "grad_norm": 1.2556649446487427, "learning_rate": 7.843729520182362e-05, "loss": 1.6547, "step": 3420 }, { "epoch": 0.6162575996397208, "grad_norm": 1.3473434448242188, "learning_rate": 7.842564601917569e-05, "loss": 1.4851, "step": 3421 }, { "epoch": 0.6164377392479171, "grad_norm": 1.2386751174926758, "learning_rate": 7.841399455623629e-05, "loss": 1.5664, "step": 3422 }, { "epoch": 0.6166178788561135, "grad_norm": 1.5451143980026245, "learning_rate": 7.840234081394007e-05, "loss": 1.7077, "step": 3423 }, { "epoch": 0.6167980184643098, "grad_norm": 1.1659245491027832, "learning_rate": 7.83906847932219e-05, "loss": 1.5656, "step": 3424 }, { "epoch": 0.6169781580725062, "grad_norm": 1.2365696430206299, "learning_rate": 7.837902649501684e-05, "loss": 1.6032, "step": 3425 }, { "epoch": 0.6171582976807025, "grad_norm": 1.2708405256271362, "learning_rate": 7.836736592026009e-05, "loss": 1.543, "step": 3426 }, { "epoch": 0.6173384372888989, "grad_norm": 1.3413358926773071, "learning_rate": 7.835570306988706e-05, "loss": 1.6978, "step": 3427 }, { "epoch": 0.6175185768970952, "grad_norm": 1.3258224725723267, "learning_rate": 7.834403794483336e-05, "loss": 1.5797, "step": 3428 }, { "epoch": 0.6176987165052916, "grad_norm": 1.3221250772476196, "learning_rate": 7.833237054603473e-05, "loss": 1.5992, "step": 3429 }, { "epoch": 0.6178788561134879, "grad_norm": 1.235971450805664, "learning_rate": 7.832070087442714e-05, "loss": 1.3491, "step": 3430 }, { "epoch": 0.6180589957216843, "grad_norm": 1.4612895250320435, "learning_rate": 7.83090289309467e-05, "loss": 1.6087, "step": 3431 }, { "epoch": 0.6182391353298806, "grad_norm": 1.3738644123077393, "learning_rate": 7.829735471652978e-05, "loss": 1.5013, "step": 3432 }, { "epoch": 0.618419274938077, "grad_norm": 1.282684087753296, "learning_rate": 7.828567823211282e-05, "loss": 1.6167, "step": 3433 }, { "epoch": 0.6185994145462733, "grad_norm": 1.2626640796661377, "learning_rate": 7.827399947863254e-05, "loss": 1.4793, "step": 3434 }, { "epoch": 0.6187795541544697, "grad_norm": 1.330369234085083, "learning_rate": 7.826231845702578e-05, "loss": 1.5556, "step": 3435 }, { "epoch": 0.618959693762666, "grad_norm": 1.2297042608261108, "learning_rate": 7.825063516822961e-05, "loss": 1.4004, "step": 3436 }, { "epoch": 0.6191398333708624, "grad_norm": 1.3232755661010742, "learning_rate": 7.823894961318125e-05, "loss": 1.5041, "step": 3437 }, { "epoch": 0.6193199729790587, "grad_norm": 1.2605990171432495, "learning_rate": 7.822726179281812e-05, "loss": 1.3582, "step": 3438 }, { "epoch": 0.6195001125872551, "grad_norm": 1.4318681955337524, "learning_rate": 7.821557170807778e-05, "loss": 1.8279, "step": 3439 }, { "epoch": 0.6196802521954514, "grad_norm": 1.4431785345077515, "learning_rate": 7.820387935989802e-05, "loss": 1.9802, "step": 3440 }, { "epoch": 0.6198603918036478, "grad_norm": 1.3473280668258667, "learning_rate": 7.81921847492168e-05, "loss": 1.4405, "step": 3441 }, { "epoch": 0.6200405314118442, "grad_norm": 1.4384108781814575, "learning_rate": 7.818048787697225e-05, "loss": 1.8097, "step": 3442 }, { "epoch": 0.6202206710200405, "grad_norm": 1.236392617225647, "learning_rate": 7.816878874410269e-05, "loss": 1.5365, "step": 3443 }, { "epoch": 0.6204008106282369, "grad_norm": 1.3704609870910645, "learning_rate": 7.815708735154663e-05, "loss": 1.5112, "step": 3444 }, { "epoch": 0.6205809502364332, "grad_norm": 1.439429759979248, "learning_rate": 7.814538370024271e-05, "loss": 1.9872, "step": 3445 }, { "epoch": 0.6207610898446296, "grad_norm": 1.3481985330581665, "learning_rate": 7.813367779112984e-05, "loss": 1.5367, "step": 3446 }, { "epoch": 0.6209412294528259, "grad_norm": 1.5427281856536865, "learning_rate": 7.812196962514704e-05, "loss": 1.6443, "step": 3447 }, { "epoch": 0.6211213690610223, "grad_norm": 1.3741356134414673, "learning_rate": 7.811025920323354e-05, "loss": 1.6081, "step": 3448 }, { "epoch": 0.6213015086692186, "grad_norm": 1.262168049812317, "learning_rate": 7.809854652632872e-05, "loss": 1.3483, "step": 3449 }, { "epoch": 0.621481648277415, "grad_norm": 1.1154948472976685, "learning_rate": 7.808683159537219e-05, "loss": 1.1129, "step": 3450 }, { "epoch": 0.6216617878856113, "grad_norm": 1.092190146446228, "learning_rate": 7.807511441130371e-05, "loss": 1.8382, "step": 3451 }, { "epoch": 0.6218419274938077, "grad_norm": 1.0987615585327148, "learning_rate": 7.806339497506324e-05, "loss": 1.7497, "step": 3452 }, { "epoch": 0.622022067102004, "grad_norm": 1.2533376216888428, "learning_rate": 7.805167328759087e-05, "loss": 2.0765, "step": 3453 }, { "epoch": 0.6222022067102004, "grad_norm": 2.7416746616363525, "learning_rate": 7.803994934982696e-05, "loss": 2.1189, "step": 3454 }, { "epoch": 0.6223823463183967, "grad_norm": 1.2867873907089233, "learning_rate": 7.802822316271195e-05, "loss": 2.0103, "step": 3455 }, { "epoch": 0.6225624859265931, "grad_norm": 1.3870503902435303, "learning_rate": 7.801649472718652e-05, "loss": 1.8653, "step": 3456 }, { "epoch": 0.6227426255347894, "grad_norm": 1.4988107681274414, "learning_rate": 7.800476404419156e-05, "loss": 2.1285, "step": 3457 }, { "epoch": 0.6229227651429858, "grad_norm": 1.4061899185180664, "learning_rate": 7.799303111466806e-05, "loss": 1.7172, "step": 3458 }, { "epoch": 0.6231029047511821, "grad_norm": 1.9066200256347656, "learning_rate": 7.798129593955723e-05, "loss": 2.2153, "step": 3459 }, { "epoch": 0.6232830443593785, "grad_norm": 1.7115209102630615, "learning_rate": 7.796955851980048e-05, "loss": 2.2916, "step": 3460 }, { "epoch": 0.6234631839675748, "grad_norm": 1.2237215042114258, "learning_rate": 7.795781885633936e-05, "loss": 1.7763, "step": 3461 }, { "epoch": 0.6236433235757712, "grad_norm": 1.2384086847305298, "learning_rate": 7.794607695011562e-05, "loss": 1.4757, "step": 3462 }, { "epoch": 0.6238234631839675, "grad_norm": 1.2109719514846802, "learning_rate": 7.793433280207122e-05, "loss": 1.5239, "step": 3463 }, { "epoch": 0.6240036027921639, "grad_norm": 1.2383770942687988, "learning_rate": 7.792258641314824e-05, "loss": 1.5054, "step": 3464 }, { "epoch": 0.6241837424003602, "grad_norm": 1.2524224519729614, "learning_rate": 7.791083778428898e-05, "loss": 1.5824, "step": 3465 }, { "epoch": 0.6243638820085566, "grad_norm": 1.2961297035217285, "learning_rate": 7.78990869164359e-05, "loss": 1.5858, "step": 3466 }, { "epoch": 0.6245440216167529, "grad_norm": 1.2047690153121948, "learning_rate": 7.788733381053166e-05, "loss": 1.6125, "step": 3467 }, { "epoch": 0.6247241612249493, "grad_norm": 1.3629313707351685, "learning_rate": 7.78755784675191e-05, "loss": 1.5742, "step": 3468 }, { "epoch": 0.6249043008331457, "grad_norm": 1.32099187374115, "learning_rate": 7.786382088834119e-05, "loss": 1.6325, "step": 3469 }, { "epoch": 0.625084440441342, "grad_norm": 1.3721431493759155, "learning_rate": 7.785206107394116e-05, "loss": 1.6982, "step": 3470 }, { "epoch": 0.6252645800495384, "grad_norm": 1.186661720275879, "learning_rate": 7.784029902526233e-05, "loss": 1.3455, "step": 3471 }, { "epoch": 0.6254447196577347, "grad_norm": 1.2110214233398438, "learning_rate": 7.78285347432483e-05, "loss": 1.385, "step": 3472 }, { "epoch": 0.6256248592659311, "grad_norm": 1.2676270008087158, "learning_rate": 7.781676822884277e-05, "loss": 1.507, "step": 3473 }, { "epoch": 0.6258049988741274, "grad_norm": 1.1804970502853394, "learning_rate": 7.780499948298962e-05, "loss": 1.47, "step": 3474 }, { "epoch": 0.6259851384823238, "grad_norm": 1.3457781076431274, "learning_rate": 7.779322850663296e-05, "loss": 1.8081, "step": 3475 }, { "epoch": 0.6261652780905201, "grad_norm": 1.367093801498413, "learning_rate": 7.778145530071706e-05, "loss": 1.6013, "step": 3476 }, { "epoch": 0.6263454176987165, "grad_norm": 1.2647483348846436, "learning_rate": 7.776967986618633e-05, "loss": 1.7504, "step": 3477 }, { "epoch": 0.6265255573069128, "grad_norm": 1.2595716714859009, "learning_rate": 7.775790220398542e-05, "loss": 1.6164, "step": 3478 }, { "epoch": 0.6267056969151092, "grad_norm": 1.2980504035949707, "learning_rate": 7.77461223150591e-05, "loss": 1.6195, "step": 3479 }, { "epoch": 0.6268858365233055, "grad_norm": 1.312412977218628, "learning_rate": 7.773434020035238e-05, "loss": 1.7071, "step": 3480 }, { "epoch": 0.627065976131502, "grad_norm": 1.3567476272583008, "learning_rate": 7.77225558608104e-05, "loss": 1.3982, "step": 3481 }, { "epoch": 0.6272461157396982, "grad_norm": 1.2952780723571777, "learning_rate": 7.771076929737849e-05, "loss": 1.536, "step": 3482 }, { "epoch": 0.6274262553478946, "grad_norm": 1.4046076536178589, "learning_rate": 7.769898051100214e-05, "loss": 1.832, "step": 3483 }, { "epoch": 0.6276063949560909, "grad_norm": 1.4581197500228882, "learning_rate": 7.76871895026271e-05, "loss": 1.8457, "step": 3484 }, { "epoch": 0.6277865345642873, "grad_norm": 1.439780354499817, "learning_rate": 7.767539627319917e-05, "loss": 1.7358, "step": 3485 }, { "epoch": 0.6279666741724836, "grad_norm": 1.2732691764831543, "learning_rate": 7.766360082366447e-05, "loss": 1.6066, "step": 3486 }, { "epoch": 0.62814681378068, "grad_norm": 1.406550407409668, "learning_rate": 7.765180315496916e-05, "loss": 1.8004, "step": 3487 }, { "epoch": 0.6283269533888763, "grad_norm": 1.4022067785263062, "learning_rate": 7.764000326805967e-05, "loss": 1.6685, "step": 3488 }, { "epoch": 0.6285070929970727, "grad_norm": 1.453587532043457, "learning_rate": 7.76282011638826e-05, "loss": 1.6558, "step": 3489 }, { "epoch": 0.628687232605269, "grad_norm": 1.4871257543563843, "learning_rate": 7.761639684338466e-05, "loss": 1.5302, "step": 3490 }, { "epoch": 0.6288673722134654, "grad_norm": 1.3871734142303467, "learning_rate": 7.760459030751284e-05, "loss": 1.6276, "step": 3491 }, { "epoch": 0.6290475118216617, "grad_norm": 1.40336275100708, "learning_rate": 7.759278155721422e-05, "loss": 1.727, "step": 3492 }, { "epoch": 0.6292276514298581, "grad_norm": 1.3767496347427368, "learning_rate": 7.758097059343611e-05, "loss": 1.7256, "step": 3493 }, { "epoch": 0.6294077910380544, "grad_norm": 1.3373011350631714, "learning_rate": 7.756915741712597e-05, "loss": 1.5523, "step": 3494 }, { "epoch": 0.6295879306462508, "grad_norm": 1.527956247329712, "learning_rate": 7.755734202923145e-05, "loss": 1.8352, "step": 3495 }, { "epoch": 0.6297680702544471, "grad_norm": 1.320296287536621, "learning_rate": 7.754552443070038e-05, "loss": 1.3598, "step": 3496 }, { "epoch": 0.6299482098626435, "grad_norm": 1.4245721101760864, "learning_rate": 7.753370462248076e-05, "loss": 1.6712, "step": 3497 }, { "epoch": 0.63012834947084, "grad_norm": 1.3571979999542236, "learning_rate": 7.752188260552075e-05, "loss": 1.4392, "step": 3498 }, { "epoch": 0.6303084890790362, "grad_norm": 1.582578182220459, "learning_rate": 7.751005838076872e-05, "loss": 1.7333, "step": 3499 }, { "epoch": 0.6304886286872327, "grad_norm": 1.4914798736572266, "learning_rate": 7.749823194917323e-05, "loss": 1.4464, "step": 3500 }, { "epoch": 0.630668768295429, "grad_norm": 1.2188926935195923, "learning_rate": 7.748640331168296e-05, "loss": 1.9885, "step": 3501 }, { "epoch": 0.6308489079036254, "grad_norm": 1.196022629737854, "learning_rate": 7.74745724692468e-05, "loss": 1.9895, "step": 3502 }, { "epoch": 0.6310290475118217, "grad_norm": 1.2456132173538208, "learning_rate": 7.746273942281383e-05, "loss": 1.9596, "step": 3503 }, { "epoch": 0.6312091871200181, "grad_norm": 1.303802728652954, "learning_rate": 7.745090417333325e-05, "loss": 2.1779, "step": 3504 }, { "epoch": 0.6313893267282144, "grad_norm": 1.2232509851455688, "learning_rate": 7.743906672175453e-05, "loss": 1.765, "step": 3505 }, { "epoch": 0.6315694663364108, "grad_norm": 1.3437637090682983, "learning_rate": 7.742722706902724e-05, "loss": 2.0879, "step": 3506 }, { "epoch": 0.631749605944607, "grad_norm": 1.3863576650619507, "learning_rate": 7.741538521610115e-05, "loss": 1.6599, "step": 3507 }, { "epoch": 0.6319297455528035, "grad_norm": 1.4730451107025146, "learning_rate": 7.740354116392622e-05, "loss": 1.9449, "step": 3508 }, { "epoch": 0.6321098851609998, "grad_norm": 1.678127408027649, "learning_rate": 7.739169491345256e-05, "loss": 2.3293, "step": 3509 }, { "epoch": 0.6322900247691962, "grad_norm": 1.487926721572876, "learning_rate": 7.737984646563048e-05, "loss": 1.6924, "step": 3510 }, { "epoch": 0.6324701643773925, "grad_norm": 1.4182573556900024, "learning_rate": 7.736799582141046e-05, "loss": 2.035, "step": 3511 }, { "epoch": 0.6326503039855889, "grad_norm": 1.2465749979019165, "learning_rate": 7.735614298174313e-05, "loss": 1.8594, "step": 3512 }, { "epoch": 0.6328304435937852, "grad_norm": 1.2813730239868164, "learning_rate": 7.734428794757932e-05, "loss": 1.3947, "step": 3513 }, { "epoch": 0.6330105832019816, "grad_norm": 1.1685377359390259, "learning_rate": 7.733243071987009e-05, "loss": 1.3803, "step": 3514 }, { "epoch": 0.6331907228101779, "grad_norm": 1.2509543895721436, "learning_rate": 7.732057129956656e-05, "loss": 1.616, "step": 3515 }, { "epoch": 0.6333708624183743, "grad_norm": 1.3205718994140625, "learning_rate": 7.730870968762011e-05, "loss": 1.7883, "step": 3516 }, { "epoch": 0.6335510020265706, "grad_norm": 1.3118411302566528, "learning_rate": 7.729684588498226e-05, "loss": 1.6799, "step": 3517 }, { "epoch": 0.633731141634767, "grad_norm": 1.2695672512054443, "learning_rate": 7.728497989260473e-05, "loss": 1.4795, "step": 3518 }, { "epoch": 0.6339112812429633, "grad_norm": 1.3269842863082886, "learning_rate": 7.727311171143943e-05, "loss": 1.6241, "step": 3519 }, { "epoch": 0.6340914208511597, "grad_norm": 1.3300689458847046, "learning_rate": 7.726124134243836e-05, "loss": 1.4836, "step": 3520 }, { "epoch": 0.634271560459356, "grad_norm": 1.1789056062698364, "learning_rate": 7.72493687865538e-05, "loss": 1.6328, "step": 3521 }, { "epoch": 0.6344517000675524, "grad_norm": 1.3083393573760986, "learning_rate": 7.723749404473816e-05, "loss": 1.5419, "step": 3522 }, { "epoch": 0.6346318396757487, "grad_norm": 1.500576376914978, "learning_rate": 7.7225617117944e-05, "loss": 1.8592, "step": 3523 }, { "epoch": 0.6348119792839451, "grad_norm": 1.169145941734314, "learning_rate": 7.72137380071241e-05, "loss": 1.5003, "step": 3524 }, { "epoch": 0.6349921188921414, "grad_norm": 1.3244119882583618, "learning_rate": 7.720185671323138e-05, "loss": 1.8247, "step": 3525 }, { "epoch": 0.6351722585003378, "grad_norm": 1.2466422319412231, "learning_rate": 7.718997323721899e-05, "loss": 1.5434, "step": 3526 }, { "epoch": 0.6353523981085342, "grad_norm": 1.283230185508728, "learning_rate": 7.717808758004018e-05, "loss": 1.4864, "step": 3527 }, { "epoch": 0.6355325377167305, "grad_norm": 1.433368444442749, "learning_rate": 7.716619974264843e-05, "loss": 1.6029, "step": 3528 }, { "epoch": 0.6357126773249269, "grad_norm": 1.4116008281707764, "learning_rate": 7.715430972599736e-05, "loss": 1.8188, "step": 3529 }, { "epoch": 0.6358928169331232, "grad_norm": 1.2602133750915527, "learning_rate": 7.714241753104079e-05, "loss": 1.6417, "step": 3530 }, { "epoch": 0.6360729565413196, "grad_norm": 1.436623215675354, "learning_rate": 7.713052315873269e-05, "loss": 1.5603, "step": 3531 }, { "epoch": 0.6362530961495159, "grad_norm": 1.2853972911834717, "learning_rate": 7.711862661002727e-05, "loss": 1.7582, "step": 3532 }, { "epoch": 0.6364332357577123, "grad_norm": 1.3522992134094238, "learning_rate": 7.71067278858788e-05, "loss": 1.6433, "step": 3533 }, { "epoch": 0.6366133753659086, "grad_norm": 1.2607392072677612, "learning_rate": 7.709482698724185e-05, "loss": 1.459, "step": 3534 }, { "epoch": 0.636793514974105, "grad_norm": 1.2452760934829712, "learning_rate": 7.708292391507105e-05, "loss": 1.4935, "step": 3535 }, { "epoch": 0.6369736545823013, "grad_norm": 1.3124128580093384, "learning_rate": 7.707101867032129e-05, "loss": 1.5672, "step": 3536 }, { "epoch": 0.6371537941904977, "grad_norm": 1.2509071826934814, "learning_rate": 7.705911125394762e-05, "loss": 1.5853, "step": 3537 }, { "epoch": 0.637333933798694, "grad_norm": 1.3260079622268677, "learning_rate": 7.70472016669052e-05, "loss": 1.6166, "step": 3538 }, { "epoch": 0.6375140734068904, "grad_norm": 1.3136029243469238, "learning_rate": 7.703528991014946e-05, "loss": 1.507, "step": 3539 }, { "epoch": 0.6376942130150867, "grad_norm": 1.2431551218032837, "learning_rate": 7.702337598463592e-05, "loss": 1.5781, "step": 3540 }, { "epoch": 0.6378743526232831, "grad_norm": 1.3854396343231201, "learning_rate": 7.701145989132032e-05, "loss": 1.5438, "step": 3541 }, { "epoch": 0.6380544922314794, "grad_norm": 1.5077077150344849, "learning_rate": 7.699954163115857e-05, "loss": 1.8064, "step": 3542 }, { "epoch": 0.6382346318396758, "grad_norm": 1.3464741706848145, "learning_rate": 7.698762120510674e-05, "loss": 1.7212, "step": 3543 }, { "epoch": 0.6384147714478721, "grad_norm": 1.5715100765228271, "learning_rate": 7.697569861412106e-05, "loss": 1.6226, "step": 3544 }, { "epoch": 0.6385949110560685, "grad_norm": 1.283296823501587, "learning_rate": 7.6963773859158e-05, "loss": 1.5795, "step": 3545 }, { "epoch": 0.6387750506642648, "grad_norm": 1.3279004096984863, "learning_rate": 7.695184694117414e-05, "loss": 1.4647, "step": 3546 }, { "epoch": 0.6389551902724612, "grad_norm": 1.182403564453125, "learning_rate": 7.693991786112625e-05, "loss": 1.2688, "step": 3547 }, { "epoch": 0.6391353298806575, "grad_norm": 1.2587999105453491, "learning_rate": 7.692798661997123e-05, "loss": 1.4922, "step": 3548 }, { "epoch": 0.6393154694888539, "grad_norm": 1.4625664949417114, "learning_rate": 7.691605321866628e-05, "loss": 1.6578, "step": 3549 }, { "epoch": 0.6394956090970502, "grad_norm": 1.4390089511871338, "learning_rate": 7.690411765816864e-05, "loss": 1.4838, "step": 3550 }, { "epoch": 0.6396757487052466, "grad_norm": 1.2063490152359009, "learning_rate": 7.689217993943578e-05, "loss": 1.8046, "step": 3551 }, { "epoch": 0.6398558883134429, "grad_norm": 1.2386894226074219, "learning_rate": 7.688024006342535e-05, "loss": 2.1889, "step": 3552 }, { "epoch": 0.6400360279216393, "grad_norm": 1.2151871919631958, "learning_rate": 7.686829803109516e-05, "loss": 2.2384, "step": 3553 }, { "epoch": 0.6402161675298357, "grad_norm": 1.3600953817367554, "learning_rate": 7.685635384340317e-05, "loss": 2.2246, "step": 3554 }, { "epoch": 0.640396307138032, "grad_norm": 1.4402754306793213, "learning_rate": 7.684440750130758e-05, "loss": 1.8696, "step": 3555 }, { "epoch": 0.6405764467462284, "grad_norm": 1.2156671285629272, "learning_rate": 7.683245900576668e-05, "loss": 1.8921, "step": 3556 }, { "epoch": 0.6407565863544247, "grad_norm": 1.6027262210845947, "learning_rate": 7.6820508357739e-05, "loss": 1.993, "step": 3557 }, { "epoch": 0.6409367259626211, "grad_norm": 1.6761640310287476, "learning_rate": 7.680855555818318e-05, "loss": 2.0825, "step": 3558 }, { "epoch": 0.6411168655708174, "grad_norm": 1.5012297630310059, "learning_rate": 7.679660060805811e-05, "loss": 1.7486, "step": 3559 }, { "epoch": 0.6412970051790138, "grad_norm": 1.969394564628601, "learning_rate": 7.678464350832279e-05, "loss": 2.1835, "step": 3560 }, { "epoch": 0.6414771447872101, "grad_norm": 1.3221994638442993, "learning_rate": 7.677268425993642e-05, "loss": 1.8194, "step": 3561 }, { "epoch": 0.6416572843954065, "grad_norm": 1.1828910112380981, "learning_rate": 7.676072286385835e-05, "loss": 1.4909, "step": 3562 }, { "epoch": 0.6418374240036028, "grad_norm": 1.3463126420974731, "learning_rate": 7.674875932104813e-05, "loss": 1.52, "step": 3563 }, { "epoch": 0.6420175636117992, "grad_norm": 1.3547868728637695, "learning_rate": 7.673679363246546e-05, "loss": 1.591, "step": 3564 }, { "epoch": 0.6421977032199955, "grad_norm": 1.189975380897522, "learning_rate": 7.672482579907023e-05, "loss": 1.4636, "step": 3565 }, { "epoch": 0.6423778428281919, "grad_norm": 1.3249268531799316, "learning_rate": 7.671285582182249e-05, "loss": 1.4856, "step": 3566 }, { "epoch": 0.6425579824363882, "grad_norm": 1.3356399536132812, "learning_rate": 7.670088370168246e-05, "loss": 1.5946, "step": 3567 }, { "epoch": 0.6427381220445846, "grad_norm": 1.4490073919296265, "learning_rate": 7.668890943961058e-05, "loss": 1.6814, "step": 3568 }, { "epoch": 0.6429182616527809, "grad_norm": 1.2150338888168335, "learning_rate": 7.667693303656734e-05, "loss": 1.5836, "step": 3569 }, { "epoch": 0.6430984012609773, "grad_norm": 1.2231847047805786, "learning_rate": 7.666495449351355e-05, "loss": 1.3912, "step": 3570 }, { "epoch": 0.6432785408691736, "grad_norm": 1.2491759061813354, "learning_rate": 7.66529738114101e-05, "loss": 1.618, "step": 3571 }, { "epoch": 0.64345868047737, "grad_norm": 1.2275420427322388, "learning_rate": 7.664099099121807e-05, "loss": 1.6126, "step": 3572 }, { "epoch": 0.6436388200855663, "grad_norm": 1.346206784248352, "learning_rate": 7.662900603389871e-05, "loss": 1.6733, "step": 3573 }, { "epoch": 0.6438189596937627, "grad_norm": 1.1287707090377808, "learning_rate": 7.661701894041346e-05, "loss": 1.4035, "step": 3574 }, { "epoch": 0.643999099301959, "grad_norm": 1.2175657749176025, "learning_rate": 7.660502971172391e-05, "loss": 1.3145, "step": 3575 }, { "epoch": 0.6441792389101554, "grad_norm": 1.3306307792663574, "learning_rate": 7.659303834879185e-05, "loss": 1.6304, "step": 3576 }, { "epoch": 0.6443593785183517, "grad_norm": 1.4126652479171753, "learning_rate": 7.658104485257922e-05, "loss": 1.8582, "step": 3577 }, { "epoch": 0.6445395181265481, "grad_norm": 1.4104547500610352, "learning_rate": 7.656904922404809e-05, "loss": 1.634, "step": 3578 }, { "epoch": 0.6447196577347444, "grad_norm": 1.3894020318984985, "learning_rate": 7.655705146416079e-05, "loss": 1.776, "step": 3579 }, { "epoch": 0.6448997973429408, "grad_norm": 1.3726348876953125, "learning_rate": 7.654505157387974e-05, "loss": 1.8422, "step": 3580 }, { "epoch": 0.6450799369511371, "grad_norm": 1.3133918046951294, "learning_rate": 7.653304955416758e-05, "loss": 1.5783, "step": 3581 }, { "epoch": 0.6452600765593335, "grad_norm": 1.3153074979782104, "learning_rate": 7.652104540598712e-05, "loss": 1.5851, "step": 3582 }, { "epoch": 0.6454402161675299, "grad_norm": 1.233579397201538, "learning_rate": 7.650903913030132e-05, "loss": 1.516, "step": 3583 }, { "epoch": 0.6456203557757262, "grad_norm": 1.2603578567504883, "learning_rate": 7.649703072807332e-05, "loss": 1.6104, "step": 3584 }, { "epoch": 0.6458004953839226, "grad_norm": 1.2925444841384888, "learning_rate": 7.64850202002664e-05, "loss": 1.3514, "step": 3585 }, { "epoch": 0.6459806349921189, "grad_norm": 1.2838647365570068, "learning_rate": 7.647300754784407e-05, "loss": 1.3989, "step": 3586 }, { "epoch": 0.6461607746003153, "grad_norm": 1.3049839735031128, "learning_rate": 7.646099277176997e-05, "loss": 1.5073, "step": 3587 }, { "epoch": 0.6463409142085116, "grad_norm": 1.2988070249557495, "learning_rate": 7.644897587300793e-05, "loss": 1.4433, "step": 3588 }, { "epoch": 0.646521053816708, "grad_norm": 1.3294895887374878, "learning_rate": 7.643695685252191e-05, "loss": 1.5193, "step": 3589 }, { "epoch": 0.6467011934249043, "grad_norm": 1.4931018352508545, "learning_rate": 7.64249357112761e-05, "loss": 1.5881, "step": 3590 }, { "epoch": 0.6468813330331007, "grad_norm": 1.4193544387817383, "learning_rate": 7.641291245023481e-05, "loss": 1.7283, "step": 3591 }, { "epoch": 0.647061472641297, "grad_norm": 1.3652536869049072, "learning_rate": 7.640088707036253e-05, "loss": 1.4064, "step": 3592 }, { "epoch": 0.6472416122494934, "grad_norm": 1.3515150547027588, "learning_rate": 7.638885957262398e-05, "loss": 1.5234, "step": 3593 }, { "epoch": 0.6474217518576897, "grad_norm": 1.448966145515442, "learning_rate": 7.637682995798396e-05, "loss": 1.5217, "step": 3594 }, { "epoch": 0.6476018914658861, "grad_norm": 1.4578299522399902, "learning_rate": 7.636479822740749e-05, "loss": 1.6968, "step": 3595 }, { "epoch": 0.6477820310740824, "grad_norm": 1.3756144046783447, "learning_rate": 7.635276438185975e-05, "loss": 1.4141, "step": 3596 }, { "epoch": 0.6479621706822788, "grad_norm": 1.4369256496429443, "learning_rate": 7.634072842230607e-05, "loss": 1.6725, "step": 3597 }, { "epoch": 0.6481423102904751, "grad_norm": 1.1872689723968506, "learning_rate": 7.6328690349712e-05, "loss": 1.2286, "step": 3598 }, { "epoch": 0.6483224498986715, "grad_norm": 1.4163745641708374, "learning_rate": 7.631665016504321e-05, "loss": 1.5682, "step": 3599 }, { "epoch": 0.6485025895068678, "grad_norm": 1.5728036165237427, "learning_rate": 7.630460786926556e-05, "loss": 1.7093, "step": 3600 }, { "epoch": 0.6486827291150642, "grad_norm": 1.23725163936615, "learning_rate": 7.629256346334508e-05, "loss": 2.2935, "step": 3601 }, { "epoch": 0.6488628687232605, "grad_norm": 1.1671984195709229, "learning_rate": 7.628051694824798e-05, "loss": 2.1548, "step": 3602 }, { "epoch": 0.6490430083314569, "grad_norm": 1.3207608461380005, "learning_rate": 7.62684683249406e-05, "loss": 2.1959, "step": 3603 }, { "epoch": 0.6492231479396532, "grad_norm": 1.455020785331726, "learning_rate": 7.625641759438948e-05, "loss": 1.8492, "step": 3604 }, { "epoch": 0.6494032875478496, "grad_norm": 1.3297969102859497, "learning_rate": 7.624436475756134e-05, "loss": 2.0218, "step": 3605 }, { "epoch": 0.6495834271560459, "grad_norm": 1.240293264389038, "learning_rate": 7.623230981542304e-05, "loss": 1.6975, "step": 3606 }, { "epoch": 0.6497635667642423, "grad_norm": 1.381180763244629, "learning_rate": 7.622025276894163e-05, "loss": 2.02, "step": 3607 }, { "epoch": 0.6499437063724386, "grad_norm": 1.4141510725021362, "learning_rate": 7.620819361908429e-05, "loss": 1.9788, "step": 3608 }, { "epoch": 0.650123845980635, "grad_norm": 1.3985809087753296, "learning_rate": 7.619613236681843e-05, "loss": 2.0135, "step": 3609 }, { "epoch": 0.6503039855888313, "grad_norm": 1.6117537021636963, "learning_rate": 7.618406901311162e-05, "loss": 2.1882, "step": 3610 }, { "epoch": 0.6504841251970277, "grad_norm": 1.2975481748580933, "learning_rate": 7.617200355893153e-05, "loss": 1.7086, "step": 3611 }, { "epoch": 0.6506642648052241, "grad_norm": 1.243620753288269, "learning_rate": 7.615993600524606e-05, "loss": 1.6497, "step": 3612 }, { "epoch": 0.6508444044134204, "grad_norm": 1.3089145421981812, "learning_rate": 7.614786635302326e-05, "loss": 1.6266, "step": 3613 }, { "epoch": 0.6510245440216168, "grad_norm": 1.2470273971557617, "learning_rate": 7.613579460323137e-05, "loss": 1.4206, "step": 3614 }, { "epoch": 0.6512046836298131, "grad_norm": 1.2418755292892456, "learning_rate": 7.612372075683875e-05, "loss": 1.6923, "step": 3615 }, { "epoch": 0.6513848232380095, "grad_norm": 1.3111896514892578, "learning_rate": 7.6111644814814e-05, "loss": 1.5096, "step": 3616 }, { "epoch": 0.6515649628462058, "grad_norm": 1.229321837425232, "learning_rate": 7.609956677812579e-05, "loss": 1.6228, "step": 3617 }, { "epoch": 0.6517451024544022, "grad_norm": 1.2228953838348389, "learning_rate": 7.608748664774308e-05, "loss": 1.5049, "step": 3618 }, { "epoch": 0.6519252420625985, "grad_norm": 1.3195760250091553, "learning_rate": 7.607540442463487e-05, "loss": 1.7859, "step": 3619 }, { "epoch": 0.6521053816707949, "grad_norm": 1.26304292678833, "learning_rate": 7.606332010977044e-05, "loss": 1.3972, "step": 3620 }, { "epoch": 0.6522855212789912, "grad_norm": 1.2393215894699097, "learning_rate": 7.605123370411915e-05, "loss": 1.5209, "step": 3621 }, { "epoch": 0.6524656608871876, "grad_norm": 1.2372890710830688, "learning_rate": 7.603914520865059e-05, "loss": 1.5373, "step": 3622 }, { "epoch": 0.6526458004953839, "grad_norm": 1.1615149974822998, "learning_rate": 7.602705462433448e-05, "loss": 1.2234, "step": 3623 }, { "epoch": 0.6528259401035803, "grad_norm": 1.280493140220642, "learning_rate": 7.601496195214073e-05, "loss": 1.5755, "step": 3624 }, { "epoch": 0.6530060797117766, "grad_norm": 1.3271478414535522, "learning_rate": 7.600286719303939e-05, "loss": 1.551, "step": 3625 }, { "epoch": 0.653186219319973, "grad_norm": 1.2108567953109741, "learning_rate": 7.599077034800073e-05, "loss": 1.5421, "step": 3626 }, { "epoch": 0.6533663589281693, "grad_norm": 1.4844692945480347, "learning_rate": 7.597867141799512e-05, "loss": 1.996, "step": 3627 }, { "epoch": 0.6535464985363657, "grad_norm": 1.3217929601669312, "learning_rate": 7.596657040399315e-05, "loss": 1.7011, "step": 3628 }, { "epoch": 0.653726638144562, "grad_norm": 1.2655220031738281, "learning_rate": 7.595446730696554e-05, "loss": 1.4463, "step": 3629 }, { "epoch": 0.6539067777527584, "grad_norm": 1.3344367742538452, "learning_rate": 7.594236212788321e-05, "loss": 1.823, "step": 3630 }, { "epoch": 0.6540869173609547, "grad_norm": 1.3263784646987915, "learning_rate": 7.593025486771722e-05, "loss": 1.5615, "step": 3631 }, { "epoch": 0.6542670569691511, "grad_norm": 1.420602798461914, "learning_rate": 7.591814552743883e-05, "loss": 1.9178, "step": 3632 }, { "epoch": 0.6544471965773474, "grad_norm": 1.235182285308838, "learning_rate": 7.590603410801942e-05, "loss": 1.5472, "step": 3633 }, { "epoch": 0.6546273361855438, "grad_norm": 1.3650022745132446, "learning_rate": 7.589392061043057e-05, "loss": 1.5285, "step": 3634 }, { "epoch": 0.6548074757937401, "grad_norm": 1.3641724586486816, "learning_rate": 7.588180503564401e-05, "loss": 1.7199, "step": 3635 }, { "epoch": 0.6549876154019365, "grad_norm": 1.3221230506896973, "learning_rate": 7.586968738463168e-05, "loss": 1.6271, "step": 3636 }, { "epoch": 0.6551677550101328, "grad_norm": 1.2659703493118286, "learning_rate": 7.585756765836563e-05, "loss": 1.5409, "step": 3637 }, { "epoch": 0.6553478946183292, "grad_norm": 1.246110200881958, "learning_rate": 7.584544585781808e-05, "loss": 1.6047, "step": 3638 }, { "epoch": 0.6555280342265255, "grad_norm": 1.3502000570297241, "learning_rate": 7.583332198396144e-05, "loss": 1.6643, "step": 3639 }, { "epoch": 0.6557081738347219, "grad_norm": 1.3185523748397827, "learning_rate": 7.582119603776833e-05, "loss": 1.5902, "step": 3640 }, { "epoch": 0.6558883134429183, "grad_norm": 1.426889181137085, "learning_rate": 7.580906802021142e-05, "loss": 1.6414, "step": 3641 }, { "epoch": 0.6560684530511146, "grad_norm": 1.2787363529205322, "learning_rate": 7.579693793226367e-05, "loss": 1.5154, "step": 3642 }, { "epoch": 0.656248592659311, "grad_norm": 1.4704679250717163, "learning_rate": 7.57848057748981e-05, "loss": 1.7166, "step": 3643 }, { "epoch": 0.6564287322675073, "grad_norm": 1.2037187814712524, "learning_rate": 7.577267154908798e-05, "loss": 1.3593, "step": 3644 }, { "epoch": 0.6566088718757037, "grad_norm": 1.4547653198242188, "learning_rate": 7.576053525580671e-05, "loss": 1.6249, "step": 3645 }, { "epoch": 0.6567890114839, "grad_norm": 1.2622307538986206, "learning_rate": 7.574839689602784e-05, "loss": 1.4856, "step": 3646 }, { "epoch": 0.6569691510920964, "grad_norm": 1.4857465028762817, "learning_rate": 7.57362564707251e-05, "loss": 1.7151, "step": 3647 }, { "epoch": 0.6571492907002927, "grad_norm": 1.4397097826004028, "learning_rate": 7.572411398087244e-05, "loss": 1.6776, "step": 3648 }, { "epoch": 0.6573294303084891, "grad_norm": 1.4924947023391724, "learning_rate": 7.571196942744386e-05, "loss": 1.4068, "step": 3649 }, { "epoch": 0.6575095699166854, "grad_norm": 1.354737639427185, "learning_rate": 7.569982281141362e-05, "loss": 1.6787, "step": 3650 }, { "epoch": 0.6576897095248818, "grad_norm": 1.1923655271530151, "learning_rate": 7.56876741337561e-05, "loss": 1.8675, "step": 3651 }, { "epoch": 0.6578698491330781, "grad_norm": 2.02211332321167, "learning_rate": 7.567552339544589e-05, "loss": 2.2728, "step": 3652 }, { "epoch": 0.6580499887412745, "grad_norm": 1.2227410078048706, "learning_rate": 7.56633705974577e-05, "loss": 1.8893, "step": 3653 }, { "epoch": 0.6582301283494708, "grad_norm": 1.4408692121505737, "learning_rate": 7.565121574076641e-05, "loss": 1.9994, "step": 3654 }, { "epoch": 0.6584102679576672, "grad_norm": 1.3394439220428467, "learning_rate": 7.563905882634709e-05, "loss": 1.9256, "step": 3655 }, { "epoch": 0.6585904075658635, "grad_norm": 1.3568943738937378, "learning_rate": 7.562689985517496e-05, "loss": 1.9424, "step": 3656 }, { "epoch": 0.6587705471740599, "grad_norm": 1.3870124816894531, "learning_rate": 7.56147388282254e-05, "loss": 1.8813, "step": 3657 }, { "epoch": 0.6589506867822562, "grad_norm": 1.4717187881469727, "learning_rate": 7.560257574647399e-05, "loss": 2.007, "step": 3658 }, { "epoch": 0.6591308263904526, "grad_norm": 1.610626220703125, "learning_rate": 7.55904106108964e-05, "loss": 2.2868, "step": 3659 }, { "epoch": 0.6593109659986489, "grad_norm": 1.8786755800247192, "learning_rate": 7.557824342246856e-05, "loss": 2.6466, "step": 3660 }, { "epoch": 0.6594911056068453, "grad_norm": 1.259275197982788, "learning_rate": 7.556607418216646e-05, "loss": 1.6087, "step": 3661 }, { "epoch": 0.6596712452150416, "grad_norm": 1.2575100660324097, "learning_rate": 7.555390289096635e-05, "loss": 1.5655, "step": 3662 }, { "epoch": 0.659851384823238, "grad_norm": 1.3455612659454346, "learning_rate": 7.554172954984462e-05, "loss": 1.7662, "step": 3663 }, { "epoch": 0.6600315244314343, "grad_norm": 1.0780529975891113, "learning_rate": 7.552955415977776e-05, "loss": 1.3136, "step": 3664 }, { "epoch": 0.6602116640396307, "grad_norm": 1.188529372215271, "learning_rate": 7.551737672174252e-05, "loss": 1.3991, "step": 3665 }, { "epoch": 0.660391803647827, "grad_norm": 1.2434971332550049, "learning_rate": 7.550519723671575e-05, "loss": 1.5803, "step": 3666 }, { "epoch": 0.6605719432560234, "grad_norm": 1.098416805267334, "learning_rate": 7.549301570567446e-05, "loss": 1.2345, "step": 3667 }, { "epoch": 0.6607520828642198, "grad_norm": 1.3781863451004028, "learning_rate": 7.548083212959588e-05, "loss": 1.5225, "step": 3668 }, { "epoch": 0.6609322224724161, "grad_norm": 1.3204600811004639, "learning_rate": 7.546864650945735e-05, "loss": 1.6693, "step": 3669 }, { "epoch": 0.6611123620806125, "grad_norm": 1.3403867483139038, "learning_rate": 7.545645884623643e-05, "loss": 1.5242, "step": 3670 }, { "epoch": 0.6612925016888088, "grad_norm": 1.3336831331253052, "learning_rate": 7.544426914091073e-05, "loss": 1.6023, "step": 3671 }, { "epoch": 0.6614726412970052, "grad_norm": 1.2926417589187622, "learning_rate": 7.543207739445818e-05, "loss": 1.5581, "step": 3672 }, { "epoch": 0.6616527809052015, "grad_norm": 1.2402985095977783, "learning_rate": 7.541988360785678e-05, "loss": 1.5536, "step": 3673 }, { "epoch": 0.6618329205133979, "grad_norm": 1.277171015739441, "learning_rate": 7.54076877820847e-05, "loss": 1.6265, "step": 3674 }, { "epoch": 0.6620130601215942, "grad_norm": 1.300055742263794, "learning_rate": 7.539548991812027e-05, "loss": 1.6525, "step": 3675 }, { "epoch": 0.6621931997297906, "grad_norm": 1.2308604717254639, "learning_rate": 7.5383290016942e-05, "loss": 1.6456, "step": 3676 }, { "epoch": 0.6623733393379869, "grad_norm": 1.2776856422424316, "learning_rate": 7.537108807952858e-05, "loss": 1.5924, "step": 3677 }, { "epoch": 0.6625534789461833, "grad_norm": 1.5007275342941284, "learning_rate": 7.535888410685883e-05, "loss": 1.8818, "step": 3678 }, { "epoch": 0.6627336185543796, "grad_norm": 1.3374208211898804, "learning_rate": 7.534667809991175e-05, "loss": 1.6561, "step": 3679 }, { "epoch": 0.662913758162576, "grad_norm": 1.2487257719039917, "learning_rate": 7.533447005966649e-05, "loss": 1.5636, "step": 3680 }, { "epoch": 0.6630938977707723, "grad_norm": 1.3374063968658447, "learning_rate": 7.532225998710241e-05, "loss": 1.6567, "step": 3681 }, { "epoch": 0.6632740373789687, "grad_norm": 1.2387478351593018, "learning_rate": 7.531004788319895e-05, "loss": 1.2708, "step": 3682 }, { "epoch": 0.663454176987165, "grad_norm": 1.387755274772644, "learning_rate": 7.529783374893576e-05, "loss": 1.6619, "step": 3683 }, { "epoch": 0.6636343165953614, "grad_norm": 1.277577519416809, "learning_rate": 7.528561758529269e-05, "loss": 1.7869, "step": 3684 }, { "epoch": 0.6638144562035577, "grad_norm": 1.2916918992996216, "learning_rate": 7.527339939324968e-05, "loss": 1.2151, "step": 3685 }, { "epoch": 0.6639945958117541, "grad_norm": 1.2440828084945679, "learning_rate": 7.52611791737869e-05, "loss": 1.4988, "step": 3686 }, { "epoch": 0.6641747354199504, "grad_norm": 1.3986138105392456, "learning_rate": 7.524895692788461e-05, "loss": 1.7273, "step": 3687 }, { "epoch": 0.6643548750281468, "grad_norm": 1.285841464996338, "learning_rate": 7.52367326565233e-05, "loss": 1.4586, "step": 3688 }, { "epoch": 0.6645350146363431, "grad_norm": 1.357838749885559, "learning_rate": 7.522450636068359e-05, "loss": 1.7751, "step": 3689 }, { "epoch": 0.6647151542445395, "grad_norm": 1.3919072151184082, "learning_rate": 7.521227804134626e-05, "loss": 1.3994, "step": 3690 }, { "epoch": 0.6648952938527358, "grad_norm": 1.3824599981307983, "learning_rate": 7.520004769949229e-05, "loss": 1.6173, "step": 3691 }, { "epoch": 0.6650754334609322, "grad_norm": 1.237833857536316, "learning_rate": 7.518781533610273e-05, "loss": 1.3284, "step": 3692 }, { "epoch": 0.6652555730691285, "grad_norm": 1.2499383687973022, "learning_rate": 7.517558095215894e-05, "loss": 1.4224, "step": 3693 }, { "epoch": 0.6654357126773249, "grad_norm": 1.377846598625183, "learning_rate": 7.516334454864226e-05, "loss": 1.7126, "step": 3694 }, { "epoch": 0.6656158522855212, "grad_norm": 1.4783602952957153, "learning_rate": 7.515110612653436e-05, "loss": 1.5385, "step": 3695 }, { "epoch": 0.6657959918937176, "grad_norm": 1.3596348762512207, "learning_rate": 7.513886568681699e-05, "loss": 1.4503, "step": 3696 }, { "epoch": 0.665976131501914, "grad_norm": 1.4587466716766357, "learning_rate": 7.512662323047204e-05, "loss": 1.4007, "step": 3697 }, { "epoch": 0.6661562711101103, "grad_norm": 1.4428508281707764, "learning_rate": 7.511437875848162e-05, "loss": 1.564, "step": 3698 }, { "epoch": 0.6663364107183067, "grad_norm": 1.2985502481460571, "learning_rate": 7.510213227182796e-05, "loss": 1.5423, "step": 3699 }, { "epoch": 0.666516550326503, "grad_norm": 1.4289222955703735, "learning_rate": 7.508988377149349e-05, "loss": 1.3352, "step": 3700 }, { "epoch": 0.6666966899346994, "grad_norm": 1.477471947669983, "learning_rate": 7.507763325846075e-05, "loss": 1.9711, "step": 3701 }, { "epoch": 0.6668768295428957, "grad_norm": 1.3349413871765137, "learning_rate": 7.50653807337125e-05, "loss": 2.1618, "step": 3702 }, { "epoch": 0.6670569691510921, "grad_norm": 1.2045387029647827, "learning_rate": 7.505312619823162e-05, "loss": 2.0315, "step": 3703 }, { "epoch": 0.6672371087592884, "grad_norm": 1.3169463872909546, "learning_rate": 7.504086965300113e-05, "loss": 1.9713, "step": 3704 }, { "epoch": 0.6674172483674848, "grad_norm": 1.3186359405517578, "learning_rate": 7.50286110990043e-05, "loss": 2.0726, "step": 3705 }, { "epoch": 0.6675973879756811, "grad_norm": 1.4061086177825928, "learning_rate": 7.501635053722447e-05, "loss": 1.8744, "step": 3706 }, { "epoch": 0.6677775275838775, "grad_norm": 1.3869818449020386, "learning_rate": 7.50040879686452e-05, "loss": 2.1019, "step": 3707 }, { "epoch": 0.6679576671920738, "grad_norm": 1.4747493267059326, "learning_rate": 7.499182339425014e-05, "loss": 2.0295, "step": 3708 }, { "epoch": 0.6681378068002702, "grad_norm": 1.6722146272659302, "learning_rate": 7.49795568150232e-05, "loss": 1.8864, "step": 3709 }, { "epoch": 0.6683179464084665, "grad_norm": 1.7746806144714355, "learning_rate": 7.496728823194839e-05, "loss": 2.143, "step": 3710 }, { "epoch": 0.6684980860166629, "grad_norm": 1.3278132677078247, "learning_rate": 7.495501764600985e-05, "loss": 1.9036, "step": 3711 }, { "epoch": 0.6686782256248592, "grad_norm": 1.2284557819366455, "learning_rate": 7.494274505819197e-05, "loss": 1.5252, "step": 3712 }, { "epoch": 0.6688583652330556, "grad_norm": 1.3370000123977661, "learning_rate": 7.493047046947923e-05, "loss": 1.6233, "step": 3713 }, { "epoch": 0.6690385048412519, "grad_norm": 1.2801949977874756, "learning_rate": 7.491819388085629e-05, "loss": 1.5844, "step": 3714 }, { "epoch": 0.6692186444494483, "grad_norm": 1.3016551733016968, "learning_rate": 7.490591529330797e-05, "loss": 1.4833, "step": 3715 }, { "epoch": 0.6693987840576446, "grad_norm": 1.253571629524231, "learning_rate": 7.489363470781926e-05, "loss": 1.5693, "step": 3716 }, { "epoch": 0.669578923665841, "grad_norm": 1.256401777267456, "learning_rate": 7.48813521253753e-05, "loss": 1.5879, "step": 3717 }, { "epoch": 0.6697590632740373, "grad_norm": 1.175365924835205, "learning_rate": 7.48690675469614e-05, "loss": 1.4255, "step": 3718 }, { "epoch": 0.6699392028822337, "grad_norm": 1.2581830024719238, "learning_rate": 7.4856780973563e-05, "loss": 1.446, "step": 3719 }, { "epoch": 0.67011934249043, "grad_norm": 1.3508281707763672, "learning_rate": 7.484449240616573e-05, "loss": 1.6363, "step": 3720 }, { "epoch": 0.6702994820986264, "grad_norm": 1.3509913682937622, "learning_rate": 7.48322018457554e-05, "loss": 1.6833, "step": 3721 }, { "epoch": 0.6704796217068227, "grad_norm": 1.2553876638412476, "learning_rate": 7.481990929331793e-05, "loss": 1.3355, "step": 3722 }, { "epoch": 0.6706597613150191, "grad_norm": 1.4013798236846924, "learning_rate": 7.480761474983943e-05, "loss": 1.6521, "step": 3723 }, { "epoch": 0.6708399009232154, "grad_norm": 1.413506031036377, "learning_rate": 7.479531821630615e-05, "loss": 1.7168, "step": 3724 }, { "epoch": 0.6710200405314118, "grad_norm": 1.5890861749649048, "learning_rate": 7.478301969370452e-05, "loss": 1.8377, "step": 3725 }, { "epoch": 0.6712001801396082, "grad_norm": 1.3385136127471924, "learning_rate": 7.477071918302112e-05, "loss": 1.6891, "step": 3726 }, { "epoch": 0.6713803197478045, "grad_norm": 1.281814455986023, "learning_rate": 7.475841668524268e-05, "loss": 1.5864, "step": 3727 }, { "epoch": 0.671560459356001, "grad_norm": 1.1952110528945923, "learning_rate": 7.474611220135614e-05, "loss": 1.2592, "step": 3728 }, { "epoch": 0.6717405989641972, "grad_norm": 1.3743549585342407, "learning_rate": 7.473380573234852e-05, "loss": 1.6243, "step": 3729 }, { "epoch": 0.6719207385723936, "grad_norm": 1.3610057830810547, "learning_rate": 7.472149727920705e-05, "loss": 1.6303, "step": 3730 }, { "epoch": 0.67210087818059, "grad_norm": 1.4856343269348145, "learning_rate": 7.47091868429191e-05, "loss": 1.7969, "step": 3731 }, { "epoch": 0.6722810177887864, "grad_norm": 1.369631052017212, "learning_rate": 7.469687442447223e-05, "loss": 1.7481, "step": 3732 }, { "epoch": 0.6724611573969826, "grad_norm": 1.2857675552368164, "learning_rate": 7.468456002485413e-05, "loss": 1.5861, "step": 3733 }, { "epoch": 0.672641297005179, "grad_norm": 1.4559111595153809, "learning_rate": 7.467224364505264e-05, "loss": 1.6241, "step": 3734 }, { "epoch": 0.6728214366133753, "grad_norm": 1.4399436712265015, "learning_rate": 7.465992528605579e-05, "loss": 1.6622, "step": 3735 }, { "epoch": 0.6730015762215718, "grad_norm": 1.5655626058578491, "learning_rate": 7.464760494885172e-05, "loss": 2.0022, "step": 3736 }, { "epoch": 0.673181715829768, "grad_norm": 1.2919420003890991, "learning_rate": 7.46352826344288e-05, "loss": 1.481, "step": 3737 }, { "epoch": 0.6733618554379645, "grad_norm": 1.2697975635528564, "learning_rate": 7.46229583437755e-05, "loss": 1.5485, "step": 3738 }, { "epoch": 0.6735419950461607, "grad_norm": 1.586836814880371, "learning_rate": 7.46106320778805e-05, "loss": 1.8011, "step": 3739 }, { "epoch": 0.6737221346543572, "grad_norm": 1.3497823476791382, "learning_rate": 7.459830383773258e-05, "loss": 1.5316, "step": 3740 }, { "epoch": 0.6739022742625534, "grad_norm": 1.3906464576721191, "learning_rate": 7.458597362432068e-05, "loss": 1.6649, "step": 3741 }, { "epoch": 0.6740824138707499, "grad_norm": 1.3394267559051514, "learning_rate": 7.457364143863397e-05, "loss": 1.4355, "step": 3742 }, { "epoch": 0.6742625534789461, "grad_norm": 1.230089545249939, "learning_rate": 7.456130728166171e-05, "loss": 1.462, "step": 3743 }, { "epoch": 0.6744426930871426, "grad_norm": 1.3259518146514893, "learning_rate": 7.454897115439333e-05, "loss": 1.5647, "step": 3744 }, { "epoch": 0.6746228326953388, "grad_norm": 1.3547018766403198, "learning_rate": 7.453663305781847e-05, "loss": 1.5953, "step": 3745 }, { "epoch": 0.6748029723035353, "grad_norm": 1.4750478267669678, "learning_rate": 7.452429299292684e-05, "loss": 1.5652, "step": 3746 }, { "epoch": 0.6749831119117315, "grad_norm": 1.5076578855514526, "learning_rate": 7.451195096070838e-05, "loss": 1.8026, "step": 3747 }, { "epoch": 0.675163251519928, "grad_norm": 1.308268666267395, "learning_rate": 7.449960696215312e-05, "loss": 1.5457, "step": 3748 }, { "epoch": 0.6753433911281242, "grad_norm": 1.465030312538147, "learning_rate": 7.448726099825137e-05, "loss": 1.8723, "step": 3749 }, { "epoch": 0.6755235307363207, "grad_norm": 1.153728723526001, "learning_rate": 7.447491306999344e-05, "loss": 1.2589, "step": 3750 }, { "epoch": 0.675703670344517, "grad_norm": 1.2619167566299438, "learning_rate": 7.446256317836991e-05, "loss": 2.0856, "step": 3751 }, { "epoch": 0.6758838099527134, "grad_norm": 1.1553776264190674, "learning_rate": 7.445021132437147e-05, "loss": 1.6955, "step": 3752 }, { "epoch": 0.6760639495609098, "grad_norm": 1.1766729354858398, "learning_rate": 7.4437857508989e-05, "loss": 2.0043, "step": 3753 }, { "epoch": 0.676244089169106, "grad_norm": 1.2143126726150513, "learning_rate": 7.442550173321351e-05, "loss": 1.8816, "step": 3754 }, { "epoch": 0.6764242287773025, "grad_norm": 1.2775821685791016, "learning_rate": 7.441314399803616e-05, "loss": 2.0566, "step": 3755 }, { "epoch": 0.6766043683854988, "grad_norm": 1.3005644083023071, "learning_rate": 7.44007843044483e-05, "loss": 1.6926, "step": 3756 }, { "epoch": 0.6767845079936952, "grad_norm": 1.4249347448349, "learning_rate": 7.438842265344139e-05, "loss": 2.0715, "step": 3757 }, { "epoch": 0.6769646476018915, "grad_norm": 1.5612343549728394, "learning_rate": 7.437605904600709e-05, "loss": 1.9976, "step": 3758 }, { "epoch": 0.6771447872100879, "grad_norm": 1.5529882907867432, "learning_rate": 7.436369348313721e-05, "loss": 2.0521, "step": 3759 }, { "epoch": 0.6773249268182842, "grad_norm": 1.8146275281906128, "learning_rate": 7.435132596582373e-05, "loss": 2.164, "step": 3760 }, { "epoch": 0.6775050664264806, "grad_norm": 1.321893334388733, "learning_rate": 7.433895649505873e-05, "loss": 1.5386, "step": 3761 }, { "epoch": 0.6776852060346769, "grad_norm": 1.2590678930282593, "learning_rate": 7.432658507183449e-05, "loss": 1.6997, "step": 3762 }, { "epoch": 0.6778653456428733, "grad_norm": 1.2367582321166992, "learning_rate": 7.431421169714344e-05, "loss": 1.5306, "step": 3763 }, { "epoch": 0.6780454852510696, "grad_norm": 1.3217625617980957, "learning_rate": 7.430183637197818e-05, "loss": 1.7033, "step": 3764 }, { "epoch": 0.678225624859266, "grad_norm": 1.1774741411209106, "learning_rate": 7.428945909733143e-05, "loss": 1.456, "step": 3765 }, { "epoch": 0.6784057644674623, "grad_norm": 1.3103166818618774, "learning_rate": 7.427707987419612e-05, "loss": 1.5354, "step": 3766 }, { "epoch": 0.6785859040756587, "grad_norm": 1.3158172369003296, "learning_rate": 7.426469870356528e-05, "loss": 1.6131, "step": 3767 }, { "epoch": 0.678766043683855, "grad_norm": 1.333551287651062, "learning_rate": 7.425231558643213e-05, "loss": 1.7648, "step": 3768 }, { "epoch": 0.6789461832920514, "grad_norm": 1.4817004203796387, "learning_rate": 7.423993052379003e-05, "loss": 1.4387, "step": 3769 }, { "epoch": 0.6791263229002477, "grad_norm": 1.3711568117141724, "learning_rate": 7.422754351663252e-05, "loss": 1.5995, "step": 3770 }, { "epoch": 0.6793064625084441, "grad_norm": 1.212541103363037, "learning_rate": 7.421515456595325e-05, "loss": 1.3797, "step": 3771 }, { "epoch": 0.6794866021166404, "grad_norm": 1.3111616373062134, "learning_rate": 7.42027636727461e-05, "loss": 1.7147, "step": 3772 }, { "epoch": 0.6796667417248368, "grad_norm": 1.2291555404663086, "learning_rate": 7.419037083800502e-05, "loss": 1.3334, "step": 3773 }, { "epoch": 0.6798468813330331, "grad_norm": 1.2658121585845947, "learning_rate": 7.417797606272418e-05, "loss": 1.4482, "step": 3774 }, { "epoch": 0.6800270209412295, "grad_norm": 1.3814055919647217, "learning_rate": 7.416557934789788e-05, "loss": 1.5043, "step": 3775 }, { "epoch": 0.6802071605494258, "grad_norm": 1.2001444101333618, "learning_rate": 7.415318069452058e-05, "loss": 1.4524, "step": 3776 }, { "epoch": 0.6803873001576222, "grad_norm": 1.3388934135437012, "learning_rate": 7.414078010358688e-05, "loss": 1.6208, "step": 3777 }, { "epoch": 0.6805674397658185, "grad_norm": 1.206924557685852, "learning_rate": 7.412837757609157e-05, "loss": 1.3059, "step": 3778 }, { "epoch": 0.6807475793740149, "grad_norm": 1.3403254747390747, "learning_rate": 7.411597311302955e-05, "loss": 1.4764, "step": 3779 }, { "epoch": 0.6809277189822112, "grad_norm": 1.3341001272201538, "learning_rate": 7.410356671539592e-05, "loss": 1.5923, "step": 3780 }, { "epoch": 0.6811078585904076, "grad_norm": 1.3420345783233643, "learning_rate": 7.40911583841859e-05, "loss": 1.5379, "step": 3781 }, { "epoch": 0.681287998198604, "grad_norm": 1.4320402145385742, "learning_rate": 7.407874812039489e-05, "loss": 1.7614, "step": 3782 }, { "epoch": 0.6814681378068003, "grad_norm": 1.3970222473144531, "learning_rate": 7.406633592501845e-05, "loss": 1.7229, "step": 3783 }, { "epoch": 0.6816482774149967, "grad_norm": 1.4104418754577637, "learning_rate": 7.405392179905225e-05, "loss": 1.7245, "step": 3784 }, { "epoch": 0.681828417023193, "grad_norm": 1.3820829391479492, "learning_rate": 7.404150574349216e-05, "loss": 1.7106, "step": 3785 }, { "epoch": 0.6820085566313894, "grad_norm": 1.3436079025268555, "learning_rate": 7.402908775933419e-05, "loss": 1.7077, "step": 3786 }, { "epoch": 0.6821886962395857, "grad_norm": 1.3855971097946167, "learning_rate": 7.40166678475745e-05, "loss": 1.7022, "step": 3787 }, { "epoch": 0.6823688358477821, "grad_norm": 1.3236973285675049, "learning_rate": 7.400424600920943e-05, "loss": 1.7128, "step": 3788 }, { "epoch": 0.6825489754559784, "grad_norm": 1.3929835557937622, "learning_rate": 7.399182224523543e-05, "loss": 1.8028, "step": 3789 }, { "epoch": 0.6827291150641748, "grad_norm": 1.3194581270217896, "learning_rate": 7.397939655664912e-05, "loss": 1.4563, "step": 3790 }, { "epoch": 0.6829092546723711, "grad_norm": 1.4082924127578735, "learning_rate": 7.396696894444732e-05, "loss": 1.4215, "step": 3791 }, { "epoch": 0.6830893942805675, "grad_norm": 1.3762539625167847, "learning_rate": 7.395453940962693e-05, "loss": 1.68, "step": 3792 }, { "epoch": 0.6832695338887638, "grad_norm": 1.3264034986495972, "learning_rate": 7.394210795318506e-05, "loss": 1.3545, "step": 3793 }, { "epoch": 0.6834496734969602, "grad_norm": 1.429064154624939, "learning_rate": 7.392967457611897e-05, "loss": 1.5334, "step": 3794 }, { "epoch": 0.6836298131051565, "grad_norm": 1.2954745292663574, "learning_rate": 7.391723927942602e-05, "loss": 1.4719, "step": 3795 }, { "epoch": 0.6838099527133529, "grad_norm": 1.4566138982772827, "learning_rate": 7.390480206410379e-05, "loss": 1.5146, "step": 3796 }, { "epoch": 0.6839900923215492, "grad_norm": 1.558975338935852, "learning_rate": 7.389236293114998e-05, "loss": 1.4991, "step": 3797 }, { "epoch": 0.6841702319297456, "grad_norm": 1.516238808631897, "learning_rate": 7.387992188156245e-05, "loss": 1.3562, "step": 3798 }, { "epoch": 0.6843503715379419, "grad_norm": 1.395850419998169, "learning_rate": 7.386747891633923e-05, "loss": 1.4575, "step": 3799 }, { "epoch": 0.6845305111461383, "grad_norm": 1.4928802251815796, "learning_rate": 7.385503403647847e-05, "loss": 1.7443, "step": 3800 }, { "epoch": 0.6847106507543346, "grad_norm": 1.6430546045303345, "learning_rate": 7.38425872429785e-05, "loss": 1.6772, "step": 3801 }, { "epoch": 0.684890790362531, "grad_norm": 1.5847761631011963, "learning_rate": 7.383013853683781e-05, "loss": 2.0852, "step": 3802 }, { "epoch": 0.6850709299707273, "grad_norm": 1.4791513681411743, "learning_rate": 7.381768791905499e-05, "loss": 2.1373, "step": 3803 }, { "epoch": 0.6852510695789237, "grad_norm": 1.3154712915420532, "learning_rate": 7.380523539062885e-05, "loss": 2.1147, "step": 3804 }, { "epoch": 0.68543120918712, "grad_norm": 1.2487972974777222, "learning_rate": 7.379278095255833e-05, "loss": 1.6921, "step": 3805 }, { "epoch": 0.6856113487953164, "grad_norm": 1.2755018472671509, "learning_rate": 7.37803246058425e-05, "loss": 1.6568, "step": 3806 }, { "epoch": 0.6857914884035127, "grad_norm": 1.4299025535583496, "learning_rate": 7.376786635148063e-05, "loss": 2.0746, "step": 3807 }, { "epoch": 0.6859716280117091, "grad_norm": 1.5345515012741089, "learning_rate": 7.375540619047207e-05, "loss": 2.4002, "step": 3808 }, { "epoch": 0.6861517676199054, "grad_norm": 1.5227692127227783, "learning_rate": 7.374294412381643e-05, "loss": 1.9818, "step": 3809 }, { "epoch": 0.6863319072281018, "grad_norm": 1.543299674987793, "learning_rate": 7.373048015251337e-05, "loss": 2.36, "step": 3810 }, { "epoch": 0.6865120468362982, "grad_norm": 2.098301649093628, "learning_rate": 7.371801427756274e-05, "loss": 1.8631, "step": 3811 }, { "epoch": 0.6866921864444945, "grad_norm": 1.3070650100708008, "learning_rate": 7.370554649996454e-05, "loss": 1.6812, "step": 3812 }, { "epoch": 0.6868723260526909, "grad_norm": 1.2857000827789307, "learning_rate": 7.369307682071898e-05, "loss": 1.7798, "step": 3813 }, { "epoch": 0.6870524656608872, "grad_norm": 1.3998967409133911, "learning_rate": 7.368060524082631e-05, "loss": 1.7286, "step": 3814 }, { "epoch": 0.6872326052690836, "grad_norm": 1.2834653854370117, "learning_rate": 7.366813176128705e-05, "loss": 1.6483, "step": 3815 }, { "epoch": 0.6874127448772799, "grad_norm": 1.3898488283157349, "learning_rate": 7.365565638310177e-05, "loss": 1.4193, "step": 3816 }, { "epoch": 0.6875928844854763, "grad_norm": 1.345453143119812, "learning_rate": 7.364317910727128e-05, "loss": 1.6951, "step": 3817 }, { "epoch": 0.6877730240936726, "grad_norm": 1.1374995708465576, "learning_rate": 7.363069993479644e-05, "loss": 1.3432, "step": 3818 }, { "epoch": 0.687953163701869, "grad_norm": 1.245538592338562, "learning_rate": 7.361821886667839e-05, "loss": 1.4805, "step": 3819 }, { "epoch": 0.6881333033100653, "grad_norm": 1.3383915424346924, "learning_rate": 7.360573590391831e-05, "loss": 1.6369, "step": 3820 }, { "epoch": 0.6883134429182617, "grad_norm": 1.427017092704773, "learning_rate": 7.359325104751762e-05, "loss": 1.5755, "step": 3821 }, { "epoch": 0.688493582526458, "grad_norm": 1.3233128786087036, "learning_rate": 7.358076429847779e-05, "loss": 1.4975, "step": 3822 }, { "epoch": 0.6886737221346544, "grad_norm": 1.3432520627975464, "learning_rate": 7.356827565780055e-05, "loss": 1.6049, "step": 3823 }, { "epoch": 0.6888538617428507, "grad_norm": 1.3402349948883057, "learning_rate": 7.355578512648772e-05, "loss": 1.7118, "step": 3824 }, { "epoch": 0.6890340013510471, "grad_norm": 1.343283772468567, "learning_rate": 7.354329270554127e-05, "loss": 1.5753, "step": 3825 }, { "epoch": 0.6892141409592434, "grad_norm": 1.2998533248901367, "learning_rate": 7.353079839596336e-05, "loss": 1.7031, "step": 3826 }, { "epoch": 0.6893942805674398, "grad_norm": 1.4211757183074951, "learning_rate": 7.351830219875625e-05, "loss": 1.7855, "step": 3827 }, { "epoch": 0.6895744201756361, "grad_norm": 1.2932559251785278, "learning_rate": 7.35058041149224e-05, "loss": 1.5938, "step": 3828 }, { "epoch": 0.6897545597838325, "grad_norm": 1.3493373394012451, "learning_rate": 7.34933041454644e-05, "loss": 1.7244, "step": 3829 }, { "epoch": 0.6899346993920288, "grad_norm": 1.2789973020553589, "learning_rate": 7.348080229138496e-05, "loss": 1.5126, "step": 3830 }, { "epoch": 0.6901148390002252, "grad_norm": 1.3149967193603516, "learning_rate": 7.346829855368702e-05, "loss": 1.628, "step": 3831 }, { "epoch": 0.6902949786084215, "grad_norm": 1.2477061748504639, "learning_rate": 7.34557929333736e-05, "loss": 1.6282, "step": 3832 }, { "epoch": 0.6904751182166179, "grad_norm": 1.2891724109649658, "learning_rate": 7.34432854314479e-05, "loss": 1.6348, "step": 3833 }, { "epoch": 0.6906552578248142, "grad_norm": 1.3005331754684448, "learning_rate": 7.343077604891323e-05, "loss": 1.5863, "step": 3834 }, { "epoch": 0.6908353974330106, "grad_norm": 1.3337929248809814, "learning_rate": 7.341826478677316e-05, "loss": 1.4212, "step": 3835 }, { "epoch": 0.6910155370412069, "grad_norm": 1.405542016029358, "learning_rate": 7.340575164603127e-05, "loss": 1.7336, "step": 3836 }, { "epoch": 0.6911956766494033, "grad_norm": 1.415505051612854, "learning_rate": 7.339323662769139e-05, "loss": 1.6209, "step": 3837 }, { "epoch": 0.6913758162575996, "grad_norm": 1.2408785820007324, "learning_rate": 7.338071973275746e-05, "loss": 1.349, "step": 3838 }, { "epoch": 0.691555955865796, "grad_norm": 1.3061391115188599, "learning_rate": 7.33682009622336e-05, "loss": 1.4186, "step": 3839 }, { "epoch": 0.6917360954739924, "grad_norm": 1.4277821779251099, "learning_rate": 7.335568031712401e-05, "loss": 1.5794, "step": 3840 }, { "epoch": 0.6919162350821887, "grad_norm": 1.5519523620605469, "learning_rate": 7.334315779843313e-05, "loss": 1.8449, "step": 3841 }, { "epoch": 0.6920963746903851, "grad_norm": 1.314356803894043, "learning_rate": 7.333063340716551e-05, "loss": 1.6588, "step": 3842 }, { "epoch": 0.6922765142985814, "grad_norm": 1.575269103050232, "learning_rate": 7.331810714432585e-05, "loss": 1.6854, "step": 3843 }, { "epoch": 0.6924566539067778, "grad_norm": 1.36978018283844, "learning_rate": 7.330557901091898e-05, "loss": 1.6053, "step": 3844 }, { "epoch": 0.6926367935149741, "grad_norm": 1.4974101781845093, "learning_rate": 7.329304900794991e-05, "loss": 1.6429, "step": 3845 }, { "epoch": 0.6928169331231705, "grad_norm": 1.35093092918396, "learning_rate": 7.32805171364238e-05, "loss": 1.6877, "step": 3846 }, { "epoch": 0.6929970727313668, "grad_norm": 1.3871434926986694, "learning_rate": 7.326798339734594e-05, "loss": 1.6827, "step": 3847 }, { "epoch": 0.6931772123395632, "grad_norm": 1.3733747005462646, "learning_rate": 7.32554477917218e-05, "loss": 1.4125, "step": 3848 }, { "epoch": 0.6933573519477595, "grad_norm": 1.4671399593353271, "learning_rate": 7.324291032055696e-05, "loss": 1.483, "step": 3849 }, { "epoch": 0.6935374915559559, "grad_norm": 1.3165206909179688, "learning_rate": 7.323037098485716e-05, "loss": 1.5879, "step": 3850 }, { "epoch": 0.6937176311641522, "grad_norm": 1.335375428199768, "learning_rate": 7.321782978562833e-05, "loss": 1.92, "step": 3851 }, { "epoch": 0.6938977707723486, "grad_norm": 1.2365530729293823, "learning_rate": 7.320528672387648e-05, "loss": 2.0427, "step": 3852 }, { "epoch": 0.6940779103805449, "grad_norm": 1.2713297605514526, "learning_rate": 7.319274180060786e-05, "loss": 1.9989, "step": 3853 }, { "epoch": 0.6942580499887413, "grad_norm": 1.3556358814239502, "learning_rate": 7.318019501682879e-05, "loss": 2.07, "step": 3854 }, { "epoch": 0.6944381895969376, "grad_norm": 1.236850380897522, "learning_rate": 7.316764637354575e-05, "loss": 1.8912, "step": 3855 }, { "epoch": 0.694618329205134, "grad_norm": 1.2358862161636353, "learning_rate": 7.315509587176541e-05, "loss": 1.9052, "step": 3856 }, { "epoch": 0.6947984688133303, "grad_norm": 1.446398377418518, "learning_rate": 7.314254351249454e-05, "loss": 2.0899, "step": 3857 }, { "epoch": 0.6949786084215267, "grad_norm": 1.5219694375991821, "learning_rate": 7.312998929674013e-05, "loss": 2.0933, "step": 3858 }, { "epoch": 0.695158748029723, "grad_norm": 1.619506597518921, "learning_rate": 7.311743322550923e-05, "loss": 2.1694, "step": 3859 }, { "epoch": 0.6953388876379194, "grad_norm": 1.593291997909546, "learning_rate": 7.310487529980911e-05, "loss": 1.5654, "step": 3860 }, { "epoch": 0.6955190272461157, "grad_norm": 1.2240742444992065, "learning_rate": 7.309231552064714e-05, "loss": 1.5291, "step": 3861 }, { "epoch": 0.6956991668543121, "grad_norm": 1.2461670637130737, "learning_rate": 7.307975388903086e-05, "loss": 1.505, "step": 3862 }, { "epoch": 0.6958793064625084, "grad_norm": 1.1853049993515015, "learning_rate": 7.306719040596797e-05, "loss": 1.5592, "step": 3863 }, { "epoch": 0.6960594460707048, "grad_norm": 1.2626633644104004, "learning_rate": 7.30546250724663e-05, "loss": 1.3658, "step": 3864 }, { "epoch": 0.6962395856789011, "grad_norm": 1.3056353330612183, "learning_rate": 7.304205788953385e-05, "loss": 1.5105, "step": 3865 }, { "epoch": 0.6964197252870975, "grad_norm": 1.3683557510375977, "learning_rate": 7.30294888581787e-05, "loss": 1.6711, "step": 3866 }, { "epoch": 0.6965998648952939, "grad_norm": 1.394254207611084, "learning_rate": 7.301691797940921e-05, "loss": 1.6437, "step": 3867 }, { "epoch": 0.6967800045034902, "grad_norm": 1.3778191804885864, "learning_rate": 7.300434525423375e-05, "loss": 1.6292, "step": 3868 }, { "epoch": 0.6969601441116866, "grad_norm": 1.2305551767349243, "learning_rate": 7.299177068366094e-05, "loss": 1.4671, "step": 3869 }, { "epoch": 0.6971402837198829, "grad_norm": 1.4378727674484253, "learning_rate": 7.297919426869947e-05, "loss": 1.7736, "step": 3870 }, { "epoch": 0.6973204233280793, "grad_norm": 1.4748146533966064, "learning_rate": 7.296661601035822e-05, "loss": 1.7749, "step": 3871 }, { "epoch": 0.6975005629362756, "grad_norm": 1.2209627628326416, "learning_rate": 7.295403590964623e-05, "loss": 1.6068, "step": 3872 }, { "epoch": 0.697680702544472, "grad_norm": 1.2132511138916016, "learning_rate": 7.294145396757266e-05, "loss": 1.5123, "step": 3873 }, { "epoch": 0.6978608421526683, "grad_norm": 1.3248944282531738, "learning_rate": 7.292887018514683e-05, "loss": 1.5667, "step": 3874 }, { "epoch": 0.6980409817608647, "grad_norm": 1.392854928970337, "learning_rate": 7.29162845633782e-05, "loss": 1.7377, "step": 3875 }, { "epoch": 0.698221121369061, "grad_norm": 1.2801711559295654, "learning_rate": 7.290369710327638e-05, "loss": 1.4187, "step": 3876 }, { "epoch": 0.6984012609772574, "grad_norm": 1.2746928930282593, "learning_rate": 7.289110780585115e-05, "loss": 1.3883, "step": 3877 }, { "epoch": 0.6985814005854537, "grad_norm": 1.3822486400604248, "learning_rate": 7.287851667211238e-05, "loss": 1.6047, "step": 3878 }, { "epoch": 0.6987615401936501, "grad_norm": 1.3795428276062012, "learning_rate": 7.286592370307014e-05, "loss": 1.5998, "step": 3879 }, { "epoch": 0.6989416798018464, "grad_norm": 1.468690276145935, "learning_rate": 7.285332889973467e-05, "loss": 1.7966, "step": 3880 }, { "epoch": 0.6991218194100428, "grad_norm": 1.3186126947402954, "learning_rate": 7.284073226311627e-05, "loss": 1.5719, "step": 3881 }, { "epoch": 0.6993019590182391, "grad_norm": 1.2671804428100586, "learning_rate": 7.282813379422546e-05, "loss": 1.5765, "step": 3882 }, { "epoch": 0.6994820986264355, "grad_norm": 1.311571478843689, "learning_rate": 7.281553349407287e-05, "loss": 1.7042, "step": 3883 }, { "epoch": 0.6996622382346318, "grad_norm": 1.355029821395874, "learning_rate": 7.280293136366929e-05, "loss": 1.6722, "step": 3884 }, { "epoch": 0.6998423778428282, "grad_norm": 1.5546504259109497, "learning_rate": 7.279032740402567e-05, "loss": 1.6475, "step": 3885 }, { "epoch": 0.7000225174510245, "grad_norm": 1.3103325366973877, "learning_rate": 7.277772161615309e-05, "loss": 1.4711, "step": 3886 }, { "epoch": 0.7002026570592209, "grad_norm": 1.3622653484344482, "learning_rate": 7.276511400106277e-05, "loss": 1.4725, "step": 3887 }, { "epoch": 0.7003827966674172, "grad_norm": 1.3578202724456787, "learning_rate": 7.275250455976609e-05, "loss": 1.6427, "step": 3888 }, { "epoch": 0.7005629362756136, "grad_norm": 1.2511574029922485, "learning_rate": 7.273989329327459e-05, "loss": 1.7029, "step": 3889 }, { "epoch": 0.7007430758838099, "grad_norm": 1.3640930652618408, "learning_rate": 7.272728020259992e-05, "loss": 1.7626, "step": 3890 }, { "epoch": 0.7009232154920063, "grad_norm": 1.3834847211837769, "learning_rate": 7.27146652887539e-05, "loss": 1.6992, "step": 3891 }, { "epoch": 0.7011033551002026, "grad_norm": 1.414367437362671, "learning_rate": 7.270204855274849e-05, "loss": 1.7793, "step": 3892 }, { "epoch": 0.701283494708399, "grad_norm": 1.5253984928131104, "learning_rate": 7.26894299955958e-05, "loss": 1.8531, "step": 3893 }, { "epoch": 0.7014636343165953, "grad_norm": 1.3273879289627075, "learning_rate": 7.26768096183081e-05, "loss": 1.3552, "step": 3894 }, { "epoch": 0.7016437739247917, "grad_norm": 1.562617301940918, "learning_rate": 7.266418742189778e-05, "loss": 1.6576, "step": 3895 }, { "epoch": 0.7018239135329881, "grad_norm": 1.5604127645492554, "learning_rate": 7.265156340737737e-05, "loss": 1.5371, "step": 3896 }, { "epoch": 0.7020040531411844, "grad_norm": 1.3432213068008423, "learning_rate": 7.263893757575959e-05, "loss": 1.4072, "step": 3897 }, { "epoch": 0.7021841927493808, "grad_norm": 1.4777671098709106, "learning_rate": 7.262630992805725e-05, "loss": 1.6485, "step": 3898 }, { "epoch": 0.7023643323575771, "grad_norm": 1.305596113204956, "learning_rate": 7.261368046528337e-05, "loss": 1.3085, "step": 3899 }, { "epoch": 0.7025444719657735, "grad_norm": 1.323996901512146, "learning_rate": 7.260104918845105e-05, "loss": 1.5566, "step": 3900 }, { "epoch": 0.7027246115739698, "grad_norm": 1.1090295314788818, "learning_rate": 7.258841609857357e-05, "loss": 1.5064, "step": 3901 }, { "epoch": 0.7029047511821662, "grad_norm": 1.2259312868118286, "learning_rate": 7.257578119666434e-05, "loss": 1.9847, "step": 3902 }, { "epoch": 0.7030848907903625, "grad_norm": 1.179762601852417, "learning_rate": 7.256314448373696e-05, "loss": 1.7255, "step": 3903 }, { "epoch": 0.7032650303985589, "grad_norm": 1.2060153484344482, "learning_rate": 7.255050596080509e-05, "loss": 1.8005, "step": 3904 }, { "epoch": 0.7034451700067552, "grad_norm": 1.2332481145858765, "learning_rate": 7.253786562888263e-05, "loss": 1.7718, "step": 3905 }, { "epoch": 0.7036253096149516, "grad_norm": 1.3497898578643799, "learning_rate": 7.252522348898359e-05, "loss": 1.8627, "step": 3906 }, { "epoch": 0.7038054492231479, "grad_norm": 1.3237775564193726, "learning_rate": 7.251257954212207e-05, "loss": 1.9482, "step": 3907 }, { "epoch": 0.7039855888313443, "grad_norm": 1.4205254316329956, "learning_rate": 7.249993378931239e-05, "loss": 1.8779, "step": 3908 }, { "epoch": 0.7041657284395406, "grad_norm": 1.6139785051345825, "learning_rate": 7.248728623156899e-05, "loss": 2.2808, "step": 3909 }, { "epoch": 0.704345868047737, "grad_norm": 1.4392380714416504, "learning_rate": 7.247463686990643e-05, "loss": 1.7677, "step": 3910 }, { "epoch": 0.7045260076559333, "grad_norm": 1.341322898864746, "learning_rate": 7.246198570533944e-05, "loss": 1.6774, "step": 3911 }, { "epoch": 0.7047061472641297, "grad_norm": 1.24701726436615, "learning_rate": 7.24493327388829e-05, "loss": 1.5218, "step": 3912 }, { "epoch": 0.704886286872326, "grad_norm": 1.2835712432861328, "learning_rate": 7.243667797155185e-05, "loss": 1.6536, "step": 3913 }, { "epoch": 0.7050664264805224, "grad_norm": 1.197148323059082, "learning_rate": 7.24240214043614e-05, "loss": 1.541, "step": 3914 }, { "epoch": 0.7052465660887187, "grad_norm": 1.166495442390442, "learning_rate": 7.241136303832687e-05, "loss": 1.5309, "step": 3915 }, { "epoch": 0.7054267056969151, "grad_norm": 1.3184785842895508, "learning_rate": 7.239870287446372e-05, "loss": 1.4287, "step": 3916 }, { "epoch": 0.7056068453051114, "grad_norm": 1.2774484157562256, "learning_rate": 7.238604091378753e-05, "loss": 1.4549, "step": 3917 }, { "epoch": 0.7057869849133078, "grad_norm": 1.3129510879516602, "learning_rate": 7.237337715731405e-05, "loss": 1.726, "step": 3918 }, { "epoch": 0.7059671245215041, "grad_norm": 1.2660243511199951, "learning_rate": 7.236071160605914e-05, "loss": 1.4032, "step": 3919 }, { "epoch": 0.7061472641297005, "grad_norm": 1.1676615476608276, "learning_rate": 7.234804426103885e-05, "loss": 1.4168, "step": 3920 }, { "epoch": 0.7063274037378968, "grad_norm": 1.321829915046692, "learning_rate": 7.233537512326934e-05, "loss": 1.509, "step": 3921 }, { "epoch": 0.7065075433460932, "grad_norm": 1.3350731134414673, "learning_rate": 7.23227041937669e-05, "loss": 1.4685, "step": 3922 }, { "epoch": 0.7066876829542895, "grad_norm": 1.320751667022705, "learning_rate": 7.2310031473548e-05, "loss": 1.7096, "step": 3923 }, { "epoch": 0.7068678225624859, "grad_norm": 1.2472236156463623, "learning_rate": 7.229735696362927e-05, "loss": 1.3546, "step": 3924 }, { "epoch": 0.7070479621706823, "grad_norm": 1.34446382522583, "learning_rate": 7.228468066502741e-05, "loss": 1.6708, "step": 3925 }, { "epoch": 0.7072281017788786, "grad_norm": 1.2939053773880005, "learning_rate": 7.227200257875931e-05, "loss": 1.515, "step": 3926 }, { "epoch": 0.707408241387075, "grad_norm": 1.395036220550537, "learning_rate": 7.225932270584202e-05, "loss": 1.6256, "step": 3927 }, { "epoch": 0.7075883809952713, "grad_norm": 1.2223687171936035, "learning_rate": 7.224664104729271e-05, "loss": 1.5721, "step": 3928 }, { "epoch": 0.7077685206034677, "grad_norm": 1.5163313150405884, "learning_rate": 7.223395760412868e-05, "loss": 1.7254, "step": 3929 }, { "epoch": 0.707948660211664, "grad_norm": 1.196772813796997, "learning_rate": 7.22212723773674e-05, "loss": 1.2991, "step": 3930 }, { "epoch": 0.7081287998198604, "grad_norm": 1.1617646217346191, "learning_rate": 7.22085853680265e-05, "loss": 1.506, "step": 3931 }, { "epoch": 0.7083089394280567, "grad_norm": 1.4280757904052734, "learning_rate": 7.219589657712367e-05, "loss": 1.6633, "step": 3932 }, { "epoch": 0.7084890790362531, "grad_norm": 1.447654366493225, "learning_rate": 7.218320600567683e-05, "loss": 1.7713, "step": 3933 }, { "epoch": 0.7086692186444494, "grad_norm": 1.418230652809143, "learning_rate": 7.217051365470403e-05, "loss": 1.6651, "step": 3934 }, { "epoch": 0.7088493582526458, "grad_norm": 1.446295142173767, "learning_rate": 7.21578195252234e-05, "loss": 1.7017, "step": 3935 }, { "epoch": 0.7090294978608421, "grad_norm": 1.2032145261764526, "learning_rate": 7.214512361825328e-05, "loss": 1.6577, "step": 3936 }, { "epoch": 0.7092096374690385, "grad_norm": 1.3188484907150269, "learning_rate": 7.213242593481214e-05, "loss": 1.5251, "step": 3937 }, { "epoch": 0.7093897770772348, "grad_norm": 1.3633226156234741, "learning_rate": 7.211972647591858e-05, "loss": 1.5452, "step": 3938 }, { "epoch": 0.7095699166854312, "grad_norm": 1.3551186323165894, "learning_rate": 7.210702524259132e-05, "loss": 1.7061, "step": 3939 }, { "epoch": 0.7097500562936275, "grad_norm": 1.3911857604980469, "learning_rate": 7.209432223584928e-05, "loss": 1.6126, "step": 3940 }, { "epoch": 0.7099301959018239, "grad_norm": 1.241748332977295, "learning_rate": 7.208161745671146e-05, "loss": 1.6858, "step": 3941 }, { "epoch": 0.7101103355100202, "grad_norm": 1.389390230178833, "learning_rate": 7.206891090619706e-05, "loss": 1.6792, "step": 3942 }, { "epoch": 0.7102904751182166, "grad_norm": 1.5643136501312256, "learning_rate": 7.205620258532534e-05, "loss": 1.4664, "step": 3943 }, { "epoch": 0.7104706147264129, "grad_norm": 1.4613926410675049, "learning_rate": 7.204349249511582e-05, "loss": 1.8153, "step": 3944 }, { "epoch": 0.7106507543346093, "grad_norm": 1.3153282403945923, "learning_rate": 7.203078063658807e-05, "loss": 1.5791, "step": 3945 }, { "epoch": 0.7108308939428056, "grad_norm": 1.5081466436386108, "learning_rate": 7.201806701076181e-05, "loss": 1.7991, "step": 3946 }, { "epoch": 0.711011033551002, "grad_norm": 1.3599598407745361, "learning_rate": 7.200535161865696e-05, "loss": 1.8239, "step": 3947 }, { "epoch": 0.7111911731591983, "grad_norm": 1.339067816734314, "learning_rate": 7.19926344612935e-05, "loss": 1.3831, "step": 3948 }, { "epoch": 0.7113713127673947, "grad_norm": 1.2522943019866943, "learning_rate": 7.197991553969164e-05, "loss": 1.3998, "step": 3949 }, { "epoch": 0.711551452375591, "grad_norm": 1.3630602359771729, "learning_rate": 7.196719485487164e-05, "loss": 1.5852, "step": 3950 }, { "epoch": 0.7117315919837874, "grad_norm": 1.2519137859344482, "learning_rate": 7.195447240785399e-05, "loss": 2.053, "step": 3951 }, { "epoch": 0.7119117315919838, "grad_norm": 1.4044971466064453, "learning_rate": 7.194174819965925e-05, "loss": 2.0164, "step": 3952 }, { "epoch": 0.7120918712001801, "grad_norm": 1.2754586935043335, "learning_rate": 7.192902223130814e-05, "loss": 2.0396, "step": 3953 }, { "epoch": 0.7122720108083765, "grad_norm": 1.1970125436782837, "learning_rate": 7.191629450382155e-05, "loss": 1.632, "step": 3954 }, { "epoch": 0.7124521504165728, "grad_norm": 1.2506103515625, "learning_rate": 7.19035650182205e-05, "loss": 1.9135, "step": 3955 }, { "epoch": 0.7126322900247692, "grad_norm": 1.161482572555542, "learning_rate": 7.189083377552615e-05, "loss": 1.4625, "step": 3956 }, { "epoch": 0.7128124296329655, "grad_norm": 1.3697588443756104, "learning_rate": 7.187810077675975e-05, "loss": 1.9631, "step": 3957 }, { "epoch": 0.7129925692411619, "grad_norm": 1.3391778469085693, "learning_rate": 7.186536602294278e-05, "loss": 1.8479, "step": 3958 }, { "epoch": 0.7131727088493582, "grad_norm": 1.4021704196929932, "learning_rate": 7.185262951509678e-05, "loss": 1.9375, "step": 3959 }, { "epoch": 0.7133528484575546, "grad_norm": 1.9403471946716309, "learning_rate": 7.183989125424349e-05, "loss": 2.463, "step": 3960 }, { "epoch": 0.7135329880657509, "grad_norm": 1.3209342956542969, "learning_rate": 7.182715124140478e-05, "loss": 1.7369, "step": 3961 }, { "epoch": 0.7137131276739473, "grad_norm": 1.35624098777771, "learning_rate": 7.181440947760261e-05, "loss": 1.7724, "step": 3962 }, { "epoch": 0.7138932672821436, "grad_norm": 1.3411002159118652, "learning_rate": 7.180166596385914e-05, "loss": 1.627, "step": 3963 }, { "epoch": 0.71407340689034, "grad_norm": 1.2416220903396606, "learning_rate": 7.178892070119665e-05, "loss": 1.6305, "step": 3964 }, { "epoch": 0.7142535464985363, "grad_norm": 1.2578668594360352, "learning_rate": 7.177617369063755e-05, "loss": 1.6771, "step": 3965 }, { "epoch": 0.7144336861067327, "grad_norm": 1.3047893047332764, "learning_rate": 7.17634249332044e-05, "loss": 1.5447, "step": 3966 }, { "epoch": 0.714613825714929, "grad_norm": 1.4433828592300415, "learning_rate": 7.175067442991992e-05, "loss": 1.5483, "step": 3967 }, { "epoch": 0.7147939653231254, "grad_norm": 1.3985286951065063, "learning_rate": 7.173792218180692e-05, "loss": 1.6664, "step": 3968 }, { "epoch": 0.7149741049313217, "grad_norm": 1.3115431070327759, "learning_rate": 7.172516818988838e-05, "loss": 1.6203, "step": 3969 }, { "epoch": 0.7151542445395181, "grad_norm": 1.1956276893615723, "learning_rate": 7.171241245518744e-05, "loss": 1.5123, "step": 3970 }, { "epoch": 0.7153343841477144, "grad_norm": 1.3231576681137085, "learning_rate": 7.169965497872735e-05, "loss": 1.4703, "step": 3971 }, { "epoch": 0.7155145237559108, "grad_norm": 1.3194525241851807, "learning_rate": 7.168689576153151e-05, "loss": 1.4696, "step": 3972 }, { "epoch": 0.7156946633641071, "grad_norm": 1.4018319845199585, "learning_rate": 7.167413480462344e-05, "loss": 1.6021, "step": 3973 }, { "epoch": 0.7158748029723035, "grad_norm": 1.2673927545547485, "learning_rate": 7.166137210902685e-05, "loss": 1.5744, "step": 3974 }, { "epoch": 0.7160549425804998, "grad_norm": 1.2739746570587158, "learning_rate": 7.164860767576553e-05, "loss": 1.4467, "step": 3975 }, { "epoch": 0.7162350821886962, "grad_norm": 1.2330594062805176, "learning_rate": 7.163584150586345e-05, "loss": 1.2784, "step": 3976 }, { "epoch": 0.7164152217968925, "grad_norm": 1.389986276626587, "learning_rate": 7.16230736003447e-05, "loss": 1.6499, "step": 3977 }, { "epoch": 0.716595361405089, "grad_norm": 1.3281902074813843, "learning_rate": 7.161030396023353e-05, "loss": 1.746, "step": 3978 }, { "epoch": 0.7167755010132852, "grad_norm": 1.2783664464950562, "learning_rate": 7.15975325865543e-05, "loss": 1.7524, "step": 3979 }, { "epoch": 0.7169556406214816, "grad_norm": 1.2845207452774048, "learning_rate": 7.158475948033151e-05, "loss": 1.5091, "step": 3980 }, { "epoch": 0.717135780229678, "grad_norm": 1.2399158477783203, "learning_rate": 7.157198464258984e-05, "loss": 1.463, "step": 3981 }, { "epoch": 0.7173159198378743, "grad_norm": 1.195797085762024, "learning_rate": 7.155920807435406e-05, "loss": 1.3059, "step": 3982 }, { "epoch": 0.7174960594460708, "grad_norm": 1.3450088500976562, "learning_rate": 7.154642977664912e-05, "loss": 1.6668, "step": 3983 }, { "epoch": 0.717676199054267, "grad_norm": 1.4196999073028564, "learning_rate": 7.153364975050009e-05, "loss": 1.6847, "step": 3984 }, { "epoch": 0.7178563386624635, "grad_norm": 1.3191006183624268, "learning_rate": 7.152086799693214e-05, "loss": 1.5292, "step": 3985 }, { "epoch": 0.7180364782706598, "grad_norm": 1.2243523597717285, "learning_rate": 7.150808451697066e-05, "loss": 1.6042, "step": 3986 }, { "epoch": 0.7182166178788562, "grad_norm": 1.3338755369186401, "learning_rate": 7.14952993116411e-05, "loss": 1.5491, "step": 3987 }, { "epoch": 0.7183967574870525, "grad_norm": 1.3634601831436157, "learning_rate": 7.14825123819691e-05, "loss": 1.5375, "step": 3988 }, { "epoch": 0.7185768970952489, "grad_norm": 1.427426815032959, "learning_rate": 7.146972372898045e-05, "loss": 1.7399, "step": 3989 }, { "epoch": 0.7187570367034452, "grad_norm": 1.548447608947754, "learning_rate": 7.145693335370098e-05, "loss": 1.8068, "step": 3990 }, { "epoch": 0.7189371763116416, "grad_norm": 1.4484425783157349, "learning_rate": 7.14441412571568e-05, "loss": 1.6907, "step": 3991 }, { "epoch": 0.7191173159198379, "grad_norm": 1.2436573505401611, "learning_rate": 7.143134744037403e-05, "loss": 1.357, "step": 3992 }, { "epoch": 0.7192974555280343, "grad_norm": 1.5230742692947388, "learning_rate": 7.141855190437901e-05, "loss": 1.5732, "step": 3993 }, { "epoch": 0.7194775951362306, "grad_norm": 1.416967511177063, "learning_rate": 7.140575465019819e-05, "loss": 1.6801, "step": 3994 }, { "epoch": 0.719657734744427, "grad_norm": 1.3700101375579834, "learning_rate": 7.139295567885818e-05, "loss": 1.475, "step": 3995 }, { "epoch": 0.7198378743526233, "grad_norm": 1.3727749586105347, "learning_rate": 7.138015499138566e-05, "loss": 1.5376, "step": 3996 }, { "epoch": 0.7200180139608197, "grad_norm": 1.3151254653930664, "learning_rate": 7.136735258880754e-05, "loss": 1.6662, "step": 3997 }, { "epoch": 0.720198153569016, "grad_norm": 1.365763783454895, "learning_rate": 7.135454847215079e-05, "loss": 1.4079, "step": 3998 }, { "epoch": 0.7203782931772124, "grad_norm": 1.4193084239959717, "learning_rate": 7.134174264244256e-05, "loss": 1.5276, "step": 3999 }, { "epoch": 0.7205584327854087, "grad_norm": 1.2450289726257324, "learning_rate": 7.132893510071014e-05, "loss": 1.3972, "step": 4000 }, { "epoch": 0.7207385723936051, "grad_norm": 1.3603992462158203, "learning_rate": 7.131612584798093e-05, "loss": 2.3097, "step": 4001 }, { "epoch": 0.7209187120018014, "grad_norm": 1.162360668182373, "learning_rate": 7.130331488528249e-05, "loss": 1.7911, "step": 4002 }, { "epoch": 0.7210988516099978, "grad_norm": 1.2884531021118164, "learning_rate": 7.12905022136425e-05, "loss": 2.0206, "step": 4003 }, { "epoch": 0.721278991218194, "grad_norm": 1.523433804512024, "learning_rate": 7.127768783408879e-05, "loss": 1.9963, "step": 4004 }, { "epoch": 0.7214591308263905, "grad_norm": 1.2343974113464355, "learning_rate": 7.126487174764936e-05, "loss": 1.7754, "step": 4005 }, { "epoch": 0.7216392704345868, "grad_norm": 1.3891587257385254, "learning_rate": 7.125205395535223e-05, "loss": 1.6804, "step": 4006 }, { "epoch": 0.7218194100427832, "grad_norm": 1.4183688163757324, "learning_rate": 7.12392344582257e-05, "loss": 1.948, "step": 4007 }, { "epoch": 0.7219995496509795, "grad_norm": 1.459185242652893, "learning_rate": 7.122641325729813e-05, "loss": 1.9604, "step": 4008 }, { "epoch": 0.7221796892591759, "grad_norm": 1.7456086874008179, "learning_rate": 7.121359035359802e-05, "loss": 2.0318, "step": 4009 }, { "epoch": 0.7223598288673723, "grad_norm": 1.3245593309402466, "learning_rate": 7.120076574815404e-05, "loss": 1.6815, "step": 4010 }, { "epoch": 0.7225399684755686, "grad_norm": 1.2667951583862305, "learning_rate": 7.118793944199496e-05, "loss": 1.31, "step": 4011 }, { "epoch": 0.722720108083765, "grad_norm": 1.2869495153427124, "learning_rate": 7.117511143614968e-05, "loss": 1.6299, "step": 4012 }, { "epoch": 0.7229002476919613, "grad_norm": 1.3147913217544556, "learning_rate": 7.116228173164727e-05, "loss": 1.5726, "step": 4013 }, { "epoch": 0.7230803873001577, "grad_norm": 1.3100680112838745, "learning_rate": 7.114945032951696e-05, "loss": 1.5938, "step": 4014 }, { "epoch": 0.723260526908354, "grad_norm": 1.3373206853866577, "learning_rate": 7.113661723078801e-05, "loss": 1.4637, "step": 4015 }, { "epoch": 0.7234406665165504, "grad_norm": 1.221670389175415, "learning_rate": 7.112378243648995e-05, "loss": 1.6497, "step": 4016 }, { "epoch": 0.7236208061247467, "grad_norm": 1.302420735359192, "learning_rate": 7.111094594765234e-05, "loss": 1.6215, "step": 4017 }, { "epoch": 0.7238009457329431, "grad_norm": 1.2747881412506104, "learning_rate": 7.109810776530492e-05, "loss": 1.4865, "step": 4018 }, { "epoch": 0.7239810853411394, "grad_norm": 1.280052661895752, "learning_rate": 7.108526789047758e-05, "loss": 1.4805, "step": 4019 }, { "epoch": 0.7241612249493358, "grad_norm": 1.3133503198623657, "learning_rate": 7.107242632420032e-05, "loss": 1.372, "step": 4020 }, { "epoch": 0.7243413645575321, "grad_norm": 1.3217718601226807, "learning_rate": 7.10595830675033e-05, "loss": 1.5532, "step": 4021 }, { "epoch": 0.7245215041657285, "grad_norm": 1.302534580230713, "learning_rate": 7.104673812141675e-05, "loss": 1.5096, "step": 4022 }, { "epoch": 0.7247016437739248, "grad_norm": 1.3490378856658936, "learning_rate": 7.103389148697114e-05, "loss": 1.5999, "step": 4023 }, { "epoch": 0.7248817833821212, "grad_norm": 1.358303427696228, "learning_rate": 7.1021043165197e-05, "loss": 1.5972, "step": 4024 }, { "epoch": 0.7250619229903175, "grad_norm": 1.3473252058029175, "learning_rate": 7.100819315712501e-05, "loss": 1.644, "step": 4025 }, { "epoch": 0.7252420625985139, "grad_norm": 1.2381086349487305, "learning_rate": 7.0995341463786e-05, "loss": 1.4305, "step": 4026 }, { "epoch": 0.7254222022067102, "grad_norm": 1.2565757036209106, "learning_rate": 7.098248808621093e-05, "loss": 1.4568, "step": 4027 }, { "epoch": 0.7256023418149066, "grad_norm": 1.3137116432189941, "learning_rate": 7.096963302543088e-05, "loss": 1.5448, "step": 4028 }, { "epoch": 0.7257824814231029, "grad_norm": 1.2895878553390503, "learning_rate": 7.095677628247708e-05, "loss": 1.4827, "step": 4029 }, { "epoch": 0.7259626210312993, "grad_norm": 1.4240233898162842, "learning_rate": 7.094391785838091e-05, "loss": 1.7361, "step": 4030 }, { "epoch": 0.7261427606394956, "grad_norm": 1.5960263013839722, "learning_rate": 7.093105775417384e-05, "loss": 1.9167, "step": 4031 }, { "epoch": 0.726322900247692, "grad_norm": 1.33319091796875, "learning_rate": 7.091819597088751e-05, "loss": 1.6361, "step": 4032 }, { "epoch": 0.7265030398558883, "grad_norm": 1.4717464447021484, "learning_rate": 7.090533250955369e-05, "loss": 1.6254, "step": 4033 }, { "epoch": 0.7266831794640847, "grad_norm": 1.4692511558532715, "learning_rate": 7.089246737120428e-05, "loss": 1.6136, "step": 4034 }, { "epoch": 0.726863319072281, "grad_norm": 1.498299241065979, "learning_rate": 7.08796005568713e-05, "loss": 1.9383, "step": 4035 }, { "epoch": 0.7270434586804774, "grad_norm": 1.4138383865356445, "learning_rate": 7.086673206758694e-05, "loss": 1.6045, "step": 4036 }, { "epoch": 0.7272235982886737, "grad_norm": 1.4371824264526367, "learning_rate": 7.085386190438352e-05, "loss": 1.5348, "step": 4037 }, { "epoch": 0.7274037378968701, "grad_norm": 1.4831973314285278, "learning_rate": 7.084099006829344e-05, "loss": 1.7428, "step": 4038 }, { "epoch": 0.7275838775050665, "grad_norm": 1.272243618965149, "learning_rate": 7.082811656034928e-05, "loss": 1.4864, "step": 4039 }, { "epoch": 0.7277640171132628, "grad_norm": 1.374729871749878, "learning_rate": 7.081524138158377e-05, "loss": 1.5607, "step": 4040 }, { "epoch": 0.7279441567214592, "grad_norm": 1.5588423013687134, "learning_rate": 7.08023645330297e-05, "loss": 1.9319, "step": 4041 }, { "epoch": 0.7281242963296555, "grad_norm": 1.4162218570709229, "learning_rate": 7.078948601572012e-05, "loss": 1.6257, "step": 4042 }, { "epoch": 0.7283044359378519, "grad_norm": 1.40138840675354, "learning_rate": 7.077660583068808e-05, "loss": 1.6812, "step": 4043 }, { "epoch": 0.7284845755460482, "grad_norm": 1.5075501203536987, "learning_rate": 7.076372397896685e-05, "loss": 1.8057, "step": 4044 }, { "epoch": 0.7286647151542446, "grad_norm": 1.4859552383422852, "learning_rate": 7.07508404615898e-05, "loss": 1.5626, "step": 4045 }, { "epoch": 0.7288448547624409, "grad_norm": 1.4579800367355347, "learning_rate": 7.073795527959041e-05, "loss": 1.469, "step": 4046 }, { "epoch": 0.7290249943706373, "grad_norm": 1.393281102180481, "learning_rate": 7.072506843400237e-05, "loss": 1.4869, "step": 4047 }, { "epoch": 0.7292051339788336, "grad_norm": 1.4526339769363403, "learning_rate": 7.071217992585943e-05, "loss": 1.5421, "step": 4048 }, { "epoch": 0.72938527358703, "grad_norm": 1.3382951021194458, "learning_rate": 7.069928975619551e-05, "loss": 1.6026, "step": 4049 }, { "epoch": 0.7295654131952263, "grad_norm": 1.3069384098052979, "learning_rate": 7.068639792604463e-05, "loss": 1.2638, "step": 4050 }, { "epoch": 0.7297455528034227, "grad_norm": 1.2935819625854492, "learning_rate": 7.0673504436441e-05, "loss": 2.17, "step": 4051 }, { "epoch": 0.729925692411619, "grad_norm": 1.1897939443588257, "learning_rate": 7.066060928841892e-05, "loss": 1.9364, "step": 4052 }, { "epoch": 0.7301058320198154, "grad_norm": 1.3020228147506714, "learning_rate": 7.064771248301282e-05, "loss": 2.1757, "step": 4053 }, { "epoch": 0.7302859716280117, "grad_norm": 1.299071192741394, "learning_rate": 7.063481402125728e-05, "loss": 1.9235, "step": 4054 }, { "epoch": 0.7304661112362081, "grad_norm": 1.297268033027649, "learning_rate": 7.0621913904187e-05, "loss": 2.168, "step": 4055 }, { "epoch": 0.7306462508444044, "grad_norm": 1.1980663537979126, "learning_rate": 7.060901213283684e-05, "loss": 1.8966, "step": 4056 }, { "epoch": 0.7308263904526008, "grad_norm": 1.4256212711334229, "learning_rate": 7.059610870824176e-05, "loss": 2.0502, "step": 4057 }, { "epoch": 0.7310065300607971, "grad_norm": 1.5272873640060425, "learning_rate": 7.058320363143688e-05, "loss": 1.9595, "step": 4058 }, { "epoch": 0.7311866696689935, "grad_norm": 1.5679125785827637, "learning_rate": 7.057029690345742e-05, "loss": 2.1143, "step": 4059 }, { "epoch": 0.7313668092771898, "grad_norm": 1.5130186080932617, "learning_rate": 7.055738852533877e-05, "loss": 1.9007, "step": 4060 }, { "epoch": 0.7315469488853862, "grad_norm": 1.260037899017334, "learning_rate": 7.054447849811642e-05, "loss": 1.6843, "step": 4061 }, { "epoch": 0.7317270884935825, "grad_norm": 1.2341854572296143, "learning_rate": 7.053156682282603e-05, "loss": 1.4969, "step": 4062 }, { "epoch": 0.7319072281017789, "grad_norm": 1.2701035737991333, "learning_rate": 7.051865350050332e-05, "loss": 1.5142, "step": 4063 }, { "epoch": 0.7320873677099752, "grad_norm": 1.486148476600647, "learning_rate": 7.050573853218425e-05, "loss": 1.9954, "step": 4064 }, { "epoch": 0.7322675073181716, "grad_norm": 1.300268292427063, "learning_rate": 7.04928219189048e-05, "loss": 1.7385, "step": 4065 }, { "epoch": 0.732447646926368, "grad_norm": 1.2946431636810303, "learning_rate": 7.047990366170116e-05, "loss": 1.6872, "step": 4066 }, { "epoch": 0.7326277865345643, "grad_norm": 1.3352512121200562, "learning_rate": 7.046698376160964e-05, "loss": 1.7155, "step": 4067 }, { "epoch": 0.7328079261427607, "grad_norm": 1.4107164144515991, "learning_rate": 7.045406221966663e-05, "loss": 1.6769, "step": 4068 }, { "epoch": 0.732988065750957, "grad_norm": 1.2012137174606323, "learning_rate": 7.044113903690873e-05, "loss": 1.5363, "step": 4069 }, { "epoch": 0.7331682053591534, "grad_norm": 1.133810043334961, "learning_rate": 7.042821421437262e-05, "loss": 1.541, "step": 4070 }, { "epoch": 0.7333483449673497, "grad_norm": 1.2481111288070679, "learning_rate": 7.04152877530951e-05, "loss": 1.5736, "step": 4071 }, { "epoch": 0.7335284845755461, "grad_norm": 1.3325014114379883, "learning_rate": 7.040235965411314e-05, "loss": 1.6684, "step": 4072 }, { "epoch": 0.7337086241837424, "grad_norm": 1.0985866785049438, "learning_rate": 7.038942991846382e-05, "loss": 1.3557, "step": 4073 }, { "epoch": 0.7338887637919388, "grad_norm": 1.3543462753295898, "learning_rate": 7.037649854718438e-05, "loss": 1.6038, "step": 4074 }, { "epoch": 0.7340689034001351, "grad_norm": 1.2914390563964844, "learning_rate": 7.036356554131214e-05, "loss": 1.3374, "step": 4075 }, { "epoch": 0.7342490430083315, "grad_norm": 1.1897563934326172, "learning_rate": 7.03506309018846e-05, "loss": 1.4623, "step": 4076 }, { "epoch": 0.7344291826165278, "grad_norm": 1.2917678356170654, "learning_rate": 7.033769462993936e-05, "loss": 1.4686, "step": 4077 }, { "epoch": 0.7346093222247242, "grad_norm": 1.387580394744873, "learning_rate": 7.032475672651415e-05, "loss": 1.7882, "step": 4078 }, { "epoch": 0.7347894618329205, "grad_norm": 1.1911344528198242, "learning_rate": 7.031181719264687e-05, "loss": 1.3947, "step": 4079 }, { "epoch": 0.7349696014411169, "grad_norm": 1.3339983224868774, "learning_rate": 7.02988760293755e-05, "loss": 1.5524, "step": 4080 }, { "epoch": 0.7351497410493132, "grad_norm": 1.2976665496826172, "learning_rate": 7.02859332377382e-05, "loss": 1.7326, "step": 4081 }, { "epoch": 0.7353298806575096, "grad_norm": 1.389215111732483, "learning_rate": 7.02729888187732e-05, "loss": 1.6981, "step": 4082 }, { "epoch": 0.7355100202657059, "grad_norm": 1.3509610891342163, "learning_rate": 7.026004277351889e-05, "loss": 1.6327, "step": 4083 }, { "epoch": 0.7356901598739023, "grad_norm": 1.4561971426010132, "learning_rate": 7.024709510301385e-05, "loss": 1.6417, "step": 4084 }, { "epoch": 0.7358702994820986, "grad_norm": 1.2138378620147705, "learning_rate": 7.023414580829668e-05, "loss": 1.21, "step": 4085 }, { "epoch": 0.736050439090295, "grad_norm": 1.3451802730560303, "learning_rate": 7.022119489040621e-05, "loss": 1.5169, "step": 4086 }, { "epoch": 0.7362305786984913, "grad_norm": 1.398783564567566, "learning_rate": 7.020824235038133e-05, "loss": 1.5407, "step": 4087 }, { "epoch": 0.7364107183066877, "grad_norm": 1.3204797506332397, "learning_rate": 7.019528818926108e-05, "loss": 1.5791, "step": 4088 }, { "epoch": 0.736590857914884, "grad_norm": 1.3674566745758057, "learning_rate": 7.018233240808467e-05, "loss": 1.5936, "step": 4089 }, { "epoch": 0.7367709975230804, "grad_norm": 1.2220652103424072, "learning_rate": 7.016937500789137e-05, "loss": 1.4255, "step": 4090 }, { "epoch": 0.7369511371312767, "grad_norm": 1.4028562307357788, "learning_rate": 7.015641598972064e-05, "loss": 1.4876, "step": 4091 }, { "epoch": 0.7371312767394731, "grad_norm": 1.2851715087890625, "learning_rate": 7.014345535461202e-05, "loss": 1.5646, "step": 4092 }, { "epoch": 0.7373114163476694, "grad_norm": 1.415369987487793, "learning_rate": 7.013049310360524e-05, "loss": 1.39, "step": 4093 }, { "epoch": 0.7374915559558658, "grad_norm": 1.3153975009918213, "learning_rate": 7.011752923774009e-05, "loss": 1.4428, "step": 4094 }, { "epoch": 0.7376716955640622, "grad_norm": 1.3487350940704346, "learning_rate": 7.010456375805656e-05, "loss": 1.457, "step": 4095 }, { "epoch": 0.7378518351722585, "grad_norm": 1.3977594375610352, "learning_rate": 7.009159666559472e-05, "loss": 1.5324, "step": 4096 }, { "epoch": 0.7380319747804549, "grad_norm": 1.3922799825668335, "learning_rate": 7.007862796139477e-05, "loss": 1.3293, "step": 4097 }, { "epoch": 0.7382121143886512, "grad_norm": 1.6159543991088867, "learning_rate": 7.006565764649708e-05, "loss": 1.4438, "step": 4098 }, { "epoch": 0.7383922539968476, "grad_norm": 1.3941423892974854, "learning_rate": 7.005268572194208e-05, "loss": 1.6014, "step": 4099 }, { "epoch": 0.7385723936050439, "grad_norm": 1.4492136240005493, "learning_rate": 7.003971218877043e-05, "loss": 1.5172, "step": 4100 }, { "epoch": 0.7387525332132403, "grad_norm": 1.1980843544006348, "learning_rate": 7.00267370480228e-05, "loss": 1.8891, "step": 4101 }, { "epoch": 0.7389326728214366, "grad_norm": 1.1812303066253662, "learning_rate": 7.00137603007401e-05, "loss": 1.8001, "step": 4102 }, { "epoch": 0.739112812429633, "grad_norm": 1.2779420614242554, "learning_rate": 7.00007819479633e-05, "loss": 2.0786, "step": 4103 }, { "epoch": 0.7392929520378293, "grad_norm": 1.2915014028549194, "learning_rate": 6.998780199073349e-05, "loss": 1.8119, "step": 4104 }, { "epoch": 0.7394730916460257, "grad_norm": 1.2572461366653442, "learning_rate": 6.997482043009196e-05, "loss": 1.9241, "step": 4105 }, { "epoch": 0.739653231254222, "grad_norm": 1.255854606628418, "learning_rate": 6.996183726708005e-05, "loss": 1.7558, "step": 4106 }, { "epoch": 0.7398333708624184, "grad_norm": 1.4942491054534912, "learning_rate": 6.994885250273929e-05, "loss": 1.8149, "step": 4107 }, { "epoch": 0.7400135104706147, "grad_norm": 1.391896367073059, "learning_rate": 6.99358661381113e-05, "loss": 2.0134, "step": 4108 }, { "epoch": 0.7401936500788111, "grad_norm": 1.651044487953186, "learning_rate": 6.992287817423784e-05, "loss": 2.124, "step": 4109 }, { "epoch": 0.7403737896870074, "grad_norm": 1.4316761493682861, "learning_rate": 6.99098886121608e-05, "loss": 1.5978, "step": 4110 }, { "epoch": 0.7405539292952038, "grad_norm": 1.2705094814300537, "learning_rate": 6.989689745292217e-05, "loss": 1.6258, "step": 4111 }, { "epoch": 0.7407340689034001, "grad_norm": 1.2856587171554565, "learning_rate": 6.988390469756415e-05, "loss": 1.6745, "step": 4112 }, { "epoch": 0.7409142085115965, "grad_norm": 1.1784669160842896, "learning_rate": 6.987091034712894e-05, "loss": 1.291, "step": 4113 }, { "epoch": 0.7410943481197928, "grad_norm": 1.2560909986495972, "learning_rate": 6.985791440265902e-05, "loss": 1.6296, "step": 4114 }, { "epoch": 0.7412744877279892, "grad_norm": 1.3841191530227661, "learning_rate": 6.984491686519685e-05, "loss": 1.694, "step": 4115 }, { "epoch": 0.7414546273361855, "grad_norm": 1.1775438785552979, "learning_rate": 6.983191773578513e-05, "loss": 1.5431, "step": 4116 }, { "epoch": 0.7416347669443819, "grad_norm": 1.1501076221466064, "learning_rate": 6.981891701546663e-05, "loss": 1.4605, "step": 4117 }, { "epoch": 0.7418149065525782, "grad_norm": 1.2753643989562988, "learning_rate": 6.980591470528425e-05, "loss": 1.5419, "step": 4118 }, { "epoch": 0.7419950461607746, "grad_norm": 1.362391710281372, "learning_rate": 6.979291080628105e-05, "loss": 1.7005, "step": 4119 }, { "epoch": 0.7421751857689709, "grad_norm": 1.2860534191131592, "learning_rate": 6.977990531950018e-05, "loss": 1.731, "step": 4120 }, { "epoch": 0.7423553253771673, "grad_norm": 1.203721046447754, "learning_rate": 6.976689824598494e-05, "loss": 1.5181, "step": 4121 }, { "epoch": 0.7425354649853636, "grad_norm": 1.4677066802978516, "learning_rate": 6.975388958677875e-05, "loss": 1.6474, "step": 4122 }, { "epoch": 0.74271560459356, "grad_norm": 1.301697850227356, "learning_rate": 6.974087934292517e-05, "loss": 1.5654, "step": 4123 }, { "epoch": 0.7428957442017564, "grad_norm": 1.2527819871902466, "learning_rate": 6.972786751546787e-05, "loss": 1.5692, "step": 4124 }, { "epoch": 0.7430758838099527, "grad_norm": 1.1882433891296387, "learning_rate": 6.971485410545062e-05, "loss": 1.41, "step": 4125 }, { "epoch": 0.7432560234181491, "grad_norm": 1.2155542373657227, "learning_rate": 6.970183911391739e-05, "loss": 1.238, "step": 4126 }, { "epoch": 0.7434361630263454, "grad_norm": 1.3560665845870972, "learning_rate": 6.968882254191223e-05, "loss": 1.6155, "step": 4127 }, { "epoch": 0.7436163026345418, "grad_norm": 1.3439843654632568, "learning_rate": 6.96758043904793e-05, "loss": 1.6426, "step": 4128 }, { "epoch": 0.7437964422427381, "grad_norm": 1.5166290998458862, "learning_rate": 6.966278466066294e-05, "loss": 1.6941, "step": 4129 }, { "epoch": 0.7439765818509345, "grad_norm": 1.2749342918395996, "learning_rate": 6.964976335350757e-05, "loss": 1.3667, "step": 4130 }, { "epoch": 0.7441567214591308, "grad_norm": 1.3304468393325806, "learning_rate": 6.963674047005774e-05, "loss": 1.5901, "step": 4131 }, { "epoch": 0.7443368610673272, "grad_norm": 1.2431001663208008, "learning_rate": 6.962371601135816e-05, "loss": 1.5014, "step": 4132 }, { "epoch": 0.7445170006755235, "grad_norm": 1.2925816774368286, "learning_rate": 6.961068997845366e-05, "loss": 1.596, "step": 4133 }, { "epoch": 0.7446971402837199, "grad_norm": 1.2649071216583252, "learning_rate": 6.959766237238913e-05, "loss": 1.5276, "step": 4134 }, { "epoch": 0.7448772798919162, "grad_norm": 1.4556392431259155, "learning_rate": 6.95846331942097e-05, "loss": 1.7455, "step": 4135 }, { "epoch": 0.7450574195001126, "grad_norm": 1.383287787437439, "learning_rate": 6.957160244496051e-05, "loss": 1.6727, "step": 4136 }, { "epoch": 0.7452375591083089, "grad_norm": 1.2267539501190186, "learning_rate": 6.955857012568692e-05, "loss": 1.3615, "step": 4137 }, { "epoch": 0.7454176987165053, "grad_norm": 1.2379580736160278, "learning_rate": 6.954553623743437e-05, "loss": 1.3082, "step": 4138 }, { "epoch": 0.7455978383247016, "grad_norm": 1.3733998537063599, "learning_rate": 6.95325007812484e-05, "loss": 1.5761, "step": 4139 }, { "epoch": 0.745777977932898, "grad_norm": 1.5266066789627075, "learning_rate": 6.951946375817474e-05, "loss": 1.8277, "step": 4140 }, { "epoch": 0.7459581175410943, "grad_norm": 1.3549997806549072, "learning_rate": 6.950642516925921e-05, "loss": 1.4367, "step": 4141 }, { "epoch": 0.7461382571492907, "grad_norm": 1.323388934135437, "learning_rate": 6.949338501554776e-05, "loss": 1.4384, "step": 4142 }, { "epoch": 0.746318396757487, "grad_norm": 1.2775802612304688, "learning_rate": 6.948034329808644e-05, "loss": 1.497, "step": 4143 }, { "epoch": 0.7464985363656834, "grad_norm": 1.405749797821045, "learning_rate": 6.946730001792148e-05, "loss": 1.5867, "step": 4144 }, { "epoch": 0.7466786759738797, "grad_norm": 1.3147780895233154, "learning_rate": 6.945425517609921e-05, "loss": 1.4402, "step": 4145 }, { "epoch": 0.7468588155820761, "grad_norm": 1.2919516563415527, "learning_rate": 6.944120877366604e-05, "loss": 1.5231, "step": 4146 }, { "epoch": 0.7470389551902724, "grad_norm": 1.4937280416488647, "learning_rate": 6.942816081166859e-05, "loss": 1.6328, "step": 4147 }, { "epoch": 0.7472190947984688, "grad_norm": 1.3586223125457764, "learning_rate": 6.941511129115352e-05, "loss": 1.457, "step": 4148 }, { "epoch": 0.7473992344066651, "grad_norm": 1.3802465200424194, "learning_rate": 6.94020602131677e-05, "loss": 1.5487, "step": 4149 }, { "epoch": 0.7475793740148615, "grad_norm": 1.2864220142364502, "learning_rate": 6.938900757875804e-05, "loss": 1.3871, "step": 4150 }, { "epoch": 0.7477595136230579, "grad_norm": 1.2333890199661255, "learning_rate": 6.937595338897167e-05, "loss": 1.9039, "step": 4151 }, { "epoch": 0.7479396532312542, "grad_norm": 1.222285270690918, "learning_rate": 6.936289764485574e-05, "loss": 1.9035, "step": 4152 }, { "epoch": 0.7481197928394506, "grad_norm": 1.3959789276123047, "learning_rate": 6.934984034745757e-05, "loss": 2.0104, "step": 4153 }, { "epoch": 0.7482999324476469, "grad_norm": 1.1939700841903687, "learning_rate": 6.933678149782465e-05, "loss": 1.8061, "step": 4154 }, { "epoch": 0.7484800720558433, "grad_norm": 1.3192189931869507, "learning_rate": 6.932372109700454e-05, "loss": 2.06, "step": 4155 }, { "epoch": 0.7486602116640396, "grad_norm": 1.5580344200134277, "learning_rate": 6.931065914604493e-05, "loss": 2.0652, "step": 4156 }, { "epoch": 0.748840351272236, "grad_norm": 1.3030911684036255, "learning_rate": 6.929759564599365e-05, "loss": 1.7373, "step": 4157 }, { "epoch": 0.7490204908804323, "grad_norm": 1.580299973487854, "learning_rate": 6.928453059789866e-05, "loss": 2.1873, "step": 4158 }, { "epoch": 0.7492006304886287, "grad_norm": 1.5764505863189697, "learning_rate": 6.927146400280801e-05, "loss": 1.7591, "step": 4159 }, { "epoch": 0.749380770096825, "grad_norm": 2.004701852798462, "learning_rate": 6.92583958617699e-05, "loss": 2.1502, "step": 4160 }, { "epoch": 0.7495609097050214, "grad_norm": 1.2407548427581787, "learning_rate": 6.924532617583266e-05, "loss": 1.5723, "step": 4161 }, { "epoch": 0.7497410493132177, "grad_norm": 1.4992525577545166, "learning_rate": 6.923225494604473e-05, "loss": 1.8719, "step": 4162 }, { "epoch": 0.7499211889214141, "grad_norm": 1.2989745140075684, "learning_rate": 6.921918217345467e-05, "loss": 1.7208, "step": 4163 }, { "epoch": 0.7501013285296104, "grad_norm": 1.203352928161621, "learning_rate": 6.920610785911117e-05, "loss": 1.4796, "step": 4164 }, { "epoch": 0.7502814681378068, "grad_norm": 1.3173727989196777, "learning_rate": 6.919303200406307e-05, "loss": 1.5884, "step": 4165 }, { "epoch": 0.7504616077460031, "grad_norm": 1.3721061944961548, "learning_rate": 6.917995460935928e-05, "loss": 1.8345, "step": 4166 }, { "epoch": 0.7506417473541995, "grad_norm": 1.2218767404556274, "learning_rate": 6.916687567604887e-05, "loss": 1.4724, "step": 4167 }, { "epoch": 0.7508218869623958, "grad_norm": 1.3902645111083984, "learning_rate": 6.915379520518103e-05, "loss": 1.7459, "step": 4168 }, { "epoch": 0.7510020265705922, "grad_norm": 1.3177253007888794, "learning_rate": 6.914071319780507e-05, "loss": 1.5862, "step": 4169 }, { "epoch": 0.7511821661787885, "grad_norm": 1.1674782037734985, "learning_rate": 6.912762965497043e-05, "loss": 1.5232, "step": 4170 }, { "epoch": 0.7513623057869849, "grad_norm": 1.3822455406188965, "learning_rate": 6.911454457772664e-05, "loss": 1.5746, "step": 4171 }, { "epoch": 0.7515424453951812, "grad_norm": 1.274601936340332, "learning_rate": 6.910145796712341e-05, "loss": 1.5619, "step": 4172 }, { "epoch": 0.7517225850033776, "grad_norm": 1.2890963554382324, "learning_rate": 6.908836982421052e-05, "loss": 1.5745, "step": 4173 }, { "epoch": 0.7519027246115739, "grad_norm": 1.3160080909729004, "learning_rate": 6.90752801500379e-05, "loss": 1.5005, "step": 4174 }, { "epoch": 0.7520828642197703, "grad_norm": 1.3277052640914917, "learning_rate": 6.90621889456556e-05, "loss": 1.4722, "step": 4175 }, { "epoch": 0.7522630038279666, "grad_norm": 1.3834278583526611, "learning_rate": 6.904909621211378e-05, "loss": 1.6741, "step": 4176 }, { "epoch": 0.752443143436163, "grad_norm": 1.2452369928359985, "learning_rate": 6.903600195046275e-05, "loss": 1.3404, "step": 4177 }, { "epoch": 0.7526232830443593, "grad_norm": 1.198986291885376, "learning_rate": 6.902290616175291e-05, "loss": 1.3517, "step": 4178 }, { "epoch": 0.7528034226525557, "grad_norm": 1.3502898216247559, "learning_rate": 6.90098088470348e-05, "loss": 1.5992, "step": 4179 }, { "epoch": 0.7529835622607521, "grad_norm": 1.3791543245315552, "learning_rate": 6.899671000735909e-05, "loss": 1.7495, "step": 4180 }, { "epoch": 0.7531637018689484, "grad_norm": 1.263975739479065, "learning_rate": 6.898360964377654e-05, "loss": 1.5498, "step": 4181 }, { "epoch": 0.7533438414771448, "grad_norm": 1.4190168380737305, "learning_rate": 6.89705077573381e-05, "loss": 1.8724, "step": 4182 }, { "epoch": 0.7535239810853411, "grad_norm": 1.3742786645889282, "learning_rate": 6.895740434909476e-05, "loss": 1.5981, "step": 4183 }, { "epoch": 0.7537041206935375, "grad_norm": 1.5329620838165283, "learning_rate": 6.894429942009767e-05, "loss": 1.745, "step": 4184 }, { "epoch": 0.7538842603017338, "grad_norm": 1.347888708114624, "learning_rate": 6.893119297139809e-05, "loss": 1.6484, "step": 4185 }, { "epoch": 0.7540643999099302, "grad_norm": 1.4926917552947998, "learning_rate": 6.891808500404747e-05, "loss": 1.7861, "step": 4186 }, { "epoch": 0.7542445395181265, "grad_norm": 1.3732911348342896, "learning_rate": 6.890497551909725e-05, "loss": 1.703, "step": 4187 }, { "epoch": 0.7544246791263229, "grad_norm": 1.3571734428405762, "learning_rate": 6.889186451759911e-05, "loss": 1.6648, "step": 4188 }, { "epoch": 0.7546048187345192, "grad_norm": 1.3788052797317505, "learning_rate": 6.887875200060483e-05, "loss": 1.3385, "step": 4189 }, { "epoch": 0.7547849583427156, "grad_norm": 1.2850556373596191, "learning_rate": 6.886563796916622e-05, "loss": 1.5291, "step": 4190 }, { "epoch": 0.7549650979509119, "grad_norm": 1.4293962717056274, "learning_rate": 6.885252242433534e-05, "loss": 1.7287, "step": 4191 }, { "epoch": 0.7551452375591083, "grad_norm": 1.4468601942062378, "learning_rate": 6.883940536716428e-05, "loss": 1.3396, "step": 4192 }, { "epoch": 0.7553253771673046, "grad_norm": 1.3453272581100464, "learning_rate": 6.882628679870532e-05, "loss": 1.549, "step": 4193 }, { "epoch": 0.755505516775501, "grad_norm": 1.3950121402740479, "learning_rate": 6.88131667200108e-05, "loss": 1.4155, "step": 4194 }, { "epoch": 0.7556856563836973, "grad_norm": 1.3736757040023804, "learning_rate": 6.88000451321332e-05, "loss": 1.6363, "step": 4195 }, { "epoch": 0.7558657959918937, "grad_norm": 1.5825588703155518, "learning_rate": 6.878692203612513e-05, "loss": 1.6647, "step": 4196 }, { "epoch": 0.75604593560009, "grad_norm": 1.3325579166412354, "learning_rate": 6.877379743303933e-05, "loss": 1.4773, "step": 4197 }, { "epoch": 0.7562260752082864, "grad_norm": 1.3661730289459229, "learning_rate": 6.876067132392866e-05, "loss": 1.4619, "step": 4198 }, { "epoch": 0.7564062148164827, "grad_norm": 1.414772391319275, "learning_rate": 6.874754370984606e-05, "loss": 1.5437, "step": 4199 }, { "epoch": 0.7565863544246791, "grad_norm": 1.3531521558761597, "learning_rate": 6.873441459184464e-05, "loss": 1.3423, "step": 4200 }, { "epoch": 0.7567664940328754, "grad_norm": 1.2674922943115234, "learning_rate": 6.87212839709776e-05, "loss": 2.0647, "step": 4201 }, { "epoch": 0.7569466336410718, "grad_norm": 1.2441178560256958, "learning_rate": 6.870815184829828e-05, "loss": 1.7841, "step": 4202 }, { "epoch": 0.7571267732492681, "grad_norm": 1.375637412071228, "learning_rate": 6.869501822486014e-05, "loss": 1.6896, "step": 4203 }, { "epoch": 0.7573069128574645, "grad_norm": 1.2367630004882812, "learning_rate": 6.868188310171673e-05, "loss": 2.0089, "step": 4204 }, { "epoch": 0.7574870524656608, "grad_norm": 1.1467173099517822, "learning_rate": 6.866874647992177e-05, "loss": 1.491, "step": 4205 }, { "epoch": 0.7576671920738572, "grad_norm": 1.4076244831085205, "learning_rate": 6.865560836052905e-05, "loss": 2.4381, "step": 4206 }, { "epoch": 0.7578473316820535, "grad_norm": 1.4978952407836914, "learning_rate": 6.864246874459252e-05, "loss": 2.082, "step": 4207 }, { "epoch": 0.7580274712902499, "grad_norm": 1.4958641529083252, "learning_rate": 6.862932763316623e-05, "loss": 2.0228, "step": 4208 }, { "epoch": 0.7582076108984463, "grad_norm": 1.7537269592285156, "learning_rate": 6.861618502730435e-05, "loss": 2.2037, "step": 4209 }, { "epoch": 0.7583877505066426, "grad_norm": 1.6714410781860352, "learning_rate": 6.860304092806119e-05, "loss": 1.8447, "step": 4210 }, { "epoch": 0.758567890114839, "grad_norm": 1.3466695547103882, "learning_rate": 6.858989533649112e-05, "loss": 1.6541, "step": 4211 }, { "epoch": 0.7587480297230353, "grad_norm": 1.2620398998260498, "learning_rate": 6.857674825364871e-05, "loss": 1.6686, "step": 4212 }, { "epoch": 0.7589281693312318, "grad_norm": 1.2540416717529297, "learning_rate": 6.856359968058861e-05, "loss": 1.6794, "step": 4213 }, { "epoch": 0.759108308939428, "grad_norm": 1.3351025581359863, "learning_rate": 6.855044961836557e-05, "loss": 1.543, "step": 4214 }, { "epoch": 0.7592884485476245, "grad_norm": 1.3714812994003296, "learning_rate": 6.85372980680345e-05, "loss": 1.5582, "step": 4215 }, { "epoch": 0.7594685881558207, "grad_norm": 1.3614323139190674, "learning_rate": 6.852414503065044e-05, "loss": 1.5234, "step": 4216 }, { "epoch": 0.7596487277640172, "grad_norm": 1.2694581747055054, "learning_rate": 6.851099050726847e-05, "loss": 1.4151, "step": 4217 }, { "epoch": 0.7598288673722134, "grad_norm": 1.311307430267334, "learning_rate": 6.849783449894384e-05, "loss": 1.5952, "step": 4218 }, { "epoch": 0.7600090069804099, "grad_norm": 1.3133302927017212, "learning_rate": 6.848467700673196e-05, "loss": 1.5335, "step": 4219 }, { "epoch": 0.7601891465886061, "grad_norm": 1.120577335357666, "learning_rate": 6.847151803168828e-05, "loss": 1.2536, "step": 4220 }, { "epoch": 0.7603692861968026, "grad_norm": 1.4306323528289795, "learning_rate": 6.845835757486841e-05, "loss": 1.7583, "step": 4221 }, { "epoch": 0.7605494258049988, "grad_norm": 1.3486709594726562, "learning_rate": 6.844519563732811e-05, "loss": 1.7007, "step": 4222 }, { "epoch": 0.7607295654131953, "grad_norm": 1.2146520614624023, "learning_rate": 6.84320322201232e-05, "loss": 1.7896, "step": 4223 }, { "epoch": 0.7609097050213915, "grad_norm": 1.3226094245910645, "learning_rate": 6.841886732430963e-05, "loss": 1.6607, "step": 4224 }, { "epoch": 0.761089844629588, "grad_norm": 1.2693378925323486, "learning_rate": 6.84057009509435e-05, "loss": 1.5153, "step": 4225 }, { "epoch": 0.7612699842377842, "grad_norm": 1.232830286026001, "learning_rate": 6.8392533101081e-05, "loss": 1.6261, "step": 4226 }, { "epoch": 0.7614501238459807, "grad_norm": 1.4661186933517456, "learning_rate": 6.837936377577847e-05, "loss": 1.8412, "step": 4227 }, { "epoch": 0.761630263454177, "grad_norm": 1.3075085878372192, "learning_rate": 6.83661929760923e-05, "loss": 1.5111, "step": 4228 }, { "epoch": 0.7618104030623734, "grad_norm": 1.337407112121582, "learning_rate": 6.835302070307909e-05, "loss": 1.7119, "step": 4229 }, { "epoch": 0.7619905426705696, "grad_norm": 1.382531762123108, "learning_rate": 6.833984695779551e-05, "loss": 1.6738, "step": 4230 }, { "epoch": 0.762170682278766, "grad_norm": 1.2840282917022705, "learning_rate": 6.832667174129834e-05, "loss": 1.6193, "step": 4231 }, { "epoch": 0.7623508218869623, "grad_norm": 1.369227647781372, "learning_rate": 6.831349505464448e-05, "loss": 1.626, "step": 4232 }, { "epoch": 0.7625309614951588, "grad_norm": 1.3168357610702515, "learning_rate": 6.830031689889097e-05, "loss": 1.6952, "step": 4233 }, { "epoch": 0.762711101103355, "grad_norm": 1.2343453168869019, "learning_rate": 6.828713727509494e-05, "loss": 1.504, "step": 4234 }, { "epoch": 0.7628912407115515, "grad_norm": 1.294103980064392, "learning_rate": 6.827395618431367e-05, "loss": 1.5967, "step": 4235 }, { "epoch": 0.7630713803197477, "grad_norm": 1.285144329071045, "learning_rate": 6.826077362760454e-05, "loss": 1.5652, "step": 4236 }, { "epoch": 0.7632515199279442, "grad_norm": 1.2821578979492188, "learning_rate": 6.824758960602504e-05, "loss": 1.3802, "step": 4237 }, { "epoch": 0.7634316595361406, "grad_norm": 1.5116546154022217, "learning_rate": 6.823440412063282e-05, "loss": 1.8018, "step": 4238 }, { "epoch": 0.7636117991443369, "grad_norm": 1.4021544456481934, "learning_rate": 6.822121717248554e-05, "loss": 1.6717, "step": 4239 }, { "epoch": 0.7637919387525333, "grad_norm": 1.3740514516830444, "learning_rate": 6.820802876264112e-05, "loss": 1.4027, "step": 4240 }, { "epoch": 0.7639720783607296, "grad_norm": 1.4727224111557007, "learning_rate": 6.819483889215749e-05, "loss": 1.468, "step": 4241 }, { "epoch": 0.764152217968926, "grad_norm": 1.3883235454559326, "learning_rate": 6.818164756209274e-05, "loss": 1.3633, "step": 4242 }, { "epoch": 0.7643323575771223, "grad_norm": 1.2938035726547241, "learning_rate": 6.81684547735051e-05, "loss": 1.4315, "step": 4243 }, { "epoch": 0.7645124971853187, "grad_norm": 1.5009466409683228, "learning_rate": 6.815526052745284e-05, "loss": 1.5758, "step": 4244 }, { "epoch": 0.764692636793515, "grad_norm": 1.509669303894043, "learning_rate": 6.814206482499445e-05, "loss": 1.6465, "step": 4245 }, { "epoch": 0.7648727764017114, "grad_norm": 1.4357390403747559, "learning_rate": 6.812886766718845e-05, "loss": 1.4487, "step": 4246 }, { "epoch": 0.7650529160099077, "grad_norm": 1.476682186126709, "learning_rate": 6.811566905509351e-05, "loss": 1.332, "step": 4247 }, { "epoch": 0.7652330556181041, "grad_norm": 1.4407345056533813, "learning_rate": 6.810246898976841e-05, "loss": 1.463, "step": 4248 }, { "epoch": 0.7654131952263004, "grad_norm": 1.3901293277740479, "learning_rate": 6.808926747227209e-05, "loss": 1.3918, "step": 4249 }, { "epoch": 0.7655933348344968, "grad_norm": 1.2813270092010498, "learning_rate": 6.807606450366352e-05, "loss": 1.2823, "step": 4250 }, { "epoch": 0.7657734744426931, "grad_norm": 1.3141233921051025, "learning_rate": 6.806286008500189e-05, "loss": 2.0169, "step": 4251 }, { "epoch": 0.7659536140508895, "grad_norm": 1.40625, "learning_rate": 6.80496542173464e-05, "loss": 2.0799, "step": 4252 }, { "epoch": 0.7661337536590858, "grad_norm": 1.2948371171951294, "learning_rate": 6.803644690175644e-05, "loss": 2.0991, "step": 4253 }, { "epoch": 0.7663138932672822, "grad_norm": 1.2399791479110718, "learning_rate": 6.802323813929151e-05, "loss": 1.9906, "step": 4254 }, { "epoch": 0.7664940328754785, "grad_norm": 1.303386926651001, "learning_rate": 6.801002793101119e-05, "loss": 2.0277, "step": 4255 }, { "epoch": 0.7666741724836749, "grad_norm": 1.3103928565979004, "learning_rate": 6.799681627797519e-05, "loss": 1.938, "step": 4256 }, { "epoch": 0.7668543120918712, "grad_norm": 1.369431734085083, "learning_rate": 6.798360318124336e-05, "loss": 1.9729, "step": 4257 }, { "epoch": 0.7670344517000676, "grad_norm": 1.257573127746582, "learning_rate": 6.797038864187564e-05, "loss": 1.7535, "step": 4258 }, { "epoch": 0.7672145913082639, "grad_norm": 1.5795159339904785, "learning_rate": 6.795717266093211e-05, "loss": 1.887, "step": 4259 }, { "epoch": 0.7673947309164603, "grad_norm": 1.7241616249084473, "learning_rate": 6.794395523947293e-05, "loss": 2.0207, "step": 4260 }, { "epoch": 0.7675748705246566, "grad_norm": 1.4644179344177246, "learning_rate": 6.79307363785584e-05, "loss": 1.8781, "step": 4261 }, { "epoch": 0.767755010132853, "grad_norm": 1.1703157424926758, "learning_rate": 6.791751607924892e-05, "loss": 1.5761, "step": 4262 }, { "epoch": 0.7679351497410493, "grad_norm": 1.4546563625335693, "learning_rate": 6.790429434260504e-05, "loss": 1.8715, "step": 4263 }, { "epoch": 0.7681152893492457, "grad_norm": 1.3338547945022583, "learning_rate": 6.789107116968738e-05, "loss": 1.624, "step": 4264 }, { "epoch": 0.7682954289574421, "grad_norm": 1.3146785497665405, "learning_rate": 6.787784656155671e-05, "loss": 1.5431, "step": 4265 }, { "epoch": 0.7684755685656384, "grad_norm": 1.4626420736312866, "learning_rate": 6.78646205192739e-05, "loss": 1.5782, "step": 4266 }, { "epoch": 0.7686557081738348, "grad_norm": 1.3771897554397583, "learning_rate": 6.785139304389993e-05, "loss": 1.4225, "step": 4267 }, { "epoch": 0.7688358477820311, "grad_norm": 1.2438147068023682, "learning_rate": 6.783816413649591e-05, "loss": 1.3842, "step": 4268 }, { "epoch": 0.7690159873902275, "grad_norm": 1.3158282041549683, "learning_rate": 6.782493379812302e-05, "loss": 1.5977, "step": 4269 }, { "epoch": 0.7691961269984238, "grad_norm": 1.3996762037277222, "learning_rate": 6.781170202984267e-05, "loss": 1.5792, "step": 4270 }, { "epoch": 0.7693762666066202, "grad_norm": 1.3123843669891357, "learning_rate": 6.779846883271624e-05, "loss": 1.4833, "step": 4271 }, { "epoch": 0.7695564062148165, "grad_norm": 1.3152434825897217, "learning_rate": 6.77852342078053e-05, "loss": 1.5138, "step": 4272 }, { "epoch": 0.7697365458230129, "grad_norm": 1.3313504457473755, "learning_rate": 6.777199815617154e-05, "loss": 1.5747, "step": 4273 }, { "epoch": 0.7699166854312092, "grad_norm": 1.3116765022277832, "learning_rate": 6.775876067887674e-05, "loss": 1.4184, "step": 4274 }, { "epoch": 0.7700968250394056, "grad_norm": 1.412165880203247, "learning_rate": 6.774552177698282e-05, "loss": 1.6018, "step": 4275 }, { "epoch": 0.7702769646476019, "grad_norm": 1.22885000705719, "learning_rate": 6.77322814515518e-05, "loss": 1.4306, "step": 4276 }, { "epoch": 0.7704571042557983, "grad_norm": 1.2525964975357056, "learning_rate": 6.771903970364577e-05, "loss": 1.5575, "step": 4277 }, { "epoch": 0.7706372438639946, "grad_norm": 1.3322268724441528, "learning_rate": 6.770579653432701e-05, "loss": 1.4084, "step": 4278 }, { "epoch": 0.770817383472191, "grad_norm": 1.3104209899902344, "learning_rate": 6.76925519446579e-05, "loss": 1.6766, "step": 4279 }, { "epoch": 0.7709975230803873, "grad_norm": 1.3410226106643677, "learning_rate": 6.767930593570086e-05, "loss": 1.3541, "step": 4280 }, { "epoch": 0.7711776626885837, "grad_norm": 1.3326008319854736, "learning_rate": 6.766605850851853e-05, "loss": 1.6574, "step": 4281 }, { "epoch": 0.77135780229678, "grad_norm": 1.4369840621948242, "learning_rate": 6.765280966417359e-05, "loss": 2.0069, "step": 4282 }, { "epoch": 0.7715379419049764, "grad_norm": 1.2933682203292847, "learning_rate": 6.763955940372883e-05, "loss": 1.5017, "step": 4283 }, { "epoch": 0.7717180815131727, "grad_norm": 1.5855871438980103, "learning_rate": 6.762630772824724e-05, "loss": 1.9075, "step": 4284 }, { "epoch": 0.7718982211213691, "grad_norm": 1.2598789930343628, "learning_rate": 6.761305463879181e-05, "loss": 1.395, "step": 4285 }, { "epoch": 0.7720783607295654, "grad_norm": 1.3627471923828125, "learning_rate": 6.759980013642573e-05, "loss": 1.5435, "step": 4286 }, { "epoch": 0.7722585003377618, "grad_norm": 1.2259958982467651, "learning_rate": 6.758654422221224e-05, "loss": 1.3809, "step": 4287 }, { "epoch": 0.7724386399459581, "grad_norm": 1.3748903274536133, "learning_rate": 6.757328689721476e-05, "loss": 1.3938, "step": 4288 }, { "epoch": 0.7726187795541545, "grad_norm": 1.361642599105835, "learning_rate": 6.756002816249676e-05, "loss": 1.5747, "step": 4289 }, { "epoch": 0.7727989191623508, "grad_norm": 1.4628759622573853, "learning_rate": 6.754676801912183e-05, "loss": 1.7951, "step": 4290 }, { "epoch": 0.7729790587705472, "grad_norm": 1.324305772781372, "learning_rate": 6.753350646815373e-05, "loss": 1.5718, "step": 4291 }, { "epoch": 0.7731591983787435, "grad_norm": 1.3084319829940796, "learning_rate": 6.752024351065629e-05, "loss": 1.5159, "step": 4292 }, { "epoch": 0.7733393379869399, "grad_norm": 1.4280668497085571, "learning_rate": 6.750697914769345e-05, "loss": 1.433, "step": 4293 }, { "epoch": 0.7735194775951363, "grad_norm": 1.4539705514907837, "learning_rate": 6.749371338032925e-05, "loss": 1.7837, "step": 4294 }, { "epoch": 0.7736996172033326, "grad_norm": 1.4157899618148804, "learning_rate": 6.74804462096279e-05, "loss": 1.4403, "step": 4295 }, { "epoch": 0.773879756811529, "grad_norm": 1.3355616331100464, "learning_rate": 6.746717763665366e-05, "loss": 1.4053, "step": 4296 }, { "epoch": 0.7740598964197253, "grad_norm": 1.419180989265442, "learning_rate": 6.745390766247096e-05, "loss": 1.4461, "step": 4297 }, { "epoch": 0.7742400360279217, "grad_norm": 1.342730164527893, "learning_rate": 6.744063628814428e-05, "loss": 1.55, "step": 4298 }, { "epoch": 0.774420175636118, "grad_norm": 1.462121605873108, "learning_rate": 6.742736351473823e-05, "loss": 1.4211, "step": 4299 }, { "epoch": 0.7746003152443144, "grad_norm": 1.4189155101776123, "learning_rate": 6.741408934331759e-05, "loss": 1.3856, "step": 4300 }, { "epoch": 0.7747804548525107, "grad_norm": 1.4132473468780518, "learning_rate": 6.740081377494715e-05, "loss": 2.1445, "step": 4301 }, { "epoch": 0.7749605944607071, "grad_norm": 1.2943170070648193, "learning_rate": 6.738753681069194e-05, "loss": 2.0852, "step": 4302 }, { "epoch": 0.7751407340689034, "grad_norm": 1.2062082290649414, "learning_rate": 6.737425845161701e-05, "loss": 1.9833, "step": 4303 }, { "epoch": 0.7753208736770998, "grad_norm": 1.3156678676605225, "learning_rate": 6.73609786987875e-05, "loss": 1.851, "step": 4304 }, { "epoch": 0.7755010132852961, "grad_norm": 1.290466070175171, "learning_rate": 6.734769755326875e-05, "loss": 1.7542, "step": 4305 }, { "epoch": 0.7756811528934925, "grad_norm": 1.2796297073364258, "learning_rate": 6.733441501612612e-05, "loss": 1.8778, "step": 4306 }, { "epoch": 0.7758612925016888, "grad_norm": 1.3409113883972168, "learning_rate": 6.73211310884252e-05, "loss": 1.6384, "step": 4307 }, { "epoch": 0.7760414321098852, "grad_norm": 1.4278721809387207, "learning_rate": 6.730784577123157e-05, "loss": 1.974, "step": 4308 }, { "epoch": 0.7762215717180815, "grad_norm": 1.5057724714279175, "learning_rate": 6.729455906561098e-05, "loss": 2.1269, "step": 4309 }, { "epoch": 0.7764017113262779, "grad_norm": 1.9017735719680786, "learning_rate": 6.728127097262929e-05, "loss": 2.1665, "step": 4310 }, { "epoch": 0.7765818509344742, "grad_norm": 1.3164198398590088, "learning_rate": 6.726798149335244e-05, "loss": 1.5831, "step": 4311 }, { "epoch": 0.7767619905426706, "grad_norm": 1.2864744663238525, "learning_rate": 6.725469062884655e-05, "loss": 1.6175, "step": 4312 }, { "epoch": 0.7769421301508669, "grad_norm": 1.3113449811935425, "learning_rate": 6.724139838017778e-05, "loss": 1.5805, "step": 4313 }, { "epoch": 0.7771222697590633, "grad_norm": 1.3403681516647339, "learning_rate": 6.722810474841242e-05, "loss": 1.6504, "step": 4314 }, { "epoch": 0.7773024093672596, "grad_norm": 1.253453254699707, "learning_rate": 6.721480973461688e-05, "loss": 1.5868, "step": 4315 }, { "epoch": 0.777482548975456, "grad_norm": 1.21878182888031, "learning_rate": 6.72015133398577e-05, "loss": 1.4491, "step": 4316 }, { "epoch": 0.7776626885836523, "grad_norm": 1.2276949882507324, "learning_rate": 6.718821556520151e-05, "loss": 1.4078, "step": 4317 }, { "epoch": 0.7778428281918487, "grad_norm": 1.2296260595321655, "learning_rate": 6.7174916411715e-05, "loss": 1.5385, "step": 4318 }, { "epoch": 0.778022967800045, "grad_norm": 1.1654185056686401, "learning_rate": 6.71616158804651e-05, "loss": 1.3349, "step": 4319 }, { "epoch": 0.7782031074082414, "grad_norm": 1.3187497854232788, "learning_rate": 6.714831397251873e-05, "loss": 1.4194, "step": 4320 }, { "epoch": 0.7783832470164377, "grad_norm": 1.3843474388122559, "learning_rate": 6.713501068894296e-05, "loss": 1.6823, "step": 4321 }, { "epoch": 0.7785633866246341, "grad_norm": 1.1842639446258545, "learning_rate": 6.712170603080497e-05, "loss": 1.4052, "step": 4322 }, { "epoch": 0.7787435262328305, "grad_norm": 1.280438780784607, "learning_rate": 6.710839999917207e-05, "loss": 1.4829, "step": 4323 }, { "epoch": 0.7789236658410268, "grad_norm": 1.2648844718933105, "learning_rate": 6.709509259511165e-05, "loss": 1.653, "step": 4324 }, { "epoch": 0.7791038054492232, "grad_norm": 1.4239405393600464, "learning_rate": 6.708178381969123e-05, "loss": 1.5412, "step": 4325 }, { "epoch": 0.7792839450574195, "grad_norm": 1.4355626106262207, "learning_rate": 6.706847367397843e-05, "loss": 1.5768, "step": 4326 }, { "epoch": 0.7794640846656159, "grad_norm": 1.4034491777420044, "learning_rate": 6.7055162159041e-05, "loss": 1.5767, "step": 4327 }, { "epoch": 0.7796442242738122, "grad_norm": 1.4894843101501465, "learning_rate": 6.704184927594675e-05, "loss": 1.6383, "step": 4328 }, { "epoch": 0.7798243638820086, "grad_norm": 1.4030094146728516, "learning_rate": 6.702853502576367e-05, "loss": 1.7633, "step": 4329 }, { "epoch": 0.7800045034902049, "grad_norm": 1.428145408630371, "learning_rate": 6.70152194095598e-05, "loss": 1.3597, "step": 4330 }, { "epoch": 0.7801846430984013, "grad_norm": 1.4601353406906128, "learning_rate": 6.70019024284033e-05, "loss": 1.8679, "step": 4331 }, { "epoch": 0.7803647827065976, "grad_norm": 1.2155267000198364, "learning_rate": 6.698858408336248e-05, "loss": 1.3697, "step": 4332 }, { "epoch": 0.780544922314794, "grad_norm": 1.322020173072815, "learning_rate": 6.697526437550571e-05, "loss": 1.641, "step": 4333 }, { "epoch": 0.7807250619229903, "grad_norm": 1.2564399242401123, "learning_rate": 6.696194330590151e-05, "loss": 1.4124, "step": 4334 }, { "epoch": 0.7809052015311867, "grad_norm": 1.3006970882415771, "learning_rate": 6.694862087561849e-05, "loss": 1.4655, "step": 4335 }, { "epoch": 0.781085341139383, "grad_norm": 1.2777646780014038, "learning_rate": 6.693529708572535e-05, "loss": 1.4395, "step": 4336 }, { "epoch": 0.7812654807475794, "grad_norm": 1.3497413396835327, "learning_rate": 6.692197193729092e-05, "loss": 1.5953, "step": 4337 }, { "epoch": 0.7814456203557757, "grad_norm": 1.4025439023971558, "learning_rate": 6.690864543138415e-05, "loss": 1.4995, "step": 4338 }, { "epoch": 0.7816257599639721, "grad_norm": 1.3810391426086426, "learning_rate": 6.689531756907408e-05, "loss": 1.7339, "step": 4339 }, { "epoch": 0.7818058995721684, "grad_norm": 1.2977656126022339, "learning_rate": 6.688198835142986e-05, "loss": 1.4848, "step": 4340 }, { "epoch": 0.7819860391803648, "grad_norm": 1.365507960319519, "learning_rate": 6.686865777952076e-05, "loss": 1.6372, "step": 4341 }, { "epoch": 0.7821661787885611, "grad_norm": 1.2283918857574463, "learning_rate": 6.685532585441615e-05, "loss": 1.4993, "step": 4342 }, { "epoch": 0.7823463183967575, "grad_norm": 1.3399330377578735, "learning_rate": 6.68419925771855e-05, "loss": 1.45, "step": 4343 }, { "epoch": 0.7825264580049538, "grad_norm": 1.4427807331085205, "learning_rate": 6.682865794889843e-05, "loss": 1.5582, "step": 4344 }, { "epoch": 0.7827065976131502, "grad_norm": 1.5503398180007935, "learning_rate": 6.68153219706246e-05, "loss": 1.6297, "step": 4345 }, { "epoch": 0.7828867372213465, "grad_norm": 1.4203894138336182, "learning_rate": 6.680198464343383e-05, "loss": 1.624, "step": 4346 }, { "epoch": 0.7830668768295429, "grad_norm": 1.4910606145858765, "learning_rate": 6.678864596839605e-05, "loss": 1.4961, "step": 4347 }, { "epoch": 0.7832470164377392, "grad_norm": 1.3974127769470215, "learning_rate": 6.677530594658126e-05, "loss": 1.4824, "step": 4348 }, { "epoch": 0.7834271560459356, "grad_norm": 1.389898419380188, "learning_rate": 6.67619645790596e-05, "loss": 1.3888, "step": 4349 }, { "epoch": 0.7836072956541319, "grad_norm": 1.4136215448379517, "learning_rate": 6.67486218669013e-05, "loss": 1.5896, "step": 4350 }, { "epoch": 0.7837874352623283, "grad_norm": 1.2181212902069092, "learning_rate": 6.673527781117672e-05, "loss": 2.1319, "step": 4351 }, { "epoch": 0.7839675748705247, "grad_norm": 1.2846685647964478, "learning_rate": 6.672193241295631e-05, "loss": 2.0701, "step": 4352 }, { "epoch": 0.784147714478721, "grad_norm": 1.307144045829773, "learning_rate": 6.67085856733106e-05, "loss": 2.1257, "step": 4353 }, { "epoch": 0.7843278540869174, "grad_norm": 1.1837550401687622, "learning_rate": 6.669523759331031e-05, "loss": 1.8665, "step": 4354 }, { "epoch": 0.7845079936951137, "grad_norm": 1.3024024963378906, "learning_rate": 6.668188817402617e-05, "loss": 2.0207, "step": 4355 }, { "epoch": 0.7846881333033101, "grad_norm": 1.3469128608703613, "learning_rate": 6.66685374165291e-05, "loss": 1.7316, "step": 4356 }, { "epoch": 0.7848682729115064, "grad_norm": 1.4680286645889282, "learning_rate": 6.66551853218901e-05, "loss": 1.9258, "step": 4357 }, { "epoch": 0.7850484125197028, "grad_norm": 1.6727474927902222, "learning_rate": 6.664183189118022e-05, "loss": 2.0131, "step": 4358 }, { "epoch": 0.7852285521278991, "grad_norm": 1.9772467613220215, "learning_rate": 6.66284771254707e-05, "loss": 2.3818, "step": 4359 }, { "epoch": 0.7854086917360955, "grad_norm": 1.3002800941467285, "learning_rate": 6.661512102583283e-05, "loss": 1.6019, "step": 4360 }, { "epoch": 0.7855888313442918, "grad_norm": 1.2263084650039673, "learning_rate": 6.660176359333806e-05, "loss": 1.701, "step": 4361 }, { "epoch": 0.7857689709524882, "grad_norm": 1.3445242643356323, "learning_rate": 6.658840482905791e-05, "loss": 1.4601, "step": 4362 }, { "epoch": 0.7859491105606845, "grad_norm": 1.2192333936691284, "learning_rate": 6.657504473406398e-05, "loss": 1.5035, "step": 4363 }, { "epoch": 0.7861292501688809, "grad_norm": 1.333082914352417, "learning_rate": 6.656168330942804e-05, "loss": 1.6493, "step": 4364 }, { "epoch": 0.7863093897770772, "grad_norm": 1.2969721555709839, "learning_rate": 6.654832055622195e-05, "loss": 1.4837, "step": 4365 }, { "epoch": 0.7864895293852736, "grad_norm": 1.3417179584503174, "learning_rate": 6.653495647551763e-05, "loss": 1.4244, "step": 4366 }, { "epoch": 0.7866696689934699, "grad_norm": 1.2683868408203125, "learning_rate": 6.652159106838714e-05, "loss": 1.5175, "step": 4367 }, { "epoch": 0.7868498086016663, "grad_norm": 1.3362749814987183, "learning_rate": 6.65082243359027e-05, "loss": 1.6481, "step": 4368 }, { "epoch": 0.7870299482098626, "grad_norm": 1.3211076259613037, "learning_rate": 6.649485627913652e-05, "loss": 1.4736, "step": 4369 }, { "epoch": 0.787210087818059, "grad_norm": 1.3011056184768677, "learning_rate": 6.648148689916101e-05, "loss": 1.4945, "step": 4370 }, { "epoch": 0.7873902274262553, "grad_norm": 1.336059808731079, "learning_rate": 6.646811619704863e-05, "loss": 1.4135, "step": 4371 }, { "epoch": 0.7875703670344517, "grad_norm": 1.3346209526062012, "learning_rate": 6.645474417387202e-05, "loss": 1.4704, "step": 4372 }, { "epoch": 0.787750506642648, "grad_norm": 1.3304309844970703, "learning_rate": 6.644137083070383e-05, "loss": 1.6287, "step": 4373 }, { "epoch": 0.7879306462508444, "grad_norm": 1.3033826351165771, "learning_rate": 6.64279961686169e-05, "loss": 1.6459, "step": 4374 }, { "epoch": 0.7881107858590407, "grad_norm": 1.3721983432769775, "learning_rate": 6.641462018868409e-05, "loss": 1.6047, "step": 4375 }, { "epoch": 0.7882909254672371, "grad_norm": 1.2553279399871826, "learning_rate": 6.640124289197845e-05, "loss": 1.3705, "step": 4376 }, { "epoch": 0.7884710650754334, "grad_norm": 1.3671807050704956, "learning_rate": 6.638786427957311e-05, "loss": 1.4814, "step": 4377 }, { "epoch": 0.7886512046836298, "grad_norm": 1.3198288679122925, "learning_rate": 6.637448435254128e-05, "loss": 1.5643, "step": 4378 }, { "epoch": 0.7888313442918262, "grad_norm": 1.3639233112335205, "learning_rate": 6.636110311195628e-05, "loss": 1.5882, "step": 4379 }, { "epoch": 0.7890114839000225, "grad_norm": 1.2690165042877197, "learning_rate": 6.634772055889156e-05, "loss": 1.5313, "step": 4380 }, { "epoch": 0.7891916235082189, "grad_norm": 1.244288682937622, "learning_rate": 6.633433669442065e-05, "loss": 1.4042, "step": 4381 }, { "epoch": 0.7893717631164152, "grad_norm": 1.3455307483673096, "learning_rate": 6.632095151961722e-05, "loss": 1.7174, "step": 4382 }, { "epoch": 0.7895519027246116, "grad_norm": 1.3777587413787842, "learning_rate": 6.6307565035555e-05, "loss": 1.6765, "step": 4383 }, { "epoch": 0.7897320423328079, "grad_norm": 1.2678066492080688, "learning_rate": 6.629417724330788e-05, "loss": 1.6305, "step": 4384 }, { "epoch": 0.7899121819410043, "grad_norm": 1.4724981784820557, "learning_rate": 6.628078814394976e-05, "loss": 1.6793, "step": 4385 }, { "epoch": 0.7900923215492006, "grad_norm": 1.2241454124450684, "learning_rate": 6.626739773855477e-05, "loss": 1.5104, "step": 4386 }, { "epoch": 0.790272461157397, "grad_norm": 1.3004575967788696, "learning_rate": 6.625400602819705e-05, "loss": 1.5002, "step": 4387 }, { "epoch": 0.7904526007655933, "grad_norm": 1.4709972143173218, "learning_rate": 6.624061301395088e-05, "loss": 1.935, "step": 4388 }, { "epoch": 0.7906327403737897, "grad_norm": 1.4223140478134155, "learning_rate": 6.622721869689065e-05, "loss": 1.5612, "step": 4389 }, { "epoch": 0.790812879981986, "grad_norm": 1.456362247467041, "learning_rate": 6.621382307809084e-05, "loss": 1.7898, "step": 4390 }, { "epoch": 0.7909930195901824, "grad_norm": 1.4476219415664673, "learning_rate": 6.620042615862604e-05, "loss": 1.7348, "step": 4391 }, { "epoch": 0.7911731591983787, "grad_norm": 1.3089605569839478, "learning_rate": 6.618702793957094e-05, "loss": 1.4495, "step": 4392 }, { "epoch": 0.7913532988065751, "grad_norm": 1.4579360485076904, "learning_rate": 6.617362842200034e-05, "loss": 1.7474, "step": 4393 }, { "epoch": 0.7915334384147714, "grad_norm": 1.3035341501235962, "learning_rate": 6.616022760698917e-05, "loss": 1.4203, "step": 4394 }, { "epoch": 0.7917135780229678, "grad_norm": 1.3709262609481812, "learning_rate": 6.614682549561239e-05, "loss": 1.5029, "step": 4395 }, { "epoch": 0.7918937176311641, "grad_norm": 1.169980525970459, "learning_rate": 6.613342208894515e-05, "loss": 1.197, "step": 4396 }, { "epoch": 0.7920738572393605, "grad_norm": 1.4016973972320557, "learning_rate": 6.612001738806261e-05, "loss": 1.5222, "step": 4397 }, { "epoch": 0.7922539968475568, "grad_norm": 1.3692858219146729, "learning_rate": 6.610661139404018e-05, "loss": 1.5008, "step": 4398 }, { "epoch": 0.7924341364557532, "grad_norm": 1.5063860416412354, "learning_rate": 6.609320410795319e-05, "loss": 1.5498, "step": 4399 }, { "epoch": 0.7926142760639495, "grad_norm": 1.3322750329971313, "learning_rate": 6.607979553087722e-05, "loss": 1.1885, "step": 4400 }, { "epoch": 0.7927944156721459, "grad_norm": 1.312860369682312, "learning_rate": 6.606638566388787e-05, "loss": 1.9776, "step": 4401 }, { "epoch": 0.7929745552803422, "grad_norm": 1.1298643350601196, "learning_rate": 6.605297450806091e-05, "loss": 1.5361, "step": 4402 }, { "epoch": 0.7931546948885386, "grad_norm": 1.2165077924728394, "learning_rate": 6.603956206447215e-05, "loss": 1.8651, "step": 4403 }, { "epoch": 0.7933348344967349, "grad_norm": 1.3561855554580688, "learning_rate": 6.602614833419753e-05, "loss": 2.1797, "step": 4404 }, { "epoch": 0.7935149741049313, "grad_norm": 1.2631462812423706, "learning_rate": 6.601273331831309e-05, "loss": 1.7613, "step": 4405 }, { "epoch": 0.7936951137131276, "grad_norm": 1.247037410736084, "learning_rate": 6.599931701789498e-05, "loss": 1.8524, "step": 4406 }, { "epoch": 0.793875253321324, "grad_norm": 1.4577915668487549, "learning_rate": 6.598589943401948e-05, "loss": 2.0792, "step": 4407 }, { "epoch": 0.7940553929295204, "grad_norm": 1.3760621547698975, "learning_rate": 6.597248056776288e-05, "loss": 1.9623, "step": 4408 }, { "epoch": 0.7942355325377167, "grad_norm": 1.5641895532608032, "learning_rate": 6.595906042020169e-05, "loss": 2.1334, "step": 4409 }, { "epoch": 0.7944156721459131, "grad_norm": 1.5525637865066528, "learning_rate": 6.594563899241244e-05, "loss": 2.1652, "step": 4410 }, { "epoch": 0.7945958117541094, "grad_norm": 1.8955398797988892, "learning_rate": 6.59322162854718e-05, "loss": 2.1922, "step": 4411 }, { "epoch": 0.7947759513623058, "grad_norm": 1.248736023902893, "learning_rate": 6.591879230045653e-05, "loss": 1.513, "step": 4412 }, { "epoch": 0.7949560909705021, "grad_norm": 1.3473514318466187, "learning_rate": 6.590536703844348e-05, "loss": 1.6981, "step": 4413 }, { "epoch": 0.7951362305786985, "grad_norm": 1.3260043859481812, "learning_rate": 6.589194050050964e-05, "loss": 1.5863, "step": 4414 }, { "epoch": 0.7953163701868948, "grad_norm": 1.4116684198379517, "learning_rate": 6.587851268773208e-05, "loss": 1.5412, "step": 4415 }, { "epoch": 0.7954965097950912, "grad_norm": 1.2888160943984985, "learning_rate": 6.586508360118798e-05, "loss": 1.5163, "step": 4416 }, { "epoch": 0.7956766494032875, "grad_norm": 1.4023644924163818, "learning_rate": 6.585165324195458e-05, "loss": 1.7653, "step": 4417 }, { "epoch": 0.7958567890114839, "grad_norm": 1.179885983467102, "learning_rate": 6.583822161110928e-05, "loss": 1.2653, "step": 4418 }, { "epoch": 0.7960369286196802, "grad_norm": 1.1852091550827026, "learning_rate": 6.582478870972956e-05, "loss": 1.4764, "step": 4419 }, { "epoch": 0.7962170682278766, "grad_norm": 1.2814651727676392, "learning_rate": 6.5811354538893e-05, "loss": 1.571, "step": 4420 }, { "epoch": 0.7963972078360729, "grad_norm": 1.3784024715423584, "learning_rate": 6.579791909967727e-05, "loss": 1.5953, "step": 4421 }, { "epoch": 0.7965773474442693, "grad_norm": 1.428084135055542, "learning_rate": 6.578448239316017e-05, "loss": 1.8014, "step": 4422 }, { "epoch": 0.7967574870524656, "grad_norm": 1.3210147619247437, "learning_rate": 6.577104442041959e-05, "loss": 1.6047, "step": 4423 }, { "epoch": 0.796937626660662, "grad_norm": 1.315963625907898, "learning_rate": 6.575760518253351e-05, "loss": 1.5545, "step": 4424 }, { "epoch": 0.7971177662688583, "grad_norm": 1.185929536819458, "learning_rate": 6.574416468058e-05, "loss": 1.4976, "step": 4425 }, { "epoch": 0.7972979058770547, "grad_norm": 1.3967103958129883, "learning_rate": 6.573072291563729e-05, "loss": 1.6291, "step": 4426 }, { "epoch": 0.797478045485251, "grad_norm": 1.2888225317001343, "learning_rate": 6.571727988878364e-05, "loss": 1.4863, "step": 4427 }, { "epoch": 0.7976581850934474, "grad_norm": 1.2711877822875977, "learning_rate": 6.570383560109745e-05, "loss": 1.5938, "step": 4428 }, { "epoch": 0.7978383247016437, "grad_norm": 1.3336187601089478, "learning_rate": 6.569039005365721e-05, "loss": 1.5705, "step": 4429 }, { "epoch": 0.7980184643098401, "grad_norm": 1.2123491764068604, "learning_rate": 6.567694324754153e-05, "loss": 1.5141, "step": 4430 }, { "epoch": 0.7981986039180364, "grad_norm": 1.2405171394348145, "learning_rate": 6.56634951838291e-05, "loss": 1.4064, "step": 4431 }, { "epoch": 0.7983787435262328, "grad_norm": 1.3566830158233643, "learning_rate": 6.565004586359872e-05, "loss": 1.7046, "step": 4432 }, { "epoch": 0.7985588831344291, "grad_norm": 1.2678134441375732, "learning_rate": 6.563659528792928e-05, "loss": 1.5103, "step": 4433 }, { "epoch": 0.7987390227426255, "grad_norm": 1.3577377796173096, "learning_rate": 6.562314345789977e-05, "loss": 1.6721, "step": 4434 }, { "epoch": 0.7989191623508218, "grad_norm": 1.4468982219696045, "learning_rate": 6.560969037458933e-05, "loss": 1.4349, "step": 4435 }, { "epoch": 0.7990993019590182, "grad_norm": 1.3974382877349854, "learning_rate": 6.559623603907709e-05, "loss": 1.6136, "step": 4436 }, { "epoch": 0.7992794415672146, "grad_norm": 1.2704823017120361, "learning_rate": 6.558278045244243e-05, "loss": 1.4007, "step": 4437 }, { "epoch": 0.7994595811754109, "grad_norm": 1.4970290660858154, "learning_rate": 6.55693236157647e-05, "loss": 1.5104, "step": 4438 }, { "epoch": 0.7996397207836073, "grad_norm": 1.4043169021606445, "learning_rate": 6.555586553012341e-05, "loss": 1.522, "step": 4439 }, { "epoch": 0.7998198603918036, "grad_norm": 1.3219150304794312, "learning_rate": 6.554240619659817e-05, "loss": 1.3923, "step": 4440 }, { "epoch": 0.8, "grad_norm": 1.389901876449585, "learning_rate": 6.552894561626865e-05, "loss": 1.5099, "step": 4441 }, { "epoch": 0.8001801396081963, "grad_norm": 1.392430067062378, "learning_rate": 6.55154837902147e-05, "loss": 1.5431, "step": 4442 }, { "epoch": 0.8003602792163927, "grad_norm": 1.4318674802780151, "learning_rate": 6.550202071951622e-05, "loss": 1.853, "step": 4443 }, { "epoch": 0.800540418824589, "grad_norm": 1.4515800476074219, "learning_rate": 6.548855640525315e-05, "loss": 1.6175, "step": 4444 }, { "epoch": 0.8007205584327854, "grad_norm": 1.4249720573425293, "learning_rate": 6.547509084850564e-05, "loss": 1.7187, "step": 4445 }, { "epoch": 0.8009006980409817, "grad_norm": 1.5754344463348389, "learning_rate": 6.546162405035388e-05, "loss": 1.4609, "step": 4446 }, { "epoch": 0.8010808376491781, "grad_norm": 1.4884302616119385, "learning_rate": 6.544815601187818e-05, "loss": 1.536, "step": 4447 }, { "epoch": 0.8012609772573744, "grad_norm": 1.4362431764602661, "learning_rate": 6.543468673415892e-05, "loss": 1.5394, "step": 4448 }, { "epoch": 0.8014411168655708, "grad_norm": 1.4859565496444702, "learning_rate": 6.542121621827661e-05, "loss": 1.5504, "step": 4449 }, { "epoch": 0.8016212564737671, "grad_norm": 1.3708226680755615, "learning_rate": 6.540774446531183e-05, "loss": 1.3096, "step": 4450 }, { "epoch": 0.8018013960819635, "grad_norm": 1.3131113052368164, "learning_rate": 6.539427147634533e-05, "loss": 2.0142, "step": 4451 }, { "epoch": 0.8019815356901598, "grad_norm": 1.1867454051971436, "learning_rate": 6.538079725245785e-05, "loss": 1.9549, "step": 4452 }, { "epoch": 0.8021616752983562, "grad_norm": 1.275564193725586, "learning_rate": 6.53673217947303e-05, "loss": 2.028, "step": 4453 }, { "epoch": 0.8023418149065525, "grad_norm": 1.316580057144165, "learning_rate": 6.535384510424369e-05, "loss": 2.0151, "step": 4454 }, { "epoch": 0.802521954514749, "grad_norm": 1.3737013339996338, "learning_rate": 6.53403671820791e-05, "loss": 2.1012, "step": 4455 }, { "epoch": 0.8027020941229452, "grad_norm": 1.249172568321228, "learning_rate": 6.532688802931773e-05, "loss": 2.169, "step": 4456 }, { "epoch": 0.8028822337311416, "grad_norm": 1.3895149230957031, "learning_rate": 6.531340764704085e-05, "loss": 1.9085, "step": 4457 }, { "epoch": 0.8030623733393379, "grad_norm": 1.4911143779754639, "learning_rate": 6.529992603632989e-05, "loss": 2.0766, "step": 4458 }, { "epoch": 0.8032425129475343, "grad_norm": 1.6138501167297363, "learning_rate": 6.52864431982663e-05, "loss": 2.2572, "step": 4459 }, { "epoch": 0.8034226525557306, "grad_norm": 1.610435962677002, "learning_rate": 6.527295913393171e-05, "loss": 2.3306, "step": 4460 }, { "epoch": 0.803602792163927, "grad_norm": 1.9049159288406372, "learning_rate": 6.525947384440776e-05, "loss": 2.2983, "step": 4461 }, { "epoch": 0.8037829317721233, "grad_norm": 1.2327874898910522, "learning_rate": 6.524598733077625e-05, "loss": 1.618, "step": 4462 }, { "epoch": 0.8039630713803197, "grad_norm": 1.1832700967788696, "learning_rate": 6.523249959411906e-05, "loss": 1.4639, "step": 4463 }, { "epoch": 0.8041432109885162, "grad_norm": 1.218001365661621, "learning_rate": 6.521901063551817e-05, "loss": 1.4186, "step": 4464 }, { "epoch": 0.8043233505967124, "grad_norm": 1.575579285621643, "learning_rate": 6.520552045605566e-05, "loss": 1.6536, "step": 4465 }, { "epoch": 0.8045034902049089, "grad_norm": 1.3112882375717163, "learning_rate": 6.519202905681371e-05, "loss": 1.4742, "step": 4466 }, { "epoch": 0.8046836298131052, "grad_norm": 1.222264051437378, "learning_rate": 6.51785364388746e-05, "loss": 1.5827, "step": 4467 }, { "epoch": 0.8048637694213016, "grad_norm": 1.1691155433654785, "learning_rate": 6.516504260332067e-05, "loss": 1.5971, "step": 4468 }, { "epoch": 0.8050439090294979, "grad_norm": 1.3767268657684326, "learning_rate": 6.515154755123442e-05, "loss": 1.4404, "step": 4469 }, { "epoch": 0.8052240486376943, "grad_norm": 1.291096568107605, "learning_rate": 6.51380512836984e-05, "loss": 1.3579, "step": 4470 }, { "epoch": 0.8054041882458906, "grad_norm": 1.3260118961334229, "learning_rate": 6.512455380179528e-05, "loss": 1.5205, "step": 4471 }, { "epoch": 0.805584327854087, "grad_norm": 1.1601088047027588, "learning_rate": 6.511105510660782e-05, "loss": 1.4457, "step": 4472 }, { "epoch": 0.8057644674622833, "grad_norm": 1.3309887647628784, "learning_rate": 6.509755519921889e-05, "loss": 1.7781, "step": 4473 }, { "epoch": 0.8059446070704797, "grad_norm": 1.3190155029296875, "learning_rate": 6.508405408071141e-05, "loss": 1.5145, "step": 4474 }, { "epoch": 0.806124746678676, "grad_norm": 1.4214673042297363, "learning_rate": 6.50705517521685e-05, "loss": 1.5623, "step": 4475 }, { "epoch": 0.8063048862868724, "grad_norm": 1.2425867319107056, "learning_rate": 6.505704821467325e-05, "loss": 1.3369, "step": 4476 }, { "epoch": 0.8064850258950687, "grad_norm": 1.3489048480987549, "learning_rate": 6.504354346930892e-05, "loss": 1.6381, "step": 4477 }, { "epoch": 0.8066651655032651, "grad_norm": 1.3433719873428345, "learning_rate": 6.503003751715885e-05, "loss": 1.5532, "step": 4478 }, { "epoch": 0.8068453051114614, "grad_norm": 1.1665107011795044, "learning_rate": 6.501653035930651e-05, "loss": 1.2765, "step": 4479 }, { "epoch": 0.8070254447196578, "grad_norm": 1.2134331464767456, "learning_rate": 6.500302199683541e-05, "loss": 1.3399, "step": 4480 }, { "epoch": 0.807205584327854, "grad_norm": 1.3646905422210693, "learning_rate": 6.49895124308292e-05, "loss": 1.6784, "step": 4481 }, { "epoch": 0.8073857239360505, "grad_norm": 1.249129295349121, "learning_rate": 6.497600166237161e-05, "loss": 1.3701, "step": 4482 }, { "epoch": 0.8075658635442468, "grad_norm": 1.4498471021652222, "learning_rate": 6.496248969254643e-05, "loss": 1.7075, "step": 4483 }, { "epoch": 0.8077460031524432, "grad_norm": 1.4726353883743286, "learning_rate": 6.494897652243765e-05, "loss": 1.8316, "step": 4484 }, { "epoch": 0.8079261427606395, "grad_norm": 1.5187658071517944, "learning_rate": 6.493546215312922e-05, "loss": 1.6956, "step": 4485 }, { "epoch": 0.8081062823688359, "grad_norm": 1.5023746490478516, "learning_rate": 6.492194658570532e-05, "loss": 1.7556, "step": 4486 }, { "epoch": 0.8082864219770322, "grad_norm": 1.2612223625183105, "learning_rate": 6.490842982125013e-05, "loss": 1.3557, "step": 4487 }, { "epoch": 0.8084665615852286, "grad_norm": 1.339063048362732, "learning_rate": 6.489491186084796e-05, "loss": 1.4407, "step": 4488 }, { "epoch": 0.8086467011934249, "grad_norm": 1.4253846406936646, "learning_rate": 6.488139270558321e-05, "loss": 1.5354, "step": 4489 }, { "epoch": 0.8088268408016213, "grad_norm": 1.4491996765136719, "learning_rate": 6.486787235654039e-05, "loss": 1.6168, "step": 4490 }, { "epoch": 0.8090069804098176, "grad_norm": 1.3849133253097534, "learning_rate": 6.485435081480409e-05, "loss": 1.3404, "step": 4491 }, { "epoch": 0.809187120018014, "grad_norm": 1.5375938415527344, "learning_rate": 6.484082808145899e-05, "loss": 1.396, "step": 4492 }, { "epoch": 0.8093672596262104, "grad_norm": 1.4248143434524536, "learning_rate": 6.482730415758991e-05, "loss": 1.7538, "step": 4493 }, { "epoch": 0.8095473992344067, "grad_norm": 1.438437581062317, "learning_rate": 6.481377904428171e-05, "loss": 1.7606, "step": 4494 }, { "epoch": 0.8097275388426031, "grad_norm": 1.3876696825027466, "learning_rate": 6.480025274261936e-05, "loss": 1.5314, "step": 4495 }, { "epoch": 0.8099076784507994, "grad_norm": 1.3699356317520142, "learning_rate": 6.478672525368796e-05, "loss": 1.4884, "step": 4496 }, { "epoch": 0.8100878180589958, "grad_norm": 1.3716552257537842, "learning_rate": 6.477319657857263e-05, "loss": 1.681, "step": 4497 }, { "epoch": 0.8102679576671921, "grad_norm": 1.4728550910949707, "learning_rate": 6.47596667183587e-05, "loss": 1.6941, "step": 4498 }, { "epoch": 0.8104480972753885, "grad_norm": 1.3962680101394653, "learning_rate": 6.474613567413148e-05, "loss": 1.4986, "step": 4499 }, { "epoch": 0.8106282368835848, "grad_norm": 1.5372809171676636, "learning_rate": 6.473260344697645e-05, "loss": 1.7452, "step": 4500 }, { "epoch": 0.8108083764917812, "grad_norm": 1.1778926849365234, "learning_rate": 6.471907003797913e-05, "loss": 1.9501, "step": 4501 }, { "epoch": 0.8109885160999775, "grad_norm": 1.2058022022247314, "learning_rate": 6.470553544822519e-05, "loss": 1.6338, "step": 4502 }, { "epoch": 0.8111686557081739, "grad_norm": 1.358543038368225, "learning_rate": 6.469199967880035e-05, "loss": 2.0036, "step": 4503 }, { "epoch": 0.8113487953163702, "grad_norm": 1.2895220518112183, "learning_rate": 6.467846273079047e-05, "loss": 1.8362, "step": 4504 }, { "epoch": 0.8115289349245666, "grad_norm": 1.280074954032898, "learning_rate": 6.466492460528145e-05, "loss": 1.9504, "step": 4505 }, { "epoch": 0.8117090745327629, "grad_norm": 1.3310102224349976, "learning_rate": 6.465138530335932e-05, "loss": 1.8365, "step": 4506 }, { "epoch": 0.8118892141409593, "grad_norm": 1.3557909727096558, "learning_rate": 6.463784482611019e-05, "loss": 2.0871, "step": 4507 }, { "epoch": 0.8120693537491556, "grad_norm": 1.3947160243988037, "learning_rate": 6.462430317462029e-05, "loss": 1.6909, "step": 4508 }, { "epoch": 0.812249493357352, "grad_norm": 1.5817337036132812, "learning_rate": 6.461076034997591e-05, "loss": 1.828, "step": 4509 }, { "epoch": 0.8124296329655483, "grad_norm": 1.9269813299179077, "learning_rate": 6.459721635326346e-05, "loss": 2.0909, "step": 4510 }, { "epoch": 0.8126097725737447, "grad_norm": 1.4996401071548462, "learning_rate": 6.45836711855694e-05, "loss": 1.8779, "step": 4511 }, { "epoch": 0.812789912181941, "grad_norm": 1.3707562685012817, "learning_rate": 6.457012484798037e-05, "loss": 1.6666, "step": 4512 }, { "epoch": 0.8129700517901374, "grad_norm": 1.1545947790145874, "learning_rate": 6.4556577341583e-05, "loss": 1.401, "step": 4513 }, { "epoch": 0.8131501913983337, "grad_norm": 1.2503443956375122, "learning_rate": 6.454302866746411e-05, "loss": 1.5507, "step": 4514 }, { "epoch": 0.8133303310065301, "grad_norm": 1.2434924840927124, "learning_rate": 6.452947882671052e-05, "loss": 1.4838, "step": 4515 }, { "epoch": 0.8135104706147264, "grad_norm": 1.281829595565796, "learning_rate": 6.451592782040922e-05, "loss": 1.5137, "step": 4516 }, { "epoch": 0.8136906102229228, "grad_norm": 1.3889199495315552, "learning_rate": 6.450237564964727e-05, "loss": 1.4431, "step": 4517 }, { "epoch": 0.8138707498311191, "grad_norm": 1.4919050931930542, "learning_rate": 6.448882231551178e-05, "loss": 1.7569, "step": 4518 }, { "epoch": 0.8140508894393155, "grad_norm": 1.3197606801986694, "learning_rate": 6.447526781909006e-05, "loss": 1.4634, "step": 4519 }, { "epoch": 0.8142310290475118, "grad_norm": 1.3520443439483643, "learning_rate": 6.44617121614694e-05, "loss": 1.626, "step": 4520 }, { "epoch": 0.8144111686557082, "grad_norm": 1.4526621103286743, "learning_rate": 6.444815534373721e-05, "loss": 1.4709, "step": 4521 }, { "epoch": 0.8145913082639046, "grad_norm": 1.4153876304626465, "learning_rate": 6.443459736698105e-05, "loss": 1.4838, "step": 4522 }, { "epoch": 0.8147714478721009, "grad_norm": 1.320319414138794, "learning_rate": 6.442103823228852e-05, "loss": 1.4079, "step": 4523 }, { "epoch": 0.8149515874802973, "grad_norm": 1.3222471475601196, "learning_rate": 6.440747794074733e-05, "loss": 1.6301, "step": 4524 }, { "epoch": 0.8151317270884936, "grad_norm": 1.212831974029541, "learning_rate": 6.439391649344528e-05, "loss": 1.2774, "step": 4525 }, { "epoch": 0.81531186669669, "grad_norm": 1.3025630712509155, "learning_rate": 6.438035389147026e-05, "loss": 1.3928, "step": 4526 }, { "epoch": 0.8154920063048863, "grad_norm": 1.2681788206100464, "learning_rate": 6.436679013591023e-05, "loss": 1.3501, "step": 4527 }, { "epoch": 0.8156721459130827, "grad_norm": 1.411774754524231, "learning_rate": 6.435322522785332e-05, "loss": 1.6519, "step": 4528 }, { "epoch": 0.815852285521279, "grad_norm": 1.3411880731582642, "learning_rate": 6.433965916838766e-05, "loss": 1.6624, "step": 4529 }, { "epoch": 0.8160324251294754, "grad_norm": 1.4375041723251343, "learning_rate": 6.432609195860155e-05, "loss": 1.8248, "step": 4530 }, { "epoch": 0.8162125647376717, "grad_norm": 1.380291223526001, "learning_rate": 6.43125235995833e-05, "loss": 1.6202, "step": 4531 }, { "epoch": 0.8163927043458681, "grad_norm": 1.2709763050079346, "learning_rate": 6.429895409242139e-05, "loss": 1.5164, "step": 4532 }, { "epoch": 0.8165728439540644, "grad_norm": 1.4841139316558838, "learning_rate": 6.428538343820434e-05, "loss": 1.7763, "step": 4533 }, { "epoch": 0.8167529835622608, "grad_norm": 1.257549524307251, "learning_rate": 6.427181163802077e-05, "loss": 1.4864, "step": 4534 }, { "epoch": 0.8169331231704571, "grad_norm": 1.3358641862869263, "learning_rate": 6.425823869295945e-05, "loss": 1.7005, "step": 4535 }, { "epoch": 0.8171132627786535, "grad_norm": 1.4645570516586304, "learning_rate": 6.424466460410916e-05, "loss": 1.7505, "step": 4536 }, { "epoch": 0.8172934023868498, "grad_norm": 1.2917605638504028, "learning_rate": 6.423108937255881e-05, "loss": 1.5402, "step": 4537 }, { "epoch": 0.8174735419950462, "grad_norm": 1.4276437759399414, "learning_rate": 6.42175129993974e-05, "loss": 1.7942, "step": 4538 }, { "epoch": 0.8176536816032425, "grad_norm": 1.325857400894165, "learning_rate": 6.420393548571402e-05, "loss": 1.5461, "step": 4539 }, { "epoch": 0.8178338212114389, "grad_norm": 1.3716787099838257, "learning_rate": 6.419035683259786e-05, "loss": 1.4073, "step": 4540 }, { "epoch": 0.8180139608196352, "grad_norm": 1.3536577224731445, "learning_rate": 6.41767770411382e-05, "loss": 1.564, "step": 4541 }, { "epoch": 0.8181941004278316, "grad_norm": 1.3137818574905396, "learning_rate": 6.416319611242437e-05, "loss": 1.4814, "step": 4542 }, { "epoch": 0.8183742400360279, "grad_norm": 1.3973814249038696, "learning_rate": 6.414961404754584e-05, "loss": 1.6267, "step": 4543 }, { "epoch": 0.8185543796442243, "grad_norm": 1.3066155910491943, "learning_rate": 6.413603084759219e-05, "loss": 1.4406, "step": 4544 }, { "epoch": 0.8187345192524206, "grad_norm": 1.4038729667663574, "learning_rate": 6.412244651365302e-05, "loss": 1.6981, "step": 4545 }, { "epoch": 0.818914658860617, "grad_norm": 1.2938554286956787, "learning_rate": 6.410886104681806e-05, "loss": 1.3584, "step": 4546 }, { "epoch": 0.8190947984688133, "grad_norm": 1.4238848686218262, "learning_rate": 6.409527444817716e-05, "loss": 1.3944, "step": 4547 }, { "epoch": 0.8192749380770097, "grad_norm": 1.4574675559997559, "learning_rate": 6.40816867188202e-05, "loss": 1.6258, "step": 4548 }, { "epoch": 0.819455077685206, "grad_norm": 1.344557523727417, "learning_rate": 6.406809785983717e-05, "loss": 1.3529, "step": 4549 }, { "epoch": 0.8196352172934024, "grad_norm": 1.442880630493164, "learning_rate": 6.40545078723182e-05, "loss": 1.5632, "step": 4550 }, { "epoch": 0.8198153569015988, "grad_norm": 1.537803292274475, "learning_rate": 6.404091675735344e-05, "loss": 2.1995, "step": 4551 }, { "epoch": 0.8199954965097951, "grad_norm": 1.1940757036209106, "learning_rate": 6.402732451603322e-05, "loss": 1.9971, "step": 4552 }, { "epoch": 0.8201756361179915, "grad_norm": 1.6424672603607178, "learning_rate": 6.401373114944781e-05, "loss": 1.6392, "step": 4553 }, { "epoch": 0.8203557757261878, "grad_norm": 1.2726942300796509, "learning_rate": 6.400013665868774e-05, "loss": 1.7334, "step": 4554 }, { "epoch": 0.8205359153343842, "grad_norm": 1.3184664249420166, "learning_rate": 6.398654104484351e-05, "loss": 1.7732, "step": 4555 }, { "epoch": 0.8207160549425805, "grad_norm": 1.3863942623138428, "learning_rate": 6.397294430900578e-05, "loss": 1.7735, "step": 4556 }, { "epoch": 0.8208961945507769, "grad_norm": 1.3770524263381958, "learning_rate": 6.395934645226526e-05, "loss": 1.7526, "step": 4557 }, { "epoch": 0.8210763341589732, "grad_norm": 1.5593171119689941, "learning_rate": 6.394574747571277e-05, "loss": 2.1031, "step": 4558 }, { "epoch": 0.8212564737671696, "grad_norm": 1.5171849727630615, "learning_rate": 6.393214738043922e-05, "loss": 1.875, "step": 4559 }, { "epoch": 0.8214366133753659, "grad_norm": 1.7083256244659424, "learning_rate": 6.391854616753556e-05, "loss": 2.2154, "step": 4560 }, { "epoch": 0.8216167529835623, "grad_norm": 1.2111754417419434, "learning_rate": 6.390494383809293e-05, "loss": 1.3375, "step": 4561 }, { "epoch": 0.8217968925917586, "grad_norm": 1.3022698163986206, "learning_rate": 6.389134039320248e-05, "loss": 1.5758, "step": 4562 }, { "epoch": 0.821977032199955, "grad_norm": 1.5092780590057373, "learning_rate": 6.387773583395548e-05, "loss": 1.7321, "step": 4563 }, { "epoch": 0.8221571718081513, "grad_norm": 1.3205093145370483, "learning_rate": 6.386413016144324e-05, "loss": 1.4703, "step": 4564 }, { "epoch": 0.8223373114163477, "grad_norm": 1.437686562538147, "learning_rate": 6.385052337675725e-05, "loss": 1.6962, "step": 4565 }, { "epoch": 0.822517451024544, "grad_norm": 1.3500137329101562, "learning_rate": 6.383691548098902e-05, "loss": 1.7265, "step": 4566 }, { "epoch": 0.8226975906327404, "grad_norm": 1.4039119482040405, "learning_rate": 6.382330647523017e-05, "loss": 1.6105, "step": 4567 }, { "epoch": 0.8228777302409367, "grad_norm": 1.4360295534133911, "learning_rate": 6.380969636057241e-05, "loss": 1.6602, "step": 4568 }, { "epoch": 0.8230578698491331, "grad_norm": 1.3655681610107422, "learning_rate": 6.379608513810753e-05, "loss": 1.5239, "step": 4569 }, { "epoch": 0.8232380094573294, "grad_norm": 1.4226715564727783, "learning_rate": 6.378247280892742e-05, "loss": 1.6426, "step": 4570 }, { "epoch": 0.8234181490655258, "grad_norm": 1.2794270515441895, "learning_rate": 6.376885937412404e-05, "loss": 1.5989, "step": 4571 }, { "epoch": 0.8235982886737221, "grad_norm": 1.2326371669769287, "learning_rate": 6.37552448347895e-05, "loss": 1.2904, "step": 4572 }, { "epoch": 0.8237784282819185, "grad_norm": 1.397631287574768, "learning_rate": 6.374162919201591e-05, "loss": 1.6154, "step": 4573 }, { "epoch": 0.8239585678901148, "grad_norm": 1.369861364364624, "learning_rate": 6.372801244689551e-05, "loss": 1.6403, "step": 4574 }, { "epoch": 0.8241387074983112, "grad_norm": 1.3602043390274048, "learning_rate": 6.371439460052064e-05, "loss": 1.4651, "step": 4575 }, { "epoch": 0.8243188471065075, "grad_norm": 1.4389811754226685, "learning_rate": 6.370077565398371e-05, "loss": 1.4494, "step": 4576 }, { "epoch": 0.8244989867147039, "grad_norm": 1.3502315282821655, "learning_rate": 6.368715560837723e-05, "loss": 1.5568, "step": 4577 }, { "epoch": 0.8246791263229003, "grad_norm": 1.3633898496627808, "learning_rate": 6.36735344647938e-05, "loss": 1.7038, "step": 4578 }, { "epoch": 0.8248592659310966, "grad_norm": 1.2685598134994507, "learning_rate": 6.36599122243261e-05, "loss": 1.3985, "step": 4579 }, { "epoch": 0.825039405539293, "grad_norm": 1.3337403535842896, "learning_rate": 6.364628888806689e-05, "loss": 1.4886, "step": 4580 }, { "epoch": 0.8252195451474893, "grad_norm": 1.527099609375, "learning_rate": 6.363266445710903e-05, "loss": 1.5541, "step": 4581 }, { "epoch": 0.8253996847556857, "grad_norm": 1.592868685722351, "learning_rate": 6.361903893254548e-05, "loss": 1.3807, "step": 4582 }, { "epoch": 0.825579824363882, "grad_norm": 1.319865345954895, "learning_rate": 6.360541231546924e-05, "loss": 1.4804, "step": 4583 }, { "epoch": 0.8257599639720784, "grad_norm": 1.3298275470733643, "learning_rate": 6.359178460697346e-05, "loss": 1.4019, "step": 4584 }, { "epoch": 0.8259401035802747, "grad_norm": 1.3917150497436523, "learning_rate": 6.357815580815135e-05, "loss": 1.5151, "step": 4585 }, { "epoch": 0.8261202431884711, "grad_norm": 1.2952529191970825, "learning_rate": 6.356452592009619e-05, "loss": 1.5054, "step": 4586 }, { "epoch": 0.8263003827966674, "grad_norm": 1.3699872493743896, "learning_rate": 6.355089494390136e-05, "loss": 1.5985, "step": 4587 }, { "epoch": 0.8264805224048638, "grad_norm": 1.3411661386489868, "learning_rate": 6.353726288066033e-05, "loss": 1.3733, "step": 4588 }, { "epoch": 0.8266606620130601, "grad_norm": 1.4760723114013672, "learning_rate": 6.352362973146669e-05, "loss": 1.6383, "step": 4589 }, { "epoch": 0.8268408016212565, "grad_norm": 1.5501197576522827, "learning_rate": 6.350999549741404e-05, "loss": 1.7465, "step": 4590 }, { "epoch": 0.8270209412294528, "grad_norm": 1.340526819229126, "learning_rate": 6.349636017959615e-05, "loss": 1.5862, "step": 4591 }, { "epoch": 0.8272010808376492, "grad_norm": 1.433256983757019, "learning_rate": 6.34827237791068e-05, "loss": 1.631, "step": 4592 }, { "epoch": 0.8273812204458455, "grad_norm": 1.3091191053390503, "learning_rate": 6.346908629703992e-05, "loss": 1.4199, "step": 4593 }, { "epoch": 0.8275613600540419, "grad_norm": 1.3807443380355835, "learning_rate": 6.34554477344895e-05, "loss": 1.5952, "step": 4594 }, { "epoch": 0.8277414996622382, "grad_norm": 1.2758647203445435, "learning_rate": 6.344180809254963e-05, "loss": 1.373, "step": 4595 }, { "epoch": 0.8279216392704346, "grad_norm": 1.3240890502929688, "learning_rate": 6.342816737231446e-05, "loss": 1.4888, "step": 4596 }, { "epoch": 0.8281017788786309, "grad_norm": 1.3974522352218628, "learning_rate": 6.341452557487821e-05, "loss": 1.3543, "step": 4597 }, { "epoch": 0.8282819184868273, "grad_norm": 1.3768842220306396, "learning_rate": 6.340088270133528e-05, "loss": 1.5455, "step": 4598 }, { "epoch": 0.8284620580950236, "grad_norm": 1.2990660667419434, "learning_rate": 6.338723875278006e-05, "loss": 1.3839, "step": 4599 }, { "epoch": 0.82864219770322, "grad_norm": 1.3482491970062256, "learning_rate": 6.337359373030708e-05, "loss": 1.5414, "step": 4600 }, { "epoch": 0.8288223373114163, "grad_norm": 1.3061233758926392, "learning_rate": 6.335994763501094e-05, "loss": 2.0464, "step": 4601 }, { "epoch": 0.8290024769196127, "grad_norm": 1.2174551486968994, "learning_rate": 6.334630046798627e-05, "loss": 1.835, "step": 4602 }, { "epoch": 0.829182616527809, "grad_norm": 1.326425552368164, "learning_rate": 6.333265223032789e-05, "loss": 1.984, "step": 4603 }, { "epoch": 0.8293627561360054, "grad_norm": 1.3246726989746094, "learning_rate": 6.331900292313063e-05, "loss": 1.9577, "step": 4604 }, { "epoch": 0.8295428957442017, "grad_norm": 1.3895658254623413, "learning_rate": 6.330535254748946e-05, "loss": 2.085, "step": 4605 }, { "epoch": 0.8297230353523981, "grad_norm": 1.2714837789535522, "learning_rate": 6.32917011044994e-05, "loss": 1.5644, "step": 4606 }, { "epoch": 0.8299031749605945, "grad_norm": 1.3417433500289917, "learning_rate": 6.327804859525553e-05, "loss": 2.0656, "step": 4607 }, { "epoch": 0.8300833145687908, "grad_norm": 1.440290927886963, "learning_rate": 6.326439502085309e-05, "loss": 2.0938, "step": 4608 }, { "epoch": 0.8302634541769872, "grad_norm": 1.4658920764923096, "learning_rate": 6.325074038238732e-05, "loss": 2.0801, "step": 4609 }, { "epoch": 0.8304435937851835, "grad_norm": 1.7624857425689697, "learning_rate": 6.323708468095362e-05, "loss": 2.0864, "step": 4610 }, { "epoch": 0.8306237333933799, "grad_norm": 1.8614895343780518, "learning_rate": 6.322342791764744e-05, "loss": 2.0584, "step": 4611 }, { "epoch": 0.8308038730015762, "grad_norm": 1.2200448513031006, "learning_rate": 6.320977009356431e-05, "loss": 1.6163, "step": 4612 }, { "epoch": 0.8309840126097726, "grad_norm": 1.2457724809646606, "learning_rate": 6.319611120979984e-05, "loss": 1.4773, "step": 4613 }, { "epoch": 0.8311641522179689, "grad_norm": 1.3264724016189575, "learning_rate": 6.318245126744978e-05, "loss": 1.5159, "step": 4614 }, { "epoch": 0.8313442918261653, "grad_norm": 1.3391426801681519, "learning_rate": 6.316879026760989e-05, "loss": 1.6065, "step": 4615 }, { "epoch": 0.8315244314343616, "grad_norm": 1.300076961517334, "learning_rate": 6.315512821137606e-05, "loss": 1.4951, "step": 4616 }, { "epoch": 0.831704571042558, "grad_norm": 1.2662957906723022, "learning_rate": 6.314146509984427e-05, "loss": 1.5941, "step": 4617 }, { "epoch": 0.8318847106507543, "grad_norm": 1.310592770576477, "learning_rate": 6.312780093411053e-05, "loss": 1.6058, "step": 4618 }, { "epoch": 0.8320648502589507, "grad_norm": 1.329297423362732, "learning_rate": 6.311413571527099e-05, "loss": 1.414, "step": 4619 }, { "epoch": 0.832244989867147, "grad_norm": 1.2725532054901123, "learning_rate": 6.310046944442187e-05, "loss": 1.5976, "step": 4620 }, { "epoch": 0.8324251294753434, "grad_norm": 1.4283620119094849, "learning_rate": 6.30868021226595e-05, "loss": 1.492, "step": 4621 }, { "epoch": 0.8326052690835397, "grad_norm": 1.4487082958221436, "learning_rate": 6.307313375108021e-05, "loss": 1.8385, "step": 4622 }, { "epoch": 0.8327854086917361, "grad_norm": 1.2685261964797974, "learning_rate": 6.305946433078051e-05, "loss": 1.3864, "step": 4623 }, { "epoch": 0.8329655482999324, "grad_norm": 1.3122773170471191, "learning_rate": 6.304579386285694e-05, "loss": 1.4753, "step": 4624 }, { "epoch": 0.8331456879081288, "grad_norm": 1.407189965248108, "learning_rate": 6.303212234840615e-05, "loss": 1.7095, "step": 4625 }, { "epoch": 0.8333258275163251, "grad_norm": 1.3137348890304565, "learning_rate": 6.301844978852484e-05, "loss": 1.3032, "step": 4626 }, { "epoch": 0.8335059671245215, "grad_norm": 1.4981633424758911, "learning_rate": 6.300477618430983e-05, "loss": 1.9303, "step": 4627 }, { "epoch": 0.8336861067327178, "grad_norm": 1.3229765892028809, "learning_rate": 6.299110153685802e-05, "loss": 1.6261, "step": 4628 }, { "epoch": 0.8338662463409142, "grad_norm": 1.231129765510559, "learning_rate": 6.297742584726635e-05, "loss": 1.4318, "step": 4629 }, { "epoch": 0.8340463859491105, "grad_norm": 1.4217721223831177, "learning_rate": 6.296374911663191e-05, "loss": 1.5453, "step": 4630 }, { "epoch": 0.8342265255573069, "grad_norm": 1.2488220930099487, "learning_rate": 6.295007134605184e-05, "loss": 1.5537, "step": 4631 }, { "epoch": 0.8344066651655032, "grad_norm": 1.3453364372253418, "learning_rate": 6.293639253662333e-05, "loss": 1.5872, "step": 4632 }, { "epoch": 0.8345868047736996, "grad_norm": 1.4354774951934814, "learning_rate": 6.292271268944375e-05, "loss": 1.3761, "step": 4633 }, { "epoch": 0.8347669443818959, "grad_norm": 1.30548894405365, "learning_rate": 6.290903180561044e-05, "loss": 1.3112, "step": 4634 }, { "epoch": 0.8349470839900923, "grad_norm": 1.2789276838302612, "learning_rate": 6.289534988622087e-05, "loss": 1.3596, "step": 4635 }, { "epoch": 0.8351272235982887, "grad_norm": 1.4940729141235352, "learning_rate": 6.28816669323726e-05, "loss": 1.89, "step": 4636 }, { "epoch": 0.835307363206485, "grad_norm": 1.353440284729004, "learning_rate": 6.286798294516331e-05, "loss": 1.2853, "step": 4637 }, { "epoch": 0.8354875028146814, "grad_norm": 1.6403306722640991, "learning_rate": 6.28542979256907e-05, "loss": 1.7644, "step": 4638 }, { "epoch": 0.8356676424228777, "grad_norm": 1.4719386100769043, "learning_rate": 6.284061187505257e-05, "loss": 1.5504, "step": 4639 }, { "epoch": 0.8358477820310741, "grad_norm": 1.3217520713806152, "learning_rate": 6.28269247943468e-05, "loss": 1.4542, "step": 4640 }, { "epoch": 0.8360279216392704, "grad_norm": 1.378509521484375, "learning_rate": 6.281323668467136e-05, "loss": 1.5927, "step": 4641 }, { "epoch": 0.8362080612474668, "grad_norm": 1.2521916627883911, "learning_rate": 6.279954754712433e-05, "loss": 1.384, "step": 4642 }, { "epoch": 0.8363882008556631, "grad_norm": 1.5763472318649292, "learning_rate": 6.278585738280384e-05, "loss": 1.5358, "step": 4643 }, { "epoch": 0.8365683404638595, "grad_norm": 1.3223928213119507, "learning_rate": 6.277216619280808e-05, "loss": 1.4529, "step": 4644 }, { "epoch": 0.8367484800720558, "grad_norm": 1.3129265308380127, "learning_rate": 6.275847397823537e-05, "loss": 1.3774, "step": 4645 }, { "epoch": 0.8369286196802522, "grad_norm": 1.4142290353775024, "learning_rate": 6.27447807401841e-05, "loss": 1.5104, "step": 4646 }, { "epoch": 0.8371087592884485, "grad_norm": 1.4789080619812012, "learning_rate": 6.273108647975273e-05, "loss": 1.3418, "step": 4647 }, { "epoch": 0.8372888988966449, "grad_norm": 1.406337857246399, "learning_rate": 6.271739119803979e-05, "loss": 1.3727, "step": 4648 }, { "epoch": 0.8374690385048412, "grad_norm": 1.3417208194732666, "learning_rate": 6.270369489614394e-05, "loss": 1.3004, "step": 4649 }, { "epoch": 0.8376491781130376, "grad_norm": 1.2350142002105713, "learning_rate": 6.268999757516385e-05, "loss": 1.1123, "step": 4650 }, { "epoch": 0.8378293177212339, "grad_norm": 1.3327823877334595, "learning_rate": 6.267629923619835e-05, "loss": 2.035, "step": 4651 }, { "epoch": 0.8380094573294303, "grad_norm": 1.2755335569381714, "learning_rate": 6.26625998803463e-05, "loss": 1.9764, "step": 4652 }, { "epoch": 0.8381895969376266, "grad_norm": 1.1044981479644775, "learning_rate": 6.264889950870664e-05, "loss": 1.6571, "step": 4653 }, { "epoch": 0.838369736545823, "grad_norm": 1.3979949951171875, "learning_rate": 6.263519812237845e-05, "loss": 2.1235, "step": 4654 }, { "epoch": 0.8385498761540193, "grad_norm": 1.342162847518921, "learning_rate": 6.262149572246081e-05, "loss": 1.9552, "step": 4655 }, { "epoch": 0.8387300157622157, "grad_norm": 1.4222033023834229, "learning_rate": 6.260779231005293e-05, "loss": 2.1543, "step": 4656 }, { "epoch": 0.838910155370412, "grad_norm": 1.440486192703247, "learning_rate": 6.259408788625408e-05, "loss": 1.9623, "step": 4657 }, { "epoch": 0.8390902949786084, "grad_norm": 1.5896884202957153, "learning_rate": 6.258038245216365e-05, "loss": 2.1164, "step": 4658 }, { "epoch": 0.8392704345868047, "grad_norm": 1.6232459545135498, "learning_rate": 6.256667600888106e-05, "loss": 2.089, "step": 4659 }, { "epoch": 0.8394505741950011, "grad_norm": 1.4081693887710571, "learning_rate": 6.255296855750584e-05, "loss": 1.8755, "step": 4660 }, { "epoch": 0.8396307138031974, "grad_norm": 1.3497090339660645, "learning_rate": 6.253926009913761e-05, "loss": 1.7496, "step": 4661 }, { "epoch": 0.8398108534113938, "grad_norm": 1.423877239227295, "learning_rate": 6.252555063487602e-05, "loss": 1.8525, "step": 4662 }, { "epoch": 0.8399909930195902, "grad_norm": 1.276597261428833, "learning_rate": 6.251184016582088e-05, "loss": 1.819, "step": 4663 }, { "epoch": 0.8401711326277865, "grad_norm": 1.2613213062286377, "learning_rate": 6.249812869307198e-05, "loss": 1.569, "step": 4664 }, { "epoch": 0.8403512722359829, "grad_norm": 1.2881382703781128, "learning_rate": 6.248441621772933e-05, "loss": 1.491, "step": 4665 }, { "epoch": 0.8405314118441792, "grad_norm": 1.4041613340377808, "learning_rate": 6.247070274089288e-05, "loss": 1.5604, "step": 4666 }, { "epoch": 0.8407115514523756, "grad_norm": 1.2167034149169922, "learning_rate": 6.245698826366272e-05, "loss": 1.4768, "step": 4667 }, { "epoch": 0.8408916910605719, "grad_norm": 1.2529959678649902, "learning_rate": 6.244327278713904e-05, "loss": 1.2245, "step": 4668 }, { "epoch": 0.8410718306687683, "grad_norm": 1.3273762464523315, "learning_rate": 6.242955631242205e-05, "loss": 1.6981, "step": 4669 }, { "epoch": 0.8412519702769646, "grad_norm": 1.301316738128662, "learning_rate": 6.241583884061214e-05, "loss": 1.445, "step": 4670 }, { "epoch": 0.841432109885161, "grad_norm": 1.2993309497833252, "learning_rate": 6.240212037280966e-05, "loss": 1.5646, "step": 4671 }, { "epoch": 0.8416122494933573, "grad_norm": 1.2444121837615967, "learning_rate": 6.238840091011515e-05, "loss": 1.5712, "step": 4672 }, { "epoch": 0.8417923891015537, "grad_norm": 1.3461843729019165, "learning_rate": 6.237468045362915e-05, "loss": 1.5655, "step": 4673 }, { "epoch": 0.84197252870975, "grad_norm": 1.4032658338546753, "learning_rate": 6.236095900445229e-05, "loss": 1.5982, "step": 4674 }, { "epoch": 0.8421526683179464, "grad_norm": 1.3040753602981567, "learning_rate": 6.234723656368533e-05, "loss": 1.5227, "step": 4675 }, { "epoch": 0.8423328079261427, "grad_norm": 1.2950160503387451, "learning_rate": 6.233351313242907e-05, "loss": 1.532, "step": 4676 }, { "epoch": 0.8425129475343391, "grad_norm": 1.5120325088500977, "learning_rate": 6.23197887117844e-05, "loss": 1.7143, "step": 4677 }, { "epoch": 0.8426930871425354, "grad_norm": 1.3828502893447876, "learning_rate": 6.230606330285227e-05, "loss": 1.4519, "step": 4678 }, { "epoch": 0.8428732267507318, "grad_norm": 1.1508655548095703, "learning_rate": 6.229233690673375e-05, "loss": 1.369, "step": 4679 }, { "epoch": 0.8430533663589281, "grad_norm": 1.3323533535003662, "learning_rate": 6.227860952452993e-05, "loss": 1.5739, "step": 4680 }, { "epoch": 0.8432335059671245, "grad_norm": 1.2645061016082764, "learning_rate": 6.226488115734204e-05, "loss": 1.2532, "step": 4681 }, { "epoch": 0.8434136455753208, "grad_norm": 1.4220880270004272, "learning_rate": 6.225115180627137e-05, "loss": 1.5388, "step": 4682 }, { "epoch": 0.8435937851835172, "grad_norm": 1.3079980611801147, "learning_rate": 6.223742147241926e-05, "loss": 1.5281, "step": 4683 }, { "epoch": 0.8437739247917135, "grad_norm": 1.3803733587265015, "learning_rate": 6.222369015688716e-05, "loss": 1.5258, "step": 4684 }, { "epoch": 0.8439540643999099, "grad_norm": 1.4696791172027588, "learning_rate": 6.220995786077658e-05, "loss": 1.6131, "step": 4685 }, { "epoch": 0.8441342040081062, "grad_norm": 1.318091630935669, "learning_rate": 6.219622458518915e-05, "loss": 1.3615, "step": 4686 }, { "epoch": 0.8443143436163026, "grad_norm": 1.4679065942764282, "learning_rate": 6.21824903312265e-05, "loss": 1.6684, "step": 4687 }, { "epoch": 0.8444944832244989, "grad_norm": 1.5002509355545044, "learning_rate": 6.216875509999043e-05, "loss": 1.6468, "step": 4688 }, { "epoch": 0.8446746228326953, "grad_norm": 1.4470481872558594, "learning_rate": 6.215501889258275e-05, "loss": 1.5381, "step": 4689 }, { "epoch": 0.8448547624408916, "grad_norm": 1.3229215145111084, "learning_rate": 6.214128171010536e-05, "loss": 1.4441, "step": 4690 }, { "epoch": 0.845034902049088, "grad_norm": 1.3469367027282715, "learning_rate": 6.212754355366029e-05, "loss": 1.4857, "step": 4691 }, { "epoch": 0.8452150416572844, "grad_norm": 1.3363783359527588, "learning_rate": 6.211380442434957e-05, "loss": 1.4232, "step": 4692 }, { "epoch": 0.8453951812654807, "grad_norm": 1.422350287437439, "learning_rate": 6.210006432327535e-05, "loss": 1.501, "step": 4693 }, { "epoch": 0.8455753208736771, "grad_norm": 1.4966259002685547, "learning_rate": 6.208632325153988e-05, "loss": 1.5585, "step": 4694 }, { "epoch": 0.8457554604818734, "grad_norm": 1.3642261028289795, "learning_rate": 6.207258121024543e-05, "loss": 1.4925, "step": 4695 }, { "epoch": 0.8459356000900699, "grad_norm": 1.379480242729187, "learning_rate": 6.205883820049441e-05, "loss": 1.413, "step": 4696 }, { "epoch": 0.8461157396982661, "grad_norm": 1.2150439023971558, "learning_rate": 6.204509422338924e-05, "loss": 1.3691, "step": 4697 }, { "epoch": 0.8462958793064626, "grad_norm": 1.382617712020874, "learning_rate": 6.203134928003251e-05, "loss": 1.3898, "step": 4698 }, { "epoch": 0.8464760189146588, "grad_norm": 1.2476613521575928, "learning_rate": 6.201760337152677e-05, "loss": 1.4084, "step": 4699 }, { "epoch": 0.8466561585228553, "grad_norm": 1.4892241954803467, "learning_rate": 6.200385649897477e-05, "loss": 1.8027, "step": 4700 }, { "epoch": 0.8468362981310515, "grad_norm": 1.2168999910354614, "learning_rate": 6.199010866347922e-05, "loss": 1.9943, "step": 4701 }, { "epoch": 0.847016437739248, "grad_norm": 1.3281121253967285, "learning_rate": 6.197635986614302e-05, "loss": 1.9106, "step": 4702 }, { "epoch": 0.8471965773474442, "grad_norm": 1.2331305742263794, "learning_rate": 6.196261010806905e-05, "loss": 1.8294, "step": 4703 }, { "epoch": 0.8473767169556407, "grad_norm": 1.5338815450668335, "learning_rate": 6.194885939036033e-05, "loss": 1.6992, "step": 4704 }, { "epoch": 0.847556856563837, "grad_norm": 1.3370227813720703, "learning_rate": 6.193510771411993e-05, "loss": 1.9032, "step": 4705 }, { "epoch": 0.8477369961720334, "grad_norm": 1.3217793703079224, "learning_rate": 6.1921355080451e-05, "loss": 2.1385, "step": 4706 }, { "epoch": 0.8479171357802296, "grad_norm": 1.5505423545837402, "learning_rate": 6.190760149045677e-05, "loss": 2.1535, "step": 4707 }, { "epoch": 0.848097275388426, "grad_norm": 1.4694255590438843, "learning_rate": 6.189384694524056e-05, "loss": 1.8964, "step": 4708 }, { "epoch": 0.8482774149966223, "grad_norm": 1.6660085916519165, "learning_rate": 6.188009144590574e-05, "loss": 2.0454, "step": 4709 }, { "epoch": 0.8484575546048188, "grad_norm": 1.3536443710327148, "learning_rate": 6.186633499355576e-05, "loss": 1.5905, "step": 4710 }, { "epoch": 0.848637694213015, "grad_norm": 1.2272223234176636, "learning_rate": 6.185257758929415e-05, "loss": 1.5948, "step": 4711 }, { "epoch": 0.8488178338212115, "grad_norm": 1.3076916933059692, "learning_rate": 6.183881923422457e-05, "loss": 1.675, "step": 4712 }, { "epoch": 0.8489979734294077, "grad_norm": 1.2455263137817383, "learning_rate": 6.182505992945064e-05, "loss": 1.564, "step": 4713 }, { "epoch": 0.8491781130376042, "grad_norm": 1.254154086112976, "learning_rate": 6.181129967607621e-05, "loss": 1.3722, "step": 4714 }, { "epoch": 0.8493582526458004, "grad_norm": 1.2478543519973755, "learning_rate": 6.179753847520506e-05, "loss": 1.5416, "step": 4715 }, { "epoch": 0.8495383922539969, "grad_norm": 1.4882732629776, "learning_rate": 6.17837763279411e-05, "loss": 1.6731, "step": 4716 }, { "epoch": 0.8497185318621931, "grad_norm": 1.309693694114685, "learning_rate": 6.177001323538836e-05, "loss": 1.4801, "step": 4717 }, { "epoch": 0.8498986714703896, "grad_norm": 1.261641263961792, "learning_rate": 6.175624919865087e-05, "loss": 1.6143, "step": 4718 }, { "epoch": 0.8500788110785858, "grad_norm": 1.4516394138336182, "learning_rate": 6.174248421883282e-05, "loss": 1.5207, "step": 4719 }, { "epoch": 0.8502589506867823, "grad_norm": 1.4540899991989136, "learning_rate": 6.172871829703839e-05, "loss": 1.6793, "step": 4720 }, { "epoch": 0.8504390902949787, "grad_norm": 1.2702233791351318, "learning_rate": 6.17149514343719e-05, "loss": 1.3969, "step": 4721 }, { "epoch": 0.850619229903175, "grad_norm": 1.4019607305526733, "learning_rate": 6.170118363193772e-05, "loss": 1.8609, "step": 4722 }, { "epoch": 0.8507993695113714, "grad_norm": 1.2587826251983643, "learning_rate": 6.168741489084026e-05, "loss": 1.3129, "step": 4723 }, { "epoch": 0.8509795091195677, "grad_norm": 1.452796459197998, "learning_rate": 6.16736452121841e-05, "loss": 1.5949, "step": 4724 }, { "epoch": 0.8511596487277641, "grad_norm": 1.39859139919281, "learning_rate": 6.16598745970738e-05, "loss": 1.6331, "step": 4725 }, { "epoch": 0.8513397883359604, "grad_norm": 1.2477107048034668, "learning_rate": 6.164610304661403e-05, "loss": 1.4915, "step": 4726 }, { "epoch": 0.8515199279441568, "grad_norm": 1.4244507551193237, "learning_rate": 6.163233056190955e-05, "loss": 1.7, "step": 4727 }, { "epoch": 0.8517000675523531, "grad_norm": 1.3881311416625977, "learning_rate": 6.161855714406517e-05, "loss": 1.4208, "step": 4728 }, { "epoch": 0.8518802071605495, "grad_norm": 1.4379562139511108, "learning_rate": 6.16047827941858e-05, "loss": 1.6672, "step": 4729 }, { "epoch": 0.8520603467687458, "grad_norm": 1.3581207990646362, "learning_rate": 6.159100751337642e-05, "loss": 1.8151, "step": 4730 }, { "epoch": 0.8522404863769422, "grad_norm": 1.5168596506118774, "learning_rate": 6.157723130274205e-05, "loss": 1.8394, "step": 4731 }, { "epoch": 0.8524206259851385, "grad_norm": 1.4738023281097412, "learning_rate": 6.156345416338783e-05, "loss": 1.7189, "step": 4732 }, { "epoch": 0.8526007655933349, "grad_norm": 1.3453974723815918, "learning_rate": 6.154967609641895e-05, "loss": 1.3966, "step": 4733 }, { "epoch": 0.8527809052015312, "grad_norm": 1.2744262218475342, "learning_rate": 6.153589710294067e-05, "loss": 1.3853, "step": 4734 }, { "epoch": 0.8529610448097276, "grad_norm": 1.3842501640319824, "learning_rate": 6.152211718405835e-05, "loss": 1.769, "step": 4735 }, { "epoch": 0.8531411844179239, "grad_norm": 1.38172447681427, "learning_rate": 6.15083363408774e-05, "loss": 1.3845, "step": 4736 }, { "epoch": 0.8533213240261203, "grad_norm": 1.382222056388855, "learning_rate": 6.149455457450332e-05, "loss": 1.7328, "step": 4737 }, { "epoch": 0.8535014636343166, "grad_norm": 1.5157599449157715, "learning_rate": 6.148077188604167e-05, "loss": 1.6104, "step": 4738 }, { "epoch": 0.853681603242513, "grad_norm": 1.3676745891571045, "learning_rate": 6.146698827659809e-05, "loss": 1.4409, "step": 4739 }, { "epoch": 0.8538617428507093, "grad_norm": 1.319949984550476, "learning_rate": 6.145320374727828e-05, "loss": 1.4274, "step": 4740 }, { "epoch": 0.8540418824589057, "grad_norm": 1.4198298454284668, "learning_rate": 6.143941829918807e-05, "loss": 1.444, "step": 4741 }, { "epoch": 0.854222022067102, "grad_norm": 1.4113118648529053, "learning_rate": 6.142563193343328e-05, "loss": 1.5287, "step": 4742 }, { "epoch": 0.8544021616752984, "grad_norm": 1.3260498046875, "learning_rate": 6.141184465111985e-05, "loss": 1.5862, "step": 4743 }, { "epoch": 0.8545823012834947, "grad_norm": 1.374909520149231, "learning_rate": 6.139805645335383e-05, "loss": 1.4731, "step": 4744 }, { "epoch": 0.8547624408916911, "grad_norm": 1.4753024578094482, "learning_rate": 6.138426734124125e-05, "loss": 1.7286, "step": 4745 }, { "epoch": 0.8549425804998874, "grad_norm": 1.388883352279663, "learning_rate": 6.13704773158883e-05, "loss": 1.401, "step": 4746 }, { "epoch": 0.8551227201080838, "grad_norm": 1.352304458618164, "learning_rate": 6.13566863784012e-05, "loss": 1.131, "step": 4747 }, { "epoch": 0.8553028597162801, "grad_norm": 1.4158308506011963, "learning_rate": 6.134289452988624e-05, "loss": 1.4943, "step": 4748 }, { "epoch": 0.8554829993244765, "grad_norm": 1.1839171648025513, "learning_rate": 6.132910177144981e-05, "loss": 1.0694, "step": 4749 }, { "epoch": 0.8556631389326729, "grad_norm": 1.160314679145813, "learning_rate": 6.131530810419836e-05, "loss": 1.1007, "step": 4750 }, { "epoch": 0.8558432785408692, "grad_norm": 1.2519688606262207, "learning_rate": 6.13015135292384e-05, "loss": 2.079, "step": 4751 }, { "epoch": 0.8560234181490656, "grad_norm": 1.377999186515808, "learning_rate": 6.128771804767653e-05, "loss": 2.2511, "step": 4752 }, { "epoch": 0.8562035577572619, "grad_norm": 1.21393883228302, "learning_rate": 6.127392166061941e-05, "loss": 1.8693, "step": 4753 }, { "epoch": 0.8563836973654583, "grad_norm": 1.2158241271972656, "learning_rate": 6.12601243691738e-05, "loss": 1.9023, "step": 4754 }, { "epoch": 0.8565638369736546, "grad_norm": 1.2721275091171265, "learning_rate": 6.124632617444649e-05, "loss": 1.6744, "step": 4755 }, { "epoch": 0.856743976581851, "grad_norm": 1.254005789756775, "learning_rate": 6.123252707754438e-05, "loss": 1.9115, "step": 4756 }, { "epoch": 0.8569241161900473, "grad_norm": 1.3052823543548584, "learning_rate": 6.121872707957441e-05, "loss": 1.626, "step": 4757 }, { "epoch": 0.8571042557982437, "grad_norm": 1.4470106363296509, "learning_rate": 6.120492618164362e-05, "loss": 1.7882, "step": 4758 }, { "epoch": 0.85728439540644, "grad_norm": 1.6067529916763306, "learning_rate": 6.11911243848591e-05, "loss": 1.9188, "step": 4759 }, { "epoch": 0.8574645350146364, "grad_norm": 1.6848806142807007, "learning_rate": 6.117732169032805e-05, "loss": 2.2247, "step": 4760 }, { "epoch": 0.8576446746228327, "grad_norm": 1.375502109527588, "learning_rate": 6.116351809915769e-05, "loss": 1.6211, "step": 4761 }, { "epoch": 0.8578248142310291, "grad_norm": 1.215942144393921, "learning_rate": 6.114971361245534e-05, "loss": 1.428, "step": 4762 }, { "epoch": 0.8580049538392254, "grad_norm": 1.2597742080688477, "learning_rate": 6.11359082313284e-05, "loss": 1.5426, "step": 4763 }, { "epoch": 0.8581850934474218, "grad_norm": 1.2725377082824707, "learning_rate": 6.112210195688433e-05, "loss": 1.585, "step": 4764 }, { "epoch": 0.8583652330556181, "grad_norm": 1.2748991250991821, "learning_rate": 6.110829479023066e-05, "loss": 1.6192, "step": 4765 }, { "epoch": 0.8585453726638145, "grad_norm": 1.4489142894744873, "learning_rate": 6.109448673247498e-05, "loss": 1.6671, "step": 4766 }, { "epoch": 0.8587255122720108, "grad_norm": 1.25693941116333, "learning_rate": 6.108067778472498e-05, "loss": 1.6059, "step": 4767 }, { "epoch": 0.8589056518802072, "grad_norm": 1.4101102352142334, "learning_rate": 6.106686794808842e-05, "loss": 1.6158, "step": 4768 }, { "epoch": 0.8590857914884035, "grad_norm": 1.3865492343902588, "learning_rate": 6.105305722367309e-05, "loss": 1.5778, "step": 4769 }, { "epoch": 0.8592659310965999, "grad_norm": 1.3160815238952637, "learning_rate": 6.10392456125869e-05, "loss": 1.5773, "step": 4770 }, { "epoch": 0.8594460707047962, "grad_norm": 1.3327451944351196, "learning_rate": 6.10254331159378e-05, "loss": 1.4592, "step": 4771 }, { "epoch": 0.8596262103129926, "grad_norm": 1.2754762172698975, "learning_rate": 6.101161973483383e-05, "loss": 1.4346, "step": 4772 }, { "epoch": 0.8598063499211889, "grad_norm": 1.375786304473877, "learning_rate": 6.0997805470383085e-05, "loss": 1.5247, "step": 4773 }, { "epoch": 0.8599864895293853, "grad_norm": 1.1936523914337158, "learning_rate": 6.098399032369375e-05, "loss": 1.3325, "step": 4774 }, { "epoch": 0.8601666291375816, "grad_norm": 1.3287545442581177, "learning_rate": 6.097017429587406e-05, "loss": 1.5158, "step": 4775 }, { "epoch": 0.860346768745778, "grad_norm": 1.32796311378479, "learning_rate": 6.095635738803231e-05, "loss": 1.4306, "step": 4776 }, { "epoch": 0.8605269083539744, "grad_norm": 1.3871411085128784, "learning_rate": 6.0942539601276924e-05, "loss": 1.5126, "step": 4777 }, { "epoch": 0.8607070479621707, "grad_norm": 1.3941295146942139, "learning_rate": 6.0928720936716335e-05, "loss": 1.5253, "step": 4778 }, { "epoch": 0.8608871875703671, "grad_norm": 1.3492759466171265, "learning_rate": 6.09149013954591e-05, "loss": 1.3709, "step": 4779 }, { "epoch": 0.8610673271785634, "grad_norm": 1.380707025527954, "learning_rate": 6.090108097861377e-05, "loss": 1.6046, "step": 4780 }, { "epoch": 0.8612474667867598, "grad_norm": 1.2643134593963623, "learning_rate": 6.0887259687289035e-05, "loss": 1.3613, "step": 4781 }, { "epoch": 0.8614276063949561, "grad_norm": 1.3875644207000732, "learning_rate": 6.0873437522593626e-05, "loss": 1.6409, "step": 4782 }, { "epoch": 0.8616077460031525, "grad_norm": 1.3749431371688843, "learning_rate": 6.085961448563635e-05, "loss": 1.5268, "step": 4783 }, { "epoch": 0.8617878856113488, "grad_norm": 1.2980433702468872, "learning_rate": 6.084579057752611e-05, "loss": 1.4625, "step": 4784 }, { "epoch": 0.8619680252195452, "grad_norm": 1.4089449644088745, "learning_rate": 6.083196579937183e-05, "loss": 1.559, "step": 4785 }, { "epoch": 0.8621481648277415, "grad_norm": 1.3613237142562866, "learning_rate": 6.081814015228252e-05, "loss": 1.5107, "step": 4786 }, { "epoch": 0.8623283044359379, "grad_norm": 1.4567334651947021, "learning_rate": 6.080431363736728e-05, "loss": 1.5551, "step": 4787 }, { "epoch": 0.8625084440441342, "grad_norm": 1.2888206243515015, "learning_rate": 6.079048625573525e-05, "loss": 1.4785, "step": 4788 }, { "epoch": 0.8626885836523306, "grad_norm": 1.4184306859970093, "learning_rate": 6.077665800849568e-05, "loss": 1.6856, "step": 4789 }, { "epoch": 0.8628687232605269, "grad_norm": 1.4704737663269043, "learning_rate": 6.0762828896757864e-05, "loss": 1.6755, "step": 4790 }, { "epoch": 0.8630488628687233, "grad_norm": 1.4484741687774658, "learning_rate": 6.074899892163115e-05, "loss": 1.6205, "step": 4791 }, { "epoch": 0.8632290024769196, "grad_norm": 1.4339154958724976, "learning_rate": 6.073516808422496e-05, "loss": 1.5008, "step": 4792 }, { "epoch": 0.863409142085116, "grad_norm": 1.2951631546020508, "learning_rate": 6.072133638564883e-05, "loss": 1.5817, "step": 4793 }, { "epoch": 0.8635892816933123, "grad_norm": 1.449933409690857, "learning_rate": 6.070750382701232e-05, "loss": 1.5311, "step": 4794 }, { "epoch": 0.8637694213015087, "grad_norm": 1.4684382677078247, "learning_rate": 6.0693670409425074e-05, "loss": 1.4948, "step": 4795 }, { "epoch": 0.863949560909705, "grad_norm": 1.30257248878479, "learning_rate": 6.0679836133996783e-05, "loss": 1.3349, "step": 4796 }, { "epoch": 0.8641297005179014, "grad_norm": 1.504602313041687, "learning_rate": 6.0666001001837236e-05, "loss": 1.4596, "step": 4797 }, { "epoch": 0.8643098401260977, "grad_norm": 1.3995978832244873, "learning_rate": 6.0652165014056294e-05, "loss": 1.3697, "step": 4798 }, { "epoch": 0.8644899797342941, "grad_norm": 1.389730453491211, "learning_rate": 6.063832817176386e-05, "loss": 1.2978, "step": 4799 }, { "epoch": 0.8646701193424904, "grad_norm": 1.2073769569396973, "learning_rate": 6.0624490476069916e-05, "loss": 1.2034, "step": 4800 }, { "epoch": 0.8648502589506868, "grad_norm": 1.3232203722000122, "learning_rate": 6.061065192808455e-05, "loss": 2.1753, "step": 4801 }, { "epoch": 0.8650303985588831, "grad_norm": 1.2441351413726807, "learning_rate": 6.0596812528917827e-05, "loss": 1.921, "step": 4802 }, { "epoch": 0.8652105381670795, "grad_norm": 1.377205729484558, "learning_rate": 6.058297227967996e-05, "loss": 1.8491, "step": 4803 }, { "epoch": 0.8653906777752758, "grad_norm": 1.2158855199813843, "learning_rate": 6.056913118148122e-05, "loss": 1.6871, "step": 4804 }, { "epoch": 0.8655708173834722, "grad_norm": 1.1970714330673218, "learning_rate": 6.055528923543192e-05, "loss": 1.8951, "step": 4805 }, { "epoch": 0.8657509569916686, "grad_norm": 1.2609047889709473, "learning_rate": 6.0541446442642466e-05, "loss": 1.8977, "step": 4806 }, { "epoch": 0.8659310965998649, "grad_norm": 1.351499080657959, "learning_rate": 6.052760280422332e-05, "loss": 1.9532, "step": 4807 }, { "epoch": 0.8661112362080613, "grad_norm": 1.4403001070022583, "learning_rate": 6.0513758321285e-05, "loss": 2.0017, "step": 4808 }, { "epoch": 0.8662913758162576, "grad_norm": 1.7267508506774902, "learning_rate": 6.049991299493809e-05, "loss": 2.5492, "step": 4809 }, { "epoch": 0.866471515424454, "grad_norm": 1.7751617431640625, "learning_rate": 6.0486066826293295e-05, "loss": 2.268, "step": 4810 }, { "epoch": 0.8666516550326503, "grad_norm": 1.392783522605896, "learning_rate": 6.047221981646132e-05, "loss": 1.8072, "step": 4811 }, { "epoch": 0.8668317946408467, "grad_norm": 1.4296787977218628, "learning_rate": 6.045837196655299e-05, "loss": 1.6098, "step": 4812 }, { "epoch": 0.867011934249043, "grad_norm": 1.2642333507537842, "learning_rate": 6.044452327767912e-05, "loss": 1.3019, "step": 4813 }, { "epoch": 0.8671920738572394, "grad_norm": 1.1790757179260254, "learning_rate": 6.0430673750950703e-05, "loss": 1.4551, "step": 4814 }, { "epoch": 0.8673722134654357, "grad_norm": 1.4662318229675293, "learning_rate": 6.0416823387478715e-05, "loss": 1.6536, "step": 4815 }, { "epoch": 0.8675523530736321, "grad_norm": 1.3558217287063599, "learning_rate": 6.040297218837422e-05, "loss": 1.6962, "step": 4816 }, { "epoch": 0.8677324926818284, "grad_norm": 1.2178094387054443, "learning_rate": 6.038912015474837e-05, "loss": 1.3872, "step": 4817 }, { "epoch": 0.8679126322900248, "grad_norm": 1.3234713077545166, "learning_rate": 6.0375267287712366e-05, "loss": 1.7961, "step": 4818 }, { "epoch": 0.8680927718982211, "grad_norm": 1.1687045097351074, "learning_rate": 6.036141358837747e-05, "loss": 1.4426, "step": 4819 }, { "epoch": 0.8682729115064175, "grad_norm": 1.3265058994293213, "learning_rate": 6.0347559057855006e-05, "loss": 1.6344, "step": 4820 }, { "epoch": 0.8684530511146138, "grad_norm": 1.4721804857254028, "learning_rate": 6.033370369725642e-05, "loss": 1.5868, "step": 4821 }, { "epoch": 0.8686331907228102, "grad_norm": 1.2043962478637695, "learning_rate": 6.031984750769315e-05, "loss": 1.4309, "step": 4822 }, { "epoch": 0.8688133303310065, "grad_norm": 1.3187940120697021, "learning_rate": 6.0305990490276745e-05, "loss": 1.6828, "step": 4823 }, { "epoch": 0.8689934699392029, "grad_norm": 1.3083454370498657, "learning_rate": 6.029213264611879e-05, "loss": 1.6196, "step": 4824 }, { "epoch": 0.8691736095473992, "grad_norm": 1.3861478567123413, "learning_rate": 6.027827397633097e-05, "loss": 1.7051, "step": 4825 }, { "epoch": 0.8693537491555956, "grad_norm": 1.3731266260147095, "learning_rate": 6.026441448202502e-05, "loss": 1.4161, "step": 4826 }, { "epoch": 0.8695338887637919, "grad_norm": 1.438948154449463, "learning_rate": 6.025055416431273e-05, "loss": 1.6066, "step": 4827 }, { "epoch": 0.8697140283719883, "grad_norm": 1.2723557949066162, "learning_rate": 6.0236693024306e-05, "loss": 1.4009, "step": 4828 }, { "epoch": 0.8698941679801846, "grad_norm": 1.3104726076126099, "learning_rate": 6.022283106311673e-05, "loss": 1.4465, "step": 4829 }, { "epoch": 0.870074307588381, "grad_norm": 1.391049861907959, "learning_rate": 6.0208968281856927e-05, "loss": 1.7052, "step": 4830 }, { "epoch": 0.8702544471965773, "grad_norm": 1.4700539112091064, "learning_rate": 6.019510468163866e-05, "loss": 1.6007, "step": 4831 }, { "epoch": 0.8704345868047737, "grad_norm": 1.3410675525665283, "learning_rate": 6.0181240263574056e-05, "loss": 1.6324, "step": 4832 }, { "epoch": 0.87061472641297, "grad_norm": 1.3417143821716309, "learning_rate": 6.016737502877533e-05, "loss": 1.482, "step": 4833 }, { "epoch": 0.8707948660211664, "grad_norm": 1.4190144538879395, "learning_rate": 6.0153508978354734e-05, "loss": 1.6507, "step": 4834 }, { "epoch": 0.8709750056293628, "grad_norm": 1.297910213470459, "learning_rate": 6.013964211342459e-05, "loss": 1.5162, "step": 4835 }, { "epoch": 0.8711551452375591, "grad_norm": 1.40993332862854, "learning_rate": 6.012577443509728e-05, "loss": 1.4281, "step": 4836 }, { "epoch": 0.8713352848457555, "grad_norm": 1.4083045721054077, "learning_rate": 6.0111905944485304e-05, "loss": 1.6359, "step": 4837 }, { "epoch": 0.8715154244539518, "grad_norm": 1.3539639711380005, "learning_rate": 6.009803664270114e-05, "loss": 1.3878, "step": 4838 }, { "epoch": 0.8716955640621482, "grad_norm": 1.5067014694213867, "learning_rate": 6.00841665308574e-05, "loss": 1.6564, "step": 4839 }, { "epoch": 0.8718757036703445, "grad_norm": 1.4218924045562744, "learning_rate": 6.007029561006674e-05, "loss": 1.4749, "step": 4840 }, { "epoch": 0.8720558432785409, "grad_norm": 1.3239388465881348, "learning_rate": 6.005642388144185e-05, "loss": 1.4957, "step": 4841 }, { "epoch": 0.8722359828867372, "grad_norm": 1.4342950582504272, "learning_rate": 6.0042551346095554e-05, "loss": 1.6117, "step": 4842 }, { "epoch": 0.8724161224949336, "grad_norm": 1.4106281995773315, "learning_rate": 6.002867800514066e-05, "loss": 1.4038, "step": 4843 }, { "epoch": 0.8725962621031299, "grad_norm": 1.6074198484420776, "learning_rate": 6.001480385969013e-05, "loss": 1.9554, "step": 4844 }, { "epoch": 0.8727764017113263, "grad_norm": 1.4746546745300293, "learning_rate": 6.00009289108569e-05, "loss": 1.5582, "step": 4845 }, { "epoch": 0.8729565413195226, "grad_norm": 1.5003559589385986, "learning_rate": 5.998705315975402e-05, "loss": 1.5545, "step": 4846 }, { "epoch": 0.873136680927719, "grad_norm": 1.4416735172271729, "learning_rate": 5.9973176607494596e-05, "loss": 1.6708, "step": 4847 }, { "epoch": 0.8733168205359153, "grad_norm": 1.5088763236999512, "learning_rate": 5.99592992551918e-05, "loss": 1.6303, "step": 4848 }, { "epoch": 0.8734969601441117, "grad_norm": 1.268067717552185, "learning_rate": 5.994542110395889e-05, "loss": 1.4367, "step": 4849 }, { "epoch": 0.873677099752308, "grad_norm": 1.4419240951538086, "learning_rate": 5.993154215490913e-05, "loss": 1.6497, "step": 4850 }, { "epoch": 0.8738572393605044, "grad_norm": 1.3228414058685303, "learning_rate": 5.99176624091559e-05, "loss": 1.8495, "step": 4851 }, { "epoch": 0.8740373789687007, "grad_norm": 1.24224853515625, "learning_rate": 5.990378186781261e-05, "loss": 1.6382, "step": 4852 }, { "epoch": 0.8742175185768971, "grad_norm": 1.3494120836257935, "learning_rate": 5.988990053199277e-05, "loss": 1.9537, "step": 4853 }, { "epoch": 0.8743976581850934, "grad_norm": 1.288163185119629, "learning_rate": 5.987601840280993e-05, "loss": 1.9257, "step": 4854 }, { "epoch": 0.8745777977932898, "grad_norm": 1.2223747968673706, "learning_rate": 5.98621354813777e-05, "loss": 1.7363, "step": 4855 }, { "epoch": 0.8747579374014861, "grad_norm": 1.2455908060073853, "learning_rate": 5.984825176880977e-05, "loss": 1.8408, "step": 4856 }, { "epoch": 0.8749380770096825, "grad_norm": 1.3384571075439453, "learning_rate": 5.983436726621987e-05, "loss": 1.8955, "step": 4857 }, { "epoch": 0.8751182166178788, "grad_norm": 1.524652361869812, "learning_rate": 5.982048197472183e-05, "loss": 1.89, "step": 4858 }, { "epoch": 0.8752983562260752, "grad_norm": 1.3820980787277222, "learning_rate": 5.980659589542951e-05, "loss": 1.6155, "step": 4859 }, { "epoch": 0.8754784958342715, "grad_norm": 1.9295732975006104, "learning_rate": 5.979270902945684e-05, "loss": 2.4923, "step": 4860 }, { "epoch": 0.8756586354424679, "grad_norm": 1.2807426452636719, "learning_rate": 5.977882137791781e-05, "loss": 1.6508, "step": 4861 }, { "epoch": 0.8758387750506643, "grad_norm": 1.2610318660736084, "learning_rate": 5.97649329419265e-05, "loss": 1.6568, "step": 4862 }, { "epoch": 0.8760189146588606, "grad_norm": 1.335521936416626, "learning_rate": 5.975104372259703e-05, "loss": 1.5188, "step": 4863 }, { "epoch": 0.876199054267057, "grad_norm": 1.3706283569335938, "learning_rate": 5.973715372104357e-05, "loss": 1.4579, "step": 4864 }, { "epoch": 0.8763791938752533, "grad_norm": 1.3073537349700928, "learning_rate": 5.9723262938380396e-05, "loss": 1.4596, "step": 4865 }, { "epoch": 0.8765593334834497, "grad_norm": 1.4208894968032837, "learning_rate": 5.9709371375721815e-05, "loss": 1.6413, "step": 4866 }, { "epoch": 0.876739473091646, "grad_norm": 1.373203992843628, "learning_rate": 5.9695479034182164e-05, "loss": 1.7215, "step": 4867 }, { "epoch": 0.8769196126998424, "grad_norm": 1.3259773254394531, "learning_rate": 5.968158591487592e-05, "loss": 1.4677, "step": 4868 }, { "epoch": 0.8770997523080387, "grad_norm": 1.464622974395752, "learning_rate": 5.966769201891756e-05, "loss": 1.5597, "step": 4869 }, { "epoch": 0.8772798919162351, "grad_norm": 1.361344337463379, "learning_rate": 5.965379734742167e-05, "loss": 1.7767, "step": 4870 }, { "epoch": 0.8774600315244314, "grad_norm": 1.3051594495773315, "learning_rate": 5.963990190150286e-05, "loss": 1.5666, "step": 4871 }, { "epoch": 0.8776401711326278, "grad_norm": 1.4447046518325806, "learning_rate": 5.962600568227581e-05, "loss": 1.683, "step": 4872 }, { "epoch": 0.8778203107408241, "grad_norm": 1.3636035919189453, "learning_rate": 5.961210869085527e-05, "loss": 1.5195, "step": 4873 }, { "epoch": 0.8780004503490205, "grad_norm": 1.2971924543380737, "learning_rate": 5.959821092835605e-05, "loss": 1.5972, "step": 4874 }, { "epoch": 0.8781805899572168, "grad_norm": 1.2787104845046997, "learning_rate": 5.9584312395893026e-05, "loss": 1.3256, "step": 4875 }, { "epoch": 0.8783607295654132, "grad_norm": 1.255251407623291, "learning_rate": 5.957041309458113e-05, "loss": 1.3018, "step": 4876 }, { "epoch": 0.8785408691736095, "grad_norm": 1.3704200983047485, "learning_rate": 5.9556513025535374e-05, "loss": 1.5456, "step": 4877 }, { "epoch": 0.8787210087818059, "grad_norm": 1.4923502206802368, "learning_rate": 5.954261218987077e-05, "loss": 1.671, "step": 4878 }, { "epoch": 0.8789011483900022, "grad_norm": 1.2371340990066528, "learning_rate": 5.9528710588702486e-05, "loss": 1.541, "step": 4879 }, { "epoch": 0.8790812879981986, "grad_norm": 1.314180850982666, "learning_rate": 5.951480822314567e-05, "loss": 1.4818, "step": 4880 }, { "epoch": 0.8792614276063949, "grad_norm": 1.5361371040344238, "learning_rate": 5.9500905094315565e-05, "loss": 1.5173, "step": 4881 }, { "epoch": 0.8794415672145913, "grad_norm": 1.1707242727279663, "learning_rate": 5.94870012033275e-05, "loss": 1.4226, "step": 4882 }, { "epoch": 0.8796217068227876, "grad_norm": 1.41873037815094, "learning_rate": 5.94730965512968e-05, "loss": 1.5139, "step": 4883 }, { "epoch": 0.879801846430984, "grad_norm": 1.5557864904403687, "learning_rate": 5.945919113933893e-05, "loss": 1.7769, "step": 4884 }, { "epoch": 0.8799819860391803, "grad_norm": 1.3495111465454102, "learning_rate": 5.9445284968569335e-05, "loss": 1.5695, "step": 4885 }, { "epoch": 0.8801621256473767, "grad_norm": 1.4541488885879517, "learning_rate": 5.943137804010359e-05, "loss": 1.5186, "step": 4886 }, { "epoch": 0.880342265255573, "grad_norm": 1.466526746749878, "learning_rate": 5.9417470355057295e-05, "loss": 1.6613, "step": 4887 }, { "epoch": 0.8805224048637694, "grad_norm": 1.3073025941848755, "learning_rate": 5.940356191454611e-05, "loss": 1.6232, "step": 4888 }, { "epoch": 0.8807025444719657, "grad_norm": 1.3118188381195068, "learning_rate": 5.938965271968577e-05, "loss": 1.4054, "step": 4889 }, { "epoch": 0.8808826840801621, "grad_norm": 1.4506570100784302, "learning_rate": 5.9375742771592056e-05, "loss": 1.6646, "step": 4890 }, { "epoch": 0.8810628236883585, "grad_norm": 1.4313631057739258, "learning_rate": 5.936183207138084e-05, "loss": 1.585, "step": 4891 }, { "epoch": 0.8812429632965548, "grad_norm": 1.5904314517974854, "learning_rate": 5.9347920620167996e-05, "loss": 1.7123, "step": 4892 }, { "epoch": 0.8814231029047512, "grad_norm": 1.330359935760498, "learning_rate": 5.933400841906953e-05, "loss": 1.3484, "step": 4893 }, { "epoch": 0.8816032425129475, "grad_norm": 1.6149178743362427, "learning_rate": 5.9320095469201454e-05, "loss": 1.571, "step": 4894 }, { "epoch": 0.8817833821211439, "grad_norm": 1.3532160520553589, "learning_rate": 5.930618177167984e-05, "loss": 1.3791, "step": 4895 }, { "epoch": 0.8819635217293402, "grad_norm": 1.385862946510315, "learning_rate": 5.9292267327620876e-05, "loss": 1.3973, "step": 4896 }, { "epoch": 0.8821436613375366, "grad_norm": 1.3089302778244019, "learning_rate": 5.927835213814073e-05, "loss": 1.3804, "step": 4897 }, { "epoch": 0.8823238009457329, "grad_norm": 1.2697385549545288, "learning_rate": 5.9264436204355724e-05, "loss": 1.2888, "step": 4898 }, { "epoch": 0.8825039405539293, "grad_norm": 1.3289599418640137, "learning_rate": 5.9250519527382144e-05, "loss": 1.4597, "step": 4899 }, { "epoch": 0.8826840801621256, "grad_norm": 1.3576087951660156, "learning_rate": 5.9236602108336395e-05, "loss": 1.2186, "step": 4900 }, { "epoch": 0.882864219770322, "grad_norm": 1.56609308719635, "learning_rate": 5.922268394833492e-05, "loss": 2.0012, "step": 4901 }, { "epoch": 0.8830443593785183, "grad_norm": 1.3856627941131592, "learning_rate": 5.920876504849423e-05, "loss": 2.1709, "step": 4902 }, { "epoch": 0.8832244989867147, "grad_norm": 1.3094737529754639, "learning_rate": 5.91948454099309e-05, "loss": 2.0393, "step": 4903 }, { "epoch": 0.883404638594911, "grad_norm": 1.206174612045288, "learning_rate": 5.918092503376156e-05, "loss": 1.8737, "step": 4904 }, { "epoch": 0.8835847782031074, "grad_norm": 1.4572250843048096, "learning_rate": 5.916700392110287e-05, "loss": 2.1271, "step": 4905 }, { "epoch": 0.8837649178113037, "grad_norm": 1.272459626197815, "learning_rate": 5.9153082073071586e-05, "loss": 1.796, "step": 4906 }, { "epoch": 0.8839450574195001, "grad_norm": 1.350561499595642, "learning_rate": 5.913915949078452e-05, "loss": 1.6258, "step": 4907 }, { "epoch": 0.8841251970276964, "grad_norm": 1.4500226974487305, "learning_rate": 5.912523617535855e-05, "loss": 1.8637, "step": 4908 }, { "epoch": 0.8843053366358928, "grad_norm": 1.6210451126098633, "learning_rate": 5.911131212791056e-05, "loss": 2.1012, "step": 4909 }, { "epoch": 0.8844854762440891, "grad_norm": 2.0391478538513184, "learning_rate": 5.9097387349557553e-05, "loss": 2.7092, "step": 4910 }, { "epoch": 0.8846656158522855, "grad_norm": 1.4408419132232666, "learning_rate": 5.9083461841416554e-05, "loss": 1.7117, "step": 4911 }, { "epoch": 0.8848457554604818, "grad_norm": 1.1679744720458984, "learning_rate": 5.906953560460469e-05, "loss": 1.4988, "step": 4912 }, { "epoch": 0.8850258950686782, "grad_norm": 1.324597954750061, "learning_rate": 5.905560864023908e-05, "loss": 1.7074, "step": 4913 }, { "epoch": 0.8852060346768745, "grad_norm": 1.2151472568511963, "learning_rate": 5.904168094943696e-05, "loss": 1.353, "step": 4914 }, { "epoch": 0.8853861742850709, "grad_norm": 1.1258124113082886, "learning_rate": 5.902775253331562e-05, "loss": 1.3196, "step": 4915 }, { "epoch": 0.8855663138932672, "grad_norm": 1.2189654111862183, "learning_rate": 5.901382339299234e-05, "loss": 1.6053, "step": 4916 }, { "epoch": 0.8857464535014636, "grad_norm": 1.2708308696746826, "learning_rate": 5.8999893529584546e-05, "loss": 1.5039, "step": 4917 }, { "epoch": 0.8859265931096599, "grad_norm": 1.2425771951675415, "learning_rate": 5.898596294420968e-05, "loss": 1.3228, "step": 4918 }, { "epoch": 0.8861067327178563, "grad_norm": 1.4810388088226318, "learning_rate": 5.897203163798524e-05, "loss": 1.7695, "step": 4919 }, { "epoch": 0.8862868723260527, "grad_norm": 1.410508394241333, "learning_rate": 5.8958099612028804e-05, "loss": 1.4703, "step": 4920 }, { "epoch": 0.886467011934249, "grad_norm": 1.3662093877792358, "learning_rate": 5.894416686745797e-05, "loss": 1.5843, "step": 4921 }, { "epoch": 0.8866471515424454, "grad_norm": 1.4293785095214844, "learning_rate": 5.893023340539043e-05, "loss": 1.4908, "step": 4922 }, { "epoch": 0.8868272911506417, "grad_norm": 1.2521655559539795, "learning_rate": 5.8916299226943905e-05, "loss": 1.4374, "step": 4923 }, { "epoch": 0.8870074307588381, "grad_norm": 1.3169161081314087, "learning_rate": 5.8902364333236214e-05, "loss": 1.5272, "step": 4924 }, { "epoch": 0.8871875703670344, "grad_norm": 1.3253748416900635, "learning_rate": 5.888842872538519e-05, "loss": 1.4615, "step": 4925 }, { "epoch": 0.8873677099752308, "grad_norm": 1.3979521989822388, "learning_rate": 5.887449240450874e-05, "loss": 1.4807, "step": 4926 }, { "epoch": 0.8875478495834271, "grad_norm": 1.3415324687957764, "learning_rate": 5.886055537172482e-05, "loss": 1.4248, "step": 4927 }, { "epoch": 0.8877279891916235, "grad_norm": 1.2935024499893188, "learning_rate": 5.884661762815148e-05, "loss": 1.5159, "step": 4928 }, { "epoch": 0.8879081287998198, "grad_norm": 1.2247368097305298, "learning_rate": 5.883267917490678e-05, "loss": 1.3982, "step": 4929 }, { "epoch": 0.8880882684080162, "grad_norm": 1.396319031715393, "learning_rate": 5.8818740013108854e-05, "loss": 1.5317, "step": 4930 }, { "epoch": 0.8882684080162125, "grad_norm": 1.4318525791168213, "learning_rate": 5.8804800143875915e-05, "loss": 1.6454, "step": 4931 }, { "epoch": 0.888448547624409, "grad_norm": 1.4536333084106445, "learning_rate": 5.8790859568326176e-05, "loss": 1.9068, "step": 4932 }, { "epoch": 0.8886286872326052, "grad_norm": 1.271181344985962, "learning_rate": 5.877691828757798e-05, "loss": 1.397, "step": 4933 }, { "epoch": 0.8888088268408016, "grad_norm": 1.2889289855957031, "learning_rate": 5.876297630274965e-05, "loss": 1.3531, "step": 4934 }, { "epoch": 0.8889889664489979, "grad_norm": 1.4697061777114868, "learning_rate": 5.874903361495965e-05, "loss": 1.5059, "step": 4935 }, { "epoch": 0.8891691060571943, "grad_norm": 1.4518953561782837, "learning_rate": 5.8735090225326436e-05, "loss": 1.5801, "step": 4936 }, { "epoch": 0.8893492456653906, "grad_norm": 1.4577720165252686, "learning_rate": 5.872114613496854e-05, "loss": 1.5614, "step": 4937 }, { "epoch": 0.889529385273587, "grad_norm": 1.35596764087677, "learning_rate": 5.870720134500454e-05, "loss": 1.3468, "step": 4938 }, { "epoch": 0.8897095248817833, "grad_norm": 1.431528091430664, "learning_rate": 5.869325585655309e-05, "loss": 1.4518, "step": 4939 }, { "epoch": 0.8898896644899797, "grad_norm": 1.4274637699127197, "learning_rate": 5.8679309670732896e-05, "loss": 1.5935, "step": 4940 }, { "epoch": 0.890069804098176, "grad_norm": 1.4080716371536255, "learning_rate": 5.8665362788662706e-05, "loss": 1.7318, "step": 4941 }, { "epoch": 0.8902499437063724, "grad_norm": 1.3980385065078735, "learning_rate": 5.8651415211461335e-05, "loss": 1.4891, "step": 4942 }, { "epoch": 0.8904300833145687, "grad_norm": 1.5835100412368774, "learning_rate": 5.8637466940247645e-05, "loss": 1.7582, "step": 4943 }, { "epoch": 0.8906102229227651, "grad_norm": 1.3957884311676025, "learning_rate": 5.8623517976140554e-05, "loss": 1.5943, "step": 4944 }, { "epoch": 0.8907903625309614, "grad_norm": 1.3227876424789429, "learning_rate": 5.8609568320259066e-05, "loss": 1.3689, "step": 4945 }, { "epoch": 0.8909705021391578, "grad_norm": 1.4396952390670776, "learning_rate": 5.859561797372219e-05, "loss": 1.3578, "step": 4946 }, { "epoch": 0.8911506417473541, "grad_norm": 1.3999418020248413, "learning_rate": 5.858166693764905e-05, "loss": 1.4781, "step": 4947 }, { "epoch": 0.8913307813555505, "grad_norm": 1.877793312072754, "learning_rate": 5.856771521315875e-05, "loss": 1.6333, "step": 4948 }, { "epoch": 0.891510920963747, "grad_norm": 1.3292099237442017, "learning_rate": 5.85537628013705e-05, "loss": 1.5068, "step": 4949 }, { "epoch": 0.8916910605719433, "grad_norm": 1.2942478656768799, "learning_rate": 5.853980970340358e-05, "loss": 1.3592, "step": 4950 }, { "epoch": 0.8918712001801397, "grad_norm": 1.351731777191162, "learning_rate": 5.8525855920377285e-05, "loss": 2.1554, "step": 4951 }, { "epoch": 0.892051339788336, "grad_norm": 1.1347591876983643, "learning_rate": 5.851190145341098e-05, "loss": 1.6675, "step": 4952 }, { "epoch": 0.8922314793965324, "grad_norm": 1.4482944011688232, "learning_rate": 5.849794630362408e-05, "loss": 2.5191, "step": 4953 }, { "epoch": 0.8924116190047287, "grad_norm": 1.2418495416641235, "learning_rate": 5.8483990472136074e-05, "loss": 2.0076, "step": 4954 }, { "epoch": 0.8925917586129251, "grad_norm": 1.3946818113327026, "learning_rate": 5.847003396006647e-05, "loss": 1.9167, "step": 4955 }, { "epoch": 0.8927718982211214, "grad_norm": 1.3768799304962158, "learning_rate": 5.8456076768534886e-05, "loss": 1.881, "step": 4956 }, { "epoch": 0.8929520378293178, "grad_norm": 1.40651273727417, "learning_rate": 5.844211889866094e-05, "loss": 1.9443, "step": 4957 }, { "epoch": 0.893132177437514, "grad_norm": 1.4150489568710327, "learning_rate": 5.842816035156432e-05, "loss": 1.9269, "step": 4958 }, { "epoch": 0.8933123170457105, "grad_norm": 1.6062489748001099, "learning_rate": 5.841420112836479e-05, "loss": 2.1852, "step": 4959 }, { "epoch": 0.8934924566539068, "grad_norm": 1.503406286239624, "learning_rate": 5.840024123018213e-05, "loss": 1.8622, "step": 4960 }, { "epoch": 0.8936725962621032, "grad_norm": 1.3765223026275635, "learning_rate": 5.838628065813622e-05, "loss": 1.8461, "step": 4961 }, { "epoch": 0.8938527358702995, "grad_norm": 1.267340898513794, "learning_rate": 5.8372319413346944e-05, "loss": 1.5935, "step": 4962 }, { "epoch": 0.8940328754784959, "grad_norm": 1.3180469274520874, "learning_rate": 5.8358357496934304e-05, "loss": 1.8769, "step": 4963 }, { "epoch": 0.8942130150866922, "grad_norm": 1.2231675386428833, "learning_rate": 5.83443949100183e-05, "loss": 1.6831, "step": 4964 }, { "epoch": 0.8943931546948886, "grad_norm": 1.4154112339019775, "learning_rate": 5.8330431653718986e-05, "loss": 1.7242, "step": 4965 }, { "epoch": 0.8945732943030849, "grad_norm": 1.4427021741867065, "learning_rate": 5.831646772915651e-05, "loss": 1.6218, "step": 4966 }, { "epoch": 0.8947534339112813, "grad_norm": 1.2480283975601196, "learning_rate": 5.830250313745104e-05, "loss": 1.5665, "step": 4967 }, { "epoch": 0.8949335735194776, "grad_norm": 1.3867474794387817, "learning_rate": 5.828853787972281e-05, "loss": 1.7147, "step": 4968 }, { "epoch": 0.895113713127674, "grad_norm": 1.3857214450836182, "learning_rate": 5.827457195709212e-05, "loss": 1.6213, "step": 4969 }, { "epoch": 0.8952938527358703, "grad_norm": 1.2099359035491943, "learning_rate": 5.8260605370679286e-05, "loss": 1.5305, "step": 4970 }, { "epoch": 0.8954739923440667, "grad_norm": 1.4401071071624756, "learning_rate": 5.8246638121604715e-05, "loss": 1.822, "step": 4971 }, { "epoch": 0.895654131952263, "grad_norm": 1.2437294721603394, "learning_rate": 5.8232670210988847e-05, "loss": 1.2666, "step": 4972 }, { "epoch": 0.8958342715604594, "grad_norm": 1.378402590751648, "learning_rate": 5.82187016399522e-05, "loss": 1.6252, "step": 4973 }, { "epoch": 0.8960144111686557, "grad_norm": 1.3699381351470947, "learning_rate": 5.8204732409615304e-05, "loss": 1.4978, "step": 4974 }, { "epoch": 0.8961945507768521, "grad_norm": 1.4028576612472534, "learning_rate": 5.819076252109876e-05, "loss": 1.6407, "step": 4975 }, { "epoch": 0.8963746903850485, "grad_norm": 1.397886872291565, "learning_rate": 5.8176791975523235e-05, "loss": 1.5976, "step": 4976 }, { "epoch": 0.8965548299932448, "grad_norm": 1.4487146139144897, "learning_rate": 5.816282077400945e-05, "loss": 1.492, "step": 4977 }, { "epoch": 0.8967349696014412, "grad_norm": 1.2671177387237549, "learning_rate": 5.814884891767816e-05, "loss": 1.4081, "step": 4978 }, { "epoch": 0.8969151092096375, "grad_norm": 1.3465325832366943, "learning_rate": 5.813487640765018e-05, "loss": 1.6068, "step": 4979 }, { "epoch": 0.8970952488178339, "grad_norm": 1.4604675769805908, "learning_rate": 5.812090324504638e-05, "loss": 1.7003, "step": 4980 }, { "epoch": 0.8972753884260302, "grad_norm": 1.330579161643982, "learning_rate": 5.8106929430987653e-05, "loss": 1.445, "step": 4981 }, { "epoch": 0.8974555280342266, "grad_norm": 1.292406678199768, "learning_rate": 5.8092954966595024e-05, "loss": 1.4893, "step": 4982 }, { "epoch": 0.8976356676424229, "grad_norm": 1.3988697528839111, "learning_rate": 5.8078979852989466e-05, "loss": 1.684, "step": 4983 }, { "epoch": 0.8978158072506193, "grad_norm": 1.3910832405090332, "learning_rate": 5.80650040912921e-05, "loss": 1.7591, "step": 4984 }, { "epoch": 0.8979959468588156, "grad_norm": 1.4246978759765625, "learning_rate": 5.8051027682624026e-05, "loss": 1.7317, "step": 4985 }, { "epoch": 0.898176086467012, "grad_norm": 1.3134583234786987, "learning_rate": 5.803705062810644e-05, "loss": 1.3337, "step": 4986 }, { "epoch": 0.8983562260752083, "grad_norm": 1.3355578184127808, "learning_rate": 5.802307292886056e-05, "loss": 1.4894, "step": 4987 }, { "epoch": 0.8985363656834047, "grad_norm": 1.3627285957336426, "learning_rate": 5.800909458600767e-05, "loss": 1.46, "step": 4988 }, { "epoch": 0.898716505291601, "grad_norm": 1.3580554723739624, "learning_rate": 5.799511560066912e-05, "loss": 1.679, "step": 4989 }, { "epoch": 0.8988966448997974, "grad_norm": 1.4573010206222534, "learning_rate": 5.798113597396629e-05, "loss": 1.54, "step": 4990 }, { "epoch": 0.8990767845079937, "grad_norm": 1.4165380001068115, "learning_rate": 5.7967155707020623e-05, "loss": 1.5047, "step": 4991 }, { "epoch": 0.8992569241161901, "grad_norm": 1.4199260473251343, "learning_rate": 5.7953174800953604e-05, "loss": 1.5042, "step": 4992 }, { "epoch": 0.8994370637243864, "grad_norm": 1.4115427732467651, "learning_rate": 5.793919325688678e-05, "loss": 1.3498, "step": 4993 }, { "epoch": 0.8996172033325828, "grad_norm": 1.4568222761154175, "learning_rate": 5.7925211075941744e-05, "loss": 1.3783, "step": 4994 }, { "epoch": 0.8997973429407791, "grad_norm": 1.3275940418243408, "learning_rate": 5.791122825924011e-05, "loss": 1.5193, "step": 4995 }, { "epoch": 0.8999774825489755, "grad_norm": 1.3888188600540161, "learning_rate": 5.7897244807903636e-05, "loss": 1.5633, "step": 4996 }, { "epoch": 0.9001576221571718, "grad_norm": 1.4054131507873535, "learning_rate": 5.7883260723054e-05, "loss": 1.4336, "step": 4997 }, { "epoch": 0.9003377617653682, "grad_norm": 1.4035102128982544, "learning_rate": 5.7869276005813044e-05, "loss": 1.2238, "step": 4998 }, { "epoch": 0.9005179013735645, "grad_norm": 1.352142572402954, "learning_rate": 5.785529065730258e-05, "loss": 1.4641, "step": 4999 }, { "epoch": 0.9006980409817609, "grad_norm": 1.409900188446045, "learning_rate": 5.784130467864455e-05, "loss": 1.5026, "step": 5000 }, { "epoch": 0.9008781805899572, "grad_norm": 1.3493738174438477, "learning_rate": 5.782731807096088e-05, "loss": 2.1084, "step": 5001 }, { "epoch": 0.9010583201981536, "grad_norm": 1.326295256614685, "learning_rate": 5.7813330835373545e-05, "loss": 2.0093, "step": 5002 }, { "epoch": 0.9012384598063499, "grad_norm": 1.1491564512252808, "learning_rate": 5.779934297300463e-05, "loss": 1.8514, "step": 5003 }, { "epoch": 0.9014185994145463, "grad_norm": 1.203399658203125, "learning_rate": 5.778535448497622e-05, "loss": 1.7148, "step": 5004 }, { "epoch": 0.9015987390227427, "grad_norm": 1.3038862943649292, "learning_rate": 5.7771365372410454e-05, "loss": 2.0116, "step": 5005 }, { "epoch": 0.901778878630939, "grad_norm": 1.2065067291259766, "learning_rate": 5.7757375636429564e-05, "loss": 1.9362, "step": 5006 }, { "epoch": 0.9019590182391354, "grad_norm": 1.418507695198059, "learning_rate": 5.774338527815577e-05, "loss": 1.9639, "step": 5007 }, { "epoch": 0.9021391578473317, "grad_norm": 1.4580485820770264, "learning_rate": 5.772939429871138e-05, "loss": 1.8525, "step": 5008 }, { "epoch": 0.9023192974555281, "grad_norm": 1.5523029565811157, "learning_rate": 5.771540269921874e-05, "loss": 2.0376, "step": 5009 }, { "epoch": 0.9024994370637244, "grad_norm": 1.748618721961975, "learning_rate": 5.770141048080026e-05, "loss": 2.2565, "step": 5010 }, { "epoch": 0.9026795766719208, "grad_norm": 1.416452407836914, "learning_rate": 5.768741764457836e-05, "loss": 1.7559, "step": 5011 }, { "epoch": 0.9028597162801171, "grad_norm": 1.4235934019088745, "learning_rate": 5.7673424191675595e-05, "loss": 1.6576, "step": 5012 }, { "epoch": 0.9030398558883135, "grad_norm": 1.2068952322006226, "learning_rate": 5.7659430123214455e-05, "loss": 1.3391, "step": 5013 }, { "epoch": 0.9032199954965098, "grad_norm": 1.3004122972488403, "learning_rate": 5.764543544031757e-05, "loss": 1.3112, "step": 5014 }, { "epoch": 0.9034001351047062, "grad_norm": 1.3666245937347412, "learning_rate": 5.763144014410759e-05, "loss": 1.7867, "step": 5015 }, { "epoch": 0.9035802747129025, "grad_norm": 1.366073727607727, "learning_rate": 5.761744423570716e-05, "loss": 1.5905, "step": 5016 }, { "epoch": 0.9037604143210989, "grad_norm": 1.282131314277649, "learning_rate": 5.76034477162391e-05, "loss": 1.4685, "step": 5017 }, { "epoch": 0.9039405539292952, "grad_norm": 1.3294016122817993, "learning_rate": 5.7589450586826165e-05, "loss": 1.65, "step": 5018 }, { "epoch": 0.9041206935374916, "grad_norm": 1.3729841709136963, "learning_rate": 5.7575452848591196e-05, "loss": 1.6355, "step": 5019 }, { "epoch": 0.9043008331456879, "grad_norm": 1.4479531049728394, "learning_rate": 5.7561454502657075e-05, "loss": 1.7243, "step": 5020 }, { "epoch": 0.9044809727538843, "grad_norm": 1.2787152528762817, "learning_rate": 5.754745555014677e-05, "loss": 1.5639, "step": 5021 }, { "epoch": 0.9046611123620806, "grad_norm": 1.2977980375289917, "learning_rate": 5.753345599218326e-05, "loss": 1.534, "step": 5022 }, { "epoch": 0.904841251970277, "grad_norm": 1.342494249343872, "learning_rate": 5.751945582988958e-05, "loss": 1.4455, "step": 5023 }, { "epoch": 0.9050213915784733, "grad_norm": 1.2599451541900635, "learning_rate": 5.75054550643888e-05, "loss": 1.2797, "step": 5024 }, { "epoch": 0.9052015311866697, "grad_norm": 1.482111930847168, "learning_rate": 5.749145369680407e-05, "loss": 1.6278, "step": 5025 }, { "epoch": 0.905381670794866, "grad_norm": 1.3941726684570312, "learning_rate": 5.7477451728258605e-05, "loss": 1.552, "step": 5026 }, { "epoch": 0.9055618104030624, "grad_norm": 1.3854631185531616, "learning_rate": 5.746344915987557e-05, "loss": 1.4391, "step": 5027 }, { "epoch": 0.9057419500112587, "grad_norm": 1.315888524055481, "learning_rate": 5.7449445992778304e-05, "loss": 1.4151, "step": 5028 }, { "epoch": 0.9059220896194551, "grad_norm": 1.4299246072769165, "learning_rate": 5.74354422280901e-05, "loss": 1.6382, "step": 5029 }, { "epoch": 0.9061022292276514, "grad_norm": 1.3035376071929932, "learning_rate": 5.742143786693434e-05, "loss": 1.4697, "step": 5030 }, { "epoch": 0.9062823688358478, "grad_norm": 1.3902029991149902, "learning_rate": 5.7407432910434444e-05, "loss": 1.6825, "step": 5031 }, { "epoch": 0.9064625084440441, "grad_norm": 1.3576289415359497, "learning_rate": 5.739342735971389e-05, "loss": 1.4541, "step": 5032 }, { "epoch": 0.9066426480522405, "grad_norm": 1.5524572134017944, "learning_rate": 5.737942121589619e-05, "loss": 1.9006, "step": 5033 }, { "epoch": 0.9068227876604369, "grad_norm": 1.3840633630752563, "learning_rate": 5.736541448010493e-05, "loss": 1.5157, "step": 5034 }, { "epoch": 0.9070029272686332, "grad_norm": 1.4073817729949951, "learning_rate": 5.73514071534637e-05, "loss": 1.7386, "step": 5035 }, { "epoch": 0.9071830668768296, "grad_norm": 1.3186254501342773, "learning_rate": 5.733739923709618e-05, "loss": 1.3219, "step": 5036 }, { "epoch": 0.9073632064850259, "grad_norm": 1.3760497570037842, "learning_rate": 5.732339073212605e-05, "loss": 1.527, "step": 5037 }, { "epoch": 0.9075433460932223, "grad_norm": 1.493990182876587, "learning_rate": 5.7309381639677085e-05, "loss": 1.7089, "step": 5038 }, { "epoch": 0.9077234857014186, "grad_norm": 1.4230737686157227, "learning_rate": 5.729537196087309e-05, "loss": 1.6311, "step": 5039 }, { "epoch": 0.907903625309615, "grad_norm": 1.4095138311386108, "learning_rate": 5.728136169683791e-05, "loss": 1.4789, "step": 5040 }, { "epoch": 0.9080837649178113, "grad_norm": 1.494604229927063, "learning_rate": 5.726735084869543e-05, "loss": 1.4799, "step": 5041 }, { "epoch": 0.9082639045260077, "grad_norm": 1.3156670331954956, "learning_rate": 5.725333941756962e-05, "loss": 1.2875, "step": 5042 }, { "epoch": 0.908444044134204, "grad_norm": 1.2558436393737793, "learning_rate": 5.7239327404584454e-05, "loss": 1.2869, "step": 5043 }, { "epoch": 0.9086241837424004, "grad_norm": 1.484230399131775, "learning_rate": 5.722531481086395e-05, "loss": 1.7496, "step": 5044 }, { "epoch": 0.9088043233505967, "grad_norm": 1.3950295448303223, "learning_rate": 5.7211301637532245e-05, "loss": 1.5871, "step": 5045 }, { "epoch": 0.9089844629587931, "grad_norm": 1.4383394718170166, "learning_rate": 5.7197287885713405e-05, "loss": 1.2528, "step": 5046 }, { "epoch": 0.9091646025669894, "grad_norm": 1.4085705280303955, "learning_rate": 5.718327355653166e-05, "loss": 1.6171, "step": 5047 }, { "epoch": 0.9093447421751858, "grad_norm": 1.2919187545776367, "learning_rate": 5.716925865111118e-05, "loss": 1.3425, "step": 5048 }, { "epoch": 0.9095248817833821, "grad_norm": 1.2899022102355957, "learning_rate": 5.7155243170576276e-05, "loss": 1.205, "step": 5049 }, { "epoch": 0.9097050213915785, "grad_norm": 1.4842193126678467, "learning_rate": 5.714122711605127e-05, "loss": 1.6139, "step": 5050 }, { "epoch": 0.9098851609997748, "grad_norm": 1.2801039218902588, "learning_rate": 5.712721048866049e-05, "loss": 1.7893, "step": 5051 }, { "epoch": 0.9100653006079712, "grad_norm": 1.4865806102752686, "learning_rate": 5.711319328952836e-05, "loss": 2.3504, "step": 5052 }, { "epoch": 0.9102454402161675, "grad_norm": 1.2550746202468872, "learning_rate": 5.7099175519779304e-05, "loss": 1.9261, "step": 5053 }, { "epoch": 0.9104255798243639, "grad_norm": 1.2748281955718994, "learning_rate": 5.7085157180537865e-05, "loss": 1.7573, "step": 5054 }, { "epoch": 0.9106057194325602, "grad_norm": 1.2243000268936157, "learning_rate": 5.707113827292857e-05, "loss": 1.778, "step": 5055 }, { "epoch": 0.9107858590407566, "grad_norm": 1.3699009418487549, "learning_rate": 5.7057118798076004e-05, "loss": 2.1405, "step": 5056 }, { "epoch": 0.9109659986489529, "grad_norm": 1.46440851688385, "learning_rate": 5.7043098757104805e-05, "loss": 1.959, "step": 5057 }, { "epoch": 0.9111461382571493, "grad_norm": 1.5501600503921509, "learning_rate": 5.702907815113966e-05, "loss": 2.0403, "step": 5058 }, { "epoch": 0.9113262778653456, "grad_norm": 1.734147310256958, "learning_rate": 5.701505698130528e-05, "loss": 2.2871, "step": 5059 }, { "epoch": 0.911506417473542, "grad_norm": 1.8098869323730469, "learning_rate": 5.700103524872644e-05, "loss": 2.0413, "step": 5060 }, { "epoch": 0.9116865570817384, "grad_norm": 1.3115081787109375, "learning_rate": 5.698701295452798e-05, "loss": 1.6129, "step": 5061 }, { "epoch": 0.9118666966899347, "grad_norm": 1.2694413661956787, "learning_rate": 5.6972990099834736e-05, "loss": 1.6414, "step": 5062 }, { "epoch": 0.9120468362981311, "grad_norm": 1.2579491138458252, "learning_rate": 5.695896668577163e-05, "loss": 1.5723, "step": 5063 }, { "epoch": 0.9122269759063274, "grad_norm": 1.39626145362854, "learning_rate": 5.694494271346359e-05, "loss": 1.483, "step": 5064 }, { "epoch": 0.9124071155145238, "grad_norm": 1.137127161026001, "learning_rate": 5.6930918184035645e-05, "loss": 1.2465, "step": 5065 }, { "epoch": 0.9125872551227201, "grad_norm": 1.3818987607955933, "learning_rate": 5.6916893098612814e-05, "loss": 1.7448, "step": 5066 }, { "epoch": 0.9127673947309165, "grad_norm": 1.291425347328186, "learning_rate": 5.690286745832019e-05, "loss": 1.6185, "step": 5067 }, { "epoch": 0.9129475343391128, "grad_norm": 1.3024672269821167, "learning_rate": 5.688884126428289e-05, "loss": 1.58, "step": 5068 }, { "epoch": 0.9131276739473092, "grad_norm": 1.1630114316940308, "learning_rate": 5.6874814517626094e-05, "loss": 1.3182, "step": 5069 }, { "epoch": 0.9133078135555055, "grad_norm": 1.2996869087219238, "learning_rate": 5.686078721947504e-05, "loss": 1.3772, "step": 5070 }, { "epoch": 0.9134879531637019, "grad_norm": 1.221467137336731, "learning_rate": 5.684675937095498e-05, "loss": 1.2569, "step": 5071 }, { "epoch": 0.9136680927718982, "grad_norm": 1.3229528665542603, "learning_rate": 5.683273097319122e-05, "loss": 1.624, "step": 5072 }, { "epoch": 0.9138482323800946, "grad_norm": 1.3927230834960938, "learning_rate": 5.68187020273091e-05, "loss": 1.6236, "step": 5073 }, { "epoch": 0.9140283719882909, "grad_norm": 1.4482344388961792, "learning_rate": 5.680467253443402e-05, "loss": 1.5324, "step": 5074 }, { "epoch": 0.9142085115964873, "grad_norm": 1.3677200078964233, "learning_rate": 5.679064249569144e-05, "loss": 1.5572, "step": 5075 }, { "epoch": 0.9143886512046836, "grad_norm": 1.3752690553665161, "learning_rate": 5.67766119122068e-05, "loss": 1.5922, "step": 5076 }, { "epoch": 0.91456879081288, "grad_norm": 1.2365682125091553, "learning_rate": 5.676258078510569e-05, "loss": 1.5804, "step": 5077 }, { "epoch": 0.9147489304210763, "grad_norm": 1.3693547248840332, "learning_rate": 5.674854911551362e-05, "loss": 1.4754, "step": 5078 }, { "epoch": 0.9149290700292727, "grad_norm": 1.393280029296875, "learning_rate": 5.673451690455622e-05, "loss": 1.4322, "step": 5079 }, { "epoch": 0.915109209637469, "grad_norm": 1.3403124809265137, "learning_rate": 5.6720484153359174e-05, "loss": 1.4891, "step": 5080 }, { "epoch": 0.9152893492456654, "grad_norm": 1.2650532722473145, "learning_rate": 5.6706450863048145e-05, "loss": 1.5332, "step": 5081 }, { "epoch": 0.9154694888538617, "grad_norm": 1.4569473266601562, "learning_rate": 5.66924170347489e-05, "loss": 1.7839, "step": 5082 }, { "epoch": 0.9156496284620581, "grad_norm": 1.3612486124038696, "learning_rate": 5.667838266958723e-05, "loss": 1.5323, "step": 5083 }, { "epoch": 0.9158297680702544, "grad_norm": 1.4201631546020508, "learning_rate": 5.666434776868895e-05, "loss": 1.5092, "step": 5084 }, { "epoch": 0.9160099076784508, "grad_norm": 1.459028720855713, "learning_rate": 5.6650312333179923e-05, "loss": 1.5687, "step": 5085 }, { "epoch": 0.9161900472866471, "grad_norm": 1.4282561540603638, "learning_rate": 5.6636276364186105e-05, "loss": 1.3516, "step": 5086 }, { "epoch": 0.9163701868948435, "grad_norm": 1.4773622751235962, "learning_rate": 5.662223986283341e-05, "loss": 1.6518, "step": 5087 }, { "epoch": 0.9165503265030398, "grad_norm": 1.4208288192749023, "learning_rate": 5.660820283024788e-05, "loss": 1.7851, "step": 5088 }, { "epoch": 0.9167304661112362, "grad_norm": 1.382665753364563, "learning_rate": 5.659416526755552e-05, "loss": 1.4614, "step": 5089 }, { "epoch": 0.9169106057194326, "grad_norm": 1.3633079528808594, "learning_rate": 5.658012717588244e-05, "loss": 1.4254, "step": 5090 }, { "epoch": 0.9170907453276289, "grad_norm": 1.4343020915985107, "learning_rate": 5.6566088556354754e-05, "loss": 1.5308, "step": 5091 }, { "epoch": 0.9172708849358253, "grad_norm": 1.2589569091796875, "learning_rate": 5.655204941009864e-05, "loss": 1.3062, "step": 5092 }, { "epoch": 0.9174510245440216, "grad_norm": 1.4107121229171753, "learning_rate": 5.653800973824034e-05, "loss": 1.5481, "step": 5093 }, { "epoch": 0.917631164152218, "grad_norm": 1.4062985181808472, "learning_rate": 5.652396954190608e-05, "loss": 1.573, "step": 5094 }, { "epoch": 0.9178113037604143, "grad_norm": 1.4348030090332031, "learning_rate": 5.650992882222212e-05, "loss": 1.5402, "step": 5095 }, { "epoch": 0.9179914433686107, "grad_norm": 1.2970614433288574, "learning_rate": 5.6495887580314876e-05, "loss": 1.5557, "step": 5096 }, { "epoch": 0.918171582976807, "grad_norm": 1.3836398124694824, "learning_rate": 5.6481845817310685e-05, "loss": 1.269, "step": 5097 }, { "epoch": 0.9183517225850034, "grad_norm": 1.4323362112045288, "learning_rate": 5.6467803534335986e-05, "loss": 1.6282, "step": 5098 }, { "epoch": 0.9185318621931997, "grad_norm": 1.3195542097091675, "learning_rate": 5.645376073251724e-05, "loss": 1.2351, "step": 5099 }, { "epoch": 0.9187120018013961, "grad_norm": 1.3150973320007324, "learning_rate": 5.643971741298094e-05, "loss": 1.5404, "step": 5100 }, { "epoch": 0.9188921414095924, "grad_norm": 1.3054282665252686, "learning_rate": 5.642567357685365e-05, "loss": 2.3153, "step": 5101 }, { "epoch": 0.9190722810177888, "grad_norm": 1.2936956882476807, "learning_rate": 5.6411629225261954e-05, "loss": 1.9611, "step": 5102 }, { "epoch": 0.9192524206259851, "grad_norm": 1.2296570539474487, "learning_rate": 5.639758435933249e-05, "loss": 2.037, "step": 5103 }, { "epoch": 0.9194325602341815, "grad_norm": 1.2555570602416992, "learning_rate": 5.638353898019192e-05, "loss": 1.9361, "step": 5104 }, { "epoch": 0.9196126998423778, "grad_norm": 1.2353285551071167, "learning_rate": 5.6369493088966964e-05, "loss": 1.8856, "step": 5105 }, { "epoch": 0.9197928394505742, "grad_norm": 1.2829554080963135, "learning_rate": 5.6355446686784375e-05, "loss": 2.0834, "step": 5106 }, { "epoch": 0.9199729790587705, "grad_norm": 1.3782570362091064, "learning_rate": 5.634139977477094e-05, "loss": 2.1177, "step": 5107 }, { "epoch": 0.9201531186669669, "grad_norm": 1.4753156900405884, "learning_rate": 5.632735235405352e-05, "loss": 1.8709, "step": 5108 }, { "epoch": 0.9203332582751632, "grad_norm": 1.5350147485733032, "learning_rate": 5.631330442575896e-05, "loss": 1.9023, "step": 5109 }, { "epoch": 0.9205133978833596, "grad_norm": 1.48201322555542, "learning_rate": 5.629925599101422e-05, "loss": 1.7216, "step": 5110 }, { "epoch": 0.9206935374915559, "grad_norm": 1.543350338935852, "learning_rate": 5.62852070509462e-05, "loss": 2.1403, "step": 5111 }, { "epoch": 0.9208736770997523, "grad_norm": 1.331264853477478, "learning_rate": 5.6271157606681954e-05, "loss": 1.7228, "step": 5112 }, { "epoch": 0.9210538167079486, "grad_norm": 1.2444875240325928, "learning_rate": 5.6257107659348484e-05, "loss": 1.6073, "step": 5113 }, { "epoch": 0.921233956316145, "grad_norm": 1.1544668674468994, "learning_rate": 5.624305721007289e-05, "loss": 1.384, "step": 5114 }, { "epoch": 0.9214140959243413, "grad_norm": 1.244156837463379, "learning_rate": 5.62290062599823e-05, "loss": 1.5327, "step": 5115 }, { "epoch": 0.9215942355325377, "grad_norm": 1.4060598611831665, "learning_rate": 5.621495481020386e-05, "loss": 1.4705, "step": 5116 }, { "epoch": 0.921774375140734, "grad_norm": 1.629783034324646, "learning_rate": 5.6200902861864756e-05, "loss": 1.5371, "step": 5117 }, { "epoch": 0.9219545147489304, "grad_norm": 1.2417926788330078, "learning_rate": 5.618685041609225e-05, "loss": 1.3576, "step": 5118 }, { "epoch": 0.9221346543571268, "grad_norm": 1.4018644094467163, "learning_rate": 5.617279747401363e-05, "loss": 1.5638, "step": 5119 }, { "epoch": 0.9223147939653231, "grad_norm": 1.2647839784622192, "learning_rate": 5.61587440367562e-05, "loss": 1.4678, "step": 5120 }, { "epoch": 0.9224949335735195, "grad_norm": 1.2668280601501465, "learning_rate": 5.6144690105447305e-05, "loss": 1.3495, "step": 5121 }, { "epoch": 0.9226750731817158, "grad_norm": 1.2567169666290283, "learning_rate": 5.6130635681214386e-05, "loss": 1.325, "step": 5122 }, { "epoch": 0.9228552127899122, "grad_norm": 1.3366979360580444, "learning_rate": 5.6116580765184834e-05, "loss": 1.4925, "step": 5123 }, { "epoch": 0.9230353523981085, "grad_norm": 1.288009524345398, "learning_rate": 5.610252535848617e-05, "loss": 1.4696, "step": 5124 }, { "epoch": 0.9232154920063049, "grad_norm": 1.2684028148651123, "learning_rate": 5.608846946224589e-05, "loss": 1.4457, "step": 5125 }, { "epoch": 0.9233956316145012, "grad_norm": 1.3418757915496826, "learning_rate": 5.607441307759156e-05, "loss": 1.6583, "step": 5126 }, { "epoch": 0.9235757712226976, "grad_norm": 1.343349575996399, "learning_rate": 5.6060356205650766e-05, "loss": 1.4726, "step": 5127 }, { "epoch": 0.9237559108308939, "grad_norm": 1.4082485437393188, "learning_rate": 5.604629884755115e-05, "loss": 1.4287, "step": 5128 }, { "epoch": 0.9239360504390903, "grad_norm": 1.2966614961624146, "learning_rate": 5.603224100442038e-05, "loss": 1.3629, "step": 5129 }, { "epoch": 0.9241161900472866, "grad_norm": 1.2937183380126953, "learning_rate": 5.6018182677386176e-05, "loss": 1.2554, "step": 5130 }, { "epoch": 0.924296329655483, "grad_norm": 1.3303674459457397, "learning_rate": 5.60041238675763e-05, "loss": 1.6817, "step": 5131 }, { "epoch": 0.9244764692636793, "grad_norm": 1.4202079772949219, "learning_rate": 5.5990064576118525e-05, "loss": 1.5546, "step": 5132 }, { "epoch": 0.9246566088718757, "grad_norm": 1.3486336469650269, "learning_rate": 5.5976004804140695e-05, "loss": 1.4283, "step": 5133 }, { "epoch": 0.924836748480072, "grad_norm": 1.3125056028366089, "learning_rate": 5.596194455277065e-05, "loss": 1.4568, "step": 5134 }, { "epoch": 0.9250168880882684, "grad_norm": 1.5667637586593628, "learning_rate": 5.594788382313634e-05, "loss": 1.8937, "step": 5135 }, { "epoch": 0.9251970276964647, "grad_norm": 1.442713975906372, "learning_rate": 5.593382261636567e-05, "loss": 1.7078, "step": 5136 }, { "epoch": 0.9253771673046611, "grad_norm": 1.2807390689849854, "learning_rate": 5.591976093358665e-05, "loss": 1.4063, "step": 5137 }, { "epoch": 0.9255573069128574, "grad_norm": 1.4069725275039673, "learning_rate": 5.590569877592729e-05, "loss": 1.4569, "step": 5138 }, { "epoch": 0.9257374465210538, "grad_norm": 1.344852328300476, "learning_rate": 5.589163614451565e-05, "loss": 1.6277, "step": 5139 }, { "epoch": 0.9259175861292501, "grad_norm": 1.4091660976409912, "learning_rate": 5.587757304047983e-05, "loss": 1.5884, "step": 5140 }, { "epoch": 0.9260977257374465, "grad_norm": 1.3231937885284424, "learning_rate": 5.586350946494795e-05, "loss": 1.3841, "step": 5141 }, { "epoch": 0.9262778653456428, "grad_norm": 1.48690664768219, "learning_rate": 5.5849445419048216e-05, "loss": 1.5442, "step": 5142 }, { "epoch": 0.9264580049538392, "grad_norm": 1.3540806770324707, "learning_rate": 5.583538090390882e-05, "loss": 1.6341, "step": 5143 }, { "epoch": 0.9266381445620355, "grad_norm": 1.44573974609375, "learning_rate": 5.582131592065799e-05, "loss": 1.6569, "step": 5144 }, { "epoch": 0.9268182841702319, "grad_norm": 1.4059885740280151, "learning_rate": 5.580725047042405e-05, "loss": 1.5414, "step": 5145 }, { "epoch": 0.9269984237784282, "grad_norm": 1.3429491519927979, "learning_rate": 5.579318455433529e-05, "loss": 1.364, "step": 5146 }, { "epoch": 0.9271785633866246, "grad_norm": 1.2300504446029663, "learning_rate": 5.57791181735201e-05, "loss": 1.339, "step": 5147 }, { "epoch": 0.927358702994821, "grad_norm": 1.3269760608673096, "learning_rate": 5.5765051329106864e-05, "loss": 1.3428, "step": 5148 }, { "epoch": 0.9275388426030173, "grad_norm": 1.476166009902954, "learning_rate": 5.5750984022224015e-05, "loss": 1.328, "step": 5149 }, { "epoch": 0.9277189822112137, "grad_norm": 1.4089046716690063, "learning_rate": 5.5736916254000036e-05, "loss": 1.2934, "step": 5150 }, { "epoch": 0.92789912181941, "grad_norm": 1.237349271774292, "learning_rate": 5.572284802556341e-05, "loss": 1.688, "step": 5151 }, { "epoch": 0.9280792614276064, "grad_norm": 1.4099372625350952, "learning_rate": 5.5708779338042725e-05, "loss": 2.1427, "step": 5152 }, { "epoch": 0.9282594010358027, "grad_norm": 1.3852771520614624, "learning_rate": 5.569471019256654e-05, "loss": 1.8432, "step": 5153 }, { "epoch": 0.9284395406439991, "grad_norm": 1.2227874994277954, "learning_rate": 5.568064059026347e-05, "loss": 1.7545, "step": 5154 }, { "epoch": 0.9286196802521954, "grad_norm": 1.2323182821273804, "learning_rate": 5.5666570532262166e-05, "loss": 1.8028, "step": 5155 }, { "epoch": 0.9287998198603918, "grad_norm": 1.3899558782577515, "learning_rate": 5.565250001969136e-05, "loss": 1.8758, "step": 5156 }, { "epoch": 0.9289799594685881, "grad_norm": 1.551007628440857, "learning_rate": 5.563842905367974e-05, "loss": 2.0121, "step": 5157 }, { "epoch": 0.9291600990767845, "grad_norm": 1.4386353492736816, "learning_rate": 5.56243576353561e-05, "loss": 1.8246, "step": 5158 }, { "epoch": 0.9293402386849808, "grad_norm": 1.6415252685546875, "learning_rate": 5.5610285765849214e-05, "loss": 2.0491, "step": 5159 }, { "epoch": 0.9295203782931772, "grad_norm": 1.8444405794143677, "learning_rate": 5.5596213446287935e-05, "loss": 2.2638, "step": 5160 }, { "epoch": 0.9297005179013735, "grad_norm": 1.513251781463623, "learning_rate": 5.558214067780115e-05, "loss": 1.8008, "step": 5161 }, { "epoch": 0.9298806575095699, "grad_norm": 1.319232702255249, "learning_rate": 5.556806746151775e-05, "loss": 1.5633, "step": 5162 }, { "epoch": 0.9300607971177662, "grad_norm": 1.4036771059036255, "learning_rate": 5.5553993798566696e-05, "loss": 1.6758, "step": 5163 }, { "epoch": 0.9302409367259626, "grad_norm": 1.3583061695098877, "learning_rate": 5.553991969007698e-05, "loss": 1.5427, "step": 5164 }, { "epoch": 0.9304210763341589, "grad_norm": 1.3591159582138062, "learning_rate": 5.552584513717758e-05, "loss": 1.7411, "step": 5165 }, { "epoch": 0.9306012159423553, "grad_norm": 1.3450148105621338, "learning_rate": 5.551177014099759e-05, "loss": 1.6463, "step": 5166 }, { "epoch": 0.9307813555505516, "grad_norm": 1.3095580339431763, "learning_rate": 5.549769470266607e-05, "loss": 1.5818, "step": 5167 }, { "epoch": 0.930961495158748, "grad_norm": 1.3924058675765991, "learning_rate": 5.548361882331217e-05, "loss": 1.4851, "step": 5168 }, { "epoch": 0.9311416347669443, "grad_norm": 1.2917842864990234, "learning_rate": 5.546954250406504e-05, "loss": 1.3244, "step": 5169 }, { "epoch": 0.9313217743751407, "grad_norm": 1.3035470247268677, "learning_rate": 5.545546574605387e-05, "loss": 1.5441, "step": 5170 }, { "epoch": 0.931501913983337, "grad_norm": 1.3958535194396973, "learning_rate": 5.544138855040789e-05, "loss": 1.613, "step": 5171 }, { "epoch": 0.9316820535915334, "grad_norm": 1.4021573066711426, "learning_rate": 5.542731091825638e-05, "loss": 1.5517, "step": 5172 }, { "epoch": 0.9318621931997297, "grad_norm": 1.3312063217163086, "learning_rate": 5.5413232850728634e-05, "loss": 1.3708, "step": 5173 }, { "epoch": 0.9320423328079261, "grad_norm": 1.330467939376831, "learning_rate": 5.5399154348953977e-05, "loss": 1.4821, "step": 5174 }, { "epoch": 0.9322224724161225, "grad_norm": 1.4916101694107056, "learning_rate": 5.5385075414061795e-05, "loss": 1.7005, "step": 5175 }, { "epoch": 0.9324026120243188, "grad_norm": 1.3027833700180054, "learning_rate": 5.537099604718148e-05, "loss": 1.4846, "step": 5176 }, { "epoch": 0.9325827516325152, "grad_norm": 1.2656091451644897, "learning_rate": 5.5356916249442484e-05, "loss": 1.4978, "step": 5177 }, { "epoch": 0.9327628912407115, "grad_norm": 1.2995755672454834, "learning_rate": 5.534283602197425e-05, "loss": 1.4641, "step": 5178 }, { "epoch": 0.932943030848908, "grad_norm": 1.477933645248413, "learning_rate": 5.532875536590635e-05, "loss": 1.8889, "step": 5179 }, { "epoch": 0.9331231704571042, "grad_norm": 1.5306252241134644, "learning_rate": 5.5314674282368275e-05, "loss": 1.4539, "step": 5180 }, { "epoch": 0.9333033100653007, "grad_norm": 1.2270597219467163, "learning_rate": 5.5300592772489625e-05, "loss": 1.5087, "step": 5181 }, { "epoch": 0.933483449673497, "grad_norm": 1.3239166736602783, "learning_rate": 5.5286510837399995e-05, "loss": 1.5424, "step": 5182 }, { "epoch": 0.9336635892816934, "grad_norm": 1.3206679821014404, "learning_rate": 5.5272428478229035e-05, "loss": 1.6499, "step": 5183 }, { "epoch": 0.9338437288898896, "grad_norm": 1.2962802648544312, "learning_rate": 5.525834569610644e-05, "loss": 1.4514, "step": 5184 }, { "epoch": 0.934023868498086, "grad_norm": 1.245829701423645, "learning_rate": 5.524426249216191e-05, "loss": 1.3677, "step": 5185 }, { "epoch": 0.9342040081062823, "grad_norm": 1.3724788427352905, "learning_rate": 5.523017886752521e-05, "loss": 1.5466, "step": 5186 }, { "epoch": 0.9343841477144788, "grad_norm": 1.3739076852798462, "learning_rate": 5.5216094823326106e-05, "loss": 1.7048, "step": 5187 }, { "epoch": 0.934564287322675, "grad_norm": 1.4113438129425049, "learning_rate": 5.520201036069439e-05, "loss": 1.589, "step": 5188 }, { "epoch": 0.9347444269308715, "grad_norm": 1.4559268951416016, "learning_rate": 5.518792548075996e-05, "loss": 1.5094, "step": 5189 }, { "epoch": 0.9349245665390677, "grad_norm": 1.3730664253234863, "learning_rate": 5.517384018465267e-05, "loss": 1.5086, "step": 5190 }, { "epoch": 0.9351047061472642, "grad_norm": 1.371534824371338, "learning_rate": 5.515975447350244e-05, "loss": 1.4017, "step": 5191 }, { "epoch": 0.9352848457554604, "grad_norm": 1.32597815990448, "learning_rate": 5.5145668348439216e-05, "loss": 1.406, "step": 5192 }, { "epoch": 0.9354649853636569, "grad_norm": 1.3546141386032104, "learning_rate": 5.5131581810592994e-05, "loss": 1.2636, "step": 5193 }, { "epoch": 0.9356451249718531, "grad_norm": 1.5192584991455078, "learning_rate": 5.5117494861093774e-05, "loss": 1.7882, "step": 5194 }, { "epoch": 0.9358252645800496, "grad_norm": 1.4189032316207886, "learning_rate": 5.51034075010716e-05, "loss": 1.6248, "step": 5195 }, { "epoch": 0.9360054041882458, "grad_norm": 1.5195542573928833, "learning_rate": 5.508931973165658e-05, "loss": 1.432, "step": 5196 }, { "epoch": 0.9361855437964423, "grad_norm": 1.570252776145935, "learning_rate": 5.50752315539788e-05, "loss": 1.6429, "step": 5197 }, { "epoch": 0.9363656834046385, "grad_norm": 1.3162167072296143, "learning_rate": 5.506114296916841e-05, "loss": 1.1696, "step": 5198 }, { "epoch": 0.936545823012835, "grad_norm": 1.3737947940826416, "learning_rate": 5.50470539783556e-05, "loss": 1.2468, "step": 5199 }, { "epoch": 0.9367259626210312, "grad_norm": 1.3073331117630005, "learning_rate": 5.5032964582670584e-05, "loss": 1.4755, "step": 5200 }, { "epoch": 0.9369061022292277, "grad_norm": 1.3313778638839722, "learning_rate": 5.50188747832436e-05, "loss": 2.0013, "step": 5201 }, { "epoch": 0.937086241837424, "grad_norm": 1.3424785137176514, "learning_rate": 5.5004784581204927e-05, "loss": 1.7619, "step": 5202 }, { "epoch": 0.9372663814456204, "grad_norm": 1.2780787944793701, "learning_rate": 5.499069397768487e-05, "loss": 1.9991, "step": 5203 }, { "epoch": 0.9374465210538168, "grad_norm": 1.291505217552185, "learning_rate": 5.497660297381376e-05, "loss": 1.9561, "step": 5204 }, { "epoch": 0.9376266606620131, "grad_norm": 1.309662938117981, "learning_rate": 5.4962511570721995e-05, "loss": 1.9757, "step": 5205 }, { "epoch": 0.9378068002702095, "grad_norm": 1.3198344707489014, "learning_rate": 5.494841976953996e-05, "loss": 2.0563, "step": 5206 }, { "epoch": 0.9379869398784058, "grad_norm": 1.5110433101654053, "learning_rate": 5.493432757139813e-05, "loss": 1.5722, "step": 5207 }, { "epoch": 0.9381670794866022, "grad_norm": 1.486543893814087, "learning_rate": 5.492023497742693e-05, "loss": 1.8125, "step": 5208 }, { "epoch": 0.9383472190947985, "grad_norm": 1.8968520164489746, "learning_rate": 5.4906141988756865e-05, "loss": 2.1808, "step": 5209 }, { "epoch": 0.9385273587029949, "grad_norm": 1.479502558708191, "learning_rate": 5.4892048606518484e-05, "loss": 1.9078, "step": 5210 }, { "epoch": 0.9387074983111912, "grad_norm": 1.4141347408294678, "learning_rate": 5.4877954831842346e-05, "loss": 1.7568, "step": 5211 }, { "epoch": 0.9388876379193876, "grad_norm": 1.2972908020019531, "learning_rate": 5.486386066585904e-05, "loss": 1.4539, "step": 5212 }, { "epoch": 0.9390677775275839, "grad_norm": 1.2835732698440552, "learning_rate": 5.484976610969923e-05, "loss": 1.3333, "step": 5213 }, { "epoch": 0.9392479171357803, "grad_norm": 1.2514272928237915, "learning_rate": 5.483567116449353e-05, "loss": 1.3357, "step": 5214 }, { "epoch": 0.9394280567439766, "grad_norm": 1.2403721809387207, "learning_rate": 5.482157583137264e-05, "loss": 1.4374, "step": 5215 }, { "epoch": 0.939608196352173, "grad_norm": 1.257214903831482, "learning_rate": 5.480748011146728e-05, "loss": 1.4023, "step": 5216 }, { "epoch": 0.9397883359603693, "grad_norm": 1.4019343852996826, "learning_rate": 5.479338400590821e-05, "loss": 1.441, "step": 5217 }, { "epoch": 0.9399684755685657, "grad_norm": 1.4403142929077148, "learning_rate": 5.4779287515826216e-05, "loss": 1.7232, "step": 5218 }, { "epoch": 0.940148615176762, "grad_norm": 1.3482422828674316, "learning_rate": 5.4765190642352106e-05, "loss": 1.6092, "step": 5219 }, { "epoch": 0.9403287547849584, "grad_norm": 1.3199315071105957, "learning_rate": 5.4751093386616716e-05, "loss": 1.6753, "step": 5220 }, { "epoch": 0.9405088943931547, "grad_norm": 1.8255246877670288, "learning_rate": 5.473699574975093e-05, "loss": 1.6225, "step": 5221 }, { "epoch": 0.9406890340013511, "grad_norm": 1.4141913652420044, "learning_rate": 5.472289773288566e-05, "loss": 1.7127, "step": 5222 }, { "epoch": 0.9408691736095474, "grad_norm": 1.280982255935669, "learning_rate": 5.4708799337151826e-05, "loss": 1.393, "step": 5223 }, { "epoch": 0.9410493132177438, "grad_norm": 1.3213261365890503, "learning_rate": 5.469470056368041e-05, "loss": 1.5745, "step": 5224 }, { "epoch": 0.9412294528259401, "grad_norm": 1.2625030279159546, "learning_rate": 5.4680601413602396e-05, "loss": 1.3813, "step": 5225 }, { "epoch": 0.9414095924341365, "grad_norm": 1.4939217567443848, "learning_rate": 5.4666501888048815e-05, "loss": 1.7354, "step": 5226 }, { "epoch": 0.9415897320423328, "grad_norm": 1.3313169479370117, "learning_rate": 5.4652401988150725e-05, "loss": 1.3806, "step": 5227 }, { "epoch": 0.9417698716505292, "grad_norm": 1.310395359992981, "learning_rate": 5.463830171503922e-05, "loss": 1.6968, "step": 5228 }, { "epoch": 0.9419500112587255, "grad_norm": 1.4383376836776733, "learning_rate": 5.4624201069845416e-05, "loss": 1.379, "step": 5229 }, { "epoch": 0.9421301508669219, "grad_norm": 1.3266233205795288, "learning_rate": 5.461010005370044e-05, "loss": 1.453, "step": 5230 }, { "epoch": 0.9423102904751182, "grad_norm": 1.4859930276870728, "learning_rate": 5.459599866773549e-05, "loss": 1.6295, "step": 5231 }, { "epoch": 0.9424904300833146, "grad_norm": 1.4545331001281738, "learning_rate": 5.458189691308177e-05, "loss": 1.7269, "step": 5232 }, { "epoch": 0.942670569691511, "grad_norm": 1.4907279014587402, "learning_rate": 5.4567794790870495e-05, "loss": 1.7467, "step": 5233 }, { "epoch": 0.9428507092997073, "grad_norm": 1.418258547782898, "learning_rate": 5.455369230223296e-05, "loss": 1.7117, "step": 5234 }, { "epoch": 0.9430308489079037, "grad_norm": 1.373248815536499, "learning_rate": 5.453958944830045e-05, "loss": 1.683, "step": 5235 }, { "epoch": 0.9432109885161, "grad_norm": 1.3248751163482666, "learning_rate": 5.452548623020428e-05, "loss": 1.4341, "step": 5236 }, { "epoch": 0.9433911281242964, "grad_norm": 1.4791078567504883, "learning_rate": 5.45113826490758e-05, "loss": 1.6996, "step": 5237 }, { "epoch": 0.9435712677324927, "grad_norm": 1.4098297357559204, "learning_rate": 5.449727870604642e-05, "loss": 1.5332, "step": 5238 }, { "epoch": 0.9437514073406891, "grad_norm": 1.3441678285598755, "learning_rate": 5.4483174402247526e-05, "loss": 1.5943, "step": 5239 }, { "epoch": 0.9439315469488854, "grad_norm": 1.3465396165847778, "learning_rate": 5.446906973881056e-05, "loss": 1.4731, "step": 5240 }, { "epoch": 0.9441116865570818, "grad_norm": 1.401462435722351, "learning_rate": 5.4454964716866997e-05, "loss": 1.4924, "step": 5241 }, { "epoch": 0.9442918261652781, "grad_norm": 1.3031507730484009, "learning_rate": 5.4440859337548346e-05, "loss": 1.3929, "step": 5242 }, { "epoch": 0.9444719657734745, "grad_norm": 1.3358888626098633, "learning_rate": 5.44267536019861e-05, "loss": 1.5051, "step": 5243 }, { "epoch": 0.9446521053816708, "grad_norm": 1.4784542322158813, "learning_rate": 5.4412647511311855e-05, "loss": 1.5675, "step": 5244 }, { "epoch": 0.9448322449898672, "grad_norm": 1.5374906063079834, "learning_rate": 5.439854106665717e-05, "loss": 1.6853, "step": 5245 }, { "epoch": 0.9450123845980635, "grad_norm": 1.4937000274658203, "learning_rate": 5.4384434269153664e-05, "loss": 1.4855, "step": 5246 }, { "epoch": 0.9451925242062599, "grad_norm": 1.3594037294387817, "learning_rate": 5.437032711993297e-05, "loss": 1.4149, "step": 5247 }, { "epoch": 0.9453726638144562, "grad_norm": 1.2588589191436768, "learning_rate": 5.435621962012676e-05, "loss": 1.1674, "step": 5248 }, { "epoch": 0.9455528034226526, "grad_norm": 1.3163026571273804, "learning_rate": 5.434211177086674e-05, "loss": 1.3618, "step": 5249 }, { "epoch": 0.9457329430308489, "grad_norm": 1.2332024574279785, "learning_rate": 5.4328003573284645e-05, "loss": 1.1261, "step": 5250 }, { "epoch": 0.9459130826390453, "grad_norm": 1.2786064147949219, "learning_rate": 5.4313895028512194e-05, "loss": 1.9925, "step": 5251 }, { "epoch": 0.9460932222472416, "grad_norm": 1.2586089372634888, "learning_rate": 5.4299786137681194e-05, "loss": 1.9319, "step": 5252 }, { "epoch": 0.946273361855438, "grad_norm": 1.431467890739441, "learning_rate": 5.4285676901923435e-05, "loss": 1.9859, "step": 5253 }, { "epoch": 0.9464535014636343, "grad_norm": 1.2929086685180664, "learning_rate": 5.4271567322370765e-05, "loss": 2.0411, "step": 5254 }, { "epoch": 0.9466336410718307, "grad_norm": 1.330033540725708, "learning_rate": 5.425745740015504e-05, "loss": 1.8186, "step": 5255 }, { "epoch": 0.946813780680027, "grad_norm": 1.259856939315796, "learning_rate": 5.4243347136408176e-05, "loss": 1.7864, "step": 5256 }, { "epoch": 0.9469939202882234, "grad_norm": 1.3493809700012207, "learning_rate": 5.4229236532262064e-05, "loss": 1.7646, "step": 5257 }, { "epoch": 0.9471740598964197, "grad_norm": 1.4474732875823975, "learning_rate": 5.4215125588848635e-05, "loss": 2.0963, "step": 5258 }, { "epoch": 0.9473541995046161, "grad_norm": 1.9803376197814941, "learning_rate": 5.4201014307299904e-05, "loss": 2.3726, "step": 5259 }, { "epoch": 0.9475343391128125, "grad_norm": 1.6465718746185303, "learning_rate": 5.418690268874784e-05, "loss": 2.2566, "step": 5260 }, { "epoch": 0.9477144787210088, "grad_norm": 1.874721884727478, "learning_rate": 5.41727907343245e-05, "loss": 2.2286, "step": 5261 }, { "epoch": 0.9478946183292052, "grad_norm": 1.2656484842300415, "learning_rate": 5.4158678445161906e-05, "loss": 1.615, "step": 5262 }, { "epoch": 0.9480747579374015, "grad_norm": 1.3518365621566772, "learning_rate": 5.414456582239216e-05, "loss": 1.5953, "step": 5263 }, { "epoch": 0.9482548975455979, "grad_norm": 1.2650874853134155, "learning_rate": 5.4130452867147366e-05, "loss": 1.4918, "step": 5264 }, { "epoch": 0.9484350371537942, "grad_norm": 1.3905199766159058, "learning_rate": 5.411633958055963e-05, "loss": 1.7087, "step": 5265 }, { "epoch": 0.9486151767619906, "grad_norm": 1.3632307052612305, "learning_rate": 5.4102225963761155e-05, "loss": 1.7486, "step": 5266 }, { "epoch": 0.9487953163701869, "grad_norm": 1.2745888233184814, "learning_rate": 5.408811201788412e-05, "loss": 1.5132, "step": 5267 }, { "epoch": 0.9489754559783833, "grad_norm": 1.410667896270752, "learning_rate": 5.407399774406072e-05, "loss": 1.7726, "step": 5268 }, { "epoch": 0.9491555955865796, "grad_norm": 1.3012992143630981, "learning_rate": 5.4059883143423204e-05, "loss": 1.467, "step": 5269 }, { "epoch": 0.949335735194776, "grad_norm": 1.3403841257095337, "learning_rate": 5.4045768217103844e-05, "loss": 1.5374, "step": 5270 }, { "epoch": 0.9495158748029723, "grad_norm": 1.2738533020019531, "learning_rate": 5.4031652966234933e-05, "loss": 1.5176, "step": 5271 }, { "epoch": 0.9496960144111687, "grad_norm": 1.3155604600906372, "learning_rate": 5.401753739194878e-05, "loss": 1.607, "step": 5272 }, { "epoch": 0.949876154019365, "grad_norm": 1.4210665225982666, "learning_rate": 5.4003421495377735e-05, "loss": 1.6619, "step": 5273 }, { "epoch": 0.9500562936275614, "grad_norm": 1.4584317207336426, "learning_rate": 5.3989305277654156e-05, "loss": 1.5684, "step": 5274 }, { "epoch": 0.9502364332357577, "grad_norm": 1.4001803398132324, "learning_rate": 5.3975188739910464e-05, "loss": 1.4645, "step": 5275 }, { "epoch": 0.9504165728439541, "grad_norm": 1.3127332925796509, "learning_rate": 5.396107188327906e-05, "loss": 1.463, "step": 5276 }, { "epoch": 0.9505967124521504, "grad_norm": 1.3617123365402222, "learning_rate": 5.394695470889241e-05, "loss": 1.3824, "step": 5277 }, { "epoch": 0.9507768520603468, "grad_norm": 1.3839503526687622, "learning_rate": 5.3932837217882984e-05, "loss": 1.5643, "step": 5278 }, { "epoch": 0.9509569916685431, "grad_norm": 1.2879011631011963, "learning_rate": 5.391871941138324e-05, "loss": 1.3389, "step": 5279 }, { "epoch": 0.9511371312767395, "grad_norm": 1.3183971643447876, "learning_rate": 5.390460129052576e-05, "loss": 1.3732, "step": 5280 }, { "epoch": 0.9513172708849358, "grad_norm": 1.370556116104126, "learning_rate": 5.3890482856443035e-05, "loss": 1.4709, "step": 5281 }, { "epoch": 0.9514974104931322, "grad_norm": 1.3111011981964111, "learning_rate": 5.38763641102677e-05, "loss": 1.4616, "step": 5282 }, { "epoch": 0.9516775501013285, "grad_norm": 1.2944639921188354, "learning_rate": 5.386224505313232e-05, "loss": 1.577, "step": 5283 }, { "epoch": 0.9518576897095249, "grad_norm": 1.4301786422729492, "learning_rate": 5.3848125686169524e-05, "loss": 1.5857, "step": 5284 }, { "epoch": 0.9520378293177212, "grad_norm": 1.413000464439392, "learning_rate": 5.383400601051194e-05, "loss": 1.6484, "step": 5285 }, { "epoch": 0.9522179689259176, "grad_norm": 1.4588044881820679, "learning_rate": 5.381988602729229e-05, "loss": 1.64, "step": 5286 }, { "epoch": 0.9523981085341139, "grad_norm": 1.338789463043213, "learning_rate": 5.380576573764322e-05, "loss": 1.3751, "step": 5287 }, { "epoch": 0.9525782481423103, "grad_norm": 1.3800326585769653, "learning_rate": 5.37916451426975e-05, "loss": 1.501, "step": 5288 }, { "epoch": 0.9527583877505067, "grad_norm": 1.4623042345046997, "learning_rate": 5.377752424358784e-05, "loss": 1.7401, "step": 5289 }, { "epoch": 0.952938527358703, "grad_norm": 1.474949836730957, "learning_rate": 5.376340304144701e-05, "loss": 1.6748, "step": 5290 }, { "epoch": 0.9531186669668994, "grad_norm": 1.5639631748199463, "learning_rate": 5.3749281537407845e-05, "loss": 1.535, "step": 5291 }, { "epoch": 0.9532988065750957, "grad_norm": 1.2978333234786987, "learning_rate": 5.373515973260313e-05, "loss": 1.616, "step": 5292 }, { "epoch": 0.9534789461832921, "grad_norm": 1.3505828380584717, "learning_rate": 5.3721037628165716e-05, "loss": 1.3829, "step": 5293 }, { "epoch": 0.9536590857914884, "grad_norm": 1.3966565132141113, "learning_rate": 5.3706915225228493e-05, "loss": 1.3703, "step": 5294 }, { "epoch": 0.9538392253996848, "grad_norm": 1.3113038539886475, "learning_rate": 5.3692792524924315e-05, "loss": 1.3557, "step": 5295 }, { "epoch": 0.9540193650078811, "grad_norm": 1.5705491304397583, "learning_rate": 5.367866952838614e-05, "loss": 1.6088, "step": 5296 }, { "epoch": 0.9541995046160775, "grad_norm": 1.3828707933425903, "learning_rate": 5.3664546236746874e-05, "loss": 1.5108, "step": 5297 }, { "epoch": 0.9543796442242738, "grad_norm": 1.3983142375946045, "learning_rate": 5.36504226511395e-05, "loss": 1.2467, "step": 5298 }, { "epoch": 0.9545597838324702, "grad_norm": 1.3830968141555786, "learning_rate": 5.363629877269699e-05, "loss": 1.4298, "step": 5299 }, { "epoch": 0.9547399234406665, "grad_norm": 1.2593024969100952, "learning_rate": 5.3622174602552376e-05, "loss": 1.2716, "step": 5300 }, { "epoch": 0.9549200630488629, "grad_norm": 1.445265531539917, "learning_rate": 5.360805014183867e-05, "loss": 2.1269, "step": 5301 }, { "epoch": 0.9551002026570592, "grad_norm": 1.355506181716919, "learning_rate": 5.3593925391688937e-05, "loss": 1.935, "step": 5302 }, { "epoch": 0.9552803422652556, "grad_norm": 1.340486764907837, "learning_rate": 5.357980035323628e-05, "loss": 1.9725, "step": 5303 }, { "epoch": 0.9554604818734519, "grad_norm": 1.3715964555740356, "learning_rate": 5.356567502761376e-05, "loss": 2.3223, "step": 5304 }, { "epoch": 0.9556406214816483, "grad_norm": 1.3033766746520996, "learning_rate": 5.355154941595454e-05, "loss": 2.0322, "step": 5305 }, { "epoch": 0.9558207610898446, "grad_norm": 1.383777141571045, "learning_rate": 5.3537423519391736e-05, "loss": 1.8917, "step": 5306 }, { "epoch": 0.956000900698041, "grad_norm": 1.3475384712219238, "learning_rate": 5.352329733905856e-05, "loss": 1.6404, "step": 5307 }, { "epoch": 0.9561810403062373, "grad_norm": 1.4337648153305054, "learning_rate": 5.350917087608818e-05, "loss": 1.9408, "step": 5308 }, { "epoch": 0.9563611799144337, "grad_norm": 1.7661281824111938, "learning_rate": 5.349504413161381e-05, "loss": 2.6733, "step": 5309 }, { "epoch": 0.95654131952263, "grad_norm": 1.6380430459976196, "learning_rate": 5.348091710676874e-05, "loss": 1.8912, "step": 5310 }, { "epoch": 0.9567214591308264, "grad_norm": 1.5622223615646362, "learning_rate": 5.3466789802686155e-05, "loss": 1.8553, "step": 5311 }, { "epoch": 0.9569015987390227, "grad_norm": 1.3471565246582031, "learning_rate": 5.345266222049941e-05, "loss": 1.4785, "step": 5312 }, { "epoch": 0.9570817383472191, "grad_norm": 1.3371397256851196, "learning_rate": 5.3438534361341755e-05, "loss": 1.7472, "step": 5313 }, { "epoch": 0.9572618779554154, "grad_norm": 1.2839196920394897, "learning_rate": 5.342440622634657e-05, "loss": 1.421, "step": 5314 }, { "epoch": 0.9574420175636118, "grad_norm": 1.3307623863220215, "learning_rate": 5.341027781664717e-05, "loss": 1.5156, "step": 5315 }, { "epoch": 0.9576221571718081, "grad_norm": 1.386922001838684, "learning_rate": 5.339614913337696e-05, "loss": 1.5564, "step": 5316 }, { "epoch": 0.9578022967800045, "grad_norm": 1.2679773569107056, "learning_rate": 5.3382020177669314e-05, "loss": 1.3923, "step": 5317 }, { "epoch": 0.9579824363882009, "grad_norm": 1.313116431236267, "learning_rate": 5.336789095065765e-05, "loss": 1.4542, "step": 5318 }, { "epoch": 0.9581625759963972, "grad_norm": 1.2313501834869385, "learning_rate": 5.335376145347542e-05, "loss": 1.5989, "step": 5319 }, { "epoch": 0.9583427156045936, "grad_norm": 1.5348842144012451, "learning_rate": 5.3339631687256084e-05, "loss": 1.7846, "step": 5320 }, { "epoch": 0.9585228552127899, "grad_norm": 1.3411052227020264, "learning_rate": 5.3325501653133125e-05, "loss": 1.4423, "step": 5321 }, { "epoch": 0.9587029948209863, "grad_norm": 1.2594385147094727, "learning_rate": 5.331137135224003e-05, "loss": 1.4513, "step": 5322 }, { "epoch": 0.9588831344291826, "grad_norm": 1.2734466791152954, "learning_rate": 5.3297240785710335e-05, "loss": 1.4999, "step": 5323 }, { "epoch": 0.959063274037379, "grad_norm": 1.2433655261993408, "learning_rate": 5.3283109954677604e-05, "loss": 1.297, "step": 5324 }, { "epoch": 0.9592434136455753, "grad_norm": 1.2244817018508911, "learning_rate": 5.326897886027537e-05, "loss": 1.4525, "step": 5325 }, { "epoch": 0.9594235532537717, "grad_norm": 1.2821660041809082, "learning_rate": 5.3254847503637285e-05, "loss": 1.4262, "step": 5326 }, { "epoch": 0.959603692861968, "grad_norm": 1.274381399154663, "learning_rate": 5.324071588589689e-05, "loss": 1.435, "step": 5327 }, { "epoch": 0.9597838324701644, "grad_norm": 1.259706974029541, "learning_rate": 5.322658400818784e-05, "loss": 1.2774, "step": 5328 }, { "epoch": 0.9599639720783607, "grad_norm": 1.3823274374008179, "learning_rate": 5.3212451871643806e-05, "loss": 1.5523, "step": 5329 }, { "epoch": 0.9601441116865571, "grad_norm": 1.4042102098464966, "learning_rate": 5.319831947739844e-05, "loss": 1.4561, "step": 5330 }, { "epoch": 0.9603242512947534, "grad_norm": 1.4428564310073853, "learning_rate": 5.318418682658545e-05, "loss": 1.4929, "step": 5331 }, { "epoch": 0.9605043909029498, "grad_norm": 1.3917436599731445, "learning_rate": 5.317005392033854e-05, "loss": 1.3568, "step": 5332 }, { "epoch": 0.9606845305111461, "grad_norm": 1.3827439546585083, "learning_rate": 5.3155920759791446e-05, "loss": 1.5766, "step": 5333 }, { "epoch": 0.9608646701193425, "grad_norm": 1.5625269412994385, "learning_rate": 5.314178734607792e-05, "loss": 1.5136, "step": 5334 }, { "epoch": 0.9610448097275388, "grad_norm": 1.2335017919540405, "learning_rate": 5.3127653680331755e-05, "loss": 1.3161, "step": 5335 }, { "epoch": 0.9612249493357352, "grad_norm": 1.426091194152832, "learning_rate": 5.311351976368674e-05, "loss": 1.6226, "step": 5336 }, { "epoch": 0.9614050889439315, "grad_norm": 1.402161955833435, "learning_rate": 5.309938559727667e-05, "loss": 1.5161, "step": 5337 }, { "epoch": 0.9615852285521279, "grad_norm": 1.4057179689407349, "learning_rate": 5.3085251182235416e-05, "loss": 1.7032, "step": 5338 }, { "epoch": 0.9617653681603242, "grad_norm": 1.460518479347229, "learning_rate": 5.30711165196968e-05, "loss": 1.5467, "step": 5339 }, { "epoch": 0.9619455077685206, "grad_norm": 1.2929974794387817, "learning_rate": 5.305698161079471e-05, "loss": 1.3473, "step": 5340 }, { "epoch": 0.9621256473767169, "grad_norm": 1.2966831922531128, "learning_rate": 5.304284645666304e-05, "loss": 1.419, "step": 5341 }, { "epoch": 0.9623057869849133, "grad_norm": 1.3768166303634644, "learning_rate": 5.302871105843573e-05, "loss": 1.5191, "step": 5342 }, { "epoch": 0.9624859265931096, "grad_norm": 1.446094036102295, "learning_rate": 5.3014575417246706e-05, "loss": 1.4516, "step": 5343 }, { "epoch": 0.962666066201306, "grad_norm": 1.294031023979187, "learning_rate": 5.3000439534229875e-05, "loss": 1.336, "step": 5344 }, { "epoch": 0.9628462058095023, "grad_norm": 1.4943344593048096, "learning_rate": 5.2986303410519254e-05, "loss": 1.6072, "step": 5345 }, { "epoch": 0.9630263454176987, "grad_norm": 1.4249428510665894, "learning_rate": 5.297216704724883e-05, "loss": 1.4949, "step": 5346 }, { "epoch": 0.9632064850258951, "grad_norm": 1.536133050918579, "learning_rate": 5.295803044555262e-05, "loss": 1.4801, "step": 5347 }, { "epoch": 0.9633866246340914, "grad_norm": 1.5198663473129272, "learning_rate": 5.294389360656465e-05, "loss": 1.641, "step": 5348 }, { "epoch": 0.9635667642422878, "grad_norm": 1.4848164319992065, "learning_rate": 5.292975653141896e-05, "loss": 1.5108, "step": 5349 }, { "epoch": 0.9637469038504841, "grad_norm": 1.4578449726104736, "learning_rate": 5.2915619221249645e-05, "loss": 1.6459, "step": 5350 }, { "epoch": 0.9639270434586805, "grad_norm": 1.1815444231033325, "learning_rate": 5.2901481677190755e-05, "loss": 1.9762, "step": 5351 }, { "epoch": 0.9641071830668768, "grad_norm": 1.2879889011383057, "learning_rate": 5.288734390037643e-05, "loss": 2.2266, "step": 5352 }, { "epoch": 0.9642873226750732, "grad_norm": 1.2407768964767456, "learning_rate": 5.287320589194078e-05, "loss": 2.1087, "step": 5353 }, { "epoch": 0.9644674622832695, "grad_norm": 1.252458930015564, "learning_rate": 5.285906765301796e-05, "loss": 1.9604, "step": 5354 }, { "epoch": 0.9646476018914659, "grad_norm": 1.2389116287231445, "learning_rate": 5.2844929184742106e-05, "loss": 1.6724, "step": 5355 }, { "epoch": 0.9648277414996622, "grad_norm": 1.3210970163345337, "learning_rate": 5.283079048824743e-05, "loss": 1.6816, "step": 5356 }, { "epoch": 0.9650078811078586, "grad_norm": 1.4765480756759644, "learning_rate": 5.281665156466813e-05, "loss": 1.9629, "step": 5357 }, { "epoch": 0.9651880207160549, "grad_norm": 1.600051760673523, "learning_rate": 5.28025124151384e-05, "loss": 2.0372, "step": 5358 }, { "epoch": 0.9653681603242513, "grad_norm": 1.7439708709716797, "learning_rate": 5.278837304079251e-05, "loss": 2.0927, "step": 5359 }, { "epoch": 0.9655482999324476, "grad_norm": 1.5792269706726074, "learning_rate": 5.277423344276465e-05, "loss": 1.873, "step": 5360 }, { "epoch": 0.965728439540644, "grad_norm": 1.2552424669265747, "learning_rate": 5.276009362218917e-05, "loss": 1.4666, "step": 5361 }, { "epoch": 0.9659085791488403, "grad_norm": 1.3538386821746826, "learning_rate": 5.2745953580200314e-05, "loss": 1.6019, "step": 5362 }, { "epoch": 0.9660887187570367, "grad_norm": 1.4427562952041626, "learning_rate": 5.27318133179324e-05, "loss": 1.6921, "step": 5363 }, { "epoch": 0.966268858365233, "grad_norm": 1.3343034982681274, "learning_rate": 5.271767283651975e-05, "loss": 1.4431, "step": 5364 }, { "epoch": 0.9664489979734294, "grad_norm": 1.2728177309036255, "learning_rate": 5.270353213709671e-05, "loss": 1.462, "step": 5365 }, { "epoch": 0.9666291375816257, "grad_norm": 1.2118480205535889, "learning_rate": 5.268939122079764e-05, "loss": 1.5252, "step": 5366 }, { "epoch": 0.9668092771898221, "grad_norm": 1.1633142232894897, "learning_rate": 5.267525008875692e-05, "loss": 1.4332, "step": 5367 }, { "epoch": 0.9669894167980184, "grad_norm": 1.4071487188339233, "learning_rate": 5.2661108742108935e-05, "loss": 1.672, "step": 5368 }, { "epoch": 0.9671695564062148, "grad_norm": 1.3663479089736938, "learning_rate": 5.264696718198811e-05, "loss": 1.7068, "step": 5369 }, { "epoch": 0.9673496960144111, "grad_norm": 1.3703925609588623, "learning_rate": 5.263282540952886e-05, "loss": 1.7422, "step": 5370 }, { "epoch": 0.9675298356226075, "grad_norm": 1.47991144657135, "learning_rate": 5.261868342586565e-05, "loss": 1.7098, "step": 5371 }, { "epoch": 0.9677099752308038, "grad_norm": 1.2913236618041992, "learning_rate": 5.260454123213291e-05, "loss": 1.3145, "step": 5372 }, { "epoch": 0.9678901148390002, "grad_norm": 1.4022060632705688, "learning_rate": 5.259039882946516e-05, "loss": 1.6299, "step": 5373 }, { "epoch": 0.9680702544471966, "grad_norm": 1.493638515472412, "learning_rate": 5.257625621899686e-05, "loss": 1.4015, "step": 5374 }, { "epoch": 0.9682503940553929, "grad_norm": 1.3100414276123047, "learning_rate": 5.2562113401862556e-05, "loss": 1.4389, "step": 5375 }, { "epoch": 0.9684305336635893, "grad_norm": 1.3837242126464844, "learning_rate": 5.254797037919675e-05, "loss": 1.4667, "step": 5376 }, { "epoch": 0.9686106732717856, "grad_norm": 1.3499776124954224, "learning_rate": 5.253382715213402e-05, "loss": 1.4416, "step": 5377 }, { "epoch": 0.968790812879982, "grad_norm": 1.4175798892974854, "learning_rate": 5.2519683721808895e-05, "loss": 1.4218, "step": 5378 }, { "epoch": 0.9689709524881783, "grad_norm": 1.3439507484436035, "learning_rate": 5.250554008935596e-05, "loss": 1.5618, "step": 5379 }, { "epoch": 0.9691510920963747, "grad_norm": 1.3955005407333374, "learning_rate": 5.249139625590984e-05, "loss": 1.4376, "step": 5380 }, { "epoch": 0.969331231704571, "grad_norm": 1.3289790153503418, "learning_rate": 5.247725222260512e-05, "loss": 1.3678, "step": 5381 }, { "epoch": 0.9695113713127674, "grad_norm": 1.2933189868927002, "learning_rate": 5.2463107990576444e-05, "loss": 1.433, "step": 5382 }, { "epoch": 0.9696915109209637, "grad_norm": 1.374428629875183, "learning_rate": 5.244896356095842e-05, "loss": 1.5303, "step": 5383 }, { "epoch": 0.9698716505291601, "grad_norm": 1.5264179706573486, "learning_rate": 5.2434818934885745e-05, "loss": 1.731, "step": 5384 }, { "epoch": 0.9700517901373564, "grad_norm": 1.3887882232666016, "learning_rate": 5.2420674113493084e-05, "loss": 1.3395, "step": 5385 }, { "epoch": 0.9702319297455528, "grad_norm": 1.511896014213562, "learning_rate": 5.2406529097915125e-05, "loss": 1.4429, "step": 5386 }, { "epoch": 0.9704120693537491, "grad_norm": 1.4407421350479126, "learning_rate": 5.2392383889286566e-05, "loss": 1.4556, "step": 5387 }, { "epoch": 0.9705922089619455, "grad_norm": 1.4015072584152222, "learning_rate": 5.237823848874214e-05, "loss": 1.4712, "step": 5388 }, { "epoch": 0.9707723485701418, "grad_norm": 1.5028202533721924, "learning_rate": 5.236409289741658e-05, "loss": 1.772, "step": 5389 }, { "epoch": 0.9709524881783382, "grad_norm": 1.4634078741073608, "learning_rate": 5.234994711644463e-05, "loss": 1.5136, "step": 5390 }, { "epoch": 0.9711326277865345, "grad_norm": 1.4175139665603638, "learning_rate": 5.2335801146961095e-05, "loss": 1.4582, "step": 5391 }, { "epoch": 0.9713127673947309, "grad_norm": 1.3558861017227173, "learning_rate": 5.2321654990100713e-05, "loss": 1.5888, "step": 5392 }, { "epoch": 0.9714929070029272, "grad_norm": 1.2589038610458374, "learning_rate": 5.230750864699829e-05, "loss": 1.2624, "step": 5393 }, { "epoch": 0.9716730466111236, "grad_norm": 1.311826229095459, "learning_rate": 5.229336211878866e-05, "loss": 1.2215, "step": 5394 }, { "epoch": 0.9718531862193199, "grad_norm": 1.3919540643692017, "learning_rate": 5.2279215406606627e-05, "loss": 1.5494, "step": 5395 }, { "epoch": 0.9720333258275163, "grad_norm": 1.5327457189559937, "learning_rate": 5.226506851158706e-05, "loss": 1.6599, "step": 5396 }, { "epoch": 0.9722134654357126, "grad_norm": 1.4243777990341187, "learning_rate": 5.2250921434864786e-05, "loss": 1.4678, "step": 5397 }, { "epoch": 0.972393605043909, "grad_norm": 1.501854419708252, "learning_rate": 5.22367741775747e-05, "loss": 1.3979, "step": 5398 }, { "epoch": 0.9725737446521053, "grad_norm": 1.27566397190094, "learning_rate": 5.222262674085166e-05, "loss": 0.9885, "step": 5399 }, { "epoch": 0.9727538842603017, "grad_norm": 1.4059970378875732, "learning_rate": 5.220847912583061e-05, "loss": 1.5126, "step": 5400 }, { "epoch": 0.972934023868498, "grad_norm": 1.3445314168930054, "learning_rate": 5.219433133364643e-05, "loss": 1.8674, "step": 5401 }, { "epoch": 0.9731141634766944, "grad_norm": 1.2173614501953125, "learning_rate": 5.218018336543407e-05, "loss": 1.8643, "step": 5402 }, { "epoch": 0.9732943030848908, "grad_norm": 1.5966296195983887, "learning_rate": 5.2166035222328446e-05, "loss": 1.9428, "step": 5403 }, { "epoch": 0.9734744426930871, "grad_norm": 1.4709405899047852, "learning_rate": 5.215188690546453e-05, "loss": 2.2181, "step": 5404 }, { "epoch": 0.9736545823012835, "grad_norm": 1.2857550382614136, "learning_rate": 5.21377384159773e-05, "loss": 1.6885, "step": 5405 }, { "epoch": 0.9738347219094798, "grad_norm": 1.477520227432251, "learning_rate": 5.2123589755001733e-05, "loss": 1.8622, "step": 5406 }, { "epoch": 0.9740148615176762, "grad_norm": 1.4609161615371704, "learning_rate": 5.2109440923672836e-05, "loss": 1.7777, "step": 5407 }, { "epoch": 0.9741950011258725, "grad_norm": 1.6814249753952026, "learning_rate": 5.2095291923125635e-05, "loss": 2.0174, "step": 5408 }, { "epoch": 0.974375140734069, "grad_norm": 1.603654146194458, "learning_rate": 5.2081142754495106e-05, "loss": 1.9189, "step": 5409 }, { "epoch": 0.9745552803422652, "grad_norm": 1.7143933773040771, "learning_rate": 5.2066993418916335e-05, "loss": 1.9721, "step": 5410 }, { "epoch": 0.9747354199504616, "grad_norm": 1.3116461038589478, "learning_rate": 5.205284391752434e-05, "loss": 1.3644, "step": 5411 }, { "epoch": 0.9749155595586579, "grad_norm": 1.3820116519927979, "learning_rate": 5.203869425145422e-05, "loss": 1.3874, "step": 5412 }, { "epoch": 0.9750956991668543, "grad_norm": 1.427774429321289, "learning_rate": 5.202454442184104e-05, "loss": 1.641, "step": 5413 }, { "epoch": 0.9752758387750506, "grad_norm": 1.3567496538162231, "learning_rate": 5.20103944298199e-05, "loss": 1.6077, "step": 5414 }, { "epoch": 0.975455978383247, "grad_norm": 1.3524290323257446, "learning_rate": 5.199624427652588e-05, "loss": 1.5241, "step": 5415 }, { "epoch": 0.9756361179914433, "grad_norm": 1.2697441577911377, "learning_rate": 5.1982093963094126e-05, "loss": 1.3101, "step": 5416 }, { "epoch": 0.9758162575996397, "grad_norm": 1.3722608089447021, "learning_rate": 5.196794349065975e-05, "loss": 1.6009, "step": 5417 }, { "epoch": 0.975996397207836, "grad_norm": 1.2859597206115723, "learning_rate": 5.195379286035793e-05, "loss": 1.4874, "step": 5418 }, { "epoch": 0.9761765368160324, "grad_norm": 1.218653917312622, "learning_rate": 5.193964207332378e-05, "loss": 1.4239, "step": 5419 }, { "epoch": 0.9763566764242287, "grad_norm": 1.4374889135360718, "learning_rate": 5.1925491130692486e-05, "loss": 1.5437, "step": 5420 }, { "epoch": 0.9765368160324251, "grad_norm": 1.3119416236877441, "learning_rate": 5.1911340033599244e-05, "loss": 1.4213, "step": 5421 }, { "epoch": 0.9767169556406214, "grad_norm": 1.3285481929779053, "learning_rate": 5.189718878317923e-05, "loss": 1.4009, "step": 5422 }, { "epoch": 0.9768970952488178, "grad_norm": 1.3340466022491455, "learning_rate": 5.188303738056765e-05, "loss": 1.3934, "step": 5423 }, { "epoch": 0.9770772348570141, "grad_norm": 1.4003435373306274, "learning_rate": 5.186888582689974e-05, "loss": 1.359, "step": 5424 }, { "epoch": 0.9772573744652105, "grad_norm": 1.3494806289672852, "learning_rate": 5.18547341233107e-05, "loss": 1.4999, "step": 5425 }, { "epoch": 0.9774375140734068, "grad_norm": 1.427824854850769, "learning_rate": 5.184058227093581e-05, "loss": 1.6114, "step": 5426 }, { "epoch": 0.9776176536816032, "grad_norm": 1.2543511390686035, "learning_rate": 5.1826430270910286e-05, "loss": 1.2611, "step": 5427 }, { "epoch": 0.9777977932897995, "grad_norm": 1.4440135955810547, "learning_rate": 5.181227812436944e-05, "loss": 1.7121, "step": 5428 }, { "epoch": 0.977977932897996, "grad_norm": 1.5105116367340088, "learning_rate": 5.179812583244851e-05, "loss": 1.7424, "step": 5429 }, { "epoch": 0.9781580725061922, "grad_norm": 1.45107901096344, "learning_rate": 5.1783973396282816e-05, "loss": 1.6719, "step": 5430 }, { "epoch": 0.9783382121143887, "grad_norm": 1.2329802513122559, "learning_rate": 5.176982081700763e-05, "loss": 1.4013, "step": 5431 }, { "epoch": 0.9785183517225851, "grad_norm": 1.3970863819122314, "learning_rate": 5.175566809575828e-05, "loss": 1.5069, "step": 5432 }, { "epoch": 0.9786984913307814, "grad_norm": 1.3058767318725586, "learning_rate": 5.17415152336701e-05, "loss": 1.5483, "step": 5433 }, { "epoch": 0.9788786309389778, "grad_norm": 1.3232342004776, "learning_rate": 5.172736223187842e-05, "loss": 1.4056, "step": 5434 }, { "epoch": 0.979058770547174, "grad_norm": 1.5994749069213867, "learning_rate": 5.171320909151858e-05, "loss": 1.8552, "step": 5435 }, { "epoch": 0.9792389101553705, "grad_norm": 1.5129034519195557, "learning_rate": 5.1699055813725936e-05, "loss": 1.5214, "step": 5436 }, { "epoch": 0.9794190497635668, "grad_norm": 1.393964171409607, "learning_rate": 5.168490239963586e-05, "loss": 1.4593, "step": 5437 }, { "epoch": 0.9795991893717632, "grad_norm": 1.2762224674224854, "learning_rate": 5.167074885038373e-05, "loss": 1.2972, "step": 5438 }, { "epoch": 0.9797793289799595, "grad_norm": 1.3246617317199707, "learning_rate": 5.1656595167104946e-05, "loss": 1.3516, "step": 5439 }, { "epoch": 0.9799594685881559, "grad_norm": 1.3270987272262573, "learning_rate": 5.164244135093492e-05, "loss": 1.5593, "step": 5440 }, { "epoch": 0.9801396081963522, "grad_norm": 1.4755040407180786, "learning_rate": 5.1628287403009026e-05, "loss": 1.6389, "step": 5441 }, { "epoch": 0.9803197478045486, "grad_norm": 1.5281306505203247, "learning_rate": 5.161413332446272e-05, "loss": 1.6293, "step": 5442 }, { "epoch": 0.9804998874127449, "grad_norm": 1.4726852178573608, "learning_rate": 5.159997911643143e-05, "loss": 1.4763, "step": 5443 }, { "epoch": 0.9806800270209413, "grad_norm": 1.5389952659606934, "learning_rate": 5.158582478005059e-05, "loss": 1.746, "step": 5444 }, { "epoch": 0.9808601666291376, "grad_norm": 1.358837604522705, "learning_rate": 5.1571670316455654e-05, "loss": 1.412, "step": 5445 }, { "epoch": 0.981040306237334, "grad_norm": 1.2964686155319214, "learning_rate": 5.155751572678209e-05, "loss": 1.3202, "step": 5446 }, { "epoch": 0.9812204458455303, "grad_norm": 1.2630119323730469, "learning_rate": 5.1543361012165384e-05, "loss": 1.3314, "step": 5447 }, { "epoch": 0.9814005854537267, "grad_norm": 1.2962652444839478, "learning_rate": 5.1529206173741e-05, "loss": 1.3861, "step": 5448 }, { "epoch": 0.981580725061923, "grad_norm": 1.461641788482666, "learning_rate": 5.151505121264444e-05, "loss": 1.4171, "step": 5449 }, { "epoch": 0.9817608646701194, "grad_norm": 1.2739298343658447, "learning_rate": 5.1500896130011225e-05, "loss": 1.2318, "step": 5450 }, { "epoch": 0.9819410042783157, "grad_norm": 1.3509132862091064, "learning_rate": 5.148674092697684e-05, "loss": 2.063, "step": 5451 }, { "epoch": 0.9821211438865121, "grad_norm": 1.2478272914886475, "learning_rate": 5.147258560467683e-05, "loss": 1.914, "step": 5452 }, { "epoch": 0.9823012834947084, "grad_norm": 1.295697569847107, "learning_rate": 5.1458430164246706e-05, "loss": 1.984, "step": 5453 }, { "epoch": 0.9824814231029048, "grad_norm": 1.3407965898513794, "learning_rate": 5.144427460682203e-05, "loss": 1.8529, "step": 5454 }, { "epoch": 0.982661562711101, "grad_norm": 1.3061943054199219, "learning_rate": 5.143011893353835e-05, "loss": 1.9232, "step": 5455 }, { "epoch": 0.9828417023192975, "grad_norm": 1.3617926836013794, "learning_rate": 5.141596314553124e-05, "loss": 2.0016, "step": 5456 }, { "epoch": 0.9830218419274938, "grad_norm": 1.422461748123169, "learning_rate": 5.140180724393624e-05, "loss": 1.8131, "step": 5457 }, { "epoch": 0.9832019815356902, "grad_norm": 2.5082414150238037, "learning_rate": 5.1387651229888945e-05, "loss": 2.0925, "step": 5458 }, { "epoch": 0.9833821211438866, "grad_norm": 2.0095503330230713, "learning_rate": 5.137349510452495e-05, "loss": 2.3096, "step": 5459 }, { "epoch": 0.9835622607520829, "grad_norm": 1.7564771175384521, "learning_rate": 5.1359338868979834e-05, "loss": 2.1726, "step": 5460 }, { "epoch": 0.9837424003602793, "grad_norm": 1.3236967325210571, "learning_rate": 5.1345182524389236e-05, "loss": 1.6855, "step": 5461 }, { "epoch": 0.9839225399684756, "grad_norm": 1.2989704608917236, "learning_rate": 5.133102607188874e-05, "loss": 1.5133, "step": 5462 }, { "epoch": 0.984102679576672, "grad_norm": 1.2168680429458618, "learning_rate": 5.1316869512613984e-05, "loss": 1.4162, "step": 5463 }, { "epoch": 0.9842828191848683, "grad_norm": 1.3532770872116089, "learning_rate": 5.1302712847700604e-05, "loss": 1.2964, "step": 5464 }, { "epoch": 0.9844629587930647, "grad_norm": 1.3319768905639648, "learning_rate": 5.1288556078284223e-05, "loss": 1.6427, "step": 5465 }, { "epoch": 0.984643098401261, "grad_norm": 1.2991929054260254, "learning_rate": 5.12743992055005e-05, "loss": 1.479, "step": 5466 }, { "epoch": 0.9848232380094574, "grad_norm": 1.2791972160339355, "learning_rate": 5.12602422304851e-05, "loss": 1.6004, "step": 5467 }, { "epoch": 0.9850033776176537, "grad_norm": 1.3292728662490845, "learning_rate": 5.124608515437368e-05, "loss": 1.4978, "step": 5468 }, { "epoch": 0.9851835172258501, "grad_norm": 1.4439105987548828, "learning_rate": 5.1231927978301906e-05, "loss": 1.4954, "step": 5469 }, { "epoch": 0.9853636568340464, "grad_norm": 1.3360912799835205, "learning_rate": 5.121777070340549e-05, "loss": 1.5736, "step": 5470 }, { "epoch": 0.9855437964422428, "grad_norm": 1.3444323539733887, "learning_rate": 5.120361333082009e-05, "loss": 1.5729, "step": 5471 }, { "epoch": 0.9857239360504391, "grad_norm": 1.2622430324554443, "learning_rate": 5.1189455861681425e-05, "loss": 1.4671, "step": 5472 }, { "epoch": 0.9859040756586355, "grad_norm": 1.5953948497772217, "learning_rate": 5.117529829712517e-05, "loss": 1.6837, "step": 5473 }, { "epoch": 0.9860842152668318, "grad_norm": 1.3227770328521729, "learning_rate": 5.116114063828706e-05, "loss": 1.4916, "step": 5474 }, { "epoch": 0.9862643548750282, "grad_norm": 1.3851453065872192, "learning_rate": 5.114698288630283e-05, "loss": 1.3644, "step": 5475 }, { "epoch": 0.9864444944832245, "grad_norm": 1.356567621231079, "learning_rate": 5.1132825042308186e-05, "loss": 1.4013, "step": 5476 }, { "epoch": 0.9866246340914209, "grad_norm": 1.4187607765197754, "learning_rate": 5.1118667107438864e-05, "loss": 1.5896, "step": 5477 }, { "epoch": 0.9868047736996172, "grad_norm": 1.2975939512252808, "learning_rate": 5.110450908283062e-05, "loss": 1.3973, "step": 5478 }, { "epoch": 0.9869849133078136, "grad_norm": 1.4173749685287476, "learning_rate": 5.109035096961919e-05, "loss": 1.7043, "step": 5479 }, { "epoch": 0.9871650529160099, "grad_norm": 1.3138997554779053, "learning_rate": 5.1076192768940333e-05, "loss": 1.4212, "step": 5480 }, { "epoch": 0.9873451925242063, "grad_norm": 1.4722062349319458, "learning_rate": 5.106203448192981e-05, "loss": 1.5128, "step": 5481 }, { "epoch": 0.9875253321324026, "grad_norm": 1.4500669240951538, "learning_rate": 5.104787610972341e-05, "loss": 1.6007, "step": 5482 }, { "epoch": 0.987705471740599, "grad_norm": 1.5035229921340942, "learning_rate": 5.10337176534569e-05, "loss": 1.6317, "step": 5483 }, { "epoch": 0.9878856113487953, "grad_norm": 1.3849323987960815, "learning_rate": 5.101955911426607e-05, "loss": 1.6066, "step": 5484 }, { "epoch": 0.9880657509569917, "grad_norm": 1.3403972387313843, "learning_rate": 5.100540049328669e-05, "loss": 1.5829, "step": 5485 }, { "epoch": 0.988245890565188, "grad_norm": 1.5764458179473877, "learning_rate": 5.0991241791654576e-05, "loss": 1.5613, "step": 5486 }, { "epoch": 0.9884260301733844, "grad_norm": 1.3477911949157715, "learning_rate": 5.097708301050552e-05, "loss": 1.5545, "step": 5487 }, { "epoch": 0.9886061697815808, "grad_norm": 1.506456971168518, "learning_rate": 5.096292415097534e-05, "loss": 1.5169, "step": 5488 }, { "epoch": 0.9887863093897771, "grad_norm": 1.4868850708007812, "learning_rate": 5.094876521419989e-05, "loss": 1.573, "step": 5489 }, { "epoch": 0.9889664489979735, "grad_norm": 1.4440927505493164, "learning_rate": 5.093460620131492e-05, "loss": 1.5389, "step": 5490 }, { "epoch": 0.9891465886061698, "grad_norm": 1.4233566522598267, "learning_rate": 5.0920447113456306e-05, "loss": 1.7414, "step": 5491 }, { "epoch": 0.9893267282143662, "grad_norm": 1.4849672317504883, "learning_rate": 5.0906287951759866e-05, "loss": 1.5977, "step": 5492 }, { "epoch": 0.9895068678225625, "grad_norm": 1.417472004890442, "learning_rate": 5.0892128717361456e-05, "loss": 1.5074, "step": 5493 }, { "epoch": 0.9896870074307589, "grad_norm": 1.471031665802002, "learning_rate": 5.087796941139691e-05, "loss": 1.7083, "step": 5494 }, { "epoch": 0.9898671470389552, "grad_norm": 1.314753532409668, "learning_rate": 5.086381003500209e-05, "loss": 1.4298, "step": 5495 }, { "epoch": 0.9900472866471516, "grad_norm": 1.4409737586975098, "learning_rate": 5.084965058931286e-05, "loss": 1.5632, "step": 5496 }, { "epoch": 0.9902274262553479, "grad_norm": 1.2563427686691284, "learning_rate": 5.0835491075465045e-05, "loss": 1.3845, "step": 5497 }, { "epoch": 0.9904075658635443, "grad_norm": 1.5046136379241943, "learning_rate": 5.082133149459457e-05, "loss": 1.4388, "step": 5498 }, { "epoch": 0.9905877054717406, "grad_norm": 1.3909345865249634, "learning_rate": 5.0807171847837274e-05, "loss": 1.279, "step": 5499 }, { "epoch": 0.990767845079937, "grad_norm": 1.4372528791427612, "learning_rate": 5.0793012136329044e-05, "loss": 1.4589, "step": 5500 }, { "epoch": 0.9909479846881333, "grad_norm": 1.443535566329956, "learning_rate": 5.077885236120577e-05, "loss": 2.205, "step": 5501 }, { "epoch": 0.9911281242963297, "grad_norm": 1.1812182664871216, "learning_rate": 5.076469252360333e-05, "loss": 1.7309, "step": 5502 }, { "epoch": 0.991308263904526, "grad_norm": 1.285457730293274, "learning_rate": 5.075053262465763e-05, "loss": 1.6994, "step": 5503 }, { "epoch": 0.9914884035127224, "grad_norm": 1.4190510511398315, "learning_rate": 5.073637266550456e-05, "loss": 1.9109, "step": 5504 }, { "epoch": 0.9916685431209187, "grad_norm": 1.3112819194793701, "learning_rate": 5.072221264728007e-05, "loss": 1.9456, "step": 5505 }, { "epoch": 0.9918486827291151, "grad_norm": 1.3437947034835815, "learning_rate": 5.070805257111999e-05, "loss": 1.6904, "step": 5506 }, { "epoch": 0.9920288223373114, "grad_norm": 1.30347740650177, "learning_rate": 5.069389243816031e-05, "loss": 1.9173, "step": 5507 }, { "epoch": 0.9922089619455078, "grad_norm": 1.4260587692260742, "learning_rate": 5.0679732249536905e-05, "loss": 1.9504, "step": 5508 }, { "epoch": 0.9923891015537041, "grad_norm": 1.5754131078720093, "learning_rate": 5.0665572006385695e-05, "loss": 1.8174, "step": 5509 }, { "epoch": 0.9925692411619005, "grad_norm": 1.795831561088562, "learning_rate": 5.065141170984263e-05, "loss": 2.2449, "step": 5510 }, { "epoch": 0.9927493807700968, "grad_norm": 1.3534268140792847, "learning_rate": 5.063725136104365e-05, "loss": 1.6678, "step": 5511 }, { "epoch": 0.9929295203782932, "grad_norm": 1.368416666984558, "learning_rate": 5.062309096112467e-05, "loss": 1.785, "step": 5512 }, { "epoch": 0.9931096599864895, "grad_norm": 1.3788455724716187, "learning_rate": 5.060893051122163e-05, "loss": 1.5657, "step": 5513 }, { "epoch": 0.9932897995946859, "grad_norm": 1.2808939218521118, "learning_rate": 5.059477001247048e-05, "loss": 1.4714, "step": 5514 }, { "epoch": 0.9934699392028822, "grad_norm": 1.336501121520996, "learning_rate": 5.0580609466007176e-05, "loss": 1.6352, "step": 5515 }, { "epoch": 0.9936500788110786, "grad_norm": 1.1835541725158691, "learning_rate": 5.056644887296766e-05, "loss": 1.5222, "step": 5516 }, { "epoch": 0.993830218419275, "grad_norm": 1.2137409448623657, "learning_rate": 5.05522882344879e-05, "loss": 1.3276, "step": 5517 }, { "epoch": 0.9940103580274713, "grad_norm": 1.2530930042266846, "learning_rate": 5.053812755170383e-05, "loss": 1.4916, "step": 5518 }, { "epoch": 0.9941904976356677, "grad_norm": 1.301798701286316, "learning_rate": 5.052396682575145e-05, "loss": 1.2784, "step": 5519 }, { "epoch": 0.994370637243864, "grad_norm": 1.2902483940124512, "learning_rate": 5.050980605776668e-05, "loss": 1.3717, "step": 5520 }, { "epoch": 0.9945507768520604, "grad_norm": 1.370252251625061, "learning_rate": 5.049564524888555e-05, "loss": 1.6479, "step": 5521 }, { "epoch": 0.9947309164602567, "grad_norm": 1.4862064123153687, "learning_rate": 5.048148440024398e-05, "loss": 1.8263, "step": 5522 }, { "epoch": 0.9949110560684531, "grad_norm": 1.2226202487945557, "learning_rate": 5.046732351297796e-05, "loss": 1.3234, "step": 5523 }, { "epoch": 0.9950911956766494, "grad_norm": 1.2787630558013916, "learning_rate": 5.045316258822349e-05, "loss": 1.4156, "step": 5524 }, { "epoch": 0.9952713352848458, "grad_norm": 1.4211828708648682, "learning_rate": 5.043900162711652e-05, "loss": 1.869, "step": 5525 }, { "epoch": 0.9954514748930421, "grad_norm": 1.5089571475982666, "learning_rate": 5.042484063079307e-05, "loss": 1.7412, "step": 5526 }, { "epoch": 0.9956316145012385, "grad_norm": 1.439766526222229, "learning_rate": 5.041067960038911e-05, "loss": 1.5093, "step": 5527 }, { "epoch": 0.9958117541094348, "grad_norm": 1.345784068107605, "learning_rate": 5.039651853704063e-05, "loss": 1.4175, "step": 5528 }, { "epoch": 0.9959918937176312, "grad_norm": 1.3591045141220093, "learning_rate": 5.038235744188362e-05, "loss": 1.3927, "step": 5529 }, { "epoch": 0.9961720333258275, "grad_norm": 1.3794161081314087, "learning_rate": 5.036819631605409e-05, "loss": 1.4927, "step": 5530 }, { "epoch": 0.9963521729340239, "grad_norm": 1.3785758018493652, "learning_rate": 5.035403516068803e-05, "loss": 1.4733, "step": 5531 }, { "epoch": 0.9965323125422202, "grad_norm": 1.4453762769699097, "learning_rate": 5.033987397692144e-05, "loss": 1.5312, "step": 5532 }, { "epoch": 0.9967124521504166, "grad_norm": 1.569035291671753, "learning_rate": 5.032571276589032e-05, "loss": 1.4063, "step": 5533 }, { "epoch": 0.9968925917586129, "grad_norm": 1.4120374917984009, "learning_rate": 5.0311551528730685e-05, "loss": 1.3971, "step": 5534 }, { "epoch": 0.9970727313668093, "grad_norm": 1.3823720216751099, "learning_rate": 5.029739026657854e-05, "loss": 1.4651, "step": 5535 }, { "epoch": 0.9972528709750056, "grad_norm": 1.3192176818847656, "learning_rate": 5.0283228980569883e-05, "loss": 1.3163, "step": 5536 }, { "epoch": 0.997433010583202, "grad_norm": 1.4467960596084595, "learning_rate": 5.026906767184075e-05, "loss": 1.5889, "step": 5537 }, { "epoch": 0.9976131501913983, "grad_norm": 1.411865234375, "learning_rate": 5.025490634152714e-05, "loss": 1.5999, "step": 5538 }, { "epoch": 0.9977932897995947, "grad_norm": 1.5603841543197632, "learning_rate": 5.0240744990765055e-05, "loss": 1.535, "step": 5539 }, { "epoch": 0.997973429407791, "grad_norm": 1.41175377368927, "learning_rate": 5.022658362069053e-05, "loss": 1.4259, "step": 5540 }, { "epoch": 0.9981535690159874, "grad_norm": 1.5095746517181396, "learning_rate": 5.021242223243956e-05, "loss": 1.464, "step": 5541 }, { "epoch": 0.9983337086241837, "grad_norm": 1.488194465637207, "learning_rate": 5.0198260827148206e-05, "loss": 1.6052, "step": 5542 }, { "epoch": 0.9985138482323801, "grad_norm": 1.5387918949127197, "learning_rate": 5.018409940595247e-05, "loss": 1.6405, "step": 5543 }, { "epoch": 0.9986939878405764, "grad_norm": 1.4464972019195557, "learning_rate": 5.016993796998835e-05, "loss": 1.3603, "step": 5544 }, { "epoch": 0.9988741274487728, "grad_norm": 1.234643816947937, "learning_rate": 5.0155776520391896e-05, "loss": 1.2346, "step": 5545 }, { "epoch": 0.9990542670569692, "grad_norm": 1.556142807006836, "learning_rate": 5.014161505829912e-05, "loss": 1.7073, "step": 5546 }, { "epoch": 0.9992344066651655, "grad_norm": 1.4931926727294922, "learning_rate": 5.012745358484605e-05, "loss": 1.5738, "step": 5547 }, { "epoch": 0.9994145462733619, "grad_norm": 1.6659613847732544, "learning_rate": 5.011329210116873e-05, "loss": 1.3528, "step": 5548 }, { "epoch": 0.9995946858815582, "grad_norm": 1.372674584388733, "learning_rate": 5.0099130608403164e-05, "loss": 1.4698, "step": 5549 }, { "epoch": 0.9997748254897546, "grad_norm": 1.2082171440124512, "learning_rate": 5.00849691076854e-05, "loss": 1.1453, "step": 5550 }, { "epoch": 0.9999549650979509, "grad_norm": 1.3851683139801025, "learning_rate": 5.007080760015144e-05, "loss": 1.8766, "step": 5551 }, { "epoch": 0.9999549650979509, "eval_loss": 1.5813547372817993, "eval_runtime": 185.5209, "eval_samples_per_second": 50.399, "eval_steps_per_second": 12.602, "step": 5551 }, { "epoch": 1.0001351047061473, "grad_norm": 3.594057559967041, "learning_rate": 5.0056646086937344e-05, "loss": 2.2345, "step": 5552 }, { "epoch": 1.0003152443143437, "grad_norm": 1.1515469551086426, "learning_rate": 5.0042484569179114e-05, "loss": 1.7658, "step": 5553 }, { "epoch": 1.00049538392254, "grad_norm": 1.23834228515625, "learning_rate": 5.00283230480128e-05, "loss": 1.7991, "step": 5554 }, { "epoch": 1.0006755235307363, "grad_norm": 1.2544416189193726, "learning_rate": 5.001416152457441e-05, "loss": 1.9802, "step": 5555 }, { "epoch": 1.0008556631389327, "grad_norm": 1.2491496801376343, "learning_rate": 5e-05, "loss": 1.6689, "step": 5556 }, { "epoch": 1.001035802747129, "grad_norm": 1.207411766052246, "learning_rate": 4.99858384754256e-05, "loss": 1.6938, "step": 5557 }, { "epoch": 1.0012159423553253, "grad_norm": 1.329949975013733, "learning_rate": 4.997167695198722e-05, "loss": 1.7802, "step": 5558 }, { "epoch": 1.0013960819635217, "grad_norm": 1.3830840587615967, "learning_rate": 4.99575154308209e-05, "loss": 1.665, "step": 5559 }, { "epoch": 1.001576221571718, "grad_norm": 1.5742459297180176, "learning_rate": 4.9943353913062674e-05, "loss": 2.0974, "step": 5560 }, { "epoch": 1.0017563611799145, "grad_norm": 1.754102110862732, "learning_rate": 4.992919239984857e-05, "loss": 2.1248, "step": 5561 }, { "epoch": 1.0019365007881107, "grad_norm": 1.3664779663085938, "learning_rate": 4.9915030892314625e-05, "loss": 1.7125, "step": 5562 }, { "epoch": 1.002116640396307, "grad_norm": 1.286925196647644, "learning_rate": 4.990086939159684e-05, "loss": 1.5467, "step": 5563 }, { "epoch": 1.0022967800045035, "grad_norm": 1.2146855592727661, "learning_rate": 4.988670789883128e-05, "loss": 1.2794, "step": 5564 }, { "epoch": 1.0024769196127, "grad_norm": 1.346280574798584, "learning_rate": 4.9872546415153954e-05, "loss": 1.3649, "step": 5565 }, { "epoch": 1.002657059220896, "grad_norm": 1.497022271156311, "learning_rate": 4.98583849417009e-05, "loss": 1.6313, "step": 5566 }, { "epoch": 1.0028371988290925, "grad_norm": 1.3814294338226318, "learning_rate": 4.984422347960813e-05, "loss": 1.575, "step": 5567 }, { "epoch": 1.003017338437289, "grad_norm": 1.2965688705444336, "learning_rate": 4.9830062030011657e-05, "loss": 1.3843, "step": 5568 }, { "epoch": 1.0031974780454853, "grad_norm": 1.2462544441223145, "learning_rate": 4.981590059404754e-05, "loss": 1.2929, "step": 5569 }, { "epoch": 1.0033776176536815, "grad_norm": 1.2687498331069946, "learning_rate": 4.9801739172851806e-05, "loss": 1.528, "step": 5570 }, { "epoch": 1.003557757261878, "grad_norm": 1.3159748315811157, "learning_rate": 4.978757776756045e-05, "loss": 1.4044, "step": 5571 }, { "epoch": 1.0037378968700743, "grad_norm": 1.2301899194717407, "learning_rate": 4.9773416379309496e-05, "loss": 1.3239, "step": 5572 }, { "epoch": 1.0039180364782707, "grad_norm": 1.318537950515747, "learning_rate": 4.975925500923495e-05, "loss": 1.6227, "step": 5573 }, { "epoch": 1.0040981760864671, "grad_norm": 1.2856943607330322, "learning_rate": 4.9745093658472876e-05, "loss": 1.6121, "step": 5574 }, { "epoch": 1.0042783156946633, "grad_norm": 1.3267793655395508, "learning_rate": 4.9730932328159255e-05, "loss": 1.2396, "step": 5575 }, { "epoch": 1.0044584553028597, "grad_norm": 1.5443915128707886, "learning_rate": 4.971677101943013e-05, "loss": 1.244, "step": 5576 }, { "epoch": 1.0046385949110561, "grad_norm": 1.492397427558899, "learning_rate": 4.970260973342146e-05, "loss": 1.6181, "step": 5577 }, { "epoch": 1.0048187345192525, "grad_norm": 1.349518895149231, "learning_rate": 4.968844847126932e-05, "loss": 1.6127, "step": 5578 }, { "epoch": 1.0049988741274487, "grad_norm": 1.4305379390716553, "learning_rate": 4.967428723410969e-05, "loss": 1.5743, "step": 5579 }, { "epoch": 1.005179013735645, "grad_norm": 1.5587505102157593, "learning_rate": 4.966012602307857e-05, "loss": 1.6561, "step": 5580 }, { "epoch": 1.0053591533438415, "grad_norm": 1.4036973714828491, "learning_rate": 4.9645964839311985e-05, "loss": 1.3369, "step": 5581 }, { "epoch": 1.005539292952038, "grad_norm": 1.286837100982666, "learning_rate": 4.963180368394592e-05, "loss": 1.1369, "step": 5582 }, { "epoch": 1.005719432560234, "grad_norm": 1.6943107843399048, "learning_rate": 4.9617642558116384e-05, "loss": 1.7455, "step": 5583 }, { "epoch": 1.0058995721684305, "grad_norm": 1.4491300582885742, "learning_rate": 4.9603481462959384e-05, "loss": 1.6461, "step": 5584 }, { "epoch": 1.006079711776627, "grad_norm": 1.4178802967071533, "learning_rate": 4.9589320399610904e-05, "loss": 1.3058, "step": 5585 }, { "epoch": 1.0062598513848233, "grad_norm": 1.4115400314331055, "learning_rate": 4.9575159369206944e-05, "loss": 1.4939, "step": 5586 }, { "epoch": 1.0064399909930195, "grad_norm": 1.3227558135986328, "learning_rate": 4.9560998372883476e-05, "loss": 1.2713, "step": 5587 }, { "epoch": 1.006620130601216, "grad_norm": 1.4719223976135254, "learning_rate": 4.954683741177652e-05, "loss": 1.7106, "step": 5588 }, { "epoch": 1.0068002702094123, "grad_norm": 1.3001450300216675, "learning_rate": 4.9532676487022046e-05, "loss": 1.4296, "step": 5589 }, { "epoch": 1.0069804098176087, "grad_norm": 1.227793574333191, "learning_rate": 4.9518515599756035e-05, "loss": 1.1813, "step": 5590 }, { "epoch": 1.007160549425805, "grad_norm": 1.5430632829666138, "learning_rate": 4.950435475111446e-05, "loss": 1.5679, "step": 5591 }, { "epoch": 1.0073406890340013, "grad_norm": 1.5058188438415527, "learning_rate": 4.9490193942233315e-05, "loss": 1.6437, "step": 5592 }, { "epoch": 1.0075208286421977, "grad_norm": 1.3336890935897827, "learning_rate": 4.9476033174248564e-05, "loss": 1.187, "step": 5593 }, { "epoch": 1.0077009682503941, "grad_norm": 1.5512518882751465, "learning_rate": 4.946187244829618e-05, "loss": 1.5464, "step": 5594 }, { "epoch": 1.0078811078585903, "grad_norm": 1.434260368347168, "learning_rate": 4.944771176551212e-05, "loss": 1.4756, "step": 5595 }, { "epoch": 1.0080612474667867, "grad_norm": 1.3953254222869873, "learning_rate": 4.943355112703234e-05, "loss": 1.4091, "step": 5596 }, { "epoch": 1.0082413870749831, "grad_norm": 1.5238646268844604, "learning_rate": 4.9419390533992835e-05, "loss": 1.5481, "step": 5597 }, { "epoch": 1.0084215266831795, "grad_norm": 1.3699073791503906, "learning_rate": 4.940522998752953e-05, "loss": 1.2815, "step": 5598 }, { "epoch": 1.0086016662913757, "grad_norm": 1.3145962953567505, "learning_rate": 4.939106948877839e-05, "loss": 1.2487, "step": 5599 }, { "epoch": 1.0087818058995721, "grad_norm": 1.2763704061508179, "learning_rate": 4.937690903887535e-05, "loss": 1.0733, "step": 5600 }, { "epoch": 1.0089619455077685, "grad_norm": 1.2779762744903564, "learning_rate": 4.936274863895635e-05, "loss": 1.3373, "step": 5601 }, { "epoch": 1.009142085115965, "grad_norm": 1.236710548400879, "learning_rate": 4.9348588290157375e-05, "loss": 1.8283, "step": 5602 }, { "epoch": 1.0093222247241613, "grad_norm": 1.3207132816314697, "learning_rate": 4.933442799361432e-05, "loss": 1.7135, "step": 5603 }, { "epoch": 1.0095023643323575, "grad_norm": 1.2202510833740234, "learning_rate": 4.932026775046312e-05, "loss": 1.5694, "step": 5604 }, { "epoch": 1.009682503940554, "grad_norm": 1.2876667976379395, "learning_rate": 4.9306107561839696e-05, "loss": 1.7128, "step": 5605 }, { "epoch": 1.0098626435487503, "grad_norm": 1.3892180919647217, "learning_rate": 4.9291947428880005e-05, "loss": 1.9629, "step": 5606 }, { "epoch": 1.0100427831569467, "grad_norm": 1.3952772617340088, "learning_rate": 4.9277787352719944e-05, "loss": 1.8067, "step": 5607 }, { "epoch": 1.010222922765143, "grad_norm": 1.5145434141159058, "learning_rate": 4.9263627334495446e-05, "loss": 1.8333, "step": 5608 }, { "epoch": 1.0104030623733393, "grad_norm": 1.5198681354522705, "learning_rate": 4.924946737534239e-05, "loss": 1.6511, "step": 5609 }, { "epoch": 1.0105832019815357, "grad_norm": 1.6591224670410156, "learning_rate": 4.923530747639668e-05, "loss": 2.1028, "step": 5610 }, { "epoch": 1.0107633415897321, "grad_norm": 1.8929842710494995, "learning_rate": 4.922114763879424e-05, "loss": 1.9754, "step": 5611 }, { "epoch": 1.0109434811979283, "grad_norm": 1.4713486433029175, "learning_rate": 4.920698786367097e-05, "loss": 1.6352, "step": 5612 }, { "epoch": 1.0111236208061247, "grad_norm": 1.2287005186080933, "learning_rate": 4.919282815216274e-05, "loss": 1.3001, "step": 5613 }, { "epoch": 1.0113037604143211, "grad_norm": 1.1148327589035034, "learning_rate": 4.9178668505405454e-05, "loss": 1.4141, "step": 5614 }, { "epoch": 1.0114839000225175, "grad_norm": 1.3809200525283813, "learning_rate": 4.916450892453495e-05, "loss": 1.4582, "step": 5615 }, { "epoch": 1.0116640396307137, "grad_norm": 1.37238609790802, "learning_rate": 4.915034941068716e-05, "loss": 1.6725, "step": 5616 }, { "epoch": 1.0118441792389101, "grad_norm": 1.2395992279052734, "learning_rate": 4.913618996499792e-05, "loss": 1.4422, "step": 5617 }, { "epoch": 1.0120243188471065, "grad_norm": 1.3363028764724731, "learning_rate": 4.91220305886031e-05, "loss": 1.3323, "step": 5618 }, { "epoch": 1.012204458455303, "grad_norm": 1.382542610168457, "learning_rate": 4.910787128263855e-05, "loss": 1.6322, "step": 5619 }, { "epoch": 1.0123845980634991, "grad_norm": 1.3107426166534424, "learning_rate": 4.909371204824014e-05, "loss": 1.6006, "step": 5620 }, { "epoch": 1.0125647376716955, "grad_norm": 1.3757141828536987, "learning_rate": 4.9079552886543706e-05, "loss": 1.4538, "step": 5621 }, { "epoch": 1.012744877279892, "grad_norm": 1.3708990812301636, "learning_rate": 4.906539379868509e-05, "loss": 1.5539, "step": 5622 }, { "epoch": 1.0129250168880883, "grad_norm": 1.3575074672698975, "learning_rate": 4.905123478580014e-05, "loss": 1.427, "step": 5623 }, { "epoch": 1.0131051564962845, "grad_norm": 1.1885992288589478, "learning_rate": 4.903707584902465e-05, "loss": 1.1832, "step": 5624 }, { "epoch": 1.013285296104481, "grad_norm": 1.426460862159729, "learning_rate": 4.902291698949448e-05, "loss": 1.6044, "step": 5625 }, { "epoch": 1.0134654357126773, "grad_norm": 1.3127444982528687, "learning_rate": 4.900875820834544e-05, "loss": 1.2975, "step": 5626 }, { "epoch": 1.0136455753208737, "grad_norm": 1.4574682712554932, "learning_rate": 4.899459950671333e-05, "loss": 1.3662, "step": 5627 }, { "epoch": 1.01382571492907, "grad_norm": 1.389467477798462, "learning_rate": 4.898044088573395e-05, "loss": 1.3365, "step": 5628 }, { "epoch": 1.0140058545372663, "grad_norm": 1.3078991174697876, "learning_rate": 4.896628234654311e-05, "loss": 1.403, "step": 5629 }, { "epoch": 1.0141859941454627, "grad_norm": 1.4988057613372803, "learning_rate": 4.89521238902766e-05, "loss": 1.6475, "step": 5630 }, { "epoch": 1.0143661337536591, "grad_norm": 1.3802160024642944, "learning_rate": 4.8937965518070195e-05, "loss": 1.3365, "step": 5631 }, { "epoch": 1.0145462733618555, "grad_norm": 1.4270375967025757, "learning_rate": 4.8923807231059685e-05, "loss": 1.4756, "step": 5632 }, { "epoch": 1.0147264129700517, "grad_norm": 1.396551251411438, "learning_rate": 4.890964903038081e-05, "loss": 1.4774, "step": 5633 }, { "epoch": 1.0149065525782481, "grad_norm": 1.4541343450546265, "learning_rate": 4.8895490917169395e-05, "loss": 1.5586, "step": 5634 }, { "epoch": 1.0150866921864445, "grad_norm": 1.3801171779632568, "learning_rate": 4.888133289256115e-05, "loss": 1.3555, "step": 5635 }, { "epoch": 1.015266831794641, "grad_norm": 1.4155704975128174, "learning_rate": 4.886717495769183e-05, "loss": 1.5705, "step": 5636 }, { "epoch": 1.0154469714028371, "grad_norm": 1.3111226558685303, "learning_rate": 4.8853017113697186e-05, "loss": 1.2058, "step": 5637 }, { "epoch": 1.0156271110110335, "grad_norm": 1.4586259126663208, "learning_rate": 4.883885936171293e-05, "loss": 1.5129, "step": 5638 }, { "epoch": 1.01580725061923, "grad_norm": 1.357161521911621, "learning_rate": 4.8824701702874834e-05, "loss": 1.3479, "step": 5639 }, { "epoch": 1.0159873902274263, "grad_norm": 1.5355088710784912, "learning_rate": 4.881054413831859e-05, "loss": 1.7294, "step": 5640 }, { "epoch": 1.0161675298356225, "grad_norm": 1.4661026000976562, "learning_rate": 4.879638666917993e-05, "loss": 1.5689, "step": 5641 }, { "epoch": 1.016347669443819, "grad_norm": 1.311046838760376, "learning_rate": 4.878222929659453e-05, "loss": 1.3014, "step": 5642 }, { "epoch": 1.0165278090520153, "grad_norm": 1.6099871397018433, "learning_rate": 4.8768072021698085e-05, "loss": 1.6142, "step": 5643 }, { "epoch": 1.0167079486602117, "grad_norm": 1.4108574390411377, "learning_rate": 4.875391484562632e-05, "loss": 1.3236, "step": 5644 }, { "epoch": 1.016888088268408, "grad_norm": 1.4383701086044312, "learning_rate": 4.873975776951491e-05, "loss": 1.2987, "step": 5645 }, { "epoch": 1.0170682278766043, "grad_norm": 1.5219625234603882, "learning_rate": 4.872560079449951e-05, "loss": 1.6096, "step": 5646 }, { "epoch": 1.0172483674848007, "grad_norm": 1.450549602508545, "learning_rate": 4.871144392171578e-05, "loss": 1.3545, "step": 5647 }, { "epoch": 1.0174285070929971, "grad_norm": 1.5298703908920288, "learning_rate": 4.869728715229941e-05, "loss": 1.5569, "step": 5648 }, { "epoch": 1.0176086467011933, "grad_norm": 1.4178144931793213, "learning_rate": 4.868313048738602e-05, "loss": 1.1251, "step": 5649 }, { "epoch": 1.0177887863093897, "grad_norm": 1.3194656372070312, "learning_rate": 4.866897392811126e-05, "loss": 1.2814, "step": 5650 }, { "epoch": 1.0179689259175861, "grad_norm": 1.3082959651947021, "learning_rate": 4.8654817475610776e-05, "loss": 1.0645, "step": 5651 }, { "epoch": 1.0181490655257825, "grad_norm": 1.2600536346435547, "learning_rate": 4.864066113102016e-05, "loss": 1.658, "step": 5652 }, { "epoch": 1.0183292051339787, "grad_norm": 1.3712444305419922, "learning_rate": 4.862650489547506e-05, "loss": 1.9719, "step": 5653 }, { "epoch": 1.0185093447421751, "grad_norm": 1.3431187868118286, "learning_rate": 4.8612348770111066e-05, "loss": 1.6319, "step": 5654 }, { "epoch": 1.0186894843503715, "grad_norm": 1.2910689115524292, "learning_rate": 4.8598192756063774e-05, "loss": 1.9287, "step": 5655 }, { "epoch": 1.018869623958568, "grad_norm": 1.3449338674545288, "learning_rate": 4.8584036854468785e-05, "loss": 1.7589, "step": 5656 }, { "epoch": 1.0190497635667644, "grad_norm": 1.3084237575531006, "learning_rate": 4.856988106646165e-05, "loss": 1.7129, "step": 5657 }, { "epoch": 1.0192299031749605, "grad_norm": 1.4933860301971436, "learning_rate": 4.855572539317798e-05, "loss": 1.7005, "step": 5658 }, { "epoch": 1.019410042783157, "grad_norm": 1.3637412786483765, "learning_rate": 4.8541569835753306e-05, "loss": 1.5829, "step": 5659 }, { "epoch": 1.0195901823913534, "grad_norm": 1.538561463356018, "learning_rate": 4.8527414395323196e-05, "loss": 1.7994, "step": 5660 }, { "epoch": 1.0197703219995498, "grad_norm": 1.685618281364441, "learning_rate": 4.851325907302317e-05, "loss": 1.7449, "step": 5661 }, { "epoch": 1.019950461607746, "grad_norm": 1.3408700227737427, "learning_rate": 4.849910386998879e-05, "loss": 1.5771, "step": 5662 }, { "epoch": 1.0201306012159423, "grad_norm": 1.4169869422912598, "learning_rate": 4.8484948787355565e-05, "loss": 1.4825, "step": 5663 }, { "epoch": 1.0203107408241388, "grad_norm": 1.2307233810424805, "learning_rate": 4.8470793826259015e-05, "loss": 1.3632, "step": 5664 }, { "epoch": 1.0204908804323352, "grad_norm": 1.3273478746414185, "learning_rate": 4.8456638987834635e-05, "loss": 1.3481, "step": 5665 }, { "epoch": 1.0206710200405313, "grad_norm": 1.2714406251907349, "learning_rate": 4.84424842732179e-05, "loss": 1.6143, "step": 5666 }, { "epoch": 1.0208511596487277, "grad_norm": 1.3952585458755493, "learning_rate": 4.842832968354436e-05, "loss": 1.5994, "step": 5667 }, { "epoch": 1.0210312992569242, "grad_norm": 1.3672749996185303, "learning_rate": 4.841417521994943e-05, "loss": 1.5182, "step": 5668 }, { "epoch": 1.0212114388651206, "grad_norm": 1.3039138317108154, "learning_rate": 4.8400020883568594e-05, "loss": 1.4702, "step": 5669 }, { "epoch": 1.0213915784733167, "grad_norm": 1.3019334077835083, "learning_rate": 4.838586667553728e-05, "loss": 1.3222, "step": 5670 }, { "epoch": 1.0215717180815131, "grad_norm": 1.3701164722442627, "learning_rate": 4.837171259699097e-05, "loss": 1.5042, "step": 5671 }, { "epoch": 1.0217518576897096, "grad_norm": 1.2783962488174438, "learning_rate": 4.8357558649065096e-05, "loss": 1.3902, "step": 5672 }, { "epoch": 1.021931997297906, "grad_norm": 1.3674273490905762, "learning_rate": 4.8343404832895066e-05, "loss": 1.3956, "step": 5673 }, { "epoch": 1.0221121369061021, "grad_norm": 1.4398524761199951, "learning_rate": 4.832925114961629e-05, "loss": 1.6261, "step": 5674 }, { "epoch": 1.0222922765142985, "grad_norm": 1.490523338317871, "learning_rate": 4.8315097600364145e-05, "loss": 1.618, "step": 5675 }, { "epoch": 1.022472416122495, "grad_norm": 1.2867107391357422, "learning_rate": 4.8300944186274076e-05, "loss": 1.2315, "step": 5676 }, { "epoch": 1.0226525557306914, "grad_norm": 1.539480209350586, "learning_rate": 4.828679090848143e-05, "loss": 1.5947, "step": 5677 }, { "epoch": 1.0228326953388875, "grad_norm": 1.5215333700180054, "learning_rate": 4.827263776812159e-05, "loss": 1.4624, "step": 5678 }, { "epoch": 1.023012834947084, "grad_norm": 1.510625958442688, "learning_rate": 4.825848476632992e-05, "loss": 1.4863, "step": 5679 }, { "epoch": 1.0231929745552804, "grad_norm": 1.3307843208312988, "learning_rate": 4.824433190424172e-05, "loss": 1.2927, "step": 5680 }, { "epoch": 1.0233731141634768, "grad_norm": 1.5096434354782104, "learning_rate": 4.8230179182992375e-05, "loss": 1.4389, "step": 5681 }, { "epoch": 1.023553253771673, "grad_norm": 1.4708826541900635, "learning_rate": 4.82160266037172e-05, "loss": 1.4199, "step": 5682 }, { "epoch": 1.0237333933798693, "grad_norm": 1.5468809604644775, "learning_rate": 4.82018741675515e-05, "loss": 1.6074, "step": 5683 }, { "epoch": 1.0239135329880658, "grad_norm": 1.3394520282745361, "learning_rate": 4.8187721875630565e-05, "loss": 1.2816, "step": 5684 }, { "epoch": 1.0240936725962622, "grad_norm": 1.495758295059204, "learning_rate": 4.817356972908971e-05, "loss": 1.6142, "step": 5685 }, { "epoch": 1.0242738122044583, "grad_norm": 1.5061441659927368, "learning_rate": 4.815941772906421e-05, "loss": 1.5169, "step": 5686 }, { "epoch": 1.0244539518126548, "grad_norm": 1.2849524021148682, "learning_rate": 4.814526587668931e-05, "loss": 1.3454, "step": 5687 }, { "epoch": 1.0246340914208512, "grad_norm": 1.5339057445526123, "learning_rate": 4.8131114173100286e-05, "loss": 1.3045, "step": 5688 }, { "epoch": 1.0248142310290476, "grad_norm": 1.389390230178833, "learning_rate": 4.811696261943236e-05, "loss": 1.3284, "step": 5689 }, { "epoch": 1.024994370637244, "grad_norm": 1.5021467208862305, "learning_rate": 4.810281121682079e-05, "loss": 1.5879, "step": 5690 }, { "epoch": 1.0251745102454402, "grad_norm": 1.5365718603134155, "learning_rate": 4.8088659966400774e-05, "loss": 1.641, "step": 5691 }, { "epoch": 1.0253546498536366, "grad_norm": 1.3402938842773438, "learning_rate": 4.8074508869307526e-05, "loss": 1.1189, "step": 5692 }, { "epoch": 1.025534789461833, "grad_norm": 1.5072957277297974, "learning_rate": 4.8060357926676244e-05, "loss": 1.7261, "step": 5693 }, { "epoch": 1.0257149290700294, "grad_norm": 1.3815417289733887, "learning_rate": 4.804620713964208e-05, "loss": 1.2545, "step": 5694 }, { "epoch": 1.0258950686782256, "grad_norm": 1.4508488178253174, "learning_rate": 4.8032056509340254e-05, "loss": 1.4157, "step": 5695 }, { "epoch": 1.026075208286422, "grad_norm": 1.3612934350967407, "learning_rate": 4.801790603690589e-05, "loss": 1.2882, "step": 5696 }, { "epoch": 1.0262553478946184, "grad_norm": 1.4334169626235962, "learning_rate": 4.800375572347414e-05, "loss": 1.2776, "step": 5697 }, { "epoch": 1.0264354875028148, "grad_norm": 1.533494234085083, "learning_rate": 4.7989605570180106e-05, "loss": 1.6476, "step": 5698 }, { "epoch": 1.026615627111011, "grad_norm": 1.666290283203125, "learning_rate": 4.797545557815897e-05, "loss": 1.6083, "step": 5699 }, { "epoch": 1.0267957667192074, "grad_norm": 1.4549219608306885, "learning_rate": 4.7961305748545795e-05, "loss": 1.2693, "step": 5700 }, { "epoch": 1.0269759063274038, "grad_norm": 1.545725703239441, "learning_rate": 4.794715608247567e-05, "loss": 1.5388, "step": 5701 }, { "epoch": 1.0271560459356002, "grad_norm": 1.2349741458892822, "learning_rate": 4.79330065810837e-05, "loss": 1.843, "step": 5702 }, { "epoch": 1.0273361855437964, "grad_norm": 1.222366452217102, "learning_rate": 4.791885724550489e-05, "loss": 1.7908, "step": 5703 }, { "epoch": 1.0275163251519928, "grad_norm": 1.40467369556427, "learning_rate": 4.7904708076874383e-05, "loss": 1.9344, "step": 5704 }, { "epoch": 1.0276964647601892, "grad_norm": 1.4209883213043213, "learning_rate": 4.7890559076327175e-05, "loss": 1.8369, "step": 5705 }, { "epoch": 1.0278766043683856, "grad_norm": 1.4214222431182861, "learning_rate": 4.787641024499828e-05, "loss": 1.9501, "step": 5706 }, { "epoch": 1.0280567439765818, "grad_norm": 1.3862560987472534, "learning_rate": 4.7862261584022716e-05, "loss": 1.9316, "step": 5707 }, { "epoch": 1.0282368835847782, "grad_norm": 1.3517098426818848, "learning_rate": 4.7848113094535466e-05, "loss": 1.5606, "step": 5708 }, { "epoch": 1.0284170231929746, "grad_norm": 1.4003384113311768, "learning_rate": 4.7833964777671565e-05, "loss": 1.8409, "step": 5709 }, { "epoch": 1.028597162801171, "grad_norm": 1.5580077171325684, "learning_rate": 4.781981663456595e-05, "loss": 1.897, "step": 5710 }, { "epoch": 1.0287773024093672, "grad_norm": 1.5970946550369263, "learning_rate": 4.7805668666353584e-05, "loss": 1.9684, "step": 5711 }, { "epoch": 1.0289574420175636, "grad_norm": 1.526471495628357, "learning_rate": 4.7791520874169395e-05, "loss": 1.6483, "step": 5712 }, { "epoch": 1.02913758162576, "grad_norm": 1.3739126920700073, "learning_rate": 4.7777373259148335e-05, "loss": 1.7123, "step": 5713 }, { "epoch": 1.0293177212339564, "grad_norm": 1.33206045627594, "learning_rate": 4.776322582242531e-05, "loss": 1.261, "step": 5714 }, { "epoch": 1.0294978608421528, "grad_norm": 1.2750029563903809, "learning_rate": 4.7749078565135225e-05, "loss": 1.4291, "step": 5715 }, { "epoch": 1.029678000450349, "grad_norm": 1.3429150581359863, "learning_rate": 4.773493148841296e-05, "loss": 1.4563, "step": 5716 }, { "epoch": 1.0298581400585454, "grad_norm": 1.4499880075454712, "learning_rate": 4.772078459339338e-05, "loss": 1.5551, "step": 5717 }, { "epoch": 1.0300382796667418, "grad_norm": 1.3189891576766968, "learning_rate": 4.7706637881211346e-05, "loss": 1.2808, "step": 5718 }, { "epoch": 1.0302184192749382, "grad_norm": 1.411763072013855, "learning_rate": 4.769249135300172e-05, "loss": 1.527, "step": 5719 }, { "epoch": 1.0303985588831344, "grad_norm": 1.4098269939422607, "learning_rate": 4.7678345009899305e-05, "loss": 1.5637, "step": 5720 }, { "epoch": 1.0305786984913308, "grad_norm": 1.2264735698699951, "learning_rate": 4.7664198853038924e-05, "loss": 1.3567, "step": 5721 }, { "epoch": 1.0307588380995272, "grad_norm": 1.4814525842666626, "learning_rate": 4.765005288355536e-05, "loss": 1.806, "step": 5722 }, { "epoch": 1.0309389777077236, "grad_norm": 1.3225632905960083, "learning_rate": 4.763590710258343e-05, "loss": 1.3965, "step": 5723 }, { "epoch": 1.0311191173159198, "grad_norm": 1.2582075595855713, "learning_rate": 4.762176151125787e-05, "loss": 1.5195, "step": 5724 }, { "epoch": 1.0312992569241162, "grad_norm": 1.3020389080047607, "learning_rate": 4.7607616110713446e-05, "loss": 1.3946, "step": 5725 }, { "epoch": 1.0314793965323126, "grad_norm": 1.3780348300933838, "learning_rate": 4.759347090208489e-05, "loss": 1.5789, "step": 5726 }, { "epoch": 1.031659536140509, "grad_norm": 1.4056990146636963, "learning_rate": 4.757932588650693e-05, "loss": 1.3888, "step": 5727 }, { "epoch": 1.0318396757487052, "grad_norm": 1.4529263973236084, "learning_rate": 4.7565181065114267e-05, "loss": 1.5747, "step": 5728 }, { "epoch": 1.0320198153569016, "grad_norm": 1.5221878290176392, "learning_rate": 4.755103643904159e-05, "loss": 1.5775, "step": 5729 }, { "epoch": 1.032199954965098, "grad_norm": 1.277483582496643, "learning_rate": 4.753689200942358e-05, "loss": 1.2928, "step": 5730 }, { "epoch": 1.0323800945732944, "grad_norm": 1.3328691720962524, "learning_rate": 4.752274777739488e-05, "loss": 1.2877, "step": 5731 }, { "epoch": 1.0325602341814906, "grad_norm": 1.3660414218902588, "learning_rate": 4.750860374409017e-05, "loss": 1.558, "step": 5732 }, { "epoch": 1.032740373789687, "grad_norm": 1.5818179845809937, "learning_rate": 4.749445991064404e-05, "loss": 1.6719, "step": 5733 }, { "epoch": 1.0329205133978834, "grad_norm": 1.4644523859024048, "learning_rate": 4.748031627819112e-05, "loss": 1.5023, "step": 5734 }, { "epoch": 1.0331006530060798, "grad_norm": 1.5040640830993652, "learning_rate": 4.746617284786601e-05, "loss": 1.7585, "step": 5735 }, { "epoch": 1.033280792614276, "grad_norm": 1.3353251218795776, "learning_rate": 4.7452029620803246e-05, "loss": 1.3647, "step": 5736 }, { "epoch": 1.0334609322224724, "grad_norm": 1.4378539323806763, "learning_rate": 4.743788659813745e-05, "loss": 1.5239, "step": 5737 }, { "epoch": 1.0336410718306688, "grad_norm": 1.3207393884658813, "learning_rate": 4.7423743781003153e-05, "loss": 1.3488, "step": 5738 }, { "epoch": 1.0338212114388652, "grad_norm": 1.3925564289093018, "learning_rate": 4.740960117053487e-05, "loss": 1.4494, "step": 5739 }, { "epoch": 1.0340013510470614, "grad_norm": 1.521926999092102, "learning_rate": 4.739545876786709e-05, "loss": 1.5359, "step": 5740 }, { "epoch": 1.0341814906552578, "grad_norm": 1.3042010068893433, "learning_rate": 4.7381316574134365e-05, "loss": 1.2302, "step": 5741 }, { "epoch": 1.0343616302634542, "grad_norm": 1.4988808631896973, "learning_rate": 4.7367174590471145e-05, "loss": 1.3834, "step": 5742 }, { "epoch": 1.0345417698716506, "grad_norm": 1.567260503768921, "learning_rate": 4.73530328180119e-05, "loss": 1.5681, "step": 5743 }, { "epoch": 1.0347219094798468, "grad_norm": 1.4177359342575073, "learning_rate": 4.7338891257891084e-05, "loss": 1.398, "step": 5744 }, { "epoch": 1.0349020490880432, "grad_norm": 1.4603091478347778, "learning_rate": 4.7324749911243085e-05, "loss": 1.4455, "step": 5745 }, { "epoch": 1.0350821886962396, "grad_norm": 1.4425009489059448, "learning_rate": 4.731060877920236e-05, "loss": 1.2133, "step": 5746 }, { "epoch": 1.035262328304436, "grad_norm": 1.3918821811676025, "learning_rate": 4.7296467862903295e-05, "loss": 1.3172, "step": 5747 }, { "epoch": 1.0354424679126324, "grad_norm": 1.505253791809082, "learning_rate": 4.728232716348025e-05, "loss": 1.5475, "step": 5748 }, { "epoch": 1.0356226075208286, "grad_norm": 1.3155560493469238, "learning_rate": 4.726818668206761e-05, "loss": 1.1824, "step": 5749 }, { "epoch": 1.035802747129025, "grad_norm": 1.3571481704711914, "learning_rate": 4.725404641979969e-05, "loss": 1.3126, "step": 5750 }, { "epoch": 1.0359828867372214, "grad_norm": 1.384545922279358, "learning_rate": 4.723990637781084e-05, "loss": 1.3437, "step": 5751 }, { "epoch": 1.0361630263454178, "grad_norm": 1.3022403717041016, "learning_rate": 4.7225766557235354e-05, "loss": 1.649, "step": 5752 }, { "epoch": 1.036343165953614, "grad_norm": 1.3195267915725708, "learning_rate": 4.7211626959207524e-05, "loss": 1.8459, "step": 5753 }, { "epoch": 1.0365233055618104, "grad_norm": 1.3528718948364258, "learning_rate": 4.719748758486161e-05, "loss": 1.9176, "step": 5754 }, { "epoch": 1.0367034451700068, "grad_norm": 1.3536591529846191, "learning_rate": 4.718334843533189e-05, "loss": 1.9696, "step": 5755 }, { "epoch": 1.0368835847782032, "grad_norm": 1.287377953529358, "learning_rate": 4.7169209511752574e-05, "loss": 1.5935, "step": 5756 }, { "epoch": 1.0370637243863994, "grad_norm": 1.296950340270996, "learning_rate": 4.7155070815257905e-05, "loss": 1.7355, "step": 5757 }, { "epoch": 1.0372438639945958, "grad_norm": 1.4993720054626465, "learning_rate": 4.714093234698207e-05, "loss": 1.9432, "step": 5758 }, { "epoch": 1.0374240036027922, "grad_norm": 1.4126299619674683, "learning_rate": 4.712679410805923e-05, "loss": 1.7028, "step": 5759 }, { "epoch": 1.0376041432109886, "grad_norm": 1.6363420486450195, "learning_rate": 4.7112656099623584e-05, "loss": 1.9907, "step": 5760 }, { "epoch": 1.0377842828191848, "grad_norm": 1.7183916568756104, "learning_rate": 4.709851832280926e-05, "loss": 2.1713, "step": 5761 }, { "epoch": 1.0379644224273812, "grad_norm": 1.4752726554870605, "learning_rate": 4.708438077875038e-05, "loss": 1.5608, "step": 5762 }, { "epoch": 1.0381445620355776, "grad_norm": 1.3664577007293701, "learning_rate": 4.707024346858105e-05, "loss": 1.5523, "step": 5763 }, { "epoch": 1.038324701643774, "grad_norm": 1.333034873008728, "learning_rate": 4.705610639343536e-05, "loss": 1.5955, "step": 5764 }, { "epoch": 1.0385048412519702, "grad_norm": 1.3754521608352661, "learning_rate": 4.704196955444739e-05, "loss": 1.6484, "step": 5765 }, { "epoch": 1.0386849808601666, "grad_norm": 1.2431342601776123, "learning_rate": 4.7027832952751175e-05, "loss": 1.3816, "step": 5766 }, { "epoch": 1.038865120468363, "grad_norm": 1.32204008102417, "learning_rate": 4.701369658948076e-05, "loss": 1.4545, "step": 5767 }, { "epoch": 1.0390452600765594, "grad_norm": 1.2806951999664307, "learning_rate": 4.699956046577012e-05, "loss": 1.2847, "step": 5768 }, { "epoch": 1.0392253996847556, "grad_norm": 1.33052659034729, "learning_rate": 4.698542458275331e-05, "loss": 1.4433, "step": 5769 }, { "epoch": 1.039405539292952, "grad_norm": 1.435530662536621, "learning_rate": 4.697128894156428e-05, "loss": 1.6555, "step": 5770 }, { "epoch": 1.0395856789011484, "grad_norm": 1.381945252418518, "learning_rate": 4.6957153543336965e-05, "loss": 1.5089, "step": 5771 }, { "epoch": 1.0397658185093448, "grad_norm": 1.315426230430603, "learning_rate": 4.694301838920531e-05, "loss": 1.3656, "step": 5772 }, { "epoch": 1.0399459581175412, "grad_norm": 1.4077342748641968, "learning_rate": 4.6928883480303206e-05, "loss": 1.4299, "step": 5773 }, { "epoch": 1.0401260977257374, "grad_norm": 1.274634599685669, "learning_rate": 4.6914748817764596e-05, "loss": 1.2912, "step": 5774 }, { "epoch": 1.0403062373339338, "grad_norm": 1.373408555984497, "learning_rate": 4.6900614402723334e-05, "loss": 1.44, "step": 5775 }, { "epoch": 1.0404863769421302, "grad_norm": 1.3952643871307373, "learning_rate": 4.688648023631327e-05, "loss": 1.4443, "step": 5776 }, { "epoch": 1.0406665165503266, "grad_norm": 1.4027163982391357, "learning_rate": 4.687234631966826e-05, "loss": 1.5012, "step": 5777 }, { "epoch": 1.0408466561585228, "grad_norm": 1.400055170059204, "learning_rate": 4.685821265392208e-05, "loss": 1.489, "step": 5778 }, { "epoch": 1.0410267957667192, "grad_norm": 1.3491933345794678, "learning_rate": 4.684407924020856e-05, "loss": 1.3453, "step": 5779 }, { "epoch": 1.0412069353749156, "grad_norm": 1.4953827857971191, "learning_rate": 4.682994607966147e-05, "loss": 1.6989, "step": 5780 }, { "epoch": 1.041387074983112, "grad_norm": 1.3640010356903076, "learning_rate": 4.681581317341457e-05, "loss": 1.4085, "step": 5781 }, { "epoch": 1.0415672145913082, "grad_norm": 1.3998982906341553, "learning_rate": 4.680168052260156e-05, "loss": 1.4891, "step": 5782 }, { "epoch": 1.0417473541995046, "grad_norm": 1.3895014524459839, "learning_rate": 4.6787548128356205e-05, "loss": 1.4404, "step": 5783 }, { "epoch": 1.041927493807701, "grad_norm": 1.475217342376709, "learning_rate": 4.677341599181217e-05, "loss": 1.6394, "step": 5784 }, { "epoch": 1.0421076334158974, "grad_norm": 1.3195574283599854, "learning_rate": 4.675928411410313e-05, "loss": 1.2674, "step": 5785 }, { "epoch": 1.0422877730240936, "grad_norm": 1.4098390340805054, "learning_rate": 4.674515249636274e-05, "loss": 1.3154, "step": 5786 }, { "epoch": 1.04246791263229, "grad_norm": 1.3156152963638306, "learning_rate": 4.6731021139724625e-05, "loss": 1.3817, "step": 5787 }, { "epoch": 1.0426480522404864, "grad_norm": 1.2982183694839478, "learning_rate": 4.671689004532241e-05, "loss": 1.3423, "step": 5788 }, { "epoch": 1.0428281918486828, "grad_norm": 1.4725338220596313, "learning_rate": 4.670275921428967e-05, "loss": 1.3816, "step": 5789 }, { "epoch": 1.043008331456879, "grad_norm": 1.4499729871749878, "learning_rate": 4.668862864775999e-05, "loss": 1.4162, "step": 5790 }, { "epoch": 1.0431884710650754, "grad_norm": 1.3309400081634521, "learning_rate": 4.667449834686689e-05, "loss": 1.3185, "step": 5791 }, { "epoch": 1.0433686106732718, "grad_norm": 1.342346429824829, "learning_rate": 4.666036831274392e-05, "loss": 1.2237, "step": 5792 }, { "epoch": 1.0435487502814682, "grad_norm": 1.3679298162460327, "learning_rate": 4.664623854652459e-05, "loss": 1.3494, "step": 5793 }, { "epoch": 1.0437288898896644, "grad_norm": 1.3507254123687744, "learning_rate": 4.663210904934237e-05, "loss": 1.3174, "step": 5794 }, { "epoch": 1.0439090294978608, "grad_norm": 1.377913475036621, "learning_rate": 4.661797982233071e-05, "loss": 1.2674, "step": 5795 }, { "epoch": 1.0440891691060572, "grad_norm": 1.5450230836868286, "learning_rate": 4.6603850866623037e-05, "loss": 1.5028, "step": 5796 }, { "epoch": 1.0442693087142536, "grad_norm": 1.4675216674804688, "learning_rate": 4.658972218335284e-05, "loss": 1.4726, "step": 5797 }, { "epoch": 1.0444494483224498, "grad_norm": 1.5356272459030151, "learning_rate": 4.657559377365345e-05, "loss": 1.2549, "step": 5798 }, { "epoch": 1.0446295879306462, "grad_norm": 1.4964866638183594, "learning_rate": 4.656146563865826e-05, "loss": 1.4362, "step": 5799 }, { "epoch": 1.0448097275388426, "grad_norm": 1.5085233449935913, "learning_rate": 4.654733777950062e-05, "loss": 1.4774, "step": 5800 }, { "epoch": 1.044989867147039, "grad_norm": 1.521103858947754, "learning_rate": 4.653321019731384e-05, "loss": 1.4785, "step": 5801 }, { "epoch": 1.0451700067552354, "grad_norm": 1.3913700580596924, "learning_rate": 4.6519082893231273e-05, "loss": 1.8465, "step": 5802 }, { "epoch": 1.0453501463634316, "grad_norm": 1.3035120964050293, "learning_rate": 4.6504955868386194e-05, "loss": 1.6488, "step": 5803 }, { "epoch": 1.045530285971628, "grad_norm": 1.3929150104522705, "learning_rate": 4.649082912391184e-05, "loss": 1.6733, "step": 5804 }, { "epoch": 1.0457104255798244, "grad_norm": 1.2299821376800537, "learning_rate": 4.647670266094144e-05, "loss": 1.5208, "step": 5805 }, { "epoch": 1.0458905651880208, "grad_norm": 1.302491545677185, "learning_rate": 4.646257648060826e-05, "loss": 1.8307, "step": 5806 }, { "epoch": 1.046070704796217, "grad_norm": 1.3841044902801514, "learning_rate": 4.644845058404547e-05, "loss": 1.6925, "step": 5807 }, { "epoch": 1.0462508444044134, "grad_norm": 1.4533723592758179, "learning_rate": 4.643432497238625e-05, "loss": 2.1432, "step": 5808 }, { "epoch": 1.0464309840126098, "grad_norm": 1.8999817371368408, "learning_rate": 4.6420199646763754e-05, "loss": 1.8443, "step": 5809 }, { "epoch": 1.0466111236208062, "grad_norm": 1.6552634239196777, "learning_rate": 4.6406074608311055e-05, "loss": 1.6317, "step": 5810 }, { "epoch": 1.0467912632290024, "grad_norm": 1.7162585258483887, "learning_rate": 4.639194985816133e-05, "loss": 2.0495, "step": 5811 }, { "epoch": 1.0469714028371988, "grad_norm": 1.5610268115997314, "learning_rate": 4.6377825397447635e-05, "loss": 1.8608, "step": 5812 }, { "epoch": 1.0471515424453952, "grad_norm": 1.3679322004318237, "learning_rate": 4.636370122730302e-05, "loss": 1.5618, "step": 5813 }, { "epoch": 1.0473316820535916, "grad_norm": 1.4862409830093384, "learning_rate": 4.634957734886052e-05, "loss": 1.6521, "step": 5814 }, { "epoch": 1.0475118216617878, "grad_norm": 1.3192274570465088, "learning_rate": 4.633545376325313e-05, "loss": 1.4314, "step": 5815 }, { "epoch": 1.0476919612699842, "grad_norm": 1.335257887840271, "learning_rate": 4.632133047161387e-05, "loss": 1.3611, "step": 5816 }, { "epoch": 1.0478721008781806, "grad_norm": 1.3837766647338867, "learning_rate": 4.630720747507569e-05, "loss": 1.4771, "step": 5817 }, { "epoch": 1.048052240486377, "grad_norm": 1.3603489398956299, "learning_rate": 4.6293084774771525e-05, "loss": 1.3864, "step": 5818 }, { "epoch": 1.0482323800945732, "grad_norm": 1.3667148351669312, "learning_rate": 4.627896237183428e-05, "loss": 1.4564, "step": 5819 }, { "epoch": 1.0484125197027696, "grad_norm": 1.3413883447647095, "learning_rate": 4.626484026739688e-05, "loss": 1.4014, "step": 5820 }, { "epoch": 1.048592659310966, "grad_norm": 1.3456906080245972, "learning_rate": 4.625071846259217e-05, "loss": 1.4981, "step": 5821 }, { "epoch": 1.0487727989191624, "grad_norm": 1.468139886856079, "learning_rate": 4.6236596958553e-05, "loss": 1.5266, "step": 5822 }, { "epoch": 1.0489529385273586, "grad_norm": 1.2669085264205933, "learning_rate": 4.6222475756412184e-05, "loss": 1.4241, "step": 5823 }, { "epoch": 1.049133078135555, "grad_norm": 1.2660475969314575, "learning_rate": 4.6208354857302514e-05, "loss": 1.3017, "step": 5824 }, { "epoch": 1.0493132177437514, "grad_norm": 1.2904064655303955, "learning_rate": 4.6194234262356785e-05, "loss": 1.4019, "step": 5825 }, { "epoch": 1.0494933573519478, "grad_norm": 1.3185572624206543, "learning_rate": 4.618011397270773e-05, "loss": 1.5035, "step": 5826 }, { "epoch": 1.049673496960144, "grad_norm": 1.4489637613296509, "learning_rate": 4.6165993989488065e-05, "loss": 1.522, "step": 5827 }, { "epoch": 1.0498536365683404, "grad_norm": 1.3474037647247314, "learning_rate": 4.61518743138305e-05, "loss": 1.3995, "step": 5828 }, { "epoch": 1.0500337761765368, "grad_norm": 1.378036618232727, "learning_rate": 4.613775494686769e-05, "loss": 1.5432, "step": 5829 }, { "epoch": 1.0502139157847332, "grad_norm": 1.4725327491760254, "learning_rate": 4.612363588973231e-05, "loss": 1.1525, "step": 5830 }, { "epoch": 1.0503940553929296, "grad_norm": 1.4765253067016602, "learning_rate": 4.610951714355697e-05, "loss": 1.4696, "step": 5831 }, { "epoch": 1.0505741950011258, "grad_norm": 1.4648715257644653, "learning_rate": 4.609539870947427e-05, "loss": 1.4586, "step": 5832 }, { "epoch": 1.0507543346093222, "grad_norm": 1.3569397926330566, "learning_rate": 4.608128058861676e-05, "loss": 1.3504, "step": 5833 }, { "epoch": 1.0509344742175186, "grad_norm": 1.3940021991729736, "learning_rate": 4.6067162782117034e-05, "loss": 1.35, "step": 5834 }, { "epoch": 1.051114613825715, "grad_norm": 1.5747038125991821, "learning_rate": 4.605304529110761e-05, "loss": 1.6744, "step": 5835 }, { "epoch": 1.0512947534339112, "grad_norm": 1.4956122636795044, "learning_rate": 4.603892811672095e-05, "loss": 1.5604, "step": 5836 }, { "epoch": 1.0514748930421076, "grad_norm": 1.3946802616119385, "learning_rate": 4.6024811260089554e-05, "loss": 1.4269, "step": 5837 }, { "epoch": 1.051655032650304, "grad_norm": 1.3431092500686646, "learning_rate": 4.601069472234584e-05, "loss": 1.3158, "step": 5838 }, { "epoch": 1.0518351722585004, "grad_norm": 1.4149354696273804, "learning_rate": 4.5996578504622276e-05, "loss": 1.3701, "step": 5839 }, { "epoch": 1.0520153118666966, "grad_norm": 1.423290729522705, "learning_rate": 4.598246260805123e-05, "loss": 1.0842, "step": 5840 }, { "epoch": 1.052195451474893, "grad_norm": 1.358107089996338, "learning_rate": 4.596834703376509e-05, "loss": 1.3242, "step": 5841 }, { "epoch": 1.0523755910830894, "grad_norm": 1.4556576013565063, "learning_rate": 4.595423178289618e-05, "loss": 1.3552, "step": 5842 }, { "epoch": 1.0525557306912858, "grad_norm": 1.4829086065292358, "learning_rate": 4.59401168565768e-05, "loss": 1.3519, "step": 5843 }, { "epoch": 1.052735870299482, "grad_norm": 1.6029837131500244, "learning_rate": 4.5926002255939285e-05, "loss": 1.722, "step": 5844 }, { "epoch": 1.0529160099076784, "grad_norm": 1.5289759635925293, "learning_rate": 4.591188798211589e-05, "loss": 1.4243, "step": 5845 }, { "epoch": 1.0530961495158748, "grad_norm": 1.2917219400405884, "learning_rate": 4.589777403623885e-05, "loss": 1.1035, "step": 5846 }, { "epoch": 1.0532762891240712, "grad_norm": 1.4382274150848389, "learning_rate": 4.588366041944037e-05, "loss": 1.3642, "step": 5847 }, { "epoch": 1.0534564287322674, "grad_norm": 1.3584870100021362, "learning_rate": 4.586954713285265e-05, "loss": 1.2684, "step": 5848 }, { "epoch": 1.0536365683404638, "grad_norm": 1.4906058311462402, "learning_rate": 4.5855434177607855e-05, "loss": 1.2704, "step": 5849 }, { "epoch": 1.0538167079486602, "grad_norm": 1.67153000831604, "learning_rate": 4.5841321554838105e-05, "loss": 1.5062, "step": 5850 }, { "epoch": 1.0539968475568566, "grad_norm": 1.4223164319992065, "learning_rate": 4.582720926567552e-05, "loss": 1.2765, "step": 5851 }, { "epoch": 1.0541769871650528, "grad_norm": 1.279051423072815, "learning_rate": 4.5813097311252165e-05, "loss": 1.7388, "step": 5852 }, { "epoch": 1.0543571267732492, "grad_norm": 1.3545536994934082, "learning_rate": 4.579898569270011e-05, "loss": 1.9797, "step": 5853 }, { "epoch": 1.0545372663814456, "grad_norm": 1.3983289003372192, "learning_rate": 4.5784874411151376e-05, "loss": 1.9432, "step": 5854 }, { "epoch": 1.054717405989642, "grad_norm": 1.4242984056472778, "learning_rate": 4.577076346773797e-05, "loss": 1.9216, "step": 5855 }, { "epoch": 1.0548975455978384, "grad_norm": 1.3120312690734863, "learning_rate": 4.575665286359185e-05, "loss": 1.6461, "step": 5856 }, { "epoch": 1.0550776852060346, "grad_norm": 1.3882075548171997, "learning_rate": 4.574254259984496e-05, "loss": 1.8425, "step": 5857 }, { "epoch": 1.055257824814231, "grad_norm": 1.3838682174682617, "learning_rate": 4.5728432677629246e-05, "loss": 1.7617, "step": 5858 }, { "epoch": 1.0554379644224274, "grad_norm": 1.5086780786514282, "learning_rate": 4.5714323098076576e-05, "loss": 1.7995, "step": 5859 }, { "epoch": 1.0556181040306238, "grad_norm": 1.463242769241333, "learning_rate": 4.570021386231883e-05, "loss": 1.9989, "step": 5860 }, { "epoch": 1.05579824363882, "grad_norm": 1.5298558473587036, "learning_rate": 4.568610497148782e-05, "loss": 1.7859, "step": 5861 }, { "epoch": 1.0559783832470164, "grad_norm": 1.228330373764038, "learning_rate": 4.5671996426715366e-05, "loss": 1.4762, "step": 5862 }, { "epoch": 1.0561585228552128, "grad_norm": 1.23577880859375, "learning_rate": 4.5657888229133264e-05, "loss": 1.5135, "step": 5863 }, { "epoch": 1.0563386624634092, "grad_norm": 1.3579933643341064, "learning_rate": 4.564378037987325e-05, "loss": 1.5408, "step": 5864 }, { "epoch": 1.0565188020716054, "grad_norm": 1.2992329597473145, "learning_rate": 4.562967288006705e-05, "loss": 1.5238, "step": 5865 }, { "epoch": 1.0566989416798018, "grad_norm": 1.2924782037734985, "learning_rate": 4.561556573084634e-05, "loss": 1.3862, "step": 5866 }, { "epoch": 1.0568790812879982, "grad_norm": 1.387759804725647, "learning_rate": 4.560145893334283e-05, "loss": 1.4879, "step": 5867 }, { "epoch": 1.0570592208961946, "grad_norm": 1.2933061122894287, "learning_rate": 4.558735248868816e-05, "loss": 1.5111, "step": 5868 }, { "epoch": 1.0572393605043908, "grad_norm": 1.3263051509857178, "learning_rate": 4.5573246398013916e-05, "loss": 1.508, "step": 5869 }, { "epoch": 1.0574195001125872, "grad_norm": 1.3353958129882812, "learning_rate": 4.555914066245168e-05, "loss": 1.4281, "step": 5870 }, { "epoch": 1.0575996397207836, "grad_norm": 1.2505006790161133, "learning_rate": 4.5545035283133e-05, "loss": 1.4233, "step": 5871 }, { "epoch": 1.05777977932898, "grad_norm": 1.2882511615753174, "learning_rate": 4.553093026118944e-05, "loss": 1.3904, "step": 5872 }, { "epoch": 1.0579599189371762, "grad_norm": 1.498822569847107, "learning_rate": 4.5516825597752485e-05, "loss": 1.6502, "step": 5873 }, { "epoch": 1.0581400585453726, "grad_norm": 1.428572416305542, "learning_rate": 4.55027212939536e-05, "loss": 1.5253, "step": 5874 }, { "epoch": 1.058320198153569, "grad_norm": 1.2035534381866455, "learning_rate": 4.548861735092419e-05, "loss": 1.2131, "step": 5875 }, { "epoch": 1.0585003377617654, "grad_norm": 1.3449424505233765, "learning_rate": 4.547451376979572e-05, "loss": 1.4028, "step": 5876 }, { "epoch": 1.0586804773699616, "grad_norm": 1.3589001893997192, "learning_rate": 4.546041055169956e-05, "loss": 1.6379, "step": 5877 }, { "epoch": 1.058860616978158, "grad_norm": 1.363996148109436, "learning_rate": 4.5446307697767044e-05, "loss": 1.3631, "step": 5878 }, { "epoch": 1.0590407565863544, "grad_norm": 1.513213872909546, "learning_rate": 4.543220520912951e-05, "loss": 1.5737, "step": 5879 }, { "epoch": 1.0592208961945508, "grad_norm": 1.6133108139038086, "learning_rate": 4.541810308691824e-05, "loss": 1.752, "step": 5880 }, { "epoch": 1.059401035802747, "grad_norm": 1.4693034887313843, "learning_rate": 4.540400133226452e-05, "loss": 1.4857, "step": 5881 }, { "epoch": 1.0595811754109434, "grad_norm": 1.2282518148422241, "learning_rate": 4.538989994629957e-05, "loss": 1.1928, "step": 5882 }, { "epoch": 1.0597613150191398, "grad_norm": 1.4172661304473877, "learning_rate": 4.53757989301546e-05, "loss": 1.4922, "step": 5883 }, { "epoch": 1.0599414546273362, "grad_norm": 1.4527961015701294, "learning_rate": 4.5361698284960794e-05, "loss": 1.5314, "step": 5884 }, { "epoch": 1.0601215942355324, "grad_norm": 1.4484930038452148, "learning_rate": 4.534759801184928e-05, "loss": 1.4391, "step": 5885 }, { "epoch": 1.0603017338437288, "grad_norm": 1.4354983568191528, "learning_rate": 4.5333498111951196e-05, "loss": 1.386, "step": 5886 }, { "epoch": 1.0604818734519252, "grad_norm": 1.336592674255371, "learning_rate": 4.5319398586397616e-05, "loss": 1.2486, "step": 5887 }, { "epoch": 1.0606620130601216, "grad_norm": 1.3638192415237427, "learning_rate": 4.530529943631961e-05, "loss": 1.3237, "step": 5888 }, { "epoch": 1.060842152668318, "grad_norm": 1.3784631490707397, "learning_rate": 4.529120066284818e-05, "loss": 1.2917, "step": 5889 }, { "epoch": 1.0610222922765142, "grad_norm": 1.6139980554580688, "learning_rate": 4.527710226711435e-05, "loss": 1.7189, "step": 5890 }, { "epoch": 1.0612024318847106, "grad_norm": 1.4127531051635742, "learning_rate": 4.526300425024908e-05, "loss": 1.6182, "step": 5891 }, { "epoch": 1.061382571492907, "grad_norm": 1.4597697257995605, "learning_rate": 4.52489066133833e-05, "loss": 1.2911, "step": 5892 }, { "epoch": 1.0615627111011035, "grad_norm": 1.6121646165847778, "learning_rate": 4.523480935764792e-05, "loss": 1.7029, "step": 5893 }, { "epoch": 1.0617428507092996, "grad_norm": 1.3753849267959595, "learning_rate": 4.5220712484173796e-05, "loss": 1.2042, "step": 5894 }, { "epoch": 1.061922990317496, "grad_norm": 1.5212903022766113, "learning_rate": 4.52066159940918e-05, "loss": 1.6569, "step": 5895 }, { "epoch": 1.0621031299256924, "grad_norm": 1.5773612260818481, "learning_rate": 4.5192519888532734e-05, "loss": 1.4344, "step": 5896 }, { "epoch": 1.0622832695338889, "grad_norm": 1.4069286584854126, "learning_rate": 4.517842416862739e-05, "loss": 1.3726, "step": 5897 }, { "epoch": 1.062463409142085, "grad_norm": 1.7123864889144897, "learning_rate": 4.5164328835506476e-05, "loss": 1.5374, "step": 5898 }, { "epoch": 1.0626435487502814, "grad_norm": 1.2970200777053833, "learning_rate": 4.5150233890300776e-05, "loss": 1.2195, "step": 5899 }, { "epoch": 1.0628236883584778, "grad_norm": 1.3803675174713135, "learning_rate": 4.513613933414096e-05, "loss": 1.2504, "step": 5900 }, { "epoch": 1.0630038279666743, "grad_norm": 1.3868794441223145, "learning_rate": 4.5122045168157666e-05, "loss": 1.2987, "step": 5901 }, { "epoch": 1.0631839675748704, "grad_norm": 1.298801302909851, "learning_rate": 4.5107951393481534e-05, "loss": 2.0606, "step": 5902 }, { "epoch": 1.0633641071830668, "grad_norm": 1.3050814867019653, "learning_rate": 4.509385801124314e-05, "loss": 1.9173, "step": 5903 }, { "epoch": 1.0635442467912632, "grad_norm": 1.687298059463501, "learning_rate": 4.507976502257308e-05, "loss": 1.8127, "step": 5904 }, { "epoch": 1.0637243863994597, "grad_norm": 1.3398560285568237, "learning_rate": 4.506567242860188e-05, "loss": 1.7186, "step": 5905 }, { "epoch": 1.0639045260076558, "grad_norm": 1.3587313890457153, "learning_rate": 4.5051580230460046e-05, "loss": 1.922, "step": 5906 }, { "epoch": 1.0640846656158522, "grad_norm": 1.3958826065063477, "learning_rate": 4.503748842927802e-05, "loss": 1.7471, "step": 5907 }, { "epoch": 1.0642648052240486, "grad_norm": 1.5311014652252197, "learning_rate": 4.502339702618624e-05, "loss": 1.6232, "step": 5908 }, { "epoch": 1.064444944832245, "grad_norm": 1.4137617349624634, "learning_rate": 4.5009306022315136e-05, "loss": 1.7105, "step": 5909 }, { "epoch": 1.0646250844404412, "grad_norm": 1.5777653455734253, "learning_rate": 4.4995215418795085e-05, "loss": 1.7451, "step": 5910 }, { "epoch": 1.0648052240486376, "grad_norm": 1.6742364168167114, "learning_rate": 4.498112521675641e-05, "loss": 1.81, "step": 5911 }, { "epoch": 1.064985363656834, "grad_norm": 1.5370639562606812, "learning_rate": 4.496703541732942e-05, "loss": 1.5559, "step": 5912 }, { "epoch": 1.0651655032650305, "grad_norm": 1.3023706674575806, "learning_rate": 4.4952946021644396e-05, "loss": 1.3688, "step": 5913 }, { "epoch": 1.0653456428732269, "grad_norm": 1.5068551301956177, "learning_rate": 4.493885703083159e-05, "loss": 1.5758, "step": 5914 }, { "epoch": 1.065525782481423, "grad_norm": 1.2598767280578613, "learning_rate": 4.492476844602122e-05, "loss": 1.2191, "step": 5915 }, { "epoch": 1.0657059220896195, "grad_norm": 1.4634027481079102, "learning_rate": 4.4910680268343435e-05, "loss": 1.7271, "step": 5916 }, { "epoch": 1.0658860616978159, "grad_norm": 1.409731149673462, "learning_rate": 4.48965924989284e-05, "loss": 1.5372, "step": 5917 }, { "epoch": 1.0660662013060123, "grad_norm": 1.2124196290969849, "learning_rate": 4.488250513890623e-05, "loss": 1.3647, "step": 5918 }, { "epoch": 1.0662463409142084, "grad_norm": 1.2886885404586792, "learning_rate": 4.486841818940701e-05, "loss": 1.3609, "step": 5919 }, { "epoch": 1.0664264805224049, "grad_norm": 1.2771979570388794, "learning_rate": 4.485433165156079e-05, "loss": 1.2345, "step": 5920 }, { "epoch": 1.0666066201306013, "grad_norm": 1.3354061841964722, "learning_rate": 4.484024552649757e-05, "loss": 1.5148, "step": 5921 }, { "epoch": 1.0667867597387977, "grad_norm": 1.3562688827514648, "learning_rate": 4.482615981534734e-05, "loss": 1.2987, "step": 5922 }, { "epoch": 1.0669668993469938, "grad_norm": 1.5325772762298584, "learning_rate": 4.4812074519240046e-05, "loss": 1.5195, "step": 5923 }, { "epoch": 1.0671470389551903, "grad_norm": 1.3717572689056396, "learning_rate": 4.4797989639305615e-05, "loss": 1.3244, "step": 5924 }, { "epoch": 1.0673271785633867, "grad_norm": 1.3823553323745728, "learning_rate": 4.4783905176673926e-05, "loss": 1.3844, "step": 5925 }, { "epoch": 1.067507318171583, "grad_norm": 1.352996587753296, "learning_rate": 4.4769821132474805e-05, "loss": 1.5602, "step": 5926 }, { "epoch": 1.0676874577797792, "grad_norm": 1.401050329208374, "learning_rate": 4.47557375078381e-05, "loss": 1.4517, "step": 5927 }, { "epoch": 1.0678675973879757, "grad_norm": 1.4393761157989502, "learning_rate": 4.474165430389357e-05, "loss": 1.3229, "step": 5928 }, { "epoch": 1.068047736996172, "grad_norm": 1.4201732873916626, "learning_rate": 4.472757152177098e-05, "loss": 1.3306, "step": 5929 }, { "epoch": 1.0682278766043685, "grad_norm": 1.4842742681503296, "learning_rate": 4.471348916260003e-05, "loss": 1.484, "step": 5930 }, { "epoch": 1.0684080162125646, "grad_norm": 1.4290024042129517, "learning_rate": 4.469940722751038e-05, "loss": 1.6884, "step": 5931 }, { "epoch": 1.068588155820761, "grad_norm": 1.4492005109786987, "learning_rate": 4.4685325717631736e-05, "loss": 1.547, "step": 5932 }, { "epoch": 1.0687682954289575, "grad_norm": 1.4960498809814453, "learning_rate": 4.467124463409367e-05, "loss": 1.6342, "step": 5933 }, { "epoch": 1.0689484350371539, "grad_norm": 1.4255046844482422, "learning_rate": 4.4657163978025754e-05, "loss": 1.4318, "step": 5934 }, { "epoch": 1.06912857464535, "grad_norm": 1.4704073667526245, "learning_rate": 4.464308375055755e-05, "loss": 1.4211, "step": 5935 }, { "epoch": 1.0693087142535465, "grad_norm": 1.43056321144104, "learning_rate": 4.4629003952818523e-05, "loss": 1.3927, "step": 5936 }, { "epoch": 1.0694888538617429, "grad_norm": 1.4655250310897827, "learning_rate": 4.461492458593821e-05, "loss": 1.5348, "step": 5937 }, { "epoch": 1.0696689934699393, "grad_norm": 1.4300577640533447, "learning_rate": 4.460084565104603e-05, "loss": 1.5673, "step": 5938 }, { "epoch": 1.0698491330781357, "grad_norm": 1.3906139135360718, "learning_rate": 4.458676714927139e-05, "loss": 1.2609, "step": 5939 }, { "epoch": 1.0700292726863319, "grad_norm": 1.521336555480957, "learning_rate": 4.457268908174362e-05, "loss": 1.6969, "step": 5940 }, { "epoch": 1.0702094122945283, "grad_norm": 1.5022079944610596, "learning_rate": 4.4558611449592106e-05, "loss": 1.2934, "step": 5941 }, { "epoch": 1.0703895519027247, "grad_norm": 1.4440133571624756, "learning_rate": 4.4544534253946136e-05, "loss": 1.4334, "step": 5942 }, { "epoch": 1.0705696915109209, "grad_norm": 1.4926468133926392, "learning_rate": 4.453045749593498e-05, "loss": 1.5811, "step": 5943 }, { "epoch": 1.0707498311191173, "grad_norm": 1.5202566385269165, "learning_rate": 4.4516381176687835e-05, "loss": 1.4628, "step": 5944 }, { "epoch": 1.0709299707273137, "grad_norm": 1.6362404823303223, "learning_rate": 4.450230529733393e-05, "loss": 1.7735, "step": 5945 }, { "epoch": 1.07111011033551, "grad_norm": 1.5577993392944336, "learning_rate": 4.448822985900242e-05, "loss": 1.6431, "step": 5946 }, { "epoch": 1.0712902499437065, "grad_norm": 1.553818702697754, "learning_rate": 4.447415486282242e-05, "loss": 1.46, "step": 5947 }, { "epoch": 1.0714703895519027, "grad_norm": 1.4666478633880615, "learning_rate": 4.446008030992304e-05, "loss": 1.4502, "step": 5948 }, { "epoch": 1.071650529160099, "grad_norm": 1.5311304330825806, "learning_rate": 4.444600620143331e-05, "loss": 1.2108, "step": 5949 }, { "epoch": 1.0718306687682955, "grad_norm": 1.4128271341323853, "learning_rate": 4.4431932538482255e-05, "loss": 1.2548, "step": 5950 }, { "epoch": 1.0720108083764919, "grad_norm": 1.2450443506240845, "learning_rate": 4.441785932219886e-05, "loss": 0.9277, "step": 5951 }, { "epoch": 1.072190947984688, "grad_norm": 1.3749061822891235, "learning_rate": 4.440378655371208e-05, "loss": 2.1365, "step": 5952 }, { "epoch": 1.0723710875928845, "grad_norm": 1.2680636644363403, "learning_rate": 4.438971423415081e-05, "loss": 1.8767, "step": 5953 }, { "epoch": 1.0725512272010809, "grad_norm": 1.4089096784591675, "learning_rate": 4.437564236464392e-05, "loss": 1.8431, "step": 5954 }, { "epoch": 1.0727313668092773, "grad_norm": 1.3442976474761963, "learning_rate": 4.4361570946320276e-05, "loss": 2.0213, "step": 5955 }, { "epoch": 1.0729115064174735, "grad_norm": 1.2464367151260376, "learning_rate": 4.434749998030866e-05, "loss": 1.5912, "step": 5956 }, { "epoch": 1.0730916460256699, "grad_norm": 1.347922921180725, "learning_rate": 4.433342946773784e-05, "loss": 1.7482, "step": 5957 }, { "epoch": 1.0732717856338663, "grad_norm": 1.4396653175354004, "learning_rate": 4.431935940973656e-05, "loss": 1.7131, "step": 5958 }, { "epoch": 1.0734519252420627, "grad_norm": 1.484238862991333, "learning_rate": 4.430528980743347e-05, "loss": 1.9303, "step": 5959 }, { "epoch": 1.0736320648502589, "grad_norm": 1.5621731281280518, "learning_rate": 4.429122066195729e-05, "loss": 1.8973, "step": 5960 }, { "epoch": 1.0738122044584553, "grad_norm": 1.7975538969039917, "learning_rate": 4.42771519744366e-05, "loss": 1.9678, "step": 5961 }, { "epoch": 1.0739923440666517, "grad_norm": 1.3895522356033325, "learning_rate": 4.426308374599999e-05, "loss": 1.7068, "step": 5962 }, { "epoch": 1.074172483674848, "grad_norm": 1.3360257148742676, "learning_rate": 4.4249015977776e-05, "loss": 1.4722, "step": 5963 }, { "epoch": 1.0743526232830443, "grad_norm": 1.3033549785614014, "learning_rate": 4.4234948670893134e-05, "loss": 1.2835, "step": 5964 }, { "epoch": 1.0745327628912407, "grad_norm": 1.2391910552978516, "learning_rate": 4.4220881826479906e-05, "loss": 1.133, "step": 5965 }, { "epoch": 1.074712902499437, "grad_norm": 1.3447905778884888, "learning_rate": 4.420681544566472e-05, "loss": 1.4391, "step": 5966 }, { "epoch": 1.0748930421076335, "grad_norm": 1.3464741706848145, "learning_rate": 4.419274952957597e-05, "loss": 1.4672, "step": 5967 }, { "epoch": 1.0750731817158297, "grad_norm": 1.3003981113433838, "learning_rate": 4.417868407934201e-05, "loss": 1.3032, "step": 5968 }, { "epoch": 1.075253321324026, "grad_norm": 1.3012442588806152, "learning_rate": 4.416461909609119e-05, "loss": 1.3675, "step": 5969 }, { "epoch": 1.0754334609322225, "grad_norm": 1.449258804321289, "learning_rate": 4.415055458095179e-05, "loss": 1.6051, "step": 5970 }, { "epoch": 1.0756136005404189, "grad_norm": 1.3937456607818604, "learning_rate": 4.4136490535052064e-05, "loss": 1.5427, "step": 5971 }, { "epoch": 1.0757937401486153, "grad_norm": 1.3208332061767578, "learning_rate": 4.412242695952019e-05, "loss": 1.4454, "step": 5972 }, { "epoch": 1.0759738797568115, "grad_norm": 1.5141353607177734, "learning_rate": 4.4108363855484356e-05, "loss": 1.6584, "step": 5973 }, { "epoch": 1.0761540193650079, "grad_norm": 1.4284627437591553, "learning_rate": 4.409430122407271e-05, "loss": 1.5334, "step": 5974 }, { "epoch": 1.0763341589732043, "grad_norm": 1.6092393398284912, "learning_rate": 4.408023906641335e-05, "loss": 1.6227, "step": 5975 }, { "epoch": 1.0765142985814007, "grad_norm": 1.332499623298645, "learning_rate": 4.406617738363433e-05, "loss": 1.4038, "step": 5976 }, { "epoch": 1.0766944381895969, "grad_norm": 1.417228102684021, "learning_rate": 4.4052116176863686e-05, "loss": 1.4458, "step": 5977 }, { "epoch": 1.0768745777977933, "grad_norm": 1.5306553840637207, "learning_rate": 4.403805544722935e-05, "loss": 1.5597, "step": 5978 }, { "epoch": 1.0770547174059897, "grad_norm": 1.4487805366516113, "learning_rate": 4.402399519585932e-05, "loss": 1.6045, "step": 5979 }, { "epoch": 1.077234857014186, "grad_norm": 1.3833152055740356, "learning_rate": 4.400993542388148e-05, "loss": 1.4061, "step": 5980 }, { "epoch": 1.0774149966223823, "grad_norm": 1.2953568696975708, "learning_rate": 4.399587613242371e-05, "loss": 1.3023, "step": 5981 }, { "epoch": 1.0775951362305787, "grad_norm": 1.4946660995483398, "learning_rate": 4.398181732261382e-05, "loss": 1.4403, "step": 5982 }, { "epoch": 1.077775275838775, "grad_norm": 1.4172781705856323, "learning_rate": 4.3967758995579624e-05, "loss": 1.4389, "step": 5983 }, { "epoch": 1.0779554154469715, "grad_norm": 1.2224206924438477, "learning_rate": 4.395370115244886e-05, "loss": 1.2448, "step": 5984 }, { "epoch": 1.0781355550551677, "grad_norm": 1.4250576496124268, "learning_rate": 4.393964379434925e-05, "loss": 1.4833, "step": 5985 }, { "epoch": 1.078315694663364, "grad_norm": 1.4825717210769653, "learning_rate": 4.392558692240846e-05, "loss": 1.5569, "step": 5986 }, { "epoch": 1.0784958342715605, "grad_norm": 1.4613025188446045, "learning_rate": 4.3911530537754117e-05, "loss": 1.5337, "step": 5987 }, { "epoch": 1.0786759738797569, "grad_norm": 1.5007274150848389, "learning_rate": 4.389747464151384e-05, "loss": 1.498, "step": 5988 }, { "epoch": 1.078856113487953, "grad_norm": 1.3157531023025513, "learning_rate": 4.388341923481518e-05, "loss": 1.2244, "step": 5989 }, { "epoch": 1.0790362530961495, "grad_norm": 1.4924142360687256, "learning_rate": 4.386936431878564e-05, "loss": 1.5304, "step": 5990 }, { "epoch": 1.0792163927043459, "grad_norm": 1.5481082201004028, "learning_rate": 4.3855309894552707e-05, "loss": 1.5226, "step": 5991 }, { "epoch": 1.0793965323125423, "grad_norm": 1.411333441734314, "learning_rate": 4.384125596324382e-05, "loss": 1.477, "step": 5992 }, { "epoch": 1.0795766719207385, "grad_norm": 1.5101584196090698, "learning_rate": 4.382720252598639e-05, "loss": 1.5143, "step": 5993 }, { "epoch": 1.0797568115289349, "grad_norm": 1.5340688228607178, "learning_rate": 4.381314958390776e-05, "loss": 1.6914, "step": 5994 }, { "epoch": 1.0799369511371313, "grad_norm": 1.4889111518859863, "learning_rate": 4.379909713813526e-05, "loss": 1.2961, "step": 5995 }, { "epoch": 1.0801170907453277, "grad_norm": 1.476090669631958, "learning_rate": 4.378504518979615e-05, "loss": 1.5113, "step": 5996 }, { "epoch": 1.080297230353524, "grad_norm": 1.4502959251403809, "learning_rate": 4.377099374001771e-05, "loss": 1.3697, "step": 5997 }, { "epoch": 1.0804773699617203, "grad_norm": 1.3926317691802979, "learning_rate": 4.375694278992712e-05, "loss": 1.3198, "step": 5998 }, { "epoch": 1.0806575095699167, "grad_norm": 1.4504799842834473, "learning_rate": 4.3742892340651534e-05, "loss": 1.4005, "step": 5999 }, { "epoch": 1.080837649178113, "grad_norm": 1.4456108808517456, "learning_rate": 4.372884239331807e-05, "loss": 1.3226, "step": 6000 }, { "epoch": 1.0810177887863093, "grad_norm": 1.344207525253296, "learning_rate": 4.37147929490538e-05, "loss": 1.2448, "step": 6001 }, { "epoch": 1.0811979283945057, "grad_norm": 1.4364771842956543, "learning_rate": 4.370074400898579e-05, "loss": 1.7137, "step": 6002 }, { "epoch": 1.081378068002702, "grad_norm": 1.269330382347107, "learning_rate": 4.368669557424105e-05, "loss": 1.8557, "step": 6003 }, { "epoch": 1.0815582076108985, "grad_norm": 1.5894314050674438, "learning_rate": 4.36726476459465e-05, "loss": 1.9153, "step": 6004 }, { "epoch": 1.081738347219095, "grad_norm": 1.3860946893692017, "learning_rate": 4.365860022522905e-05, "loss": 1.6729, "step": 6005 }, { "epoch": 1.081918486827291, "grad_norm": 1.4932881593704224, "learning_rate": 4.364455331321563e-05, "loss": 2.2214, "step": 6006 }, { "epoch": 1.0820986264354875, "grad_norm": 1.2864151000976562, "learning_rate": 4.363050691103304e-05, "loss": 1.5079, "step": 6007 }, { "epoch": 1.082278766043684, "grad_norm": 1.3679653406143188, "learning_rate": 4.3616461019808094e-05, "loss": 1.7773, "step": 6008 }, { "epoch": 1.0824589056518803, "grad_norm": 1.3484203815460205, "learning_rate": 4.360241564066753e-05, "loss": 1.6213, "step": 6009 }, { "epoch": 1.0826390452600765, "grad_norm": 1.747590184211731, "learning_rate": 4.358837077473805e-05, "loss": 1.9143, "step": 6010 }, { "epoch": 1.0828191848682729, "grad_norm": 1.7672019004821777, "learning_rate": 4.357432642314636e-05, "loss": 2.0902, "step": 6011 }, { "epoch": 1.0829993244764693, "grad_norm": 1.4902225732803345, "learning_rate": 4.3560282587019075e-05, "loss": 1.6244, "step": 6012 }, { "epoch": 1.0831794640846657, "grad_norm": 1.4241995811462402, "learning_rate": 4.354623926748278e-05, "loss": 1.5463, "step": 6013 }, { "epoch": 1.0833596036928619, "grad_norm": 1.3042106628417969, "learning_rate": 4.3532196465664025e-05, "loss": 1.402, "step": 6014 }, { "epoch": 1.0835397433010583, "grad_norm": 1.3135544061660767, "learning_rate": 4.351815418268932e-05, "loss": 1.3167, "step": 6015 }, { "epoch": 1.0837198829092547, "grad_norm": 1.4225006103515625, "learning_rate": 4.350411241968513e-05, "loss": 1.3696, "step": 6016 }, { "epoch": 1.083900022517451, "grad_norm": 1.2467031478881836, "learning_rate": 4.3490071177777883e-05, "loss": 1.4926, "step": 6017 }, { "epoch": 1.0840801621256473, "grad_norm": 1.2875277996063232, "learning_rate": 4.3476030458093955e-05, "loss": 1.3543, "step": 6018 }, { "epoch": 1.0842603017338437, "grad_norm": 1.3541805744171143, "learning_rate": 4.3461990261759673e-05, "loss": 1.3853, "step": 6019 }, { "epoch": 1.08444044134204, "grad_norm": 1.2741343975067139, "learning_rate": 4.3447950589901356e-05, "loss": 1.4867, "step": 6020 }, { "epoch": 1.0846205809502365, "grad_norm": 1.5485223531723022, "learning_rate": 4.343391144364525e-05, "loss": 1.6307, "step": 6021 }, { "epoch": 1.0848007205584327, "grad_norm": 1.3859827518463135, "learning_rate": 4.341987282411758e-05, "loss": 1.4764, "step": 6022 }, { "epoch": 1.084980860166629, "grad_norm": 1.3100894689559937, "learning_rate": 4.34058347324445e-05, "loss": 1.3163, "step": 6023 }, { "epoch": 1.0851609997748255, "grad_norm": 1.332659363746643, "learning_rate": 4.339179716975214e-05, "loss": 1.4089, "step": 6024 }, { "epoch": 1.085341139383022, "grad_norm": 1.4047330617904663, "learning_rate": 4.33777601371666e-05, "loss": 1.2615, "step": 6025 }, { "epoch": 1.085521278991218, "grad_norm": 1.4291481971740723, "learning_rate": 4.336372363581391e-05, "loss": 1.4737, "step": 6026 }, { "epoch": 1.0857014185994145, "grad_norm": 1.3553016185760498, "learning_rate": 4.334968766682008e-05, "loss": 1.5754, "step": 6027 }, { "epoch": 1.085881558207611, "grad_norm": 1.5707887411117554, "learning_rate": 4.333565223131107e-05, "loss": 1.7987, "step": 6028 }, { "epoch": 1.0860616978158073, "grad_norm": 1.5148882865905762, "learning_rate": 4.332161733041277e-05, "loss": 1.6844, "step": 6029 }, { "epoch": 1.0862418374240037, "grad_norm": 1.5114638805389404, "learning_rate": 4.330758296525111e-05, "loss": 1.5731, "step": 6030 }, { "epoch": 1.0864219770322, "grad_norm": 1.44391667842865, "learning_rate": 4.329354913695187e-05, "loss": 1.4934, "step": 6031 }, { "epoch": 1.0866021166403963, "grad_norm": 1.3163456916809082, "learning_rate": 4.327951584664085e-05, "loss": 1.3704, "step": 6032 }, { "epoch": 1.0867822562485927, "grad_norm": 1.4682332277297974, "learning_rate": 4.3265483095443775e-05, "loss": 1.4003, "step": 6033 }, { "epoch": 1.086962395856789, "grad_norm": 1.3556357622146606, "learning_rate": 4.325145088448639e-05, "loss": 1.2178, "step": 6034 }, { "epoch": 1.0871425354649853, "grad_norm": 1.542004108428955, "learning_rate": 4.3237419214894325e-05, "loss": 1.507, "step": 6035 }, { "epoch": 1.0873226750731817, "grad_norm": 1.406859278678894, "learning_rate": 4.322338808779321e-05, "loss": 1.4783, "step": 6036 }, { "epoch": 1.087502814681378, "grad_norm": 1.5269654989242554, "learning_rate": 4.320935750430859e-05, "loss": 1.5656, "step": 6037 }, { "epoch": 1.0876829542895745, "grad_norm": 1.5209910869598389, "learning_rate": 4.3195327465565984e-05, "loss": 1.4596, "step": 6038 }, { "epoch": 1.0878630938977707, "grad_norm": 1.37599515914917, "learning_rate": 4.3181297972690906e-05, "loss": 1.2902, "step": 6039 }, { "epoch": 1.088043233505967, "grad_norm": 1.4066253900527954, "learning_rate": 4.3167269026808794e-05, "loss": 1.496, "step": 6040 }, { "epoch": 1.0882233731141635, "grad_norm": 1.4396476745605469, "learning_rate": 4.315324062904503e-05, "loss": 1.3581, "step": 6041 }, { "epoch": 1.08840351272236, "grad_norm": 1.4745769500732422, "learning_rate": 4.313921278052498e-05, "loss": 1.4068, "step": 6042 }, { "epoch": 1.088583652330556, "grad_norm": 1.3182926177978516, "learning_rate": 4.3125185482373904e-05, "loss": 1.2551, "step": 6043 }, { "epoch": 1.0887637919387525, "grad_norm": 1.494860053062439, "learning_rate": 4.311115873571712e-05, "loss": 1.5242, "step": 6044 }, { "epoch": 1.088943931546949, "grad_norm": 1.4621728658676147, "learning_rate": 4.309713254167983e-05, "loss": 1.3596, "step": 6045 }, { "epoch": 1.0891240711551453, "grad_norm": 1.654954195022583, "learning_rate": 4.3083106901387205e-05, "loss": 1.6727, "step": 6046 }, { "epoch": 1.0893042107633415, "grad_norm": 1.3115841150283813, "learning_rate": 4.306908181596436e-05, "loss": 1.1973, "step": 6047 }, { "epoch": 1.089484350371538, "grad_norm": 1.5648349523544312, "learning_rate": 4.305505728653641e-05, "loss": 1.6711, "step": 6048 }, { "epoch": 1.0896644899797343, "grad_norm": 1.4789600372314453, "learning_rate": 4.304103331422839e-05, "loss": 1.4837, "step": 6049 }, { "epoch": 1.0898446295879307, "grad_norm": 1.466086983680725, "learning_rate": 4.302700990016527e-05, "loss": 1.399, "step": 6050 }, { "epoch": 1.090024769196127, "grad_norm": 1.5863369703292847, "learning_rate": 4.301298704547203e-05, "loss": 1.4064, "step": 6051 }, { "epoch": 1.0902049088043233, "grad_norm": 1.335128664970398, "learning_rate": 4.2998964751273556e-05, "loss": 1.9362, "step": 6052 }, { "epoch": 1.0903850484125197, "grad_norm": 1.3851066827774048, "learning_rate": 4.298494301869473e-05, "loss": 1.9888, "step": 6053 }, { "epoch": 1.0905651880207161, "grad_norm": 1.3395419120788574, "learning_rate": 4.297092184886036e-05, "loss": 1.993, "step": 6054 }, { "epoch": 1.0907453276289125, "grad_norm": 1.314893126487732, "learning_rate": 4.295690124289521e-05, "loss": 1.8678, "step": 6055 }, { "epoch": 1.0909254672371087, "grad_norm": 1.322532296180725, "learning_rate": 4.2942881201924015e-05, "loss": 2.1428, "step": 6056 }, { "epoch": 1.091105606845305, "grad_norm": 1.3147462606430054, "learning_rate": 4.292886172707144e-05, "loss": 1.4492, "step": 6057 }, { "epoch": 1.0912857464535015, "grad_norm": 1.4396051168441772, "learning_rate": 4.291484281946214e-05, "loss": 1.7382, "step": 6058 }, { "epoch": 1.0914658860616977, "grad_norm": 1.4112471342086792, "learning_rate": 4.290082448022071e-05, "loss": 1.7363, "step": 6059 }, { "epoch": 1.091646025669894, "grad_norm": 1.578538179397583, "learning_rate": 4.2886806710471674e-05, "loss": 1.8698, "step": 6060 }, { "epoch": 1.0918261652780905, "grad_norm": 1.9945176839828491, "learning_rate": 4.2872789511339517e-05, "loss": 2.2201, "step": 6061 }, { "epoch": 1.092006304886287, "grad_norm": 1.579054355621338, "learning_rate": 4.2858772883948744e-05, "loss": 1.6256, "step": 6062 }, { "epoch": 1.0921864444944833, "grad_norm": 1.2972469329833984, "learning_rate": 4.284475682942373e-05, "loss": 1.3623, "step": 6063 }, { "epoch": 1.0923665841026795, "grad_norm": 1.48723566532135, "learning_rate": 4.2830741348888834e-05, "loss": 1.7508, "step": 6064 }, { "epoch": 1.092546723710876, "grad_norm": 1.3057808876037598, "learning_rate": 4.2816726443468366e-05, "loss": 1.3124, "step": 6065 }, { "epoch": 1.0927268633190723, "grad_norm": 1.4572094678878784, "learning_rate": 4.2802712114286593e-05, "loss": 1.4669, "step": 6066 }, { "epoch": 1.0929070029272687, "grad_norm": 1.3565524816513062, "learning_rate": 4.278869836246777e-05, "loss": 1.4857, "step": 6067 }, { "epoch": 1.093087142535465, "grad_norm": 1.3721365928649902, "learning_rate": 4.2774685189136055e-05, "loss": 1.2895, "step": 6068 }, { "epoch": 1.0932672821436613, "grad_norm": 1.3331338167190552, "learning_rate": 4.276067259541557e-05, "loss": 1.2973, "step": 6069 }, { "epoch": 1.0934474217518577, "grad_norm": 1.4717903137207031, "learning_rate": 4.2746660582430404e-05, "loss": 1.522, "step": 6070 }, { "epoch": 1.0936275613600541, "grad_norm": 1.4906092882156372, "learning_rate": 4.273264915130457e-05, "loss": 1.6026, "step": 6071 }, { "epoch": 1.0938077009682503, "grad_norm": 1.4196577072143555, "learning_rate": 4.27186383031621e-05, "loss": 1.4763, "step": 6072 }, { "epoch": 1.0939878405764467, "grad_norm": 1.445973515510559, "learning_rate": 4.270462803912692e-05, "loss": 1.5939, "step": 6073 }, { "epoch": 1.0941679801846431, "grad_norm": 1.4378174543380737, "learning_rate": 4.269061836032293e-05, "loss": 1.5654, "step": 6074 }, { "epoch": 1.0943481197928395, "grad_norm": 1.4773672819137573, "learning_rate": 4.2676609267873955e-05, "loss": 1.7185, "step": 6075 }, { "epoch": 1.0945282594010357, "grad_norm": 1.503448724746704, "learning_rate": 4.266260076290384e-05, "loss": 1.491, "step": 6076 }, { "epoch": 1.0947083990092321, "grad_norm": 1.4960205554962158, "learning_rate": 4.26485928465363e-05, "loss": 1.6752, "step": 6077 }, { "epoch": 1.0948885386174285, "grad_norm": 1.517636775970459, "learning_rate": 4.263458551989508e-05, "loss": 1.393, "step": 6078 }, { "epoch": 1.095068678225625, "grad_norm": 1.5256741046905518, "learning_rate": 4.2620578784103817e-05, "loss": 1.3955, "step": 6079 }, { "epoch": 1.095248817833821, "grad_norm": 1.5632925033569336, "learning_rate": 4.260657264028611e-05, "loss": 1.781, "step": 6080 }, { "epoch": 1.0954289574420175, "grad_norm": 1.4825087785720825, "learning_rate": 4.259256708956557e-05, "loss": 1.3952, "step": 6081 }, { "epoch": 1.095609097050214, "grad_norm": 1.4218758344650269, "learning_rate": 4.257856213306568e-05, "loss": 1.3979, "step": 6082 }, { "epoch": 1.0957892366584103, "grad_norm": 1.3864850997924805, "learning_rate": 4.256455777190992e-05, "loss": 1.4768, "step": 6083 }, { "epoch": 1.0959693762666065, "grad_norm": 1.3884128332138062, "learning_rate": 4.2550554007221714e-05, "loss": 1.3947, "step": 6084 }, { "epoch": 1.096149515874803, "grad_norm": 1.3406847715377808, "learning_rate": 4.2536550840124437e-05, "loss": 1.3267, "step": 6085 }, { "epoch": 1.0963296554829993, "grad_norm": 1.5844337940216064, "learning_rate": 4.252254827174141e-05, "loss": 1.6502, "step": 6086 }, { "epoch": 1.0965097950911957, "grad_norm": 1.4591584205627441, "learning_rate": 4.250854630319593e-05, "loss": 1.3718, "step": 6087 }, { "epoch": 1.0966899346993921, "grad_norm": 1.604280710220337, "learning_rate": 4.249454493561121e-05, "loss": 1.5846, "step": 6088 }, { "epoch": 1.0968700743075883, "grad_norm": 1.5479811429977417, "learning_rate": 4.2480544170110434e-05, "loss": 1.5252, "step": 6089 }, { "epoch": 1.0970502139157847, "grad_norm": 1.4364222288131714, "learning_rate": 4.2466544007816757e-05, "loss": 1.5394, "step": 6090 }, { "epoch": 1.0972303535239811, "grad_norm": 1.6261520385742188, "learning_rate": 4.245254444985324e-05, "loss": 1.4463, "step": 6091 }, { "epoch": 1.0974104931321775, "grad_norm": 1.571993112564087, "learning_rate": 4.2438545497342944e-05, "loss": 1.5242, "step": 6092 }, { "epoch": 1.0975906327403737, "grad_norm": 1.4871073961257935, "learning_rate": 4.2424547151408835e-05, "loss": 1.1836, "step": 6093 }, { "epoch": 1.0977707723485701, "grad_norm": 1.4581156969070435, "learning_rate": 4.241054941317384e-05, "loss": 1.4042, "step": 6094 }, { "epoch": 1.0979509119567665, "grad_norm": 1.4372199773788452, "learning_rate": 4.239655228376091e-05, "loss": 1.4195, "step": 6095 }, { "epoch": 1.098131051564963, "grad_norm": 1.6576018333435059, "learning_rate": 4.2382555764292844e-05, "loss": 1.4222, "step": 6096 }, { "epoch": 1.0983111911731591, "grad_norm": 1.435503363609314, "learning_rate": 4.2368559855892445e-05, "loss": 1.3131, "step": 6097 }, { "epoch": 1.0984913307813555, "grad_norm": 1.5506922006607056, "learning_rate": 4.235456455968245e-05, "loss": 1.5346, "step": 6098 }, { "epoch": 1.098671470389552, "grad_norm": 1.3754539489746094, "learning_rate": 4.234056987678554e-05, "loss": 1.212, "step": 6099 }, { "epoch": 1.0988516099977483, "grad_norm": 1.4971472024917603, "learning_rate": 4.232657580832442e-05, "loss": 1.0587, "step": 6100 }, { "epoch": 1.0990317496059445, "grad_norm": 1.2814102172851562, "learning_rate": 4.231258235542165e-05, "loss": 1.0463, "step": 6101 }, { "epoch": 1.099211889214141, "grad_norm": 1.3388750553131104, "learning_rate": 4.2298589519199774e-05, "loss": 1.9516, "step": 6102 }, { "epoch": 1.0993920288223373, "grad_norm": 1.3929924964904785, "learning_rate": 4.228459730078127e-05, "loss": 2.0303, "step": 6103 }, { "epoch": 1.0995721684305337, "grad_norm": 1.508176565170288, "learning_rate": 4.227060570128863e-05, "loss": 1.859, "step": 6104 }, { "epoch": 1.09975230803873, "grad_norm": 1.3213920593261719, "learning_rate": 4.225661472184424e-05, "loss": 1.6766, "step": 6105 }, { "epoch": 1.0999324476469263, "grad_norm": 1.2895421981811523, "learning_rate": 4.224262436357045e-05, "loss": 1.6967, "step": 6106 }, { "epoch": 1.1001125872551227, "grad_norm": 1.4962865114212036, "learning_rate": 4.222863462758956e-05, "loss": 1.9401, "step": 6107 }, { "epoch": 1.1002927268633191, "grad_norm": 1.7972993850708008, "learning_rate": 4.2214645515023785e-05, "loss": 1.8173, "step": 6108 }, { "epoch": 1.1004728664715153, "grad_norm": 1.3864963054656982, "learning_rate": 4.2200657026995373e-05, "loss": 1.7349, "step": 6109 }, { "epoch": 1.1006530060797117, "grad_norm": 1.4920361042022705, "learning_rate": 4.218666916462646e-05, "loss": 1.8326, "step": 6110 }, { "epoch": 1.1008331456879081, "grad_norm": 1.8557910919189453, "learning_rate": 4.217268192903914e-05, "loss": 2.0118, "step": 6111 }, { "epoch": 1.1010132852961045, "grad_norm": 1.4169776439666748, "learning_rate": 4.215869532135544e-05, "loss": 1.6487, "step": 6112 }, { "epoch": 1.101193424904301, "grad_norm": 1.309471607208252, "learning_rate": 4.214470934269741e-05, "loss": 1.4913, "step": 6113 }, { "epoch": 1.1013735645124971, "grad_norm": 1.285409688949585, "learning_rate": 4.213072399418697e-05, "loss": 1.4006, "step": 6114 }, { "epoch": 1.1015537041206935, "grad_norm": 1.4673672914505005, "learning_rate": 4.2116739276946004e-05, "loss": 1.4354, "step": 6115 }, { "epoch": 1.10173384372889, "grad_norm": 1.3363569974899292, "learning_rate": 4.210275519209639e-05, "loss": 1.3883, "step": 6116 }, { "epoch": 1.1019139833370863, "grad_norm": 1.3354419469833374, "learning_rate": 4.208877174075989e-05, "loss": 1.3839, "step": 6117 }, { "epoch": 1.1020941229452825, "grad_norm": 1.4057977199554443, "learning_rate": 4.2074788924058274e-05, "loss": 1.3925, "step": 6118 }, { "epoch": 1.102274262553479, "grad_norm": 1.295888900756836, "learning_rate": 4.206080674311323e-05, "loss": 1.3169, "step": 6119 }, { "epoch": 1.1024544021616753, "grad_norm": 1.305772304534912, "learning_rate": 4.204682519904641e-05, "loss": 1.3151, "step": 6120 }, { "epoch": 1.1026345417698717, "grad_norm": 1.3515914678573608, "learning_rate": 4.203284429297939e-05, "loss": 1.3692, "step": 6121 }, { "epoch": 1.102814681378068, "grad_norm": 1.3483145236968994, "learning_rate": 4.2018864026033713e-05, "loss": 1.2599, "step": 6122 }, { "epoch": 1.1029948209862643, "grad_norm": 1.3273777961730957, "learning_rate": 4.200488439933089e-05, "loss": 1.315, "step": 6123 }, { "epoch": 1.1031749605944607, "grad_norm": 1.4054166078567505, "learning_rate": 4.1990905413992345e-05, "loss": 1.613, "step": 6124 }, { "epoch": 1.1033551002026571, "grad_norm": 1.3974183797836304, "learning_rate": 4.197692707113946e-05, "loss": 1.3795, "step": 6125 }, { "epoch": 1.1035352398108533, "grad_norm": 1.3704001903533936, "learning_rate": 4.196294937189356e-05, "loss": 1.4559, "step": 6126 }, { "epoch": 1.1037153794190497, "grad_norm": 1.3539764881134033, "learning_rate": 4.1948972317375986e-05, "loss": 1.4313, "step": 6127 }, { "epoch": 1.1038955190272461, "grad_norm": 1.3365424871444702, "learning_rate": 4.1934995908707914e-05, "loss": 1.2696, "step": 6128 }, { "epoch": 1.1040756586354425, "grad_norm": 1.4005861282348633, "learning_rate": 4.1921020147010546e-05, "loss": 1.3777, "step": 6129 }, { "epoch": 1.1042557982436387, "grad_norm": 1.512091040611267, "learning_rate": 4.1907045033405e-05, "loss": 1.6633, "step": 6130 }, { "epoch": 1.1044359378518351, "grad_norm": 1.5257515907287598, "learning_rate": 4.189307056901234e-05, "loss": 1.3437, "step": 6131 }, { "epoch": 1.1046160774600315, "grad_norm": 1.5249392986297607, "learning_rate": 4.1879096754953635e-05, "loss": 1.4375, "step": 6132 }, { "epoch": 1.104796217068228, "grad_norm": 1.3857553005218506, "learning_rate": 4.186512359234984e-05, "loss": 1.409, "step": 6133 }, { "epoch": 1.1049763566764241, "grad_norm": 1.7051955461502075, "learning_rate": 4.185115108232186e-05, "loss": 1.5983, "step": 6134 }, { "epoch": 1.1051564962846205, "grad_norm": 1.4331978559494019, "learning_rate": 4.183717922599057e-05, "loss": 1.4122, "step": 6135 }, { "epoch": 1.105336635892817, "grad_norm": 1.413873314857483, "learning_rate": 4.1823208024476763e-05, "loss": 1.5692, "step": 6136 }, { "epoch": 1.1055167755010133, "grad_norm": 1.3154181241989136, "learning_rate": 4.180923747890125e-05, "loss": 1.3426, "step": 6137 }, { "epoch": 1.1056969151092098, "grad_norm": 1.4994357824325562, "learning_rate": 4.1795267590384715e-05, "loss": 1.3224, "step": 6138 }, { "epoch": 1.105877054717406, "grad_norm": 1.5189157724380493, "learning_rate": 4.178129836004783e-05, "loss": 1.3851, "step": 6139 }, { "epoch": 1.1060571943256023, "grad_norm": 1.490999698638916, "learning_rate": 4.176732978901115e-05, "loss": 1.508, "step": 6140 }, { "epoch": 1.1062373339337987, "grad_norm": 1.5014995336532593, "learning_rate": 4.175336187839529e-05, "loss": 1.4084, "step": 6141 }, { "epoch": 1.106417473541995, "grad_norm": 1.5019289255142212, "learning_rate": 4.1739394629320725e-05, "loss": 1.4676, "step": 6142 }, { "epoch": 1.1065976131501913, "grad_norm": 1.5706757307052612, "learning_rate": 4.17254280429079e-05, "loss": 1.5517, "step": 6143 }, { "epoch": 1.1067777527583877, "grad_norm": 1.3584853410720825, "learning_rate": 4.17114621202772e-05, "loss": 1.2703, "step": 6144 }, { "epoch": 1.1069578923665842, "grad_norm": 1.4021124839782715, "learning_rate": 4.1697496862548965e-05, "loss": 1.4356, "step": 6145 }, { "epoch": 1.1071380319747806, "grad_norm": 1.4259263277053833, "learning_rate": 4.1683532270843504e-05, "loss": 1.3867, "step": 6146 }, { "epoch": 1.1073181715829767, "grad_norm": 1.5630364418029785, "learning_rate": 4.1669568346281025e-05, "loss": 1.4107, "step": 6147 }, { "epoch": 1.1074983111911731, "grad_norm": 1.4057809114456177, "learning_rate": 4.165560508998172e-05, "loss": 1.3747, "step": 6148 }, { "epoch": 1.1076784507993696, "grad_norm": 1.383571743965149, "learning_rate": 4.164164250306571e-05, "loss": 1.2607, "step": 6149 }, { "epoch": 1.107858590407566, "grad_norm": 1.5991369485855103, "learning_rate": 4.162768058665305e-05, "loss": 1.3144, "step": 6150 }, { "epoch": 1.1080387300157621, "grad_norm": 1.554791808128357, "learning_rate": 4.1613719341863795e-05, "loss": 1.2946, "step": 6151 }, { "epoch": 1.1082188696239585, "grad_norm": 1.2826147079467773, "learning_rate": 4.1599758769817884e-05, "loss": 1.629, "step": 6152 }, { "epoch": 1.108399009232155, "grad_norm": 1.3295612335205078, "learning_rate": 4.1585798871635235e-05, "loss": 1.7586, "step": 6153 }, { "epoch": 1.1085791488403514, "grad_norm": 1.305613398551941, "learning_rate": 4.157183964843569e-05, "loss": 1.6992, "step": 6154 }, { "epoch": 1.1087592884485475, "grad_norm": 1.301514983177185, "learning_rate": 4.155788110133908e-05, "loss": 1.5269, "step": 6155 }, { "epoch": 1.108939428056744, "grad_norm": 1.3898534774780273, "learning_rate": 4.154392323146513e-05, "loss": 1.9453, "step": 6156 }, { "epoch": 1.1091195676649404, "grad_norm": 1.3027302026748657, "learning_rate": 4.152996603993354e-05, "loss": 1.5309, "step": 6157 }, { "epoch": 1.1092997072731368, "grad_norm": 1.3024576902389526, "learning_rate": 4.151600952786395e-05, "loss": 1.681, "step": 6158 }, { "epoch": 1.109479846881333, "grad_norm": 1.5968878269195557, "learning_rate": 4.150205369637593e-05, "loss": 2.0805, "step": 6159 }, { "epoch": 1.1096599864895293, "grad_norm": 1.414453387260437, "learning_rate": 4.148809854658904e-05, "loss": 1.7257, "step": 6160 }, { "epoch": 1.1098401260977258, "grad_norm": 1.6638097763061523, "learning_rate": 4.1474144079622734e-05, "loss": 1.6771, "step": 6161 }, { "epoch": 1.1100202657059222, "grad_norm": 1.6642818450927734, "learning_rate": 4.146019029659643e-05, "loss": 1.7272, "step": 6162 }, { "epoch": 1.1102004053141183, "grad_norm": 1.2560365200042725, "learning_rate": 4.1446237198629515e-05, "loss": 1.4937, "step": 6163 }, { "epoch": 1.1103805449223147, "grad_norm": 1.5085198879241943, "learning_rate": 4.143228478684126e-05, "loss": 1.7527, "step": 6164 }, { "epoch": 1.1105606845305112, "grad_norm": 1.3567441701889038, "learning_rate": 4.141833306235096e-05, "loss": 1.4795, "step": 6165 }, { "epoch": 1.1107408241387076, "grad_norm": 1.3132731914520264, "learning_rate": 4.140438202627782e-05, "loss": 1.3473, "step": 6166 }, { "epoch": 1.1109209637469037, "grad_norm": 1.3185608386993408, "learning_rate": 4.139043167974095e-05, "loss": 1.3879, "step": 6167 }, { "epoch": 1.1111011033551002, "grad_norm": 1.4361873865127563, "learning_rate": 4.1376482023859444e-05, "loss": 1.3423, "step": 6168 }, { "epoch": 1.1112812429632966, "grad_norm": 1.4034762382507324, "learning_rate": 4.1362533059752367e-05, "loss": 1.2948, "step": 6169 }, { "epoch": 1.111461382571493, "grad_norm": 1.4269036054611206, "learning_rate": 4.134858478853868e-05, "loss": 1.4175, "step": 6170 }, { "epoch": 1.1116415221796894, "grad_norm": 1.3170973062515259, "learning_rate": 4.133463721133731e-05, "loss": 1.3649, "step": 6171 }, { "epoch": 1.1118216617878856, "grad_norm": 1.3620795011520386, "learning_rate": 4.132069032926713e-05, "loss": 1.3632, "step": 6172 }, { "epoch": 1.112001801396082, "grad_norm": 1.4967515468597412, "learning_rate": 4.1306744143446916e-05, "loss": 1.4926, "step": 6173 }, { "epoch": 1.1121819410042784, "grad_norm": 1.3666870594024658, "learning_rate": 4.129279865499547e-05, "loss": 1.4527, "step": 6174 }, { "epoch": 1.1123620806124748, "grad_norm": 1.4432941675186157, "learning_rate": 4.1278853865031476e-05, "loss": 1.4963, "step": 6175 }, { "epoch": 1.112542220220671, "grad_norm": 1.399491310119629, "learning_rate": 4.1264909774673575e-05, "loss": 1.3659, "step": 6176 }, { "epoch": 1.1127223598288674, "grad_norm": 1.400530219078064, "learning_rate": 4.125096638504036e-05, "loss": 1.4273, "step": 6177 }, { "epoch": 1.1129024994370638, "grad_norm": 1.2825788259506226, "learning_rate": 4.123702369725035e-05, "loss": 1.3412, "step": 6178 }, { "epoch": 1.1130826390452602, "grad_norm": 1.600413203239441, "learning_rate": 4.1223081712422034e-05, "loss": 1.4961, "step": 6179 }, { "epoch": 1.1132627786534564, "grad_norm": 1.4961121082305908, "learning_rate": 4.1209140431673836e-05, "loss": 1.5415, "step": 6180 }, { "epoch": 1.1134429182616528, "grad_norm": 1.5522667169570923, "learning_rate": 4.119519985612411e-05, "loss": 1.4536, "step": 6181 }, { "epoch": 1.1136230578698492, "grad_norm": 1.3497360944747925, "learning_rate": 4.118125998689115e-05, "loss": 1.2582, "step": 6182 }, { "epoch": 1.1138031974780456, "grad_norm": 1.4879268407821655, "learning_rate": 4.116732082509323e-05, "loss": 1.3136, "step": 6183 }, { "epoch": 1.1139833370862418, "grad_norm": 1.3646485805511475, "learning_rate": 4.115338237184853e-05, "loss": 1.4057, "step": 6184 }, { "epoch": 1.1141634766944382, "grad_norm": 1.564008116722107, "learning_rate": 4.1139444628275184e-05, "loss": 1.6474, "step": 6185 }, { "epoch": 1.1143436163026346, "grad_norm": 1.4447704553604126, "learning_rate": 4.112550759549128e-05, "loss": 1.4193, "step": 6186 }, { "epoch": 1.114523755910831, "grad_norm": 1.3231905698776245, "learning_rate": 4.1111571274614826e-05, "loss": 1.3128, "step": 6187 }, { "epoch": 1.1147038955190272, "grad_norm": 1.4894272089004517, "learning_rate": 4.1097635666763804e-05, "loss": 1.4481, "step": 6188 }, { "epoch": 1.1148840351272236, "grad_norm": 1.4499069452285767, "learning_rate": 4.1083700773056106e-05, "loss": 1.5206, "step": 6189 }, { "epoch": 1.11506417473542, "grad_norm": 1.5077327489852905, "learning_rate": 4.106976659460959e-05, "loss": 1.2893, "step": 6190 }, { "epoch": 1.1152443143436164, "grad_norm": 1.443794846534729, "learning_rate": 4.105583313254205e-05, "loss": 1.3777, "step": 6191 }, { "epoch": 1.1154244539518126, "grad_norm": 1.6584303379058838, "learning_rate": 4.104190038797121e-05, "loss": 1.5709, "step": 6192 }, { "epoch": 1.115604593560009, "grad_norm": 1.5060536861419678, "learning_rate": 4.1027968362014767e-05, "loss": 1.452, "step": 6193 }, { "epoch": 1.1157847331682054, "grad_norm": 1.540624737739563, "learning_rate": 4.101403705579033e-05, "loss": 1.3916, "step": 6194 }, { "epoch": 1.1159648727764018, "grad_norm": 1.7504982948303223, "learning_rate": 4.1000106470415465e-05, "loss": 1.7559, "step": 6195 }, { "epoch": 1.1161450123845982, "grad_norm": 1.3713117837905884, "learning_rate": 4.098617660700766e-05, "loss": 1.3707, "step": 6196 }, { "epoch": 1.1163251519927944, "grad_norm": 1.4045027494430542, "learning_rate": 4.097224746668439e-05, "loss": 1.3817, "step": 6197 }, { "epoch": 1.1165052916009908, "grad_norm": 1.4067449569702148, "learning_rate": 4.095831905056305e-05, "loss": 1.2176, "step": 6198 }, { "epoch": 1.1166854312091872, "grad_norm": 1.4754767417907715, "learning_rate": 4.0944391359760936e-05, "loss": 1.4074, "step": 6199 }, { "epoch": 1.1168655708173834, "grad_norm": 1.6160401105880737, "learning_rate": 4.093046439539534e-05, "loss": 1.4791, "step": 6200 }, { "epoch": 1.1170457104255798, "grad_norm": 1.523664951324463, "learning_rate": 4.0916538158583444e-05, "loss": 1.471, "step": 6201 }, { "epoch": 1.1172258500337762, "grad_norm": 1.2320739030838013, "learning_rate": 4.090261265044246e-05, "loss": 1.7411, "step": 6202 }, { "epoch": 1.1174059896419726, "grad_norm": 1.2761849164962769, "learning_rate": 4.0888687872089454e-05, "loss": 1.7275, "step": 6203 }, { "epoch": 1.117586129250169, "grad_norm": 1.428338885307312, "learning_rate": 4.0874763824641485e-05, "loss": 1.7193, "step": 6204 }, { "epoch": 1.1177662688583652, "grad_norm": 1.2932162284851074, "learning_rate": 4.0860840509215496e-05, "loss": 1.8825, "step": 6205 }, { "epoch": 1.1179464084665616, "grad_norm": 1.3267757892608643, "learning_rate": 4.084691792692842e-05, "loss": 1.7919, "step": 6206 }, { "epoch": 1.118126548074758, "grad_norm": 1.3492305278778076, "learning_rate": 4.0832996078897144e-05, "loss": 1.7378, "step": 6207 }, { "epoch": 1.1183066876829544, "grad_norm": 1.4578297138214111, "learning_rate": 4.0819074966238454e-05, "loss": 1.6261, "step": 6208 }, { "epoch": 1.1184868272911506, "grad_norm": 1.4927153587341309, "learning_rate": 4.0805154590069105e-05, "loss": 1.7397, "step": 6209 }, { "epoch": 1.118666966899347, "grad_norm": 1.5851614475250244, "learning_rate": 4.079123495150577e-05, "loss": 1.7325, "step": 6210 }, { "epoch": 1.1188471065075434, "grad_norm": 1.8917169570922852, "learning_rate": 4.0777316051665083e-05, "loss": 2.1976, "step": 6211 }, { "epoch": 1.1190272461157398, "grad_norm": 1.5172230005264282, "learning_rate": 4.076339789166361e-05, "loss": 1.6581, "step": 6212 }, { "epoch": 1.119207385723936, "grad_norm": 1.4962286949157715, "learning_rate": 4.074948047261787e-05, "loss": 1.7189, "step": 6213 }, { "epoch": 1.1193875253321324, "grad_norm": 1.5342788696289062, "learning_rate": 4.0735563795644294e-05, "loss": 1.4977, "step": 6214 }, { "epoch": 1.1195676649403288, "grad_norm": 1.3722361326217651, "learning_rate": 4.072164786185927e-05, "loss": 1.4322, "step": 6215 }, { "epoch": 1.1197478045485252, "grad_norm": 1.3871073722839355, "learning_rate": 4.070773267237914e-05, "loss": 1.2903, "step": 6216 }, { "epoch": 1.1199279441567214, "grad_norm": 1.4097100496292114, "learning_rate": 4.0693818228320166e-05, "loss": 1.5195, "step": 6217 }, { "epoch": 1.1201080837649178, "grad_norm": 1.2969202995300293, "learning_rate": 4.067990453079857e-05, "loss": 1.3181, "step": 6218 }, { "epoch": 1.1202882233731142, "grad_norm": 1.3555386066436768, "learning_rate": 4.066599158093049e-05, "loss": 1.4059, "step": 6219 }, { "epoch": 1.1204683629813106, "grad_norm": 1.2622871398925781, "learning_rate": 4.065207937983201e-05, "loss": 1.3708, "step": 6220 }, { "epoch": 1.1206485025895068, "grad_norm": 1.2996799945831299, "learning_rate": 4.063816792861918e-05, "loss": 1.3131, "step": 6221 }, { "epoch": 1.1208286421977032, "grad_norm": 1.4033098220825195, "learning_rate": 4.0624257228407956e-05, "loss": 1.3551, "step": 6222 }, { "epoch": 1.1210087818058996, "grad_norm": 1.4054595232009888, "learning_rate": 4.0610347280314246e-05, "loss": 1.6246, "step": 6223 }, { "epoch": 1.121188921414096, "grad_norm": 1.4164077043533325, "learning_rate": 4.0596438085453895e-05, "loss": 1.494, "step": 6224 }, { "epoch": 1.1213690610222922, "grad_norm": 1.368730068206787, "learning_rate": 4.0582529644942717e-05, "loss": 1.3676, "step": 6225 }, { "epoch": 1.1215492006304886, "grad_norm": 1.3133366107940674, "learning_rate": 4.056862195989642e-05, "loss": 1.5143, "step": 6226 }, { "epoch": 1.121729340238685, "grad_norm": 1.3156968355178833, "learning_rate": 4.0554715031430676e-05, "loss": 1.406, "step": 6227 }, { "epoch": 1.1219094798468814, "grad_norm": 1.4045811891555786, "learning_rate": 4.0540808860661096e-05, "loss": 1.3386, "step": 6228 }, { "epoch": 1.1220896194550778, "grad_norm": 1.4746567010879517, "learning_rate": 4.052690344870319e-05, "loss": 1.4049, "step": 6229 }, { "epoch": 1.122269759063274, "grad_norm": 1.46615731716156, "learning_rate": 4.051299879667251e-05, "loss": 1.6106, "step": 6230 }, { "epoch": 1.1224498986714704, "grad_norm": 1.4413039684295654, "learning_rate": 4.049909490568444e-05, "loss": 1.5601, "step": 6231 }, { "epoch": 1.1226300382796668, "grad_norm": 1.422850489616394, "learning_rate": 4.048519177685435e-05, "loss": 1.4184, "step": 6232 }, { "epoch": 1.1228101778878632, "grad_norm": 1.4803673028945923, "learning_rate": 4.047128941129752e-05, "loss": 1.3622, "step": 6233 }, { "epoch": 1.1229903174960594, "grad_norm": 1.3702976703643799, "learning_rate": 4.045738781012923e-05, "loss": 1.2834, "step": 6234 }, { "epoch": 1.1231704571042558, "grad_norm": 1.5145506858825684, "learning_rate": 4.044348697446464e-05, "loss": 1.3921, "step": 6235 }, { "epoch": 1.1233505967124522, "grad_norm": 1.624612808227539, "learning_rate": 4.042958690541887e-05, "loss": 1.3908, "step": 6236 }, { "epoch": 1.1235307363206486, "grad_norm": 1.5227570533752441, "learning_rate": 4.0415687604106986e-05, "loss": 1.5937, "step": 6237 }, { "epoch": 1.1237108759288448, "grad_norm": 1.5994679927825928, "learning_rate": 4.040178907164395e-05, "loss": 1.4782, "step": 6238 }, { "epoch": 1.1238910155370412, "grad_norm": 1.4512572288513184, "learning_rate": 4.038789130914474e-05, "loss": 1.4218, "step": 6239 }, { "epoch": 1.1240711551452376, "grad_norm": 1.345016360282898, "learning_rate": 4.03739943177242e-05, "loss": 1.2091, "step": 6240 }, { "epoch": 1.124251294753434, "grad_norm": 1.4000985622406006, "learning_rate": 4.036009809849715e-05, "loss": 1.2851, "step": 6241 }, { "epoch": 1.1244314343616302, "grad_norm": 1.5489205121994019, "learning_rate": 4.0346202652578335e-05, "loss": 1.4182, "step": 6242 }, { "epoch": 1.1246115739698266, "grad_norm": 1.365464448928833, "learning_rate": 4.0332307981082436e-05, "loss": 1.2173, "step": 6243 }, { "epoch": 1.124791713578023, "grad_norm": 1.4468257427215576, "learning_rate": 4.031841408512409e-05, "loss": 1.2509, "step": 6244 }, { "epoch": 1.1249718531862194, "grad_norm": 1.3929481506347656, "learning_rate": 4.030452096581785e-05, "loss": 1.3307, "step": 6245 }, { "epoch": 1.1251519927944156, "grad_norm": 1.3547942638397217, "learning_rate": 4.029062862427821e-05, "loss": 1.2824, "step": 6246 }, { "epoch": 1.125332132402612, "grad_norm": 1.6060343980789185, "learning_rate": 4.027673706161961e-05, "loss": 1.5025, "step": 6247 }, { "epoch": 1.1255122720108084, "grad_norm": 1.5124266147613525, "learning_rate": 4.0262846278956426e-05, "loss": 1.4452, "step": 6248 }, { "epoch": 1.1256924116190048, "grad_norm": 1.726059913635254, "learning_rate": 4.0248956277402985e-05, "loss": 1.6267, "step": 6249 }, { "epoch": 1.125872551227201, "grad_norm": 1.3140368461608887, "learning_rate": 4.0235067058073514e-05, "loss": 0.9682, "step": 6250 }, { "epoch": 1.1260526908353974, "grad_norm": 1.5666542053222656, "learning_rate": 4.0221178622082206e-05, "loss": 1.3454, "step": 6251 }, { "epoch": 1.1262328304435938, "grad_norm": 1.1789259910583496, "learning_rate": 4.0207290970543175e-05, "loss": 1.6018, "step": 6252 }, { "epoch": 1.1264129700517902, "grad_norm": 1.466103196144104, "learning_rate": 4.019340410457051e-05, "loss": 1.8465, "step": 6253 }, { "epoch": 1.1265931096599866, "grad_norm": 1.2758469581604004, "learning_rate": 4.017951802527819e-05, "loss": 1.5849, "step": 6254 }, { "epoch": 1.1267732492681828, "grad_norm": 1.4252521991729736, "learning_rate": 4.0165632733780144e-05, "loss": 1.9982, "step": 6255 }, { "epoch": 1.1269533888763792, "grad_norm": 1.3692508935928345, "learning_rate": 4.015174823119025e-05, "loss": 1.6104, "step": 6256 }, { "epoch": 1.1271335284845756, "grad_norm": 1.4817110300064087, "learning_rate": 4.013786451862231e-05, "loss": 1.9699, "step": 6257 }, { "epoch": 1.1273136680927718, "grad_norm": 1.4180225133895874, "learning_rate": 4.012398159719009e-05, "loss": 1.8913, "step": 6258 }, { "epoch": 1.1274938077009682, "grad_norm": 1.4201819896697998, "learning_rate": 4.011009946800724e-05, "loss": 1.6075, "step": 6259 }, { "epoch": 1.1276739473091646, "grad_norm": 1.7947778701782227, "learning_rate": 4.0096218132187406e-05, "loss": 2.1542, "step": 6260 }, { "epoch": 1.127854086917361, "grad_norm": 1.7639341354370117, "learning_rate": 4.008233759084411e-05, "loss": 1.7316, "step": 6261 }, { "epoch": 1.1280342265255574, "grad_norm": 1.5179598331451416, "learning_rate": 4.006845784509087e-05, "loss": 1.5267, "step": 6262 }, { "epoch": 1.1282143661337536, "grad_norm": 1.4841554164886475, "learning_rate": 4.005457889604113e-05, "loss": 1.5748, "step": 6263 }, { "epoch": 1.12839450574195, "grad_norm": 1.2656651735305786, "learning_rate": 4.0040700744808204e-05, "loss": 1.3786, "step": 6264 }, { "epoch": 1.1285746453501464, "grad_norm": 1.4172027111053467, "learning_rate": 4.002682339250542e-05, "loss": 1.4976, "step": 6265 }, { "epoch": 1.1287547849583428, "grad_norm": 1.3436676263809204, "learning_rate": 4.001294684024599e-05, "loss": 1.3844, "step": 6266 }, { "epoch": 1.128934924566539, "grad_norm": 1.3546191453933716, "learning_rate": 3.999907108914311e-05, "loss": 1.3296, "step": 6267 }, { "epoch": 1.1291150641747354, "grad_norm": 1.3178660869598389, "learning_rate": 3.9985196140309876e-05, "loss": 1.3973, "step": 6268 }, { "epoch": 1.1292952037829318, "grad_norm": 1.2529330253601074, "learning_rate": 3.9971321994859345e-05, "loss": 1.3467, "step": 6269 }, { "epoch": 1.1294753433911282, "grad_norm": 1.3508087396621704, "learning_rate": 3.995744865390447e-05, "loss": 1.4668, "step": 6270 }, { "epoch": 1.1296554829993244, "grad_norm": 1.421682357788086, "learning_rate": 3.994357611855815e-05, "loss": 1.579, "step": 6271 }, { "epoch": 1.1298356226075208, "grad_norm": 1.3042895793914795, "learning_rate": 3.992970438993327e-05, "loss": 1.2686, "step": 6272 }, { "epoch": 1.1300157622157172, "grad_norm": 1.4298288822174072, "learning_rate": 3.9915833469142607e-05, "loss": 1.4867, "step": 6273 }, { "epoch": 1.1301959018239136, "grad_norm": 1.3123995065689087, "learning_rate": 3.990196335729887e-05, "loss": 1.3188, "step": 6274 }, { "epoch": 1.1303760414321098, "grad_norm": 1.3402804136276245, "learning_rate": 3.98880940555147e-05, "loss": 1.2947, "step": 6275 }, { "epoch": 1.1305561810403062, "grad_norm": 1.485793113708496, "learning_rate": 3.987422556490271e-05, "loss": 1.6032, "step": 6276 }, { "epoch": 1.1307363206485026, "grad_norm": 1.3809276819229126, "learning_rate": 3.986035788657542e-05, "loss": 1.2968, "step": 6277 }, { "epoch": 1.130916460256699, "grad_norm": 1.3818897008895874, "learning_rate": 3.984649102164528e-05, "loss": 1.3279, "step": 6278 }, { "epoch": 1.1310965998648954, "grad_norm": 1.5038496255874634, "learning_rate": 3.9832624971224675e-05, "loss": 1.4183, "step": 6279 }, { "epoch": 1.1312767394730916, "grad_norm": 1.48307204246521, "learning_rate": 3.9818759736425935e-05, "loss": 1.5878, "step": 6280 }, { "epoch": 1.131456879081288, "grad_norm": 1.4590240716934204, "learning_rate": 3.980489531836135e-05, "loss": 1.5134, "step": 6281 }, { "epoch": 1.1316370186894844, "grad_norm": 1.5738403797149658, "learning_rate": 3.979103171814309e-05, "loss": 1.778, "step": 6282 }, { "epoch": 1.1318171582976806, "grad_norm": 1.4653578996658325, "learning_rate": 3.977716893688329e-05, "loss": 1.5657, "step": 6283 }, { "epoch": 1.131997297905877, "grad_norm": 1.470521092414856, "learning_rate": 3.976330697569403e-05, "loss": 1.4279, "step": 6284 }, { "epoch": 1.1321774375140734, "grad_norm": 1.4195396900177002, "learning_rate": 3.974944583568727e-05, "loss": 1.3877, "step": 6285 }, { "epoch": 1.1323575771222698, "grad_norm": 1.4443098306655884, "learning_rate": 3.973558551797499e-05, "loss": 1.2852, "step": 6286 }, { "epoch": 1.1325377167304662, "grad_norm": 1.5698120594024658, "learning_rate": 3.972172602366905e-05, "loss": 1.6662, "step": 6287 }, { "epoch": 1.1327178563386624, "grad_norm": 1.5509881973266602, "learning_rate": 3.9707867353881234e-05, "loss": 1.5111, "step": 6288 }, { "epoch": 1.1328979959468588, "grad_norm": 1.4579596519470215, "learning_rate": 3.969400950972327e-05, "loss": 1.4525, "step": 6289 }, { "epoch": 1.1330781355550552, "grad_norm": 1.4359208345413208, "learning_rate": 3.968015249230686e-05, "loss": 1.4731, "step": 6290 }, { "epoch": 1.1332582751632516, "grad_norm": 1.5758246183395386, "learning_rate": 3.966629630274359e-05, "loss": 1.6345, "step": 6291 }, { "epoch": 1.1334384147714478, "grad_norm": 1.3557735681533813, "learning_rate": 3.9652440942145e-05, "loss": 1.2489, "step": 6292 }, { "epoch": 1.1336185543796442, "grad_norm": 1.5547388792037964, "learning_rate": 3.963858641162255e-05, "loss": 1.3588, "step": 6293 }, { "epoch": 1.1337986939878406, "grad_norm": 1.4470330476760864, "learning_rate": 3.962473271228764e-05, "loss": 1.3628, "step": 6294 }, { "epoch": 1.133978833596037, "grad_norm": 1.5260344743728638, "learning_rate": 3.9610879845251643e-05, "loss": 1.4396, "step": 6295 }, { "epoch": 1.1341589732042332, "grad_norm": 1.450937271118164, "learning_rate": 3.95970278116258e-05, "loss": 1.3569, "step": 6296 }, { "epoch": 1.1343391128124296, "grad_norm": 1.525468111038208, "learning_rate": 3.958317661252131e-05, "loss": 1.3179, "step": 6297 }, { "epoch": 1.134519252420626, "grad_norm": 1.706613302230835, "learning_rate": 3.956932624904932e-05, "loss": 1.3127, "step": 6298 }, { "epoch": 1.1346993920288224, "grad_norm": 1.4617421627044678, "learning_rate": 3.955547672232088e-05, "loss": 1.1749, "step": 6299 }, { "epoch": 1.1348795316370186, "grad_norm": 1.493533730506897, "learning_rate": 3.954162803344703e-05, "loss": 1.2902, "step": 6300 }, { "epoch": 1.135059671245215, "grad_norm": 1.4127315282821655, "learning_rate": 3.95277801835387e-05, "loss": 1.253, "step": 6301 }, { "epoch": 1.1352398108534114, "grad_norm": 1.4898011684417725, "learning_rate": 3.951393317370672e-05, "loss": 2.1482, "step": 6302 }, { "epoch": 1.1354199504616078, "grad_norm": 1.21418035030365, "learning_rate": 3.9500087005061905e-05, "loss": 1.7092, "step": 6303 }, { "epoch": 1.135600090069804, "grad_norm": 1.4689902067184448, "learning_rate": 3.9486241678715006e-05, "loss": 1.9027, "step": 6304 }, { "epoch": 1.1357802296780004, "grad_norm": 1.3548260927200317, "learning_rate": 3.947239719577669e-05, "loss": 1.9257, "step": 6305 }, { "epoch": 1.1359603692861968, "grad_norm": 1.430167317390442, "learning_rate": 3.945855355735754e-05, "loss": 2.0, "step": 6306 }, { "epoch": 1.1361405088943932, "grad_norm": 1.4146255254745483, "learning_rate": 3.944471076456808e-05, "loss": 1.6218, "step": 6307 }, { "epoch": 1.1363206485025894, "grad_norm": 1.4098167419433594, "learning_rate": 3.9430868818518784e-05, "loss": 1.7763, "step": 6308 }, { "epoch": 1.1365007881107858, "grad_norm": 1.6797902584075928, "learning_rate": 3.941702772032004e-05, "loss": 1.9914, "step": 6309 }, { "epoch": 1.1366809277189822, "grad_norm": 1.7100614309310913, "learning_rate": 3.940318747108219e-05, "loss": 2.1759, "step": 6310 }, { "epoch": 1.1368610673271786, "grad_norm": 1.419112205505371, "learning_rate": 3.938934807191548e-05, "loss": 1.7008, "step": 6311 }, { "epoch": 1.137041206935375, "grad_norm": 1.3717199563980103, "learning_rate": 3.937550952393009e-05, "loss": 1.2475, "step": 6312 }, { "epoch": 1.1372213465435712, "grad_norm": 1.3663500547409058, "learning_rate": 3.936167182823615e-05, "loss": 1.4451, "step": 6313 }, { "epoch": 1.1374014861517676, "grad_norm": 1.3883980512619019, "learning_rate": 3.934783498594372e-05, "loss": 1.4397, "step": 6314 }, { "epoch": 1.137581625759964, "grad_norm": 1.3909910917282104, "learning_rate": 3.9333998998162776e-05, "loss": 1.4214, "step": 6315 }, { "epoch": 1.1377617653681602, "grad_norm": 1.3119492530822754, "learning_rate": 3.9320163866003235e-05, "loss": 1.4058, "step": 6316 }, { "epoch": 1.1379419049763566, "grad_norm": 1.498169183731079, "learning_rate": 3.9306329590574945e-05, "loss": 1.5825, "step": 6317 }, { "epoch": 1.138122044584553, "grad_norm": 1.2926905155181885, "learning_rate": 3.9292496172987695e-05, "loss": 1.3581, "step": 6318 }, { "epoch": 1.1383021841927494, "grad_norm": 1.4041759967803955, "learning_rate": 3.927866361435118e-05, "loss": 1.3549, "step": 6319 }, { "epoch": 1.1384823238009458, "grad_norm": 1.4187194108963013, "learning_rate": 3.9264831915775047e-05, "loss": 1.8183, "step": 6320 }, { "epoch": 1.138662463409142, "grad_norm": 1.3764961957931519, "learning_rate": 3.9251001078368874e-05, "loss": 1.3475, "step": 6321 }, { "epoch": 1.1388426030173384, "grad_norm": 1.3768562078475952, "learning_rate": 3.923717110324215e-05, "loss": 1.4672, "step": 6322 }, { "epoch": 1.1390227426255348, "grad_norm": 1.3775190114974976, "learning_rate": 3.922334199150432e-05, "loss": 1.3738, "step": 6323 }, { "epoch": 1.1392028822337312, "grad_norm": 1.4008256196975708, "learning_rate": 3.920951374426476e-05, "loss": 1.6002, "step": 6324 }, { "epoch": 1.1393830218419274, "grad_norm": 1.4886822700500488, "learning_rate": 3.9195686362632745e-05, "loss": 1.4508, "step": 6325 }, { "epoch": 1.1395631614501238, "grad_norm": 1.3538321256637573, "learning_rate": 3.9181859847717505e-05, "loss": 1.4591, "step": 6326 }, { "epoch": 1.1397433010583202, "grad_norm": 1.4685170650482178, "learning_rate": 3.916803420062818e-05, "loss": 1.6289, "step": 6327 }, { "epoch": 1.1399234406665166, "grad_norm": 1.4800105094909668, "learning_rate": 3.91542094224739e-05, "loss": 1.5703, "step": 6328 }, { "epoch": 1.1401035802747128, "grad_norm": 1.6208146810531616, "learning_rate": 3.914038551436366e-05, "loss": 1.4553, "step": 6329 }, { "epoch": 1.1402837198829092, "grad_norm": 1.3834105730056763, "learning_rate": 3.912656247740639e-05, "loss": 1.2279, "step": 6330 }, { "epoch": 1.1404638594911056, "grad_norm": 1.4585415124893188, "learning_rate": 3.911274031271097e-05, "loss": 1.3839, "step": 6331 }, { "epoch": 1.140643999099302, "grad_norm": 1.326542854309082, "learning_rate": 3.909891902138624e-05, "loss": 1.2692, "step": 6332 }, { "epoch": 1.1408241387074982, "grad_norm": 1.4512786865234375, "learning_rate": 3.908509860454092e-05, "loss": 1.3632, "step": 6333 }, { "epoch": 1.1410042783156946, "grad_norm": 1.4155592918395996, "learning_rate": 3.907127906328367e-05, "loss": 1.4285, "step": 6334 }, { "epoch": 1.141184417923891, "grad_norm": 1.5636345148086548, "learning_rate": 3.9057460398723095e-05, "loss": 1.5357, "step": 6335 }, { "epoch": 1.1413645575320874, "grad_norm": 1.5808826684951782, "learning_rate": 3.904364261196769e-05, "loss": 1.6927, "step": 6336 }, { "epoch": 1.1415446971402838, "grad_norm": 1.5937719345092773, "learning_rate": 3.902982570412596e-05, "loss": 1.4371, "step": 6337 }, { "epoch": 1.14172483674848, "grad_norm": 1.3962398767471313, "learning_rate": 3.9016009676306266e-05, "loss": 1.441, "step": 6338 }, { "epoch": 1.1419049763566764, "grad_norm": 1.26748526096344, "learning_rate": 3.900219452961692e-05, "loss": 1.2156, "step": 6339 }, { "epoch": 1.1420851159648728, "grad_norm": 1.5198264122009277, "learning_rate": 3.898838026516617e-05, "loss": 1.4083, "step": 6340 }, { "epoch": 1.142265255573069, "grad_norm": 1.433039665222168, "learning_rate": 3.8974566884062204e-05, "loss": 1.4679, "step": 6341 }, { "epoch": 1.1424453951812654, "grad_norm": 1.4591983556747437, "learning_rate": 3.8960754387413106e-05, "loss": 1.5379, "step": 6342 }, { "epoch": 1.1426255347894618, "grad_norm": 1.4905363321304321, "learning_rate": 3.894694277632692e-05, "loss": 1.4098, "step": 6343 }, { "epoch": 1.1428056743976582, "grad_norm": 1.4621517658233643, "learning_rate": 3.893313205191159e-05, "loss": 1.5027, "step": 6344 }, { "epoch": 1.1429858140058546, "grad_norm": 1.5439268350601196, "learning_rate": 3.8919322215275015e-05, "loss": 1.4637, "step": 6345 }, { "epoch": 1.1431659536140508, "grad_norm": 1.6386984586715698, "learning_rate": 3.890551326752503e-05, "loss": 1.6819, "step": 6346 }, { "epoch": 1.1433460932222472, "grad_norm": 1.6549303531646729, "learning_rate": 3.8891705209769355e-05, "loss": 1.5999, "step": 6347 }, { "epoch": 1.1435262328304436, "grad_norm": 1.6477750539779663, "learning_rate": 3.887789804311568e-05, "loss": 1.605, "step": 6348 }, { "epoch": 1.14370637243864, "grad_norm": 1.4324318170547485, "learning_rate": 3.886409176867161e-05, "loss": 1.2021, "step": 6349 }, { "epoch": 1.1438865120468362, "grad_norm": 1.399327039718628, "learning_rate": 3.885028638754466e-05, "loss": 1.3076, "step": 6350 }, { "epoch": 1.1440666516550326, "grad_norm": 1.422559142112732, "learning_rate": 3.883648190084232e-05, "loss": 1.4297, "step": 6351 }, { "epoch": 1.144246791263229, "grad_norm": 1.458188533782959, "learning_rate": 3.882267830967197e-05, "loss": 2.1258, "step": 6352 }, { "epoch": 1.1444269308714254, "grad_norm": 1.3275213241577148, "learning_rate": 3.880887561514091e-05, "loss": 1.7881, "step": 6353 }, { "epoch": 1.1446070704796216, "grad_norm": 1.3926016092300415, "learning_rate": 3.879507381835639e-05, "loss": 2.0682, "step": 6354 }, { "epoch": 1.144787210087818, "grad_norm": 1.3775486946105957, "learning_rate": 3.8781272920425605e-05, "loss": 1.8687, "step": 6355 }, { "epoch": 1.1449673496960144, "grad_norm": 1.4278829097747803, "learning_rate": 3.876747292245564e-05, "loss": 1.8642, "step": 6356 }, { "epoch": 1.1451474893042108, "grad_norm": 1.298592209815979, "learning_rate": 3.875367382555352e-05, "loss": 1.7746, "step": 6357 }, { "epoch": 1.145327628912407, "grad_norm": 1.5147939920425415, "learning_rate": 3.873987563082622e-05, "loss": 1.8696, "step": 6358 }, { "epoch": 1.1455077685206034, "grad_norm": 1.5675779581069946, "learning_rate": 3.872607833938058e-05, "loss": 1.76, "step": 6359 }, { "epoch": 1.1456879081287998, "grad_norm": 1.6738656759262085, "learning_rate": 3.871228195232348e-05, "loss": 2.1328, "step": 6360 }, { "epoch": 1.1458680477369962, "grad_norm": 1.5009450912475586, "learning_rate": 3.869848647076161e-05, "loss": 1.6758, "step": 6361 }, { "epoch": 1.1460481873451926, "grad_norm": 1.436381220817566, "learning_rate": 3.8684691895801656e-05, "loss": 1.5397, "step": 6362 }, { "epoch": 1.1462283269533888, "grad_norm": 1.3255953788757324, "learning_rate": 3.867089822855021e-05, "loss": 1.542, "step": 6363 }, { "epoch": 1.1464084665615852, "grad_norm": 1.4282640218734741, "learning_rate": 3.8657105470113755e-05, "loss": 1.6246, "step": 6364 }, { "epoch": 1.1465886061697816, "grad_norm": 1.4649337530136108, "learning_rate": 3.8643313621598807e-05, "loss": 1.5199, "step": 6365 }, { "epoch": 1.1467687457779778, "grad_norm": 1.2402459383010864, "learning_rate": 3.862952268411172e-05, "loss": 1.1631, "step": 6366 }, { "epoch": 1.1469488853861742, "grad_norm": 1.5456969738006592, "learning_rate": 3.8615732658758764e-05, "loss": 1.6976, "step": 6367 }, { "epoch": 1.1471290249943706, "grad_norm": 1.3947255611419678, "learning_rate": 3.8601943546646175e-05, "loss": 1.4114, "step": 6368 }, { "epoch": 1.147309164602567, "grad_norm": 1.5307798385620117, "learning_rate": 3.8588155348880146e-05, "loss": 1.4185, "step": 6369 }, { "epoch": 1.1474893042107634, "grad_norm": 1.4056857824325562, "learning_rate": 3.857436806656673e-05, "loss": 1.5179, "step": 6370 }, { "epoch": 1.1476694438189596, "grad_norm": 1.3568029403686523, "learning_rate": 3.8560581700811946e-05, "loss": 1.3396, "step": 6371 }, { "epoch": 1.147849583427156, "grad_norm": 1.7604305744171143, "learning_rate": 3.854679625272174e-05, "loss": 1.6465, "step": 6372 }, { "epoch": 1.1480297230353524, "grad_norm": 1.3852447271347046, "learning_rate": 3.8533011723401924e-05, "loss": 1.454, "step": 6373 }, { "epoch": 1.1482098626435486, "grad_norm": 1.4612922668457031, "learning_rate": 3.851922811395834e-05, "loss": 1.481, "step": 6374 }, { "epoch": 1.148390002251745, "grad_norm": 1.456812858581543, "learning_rate": 3.850544542549669e-05, "loss": 1.3663, "step": 6375 }, { "epoch": 1.1485701418599414, "grad_norm": 1.247881531715393, "learning_rate": 3.849166365912261e-05, "loss": 1.1812, "step": 6376 }, { "epoch": 1.1487502814681378, "grad_norm": 1.5341315269470215, "learning_rate": 3.8477882815941665e-05, "loss": 1.5675, "step": 6377 }, { "epoch": 1.1489304210763343, "grad_norm": 1.4741030931472778, "learning_rate": 3.846410289705933e-05, "loss": 1.5101, "step": 6378 }, { "epoch": 1.1491105606845304, "grad_norm": 1.4194914102554321, "learning_rate": 3.8450323903581066e-05, "loss": 1.466, "step": 6379 }, { "epoch": 1.1492907002927268, "grad_norm": 1.3781687021255493, "learning_rate": 3.843654583661218e-05, "loss": 1.4657, "step": 6380 }, { "epoch": 1.1494708399009232, "grad_norm": 1.627516746520996, "learning_rate": 3.842276869725796e-05, "loss": 1.6975, "step": 6381 }, { "epoch": 1.1496509795091197, "grad_norm": 1.3546347618103027, "learning_rate": 3.840899248662358e-05, "loss": 1.3582, "step": 6382 }, { "epoch": 1.1498311191173158, "grad_norm": 1.63339102268219, "learning_rate": 3.83952172058142e-05, "loss": 1.6237, "step": 6383 }, { "epoch": 1.1500112587255122, "grad_norm": 1.3516658544540405, "learning_rate": 3.8381442855934836e-05, "loss": 1.2936, "step": 6384 }, { "epoch": 1.1501913983337086, "grad_norm": 1.4490880966186523, "learning_rate": 3.836766943809047e-05, "loss": 1.405, "step": 6385 }, { "epoch": 1.150371537941905, "grad_norm": 1.4267319440841675, "learning_rate": 3.835389695338599e-05, "loss": 1.3181, "step": 6386 }, { "epoch": 1.1505516775501012, "grad_norm": 1.4693578481674194, "learning_rate": 3.834012540292622e-05, "loss": 1.4233, "step": 6387 }, { "epoch": 1.1507318171582976, "grad_norm": 1.3248298168182373, "learning_rate": 3.832635478781592e-05, "loss": 1.2684, "step": 6388 }, { "epoch": 1.150911956766494, "grad_norm": 1.3616430759429932, "learning_rate": 3.831258510915974e-05, "loss": 1.4677, "step": 6389 }, { "epoch": 1.1510920963746905, "grad_norm": 1.4244612455368042, "learning_rate": 3.829881636806231e-05, "loss": 1.4384, "step": 6390 }, { "epoch": 1.1512722359828866, "grad_norm": 1.437246561050415, "learning_rate": 3.828504856562812e-05, "loss": 1.5431, "step": 6391 }, { "epoch": 1.151452375591083, "grad_norm": 1.3519972562789917, "learning_rate": 3.82712817029616e-05, "loss": 1.3013, "step": 6392 }, { "epoch": 1.1516325151992794, "grad_norm": 1.6793372631072998, "learning_rate": 3.8257515781167196e-05, "loss": 1.3241, "step": 6393 }, { "epoch": 1.1518126548074759, "grad_norm": 1.5597165822982788, "learning_rate": 3.824375080134914e-05, "loss": 1.6509, "step": 6394 }, { "epoch": 1.1519927944156723, "grad_norm": 1.557692289352417, "learning_rate": 3.822998676461166e-05, "loss": 1.3878, "step": 6395 }, { "epoch": 1.1521729340238684, "grad_norm": 1.5494650602340698, "learning_rate": 3.8216223672058896e-05, "loss": 1.3849, "step": 6396 }, { "epoch": 1.1523530736320649, "grad_norm": 1.5278879404067993, "learning_rate": 3.8202461524794955e-05, "loss": 1.0978, "step": 6397 }, { "epoch": 1.1525332132402613, "grad_norm": 1.3613522052764893, "learning_rate": 3.81887003239238e-05, "loss": 1.1918, "step": 6398 }, { "epoch": 1.1527133528484574, "grad_norm": 1.652655005455017, "learning_rate": 3.8174940070549356e-05, "loss": 1.5971, "step": 6399 }, { "epoch": 1.1528934924566538, "grad_norm": 1.5285753011703491, "learning_rate": 3.816118076577546e-05, "loss": 1.1683, "step": 6400 }, { "epoch": 1.1530736320648503, "grad_norm": 1.4251883029937744, "learning_rate": 3.8147422410705846e-05, "loss": 1.0841, "step": 6401 }, { "epoch": 1.1532537716730467, "grad_norm": 1.289496660232544, "learning_rate": 3.8133665006444255e-05, "loss": 1.6405, "step": 6402 }, { "epoch": 1.153433911281243, "grad_norm": 1.2119741439819336, "learning_rate": 3.811990855409428e-05, "loss": 1.6039, "step": 6403 }, { "epoch": 1.1536140508894392, "grad_norm": 1.4235894680023193, "learning_rate": 3.8106153054759456e-05, "loss": 1.9263, "step": 6404 }, { "epoch": 1.1537941904976357, "grad_norm": 1.3450112342834473, "learning_rate": 3.8092398509543257e-05, "loss": 1.8604, "step": 6405 }, { "epoch": 1.153974330105832, "grad_norm": 1.3842041492462158, "learning_rate": 3.807864491954901e-05, "loss": 1.8604, "step": 6406 }, { "epoch": 1.1541544697140285, "grad_norm": 1.4505120515823364, "learning_rate": 3.806489228588008e-05, "loss": 1.6408, "step": 6407 }, { "epoch": 1.1543346093222246, "grad_norm": 1.4167990684509277, "learning_rate": 3.805114060963968e-05, "loss": 1.6922, "step": 6408 }, { "epoch": 1.154514748930421, "grad_norm": 1.6952906847000122, "learning_rate": 3.803738989193096e-05, "loss": 1.6504, "step": 6409 }, { "epoch": 1.1546948885386175, "grad_norm": 1.7385038137435913, "learning_rate": 3.8023640133856985e-05, "loss": 1.8393, "step": 6410 }, { "epoch": 1.1548750281468139, "grad_norm": 1.6009682416915894, "learning_rate": 3.8009891336520774e-05, "loss": 1.4323, "step": 6411 }, { "epoch": 1.15505516775501, "grad_norm": 1.4975910186767578, "learning_rate": 3.7996143501025245e-05, "loss": 1.5291, "step": 6412 }, { "epoch": 1.1552353073632065, "grad_norm": 1.3827592134475708, "learning_rate": 3.798239662847323e-05, "loss": 1.2069, "step": 6413 }, { "epoch": 1.1554154469714029, "grad_norm": 1.3769804239273071, "learning_rate": 3.796865071996751e-05, "loss": 1.2456, "step": 6414 }, { "epoch": 1.1555955865795993, "grad_norm": 1.2939969301223755, "learning_rate": 3.795490577661076e-05, "loss": 1.3427, "step": 6415 }, { "epoch": 1.1557757261877954, "grad_norm": 1.6286332607269287, "learning_rate": 3.794116179950561e-05, "loss": 1.4717, "step": 6416 }, { "epoch": 1.1559558657959919, "grad_norm": 1.4103859663009644, "learning_rate": 3.792741878975458e-05, "loss": 1.3882, "step": 6417 }, { "epoch": 1.1561360054041883, "grad_norm": 1.2872146368026733, "learning_rate": 3.791367674846015e-05, "loss": 1.3637, "step": 6418 }, { "epoch": 1.1563161450123847, "grad_norm": 1.3618946075439453, "learning_rate": 3.789993567672467e-05, "loss": 1.4437, "step": 6419 }, { "epoch": 1.156496284620581, "grad_norm": 1.3749947547912598, "learning_rate": 3.788619557565045e-05, "loss": 1.3932, "step": 6420 }, { "epoch": 1.1566764242287773, "grad_norm": 1.3605971336364746, "learning_rate": 3.7872456446339736e-05, "loss": 1.3635, "step": 6421 }, { "epoch": 1.1568565638369737, "grad_norm": 1.3279310464859009, "learning_rate": 3.785871828989465e-05, "loss": 1.356, "step": 6422 }, { "epoch": 1.15703670344517, "grad_norm": 1.3740218877792358, "learning_rate": 3.784498110741727e-05, "loss": 1.4392, "step": 6423 }, { "epoch": 1.1572168430533663, "grad_norm": 1.3229327201843262, "learning_rate": 3.7831244900009564e-05, "loss": 1.2514, "step": 6424 }, { "epoch": 1.1573969826615627, "grad_norm": 1.4733837842941284, "learning_rate": 3.78175096687735e-05, "loss": 1.4161, "step": 6425 }, { "epoch": 1.157577122269759, "grad_norm": 1.3571406602859497, "learning_rate": 3.780377541481087e-05, "loss": 1.1683, "step": 6426 }, { "epoch": 1.1577572618779555, "grad_norm": 1.405378818511963, "learning_rate": 3.779004213922343e-05, "loss": 1.4102, "step": 6427 }, { "epoch": 1.1579374014861519, "grad_norm": 1.3658502101898193, "learning_rate": 3.777630984311287e-05, "loss": 1.3832, "step": 6428 }, { "epoch": 1.158117541094348, "grad_norm": 1.374346375465393, "learning_rate": 3.776257852758074e-05, "loss": 1.3915, "step": 6429 }, { "epoch": 1.1582976807025445, "grad_norm": 1.5733004808425903, "learning_rate": 3.7748848193728644e-05, "loss": 1.6897, "step": 6430 }, { "epoch": 1.1584778203107409, "grad_norm": 1.362378716468811, "learning_rate": 3.7735118842657974e-05, "loss": 1.4484, "step": 6431 }, { "epoch": 1.158657959918937, "grad_norm": 1.5261523723602295, "learning_rate": 3.7721390475470094e-05, "loss": 1.548, "step": 6432 }, { "epoch": 1.1588380995271335, "grad_norm": 1.442342758178711, "learning_rate": 3.770766309326628e-05, "loss": 1.427, "step": 6433 }, { "epoch": 1.1590182391353299, "grad_norm": 1.503861665725708, "learning_rate": 3.7693936697147737e-05, "loss": 1.587, "step": 6434 }, { "epoch": 1.1591983787435263, "grad_norm": 1.4095183610916138, "learning_rate": 3.7680211288215614e-05, "loss": 1.3114, "step": 6435 }, { "epoch": 1.1593785183517227, "grad_norm": 1.5870410203933716, "learning_rate": 3.766648686757094e-05, "loss": 1.4534, "step": 6436 }, { "epoch": 1.1595586579599189, "grad_norm": 1.478755235671997, "learning_rate": 3.7652763436314686e-05, "loss": 1.4959, "step": 6437 }, { "epoch": 1.1597387975681153, "grad_norm": 1.4625827074050903, "learning_rate": 3.7639040995547706e-05, "loss": 1.3592, "step": 6438 }, { "epoch": 1.1599189371763117, "grad_norm": 1.5260828733444214, "learning_rate": 3.762531954637086e-05, "loss": 1.5229, "step": 6439 }, { "epoch": 1.160099076784508, "grad_norm": 1.5431325435638428, "learning_rate": 3.761159908988486e-05, "loss": 1.4521, "step": 6440 }, { "epoch": 1.1602792163927043, "grad_norm": 1.4463703632354736, "learning_rate": 3.7597879627190334e-05, "loss": 1.3324, "step": 6441 }, { "epoch": 1.1604593560009007, "grad_norm": 1.5385984182357788, "learning_rate": 3.7584161159387875e-05, "loss": 1.7039, "step": 6442 }, { "epoch": 1.160639495609097, "grad_norm": 1.502137303352356, "learning_rate": 3.7570443687577944e-05, "loss": 1.4978, "step": 6443 }, { "epoch": 1.1608196352172935, "grad_norm": 1.4563536643981934, "learning_rate": 3.755672721286098e-05, "loss": 1.5419, "step": 6444 }, { "epoch": 1.1609997748254897, "grad_norm": 1.407694697380066, "learning_rate": 3.7543011736337294e-05, "loss": 1.3218, "step": 6445 }, { "epoch": 1.161179914433686, "grad_norm": 1.5893535614013672, "learning_rate": 3.752929725910714e-05, "loss": 1.7216, "step": 6446 }, { "epoch": 1.1613600540418825, "grad_norm": 1.3737194538116455, "learning_rate": 3.751558378227067e-05, "loss": 1.2127, "step": 6447 }, { "epoch": 1.1615401936500789, "grad_norm": 1.387589693069458, "learning_rate": 3.750187130692801e-05, "loss": 1.3567, "step": 6448 }, { "epoch": 1.161720333258275, "grad_norm": 1.58565354347229, "learning_rate": 3.748815983417914e-05, "loss": 1.3706, "step": 6449 }, { "epoch": 1.1619004728664715, "grad_norm": 1.4688928127288818, "learning_rate": 3.747444936512399e-05, "loss": 1.2583, "step": 6450 }, { "epoch": 1.1620806124746679, "grad_norm": 1.393837332725525, "learning_rate": 3.746073990086242e-05, "loss": 1.2619, "step": 6451 }, { "epoch": 1.1622607520828643, "grad_norm": 1.3083038330078125, "learning_rate": 3.744703144249416e-05, "loss": 1.79, "step": 6452 }, { "epoch": 1.1624408916910607, "grad_norm": 1.3120161294937134, "learning_rate": 3.743332399111895e-05, "loss": 1.8202, "step": 6453 }, { "epoch": 1.1626210312992569, "grad_norm": 1.4391287565231323, "learning_rate": 3.741961754783636e-05, "loss": 2.0526, "step": 6454 }, { "epoch": 1.1628011709074533, "grad_norm": 1.375231146812439, "learning_rate": 3.7405912113745933e-05, "loss": 1.9177, "step": 6455 }, { "epoch": 1.1629813105156497, "grad_norm": 1.3305304050445557, "learning_rate": 3.739220768994709e-05, "loss": 1.6191, "step": 6456 }, { "epoch": 1.1631614501238459, "grad_norm": 1.34432852268219, "learning_rate": 3.737850427753921e-05, "loss": 1.7354, "step": 6457 }, { "epoch": 1.1633415897320423, "grad_norm": 1.469430923461914, "learning_rate": 3.7364801877621565e-05, "loss": 1.7996, "step": 6458 }, { "epoch": 1.1635217293402387, "grad_norm": 1.4734642505645752, "learning_rate": 3.735110049129336e-05, "loss": 1.8147, "step": 6459 }, { "epoch": 1.163701868948435, "grad_norm": 1.6314880847930908, "learning_rate": 3.733740011965372e-05, "loss": 1.5816, "step": 6460 }, { "epoch": 1.1638820085566315, "grad_norm": 1.8882904052734375, "learning_rate": 3.732370076380165e-05, "loss": 2.0899, "step": 6461 }, { "epoch": 1.1640621481648277, "grad_norm": 1.7348037958145142, "learning_rate": 3.731000242483615e-05, "loss": 1.6417, "step": 6462 }, { "epoch": 1.164242287773024, "grad_norm": 1.3300294876098633, "learning_rate": 3.7296305103856076e-05, "loss": 1.3842, "step": 6463 }, { "epoch": 1.1644224273812205, "grad_norm": 1.3609557151794434, "learning_rate": 3.7282608801960227e-05, "loss": 1.6424, "step": 6464 }, { "epoch": 1.1646025669894169, "grad_norm": 1.4475817680358887, "learning_rate": 3.72689135202473e-05, "loss": 1.6811, "step": 6465 }, { "epoch": 1.164782706597613, "grad_norm": 1.361619472503662, "learning_rate": 3.72552192598159e-05, "loss": 1.2482, "step": 6466 }, { "epoch": 1.1649628462058095, "grad_norm": 1.3050634860992432, "learning_rate": 3.724152602176463e-05, "loss": 1.2937, "step": 6467 }, { "epoch": 1.1651429858140059, "grad_norm": 1.3399953842163086, "learning_rate": 3.7227833807191926e-05, "loss": 1.1783, "step": 6468 }, { "epoch": 1.1653231254222023, "grad_norm": 1.4773786067962646, "learning_rate": 3.7214142617196176e-05, "loss": 1.2402, "step": 6469 }, { "epoch": 1.1655032650303985, "grad_norm": 1.4315987825393677, "learning_rate": 3.720045245287569e-05, "loss": 1.4231, "step": 6470 }, { "epoch": 1.1656834046385949, "grad_norm": 1.4008889198303223, "learning_rate": 3.7186763315328635e-05, "loss": 1.3352, "step": 6471 }, { "epoch": 1.1658635442467913, "grad_norm": 1.4410016536712646, "learning_rate": 3.7173075205653206e-05, "loss": 1.5302, "step": 6472 }, { "epoch": 1.1660436838549877, "grad_norm": 1.399985909461975, "learning_rate": 3.7159388124947445e-05, "loss": 1.4895, "step": 6473 }, { "epoch": 1.1662238234631839, "grad_norm": 1.3843098878860474, "learning_rate": 3.714570207430931e-05, "loss": 1.4967, "step": 6474 }, { "epoch": 1.1664039630713803, "grad_norm": 1.3149001598358154, "learning_rate": 3.713201705483668e-05, "loss": 1.304, "step": 6475 }, { "epoch": 1.1665841026795767, "grad_norm": 1.4783203601837158, "learning_rate": 3.7118333067627385e-05, "loss": 1.5237, "step": 6476 }, { "epoch": 1.166764242287773, "grad_norm": 1.3466395139694214, "learning_rate": 3.7104650113779145e-05, "loss": 1.46, "step": 6477 }, { "epoch": 1.1669443818959695, "grad_norm": 1.4924614429473877, "learning_rate": 3.709096819438958e-05, "loss": 1.3535, "step": 6478 }, { "epoch": 1.1671245215041657, "grad_norm": 1.5412014722824097, "learning_rate": 3.707728731055627e-05, "loss": 1.5989, "step": 6479 }, { "epoch": 1.167304661112362, "grad_norm": 1.3754088878631592, "learning_rate": 3.706360746337666e-05, "loss": 1.3466, "step": 6480 }, { "epoch": 1.1674848007205585, "grad_norm": 1.4865368604660034, "learning_rate": 3.704992865394817e-05, "loss": 1.3704, "step": 6481 }, { "epoch": 1.1676649403287547, "grad_norm": 1.5877918004989624, "learning_rate": 3.703625088336809e-05, "loss": 1.5244, "step": 6482 }, { "epoch": 1.167845079936951, "grad_norm": 1.6087439060211182, "learning_rate": 3.702257415273366e-05, "loss": 1.8559, "step": 6483 }, { "epoch": 1.1680252195451475, "grad_norm": 1.4541953802108765, "learning_rate": 3.700889846314201e-05, "loss": 1.363, "step": 6484 }, { "epoch": 1.168205359153344, "grad_norm": 1.4501092433929443, "learning_rate": 3.6995223815690184e-05, "loss": 1.5096, "step": 6485 }, { "epoch": 1.1683854987615403, "grad_norm": 1.5299330949783325, "learning_rate": 3.698155021147518e-05, "loss": 1.6382, "step": 6486 }, { "epoch": 1.1685656383697365, "grad_norm": 1.4811174869537354, "learning_rate": 3.696787765159388e-05, "loss": 1.4464, "step": 6487 }, { "epoch": 1.1687457779779329, "grad_norm": 1.4445598125457764, "learning_rate": 3.695420613714308e-05, "loss": 1.3327, "step": 6488 }, { "epoch": 1.1689259175861293, "grad_norm": 1.3719395399093628, "learning_rate": 3.694053566921949e-05, "loss": 1.3368, "step": 6489 }, { "epoch": 1.1691060571943257, "grad_norm": 1.5457484722137451, "learning_rate": 3.69268662489198e-05, "loss": 1.5565, "step": 6490 }, { "epoch": 1.1692861968025219, "grad_norm": 1.4242305755615234, "learning_rate": 3.691319787734052e-05, "loss": 1.2629, "step": 6491 }, { "epoch": 1.1694663364107183, "grad_norm": 1.4743695259094238, "learning_rate": 3.689953055557813e-05, "loss": 1.4033, "step": 6492 }, { "epoch": 1.1696464760189147, "grad_norm": 1.5519685745239258, "learning_rate": 3.688586428472902e-05, "loss": 1.4642, "step": 6493 }, { "epoch": 1.169826615627111, "grad_norm": 1.5706673860549927, "learning_rate": 3.687219906588947e-05, "loss": 1.6092, "step": 6494 }, { "epoch": 1.1700067552353073, "grad_norm": 1.5480272769927979, "learning_rate": 3.6858534900155736e-05, "loss": 1.3821, "step": 6495 }, { "epoch": 1.1701868948435037, "grad_norm": 1.4981380701065063, "learning_rate": 3.6844871788623945e-05, "loss": 1.5754, "step": 6496 }, { "epoch": 1.1703670344517, "grad_norm": 1.4690871238708496, "learning_rate": 3.6831209732390126e-05, "loss": 1.3932, "step": 6497 }, { "epoch": 1.1705471740598965, "grad_norm": 1.5345977544784546, "learning_rate": 3.681754873255023e-05, "loss": 1.4397, "step": 6498 }, { "epoch": 1.1707273136680927, "grad_norm": 1.3727781772613525, "learning_rate": 3.680388879020015e-05, "loss": 1.215, "step": 6499 }, { "epoch": 1.170907453276289, "grad_norm": 1.418428659439087, "learning_rate": 3.6790229906435705e-05, "loss": 1.4243, "step": 6500 }, { "epoch": 1.1710875928844855, "grad_norm": 1.4023103713989258, "learning_rate": 3.677657208235257e-05, "loss": 1.2126, "step": 6501 }, { "epoch": 1.171267732492682, "grad_norm": 1.33457612991333, "learning_rate": 3.67629153190464e-05, "loss": 1.8205, "step": 6502 }, { "epoch": 1.171447872100878, "grad_norm": 1.3318908214569092, "learning_rate": 3.674925961761268e-05, "loss": 1.7812, "step": 6503 }, { "epoch": 1.1716280117090745, "grad_norm": 1.2840074300765991, "learning_rate": 3.673560497914692e-05, "loss": 1.6893, "step": 6504 }, { "epoch": 1.171808151317271, "grad_norm": 1.3347468376159668, "learning_rate": 3.672195140474447e-05, "loss": 1.6688, "step": 6505 }, { "epoch": 1.1719882909254673, "grad_norm": 1.3650994300842285, "learning_rate": 3.6708298895500616e-05, "loss": 1.9315, "step": 6506 }, { "epoch": 1.1721684305336635, "grad_norm": 1.4201208353042603, "learning_rate": 3.669464745251054e-05, "loss": 1.9089, "step": 6507 }, { "epoch": 1.17234857014186, "grad_norm": 1.414838433265686, "learning_rate": 3.668099707686936e-05, "loss": 1.8584, "step": 6508 }, { "epoch": 1.1725287097500563, "grad_norm": 1.4731675386428833, "learning_rate": 3.666734776967212e-05, "loss": 1.9729, "step": 6509 }, { "epoch": 1.1727088493582527, "grad_norm": 1.6170134544372559, "learning_rate": 3.6653699532013745e-05, "loss": 1.7726, "step": 6510 }, { "epoch": 1.172888988966449, "grad_norm": 1.7776848077774048, "learning_rate": 3.6640052364989096e-05, "loss": 1.8517, "step": 6511 }, { "epoch": 1.1730691285746453, "grad_norm": 1.449196696281433, "learning_rate": 3.6626406269692934e-05, "loss": 1.7625, "step": 6512 }, { "epoch": 1.1732492681828417, "grad_norm": 1.324354887008667, "learning_rate": 3.661276124721994e-05, "loss": 1.3314, "step": 6513 }, { "epoch": 1.173429407791038, "grad_norm": 1.3754370212554932, "learning_rate": 3.6599117298664725e-05, "loss": 1.4978, "step": 6514 }, { "epoch": 1.1736095473992343, "grad_norm": 1.4485403299331665, "learning_rate": 3.658547442512179e-05, "loss": 1.4981, "step": 6515 }, { "epoch": 1.1737896870074307, "grad_norm": 1.3009345531463623, "learning_rate": 3.657183262768557e-05, "loss": 1.3513, "step": 6516 }, { "epoch": 1.173969826615627, "grad_norm": 1.4406200647354126, "learning_rate": 3.6558191907450385e-05, "loss": 1.3956, "step": 6517 }, { "epoch": 1.1741499662238235, "grad_norm": 1.3275244235992432, "learning_rate": 3.6544552265510505e-05, "loss": 1.3103, "step": 6518 }, { "epoch": 1.17433010583202, "grad_norm": 1.371840476989746, "learning_rate": 3.653091370296009e-05, "loss": 1.547, "step": 6519 }, { "epoch": 1.174510245440216, "grad_norm": 1.2835233211517334, "learning_rate": 3.6517276220893214e-05, "loss": 1.4501, "step": 6520 }, { "epoch": 1.1746903850484125, "grad_norm": 1.268537998199463, "learning_rate": 3.650363982040388e-05, "loss": 1.3726, "step": 6521 }, { "epoch": 1.174870524656609, "grad_norm": 1.2959752082824707, "learning_rate": 3.6490004502585965e-05, "loss": 1.3328, "step": 6522 }, { "epoch": 1.1750506642648053, "grad_norm": 1.4207144975662231, "learning_rate": 3.647637026853333e-05, "loss": 1.3504, "step": 6523 }, { "epoch": 1.1752308038730015, "grad_norm": 1.4284688234329224, "learning_rate": 3.646273711933967e-05, "loss": 1.4336, "step": 6524 }, { "epoch": 1.175410943481198, "grad_norm": 1.2378349304199219, "learning_rate": 3.644910505609866e-05, "loss": 1.122, "step": 6525 }, { "epoch": 1.1755910830893943, "grad_norm": 1.335356593132019, "learning_rate": 3.643547407990384e-05, "loss": 1.3188, "step": 6526 }, { "epoch": 1.1757712226975907, "grad_norm": 1.39003324508667, "learning_rate": 3.642184419184865e-05, "loss": 1.5439, "step": 6527 }, { "epoch": 1.175951362305787, "grad_norm": 1.451756477355957, "learning_rate": 3.640821539302654e-05, "loss": 1.4459, "step": 6528 }, { "epoch": 1.1761315019139833, "grad_norm": 1.557690978050232, "learning_rate": 3.639458768453077e-05, "loss": 1.5089, "step": 6529 }, { "epoch": 1.1763116415221797, "grad_norm": 1.441338062286377, "learning_rate": 3.638096106745454e-05, "loss": 1.299, "step": 6530 }, { "epoch": 1.1764917811303761, "grad_norm": 1.384159803390503, "learning_rate": 3.636733554289097e-05, "loss": 1.3679, "step": 6531 }, { "epoch": 1.1766719207385723, "grad_norm": 1.4989361763000488, "learning_rate": 3.635371111193311e-05, "loss": 1.522, "step": 6532 }, { "epoch": 1.1768520603467687, "grad_norm": 1.581574559211731, "learning_rate": 3.6340087775673903e-05, "loss": 1.474, "step": 6533 }, { "epoch": 1.177032199954965, "grad_norm": 1.382921814918518, "learning_rate": 3.63264655352062e-05, "loss": 1.5329, "step": 6534 }, { "epoch": 1.1772123395631615, "grad_norm": 1.5133802890777588, "learning_rate": 3.631284439162278e-05, "loss": 1.4656, "step": 6535 }, { "epoch": 1.177392479171358, "grad_norm": 1.3116607666015625, "learning_rate": 3.629922434601629e-05, "loss": 1.2995, "step": 6536 }, { "epoch": 1.177572618779554, "grad_norm": 1.3999848365783691, "learning_rate": 3.628560539947937e-05, "loss": 1.2268, "step": 6537 }, { "epoch": 1.1777527583877505, "grad_norm": 1.499196171760559, "learning_rate": 3.6271987553104504e-05, "loss": 1.3645, "step": 6538 }, { "epoch": 1.177932897995947, "grad_norm": 1.4488658905029297, "learning_rate": 3.625837080798411e-05, "loss": 1.5458, "step": 6539 }, { "epoch": 1.178113037604143, "grad_norm": 1.327940583229065, "learning_rate": 3.624475516521051e-05, "loss": 1.2288, "step": 6540 }, { "epoch": 1.1782931772123395, "grad_norm": 1.5767016410827637, "learning_rate": 3.623114062587595e-05, "loss": 1.4652, "step": 6541 }, { "epoch": 1.178473316820536, "grad_norm": 1.592172622680664, "learning_rate": 3.6217527191072585e-05, "loss": 1.7449, "step": 6542 }, { "epoch": 1.1786534564287323, "grad_norm": 1.4359644651412964, "learning_rate": 3.620391486189248e-05, "loss": 1.4, "step": 6543 }, { "epoch": 1.1788335960369287, "grad_norm": 1.545168161392212, "learning_rate": 3.6190303639427604e-05, "loss": 1.4624, "step": 6544 }, { "epoch": 1.179013735645125, "grad_norm": 1.4305164813995361, "learning_rate": 3.617669352476983e-05, "loss": 1.4609, "step": 6545 }, { "epoch": 1.1791938752533213, "grad_norm": 1.4097996950149536, "learning_rate": 3.6163084519010984e-05, "loss": 1.2905, "step": 6546 }, { "epoch": 1.1793740148615177, "grad_norm": 1.4933857917785645, "learning_rate": 3.6149476623242754e-05, "loss": 1.2906, "step": 6547 }, { "epoch": 1.1795541544697141, "grad_norm": 1.5967057943344116, "learning_rate": 3.6135869838556766e-05, "loss": 1.4725, "step": 6548 }, { "epoch": 1.1797342940779103, "grad_norm": 1.6981664896011353, "learning_rate": 3.612226416604455e-05, "loss": 1.7249, "step": 6549 }, { "epoch": 1.1799144336861067, "grad_norm": 1.554762363433838, "learning_rate": 3.610865960679752e-05, "loss": 1.2583, "step": 6550 }, { "epoch": 1.1800945732943031, "grad_norm": 1.509871244430542, "learning_rate": 3.6095056161907074e-05, "loss": 1.1946, "step": 6551 }, { "epoch": 1.1802747129024995, "grad_norm": 1.244280219078064, "learning_rate": 3.608145383246444e-05, "loss": 1.7441, "step": 6552 }, { "epoch": 1.1804548525106957, "grad_norm": 1.366241216659546, "learning_rate": 3.606785261956081e-05, "loss": 1.9998, "step": 6553 }, { "epoch": 1.1806349921188921, "grad_norm": 1.330600619316101, "learning_rate": 3.605425252428725e-05, "loss": 1.9065, "step": 6554 }, { "epoch": 1.1808151317270885, "grad_norm": 1.2577922344207764, "learning_rate": 3.604065354773475e-05, "loss": 1.893, "step": 6555 }, { "epoch": 1.180995271335285, "grad_norm": 1.3520675897598267, "learning_rate": 3.6027055690994235e-05, "loss": 2.0387, "step": 6556 }, { "epoch": 1.181175410943481, "grad_norm": 1.5209952592849731, "learning_rate": 3.601345895515651e-05, "loss": 1.8802, "step": 6557 }, { "epoch": 1.1813555505516775, "grad_norm": 1.3552844524383545, "learning_rate": 3.599986334131229e-05, "loss": 1.4301, "step": 6558 }, { "epoch": 1.181535690159874, "grad_norm": 1.4343005418777466, "learning_rate": 3.598626885055219e-05, "loss": 1.8323, "step": 6559 }, { "epoch": 1.1817158297680703, "grad_norm": 1.4352056980133057, "learning_rate": 3.59726754839668e-05, "loss": 1.8075, "step": 6560 }, { "epoch": 1.1818959693762667, "grad_norm": 1.5641226768493652, "learning_rate": 3.595908324264656e-05, "loss": 1.7918, "step": 6561 }, { "epoch": 1.182076108984463, "grad_norm": 1.930543065071106, "learning_rate": 3.5945492127681816e-05, "loss": 2.145, "step": 6562 }, { "epoch": 1.1822562485926593, "grad_norm": 1.615303635597229, "learning_rate": 3.593190214016285e-05, "loss": 1.8811, "step": 6563 }, { "epoch": 1.1824363882008557, "grad_norm": 1.411547303199768, "learning_rate": 3.591831328117982e-05, "loss": 1.4036, "step": 6564 }, { "epoch": 1.182616527809052, "grad_norm": 1.3096319437026978, "learning_rate": 3.590472555182285e-05, "loss": 1.3405, "step": 6565 }, { "epoch": 1.1827966674172483, "grad_norm": 1.263176679611206, "learning_rate": 3.589113895318194e-05, "loss": 1.252, "step": 6566 }, { "epoch": 1.1829768070254447, "grad_norm": 1.35726797580719, "learning_rate": 3.5877553486347e-05, "loss": 1.5132, "step": 6567 }, { "epoch": 1.1831569466336411, "grad_norm": 1.3140900135040283, "learning_rate": 3.586396915240781e-05, "loss": 1.3644, "step": 6568 }, { "epoch": 1.1833370862418375, "grad_norm": 1.448294758796692, "learning_rate": 3.585038595245415e-05, "loss": 1.4053, "step": 6569 }, { "epoch": 1.1835172258500337, "grad_norm": 1.3514827489852905, "learning_rate": 3.5836803887575634e-05, "loss": 1.4455, "step": 6570 }, { "epoch": 1.1836973654582301, "grad_norm": 1.4190112352371216, "learning_rate": 3.5823222958861814e-05, "loss": 1.4431, "step": 6571 }, { "epoch": 1.1838775050664265, "grad_norm": 1.328613519668579, "learning_rate": 3.5809643167402145e-05, "loss": 1.4305, "step": 6572 }, { "epoch": 1.1840576446746227, "grad_norm": 1.4426491260528564, "learning_rate": 3.579606451428598e-05, "loss": 1.5225, "step": 6573 }, { "epoch": 1.1842377842828191, "grad_norm": 1.3465397357940674, "learning_rate": 3.57824870006026e-05, "loss": 1.3794, "step": 6574 }, { "epoch": 1.1844179238910155, "grad_norm": 1.4426008462905884, "learning_rate": 3.57689106274412e-05, "loss": 1.5571, "step": 6575 }, { "epoch": 1.184598063499212, "grad_norm": 1.393587350845337, "learning_rate": 3.5755335395890855e-05, "loss": 1.3809, "step": 6576 }, { "epoch": 1.1847782031074083, "grad_norm": 1.320299744606018, "learning_rate": 3.5741761307040564e-05, "loss": 1.3174, "step": 6577 }, { "epoch": 1.1849583427156045, "grad_norm": 1.3964899778366089, "learning_rate": 3.572818836197923e-05, "loss": 1.4343, "step": 6578 }, { "epoch": 1.185138482323801, "grad_norm": 1.4703927040100098, "learning_rate": 3.5714616561795675e-05, "loss": 1.4531, "step": 6579 }, { "epoch": 1.1853186219319973, "grad_norm": 1.4955027103424072, "learning_rate": 3.570104590757863e-05, "loss": 1.3522, "step": 6580 }, { "epoch": 1.1854987615401937, "grad_norm": 1.3167777061462402, "learning_rate": 3.5687476400416714e-05, "loss": 1.3425, "step": 6581 }, { "epoch": 1.18567890114839, "grad_norm": 1.4521174430847168, "learning_rate": 3.5673908041398464e-05, "loss": 1.2777, "step": 6582 }, { "epoch": 1.1858590407565863, "grad_norm": 1.509108543395996, "learning_rate": 3.566034083161234e-05, "loss": 1.5353, "step": 6583 }, { "epoch": 1.1860391803647827, "grad_norm": 1.5208263397216797, "learning_rate": 3.5646774772146684e-05, "loss": 1.6614, "step": 6584 }, { "epoch": 1.1862193199729791, "grad_norm": 1.4845576286315918, "learning_rate": 3.5633209864089766e-05, "loss": 1.3558, "step": 6585 }, { "epoch": 1.1863994595811753, "grad_norm": 1.5247132778167725, "learning_rate": 3.561964610852976e-05, "loss": 1.3456, "step": 6586 }, { "epoch": 1.1865795991893717, "grad_norm": 1.523598551750183, "learning_rate": 3.560608350655473e-05, "loss": 1.6709, "step": 6587 }, { "epoch": 1.1867597387975681, "grad_norm": 1.4654392004013062, "learning_rate": 3.5592522059252674e-05, "loss": 1.519, "step": 6588 }, { "epoch": 1.1869398784057645, "grad_norm": 1.4512569904327393, "learning_rate": 3.557896176771149e-05, "loss": 1.2494, "step": 6589 }, { "epoch": 1.1871200180139607, "grad_norm": 1.5786213874816895, "learning_rate": 3.556540263301896e-05, "loss": 1.5712, "step": 6590 }, { "epoch": 1.1873001576221571, "grad_norm": 1.3032244443893433, "learning_rate": 3.55518446562628e-05, "loss": 1.2322, "step": 6591 }, { "epoch": 1.1874802972303535, "grad_norm": 1.6659175157546997, "learning_rate": 3.553828783853061e-05, "loss": 1.582, "step": 6592 }, { "epoch": 1.18766043683855, "grad_norm": 1.2859901189804077, "learning_rate": 3.552473218090995e-05, "loss": 1.1052, "step": 6593 }, { "epoch": 1.1878405764467463, "grad_norm": 1.5602586269378662, "learning_rate": 3.5511177684488216e-05, "loss": 1.5606, "step": 6594 }, { "epoch": 1.1880207160549425, "grad_norm": 1.5847281217575073, "learning_rate": 3.549762435035275e-05, "loss": 1.3735, "step": 6595 }, { "epoch": 1.188200855663139, "grad_norm": 1.463122844696045, "learning_rate": 3.5484072179590776e-05, "loss": 1.4242, "step": 6596 }, { "epoch": 1.1883809952713353, "grad_norm": 1.4870685338974, "learning_rate": 3.5470521173289486e-05, "loss": 1.4618, "step": 6597 }, { "epoch": 1.1885611348795315, "grad_norm": 1.5126922130584717, "learning_rate": 3.54569713325359e-05, "loss": 1.4509, "step": 6598 }, { "epoch": 1.188741274487728, "grad_norm": 1.701738953590393, "learning_rate": 3.544342265841701e-05, "loss": 1.4731, "step": 6599 }, { "epoch": 1.1889214140959243, "grad_norm": 1.4555048942565918, "learning_rate": 3.5429875152019655e-05, "loss": 1.275, "step": 6600 }, { "epoch": 1.1891015537041207, "grad_norm": 1.3295918703079224, "learning_rate": 3.5416328814430586e-05, "loss": 1.0307, "step": 6601 }, { "epoch": 1.1892816933123171, "grad_norm": 1.3476378917694092, "learning_rate": 3.540278364673655e-05, "loss": 1.7332, "step": 6602 }, { "epoch": 1.1894618329205133, "grad_norm": 1.3140027523040771, "learning_rate": 3.538923965002409e-05, "loss": 1.9636, "step": 6603 }, { "epoch": 1.1896419725287097, "grad_norm": 1.2953075170516968, "learning_rate": 3.537569682537971e-05, "loss": 1.8666, "step": 6604 }, { "epoch": 1.1898221121369061, "grad_norm": 1.4390342235565186, "learning_rate": 3.536215517388981e-05, "loss": 1.8716, "step": 6605 }, { "epoch": 1.1900022517451025, "grad_norm": 1.352335810661316, "learning_rate": 3.534861469664068e-05, "loss": 1.8955, "step": 6606 }, { "epoch": 1.1901823913532987, "grad_norm": 1.4060144424438477, "learning_rate": 3.533507539471856e-05, "loss": 1.5629, "step": 6607 }, { "epoch": 1.1903625309614951, "grad_norm": 1.4898676872253418, "learning_rate": 3.532153726920954e-05, "loss": 1.9746, "step": 6608 }, { "epoch": 1.1905426705696915, "grad_norm": 1.6041971445083618, "learning_rate": 3.530800032119965e-05, "loss": 1.9493, "step": 6609 }, { "epoch": 1.190722810177888, "grad_norm": 1.5663259029388428, "learning_rate": 3.529446455177481e-05, "loss": 1.8175, "step": 6610 }, { "epoch": 1.1909029497860841, "grad_norm": 1.8397724628448486, "learning_rate": 3.5280929962020884e-05, "loss": 2.1634, "step": 6611 }, { "epoch": 1.1910830893942805, "grad_norm": 1.4266645908355713, "learning_rate": 3.5267396553023565e-05, "loss": 1.5405, "step": 6612 }, { "epoch": 1.191263229002477, "grad_norm": 1.4743822813034058, "learning_rate": 3.5253864325868536e-05, "loss": 1.7425, "step": 6613 }, { "epoch": 1.1914433686106733, "grad_norm": 1.421815037727356, "learning_rate": 3.524033328164132e-05, "loss": 1.5163, "step": 6614 }, { "epoch": 1.1916235082188695, "grad_norm": 1.3058090209960938, "learning_rate": 3.5226803421427366e-05, "loss": 1.3694, "step": 6615 }, { "epoch": 1.191803647827066, "grad_norm": 1.467496395111084, "learning_rate": 3.521327474631206e-05, "loss": 1.6727, "step": 6616 }, { "epoch": 1.1919837874352623, "grad_norm": 1.3814575672149658, "learning_rate": 3.5199747257380654e-05, "loss": 1.3561, "step": 6617 }, { "epoch": 1.1921639270434587, "grad_norm": 1.4303704500198364, "learning_rate": 3.5186220955718306e-05, "loss": 1.5032, "step": 6618 }, { "epoch": 1.1923440666516552, "grad_norm": 1.372300148010254, "learning_rate": 3.517269584241011e-05, "loss": 1.606, "step": 6619 }, { "epoch": 1.1925242062598513, "grad_norm": 1.3018789291381836, "learning_rate": 3.5159171918541015e-05, "loss": 1.3698, "step": 6620 }, { "epoch": 1.1927043458680477, "grad_norm": 1.3748687505722046, "learning_rate": 3.5145649185195926e-05, "loss": 1.5707, "step": 6621 }, { "epoch": 1.1928844854762441, "grad_norm": 1.305811882019043, "learning_rate": 3.5132127643459626e-05, "loss": 1.404, "step": 6622 }, { "epoch": 1.1930646250844403, "grad_norm": 1.4802151918411255, "learning_rate": 3.5118607294416806e-05, "loss": 1.4995, "step": 6623 }, { "epoch": 1.1932447646926367, "grad_norm": 1.418360710144043, "learning_rate": 3.510508813915204e-05, "loss": 1.4648, "step": 6624 }, { "epoch": 1.1934249043008331, "grad_norm": 1.2332953214645386, "learning_rate": 3.5091570178749863e-05, "loss": 1.1852, "step": 6625 }, { "epoch": 1.1936050439090296, "grad_norm": 1.437515377998352, "learning_rate": 3.507805341429469e-05, "loss": 1.5245, "step": 6626 }, { "epoch": 1.193785183517226, "grad_norm": 1.522303819656372, "learning_rate": 3.506453784687078e-05, "loss": 1.3664, "step": 6627 }, { "epoch": 1.1939653231254221, "grad_norm": 1.4197916984558105, "learning_rate": 3.505102347756237e-05, "loss": 1.4164, "step": 6628 }, { "epoch": 1.1941454627336185, "grad_norm": 1.382589340209961, "learning_rate": 3.503751030745356e-05, "loss": 1.3275, "step": 6629 }, { "epoch": 1.194325602341815, "grad_norm": 1.3801850080490112, "learning_rate": 3.502399833762841e-05, "loss": 1.2015, "step": 6630 }, { "epoch": 1.1945057419500111, "grad_norm": 1.4676933288574219, "learning_rate": 3.5010487569170806e-05, "loss": 1.7089, "step": 6631 }, { "epoch": 1.1946858815582075, "grad_norm": 1.4051316976547241, "learning_rate": 3.499697800316461e-05, "loss": 1.3133, "step": 6632 }, { "epoch": 1.194866021166404, "grad_norm": 1.3830493688583374, "learning_rate": 3.498346964069351e-05, "loss": 1.1997, "step": 6633 }, { "epoch": 1.1950461607746004, "grad_norm": 1.3955798149108887, "learning_rate": 3.4969962482841145e-05, "loss": 1.3241, "step": 6634 }, { "epoch": 1.1952263003827968, "grad_norm": 1.3250913619995117, "learning_rate": 3.4956456530691094e-05, "loss": 1.3055, "step": 6635 }, { "epoch": 1.195406439990993, "grad_norm": 1.4945846796035767, "learning_rate": 3.494295178532677e-05, "loss": 1.4199, "step": 6636 }, { "epoch": 1.1955865795991893, "grad_norm": 1.5242571830749512, "learning_rate": 3.492944824783152e-05, "loss": 1.4864, "step": 6637 }, { "epoch": 1.1957667192073858, "grad_norm": 1.3969535827636719, "learning_rate": 3.4915945919288576e-05, "loss": 1.2787, "step": 6638 }, { "epoch": 1.1959468588155822, "grad_norm": 1.3807302713394165, "learning_rate": 3.490244480078112e-05, "loss": 1.379, "step": 6639 }, { "epoch": 1.1961269984237783, "grad_norm": 1.5274091958999634, "learning_rate": 3.488894489339218e-05, "loss": 1.621, "step": 6640 }, { "epoch": 1.1963071380319747, "grad_norm": 1.5574896335601807, "learning_rate": 3.487544619820473e-05, "loss": 1.4443, "step": 6641 }, { "epoch": 1.1964872776401712, "grad_norm": 1.5202059745788574, "learning_rate": 3.4861948716301615e-05, "loss": 1.4528, "step": 6642 }, { "epoch": 1.1966674172483676, "grad_norm": 1.5719102621078491, "learning_rate": 3.484845244876559e-05, "loss": 1.2942, "step": 6643 }, { "epoch": 1.1968475568565637, "grad_norm": 1.289324402809143, "learning_rate": 3.4834957396679336e-05, "loss": 1.2822, "step": 6644 }, { "epoch": 1.1970276964647601, "grad_norm": 1.6651968955993652, "learning_rate": 3.482146356112542e-05, "loss": 1.7346, "step": 6645 }, { "epoch": 1.1972078360729566, "grad_norm": 1.5542699098587036, "learning_rate": 3.48079709431863e-05, "loss": 1.5366, "step": 6646 }, { "epoch": 1.197387975681153, "grad_norm": 1.4728922843933105, "learning_rate": 3.4794479543944355e-05, "loss": 1.3818, "step": 6647 }, { "epoch": 1.1975681152893491, "grad_norm": 1.511291265487671, "learning_rate": 3.4780989364481836e-05, "loss": 1.2833, "step": 6648 }, { "epoch": 1.1977482548975456, "grad_norm": 1.5342804193496704, "learning_rate": 3.476750040588095e-05, "loss": 1.485, "step": 6649 }, { "epoch": 1.197928394505742, "grad_norm": 1.5278414487838745, "learning_rate": 3.475401266922377e-05, "loss": 1.3269, "step": 6650 }, { "epoch": 1.1981085341139384, "grad_norm": 1.4570335149765015, "learning_rate": 3.474052615559226e-05, "loss": 1.2613, "step": 6651 }, { "epoch": 1.1982886737221348, "grad_norm": 1.3100883960723877, "learning_rate": 3.47270408660683e-05, "loss": 1.5977, "step": 6652 }, { "epoch": 1.198468813330331, "grad_norm": 1.361167311668396, "learning_rate": 3.471355680173369e-05, "loss": 1.8322, "step": 6653 }, { "epoch": 1.1986489529385274, "grad_norm": 1.3418521881103516, "learning_rate": 3.470007396367012e-05, "loss": 1.8426, "step": 6654 }, { "epoch": 1.1988290925467238, "grad_norm": 1.4249584674835205, "learning_rate": 3.468659235295916e-05, "loss": 2.033, "step": 6655 }, { "epoch": 1.19900923215492, "grad_norm": 1.7422164678573608, "learning_rate": 3.467311197068229e-05, "loss": 1.9195, "step": 6656 }, { "epoch": 1.1991893717631164, "grad_norm": 1.4534685611724854, "learning_rate": 3.46596328179209e-05, "loss": 1.872, "step": 6657 }, { "epoch": 1.1993695113713128, "grad_norm": 1.381417155265808, "learning_rate": 3.464615489575632e-05, "loss": 1.7978, "step": 6658 }, { "epoch": 1.1995496509795092, "grad_norm": 1.6449260711669922, "learning_rate": 3.463267820526972e-05, "loss": 1.695, "step": 6659 }, { "epoch": 1.1997297905877056, "grad_norm": 1.7877861261367798, "learning_rate": 3.461920274754218e-05, "loss": 1.8696, "step": 6660 }, { "epoch": 1.1999099301959018, "grad_norm": 1.7133687734603882, "learning_rate": 3.4605728523654704e-05, "loss": 1.9319, "step": 6661 }, { "epoch": 1.2000900698040982, "grad_norm": 1.4877243041992188, "learning_rate": 3.459225553468815e-05, "loss": 1.5891, "step": 6662 }, { "epoch": 1.2002702094122946, "grad_norm": 1.4385182857513428, "learning_rate": 3.45787837817234e-05, "loss": 1.5529, "step": 6663 }, { "epoch": 1.200450349020491, "grad_norm": 1.4236807823181152, "learning_rate": 3.45653132658411e-05, "loss": 1.5168, "step": 6664 }, { "epoch": 1.2006304886286872, "grad_norm": 1.3317898511886597, "learning_rate": 3.4551843988121844e-05, "loss": 1.4586, "step": 6665 }, { "epoch": 1.2008106282368836, "grad_norm": 1.3173550367355347, "learning_rate": 3.4538375949646116e-05, "loss": 1.4256, "step": 6666 }, { "epoch": 1.20099076784508, "grad_norm": 1.3368550539016724, "learning_rate": 3.452490915149436e-05, "loss": 1.4086, "step": 6667 }, { "epoch": 1.2011709074532764, "grad_norm": 1.414510726928711, "learning_rate": 3.4511443594746854e-05, "loss": 1.3895, "step": 6668 }, { "epoch": 1.2013510470614726, "grad_norm": 1.5006206035614014, "learning_rate": 3.44979792804838e-05, "loss": 1.3912, "step": 6669 }, { "epoch": 1.201531186669669, "grad_norm": 1.3819299936294556, "learning_rate": 3.448451620978531e-05, "loss": 1.5017, "step": 6670 }, { "epoch": 1.2017113262778654, "grad_norm": 1.3423086404800415, "learning_rate": 3.447105438373134e-05, "loss": 1.4041, "step": 6671 }, { "epoch": 1.2018914658860618, "grad_norm": 1.47804856300354, "learning_rate": 3.445759380340185e-05, "loss": 1.5716, "step": 6672 }, { "epoch": 1.202071605494258, "grad_norm": 1.4944829940795898, "learning_rate": 3.44441344698766e-05, "loss": 1.5392, "step": 6673 }, { "epoch": 1.2022517451024544, "grad_norm": 1.5020502805709839, "learning_rate": 3.443067638423532e-05, "loss": 1.3804, "step": 6674 }, { "epoch": 1.2024318847106508, "grad_norm": 1.3079289197921753, "learning_rate": 3.441721954755757e-05, "loss": 1.3554, "step": 6675 }, { "epoch": 1.2026120243188472, "grad_norm": 1.3910051584243774, "learning_rate": 3.44037639609229e-05, "loss": 1.2899, "step": 6676 }, { "epoch": 1.2027921639270436, "grad_norm": 1.4919273853302002, "learning_rate": 3.4390309625410686e-05, "loss": 1.5703, "step": 6677 }, { "epoch": 1.2029723035352398, "grad_norm": 1.4217326641082764, "learning_rate": 3.437685654210023e-05, "loss": 1.3826, "step": 6678 }, { "epoch": 1.2031524431434362, "grad_norm": 1.3119813203811646, "learning_rate": 3.436340471207074e-05, "loss": 1.2576, "step": 6679 }, { "epoch": 1.2033325827516326, "grad_norm": 1.313380241394043, "learning_rate": 3.434995413640129e-05, "loss": 1.2351, "step": 6680 }, { "epoch": 1.2035127223598288, "grad_norm": 1.389377474784851, "learning_rate": 3.4336504816170905e-05, "loss": 1.4679, "step": 6681 }, { "epoch": 1.2036928619680252, "grad_norm": 1.4892104864120483, "learning_rate": 3.4323056752458476e-05, "loss": 1.4898, "step": 6682 }, { "epoch": 1.2038730015762216, "grad_norm": 1.5369975566864014, "learning_rate": 3.43096099463428e-05, "loss": 1.3933, "step": 6683 }, { "epoch": 1.204053141184418, "grad_norm": 1.5410183668136597, "learning_rate": 3.429616439890258e-05, "loss": 1.3769, "step": 6684 }, { "epoch": 1.2042332807926144, "grad_norm": 1.416324257850647, "learning_rate": 3.428272011121637e-05, "loss": 1.1954, "step": 6685 }, { "epoch": 1.2044134204008106, "grad_norm": 1.549851417541504, "learning_rate": 3.4269277084362724e-05, "loss": 1.527, "step": 6686 }, { "epoch": 1.204593560009007, "grad_norm": 1.5699172019958496, "learning_rate": 3.425583531942e-05, "loss": 1.4905, "step": 6687 }, { "epoch": 1.2047736996172034, "grad_norm": 1.7205044031143188, "learning_rate": 3.42423948174665e-05, "loss": 1.8317, "step": 6688 }, { "epoch": 1.2049538392253998, "grad_norm": 1.5982245206832886, "learning_rate": 3.42289555795804e-05, "loss": 1.6165, "step": 6689 }, { "epoch": 1.205133978833596, "grad_norm": 1.4603633880615234, "learning_rate": 3.421551760683982e-05, "loss": 1.2945, "step": 6690 }, { "epoch": 1.2053141184417924, "grad_norm": 1.384820818901062, "learning_rate": 3.420208090032274e-05, "loss": 1.2724, "step": 6691 }, { "epoch": 1.2054942580499888, "grad_norm": 1.553351879119873, "learning_rate": 3.418864546110701e-05, "loss": 1.4566, "step": 6692 }, { "epoch": 1.2056743976581852, "grad_norm": 1.5097345113754272, "learning_rate": 3.417521129027046e-05, "loss": 1.4959, "step": 6693 }, { "epoch": 1.2058545372663814, "grad_norm": 1.501902461051941, "learning_rate": 3.416177838889072e-05, "loss": 1.4165, "step": 6694 }, { "epoch": 1.2060346768745778, "grad_norm": 1.6061943769454956, "learning_rate": 3.4148346758045426e-05, "loss": 1.6726, "step": 6695 }, { "epoch": 1.2062148164827742, "grad_norm": 1.3930976390838623, "learning_rate": 3.4134916398812034e-05, "loss": 1.1994, "step": 6696 }, { "epoch": 1.2063949560909706, "grad_norm": 1.3988239765167236, "learning_rate": 3.412148731226793e-05, "loss": 1.2137, "step": 6697 }, { "epoch": 1.2065750956991668, "grad_norm": 1.3684662580490112, "learning_rate": 3.4108059499490376e-05, "loss": 1.2246, "step": 6698 }, { "epoch": 1.2067552353073632, "grad_norm": 1.584137201309204, "learning_rate": 3.409463296155652e-05, "loss": 1.5362, "step": 6699 }, { "epoch": 1.2069353749155596, "grad_norm": 1.3989081382751465, "learning_rate": 3.4081207699543485e-05, "loss": 1.178, "step": 6700 }, { "epoch": 1.207115514523756, "grad_norm": 1.605372667312622, "learning_rate": 3.4067783714528215e-05, "loss": 1.4927, "step": 6701 }, { "epoch": 1.2072956541319522, "grad_norm": 1.2538365125656128, "learning_rate": 3.4054361007587576e-05, "loss": 1.4983, "step": 6702 }, { "epoch": 1.2074757937401486, "grad_norm": 1.2905033826828003, "learning_rate": 3.4040939579798316e-05, "loss": 1.9806, "step": 6703 }, { "epoch": 1.207655933348345, "grad_norm": 1.3866654634475708, "learning_rate": 3.402751943223712e-05, "loss": 1.9978, "step": 6704 }, { "epoch": 1.2078360729565414, "grad_norm": 1.3942734003067017, "learning_rate": 3.4014100565980534e-05, "loss": 1.6547, "step": 6705 }, { "epoch": 1.2080162125647376, "grad_norm": 1.3620405197143555, "learning_rate": 3.4000682982105014e-05, "loss": 1.7364, "step": 6706 }, { "epoch": 1.208196352172934, "grad_norm": 1.4084973335266113, "learning_rate": 3.398726668168692e-05, "loss": 1.6698, "step": 6707 }, { "epoch": 1.2083764917811304, "grad_norm": 1.6366336345672607, "learning_rate": 3.397385166580248e-05, "loss": 2.0062, "step": 6708 }, { "epoch": 1.2085566313893268, "grad_norm": 1.6566872596740723, "learning_rate": 3.3960437935527855e-05, "loss": 2.061, "step": 6709 }, { "epoch": 1.2087367709975232, "grad_norm": 1.6653355360031128, "learning_rate": 3.394702549193909e-05, "loss": 1.7207, "step": 6710 }, { "epoch": 1.2089169106057194, "grad_norm": 1.474739909172058, "learning_rate": 3.393361433611213e-05, "loss": 1.5235, "step": 6711 }, { "epoch": 1.2090970502139158, "grad_norm": 1.298464298248291, "learning_rate": 3.392020446912279e-05, "loss": 1.388, "step": 6712 }, { "epoch": 1.2092771898221122, "grad_norm": 1.2222894430160522, "learning_rate": 3.390679589204682e-05, "loss": 1.2057, "step": 6713 }, { "epoch": 1.2094573294303084, "grad_norm": 1.3018053770065308, "learning_rate": 3.389338860595984e-05, "loss": 1.23, "step": 6714 }, { "epoch": 1.2096374690385048, "grad_norm": 1.3688886165618896, "learning_rate": 3.3879982611937386e-05, "loss": 1.3463, "step": 6715 }, { "epoch": 1.2098176086467012, "grad_norm": 1.47565495967865, "learning_rate": 3.386657791105488e-05, "loss": 1.593, "step": 6716 }, { "epoch": 1.2099977482548976, "grad_norm": 1.3760709762573242, "learning_rate": 3.385317450438762e-05, "loss": 1.3085, "step": 6717 }, { "epoch": 1.210177887863094, "grad_norm": 1.4314104318618774, "learning_rate": 3.383977239301085e-05, "loss": 1.5979, "step": 6718 }, { "epoch": 1.2103580274712902, "grad_norm": 1.3776984214782715, "learning_rate": 3.382637157799966e-05, "loss": 1.5581, "step": 6719 }, { "epoch": 1.2105381670794866, "grad_norm": 1.4690535068511963, "learning_rate": 3.381297206042907e-05, "loss": 1.5545, "step": 6720 }, { "epoch": 1.210718306687683, "grad_norm": 1.3706868886947632, "learning_rate": 3.3799573841373975e-05, "loss": 1.3295, "step": 6721 }, { "epoch": 1.2108984462958794, "grad_norm": 1.4949100017547607, "learning_rate": 3.3786176921909155e-05, "loss": 1.4161, "step": 6722 }, { "epoch": 1.2110785859040756, "grad_norm": 1.3225620985031128, "learning_rate": 3.3772781303109355e-05, "loss": 1.3062, "step": 6723 }, { "epoch": 1.211258725512272, "grad_norm": 1.4657484292984009, "learning_rate": 3.3759386986049135e-05, "loss": 1.4986, "step": 6724 }, { "epoch": 1.2114388651204684, "grad_norm": 1.4000439643859863, "learning_rate": 3.3745993971802966e-05, "loss": 1.2593, "step": 6725 }, { "epoch": 1.2116190047286648, "grad_norm": 1.436929702758789, "learning_rate": 3.373260226144525e-05, "loss": 1.3059, "step": 6726 }, { "epoch": 1.211799144336861, "grad_norm": 1.4112604856491089, "learning_rate": 3.3719211856050234e-05, "loss": 1.3099, "step": 6727 }, { "epoch": 1.2119792839450574, "grad_norm": 1.4148492813110352, "learning_rate": 3.370582275669214e-05, "loss": 1.4562, "step": 6728 }, { "epoch": 1.2121594235532538, "grad_norm": 1.5359387397766113, "learning_rate": 3.369243496444501e-05, "loss": 1.5223, "step": 6729 }, { "epoch": 1.2123395631614502, "grad_norm": 1.3944507837295532, "learning_rate": 3.36790484803828e-05, "loss": 1.4496, "step": 6730 }, { "epoch": 1.2125197027696464, "grad_norm": 1.5073171854019165, "learning_rate": 3.366566330557935e-05, "loss": 1.3671, "step": 6731 }, { "epoch": 1.2126998423778428, "grad_norm": 1.4034618139266968, "learning_rate": 3.3652279441108446e-05, "loss": 1.2737, "step": 6732 }, { "epoch": 1.2128799819860392, "grad_norm": 1.4668939113616943, "learning_rate": 3.363889688804373e-05, "loss": 1.4895, "step": 6733 }, { "epoch": 1.2130601215942356, "grad_norm": 1.2924970388412476, "learning_rate": 3.362551564745874e-05, "loss": 1.2958, "step": 6734 }, { "epoch": 1.213240261202432, "grad_norm": 1.6009396314620972, "learning_rate": 3.3612135720426904e-05, "loss": 1.6714, "step": 6735 }, { "epoch": 1.2134204008106282, "grad_norm": 1.5855759382247925, "learning_rate": 3.3598757108021546e-05, "loss": 1.3441, "step": 6736 }, { "epoch": 1.2136005404188246, "grad_norm": 1.610321044921875, "learning_rate": 3.3585379811315914e-05, "loss": 1.4576, "step": 6737 }, { "epoch": 1.213780680027021, "grad_norm": 1.5988126993179321, "learning_rate": 3.357200383138312e-05, "loss": 1.4523, "step": 6738 }, { "epoch": 1.2139608196352172, "grad_norm": 1.5248401165008545, "learning_rate": 3.355862916929618e-05, "loss": 1.3886, "step": 6739 }, { "epoch": 1.2141409592434136, "grad_norm": 1.531491994857788, "learning_rate": 3.3545255826128e-05, "loss": 1.4907, "step": 6740 }, { "epoch": 1.21432109885161, "grad_norm": 1.582066297531128, "learning_rate": 3.3531883802951364e-05, "loss": 1.3933, "step": 6741 }, { "epoch": 1.2145012384598064, "grad_norm": 1.5020043849945068, "learning_rate": 3.3518513100839e-05, "loss": 1.4564, "step": 6742 }, { "epoch": 1.2146813780680028, "grad_norm": 1.484968900680542, "learning_rate": 3.3505143720863497e-05, "loss": 1.3698, "step": 6743 }, { "epoch": 1.214861517676199, "grad_norm": 1.5655699968338013, "learning_rate": 3.3491775664097326e-05, "loss": 1.4121, "step": 6744 }, { "epoch": 1.2150416572843954, "grad_norm": 1.5694249868392944, "learning_rate": 3.3478408931612856e-05, "loss": 1.4314, "step": 6745 }, { "epoch": 1.2152217968925918, "grad_norm": 1.4166194200515747, "learning_rate": 3.3465043524482384e-05, "loss": 1.219, "step": 6746 }, { "epoch": 1.2154019365007882, "grad_norm": 1.4143685102462769, "learning_rate": 3.345167944377807e-05, "loss": 1.3164, "step": 6747 }, { "epoch": 1.2155820761089844, "grad_norm": 1.499548316001892, "learning_rate": 3.343831669057197e-05, "loss": 1.476, "step": 6748 }, { "epoch": 1.2157622157171808, "grad_norm": 1.3988614082336426, "learning_rate": 3.342495526593604e-05, "loss": 1.1152, "step": 6749 }, { "epoch": 1.2159423553253772, "grad_norm": 1.4815285205841064, "learning_rate": 3.3411595170942115e-05, "loss": 1.2164, "step": 6750 }, { "epoch": 1.2161224949335736, "grad_norm": 1.4810343980789185, "learning_rate": 3.3398236406661955e-05, "loss": 1.0439, "step": 6751 }, { "epoch": 1.2163026345417698, "grad_norm": 1.4816360473632812, "learning_rate": 3.338487897416718e-05, "loss": 2.1415, "step": 6752 }, { "epoch": 1.2164827741499662, "grad_norm": 1.4066493511199951, "learning_rate": 3.337152287452932e-05, "loss": 1.8022, "step": 6753 }, { "epoch": 1.2166629137581626, "grad_norm": 1.4352459907531738, "learning_rate": 3.33581681088198e-05, "loss": 1.7989, "step": 6754 }, { "epoch": 1.216843053366359, "grad_norm": 1.3035228252410889, "learning_rate": 3.334481467810992e-05, "loss": 1.6342, "step": 6755 }, { "epoch": 1.2170231929745552, "grad_norm": 1.297839641571045, "learning_rate": 3.3331462583470894e-05, "loss": 1.8681, "step": 6756 }, { "epoch": 1.2172033325827516, "grad_norm": 1.612751841545105, "learning_rate": 3.331811182597383e-05, "loss": 2.1145, "step": 6757 }, { "epoch": 1.217383472190948, "grad_norm": 1.5627317428588867, "learning_rate": 3.330476240668971e-05, "loss": 1.9384, "step": 6758 }, { "epoch": 1.2175636117991444, "grad_norm": 1.390958309173584, "learning_rate": 3.3291414326689396e-05, "loss": 1.5293, "step": 6759 }, { "epoch": 1.2177437514073408, "grad_norm": 1.6251673698425293, "learning_rate": 3.3278067587043705e-05, "loss": 1.904, "step": 6760 }, { "epoch": 1.217923891015537, "grad_norm": 1.6113286018371582, "learning_rate": 3.3264722188823295e-05, "loss": 1.8217, "step": 6761 }, { "epoch": 1.2181040306237334, "grad_norm": 1.5598195791244507, "learning_rate": 3.3251378133098723e-05, "loss": 1.7962, "step": 6762 }, { "epoch": 1.2182841702319298, "grad_norm": 1.4992597103118896, "learning_rate": 3.323803542094043e-05, "loss": 1.6648, "step": 6763 }, { "epoch": 1.218464309840126, "grad_norm": 1.30910325050354, "learning_rate": 3.322469405341875e-05, "loss": 1.3617, "step": 6764 }, { "epoch": 1.2186444494483224, "grad_norm": 1.3764314651489258, "learning_rate": 3.321135403160396e-05, "loss": 1.4496, "step": 6765 }, { "epoch": 1.2188245890565188, "grad_norm": 1.4324058294296265, "learning_rate": 3.319801535656617e-05, "loss": 1.3841, "step": 6766 }, { "epoch": 1.2190047286647152, "grad_norm": 1.372729778289795, "learning_rate": 3.318467802937541e-05, "loss": 1.3912, "step": 6767 }, { "epoch": 1.2191848682729116, "grad_norm": 1.3461893796920776, "learning_rate": 3.317134205110159e-05, "loss": 1.3233, "step": 6768 }, { "epoch": 1.2193650078811078, "grad_norm": 1.3404725790023804, "learning_rate": 3.3158007422814496e-05, "loss": 1.1978, "step": 6769 }, { "epoch": 1.2195451474893042, "grad_norm": 1.4907500743865967, "learning_rate": 3.314467414558385e-05, "loss": 1.5287, "step": 6770 }, { "epoch": 1.2197252870975006, "grad_norm": 1.4512618780136108, "learning_rate": 3.313134222047924e-05, "loss": 1.2156, "step": 6771 }, { "epoch": 1.2199054267056968, "grad_norm": 1.4059784412384033, "learning_rate": 3.311801164857015e-05, "loss": 1.4038, "step": 6772 }, { "epoch": 1.2200855663138932, "grad_norm": 1.3922499418258667, "learning_rate": 3.3104682430925924e-05, "loss": 1.4809, "step": 6773 }, { "epoch": 1.2202657059220896, "grad_norm": 1.4750616550445557, "learning_rate": 3.3091354568615854e-05, "loss": 1.5713, "step": 6774 }, { "epoch": 1.220445845530286, "grad_norm": 1.6020843982696533, "learning_rate": 3.307802806270909e-05, "loss": 1.8264, "step": 6775 }, { "epoch": 1.2206259851384824, "grad_norm": 1.324859380722046, "learning_rate": 3.3064702914274665e-05, "loss": 1.2919, "step": 6776 }, { "epoch": 1.2208061247466786, "grad_norm": 1.4866806268692017, "learning_rate": 3.3051379124381526e-05, "loss": 1.4767, "step": 6777 }, { "epoch": 1.220986264354875, "grad_norm": 1.5042155981063843, "learning_rate": 3.303805669409848e-05, "loss": 1.4528, "step": 6778 }, { "epoch": 1.2211664039630714, "grad_norm": 1.5368664264678955, "learning_rate": 3.302473562449429e-05, "loss": 1.6127, "step": 6779 }, { "epoch": 1.2213465435712678, "grad_norm": 1.3732553720474243, "learning_rate": 3.3011415916637536e-05, "loss": 1.2218, "step": 6780 }, { "epoch": 1.221526683179464, "grad_norm": 1.4287132024765015, "learning_rate": 3.2998097571596715e-05, "loss": 1.3873, "step": 6781 }, { "epoch": 1.2217068227876604, "grad_norm": 1.8109266757965088, "learning_rate": 3.2984780590440215e-05, "loss": 1.4497, "step": 6782 }, { "epoch": 1.2218869623958568, "grad_norm": 1.3814113140106201, "learning_rate": 3.2971464974236346e-05, "loss": 1.366, "step": 6783 }, { "epoch": 1.2220671020040532, "grad_norm": 1.4273579120635986, "learning_rate": 3.295815072405326e-05, "loss": 1.3397, "step": 6784 }, { "epoch": 1.2222472416122494, "grad_norm": 1.5226681232452393, "learning_rate": 3.294483784095902e-05, "loss": 1.5882, "step": 6785 }, { "epoch": 1.2224273812204458, "grad_norm": 1.3168551921844482, "learning_rate": 3.293152632602158e-05, "loss": 1.2676, "step": 6786 }, { "epoch": 1.2226075208286422, "grad_norm": 1.4255211353302002, "learning_rate": 3.291821618030877e-05, "loss": 1.3754, "step": 6787 }, { "epoch": 1.2227876604368386, "grad_norm": 1.4688334465026855, "learning_rate": 3.2904907404888366e-05, "loss": 1.365, "step": 6788 }, { "epoch": 1.2229678000450348, "grad_norm": 1.517831802368164, "learning_rate": 3.2891600000827946e-05, "loss": 1.4551, "step": 6789 }, { "epoch": 1.2231479396532312, "grad_norm": 1.399332880973816, "learning_rate": 3.287829396919505e-05, "loss": 1.1788, "step": 6790 }, { "epoch": 1.2233280792614276, "grad_norm": 1.6225569248199463, "learning_rate": 3.286498931105706e-05, "loss": 1.3243, "step": 6791 }, { "epoch": 1.223508218869624, "grad_norm": 1.4913194179534912, "learning_rate": 3.285168602748128e-05, "loss": 1.3081, "step": 6792 }, { "epoch": 1.2236883584778204, "grad_norm": 1.4559649229049683, "learning_rate": 3.28383841195349e-05, "loss": 1.3107, "step": 6793 }, { "epoch": 1.2238684980860166, "grad_norm": 1.4115616083145142, "learning_rate": 3.2825083588285e-05, "loss": 1.0701, "step": 6794 }, { "epoch": 1.224048637694213, "grad_norm": 1.4900233745574951, "learning_rate": 3.281178443479852e-05, "loss": 1.3649, "step": 6795 }, { "epoch": 1.2242287773024094, "grad_norm": 1.3989254236221313, "learning_rate": 3.27984866601423e-05, "loss": 1.1522, "step": 6796 }, { "epoch": 1.2244089169106056, "grad_norm": 1.4771958589553833, "learning_rate": 3.278519026538312e-05, "loss": 1.2725, "step": 6797 }, { "epoch": 1.224589056518802, "grad_norm": 1.3885525465011597, "learning_rate": 3.277189525158759e-05, "loss": 1.3949, "step": 6798 }, { "epoch": 1.2247691961269984, "grad_norm": 1.324245810508728, "learning_rate": 3.275860161982224e-05, "loss": 1.1143, "step": 6799 }, { "epoch": 1.2249493357351948, "grad_norm": 1.5012863874435425, "learning_rate": 3.274530937115348e-05, "loss": 1.1181, "step": 6800 }, { "epoch": 1.2251294753433912, "grad_norm": 1.40353262424469, "learning_rate": 3.2732018506647553e-05, "loss": 1.1923, "step": 6801 }, { "epoch": 1.2253096149515874, "grad_norm": 1.4508329629898071, "learning_rate": 3.271872902737072e-05, "loss": 1.968, "step": 6802 }, { "epoch": 1.2254897545597838, "grad_norm": 1.3287415504455566, "learning_rate": 3.270544093438903e-05, "loss": 1.8131, "step": 6803 }, { "epoch": 1.2256698941679802, "grad_norm": 1.3261590003967285, "learning_rate": 3.269215422876844e-05, "loss": 1.793, "step": 6804 }, { "epoch": 1.2258500337761766, "grad_norm": 1.4887539148330688, "learning_rate": 3.2678868911574814e-05, "loss": 1.9968, "step": 6805 }, { "epoch": 1.2260301733843728, "grad_norm": 1.391884684562683, "learning_rate": 3.266558498387386e-05, "loss": 1.6587, "step": 6806 }, { "epoch": 1.2262103129925692, "grad_norm": 1.4738408327102661, "learning_rate": 3.2652302446731266e-05, "loss": 1.7724, "step": 6807 }, { "epoch": 1.2263904526007656, "grad_norm": 1.4325567483901978, "learning_rate": 3.2639021301212513e-05, "loss": 1.873, "step": 6808 }, { "epoch": 1.226570592208962, "grad_norm": 1.4966789484024048, "learning_rate": 3.262574154838301e-05, "loss": 1.6836, "step": 6809 }, { "epoch": 1.2267507318171582, "grad_norm": 1.8694946765899658, "learning_rate": 3.2612463189308054e-05, "loss": 1.8775, "step": 6810 }, { "epoch": 1.2269308714253546, "grad_norm": 1.651310920715332, "learning_rate": 3.259918622505284e-05, "loss": 1.6738, "step": 6811 }, { "epoch": 1.227111011033551, "grad_norm": 1.4324439764022827, "learning_rate": 3.258591065668243e-05, "loss": 1.5847, "step": 6812 }, { "epoch": 1.2272911506417474, "grad_norm": 1.497910499572754, "learning_rate": 3.257263648526178e-05, "loss": 1.6083, "step": 6813 }, { "epoch": 1.2274712902499436, "grad_norm": 1.367472767829895, "learning_rate": 3.255936371185575e-05, "loss": 1.5481, "step": 6814 }, { "epoch": 1.22765142985814, "grad_norm": 1.2517704963684082, "learning_rate": 3.254609233752906e-05, "loss": 1.2703, "step": 6815 }, { "epoch": 1.2278315694663364, "grad_norm": 1.38472580909729, "learning_rate": 3.253282236334634e-05, "loss": 1.2486, "step": 6816 }, { "epoch": 1.2280117090745328, "grad_norm": 1.2442814111709595, "learning_rate": 3.251955379037211e-05, "loss": 1.3852, "step": 6817 }, { "epoch": 1.2281918486827292, "grad_norm": 1.413346290588379, "learning_rate": 3.250628661967076e-05, "loss": 1.4071, "step": 6818 }, { "epoch": 1.2283719882909254, "grad_norm": 1.603061556816101, "learning_rate": 3.249302085230658e-05, "loss": 1.5594, "step": 6819 }, { "epoch": 1.2285521278991218, "grad_norm": 1.2397353649139404, "learning_rate": 3.2479756489343714e-05, "loss": 1.1788, "step": 6820 }, { "epoch": 1.2287322675073182, "grad_norm": 1.375002145767212, "learning_rate": 3.2466493531846274e-05, "loss": 1.2393, "step": 6821 }, { "epoch": 1.2289124071155144, "grad_norm": 1.4281266927719116, "learning_rate": 3.245323198087818e-05, "loss": 1.5349, "step": 6822 }, { "epoch": 1.2290925467237108, "grad_norm": 1.3915725946426392, "learning_rate": 3.2439971837503266e-05, "loss": 1.4838, "step": 6823 }, { "epoch": 1.2292726863319072, "grad_norm": 1.503351092338562, "learning_rate": 3.242671310278525e-05, "loss": 1.6898, "step": 6824 }, { "epoch": 1.2294528259401036, "grad_norm": 1.3596590757369995, "learning_rate": 3.241345577778775e-05, "loss": 1.587, "step": 6825 }, { "epoch": 1.2296329655483, "grad_norm": 1.49898099899292, "learning_rate": 3.240019986357429e-05, "loss": 1.4074, "step": 6826 }, { "epoch": 1.2298131051564962, "grad_norm": 1.4580934047698975, "learning_rate": 3.23869453612082e-05, "loss": 1.396, "step": 6827 }, { "epoch": 1.2299932447646926, "grad_norm": 1.3868801593780518, "learning_rate": 3.237369227175278e-05, "loss": 1.1905, "step": 6828 }, { "epoch": 1.230173384372889, "grad_norm": 1.2144285440444946, "learning_rate": 3.236044059627116e-05, "loss": 1.1591, "step": 6829 }, { "epoch": 1.2303535239810852, "grad_norm": 1.482460618019104, "learning_rate": 3.234719033582642e-05, "loss": 1.5664, "step": 6830 }, { "epoch": 1.2305336635892816, "grad_norm": 1.5039933919906616, "learning_rate": 3.233394149148148e-05, "loss": 1.6836, "step": 6831 }, { "epoch": 1.230713803197478, "grad_norm": 1.5417627096176147, "learning_rate": 3.2320694064299146e-05, "loss": 1.4081, "step": 6832 }, { "epoch": 1.2308939428056744, "grad_norm": 1.4270129203796387, "learning_rate": 3.230744805534213e-05, "loss": 1.3792, "step": 6833 }, { "epoch": 1.2310740824138708, "grad_norm": 1.448017954826355, "learning_rate": 3.2294203465672986e-05, "loss": 1.3987, "step": 6834 }, { "epoch": 1.231254222022067, "grad_norm": 1.5489007234573364, "learning_rate": 3.228096029635423e-05, "loss": 1.4949, "step": 6835 }, { "epoch": 1.2314343616302634, "grad_norm": 1.4830647706985474, "learning_rate": 3.226771854844822e-05, "loss": 1.479, "step": 6836 }, { "epoch": 1.2316145012384598, "grad_norm": 1.587178349494934, "learning_rate": 3.225447822301718e-05, "loss": 1.6389, "step": 6837 }, { "epoch": 1.2317946408466562, "grad_norm": 1.553702473640442, "learning_rate": 3.2241239321123254e-05, "loss": 1.5346, "step": 6838 }, { "epoch": 1.2319747804548524, "grad_norm": 1.398423194885254, "learning_rate": 3.222800184382846e-05, "loss": 1.1183, "step": 6839 }, { "epoch": 1.2321549200630488, "grad_norm": 1.6206096410751343, "learning_rate": 3.2214765792194706e-05, "loss": 1.6472, "step": 6840 }, { "epoch": 1.2323350596712452, "grad_norm": 1.6678884029388428, "learning_rate": 3.2201531167283774e-05, "loss": 1.6587, "step": 6841 }, { "epoch": 1.2325151992794416, "grad_norm": 1.458854079246521, "learning_rate": 3.218829797015735e-05, "loss": 1.3503, "step": 6842 }, { "epoch": 1.2326953388876378, "grad_norm": 1.5148917436599731, "learning_rate": 3.217506620187697e-05, "loss": 1.3895, "step": 6843 }, { "epoch": 1.2328754784958342, "grad_norm": 1.3003181219100952, "learning_rate": 3.216183586350411e-05, "loss": 1.1938, "step": 6844 }, { "epoch": 1.2330556181040306, "grad_norm": 1.507581114768982, "learning_rate": 3.214860695610008e-05, "loss": 1.364, "step": 6845 }, { "epoch": 1.233235757712227, "grad_norm": 1.55634343624115, "learning_rate": 3.2135379480726116e-05, "loss": 1.5315, "step": 6846 }, { "epoch": 1.2334158973204232, "grad_norm": 1.4182003736495972, "learning_rate": 3.212215343844331e-05, "loss": 1.2499, "step": 6847 }, { "epoch": 1.2335960369286196, "grad_norm": 1.4784570932388306, "learning_rate": 3.210892883031263e-05, "loss": 1.3, "step": 6848 }, { "epoch": 1.233776176536816, "grad_norm": 1.5924310684204102, "learning_rate": 3.209570565739498e-05, "loss": 1.2403, "step": 6849 }, { "epoch": 1.2339563161450124, "grad_norm": 1.3215382099151611, "learning_rate": 3.2082483920751095e-05, "loss": 1.0135, "step": 6850 }, { "epoch": 1.2341364557532088, "grad_norm": 1.370424747467041, "learning_rate": 3.2069263621441626e-05, "loss": 1.1475, "step": 6851 }, { "epoch": 1.234316595361405, "grad_norm": 1.4670120477676392, "learning_rate": 3.205604476052707e-05, "loss": 2.2251, "step": 6852 }, { "epoch": 1.2344967349696014, "grad_norm": 1.2843053340911865, "learning_rate": 3.2042827339067903e-05, "loss": 1.671, "step": 6853 }, { "epoch": 1.2346768745777978, "grad_norm": 1.268125295639038, "learning_rate": 3.202961135812437e-05, "loss": 1.6883, "step": 6854 }, { "epoch": 1.234857014185994, "grad_norm": 1.2623155117034912, "learning_rate": 3.201639681875665e-05, "loss": 1.6978, "step": 6855 }, { "epoch": 1.2350371537941904, "grad_norm": 1.4933207035064697, "learning_rate": 3.200318372202483e-05, "loss": 1.7517, "step": 6856 }, { "epoch": 1.2352172934023868, "grad_norm": 1.4884768724441528, "learning_rate": 3.1989972068988815e-05, "loss": 1.686, "step": 6857 }, { "epoch": 1.2353974330105832, "grad_norm": 1.4897816181182861, "learning_rate": 3.197676186070849e-05, "loss": 1.8471, "step": 6858 }, { "epoch": 1.2355775726187797, "grad_norm": 1.6314853429794312, "learning_rate": 3.196355309824357e-05, "loss": 2.145, "step": 6859 }, { "epoch": 1.2357577122269758, "grad_norm": 1.7411549091339111, "learning_rate": 3.195034578265362e-05, "loss": 2.0476, "step": 6860 }, { "epoch": 1.2359378518351722, "grad_norm": 1.785402774810791, "learning_rate": 3.1937139914998135e-05, "loss": 2.2141, "step": 6861 }, { "epoch": 1.2361179914433686, "grad_norm": 1.5895336866378784, "learning_rate": 3.192393549633647e-05, "loss": 1.7239, "step": 6862 }, { "epoch": 1.236298131051565, "grad_norm": 1.421332836151123, "learning_rate": 3.191073252772791e-05, "loss": 1.6081, "step": 6863 }, { "epoch": 1.2364782706597612, "grad_norm": 1.5207263231277466, "learning_rate": 3.1897531010231584e-05, "loss": 1.465, "step": 6864 }, { "epoch": 1.2366584102679576, "grad_norm": 1.3388986587524414, "learning_rate": 3.188433094490652e-05, "loss": 1.3126, "step": 6865 }, { "epoch": 1.236838549876154, "grad_norm": 1.3267252445220947, "learning_rate": 3.187113233281156e-05, "loss": 1.246, "step": 6866 }, { "epoch": 1.2370186894843505, "grad_norm": 1.4351184368133545, "learning_rate": 3.185793517500555e-05, "loss": 1.3043, "step": 6867 }, { "epoch": 1.2371988290925466, "grad_norm": 1.4483933448791504, "learning_rate": 3.1844739472547156e-05, "loss": 1.3921, "step": 6868 }, { "epoch": 1.237378968700743, "grad_norm": 1.3321502208709717, "learning_rate": 3.183154522649492e-05, "loss": 1.1965, "step": 6869 }, { "epoch": 1.2375591083089394, "grad_norm": 1.4448031187057495, "learning_rate": 3.181835243790727e-05, "loss": 1.5186, "step": 6870 }, { "epoch": 1.2377392479171359, "grad_norm": 1.4353901147842407, "learning_rate": 3.180516110784252e-05, "loss": 1.4443, "step": 6871 }, { "epoch": 1.237919387525332, "grad_norm": 1.4049485921859741, "learning_rate": 3.179197123735889e-05, "loss": 1.4224, "step": 6872 }, { "epoch": 1.2380995271335284, "grad_norm": 1.414174199104309, "learning_rate": 3.177878282751446e-05, "loss": 1.2071, "step": 6873 }, { "epoch": 1.2382796667417248, "grad_norm": 1.3704897165298462, "learning_rate": 3.17655958793672e-05, "loss": 1.4004, "step": 6874 }, { "epoch": 1.2384598063499213, "grad_norm": 1.7496007680892944, "learning_rate": 3.1752410393974965e-05, "loss": 1.6258, "step": 6875 }, { "epoch": 1.2386399459581177, "grad_norm": 1.510542392730713, "learning_rate": 3.173922637239546e-05, "loss": 1.3983, "step": 6876 }, { "epoch": 1.2388200855663138, "grad_norm": 1.4122511148452759, "learning_rate": 3.172604381568634e-05, "loss": 1.3087, "step": 6877 }, { "epoch": 1.2390002251745103, "grad_norm": 1.4079084396362305, "learning_rate": 3.1712862724905076e-05, "loss": 1.3244, "step": 6878 }, { "epoch": 1.2391803647827067, "grad_norm": 1.5156313180923462, "learning_rate": 3.169968310110906e-05, "loss": 1.5998, "step": 6879 }, { "epoch": 1.2393605043909028, "grad_norm": 1.4952698945999146, "learning_rate": 3.1686504945355536e-05, "loss": 1.6639, "step": 6880 }, { "epoch": 1.2395406439990992, "grad_norm": 1.3995941877365112, "learning_rate": 3.167332825870168e-05, "loss": 1.6085, "step": 6881 }, { "epoch": 1.2397207836072957, "grad_norm": 1.5575815439224243, "learning_rate": 3.16601530422045e-05, "loss": 1.5242, "step": 6882 }, { "epoch": 1.239900923215492, "grad_norm": 1.4111615419387817, "learning_rate": 3.164697929692092e-05, "loss": 1.3462, "step": 6883 }, { "epoch": 1.2400810628236885, "grad_norm": 1.5483009815216064, "learning_rate": 3.1633807023907716e-05, "loss": 1.4555, "step": 6884 }, { "epoch": 1.2402612024318846, "grad_norm": 1.4701731204986572, "learning_rate": 3.162063622422155e-05, "loss": 1.3976, "step": 6885 }, { "epoch": 1.240441342040081, "grad_norm": 1.435465693473816, "learning_rate": 3.160746689891901e-05, "loss": 1.4032, "step": 6886 }, { "epoch": 1.2406214816482775, "grad_norm": 1.362147569656372, "learning_rate": 3.159429904905652e-05, "loss": 1.4076, "step": 6887 }, { "epoch": 1.2408016212564739, "grad_norm": 1.590501070022583, "learning_rate": 3.158113267569039e-05, "loss": 1.5094, "step": 6888 }, { "epoch": 1.24098176086467, "grad_norm": 1.5492151975631714, "learning_rate": 3.1567967779876804e-05, "loss": 1.7515, "step": 6889 }, { "epoch": 1.2411619004728665, "grad_norm": 1.4863865375518799, "learning_rate": 3.1554804362671884e-05, "loss": 1.3005, "step": 6890 }, { "epoch": 1.2413420400810629, "grad_norm": 1.5774645805358887, "learning_rate": 3.154164242513159e-05, "loss": 1.3072, "step": 6891 }, { "epoch": 1.2415221796892593, "grad_norm": 1.7997633218765259, "learning_rate": 3.152848196831174e-05, "loss": 1.6606, "step": 6892 }, { "epoch": 1.2417023192974554, "grad_norm": 1.5264493227005005, "learning_rate": 3.151532299326806e-05, "loss": 1.4024, "step": 6893 }, { "epoch": 1.2418824589056519, "grad_norm": 1.5488672256469727, "learning_rate": 3.1502165501056156e-05, "loss": 1.2862, "step": 6894 }, { "epoch": 1.2420625985138483, "grad_norm": 1.4287611246109009, "learning_rate": 3.148900949273155e-05, "loss": 1.4463, "step": 6895 }, { "epoch": 1.2422427381220447, "grad_norm": 1.6001183986663818, "learning_rate": 3.147585496934957e-05, "loss": 1.4893, "step": 6896 }, { "epoch": 1.2424228777302408, "grad_norm": 1.5124192237854004, "learning_rate": 3.14627019319655e-05, "loss": 1.3634, "step": 6897 }, { "epoch": 1.2426030173384373, "grad_norm": 1.4344770908355713, "learning_rate": 3.144955038163444e-05, "loss": 1.1965, "step": 6898 }, { "epoch": 1.2427831569466337, "grad_norm": 1.4477797746658325, "learning_rate": 3.14364003194114e-05, "loss": 1.2443, "step": 6899 }, { "epoch": 1.24296329655483, "grad_norm": 1.4012515544891357, "learning_rate": 3.14232517463513e-05, "loss": 1.1767, "step": 6900 }, { "epoch": 1.2431434361630262, "grad_norm": 1.3314661979675293, "learning_rate": 3.141010466350889e-05, "loss": 1.2804, "step": 6901 }, { "epoch": 1.2433235757712227, "grad_norm": 1.3033140897750854, "learning_rate": 3.139695907193884e-05, "loss": 1.801, "step": 6902 }, { "epoch": 1.243503715379419, "grad_norm": 1.3037872314453125, "learning_rate": 3.1383814972695646e-05, "loss": 1.8049, "step": 6903 }, { "epoch": 1.2436838549876155, "grad_norm": 1.3784995079040527, "learning_rate": 3.137067236683378e-05, "loss": 1.9693, "step": 6904 }, { "epoch": 1.2438639945958117, "grad_norm": 1.3589712381362915, "learning_rate": 3.135753125540748e-05, "loss": 1.8749, "step": 6905 }, { "epoch": 1.244044134204008, "grad_norm": 1.5621092319488525, "learning_rate": 3.134439163947095e-05, "loss": 1.6441, "step": 6906 }, { "epoch": 1.2442242738122045, "grad_norm": 1.3298524618148804, "learning_rate": 3.1331253520078244e-05, "loss": 1.5181, "step": 6907 }, { "epoch": 1.2444044134204009, "grad_norm": 1.46505868434906, "learning_rate": 3.131811689828327e-05, "loss": 1.762, "step": 6908 }, { "epoch": 1.2445845530285973, "grad_norm": 1.7069956064224243, "learning_rate": 3.1304981775139875e-05, "loss": 2.0042, "step": 6909 }, { "epoch": 1.2447646926367935, "grad_norm": 1.6842715740203857, "learning_rate": 3.129184815170173e-05, "loss": 1.8457, "step": 6910 }, { "epoch": 1.2449448322449899, "grad_norm": 1.6530653238296509, "learning_rate": 3.127871602902241e-05, "loss": 1.8401, "step": 6911 }, { "epoch": 1.2451249718531863, "grad_norm": 1.4249504804611206, "learning_rate": 3.126558540815538e-05, "loss": 1.3493, "step": 6912 }, { "epoch": 1.2453051114613825, "grad_norm": 1.3376333713531494, "learning_rate": 3.1252456290153954e-05, "loss": 1.5235, "step": 6913 }, { "epoch": 1.2454852510695789, "grad_norm": 1.407130479812622, "learning_rate": 3.1239328676071355e-05, "loss": 1.4224, "step": 6914 }, { "epoch": 1.2456653906777753, "grad_norm": 1.4102033376693726, "learning_rate": 3.122620256696067e-05, "loss": 1.6468, "step": 6915 }, { "epoch": 1.2458455302859717, "grad_norm": 1.3153547048568726, "learning_rate": 3.121307796387488e-05, "loss": 1.251, "step": 6916 }, { "epoch": 1.246025669894168, "grad_norm": 1.3923897743225098, "learning_rate": 3.11999548678668e-05, "loss": 1.5507, "step": 6917 }, { "epoch": 1.2462058095023643, "grad_norm": 1.4627549648284912, "learning_rate": 3.1186833279989215e-05, "loss": 1.7288, "step": 6918 }, { "epoch": 1.2463859491105607, "grad_norm": 1.350342035293579, "learning_rate": 3.117371320129469e-05, "loss": 1.4016, "step": 6919 }, { "epoch": 1.246566088718757, "grad_norm": 1.4918733835220337, "learning_rate": 3.116059463283573e-05, "loss": 1.5074, "step": 6920 }, { "epoch": 1.2467462283269535, "grad_norm": 1.5650421380996704, "learning_rate": 3.1147477575664675e-05, "loss": 1.7041, "step": 6921 }, { "epoch": 1.2469263679351497, "grad_norm": 1.3583215475082397, "learning_rate": 3.1134362030833775e-05, "loss": 1.2494, "step": 6922 }, { "epoch": 1.247106507543346, "grad_norm": 1.5212762355804443, "learning_rate": 3.1121247999395185e-05, "loss": 1.5584, "step": 6923 }, { "epoch": 1.2472866471515425, "grad_norm": 1.6421043872833252, "learning_rate": 3.110813548240089e-05, "loss": 1.5528, "step": 6924 }, { "epoch": 1.2474667867597389, "grad_norm": 1.327778935432434, "learning_rate": 3.1095024480902765e-05, "loss": 1.1003, "step": 6925 }, { "epoch": 1.247646926367935, "grad_norm": 1.5680298805236816, "learning_rate": 3.1081914995952564e-05, "loss": 1.588, "step": 6926 }, { "epoch": 1.2478270659761315, "grad_norm": 1.5872795581817627, "learning_rate": 3.1068807028601906e-05, "loss": 1.7875, "step": 6927 }, { "epoch": 1.2480072055843279, "grad_norm": 1.3631765842437744, "learning_rate": 3.1055700579902345e-05, "loss": 1.3209, "step": 6928 }, { "epoch": 1.2481873451925243, "grad_norm": 1.4500107765197754, "learning_rate": 3.104259565090526e-05, "loss": 1.3819, "step": 6929 }, { "epoch": 1.2483674848007205, "grad_norm": 1.4599426984786987, "learning_rate": 3.102949224266192e-05, "loss": 1.4269, "step": 6930 }, { "epoch": 1.2485476244089169, "grad_norm": 1.5013508796691895, "learning_rate": 3.101639035622345e-05, "loss": 1.3436, "step": 6931 }, { "epoch": 1.2487277640171133, "grad_norm": 1.3377251625061035, "learning_rate": 3.100328999264092e-05, "loss": 1.3642, "step": 6932 }, { "epoch": 1.2489079036253097, "grad_norm": 1.4358447790145874, "learning_rate": 3.099019115296521e-05, "loss": 1.5565, "step": 6933 }, { "epoch": 1.249088043233506, "grad_norm": 1.505300521850586, "learning_rate": 3.09770938382471e-05, "loss": 1.6027, "step": 6934 }, { "epoch": 1.2492681828417023, "grad_norm": 1.44719660282135, "learning_rate": 3.096399804953727e-05, "loss": 1.4905, "step": 6935 }, { "epoch": 1.2494483224498987, "grad_norm": 1.3589566946029663, "learning_rate": 3.095090378788622e-05, "loss": 1.3473, "step": 6936 }, { "epoch": 1.249628462058095, "grad_norm": 1.4965473413467407, "learning_rate": 3.093781105434441e-05, "loss": 1.517, "step": 6937 }, { "epoch": 1.2498086016662913, "grad_norm": 1.4648877382278442, "learning_rate": 3.092471984996211e-05, "loss": 1.4618, "step": 6938 }, { "epoch": 1.2499887412744877, "grad_norm": 1.5023448467254639, "learning_rate": 3.091163017578949e-05, "loss": 1.3562, "step": 6939 }, { "epoch": 1.250168880882684, "grad_norm": 1.553214192390442, "learning_rate": 3.0898542032876605e-05, "loss": 1.4443, "step": 6940 }, { "epoch": 1.2503490204908805, "grad_norm": 1.4273412227630615, "learning_rate": 3.088545542227336e-05, "loss": 1.4012, "step": 6941 }, { "epoch": 1.2505291600990769, "grad_norm": 1.4459377527236938, "learning_rate": 3.087237034502958e-05, "loss": 1.4371, "step": 6942 }, { "epoch": 1.250709299707273, "grad_norm": 1.450222134590149, "learning_rate": 3.085928680219494e-05, "loss": 1.5046, "step": 6943 }, { "epoch": 1.2508894393154695, "grad_norm": 1.4296579360961914, "learning_rate": 3.084620479481898e-05, "loss": 1.3686, "step": 6944 }, { "epoch": 1.2510695789236659, "grad_norm": 1.3699074983596802, "learning_rate": 3.083312432395113e-05, "loss": 1.2766, "step": 6945 }, { "epoch": 1.251249718531862, "grad_norm": 1.496502161026001, "learning_rate": 3.082004539064074e-05, "loss": 1.4537, "step": 6946 }, { "epoch": 1.2514298581400585, "grad_norm": 1.3376675844192505, "learning_rate": 3.080696799593695e-05, "loss": 1.1308, "step": 6947 }, { "epoch": 1.2516099977482549, "grad_norm": 1.4920117855072021, "learning_rate": 3.079389214088885e-05, "loss": 1.2374, "step": 6948 }, { "epoch": 1.2517901373564513, "grad_norm": 1.442320704460144, "learning_rate": 3.0780817826545356e-05, "loss": 1.1047, "step": 6949 }, { "epoch": 1.2519702769646477, "grad_norm": 1.4925241470336914, "learning_rate": 3.076774505395529e-05, "loss": 1.1603, "step": 6950 }, { "epoch": 1.2521504165728439, "grad_norm": 1.7362186908721924, "learning_rate": 3.075467382416736e-05, "loss": 1.4566, "step": 6951 }, { "epoch": 1.2523305561810403, "grad_norm": 1.2892107963562012, "learning_rate": 3.074160413823012e-05, "loss": 1.8149, "step": 6952 }, { "epoch": 1.2525106957892367, "grad_norm": 1.2806421518325806, "learning_rate": 3.072853599719201e-05, "loss": 1.6805, "step": 6953 }, { "epoch": 1.252690835397433, "grad_norm": 1.3249517679214478, "learning_rate": 3.071546940210136e-05, "loss": 1.8209, "step": 6954 }, { "epoch": 1.2528709750056293, "grad_norm": 1.3965421915054321, "learning_rate": 3.0702404354006345e-05, "loss": 1.6613, "step": 6955 }, { "epoch": 1.2530511146138257, "grad_norm": 1.4450734853744507, "learning_rate": 3.068934085395508e-05, "loss": 1.8908, "step": 6956 }, { "epoch": 1.253231254222022, "grad_norm": 1.4606119394302368, "learning_rate": 3.0676278902995475e-05, "loss": 1.7144, "step": 6957 }, { "epoch": 1.2534113938302185, "grad_norm": 1.5273696184158325, "learning_rate": 3.066321850217536e-05, "loss": 1.7206, "step": 6958 }, { "epoch": 1.253591533438415, "grad_norm": 1.531860589981079, "learning_rate": 3.065015965254243e-05, "loss": 1.8984, "step": 6959 }, { "epoch": 1.253771673046611, "grad_norm": 1.8183238506317139, "learning_rate": 3.0637102355144276e-05, "loss": 2.0119, "step": 6960 }, { "epoch": 1.2539518126548075, "grad_norm": 1.486114501953125, "learning_rate": 3.062404661102835e-05, "loss": 1.5818, "step": 6961 }, { "epoch": 1.254131952263004, "grad_norm": 1.3486278057098389, "learning_rate": 3.061099242124197e-05, "loss": 1.3069, "step": 6962 }, { "epoch": 1.2543120918712, "grad_norm": 1.2821143865585327, "learning_rate": 3.0597939786832324e-05, "loss": 1.325, "step": 6963 }, { "epoch": 1.2544922314793965, "grad_norm": 1.303511142730713, "learning_rate": 3.058488870884648e-05, "loss": 1.4196, "step": 6964 }, { "epoch": 1.2546723710875929, "grad_norm": 1.3509190082550049, "learning_rate": 3.057183918833143e-05, "loss": 1.2932, "step": 6965 }, { "epoch": 1.2548525106957893, "grad_norm": 1.4364148378372192, "learning_rate": 3.055879122633397e-05, "loss": 1.4473, "step": 6966 }, { "epoch": 1.2550326503039857, "grad_norm": 1.4226536750793457, "learning_rate": 3.054574482390081e-05, "loss": 1.4111, "step": 6967 }, { "epoch": 1.2552127899121819, "grad_norm": 1.3846882581710815, "learning_rate": 3.053269998207854e-05, "loss": 1.4484, "step": 6968 }, { "epoch": 1.2553929295203783, "grad_norm": 1.4553942680358887, "learning_rate": 3.0519656701913555e-05, "loss": 1.5139, "step": 6969 }, { "epoch": 1.2555730691285747, "grad_norm": 1.4015153646469116, "learning_rate": 3.0506614984452254e-05, "loss": 1.5118, "step": 6970 }, { "epoch": 1.2557532087367709, "grad_norm": 1.5202937126159668, "learning_rate": 3.04935748307408e-05, "loss": 1.493, "step": 6971 }, { "epoch": 1.2559333483449673, "grad_norm": 1.4150903224945068, "learning_rate": 3.0480536241825263e-05, "loss": 1.248, "step": 6972 }, { "epoch": 1.2561134879531637, "grad_norm": 1.2660809755325317, "learning_rate": 3.0467499218751605e-05, "loss": 1.338, "step": 6973 }, { "epoch": 1.25629362756136, "grad_norm": 1.5115392208099365, "learning_rate": 3.0454463762565643e-05, "loss": 1.592, "step": 6974 }, { "epoch": 1.2564737671695565, "grad_norm": 1.511918544769287, "learning_rate": 3.044142987431309e-05, "loss": 1.6061, "step": 6975 }, { "epoch": 1.2566539067777527, "grad_norm": 1.3115757703781128, "learning_rate": 3.042839755503949e-05, "loss": 1.2508, "step": 6976 }, { "epoch": 1.256834046385949, "grad_norm": 1.4423494338989258, "learning_rate": 3.0415366805790317e-05, "loss": 1.3977, "step": 6977 }, { "epoch": 1.2570141859941455, "grad_norm": 1.3164818286895752, "learning_rate": 3.0402337627610865e-05, "loss": 1.2694, "step": 6978 }, { "epoch": 1.2571943256023417, "grad_norm": 1.4696060419082642, "learning_rate": 3.0389310021546358e-05, "loss": 1.3781, "step": 6979 }, { "epoch": 1.257374465210538, "grad_norm": 1.4752376079559326, "learning_rate": 3.037628398864184e-05, "loss": 1.468, "step": 6980 }, { "epoch": 1.2575546048187345, "grad_norm": 1.4918098449707031, "learning_rate": 3.0363259529942273e-05, "loss": 1.42, "step": 6981 }, { "epoch": 1.257734744426931, "grad_norm": 1.4050174951553345, "learning_rate": 3.0350236646492457e-05, "loss": 1.4017, "step": 6982 }, { "epoch": 1.2579148840351273, "grad_norm": 1.383993148803711, "learning_rate": 3.0337215339337078e-05, "loss": 1.53, "step": 6983 }, { "epoch": 1.2580950236433237, "grad_norm": 1.379915475845337, "learning_rate": 3.032419560952071e-05, "loss": 1.1459, "step": 6984 }, { "epoch": 1.25827516325152, "grad_norm": 1.3971757888793945, "learning_rate": 3.031117745808779e-05, "loss": 1.3178, "step": 6985 }, { "epoch": 1.2584553028597163, "grad_norm": 1.4547984600067139, "learning_rate": 3.0298160886082628e-05, "loss": 1.3794, "step": 6986 }, { "epoch": 1.2586354424679127, "grad_norm": 1.431549072265625, "learning_rate": 3.0285145894549377e-05, "loss": 1.3986, "step": 6987 }, { "epoch": 1.2588155820761089, "grad_norm": 1.49636971950531, "learning_rate": 3.0272132484532145e-05, "loss": 1.5619, "step": 6988 }, { "epoch": 1.2589957216843053, "grad_norm": 1.364831805229187, "learning_rate": 3.0259120657074846e-05, "loss": 1.1707, "step": 6989 }, { "epoch": 1.2591758612925017, "grad_norm": 1.6209914684295654, "learning_rate": 3.0246110413221258e-05, "loss": 1.6512, "step": 6990 }, { "epoch": 1.259356000900698, "grad_norm": 1.5467591285705566, "learning_rate": 3.023310175401507e-05, "loss": 1.5577, "step": 6991 }, { "epoch": 1.2595361405088945, "grad_norm": 1.3463491201400757, "learning_rate": 3.022009468049982e-05, "loss": 1.242, "step": 6992 }, { "epoch": 1.2597162801170907, "grad_norm": 1.4590662717819214, "learning_rate": 3.0207089193718953e-05, "loss": 1.468, "step": 6993 }, { "epoch": 1.259896419725287, "grad_norm": 1.480472445487976, "learning_rate": 3.019408529471575e-05, "loss": 1.4001, "step": 6994 }, { "epoch": 1.2600765593334835, "grad_norm": 1.5388211011886597, "learning_rate": 3.0181082984533393e-05, "loss": 1.2213, "step": 6995 }, { "epoch": 1.2602566989416797, "grad_norm": 1.40041983127594, "learning_rate": 3.0168082264214893e-05, "loss": 1.3281, "step": 6996 }, { "epoch": 1.260436838549876, "grad_norm": 1.4714363813400269, "learning_rate": 3.015508313480315e-05, "loss": 1.4441, "step": 6997 }, { "epoch": 1.2606169781580725, "grad_norm": 1.4824481010437012, "learning_rate": 3.0142085597340997e-05, "loss": 1.22, "step": 6998 }, { "epoch": 1.260797117766269, "grad_norm": 1.4296058416366577, "learning_rate": 3.0129089652871064e-05, "loss": 1.3198, "step": 6999 }, { "epoch": 1.2609772573744653, "grad_norm": 1.6829460859298706, "learning_rate": 3.0116095302435876e-05, "loss": 1.3227, "step": 7000 }, { "epoch": 1.2611573969826615, "grad_norm": 1.3450504541397095, "learning_rate": 3.0103102547077834e-05, "loss": 1.1022, "step": 7001 }, { "epoch": 1.261337536590858, "grad_norm": 1.2480241060256958, "learning_rate": 3.0090111387839214e-05, "loss": 1.7888, "step": 7002 }, { "epoch": 1.2615176761990543, "grad_norm": 1.3191943168640137, "learning_rate": 3.0077121825762172e-05, "loss": 1.989, "step": 7003 }, { "epoch": 1.2616978158072505, "grad_norm": 1.4725444316864014, "learning_rate": 3.0064133861888712e-05, "loss": 2.083, "step": 7004 }, { "epoch": 1.261877955415447, "grad_norm": 1.3447837829589844, "learning_rate": 3.0051147497260717e-05, "loss": 1.8028, "step": 7005 }, { "epoch": 1.2620580950236433, "grad_norm": 1.4679718017578125, "learning_rate": 3.0038162732919946e-05, "loss": 1.8906, "step": 7006 }, { "epoch": 1.2622382346318397, "grad_norm": 1.4556591510772705, "learning_rate": 3.0025179569908047e-05, "loss": 1.6007, "step": 7007 }, { "epoch": 1.2624183742400361, "grad_norm": 1.4762115478515625, "learning_rate": 3.001219800926651e-05, "loss": 1.7174, "step": 7008 }, { "epoch": 1.2625985138482325, "grad_norm": 1.5539400577545166, "learning_rate": 2.9999218052036722e-05, "loss": 1.8245, "step": 7009 }, { "epoch": 1.2627786534564287, "grad_norm": 1.7731585502624512, "learning_rate": 2.9986239699259906e-05, "loss": 1.9254, "step": 7010 }, { "epoch": 1.262958793064625, "grad_norm": 1.9975221157073975, "learning_rate": 2.9973262951977198e-05, "loss": 2.1392, "step": 7011 }, { "epoch": 1.2631389326728215, "grad_norm": 1.57728111743927, "learning_rate": 2.9960287811229586e-05, "loss": 1.6129, "step": 7012 }, { "epoch": 1.2633190722810177, "grad_norm": 1.324569582939148, "learning_rate": 2.9947314278057927e-05, "loss": 1.3379, "step": 7013 }, { "epoch": 1.263499211889214, "grad_norm": 1.3974930047988892, "learning_rate": 2.9934342353502943e-05, "loss": 1.5554, "step": 7014 }, { "epoch": 1.2636793514974105, "grad_norm": 1.2897553443908691, "learning_rate": 2.9921372038605232e-05, "loss": 1.2555, "step": 7015 }, { "epoch": 1.263859491105607, "grad_norm": 1.3799039125442505, "learning_rate": 2.9908403334405295e-05, "loss": 1.3825, "step": 7016 }, { "epoch": 1.2640396307138033, "grad_norm": 1.441825032234192, "learning_rate": 2.989543624194345e-05, "loss": 1.5057, "step": 7017 }, { "epoch": 1.2642197703219995, "grad_norm": 1.4043766260147095, "learning_rate": 2.9882470762259917e-05, "loss": 1.3594, "step": 7018 }, { "epoch": 1.264399909930196, "grad_norm": 1.2256760597229004, "learning_rate": 2.9869506896394784e-05, "loss": 1.1936, "step": 7019 }, { "epoch": 1.2645800495383923, "grad_norm": 1.401892900466919, "learning_rate": 2.9856544645387974e-05, "loss": 1.4807, "step": 7020 }, { "epoch": 1.2647601891465885, "grad_norm": 1.5168848037719727, "learning_rate": 2.9843584010279378e-05, "loss": 1.5941, "step": 7021 }, { "epoch": 1.264940328754785, "grad_norm": 1.2363992929458618, "learning_rate": 2.9830624992108645e-05, "loss": 1.0569, "step": 7022 }, { "epoch": 1.2651204683629813, "grad_norm": 1.335463523864746, "learning_rate": 2.9817667591915345e-05, "loss": 1.414, "step": 7023 }, { "epoch": 1.2653006079711777, "grad_norm": 1.3160192966461182, "learning_rate": 2.9804711810738906e-05, "loss": 1.2895, "step": 7024 }, { "epoch": 1.2654807475793741, "grad_norm": 1.521207332611084, "learning_rate": 2.979175764961867e-05, "loss": 1.3923, "step": 7025 }, { "epoch": 1.2656608871875703, "grad_norm": 1.4431427717208862, "learning_rate": 2.977880510959379e-05, "loss": 1.3236, "step": 7026 }, { "epoch": 1.2658410267957667, "grad_norm": 1.5625979900360107, "learning_rate": 2.9765854191703326e-05, "loss": 1.6278, "step": 7027 }, { "epoch": 1.2660211664039631, "grad_norm": 1.393539309501648, "learning_rate": 2.975290489698617e-05, "loss": 1.4421, "step": 7028 }, { "epoch": 1.2662013060121593, "grad_norm": 1.6322964429855347, "learning_rate": 2.97399572264811e-05, "loss": 1.4769, "step": 7029 }, { "epoch": 1.2663814456203557, "grad_norm": 1.3377214670181274, "learning_rate": 2.9727011181226818e-05, "loss": 1.3165, "step": 7030 }, { "epoch": 1.2665615852285521, "grad_norm": 1.4006478786468506, "learning_rate": 2.9714066762261823e-05, "loss": 1.299, "step": 7031 }, { "epoch": 1.2667417248367485, "grad_norm": 1.383147120475769, "learning_rate": 2.9701123970624513e-05, "loss": 1.3153, "step": 7032 }, { "epoch": 1.266921864444945, "grad_norm": 1.3741413354873657, "learning_rate": 2.9688182807353156e-05, "loss": 1.3696, "step": 7033 }, { "epoch": 1.267102004053141, "grad_norm": 1.4689764976501465, "learning_rate": 2.9675243273485852e-05, "loss": 1.3693, "step": 7034 }, { "epoch": 1.2672821436613375, "grad_norm": 1.4250136613845825, "learning_rate": 2.966230537006065e-05, "loss": 1.5007, "step": 7035 }, { "epoch": 1.267462283269534, "grad_norm": 1.639159917831421, "learning_rate": 2.964936909811541e-05, "loss": 1.5369, "step": 7036 }, { "epoch": 1.2676424228777303, "grad_norm": 1.4493772983551025, "learning_rate": 2.9636434458687866e-05, "loss": 1.4696, "step": 7037 }, { "epoch": 1.2678225624859265, "grad_norm": 1.5093430280685425, "learning_rate": 2.9623501452815626e-05, "loss": 1.5496, "step": 7038 }, { "epoch": 1.268002702094123, "grad_norm": 1.5053790807724, "learning_rate": 2.9610570081536183e-05, "loss": 1.3887, "step": 7039 }, { "epoch": 1.2681828417023193, "grad_norm": 1.7349344491958618, "learning_rate": 2.959764034588687e-05, "loss": 1.1887, "step": 7040 }, { "epoch": 1.2683629813105157, "grad_norm": 1.6846961975097656, "learning_rate": 2.9584712246904918e-05, "loss": 1.5954, "step": 7041 }, { "epoch": 1.2685431209187121, "grad_norm": 1.5604982376098633, "learning_rate": 2.9571785785627405e-05, "loss": 1.6034, "step": 7042 }, { "epoch": 1.2687232605269083, "grad_norm": 1.5936533212661743, "learning_rate": 2.9558860963091274e-05, "loss": 1.4396, "step": 7043 }, { "epoch": 1.2689034001351047, "grad_norm": 1.3839609622955322, "learning_rate": 2.9545937780333367e-05, "loss": 1.3386, "step": 7044 }, { "epoch": 1.2690835397433011, "grad_norm": 1.5781564712524414, "learning_rate": 2.953301623839037e-05, "loss": 1.4553, "step": 7045 }, { "epoch": 1.2692636793514973, "grad_norm": 1.4864450693130493, "learning_rate": 2.952009633829884e-05, "loss": 1.4066, "step": 7046 }, { "epoch": 1.2694438189596937, "grad_norm": 1.6834286451339722, "learning_rate": 2.950717808109521e-05, "loss": 1.5763, "step": 7047 }, { "epoch": 1.2696239585678901, "grad_norm": 1.5803136825561523, "learning_rate": 2.9494261467815765e-05, "loss": 1.5549, "step": 7048 }, { "epoch": 1.2698040981760865, "grad_norm": 1.354555606842041, "learning_rate": 2.9481346499496682e-05, "loss": 1.0512, "step": 7049 }, { "epoch": 1.269984237784283, "grad_norm": 1.3255127668380737, "learning_rate": 2.9468433177173994e-05, "loss": 1.1053, "step": 7050 }, { "epoch": 1.2701643773924791, "grad_norm": 1.2747818231582642, "learning_rate": 2.9455521501883586e-05, "loss": 1.2028, "step": 7051 }, { "epoch": 1.2703445170006755, "grad_norm": 1.291144609451294, "learning_rate": 2.9442611474661226e-05, "loss": 1.7835, "step": 7052 }, { "epoch": 1.270524656608872, "grad_norm": 1.305700659751892, "learning_rate": 2.942970309654258e-05, "loss": 1.7217, "step": 7053 }, { "epoch": 1.270704796217068, "grad_norm": 1.4463049173355103, "learning_rate": 2.9416796368563136e-05, "loss": 2.2301, "step": 7054 }, { "epoch": 1.2708849358252645, "grad_norm": 1.3374251127243042, "learning_rate": 2.9403891291758256e-05, "loss": 1.8841, "step": 7055 }, { "epoch": 1.271065075433461, "grad_norm": 1.3790265321731567, "learning_rate": 2.9390987867163178e-05, "loss": 1.8791, "step": 7056 }, { "epoch": 1.2712452150416573, "grad_norm": 1.3896771669387817, "learning_rate": 2.9378086095813007e-05, "loss": 1.7174, "step": 7057 }, { "epoch": 1.2714253546498537, "grad_norm": 1.5280636548995972, "learning_rate": 2.936518597874274e-05, "loss": 1.606, "step": 7058 }, { "epoch": 1.27160549425805, "grad_norm": 1.5006309747695923, "learning_rate": 2.9352287516987198e-05, "loss": 1.7131, "step": 7059 }, { "epoch": 1.2717856338662463, "grad_norm": 1.5878580808639526, "learning_rate": 2.9339390711581105e-05, "loss": 1.9328, "step": 7060 }, { "epoch": 1.2719657734744427, "grad_norm": 1.8862226009368896, "learning_rate": 2.932649556355902e-05, "loss": 1.7538, "step": 7061 }, { "epoch": 1.272145913082639, "grad_norm": 1.4710617065429688, "learning_rate": 2.9313602073955364e-05, "loss": 1.5457, "step": 7062 }, { "epoch": 1.2723260526908353, "grad_norm": 1.3599189519882202, "learning_rate": 2.9300710243804498e-05, "loss": 1.5432, "step": 7063 }, { "epoch": 1.2725061922990317, "grad_norm": 1.2542777061462402, "learning_rate": 2.9287820074140576e-05, "loss": 1.3678, "step": 7064 }, { "epoch": 1.2726863319072281, "grad_norm": 1.3461740016937256, "learning_rate": 2.9274931565997642e-05, "loss": 1.4137, "step": 7065 }, { "epoch": 1.2728664715154245, "grad_norm": 1.3523504734039307, "learning_rate": 2.9262044720409588e-05, "loss": 1.3191, "step": 7066 }, { "epoch": 1.273046611123621, "grad_norm": 1.3251413106918335, "learning_rate": 2.9249159538410214e-05, "loss": 1.3222, "step": 7067 }, { "epoch": 1.2732267507318171, "grad_norm": 1.2500574588775635, "learning_rate": 2.923627602103316e-05, "loss": 1.2112, "step": 7068 }, { "epoch": 1.2734068903400135, "grad_norm": 1.4667209386825562, "learning_rate": 2.9223394169311924e-05, "loss": 1.4481, "step": 7069 }, { "epoch": 1.27358702994821, "grad_norm": 1.368598222732544, "learning_rate": 2.9210513984279884e-05, "loss": 1.3773, "step": 7070 }, { "epoch": 1.2737671695564061, "grad_norm": 1.486831545829773, "learning_rate": 2.9197635466970286e-05, "loss": 1.4937, "step": 7071 }, { "epoch": 1.2739473091646025, "grad_norm": 1.4855842590332031, "learning_rate": 2.9184758618416248e-05, "loss": 1.6067, "step": 7072 }, { "epoch": 1.274127448772799, "grad_norm": 1.3872230052947998, "learning_rate": 2.9171883439650728e-05, "loss": 1.4845, "step": 7073 }, { "epoch": 1.2743075883809953, "grad_norm": 1.47398042678833, "learning_rate": 2.9159009931706583e-05, "loss": 1.3943, "step": 7074 }, { "epoch": 1.2744877279891917, "grad_norm": 1.4635913372039795, "learning_rate": 2.9146138095616504e-05, "loss": 1.3138, "step": 7075 }, { "epoch": 1.274667867597388, "grad_norm": 1.5388904809951782, "learning_rate": 2.9133267932413046e-05, "loss": 1.5103, "step": 7076 }, { "epoch": 1.2748480072055843, "grad_norm": 1.4420770406723022, "learning_rate": 2.9120399443128692e-05, "loss": 1.3431, "step": 7077 }, { "epoch": 1.2750281468137807, "grad_norm": 1.5094555616378784, "learning_rate": 2.910753262879573e-05, "loss": 1.4352, "step": 7078 }, { "epoch": 1.275208286421977, "grad_norm": 1.5543296337127686, "learning_rate": 2.9094667490446338e-05, "loss": 1.6147, "step": 7079 }, { "epoch": 1.2753884260301733, "grad_norm": 1.4683845043182373, "learning_rate": 2.9081804029112503e-05, "loss": 1.3719, "step": 7080 }, { "epoch": 1.2755685656383697, "grad_norm": 1.3315753936767578, "learning_rate": 2.9068942245826177e-05, "loss": 1.4237, "step": 7081 }, { "epoch": 1.2757487052465661, "grad_norm": 1.4258633852005005, "learning_rate": 2.9056082141619106e-05, "loss": 1.3707, "step": 7082 }, { "epoch": 1.2759288448547625, "grad_norm": 1.4926718473434448, "learning_rate": 2.9043223717522927e-05, "loss": 1.526, "step": 7083 }, { "epoch": 1.2761089844629587, "grad_norm": 1.361735224723816, "learning_rate": 2.9030366974569136e-05, "loss": 1.2437, "step": 7084 }, { "epoch": 1.2762891240711551, "grad_norm": 1.4047073125839233, "learning_rate": 2.901751191378907e-05, "loss": 1.4558, "step": 7085 }, { "epoch": 1.2764692636793515, "grad_norm": 1.6143884658813477, "learning_rate": 2.9004658536213996e-05, "loss": 1.4707, "step": 7086 }, { "epoch": 1.2766494032875477, "grad_norm": 1.6838817596435547, "learning_rate": 2.899180684287499e-05, "loss": 1.6467, "step": 7087 }, { "epoch": 1.2768295428957441, "grad_norm": 1.6498833894729614, "learning_rate": 2.8978956834803006e-05, "loss": 1.4667, "step": 7088 }, { "epoch": 1.2770096825039405, "grad_norm": 1.5781311988830566, "learning_rate": 2.8966108513028868e-05, "loss": 1.263, "step": 7089 }, { "epoch": 1.277189822112137, "grad_norm": 1.5843472480773926, "learning_rate": 2.895326187858326e-05, "loss": 1.3936, "step": 7090 }, { "epoch": 1.2773699617203333, "grad_norm": 1.3745026588439941, "learning_rate": 2.8940416932496727e-05, "loss": 1.2256, "step": 7091 }, { "epoch": 1.2775501013285295, "grad_norm": 1.5958194732666016, "learning_rate": 2.8927573675799697e-05, "loss": 1.316, "step": 7092 }, { "epoch": 1.277730240936726, "grad_norm": 1.5327242612838745, "learning_rate": 2.8914732109522434e-05, "loss": 1.5621, "step": 7093 }, { "epoch": 1.2779103805449223, "grad_norm": 1.5197958946228027, "learning_rate": 2.8901892234695083e-05, "loss": 1.2194, "step": 7094 }, { "epoch": 1.2780905201531187, "grad_norm": 1.63090980052948, "learning_rate": 2.8889054052347668e-05, "loss": 1.5198, "step": 7095 }, { "epoch": 1.278270659761315, "grad_norm": 1.5595170259475708, "learning_rate": 2.887621756351006e-05, "loss": 1.5265, "step": 7096 }, { "epoch": 1.2784507993695113, "grad_norm": 1.6263370513916016, "learning_rate": 2.8863382769211988e-05, "loss": 1.4035, "step": 7097 }, { "epoch": 1.2786309389777077, "grad_norm": 1.5030648708343506, "learning_rate": 2.8850549670483063e-05, "loss": 1.1608, "step": 7098 }, { "epoch": 1.2788110785859041, "grad_norm": 1.4987504482269287, "learning_rate": 2.8837718268352732e-05, "loss": 1.307, "step": 7099 }, { "epoch": 1.2789912181941006, "grad_norm": 1.453192114830017, "learning_rate": 2.8824888563850337e-05, "loss": 1.4041, "step": 7100 }, { "epoch": 1.2791713578022967, "grad_norm": 1.5771466493606567, "learning_rate": 2.881206055800507e-05, "loss": 1.3558, "step": 7101 }, { "epoch": 1.2793514974104931, "grad_norm": 1.2904399633407593, "learning_rate": 2.879923425184598e-05, "loss": 1.8712, "step": 7102 }, { "epoch": 1.2795316370186895, "grad_norm": 1.2609823942184448, "learning_rate": 2.8786409646401997e-05, "loss": 1.7608, "step": 7103 }, { "epoch": 1.2797117766268857, "grad_norm": 1.3678944110870361, "learning_rate": 2.8773586742701873e-05, "loss": 1.8384, "step": 7104 }, { "epoch": 1.2798919162350821, "grad_norm": 1.3234736919403076, "learning_rate": 2.8760765541774303e-05, "loss": 1.8296, "step": 7105 }, { "epoch": 1.2800720558432785, "grad_norm": 1.474744200706482, "learning_rate": 2.8747946044647776e-05, "loss": 1.7938, "step": 7106 }, { "epoch": 1.280252195451475, "grad_norm": 1.4485154151916504, "learning_rate": 2.8735128252350673e-05, "loss": 1.6593, "step": 7107 }, { "epoch": 1.2804323350596714, "grad_norm": 1.4289631843566895, "learning_rate": 2.8722312165911204e-05, "loss": 1.7681, "step": 7108 }, { "epoch": 1.2806124746678675, "grad_norm": 1.5767881870269775, "learning_rate": 2.87094977863575e-05, "loss": 1.671, "step": 7109 }, { "epoch": 1.280792614276064, "grad_norm": 1.5129060745239258, "learning_rate": 2.8696685114717516e-05, "loss": 1.4741, "step": 7110 }, { "epoch": 1.2809727538842604, "grad_norm": 1.9507790803909302, "learning_rate": 2.8683874152019097e-05, "loss": 1.9831, "step": 7111 }, { "epoch": 1.2811528934924565, "grad_norm": 1.731133222579956, "learning_rate": 2.867106489928989e-05, "loss": 1.8499, "step": 7112 }, { "epoch": 1.281333033100653, "grad_norm": 1.4464892148971558, "learning_rate": 2.865825735755745e-05, "loss": 1.4283, "step": 7113 }, { "epoch": 1.2815131727088493, "grad_norm": 1.3960230350494385, "learning_rate": 2.8645451527849226e-05, "loss": 1.2875, "step": 7114 }, { "epoch": 1.2816933123170458, "grad_norm": 1.3529244661331177, "learning_rate": 2.8632647411192483e-05, "loss": 1.3898, "step": 7115 }, { "epoch": 1.2818734519252422, "grad_norm": 1.2842944860458374, "learning_rate": 2.8619845008614355e-05, "loss": 1.3809, "step": 7116 }, { "epoch": 1.2820535915334383, "grad_norm": 1.2405726909637451, "learning_rate": 2.8607044321141825e-05, "loss": 1.2132, "step": 7117 }, { "epoch": 1.2822337311416347, "grad_norm": 1.3347744941711426, "learning_rate": 2.8594245349801803e-05, "loss": 1.1413, "step": 7118 }, { "epoch": 1.2824138707498312, "grad_norm": 1.3706387281417847, "learning_rate": 2.858144809562099e-05, "loss": 1.452, "step": 7119 }, { "epoch": 1.2825940103580273, "grad_norm": 1.2984391450881958, "learning_rate": 2.8568652559625976e-05, "loss": 1.1992, "step": 7120 }, { "epoch": 1.2827741499662237, "grad_norm": 1.4117110967636108, "learning_rate": 2.8555858742843223e-05, "loss": 1.3602, "step": 7121 }, { "epoch": 1.2829542895744201, "grad_norm": 1.4401477575302124, "learning_rate": 2.8543066646299025e-05, "loss": 1.3341, "step": 7122 }, { "epoch": 1.2831344291826166, "grad_norm": 1.5523301362991333, "learning_rate": 2.8530276271019573e-05, "loss": 1.4858, "step": 7123 }, { "epoch": 1.283314568790813, "grad_norm": 1.37791907787323, "learning_rate": 2.851748761803091e-05, "loss": 1.2366, "step": 7124 }, { "epoch": 1.2834947083990094, "grad_norm": 1.5780267715454102, "learning_rate": 2.850470068835892e-05, "loss": 1.6152, "step": 7125 }, { "epoch": 1.2836748480072055, "grad_norm": 1.437329649925232, "learning_rate": 2.849191548302937e-05, "loss": 1.3599, "step": 7126 }, { "epoch": 1.283854987615402, "grad_norm": 1.451629877090454, "learning_rate": 2.8479132003067866e-05, "loss": 1.3958, "step": 7127 }, { "epoch": 1.2840351272235984, "grad_norm": 1.5319790840148926, "learning_rate": 2.846635024949993e-05, "loss": 1.3025, "step": 7128 }, { "epoch": 1.2842152668317945, "grad_norm": 1.39559805393219, "learning_rate": 2.8453570223350885e-05, "loss": 1.3347, "step": 7129 }, { "epoch": 1.284395406439991, "grad_norm": 1.4030119180679321, "learning_rate": 2.8440791925645942e-05, "loss": 1.2593, "step": 7130 }, { "epoch": 1.2845755460481874, "grad_norm": 1.4799177646636963, "learning_rate": 2.842801535741017e-05, "loss": 1.3758, "step": 7131 }, { "epoch": 1.2847556856563838, "grad_norm": 1.396166205406189, "learning_rate": 2.84152405196685e-05, "loss": 1.2077, "step": 7132 }, { "epoch": 1.2849358252645802, "grad_norm": 1.44206702709198, "learning_rate": 2.840246741344572e-05, "loss": 1.4144, "step": 7133 }, { "epoch": 1.2851159648727764, "grad_norm": 1.60263192653656, "learning_rate": 2.838969603976649e-05, "loss": 1.611, "step": 7134 }, { "epoch": 1.2852961044809728, "grad_norm": 1.6154299974441528, "learning_rate": 2.8376926399655312e-05, "loss": 1.6654, "step": 7135 }, { "epoch": 1.2854762440891692, "grad_norm": 1.5466952323913574, "learning_rate": 2.8364158494136552e-05, "loss": 1.5981, "step": 7136 }, { "epoch": 1.2856563836973653, "grad_norm": 1.4793764352798462, "learning_rate": 2.835139232423447e-05, "loss": 1.2664, "step": 7137 }, { "epoch": 1.2858365233055618, "grad_norm": 1.489059329032898, "learning_rate": 2.8338627890973158e-05, "loss": 1.2028, "step": 7138 }, { "epoch": 1.2860166629137582, "grad_norm": 1.7054269313812256, "learning_rate": 2.8325865195376566e-05, "loss": 1.65, "step": 7139 }, { "epoch": 1.2861968025219546, "grad_norm": 1.7295113801956177, "learning_rate": 2.8313104238468512e-05, "loss": 1.5, "step": 7140 }, { "epoch": 1.286376942130151, "grad_norm": 1.5239750146865845, "learning_rate": 2.8300345021272643e-05, "loss": 1.306, "step": 7141 }, { "epoch": 1.2865570817383472, "grad_norm": 1.3846197128295898, "learning_rate": 2.8287587544812555e-05, "loss": 1.1421, "step": 7142 }, { "epoch": 1.2867372213465436, "grad_norm": 1.5397863388061523, "learning_rate": 2.8274831810111625e-05, "loss": 1.5609, "step": 7143 }, { "epoch": 1.28691736095474, "grad_norm": 1.4834880828857422, "learning_rate": 2.826207781819311e-05, "loss": 1.213, "step": 7144 }, { "epoch": 1.2870975005629361, "grad_norm": 1.5617412328720093, "learning_rate": 2.824932557008009e-05, "loss": 1.4283, "step": 7145 }, { "epoch": 1.2872776401711326, "grad_norm": 1.2447694540023804, "learning_rate": 2.82365750667956e-05, "loss": 0.9551, "step": 7146 }, { "epoch": 1.287457779779329, "grad_norm": 1.4038580656051636, "learning_rate": 2.822382630936246e-05, "loss": 1.2449, "step": 7147 }, { "epoch": 1.2876379193875254, "grad_norm": 1.379562258720398, "learning_rate": 2.8211079298803365e-05, "loss": 1.2643, "step": 7148 }, { "epoch": 1.2878180589957218, "grad_norm": 1.6397968530654907, "learning_rate": 2.8198334036140874e-05, "loss": 1.2982, "step": 7149 }, { "epoch": 1.287998198603918, "grad_norm": 1.5758934020996094, "learning_rate": 2.8185590522397388e-05, "loss": 1.536, "step": 7150 }, { "epoch": 1.2881783382121144, "grad_norm": 1.4082257747650146, "learning_rate": 2.8172848758595227e-05, "loss": 1.1042, "step": 7151 }, { "epoch": 1.2883584778203108, "grad_norm": 1.3642839193344116, "learning_rate": 2.8160108745756508e-05, "loss": 1.7206, "step": 7152 }, { "epoch": 1.2885386174285072, "grad_norm": 1.303656816482544, "learning_rate": 2.8147370484903224e-05, "loss": 1.5467, "step": 7153 }, { "epoch": 1.2887187570367034, "grad_norm": 1.3603367805480957, "learning_rate": 2.8134633977057235e-05, "loss": 1.8569, "step": 7154 }, { "epoch": 1.2888988966448998, "grad_norm": 1.3758049011230469, "learning_rate": 2.8121899223240256e-05, "loss": 1.7634, "step": 7155 }, { "epoch": 1.2890790362530962, "grad_norm": 1.4273656606674194, "learning_rate": 2.810916622447387e-05, "loss": 1.7321, "step": 7156 }, { "epoch": 1.2892591758612926, "grad_norm": 1.3923115730285645, "learning_rate": 2.8096434981779508e-05, "loss": 1.7377, "step": 7157 }, { "epoch": 1.289439315469489, "grad_norm": 1.323893427848816, "learning_rate": 2.8083705496178457e-05, "loss": 1.4578, "step": 7158 }, { "epoch": 1.2896194550776852, "grad_norm": 1.5742992162704468, "learning_rate": 2.8070977768691863e-05, "loss": 1.6492, "step": 7159 }, { "epoch": 1.2897995946858816, "grad_norm": 1.5723830461502075, "learning_rate": 2.8058251800340762e-05, "loss": 1.6464, "step": 7160 }, { "epoch": 1.289979734294078, "grad_norm": 1.666792631149292, "learning_rate": 2.804552759214602e-05, "loss": 1.87, "step": 7161 }, { "epoch": 1.2901598739022742, "grad_norm": 1.5731443166732788, "learning_rate": 2.803280514512836e-05, "loss": 1.8203, "step": 7162 }, { "epoch": 1.2903400135104706, "grad_norm": 1.3674746751785278, "learning_rate": 2.8020084460308372e-05, "loss": 1.4863, "step": 7163 }, { "epoch": 1.290520153118667, "grad_norm": 1.3326148986816406, "learning_rate": 2.8007365538706503e-05, "loss": 1.3019, "step": 7164 }, { "epoch": 1.2907002927268634, "grad_norm": 1.3222126960754395, "learning_rate": 2.7994648381343058e-05, "loss": 1.2766, "step": 7165 }, { "epoch": 1.2908804323350598, "grad_norm": 1.3349494934082031, "learning_rate": 2.7981932989238203e-05, "loss": 1.2268, "step": 7166 }, { "epoch": 1.291060571943256, "grad_norm": 1.4164543151855469, "learning_rate": 2.796921936341196e-05, "loss": 1.4091, "step": 7167 }, { "epoch": 1.2912407115514524, "grad_norm": 1.3065745830535889, "learning_rate": 2.7956507504884198e-05, "loss": 1.5263, "step": 7168 }, { "epoch": 1.2914208511596488, "grad_norm": 1.3957542181015015, "learning_rate": 2.7943797414674656e-05, "loss": 1.3652, "step": 7169 }, { "epoch": 1.291600990767845, "grad_norm": 1.4634047746658325, "learning_rate": 2.7931089093802964e-05, "loss": 1.5178, "step": 7170 }, { "epoch": 1.2917811303760414, "grad_norm": 1.3141335248947144, "learning_rate": 2.7918382543288553e-05, "loss": 1.2781, "step": 7171 }, { "epoch": 1.2919612699842378, "grad_norm": 1.4111889600753784, "learning_rate": 2.790567776415074e-05, "loss": 1.3847, "step": 7172 }, { "epoch": 1.2921414095924342, "grad_norm": 1.3061233758926392, "learning_rate": 2.7892974757408673e-05, "loss": 1.2763, "step": 7173 }, { "epoch": 1.2923215492006306, "grad_norm": 1.4354208707809448, "learning_rate": 2.7880273524081423e-05, "loss": 1.3865, "step": 7174 }, { "epoch": 1.2925016888088268, "grad_norm": 1.4313865900039673, "learning_rate": 2.786757406518785e-05, "loss": 1.2978, "step": 7175 }, { "epoch": 1.2926818284170232, "grad_norm": 1.3059842586517334, "learning_rate": 2.7854876381746735e-05, "loss": 1.1552, "step": 7176 }, { "epoch": 1.2928619680252196, "grad_norm": 1.370823860168457, "learning_rate": 2.784218047477663e-05, "loss": 1.2846, "step": 7177 }, { "epoch": 1.2930421076334158, "grad_norm": 1.4305886030197144, "learning_rate": 2.782948634529599e-05, "loss": 1.4535, "step": 7178 }, { "epoch": 1.2932222472416122, "grad_norm": 1.459396481513977, "learning_rate": 2.7816793994323176e-05, "loss": 1.2932, "step": 7179 }, { "epoch": 1.2934023868498086, "grad_norm": 1.5988876819610596, "learning_rate": 2.780410342287635e-05, "loss": 1.5864, "step": 7180 }, { "epoch": 1.293582526458005, "grad_norm": 1.4623887538909912, "learning_rate": 2.7791414631973533e-05, "loss": 1.3431, "step": 7181 }, { "epoch": 1.2937626660662014, "grad_norm": 1.5250016450881958, "learning_rate": 2.7778727622632618e-05, "loss": 1.3522, "step": 7182 }, { "epoch": 1.2939428056743978, "grad_norm": 1.4584288597106934, "learning_rate": 2.7766042395871324e-05, "loss": 1.5749, "step": 7183 }, { "epoch": 1.294122945282594, "grad_norm": 1.3903683423995972, "learning_rate": 2.7753358952707297e-05, "loss": 1.1608, "step": 7184 }, { "epoch": 1.2943030848907904, "grad_norm": 1.421125888824463, "learning_rate": 2.7740677294157985e-05, "loss": 1.3196, "step": 7185 }, { "epoch": 1.2944832244989868, "grad_norm": 1.492103934288025, "learning_rate": 2.7727997421240697e-05, "loss": 1.4954, "step": 7186 }, { "epoch": 1.294663364107183, "grad_norm": 1.5505269765853882, "learning_rate": 2.771531933497261e-05, "loss": 1.3766, "step": 7187 }, { "epoch": 1.2948435037153794, "grad_norm": 1.4632881879806519, "learning_rate": 2.7702643036370747e-05, "loss": 1.2459, "step": 7188 }, { "epoch": 1.2950236433235758, "grad_norm": 1.5282247066497803, "learning_rate": 2.7689968526452003e-05, "loss": 1.5709, "step": 7189 }, { "epoch": 1.2952037829317722, "grad_norm": 1.4550132751464844, "learning_rate": 2.7677295806233118e-05, "loss": 1.4013, "step": 7190 }, { "epoch": 1.2953839225399686, "grad_norm": 1.7055833339691162, "learning_rate": 2.7664624876730683e-05, "loss": 1.4501, "step": 7191 }, { "epoch": 1.2955640621481648, "grad_norm": 1.4734947681427002, "learning_rate": 2.7651955738961144e-05, "loss": 1.3351, "step": 7192 }, { "epoch": 1.2957442017563612, "grad_norm": 1.4047929048538208, "learning_rate": 2.7639288393940853e-05, "loss": 1.2174, "step": 7193 }, { "epoch": 1.2959243413645576, "grad_norm": 1.8051222562789917, "learning_rate": 2.7626622842685955e-05, "loss": 1.6539, "step": 7194 }, { "epoch": 1.2961044809727538, "grad_norm": 1.5454577207565308, "learning_rate": 2.7613959086212475e-05, "loss": 1.4145, "step": 7195 }, { "epoch": 1.2962846205809502, "grad_norm": 1.4186687469482422, "learning_rate": 2.7601297125536295e-05, "loss": 1.2348, "step": 7196 }, { "epoch": 1.2964647601891466, "grad_norm": 1.3061327934265137, "learning_rate": 2.7588636961673148e-05, "loss": 1.0776, "step": 7197 }, { "epoch": 1.296644899797343, "grad_norm": 1.9398189783096313, "learning_rate": 2.7575978595638625e-05, "loss": 1.5619, "step": 7198 }, { "epoch": 1.2968250394055394, "grad_norm": 1.518505573272705, "learning_rate": 2.756332202844818e-05, "loss": 1.4203, "step": 7199 }, { "epoch": 1.2970051790137356, "grad_norm": 1.5813530683517456, "learning_rate": 2.7550667261117113e-05, "loss": 1.3887, "step": 7200 }, { "epoch": 1.297185318621932, "grad_norm": 1.55479097366333, "learning_rate": 2.7538014294660565e-05, "loss": 1.4567, "step": 7201 }, { "epoch": 1.2973654582301284, "grad_norm": 1.4022865295410156, "learning_rate": 2.7525363130093586e-05, "loss": 1.8743, "step": 7202 }, { "epoch": 1.2975455978383246, "grad_norm": 1.2930684089660645, "learning_rate": 2.751271376843103e-05, "loss": 1.8295, "step": 7203 }, { "epoch": 1.297725737446521, "grad_norm": 1.4828613996505737, "learning_rate": 2.7500066210687626e-05, "loss": 2.0995, "step": 7204 }, { "epoch": 1.2979058770547174, "grad_norm": 1.4192568063735962, "learning_rate": 2.7487420457877942e-05, "loss": 1.8038, "step": 7205 }, { "epoch": 1.2980860166629138, "grad_norm": 1.4027570486068726, "learning_rate": 2.7474776511016408e-05, "loss": 1.9283, "step": 7206 }, { "epoch": 1.2982661562711102, "grad_norm": 1.4486907720565796, "learning_rate": 2.7462134371117354e-05, "loss": 1.8553, "step": 7207 }, { "epoch": 1.2984462958793066, "grad_norm": 1.5106427669525146, "learning_rate": 2.74494940391949e-05, "loss": 1.567, "step": 7208 }, { "epoch": 1.2986264354875028, "grad_norm": 1.640448808670044, "learning_rate": 2.7436855516263073e-05, "loss": 2.043, "step": 7209 }, { "epoch": 1.2988065750956992, "grad_norm": 1.6078855991363525, "learning_rate": 2.742421880333569e-05, "loss": 1.7148, "step": 7210 }, { "epoch": 1.2989867147038956, "grad_norm": 1.810054063796997, "learning_rate": 2.7411583901426452e-05, "loss": 2.0094, "step": 7211 }, { "epoch": 1.2991668543120918, "grad_norm": 1.6464828252792358, "learning_rate": 2.7398950811548975e-05, "loss": 1.7425, "step": 7212 }, { "epoch": 1.2993469939202882, "grad_norm": 1.3600871562957764, "learning_rate": 2.7386319534716647e-05, "loss": 1.441, "step": 7213 }, { "epoch": 1.2995271335284846, "grad_norm": 1.462981104850769, "learning_rate": 2.7373690071942758e-05, "loss": 1.4793, "step": 7214 }, { "epoch": 1.299707273136681, "grad_norm": 1.445478916168213, "learning_rate": 2.7361062424240413e-05, "loss": 1.5306, "step": 7215 }, { "epoch": 1.2998874127448774, "grad_norm": 1.46418297290802, "learning_rate": 2.734843659262263e-05, "loss": 1.5081, "step": 7216 }, { "epoch": 1.3000675523530736, "grad_norm": 1.4876571893692017, "learning_rate": 2.7335812578102226e-05, "loss": 1.5218, "step": 7217 }, { "epoch": 1.30024769196127, "grad_norm": 1.3805893659591675, "learning_rate": 2.7323190381691903e-05, "loss": 1.4711, "step": 7218 }, { "epoch": 1.3004278315694664, "grad_norm": 1.5682705640792847, "learning_rate": 2.7310570004404202e-05, "loss": 1.5264, "step": 7219 }, { "epoch": 1.3006079711776626, "grad_norm": 1.3763188123703003, "learning_rate": 2.7297951447251524e-05, "loss": 1.3485, "step": 7220 }, { "epoch": 1.300788110785859, "grad_norm": 1.4283044338226318, "learning_rate": 2.728533471124612e-05, "loss": 1.4363, "step": 7221 }, { "epoch": 1.3009682503940554, "grad_norm": 1.2529871463775635, "learning_rate": 2.7272719797400102e-05, "loss": 1.128, "step": 7222 }, { "epoch": 1.3011483900022518, "grad_norm": 1.43123459815979, "learning_rate": 2.726010670672543e-05, "loss": 1.4492, "step": 7223 }, { "epoch": 1.3013285296104482, "grad_norm": 1.3271316289901733, "learning_rate": 2.7247495440233905e-05, "loss": 1.3584, "step": 7224 }, { "epoch": 1.3015086692186444, "grad_norm": 1.3395841121673584, "learning_rate": 2.723488599893724e-05, "loss": 1.3353, "step": 7225 }, { "epoch": 1.3016888088268408, "grad_norm": 1.3886768817901611, "learning_rate": 2.722227838384692e-05, "loss": 1.2368, "step": 7226 }, { "epoch": 1.3018689484350372, "grad_norm": 1.5242255926132202, "learning_rate": 2.7209672595974338e-05, "loss": 1.6965, "step": 7227 }, { "epoch": 1.3020490880432334, "grad_norm": 1.426235556602478, "learning_rate": 2.7197068636330723e-05, "loss": 1.409, "step": 7228 }, { "epoch": 1.3022292276514298, "grad_norm": 1.4048899412155151, "learning_rate": 2.7184466505927152e-05, "loss": 1.3561, "step": 7229 }, { "epoch": 1.3024093672596262, "grad_norm": 1.4246550798416138, "learning_rate": 2.7171866205774566e-05, "loss": 1.4794, "step": 7230 }, { "epoch": 1.3025895068678226, "grad_norm": 1.4218900203704834, "learning_rate": 2.7159267736883752e-05, "loss": 1.451, "step": 7231 }, { "epoch": 1.302769646476019, "grad_norm": 1.3649016618728638, "learning_rate": 2.7146671100265353e-05, "loss": 1.3299, "step": 7232 }, { "epoch": 1.3029497860842152, "grad_norm": 1.578447937965393, "learning_rate": 2.7134076296929866e-05, "loss": 1.6683, "step": 7233 }, { "epoch": 1.3031299256924116, "grad_norm": 1.5134035348892212, "learning_rate": 2.712148332788763e-05, "loss": 1.5424, "step": 7234 }, { "epoch": 1.303310065300608, "grad_norm": 1.4058001041412354, "learning_rate": 2.710889219414887e-05, "loss": 1.2976, "step": 7235 }, { "epoch": 1.3034902049088044, "grad_norm": 1.4663184881210327, "learning_rate": 2.709630289672363e-05, "loss": 1.3552, "step": 7236 }, { "epoch": 1.3036703445170006, "grad_norm": 1.6714893579483032, "learning_rate": 2.7083715436621815e-05, "loss": 1.7139, "step": 7237 }, { "epoch": 1.303850484125197, "grad_norm": 1.4534332752227783, "learning_rate": 2.7071129814853168e-05, "loss": 1.5504, "step": 7238 }, { "epoch": 1.3040306237333934, "grad_norm": 1.4148374795913696, "learning_rate": 2.7058546032427333e-05, "loss": 1.2533, "step": 7239 }, { "epoch": 1.3042107633415898, "grad_norm": 1.5228055715560913, "learning_rate": 2.704596409035377e-05, "loss": 1.2581, "step": 7240 }, { "epoch": 1.3043909029497862, "grad_norm": 1.5890846252441406, "learning_rate": 2.7033383989641804e-05, "loss": 1.3985, "step": 7241 }, { "epoch": 1.3045710425579824, "grad_norm": 1.4719725847244263, "learning_rate": 2.7020805731300556e-05, "loss": 1.451, "step": 7242 }, { "epoch": 1.3047511821661788, "grad_norm": 1.3679713010787964, "learning_rate": 2.7008229316339073e-05, "loss": 1.3359, "step": 7243 }, { "epoch": 1.3049313217743752, "grad_norm": 1.6730425357818604, "learning_rate": 2.6995654745766252e-05, "loss": 1.6712, "step": 7244 }, { "epoch": 1.3051114613825714, "grad_norm": 1.4474024772644043, "learning_rate": 2.6983082020590805e-05, "loss": 1.3266, "step": 7245 }, { "epoch": 1.3052916009907678, "grad_norm": 1.617191195487976, "learning_rate": 2.6970511141821307e-05, "loss": 1.498, "step": 7246 }, { "epoch": 1.3054717405989642, "grad_norm": 1.7057318687438965, "learning_rate": 2.6957942110466184e-05, "loss": 1.5397, "step": 7247 }, { "epoch": 1.3056518802071606, "grad_norm": 1.5168033838272095, "learning_rate": 2.69453749275337e-05, "loss": 1.4322, "step": 7248 }, { "epoch": 1.305832019815357, "grad_norm": 1.8632164001464844, "learning_rate": 2.6932809594032038e-05, "loss": 1.536, "step": 7249 }, { "epoch": 1.3060121594235532, "grad_norm": 1.5360517501831055, "learning_rate": 2.692024611096915e-05, "loss": 1.2614, "step": 7250 }, { "epoch": 1.3061922990317496, "grad_norm": 1.3333938121795654, "learning_rate": 2.6907684479352875e-05, "loss": 1.0758, "step": 7251 }, { "epoch": 1.306372438639946, "grad_norm": 1.1890655755996704, "learning_rate": 2.68951247001909e-05, "loss": 1.5101, "step": 7252 }, { "epoch": 1.3065525782481422, "grad_norm": 1.3975564241409302, "learning_rate": 2.6882566774490775e-05, "loss": 1.9661, "step": 7253 }, { "epoch": 1.3067327178563386, "grad_norm": 1.3566436767578125, "learning_rate": 2.6870010703259878e-05, "loss": 1.8625, "step": 7254 }, { "epoch": 1.306912857464535, "grad_norm": 1.277154803276062, "learning_rate": 2.685745648750546e-05, "loss": 1.653, "step": 7255 }, { "epoch": 1.3070929970727314, "grad_norm": 1.278106927871704, "learning_rate": 2.684490412823461e-05, "loss": 1.6864, "step": 7256 }, { "epoch": 1.3072731366809278, "grad_norm": 1.4362905025482178, "learning_rate": 2.6832353626454254e-05, "loss": 1.6785, "step": 7257 }, { "epoch": 1.307453276289124, "grad_norm": 1.507697343826294, "learning_rate": 2.681980498317122e-05, "loss": 1.7905, "step": 7258 }, { "epoch": 1.3076334158973204, "grad_norm": 1.7259743213653564, "learning_rate": 2.6807258199392144e-05, "loss": 1.9113, "step": 7259 }, { "epoch": 1.3078135555055168, "grad_norm": 1.672095775604248, "learning_rate": 2.6794713276123517e-05, "loss": 1.9838, "step": 7260 }, { "epoch": 1.307993695113713, "grad_norm": 1.7213810682296753, "learning_rate": 2.6782170214371686e-05, "loss": 1.8816, "step": 7261 }, { "epoch": 1.3081738347219094, "grad_norm": 1.4583821296691895, "learning_rate": 2.6769629015142857e-05, "loss": 1.5813, "step": 7262 }, { "epoch": 1.3083539743301058, "grad_norm": 1.152076244354248, "learning_rate": 2.675708967944307e-05, "loss": 1.2458, "step": 7263 }, { "epoch": 1.3085341139383022, "grad_norm": 1.4271306991577148, "learning_rate": 2.674455220827823e-05, "loss": 1.4204, "step": 7264 }, { "epoch": 1.3087142535464986, "grad_norm": 1.304468035697937, "learning_rate": 2.6732016602654074e-05, "loss": 1.3272, "step": 7265 }, { "epoch": 1.308894393154695, "grad_norm": 1.491913914680481, "learning_rate": 2.6719482863576207e-05, "loss": 1.5809, "step": 7266 }, { "epoch": 1.3090745327628912, "grad_norm": 1.234576940536499, "learning_rate": 2.6706950992050094e-05, "loss": 1.241, "step": 7267 }, { "epoch": 1.3092546723710876, "grad_norm": 1.2868402004241943, "learning_rate": 2.6694420989081038e-05, "loss": 1.2072, "step": 7268 }, { "epoch": 1.309434811979284, "grad_norm": 1.428783893585205, "learning_rate": 2.6681892855674168e-05, "loss": 1.6418, "step": 7269 }, { "epoch": 1.3096149515874802, "grad_norm": 1.375267505645752, "learning_rate": 2.66693665928345e-05, "loss": 1.3525, "step": 7270 }, { "epoch": 1.3097950911956766, "grad_norm": 1.5284193754196167, "learning_rate": 2.6656842201566855e-05, "loss": 1.4183, "step": 7271 }, { "epoch": 1.309975230803873, "grad_norm": 1.4594486951828003, "learning_rate": 2.6644319682875984e-05, "loss": 1.2222, "step": 7272 }, { "epoch": 1.3101553704120694, "grad_norm": 1.4691972732543945, "learning_rate": 2.6631799037766436e-05, "loss": 1.2174, "step": 7273 }, { "epoch": 1.3103355100202658, "grad_norm": 1.3081021308898926, "learning_rate": 2.6619280267242557e-05, "loss": 1.3187, "step": 7274 }, { "epoch": 1.310515649628462, "grad_norm": 1.6386879682540894, "learning_rate": 2.660676337230863e-05, "loss": 1.4259, "step": 7275 }, { "epoch": 1.3106957892366584, "grad_norm": 1.4025543928146362, "learning_rate": 2.6594248353968738e-05, "loss": 1.4932, "step": 7276 }, { "epoch": 1.3108759288448548, "grad_norm": 1.5073736906051636, "learning_rate": 2.6581735213226855e-05, "loss": 1.5067, "step": 7277 }, { "epoch": 1.311056068453051, "grad_norm": 1.4584907293319702, "learning_rate": 2.656922395108677e-05, "loss": 1.325, "step": 7278 }, { "epoch": 1.3112362080612474, "grad_norm": 1.4322322607040405, "learning_rate": 2.655671456855212e-05, "loss": 1.2326, "step": 7279 }, { "epoch": 1.3114163476694438, "grad_norm": 1.452122449874878, "learning_rate": 2.6544207066626397e-05, "loss": 1.4408, "step": 7280 }, { "epoch": 1.3115964872776402, "grad_norm": 1.5400147438049316, "learning_rate": 2.6531701446312974e-05, "loss": 1.429, "step": 7281 }, { "epoch": 1.3117766268858366, "grad_norm": 1.3828176259994507, "learning_rate": 2.6519197708615028e-05, "loss": 1.3844, "step": 7282 }, { "epoch": 1.3119567664940328, "grad_norm": 1.5221953392028809, "learning_rate": 2.6506695854535614e-05, "loss": 1.5843, "step": 7283 }, { "epoch": 1.3121369061022292, "grad_norm": 1.341744065284729, "learning_rate": 2.649419588507761e-05, "loss": 1.2313, "step": 7284 }, { "epoch": 1.3123170457104256, "grad_norm": 1.5417107343673706, "learning_rate": 2.648169780124376e-05, "loss": 1.4715, "step": 7285 }, { "epoch": 1.3124971853186218, "grad_norm": 1.6844868659973145, "learning_rate": 2.6469201604036657e-05, "loss": 1.8396, "step": 7286 }, { "epoch": 1.3126773249268182, "grad_norm": 1.4083354473114014, "learning_rate": 2.6456707294458742e-05, "loss": 1.2694, "step": 7287 }, { "epoch": 1.3128574645350146, "grad_norm": 1.430038332939148, "learning_rate": 2.6444214873512303e-05, "loss": 1.2239, "step": 7288 }, { "epoch": 1.313037604143211, "grad_norm": 1.4533299207687378, "learning_rate": 2.643172434219947e-05, "loss": 1.3972, "step": 7289 }, { "epoch": 1.3132177437514074, "grad_norm": 1.5285234451293945, "learning_rate": 2.641923570152221e-05, "loss": 1.4355, "step": 7290 }, { "epoch": 1.3133978833596036, "grad_norm": 1.6282265186309814, "learning_rate": 2.6406748952482397e-05, "loss": 1.404, "step": 7291 }, { "epoch": 1.3135780229678, "grad_norm": 1.5235157012939453, "learning_rate": 2.639426409608169e-05, "loss": 1.5045, "step": 7292 }, { "epoch": 1.3137581625759964, "grad_norm": 1.5599039793014526, "learning_rate": 2.638178113332162e-05, "loss": 1.4187, "step": 7293 }, { "epoch": 1.3139383021841928, "grad_norm": 1.4678492546081543, "learning_rate": 2.636930006520356e-05, "loss": 1.3195, "step": 7294 }, { "epoch": 1.314118441792389, "grad_norm": 1.5117708444595337, "learning_rate": 2.635682089272875e-05, "loss": 1.5204, "step": 7295 }, { "epoch": 1.3142985814005854, "grad_norm": 1.659947395324707, "learning_rate": 2.634434361689824e-05, "loss": 1.632, "step": 7296 }, { "epoch": 1.3144787210087818, "grad_norm": 1.5277550220489502, "learning_rate": 2.633186823871297e-05, "loss": 1.6065, "step": 7297 }, { "epoch": 1.3146588606169782, "grad_norm": 1.4691060781478882, "learning_rate": 2.63193947591737e-05, "loss": 1.3656, "step": 7298 }, { "epoch": 1.3148390002251746, "grad_norm": 1.492888331413269, "learning_rate": 2.630692317928103e-05, "loss": 1.3314, "step": 7299 }, { "epoch": 1.3150191398333708, "grad_norm": 1.4734760522842407, "learning_rate": 2.6294453500035453e-05, "loss": 1.3116, "step": 7300 }, { "epoch": 1.3151992794415672, "grad_norm": 1.5717759132385254, "learning_rate": 2.6281985722437273e-05, "loss": 1.4146, "step": 7301 }, { "epoch": 1.3153794190497636, "grad_norm": 1.3600084781646729, "learning_rate": 2.6269519847486657e-05, "loss": 1.8444, "step": 7302 }, { "epoch": 1.3155595586579598, "grad_norm": 1.3618195056915283, "learning_rate": 2.625705587618359e-05, "loss": 1.771, "step": 7303 }, { "epoch": 1.3157396982661562, "grad_norm": 1.3526585102081299, "learning_rate": 2.6244593809527918e-05, "loss": 1.7317, "step": 7304 }, { "epoch": 1.3159198378743526, "grad_norm": 1.41840398311615, "learning_rate": 2.6232133648519374e-05, "loss": 2.0681, "step": 7305 }, { "epoch": 1.316099977482549, "grad_norm": 1.3685473203659058, "learning_rate": 2.6219675394157516e-05, "loss": 1.7488, "step": 7306 }, { "epoch": 1.3162801170907454, "grad_norm": 1.4109721183776855, "learning_rate": 2.62072190474417e-05, "loss": 1.889, "step": 7307 }, { "epoch": 1.3164602566989416, "grad_norm": 1.4641557931900024, "learning_rate": 2.619476460937116e-05, "loss": 1.8409, "step": 7308 }, { "epoch": 1.316640396307138, "grad_norm": 1.449506163597107, "learning_rate": 2.6182312080945028e-05, "loss": 1.7929, "step": 7309 }, { "epoch": 1.3168205359153344, "grad_norm": 1.5877101421356201, "learning_rate": 2.6169861463162215e-05, "loss": 1.8019, "step": 7310 }, { "epoch": 1.3170006755235306, "grad_norm": 1.931904673576355, "learning_rate": 2.615741275702151e-05, "loss": 2.1412, "step": 7311 }, { "epoch": 1.317180815131727, "grad_norm": 1.7633779048919678, "learning_rate": 2.614496596352154e-05, "loss": 1.7031, "step": 7312 }, { "epoch": 1.3173609547399234, "grad_norm": 1.4810068607330322, "learning_rate": 2.6132521083660765e-05, "loss": 1.6009, "step": 7313 }, { "epoch": 1.3175410943481198, "grad_norm": 1.4078516960144043, "learning_rate": 2.612007811843754e-05, "loss": 1.4455, "step": 7314 }, { "epoch": 1.3177212339563162, "grad_norm": 1.3709933757781982, "learning_rate": 2.610763706885002e-05, "loss": 1.5757, "step": 7315 }, { "epoch": 1.3179013735645124, "grad_norm": 1.4359428882598877, "learning_rate": 2.6095197935896216e-05, "loss": 1.7435, "step": 7316 }, { "epoch": 1.3180815131727088, "grad_norm": 1.259642243385315, "learning_rate": 2.6082760720573985e-05, "loss": 1.2472, "step": 7317 }, { "epoch": 1.3182616527809052, "grad_norm": 1.457924723625183, "learning_rate": 2.607032542388105e-05, "loss": 1.6078, "step": 7318 }, { "epoch": 1.3184417923891014, "grad_norm": 1.337712287902832, "learning_rate": 2.605789204681494e-05, "loss": 1.372, "step": 7319 }, { "epoch": 1.3186219319972978, "grad_norm": 1.3357133865356445, "learning_rate": 2.6045460590373078e-05, "loss": 1.1382, "step": 7320 }, { "epoch": 1.3188020716054942, "grad_norm": 1.3546286821365356, "learning_rate": 2.603303105555269e-05, "loss": 1.5586, "step": 7321 }, { "epoch": 1.3189822112136906, "grad_norm": 1.471184492111206, "learning_rate": 2.6020603443350866e-05, "loss": 1.584, "step": 7322 }, { "epoch": 1.319162350821887, "grad_norm": 1.3240504264831543, "learning_rate": 2.6008177754764574e-05, "loss": 1.2226, "step": 7323 }, { "epoch": 1.3193424904300834, "grad_norm": 1.342200517654419, "learning_rate": 2.5995753990790573e-05, "loss": 1.1993, "step": 7324 }, { "epoch": 1.3195226300382796, "grad_norm": 1.262457251548767, "learning_rate": 2.59833321524255e-05, "loss": 1.2465, "step": 7325 }, { "epoch": 1.319702769646476, "grad_norm": 1.4489136934280396, "learning_rate": 2.5970912240665813e-05, "loss": 1.5042, "step": 7326 }, { "epoch": 1.3198829092546724, "grad_norm": 1.3953566551208496, "learning_rate": 2.5958494256507847e-05, "loss": 1.2865, "step": 7327 }, { "epoch": 1.3200630488628686, "grad_norm": 1.4870502948760986, "learning_rate": 2.5946078200947764e-05, "loss": 1.5319, "step": 7328 }, { "epoch": 1.320243188471065, "grad_norm": 1.491736650466919, "learning_rate": 2.5933664074981573e-05, "loss": 1.6044, "step": 7329 }, { "epoch": 1.3204233280792614, "grad_norm": 1.6358563899993896, "learning_rate": 2.5921251879605123e-05, "loss": 1.4241, "step": 7330 }, { "epoch": 1.3206034676874578, "grad_norm": 1.419915795326233, "learning_rate": 2.5908841615814105e-05, "loss": 1.4875, "step": 7331 }, { "epoch": 1.3207836072956542, "grad_norm": 1.488810420036316, "learning_rate": 2.5896433284604092e-05, "loss": 1.5792, "step": 7332 }, { "epoch": 1.3209637469038504, "grad_norm": 1.5382776260375977, "learning_rate": 2.588402688697046e-05, "loss": 1.4145, "step": 7333 }, { "epoch": 1.3211438865120468, "grad_norm": 1.418113350868225, "learning_rate": 2.5871622423908448e-05, "loss": 1.3075, "step": 7334 }, { "epoch": 1.3213240261202432, "grad_norm": 1.4907478094100952, "learning_rate": 2.5859219896413135e-05, "loss": 1.4728, "step": 7335 }, { "epoch": 1.3215041657284394, "grad_norm": 1.4709175825119019, "learning_rate": 2.584681930547942e-05, "loss": 1.4941, "step": 7336 }, { "epoch": 1.3216843053366358, "grad_norm": 1.6379138231277466, "learning_rate": 2.5834420652102114e-05, "loss": 1.5501, "step": 7337 }, { "epoch": 1.3218644449448322, "grad_norm": 1.6066511869430542, "learning_rate": 2.582202393727583e-05, "loss": 1.5001, "step": 7338 }, { "epoch": 1.3220445845530286, "grad_norm": 1.4030712842941284, "learning_rate": 2.5809629161994997e-05, "loss": 1.1984, "step": 7339 }, { "epoch": 1.322224724161225, "grad_norm": 1.468855857849121, "learning_rate": 2.5797236327253926e-05, "loss": 1.6492, "step": 7340 }, { "epoch": 1.3224048637694212, "grad_norm": 1.5526490211486816, "learning_rate": 2.578484543404675e-05, "loss": 1.438, "step": 7341 }, { "epoch": 1.3225850033776176, "grad_norm": 1.6583173274993896, "learning_rate": 2.5772456483367497e-05, "loss": 1.617, "step": 7342 }, { "epoch": 1.322765142985814, "grad_norm": 1.4880729913711548, "learning_rate": 2.576006947620998e-05, "loss": 1.4121, "step": 7343 }, { "epoch": 1.3229452825940102, "grad_norm": 1.4405739307403564, "learning_rate": 2.574768441356789e-05, "loss": 1.2916, "step": 7344 }, { "epoch": 1.3231254222022066, "grad_norm": 1.4879804849624634, "learning_rate": 2.573530129643472e-05, "loss": 1.4511, "step": 7345 }, { "epoch": 1.323305561810403, "grad_norm": 1.2999303340911865, "learning_rate": 2.5722920125803883e-05, "loss": 1.1854, "step": 7346 }, { "epoch": 1.3234857014185994, "grad_norm": 1.438851237297058, "learning_rate": 2.5710540902668568e-05, "loss": 1.2726, "step": 7347 }, { "epoch": 1.3236658410267959, "grad_norm": 1.591162919998169, "learning_rate": 2.5698163628021828e-05, "loss": 1.3435, "step": 7348 }, { "epoch": 1.323845980634992, "grad_norm": 1.3374578952789307, "learning_rate": 2.5685788302856567e-05, "loss": 1.0728, "step": 7349 }, { "epoch": 1.3240261202431884, "grad_norm": 1.6100682020187378, "learning_rate": 2.5673414928165523e-05, "loss": 1.339, "step": 7350 }, { "epoch": 1.3242062598513848, "grad_norm": 1.516313910484314, "learning_rate": 2.5661043504941285e-05, "loss": 1.2676, "step": 7351 }, { "epoch": 1.3243863994595813, "grad_norm": 1.4258757829666138, "learning_rate": 2.5648674034176285e-05, "loss": 1.9373, "step": 7352 }, { "epoch": 1.3245665390677774, "grad_norm": 1.2540708780288696, "learning_rate": 2.563630651686279e-05, "loss": 1.5331, "step": 7353 }, { "epoch": 1.3247466786759738, "grad_norm": 1.3426744937896729, "learning_rate": 2.562394095399292e-05, "loss": 2.0205, "step": 7354 }, { "epoch": 1.3249268182841702, "grad_norm": 1.2861336469650269, "learning_rate": 2.5611577346558612e-05, "loss": 1.7842, "step": 7355 }, { "epoch": 1.3251069578923667, "grad_norm": 1.3309264183044434, "learning_rate": 2.5599215695551714e-05, "loss": 1.5667, "step": 7356 }, { "epoch": 1.325287097500563, "grad_norm": 1.2930220365524292, "learning_rate": 2.5586856001963843e-05, "loss": 1.5863, "step": 7357 }, { "epoch": 1.3254672371087592, "grad_norm": 1.698184847831726, "learning_rate": 2.5574498266786496e-05, "loss": 1.7746, "step": 7358 }, { "epoch": 1.3256473767169556, "grad_norm": 1.5733425617218018, "learning_rate": 2.5562142491011e-05, "loss": 1.7823, "step": 7359 }, { "epoch": 1.325827516325152, "grad_norm": 1.4867327213287354, "learning_rate": 2.5549788675628527e-05, "loss": 1.7456, "step": 7360 }, { "epoch": 1.3260076559333482, "grad_norm": 1.7311596870422363, "learning_rate": 2.55374368216301e-05, "loss": 1.9907, "step": 7361 }, { "epoch": 1.3261877955415446, "grad_norm": 1.9057351350784302, "learning_rate": 2.552508693000658e-05, "loss": 2.1653, "step": 7362 }, { "epoch": 1.326367935149741, "grad_norm": 1.521274447441101, "learning_rate": 2.5512739001748663e-05, "loss": 1.6859, "step": 7363 }, { "epoch": 1.3265480747579375, "grad_norm": 1.3585267066955566, "learning_rate": 2.550039303784687e-05, "loss": 1.3846, "step": 7364 }, { "epoch": 1.3267282143661339, "grad_norm": 1.3551506996154785, "learning_rate": 2.548804903929164e-05, "loss": 1.4298, "step": 7365 }, { "epoch": 1.32690835397433, "grad_norm": 1.466292381286621, "learning_rate": 2.5475707007073176e-05, "loss": 1.4875, "step": 7366 }, { "epoch": 1.3270884935825265, "grad_norm": 1.3474199771881104, "learning_rate": 2.546336694218155e-05, "loss": 1.17, "step": 7367 }, { "epoch": 1.3272686331907229, "grad_norm": 1.3042031526565552, "learning_rate": 2.5451028845606673e-05, "loss": 1.3004, "step": 7368 }, { "epoch": 1.327448772798919, "grad_norm": 1.3461599349975586, "learning_rate": 2.5438692718338287e-05, "loss": 1.4078, "step": 7369 }, { "epoch": 1.3276289124071154, "grad_norm": 1.455161690711975, "learning_rate": 2.5426358561366027e-05, "loss": 1.5689, "step": 7370 }, { "epoch": 1.3278090520153119, "grad_norm": 1.3472305536270142, "learning_rate": 2.5414026375679335e-05, "loss": 1.3143, "step": 7371 }, { "epoch": 1.3279891916235083, "grad_norm": 1.335525393486023, "learning_rate": 2.5401696162267454e-05, "loss": 1.2696, "step": 7372 }, { "epoch": 1.3281693312317047, "grad_norm": 1.3115664720535278, "learning_rate": 2.5389367922119502e-05, "loss": 1.2453, "step": 7373 }, { "epoch": 1.3283494708399008, "grad_norm": 1.3081218004226685, "learning_rate": 2.5377041656224488e-05, "loss": 1.1292, "step": 7374 }, { "epoch": 1.3285296104480973, "grad_norm": 1.314295768737793, "learning_rate": 2.53647173655712e-05, "loss": 1.311, "step": 7375 }, { "epoch": 1.3287097500562937, "grad_norm": 1.3531287908554077, "learning_rate": 2.5352395051148292e-05, "loss": 1.2743, "step": 7376 }, { "epoch": 1.3288898896644898, "grad_norm": 1.4227545261383057, "learning_rate": 2.534007471394424e-05, "loss": 1.3979, "step": 7377 }, { "epoch": 1.3290700292726862, "grad_norm": 1.3450112342834473, "learning_rate": 2.5327756354947362e-05, "loss": 1.4086, "step": 7378 }, { "epoch": 1.3292501688808827, "grad_norm": 1.2894139289855957, "learning_rate": 2.5315439975145872e-05, "loss": 1.2673, "step": 7379 }, { "epoch": 1.329430308489079, "grad_norm": 1.369187593460083, "learning_rate": 2.5303125575527764e-05, "loss": 1.317, "step": 7380 }, { "epoch": 1.3296104480972755, "grad_norm": 1.5643956661224365, "learning_rate": 2.5290813157080895e-05, "loss": 1.5074, "step": 7381 }, { "epoch": 1.3297905877054719, "grad_norm": 1.4602714776992798, "learning_rate": 2.5278502720792963e-05, "loss": 1.2168, "step": 7382 }, { "epoch": 1.329970727313668, "grad_norm": 1.5449684858322144, "learning_rate": 2.52661942676515e-05, "loss": 1.5887, "step": 7383 }, { "epoch": 1.3301508669218645, "grad_norm": 1.3460958003997803, "learning_rate": 2.5253887798643876e-05, "loss": 1.2643, "step": 7384 }, { "epoch": 1.3303310065300609, "grad_norm": 1.3849482536315918, "learning_rate": 2.5241583314757327e-05, "loss": 1.3234, "step": 7385 }, { "epoch": 1.330511146138257, "grad_norm": 1.4551743268966675, "learning_rate": 2.5229280816978905e-05, "loss": 1.2632, "step": 7386 }, { "epoch": 1.3306912857464535, "grad_norm": 1.5274627208709717, "learning_rate": 2.521698030629549e-05, "loss": 1.5798, "step": 7387 }, { "epoch": 1.3308714253546499, "grad_norm": 1.4003686904907227, "learning_rate": 2.520468178369386e-05, "loss": 1.2641, "step": 7388 }, { "epoch": 1.3310515649628463, "grad_norm": 1.6020455360412598, "learning_rate": 2.5192385250160588e-05, "loss": 1.6004, "step": 7389 }, { "epoch": 1.3312317045710427, "grad_norm": 1.4557509422302246, "learning_rate": 2.518009070668208e-05, "loss": 1.3596, "step": 7390 }, { "epoch": 1.3314118441792389, "grad_norm": 1.5017496347427368, "learning_rate": 2.516779815424461e-05, "loss": 1.402, "step": 7391 }, { "epoch": 1.3315919837874353, "grad_norm": 1.5260274410247803, "learning_rate": 2.515550759383427e-05, "loss": 1.502, "step": 7392 }, { "epoch": 1.3317721233956317, "grad_norm": 1.3746528625488281, "learning_rate": 2.5143219026437022e-05, "loss": 1.3034, "step": 7393 }, { "epoch": 1.3319522630038279, "grad_norm": 1.4807591438293457, "learning_rate": 2.5130932453038626e-05, "loss": 1.3978, "step": 7394 }, { "epoch": 1.3321324026120243, "grad_norm": 1.4649735689163208, "learning_rate": 2.5118647874624723e-05, "loss": 1.3293, "step": 7395 }, { "epoch": 1.3323125422202207, "grad_norm": 1.4470797777175903, "learning_rate": 2.510636529218076e-05, "loss": 1.1737, "step": 7396 }, { "epoch": 1.332492681828417, "grad_norm": 1.6314496994018555, "learning_rate": 2.5094084706692033e-05, "loss": 1.3086, "step": 7397 }, { "epoch": 1.3326728214366135, "grad_norm": 1.5507962703704834, "learning_rate": 2.508180611914372e-05, "loss": 1.3525, "step": 7398 }, { "epoch": 1.3328529610448097, "grad_norm": 1.630357265472412, "learning_rate": 2.506952953052078e-05, "loss": 1.364, "step": 7399 }, { "epoch": 1.333033100653006, "grad_norm": 1.5774949789047241, "learning_rate": 2.505725494180804e-05, "loss": 1.264, "step": 7400 }, { "epoch": 1.3332132402612025, "grad_norm": 1.452436089515686, "learning_rate": 2.504498235399014e-05, "loss": 1.1149, "step": 7401 }, { "epoch": 1.3333933798693987, "grad_norm": 1.3649039268493652, "learning_rate": 2.503271176805162e-05, "loss": 1.966, "step": 7402 }, { "epoch": 1.333573519477595, "grad_norm": 1.2950999736785889, "learning_rate": 2.5020443184976816e-05, "loss": 1.8575, "step": 7403 }, { "epoch": 1.3337536590857915, "grad_norm": 1.31584632396698, "learning_rate": 2.500817660574988e-05, "loss": 1.9418, "step": 7404 }, { "epoch": 1.3339337986939879, "grad_norm": 1.3670109510421753, "learning_rate": 2.499591203135484e-05, "loss": 1.7432, "step": 7405 }, { "epoch": 1.3341139383021843, "grad_norm": 1.4113842248916626, "learning_rate": 2.498364946277554e-05, "loss": 1.8612, "step": 7406 }, { "epoch": 1.3342940779103807, "grad_norm": 1.447308897972107, "learning_rate": 2.4971388900995712e-05, "loss": 1.4803, "step": 7407 }, { "epoch": 1.3344742175185769, "grad_norm": 1.4973483085632324, "learning_rate": 2.4959130346998877e-05, "loss": 1.5137, "step": 7408 }, { "epoch": 1.3346543571267733, "grad_norm": 1.720306634902954, "learning_rate": 2.4946873801768407e-05, "loss": 1.8015, "step": 7409 }, { "epoch": 1.3348344967349697, "grad_norm": 1.6993073225021362, "learning_rate": 2.4934619266287516e-05, "loss": 1.6798, "step": 7410 }, { "epoch": 1.3350146363431659, "grad_norm": 1.5025004148483276, "learning_rate": 2.4922366741539243e-05, "loss": 1.6906, "step": 7411 }, { "epoch": 1.3351947759513623, "grad_norm": 1.539351224899292, "learning_rate": 2.4910116228506515e-05, "loss": 1.744, "step": 7412 }, { "epoch": 1.3353749155595587, "grad_norm": 1.4526416063308716, "learning_rate": 2.4897867728172043e-05, "loss": 1.6141, "step": 7413 }, { "epoch": 1.335555055167755, "grad_norm": 1.4780164957046509, "learning_rate": 2.4885621241518392e-05, "loss": 1.5224, "step": 7414 }, { "epoch": 1.3357351947759515, "grad_norm": 1.6061023473739624, "learning_rate": 2.487337676952797e-05, "loss": 1.7414, "step": 7415 }, { "epoch": 1.3359153343841477, "grad_norm": 1.3931218385696411, "learning_rate": 2.4861134313183027e-05, "loss": 1.4208, "step": 7416 }, { "epoch": 1.336095473992344, "grad_norm": 1.5883382558822632, "learning_rate": 2.4848893873465647e-05, "loss": 1.6416, "step": 7417 }, { "epoch": 1.3362756136005405, "grad_norm": 1.4585665464401245, "learning_rate": 2.4836655451357748e-05, "loss": 1.4885, "step": 7418 }, { "epoch": 1.3364557532087367, "grad_norm": 1.4005669355392456, "learning_rate": 2.482441904784109e-05, "loss": 1.4526, "step": 7419 }, { "epoch": 1.336635892816933, "grad_norm": 1.3478909730911255, "learning_rate": 2.481218466389726e-05, "loss": 1.2833, "step": 7420 }, { "epoch": 1.3368160324251295, "grad_norm": 1.5120099782943726, "learning_rate": 2.479995230050772e-05, "loss": 1.3637, "step": 7421 }, { "epoch": 1.3369961720333259, "grad_norm": 1.517782211303711, "learning_rate": 2.4787721958653737e-05, "loss": 1.5427, "step": 7422 }, { "epoch": 1.3371763116415223, "grad_norm": 1.373405933380127, "learning_rate": 2.4775493639316417e-05, "loss": 1.3379, "step": 7423 }, { "epoch": 1.3373564512497185, "grad_norm": 1.3311572074890137, "learning_rate": 2.476326734347671e-05, "loss": 1.3233, "step": 7424 }, { "epoch": 1.3375365908579149, "grad_norm": 1.3962302207946777, "learning_rate": 2.4751043072115404e-05, "loss": 1.5041, "step": 7425 }, { "epoch": 1.3377167304661113, "grad_norm": 1.4598067998886108, "learning_rate": 2.4738820826213127e-05, "loss": 1.5001, "step": 7426 }, { "epoch": 1.3378968700743075, "grad_norm": 1.3542522192001343, "learning_rate": 2.472660060675034e-05, "loss": 1.1943, "step": 7427 }, { "epoch": 1.3380770096825039, "grad_norm": 1.6554187536239624, "learning_rate": 2.4714382414707337e-05, "loss": 1.6644, "step": 7428 }, { "epoch": 1.3382571492907003, "grad_norm": 1.2895430326461792, "learning_rate": 2.470216625106424e-05, "loss": 1.2796, "step": 7429 }, { "epoch": 1.3384372888988967, "grad_norm": 1.3304355144500732, "learning_rate": 2.4689952116801067e-05, "loss": 1.3059, "step": 7430 }, { "epoch": 1.338617428507093, "grad_norm": 1.6134988069534302, "learning_rate": 2.467774001289761e-05, "loss": 1.5491, "step": 7431 }, { "epoch": 1.3387975681152893, "grad_norm": 1.3640635013580322, "learning_rate": 2.466552994033351e-05, "loss": 1.2825, "step": 7432 }, { "epoch": 1.3389777077234857, "grad_norm": 1.4744008779525757, "learning_rate": 2.465332190008826e-05, "loss": 1.6094, "step": 7433 }, { "epoch": 1.339157847331682, "grad_norm": 1.4444788694381714, "learning_rate": 2.4641115893141163e-05, "loss": 1.4454, "step": 7434 }, { "epoch": 1.3393379869398785, "grad_norm": 1.457944393157959, "learning_rate": 2.4628911920471416e-05, "loss": 1.4873, "step": 7435 }, { "epoch": 1.3395181265480747, "grad_norm": 1.4250948429107666, "learning_rate": 2.4616709983058018e-05, "loss": 1.3701, "step": 7436 }, { "epoch": 1.339698266156271, "grad_norm": 1.445516586303711, "learning_rate": 2.4604510081879756e-05, "loss": 1.4646, "step": 7437 }, { "epoch": 1.3398784057644675, "grad_norm": 1.5809369087219238, "learning_rate": 2.459231221791533e-05, "loss": 1.546, "step": 7438 }, { "epoch": 1.340058545372664, "grad_norm": 1.6723449230194092, "learning_rate": 2.4580116392143226e-05, "loss": 1.3507, "step": 7439 }, { "epoch": 1.3402386849808603, "grad_norm": 1.6411149501800537, "learning_rate": 2.4567922605541817e-05, "loss": 1.5675, "step": 7440 }, { "epoch": 1.3404188245890565, "grad_norm": 1.4077147245407104, "learning_rate": 2.455573085908927e-05, "loss": 1.294, "step": 7441 }, { "epoch": 1.3405989641972529, "grad_norm": 1.5889328718185425, "learning_rate": 2.4543541153763604e-05, "loss": 1.2656, "step": 7442 }, { "epoch": 1.3407791038054493, "grad_norm": 1.4541394710540771, "learning_rate": 2.4531353490542647e-05, "loss": 1.4248, "step": 7443 }, { "epoch": 1.3409592434136455, "grad_norm": 1.4782243967056274, "learning_rate": 2.4519167870404125e-05, "loss": 1.3322, "step": 7444 }, { "epoch": 1.3411393830218419, "grad_norm": 1.4873660802841187, "learning_rate": 2.4506984294325542e-05, "loss": 1.506, "step": 7445 }, { "epoch": 1.3413195226300383, "grad_norm": 1.4146454334259033, "learning_rate": 2.449480276328427e-05, "loss": 1.1864, "step": 7446 }, { "epoch": 1.3414996622382347, "grad_norm": 1.3631362915039062, "learning_rate": 2.4482623278257487e-05, "loss": 1.2718, "step": 7447 }, { "epoch": 1.341679801846431, "grad_norm": 1.581967830657959, "learning_rate": 2.4470445840222245e-05, "loss": 1.4732, "step": 7448 }, { "epoch": 1.3418599414546273, "grad_norm": 1.649193525314331, "learning_rate": 2.4458270450155396e-05, "loss": 1.4134, "step": 7449 }, { "epoch": 1.3420400810628237, "grad_norm": 1.3684557676315308, "learning_rate": 2.4446097109033657e-05, "loss": 0.9573, "step": 7450 }, { "epoch": 1.34222022067102, "grad_norm": 1.4057847261428833, "learning_rate": 2.443392581783355e-05, "loss": 1.2568, "step": 7451 }, { "epoch": 1.3424003602792163, "grad_norm": 1.3378297090530396, "learning_rate": 2.442175657753145e-05, "loss": 1.8224, "step": 7452 }, { "epoch": 1.3425804998874127, "grad_norm": 1.2546147108078003, "learning_rate": 2.44095893891036e-05, "loss": 1.5809, "step": 7453 }, { "epoch": 1.342760639495609, "grad_norm": 1.3928111791610718, "learning_rate": 2.4397424253526018e-05, "loss": 1.761, "step": 7454 }, { "epoch": 1.3429407791038055, "grad_norm": 1.4378395080566406, "learning_rate": 2.4385261171774597e-05, "loss": 2.167, "step": 7455 }, { "epoch": 1.343120918712002, "grad_norm": 1.4332181215286255, "learning_rate": 2.4373100144825052e-05, "loss": 1.8079, "step": 7456 }, { "epoch": 1.343301058320198, "grad_norm": 1.5470361709594727, "learning_rate": 2.4360941173652923e-05, "loss": 1.9301, "step": 7457 }, { "epoch": 1.3434811979283945, "grad_norm": 1.8123210668563843, "learning_rate": 2.4348784259233603e-05, "loss": 2.1844, "step": 7458 }, { "epoch": 1.343661337536591, "grad_norm": 1.8186489343643188, "learning_rate": 2.433662940254232e-05, "loss": 1.9889, "step": 7459 }, { "epoch": 1.343841477144787, "grad_norm": 1.817042589187622, "learning_rate": 2.432447660455413e-05, "loss": 1.8337, "step": 7460 }, { "epoch": 1.3440216167529835, "grad_norm": 1.5014989376068115, "learning_rate": 2.4312325866243918e-05, "loss": 1.617, "step": 7461 }, { "epoch": 1.34420175636118, "grad_norm": 1.4371072053909302, "learning_rate": 2.430017718858639e-05, "loss": 1.6313, "step": 7462 }, { "epoch": 1.3443818959693763, "grad_norm": 1.3879990577697754, "learning_rate": 2.4288030572556153e-05, "loss": 1.5081, "step": 7463 }, { "epoch": 1.3445620355775727, "grad_norm": 1.3962204456329346, "learning_rate": 2.4275886019127573e-05, "loss": 1.4773, "step": 7464 }, { "epoch": 1.344742175185769, "grad_norm": 1.3746598958969116, "learning_rate": 2.4263743529274897e-05, "loss": 1.4569, "step": 7465 }, { "epoch": 1.3449223147939653, "grad_norm": 1.3780293464660645, "learning_rate": 2.4251603103972154e-05, "loss": 1.3165, "step": 7466 }, { "epoch": 1.3451024544021617, "grad_norm": 1.3271169662475586, "learning_rate": 2.423946474419329e-05, "loss": 1.2733, "step": 7467 }, { "epoch": 1.345282594010358, "grad_norm": 1.3466829061508179, "learning_rate": 2.4227328450912034e-05, "loss": 1.3988, "step": 7468 }, { "epoch": 1.3454627336185543, "grad_norm": 1.4426360130310059, "learning_rate": 2.4215194225101918e-05, "loss": 1.3421, "step": 7469 }, { "epoch": 1.3456428732267507, "grad_norm": 1.5536413192749023, "learning_rate": 2.420306206773636e-05, "loss": 1.7887, "step": 7470 }, { "epoch": 1.345823012834947, "grad_norm": 1.3275916576385498, "learning_rate": 2.4190931979788577e-05, "loss": 1.4786, "step": 7471 }, { "epoch": 1.3460031524431435, "grad_norm": 1.328144907951355, "learning_rate": 2.4178803962231682e-05, "loss": 1.2672, "step": 7472 }, { "epoch": 1.34618329205134, "grad_norm": 1.2790991067886353, "learning_rate": 2.416667801603856e-05, "loss": 1.2191, "step": 7473 }, { "epoch": 1.346363431659536, "grad_norm": 1.2651121616363525, "learning_rate": 2.415455414218194e-05, "loss": 1.3081, "step": 7474 }, { "epoch": 1.3465435712677325, "grad_norm": 1.2784348726272583, "learning_rate": 2.4142432341634397e-05, "loss": 1.3523, "step": 7475 }, { "epoch": 1.346723710875929, "grad_norm": 1.40240478515625, "learning_rate": 2.4130312615368323e-05, "loss": 1.337, "step": 7476 }, { "epoch": 1.346903850484125, "grad_norm": 1.5089629888534546, "learning_rate": 2.4118194964355985e-05, "loss": 1.3523, "step": 7477 }, { "epoch": 1.3470839900923215, "grad_norm": 1.3649535179138184, "learning_rate": 2.4106079389569435e-05, "loss": 1.2622, "step": 7478 }, { "epoch": 1.347264129700518, "grad_norm": 1.5747923851013184, "learning_rate": 2.4093965891980592e-05, "loss": 1.5236, "step": 7479 }, { "epoch": 1.3474442693087143, "grad_norm": 1.4119004011154175, "learning_rate": 2.4081854472561184e-05, "loss": 1.2261, "step": 7480 }, { "epoch": 1.3476244089169107, "grad_norm": 1.328283667564392, "learning_rate": 2.406974513228279e-05, "loss": 1.3119, "step": 7481 }, { "epoch": 1.347804548525107, "grad_norm": 1.467720627784729, "learning_rate": 2.4057637872116808e-05, "loss": 1.5919, "step": 7482 }, { "epoch": 1.3479846881333033, "grad_norm": 1.3560082912445068, "learning_rate": 2.404553269303448e-05, "loss": 1.3715, "step": 7483 }, { "epoch": 1.3481648277414997, "grad_norm": 1.4915086030960083, "learning_rate": 2.4033429596006875e-05, "loss": 1.4866, "step": 7484 }, { "epoch": 1.3483449673496959, "grad_norm": 1.5317884683609009, "learning_rate": 2.4021328582004883e-05, "loss": 1.4028, "step": 7485 }, { "epoch": 1.3485251069578923, "grad_norm": 1.2985628843307495, "learning_rate": 2.4009229651999277e-05, "loss": 1.1345, "step": 7486 }, { "epoch": 1.3487052465660887, "grad_norm": 1.5905373096466064, "learning_rate": 2.399713280696061e-05, "loss": 1.4265, "step": 7487 }, { "epoch": 1.348885386174285, "grad_norm": 1.4643833637237549, "learning_rate": 2.398503804785928e-05, "loss": 1.2912, "step": 7488 }, { "epoch": 1.3490655257824815, "grad_norm": 1.410137414932251, "learning_rate": 2.3972945375665528e-05, "loss": 1.3174, "step": 7489 }, { "epoch": 1.3492456653906777, "grad_norm": 1.5130493640899658, "learning_rate": 2.3960854791349425e-05, "loss": 1.61, "step": 7490 }, { "epoch": 1.349425804998874, "grad_norm": 1.4430396556854248, "learning_rate": 2.3948766295880864e-05, "loss": 1.1788, "step": 7491 }, { "epoch": 1.3496059446070705, "grad_norm": 1.6037979125976562, "learning_rate": 2.393667989022958e-05, "loss": 1.5669, "step": 7492 }, { "epoch": 1.349786084215267, "grad_norm": 1.68007230758667, "learning_rate": 2.3924595575365138e-05, "loss": 1.6145, "step": 7493 }, { "epoch": 1.349966223823463, "grad_norm": 1.621848464012146, "learning_rate": 2.3912513352256927e-05, "loss": 1.5698, "step": 7494 }, { "epoch": 1.3501463634316595, "grad_norm": 1.3762249946594238, "learning_rate": 2.39004332218742e-05, "loss": 1.286, "step": 7495 }, { "epoch": 1.350326503039856, "grad_norm": 1.394285798072815, "learning_rate": 2.3888355185186014e-05, "loss": 1.3227, "step": 7496 }, { "epoch": 1.3505066426480523, "grad_norm": 1.5705859661102295, "learning_rate": 2.3876279243161255e-05, "loss": 1.4613, "step": 7497 }, { "epoch": 1.3506867822562487, "grad_norm": 1.4476512670516968, "learning_rate": 2.3864205396768645e-05, "loss": 1.4022, "step": 7498 }, { "epoch": 1.350866921864445, "grad_norm": 1.5966507196426392, "learning_rate": 2.3852133646976733e-05, "loss": 1.3322, "step": 7499 }, { "epoch": 1.3510470614726413, "grad_norm": 1.504564642906189, "learning_rate": 2.384006399475396e-05, "loss": 1.1595, "step": 7500 }, { "epoch": 1.3512272010808377, "grad_norm": 1.6095887422561646, "learning_rate": 2.3827996441068496e-05, "loss": 1.5881, "step": 7501 }, { "epoch": 1.351407340689034, "grad_norm": 1.4006251096725464, "learning_rate": 2.3815930986888406e-05, "loss": 1.9172, "step": 7502 }, { "epoch": 1.3515874802972303, "grad_norm": 1.2943110466003418, "learning_rate": 2.3803867633181574e-05, "loss": 1.7611, "step": 7503 }, { "epoch": 1.3517676199054267, "grad_norm": 1.3380517959594727, "learning_rate": 2.379180638091571e-05, "loss": 1.6191, "step": 7504 }, { "epoch": 1.3519477595136231, "grad_norm": 1.3716108798980713, "learning_rate": 2.377974723105839e-05, "loss": 1.9239, "step": 7505 }, { "epoch": 1.3521278991218195, "grad_norm": 1.3542637825012207, "learning_rate": 2.3767690184576975e-05, "loss": 1.8896, "step": 7506 }, { "epoch": 1.3523080387300157, "grad_norm": 1.525539755821228, "learning_rate": 2.3755635242438674e-05, "loss": 1.807, "step": 7507 }, { "epoch": 1.352488178338212, "grad_norm": 1.6884543895721436, "learning_rate": 2.3743582405610514e-05, "loss": 1.8735, "step": 7508 }, { "epoch": 1.3526683179464085, "grad_norm": 1.6111360788345337, "learning_rate": 2.3731531675059404e-05, "loss": 2.069, "step": 7509 }, { "epoch": 1.3528484575546047, "grad_norm": 1.5447648763656616, "learning_rate": 2.371948305175202e-05, "loss": 1.5468, "step": 7510 }, { "epoch": 1.353028597162801, "grad_norm": 1.9726524353027344, "learning_rate": 2.3707436536654913e-05, "loss": 2.3478, "step": 7511 }, { "epoch": 1.3532087367709975, "grad_norm": 1.633581519126892, "learning_rate": 2.3695392130734445e-05, "loss": 1.6174, "step": 7512 }, { "epoch": 1.353388876379194, "grad_norm": 1.3164770603179932, "learning_rate": 2.3683349834956797e-05, "loss": 1.3278, "step": 7513 }, { "epoch": 1.3535690159873903, "grad_norm": 1.4568259716033936, "learning_rate": 2.3671309650288017e-05, "loss": 1.5152, "step": 7514 }, { "epoch": 1.3537491555955865, "grad_norm": 1.4861228466033936, "learning_rate": 2.3659271577693942e-05, "loss": 1.4971, "step": 7515 }, { "epoch": 1.353929295203783, "grad_norm": 1.3664424419403076, "learning_rate": 2.364723561814028e-05, "loss": 1.5865, "step": 7516 }, { "epoch": 1.3541094348119793, "grad_norm": 1.6298110485076904, "learning_rate": 2.3635201772592537e-05, "loss": 1.7763, "step": 7517 }, { "epoch": 1.3542895744201755, "grad_norm": 1.426570177078247, "learning_rate": 2.3623170042016045e-05, "loss": 1.3826, "step": 7518 }, { "epoch": 1.354469714028372, "grad_norm": 1.3917127847671509, "learning_rate": 2.361114042737602e-05, "loss": 1.4356, "step": 7519 }, { "epoch": 1.3546498536365683, "grad_norm": 1.3347135782241821, "learning_rate": 2.359911292963747e-05, "loss": 1.2705, "step": 7520 }, { "epoch": 1.3548299932447647, "grad_norm": 1.599500298500061, "learning_rate": 2.3587087549765207e-05, "loss": 1.6841, "step": 7521 }, { "epoch": 1.3550101328529611, "grad_norm": 1.3436247110366821, "learning_rate": 2.3575064288723925e-05, "loss": 1.4051, "step": 7522 }, { "epoch": 1.3551902724611575, "grad_norm": 1.3791656494140625, "learning_rate": 2.3563043147478108e-05, "loss": 1.5266, "step": 7523 }, { "epoch": 1.3553704120693537, "grad_norm": 1.4408009052276611, "learning_rate": 2.35510241269921e-05, "loss": 1.4167, "step": 7524 }, { "epoch": 1.3555505516775501, "grad_norm": 1.5563013553619385, "learning_rate": 2.3539007228230048e-05, "loss": 1.5547, "step": 7525 }, { "epoch": 1.3557306912857465, "grad_norm": 1.438392996788025, "learning_rate": 2.352699245215595e-05, "loss": 1.3598, "step": 7526 }, { "epoch": 1.3559108308939427, "grad_norm": 1.457525372505188, "learning_rate": 2.35149797997336e-05, "loss": 1.4185, "step": 7527 }, { "epoch": 1.3560909705021391, "grad_norm": 1.6293107271194458, "learning_rate": 2.3502969271926694e-05, "loss": 1.2576, "step": 7528 }, { "epoch": 1.3562711101103355, "grad_norm": 1.534010410308838, "learning_rate": 2.349096086969868e-05, "loss": 1.325, "step": 7529 }, { "epoch": 1.356451249718532, "grad_norm": 1.5167186260223389, "learning_rate": 2.347895459401288e-05, "loss": 1.5054, "step": 7530 }, { "epoch": 1.3566313893267283, "grad_norm": 1.6355319023132324, "learning_rate": 2.3466950445832425e-05, "loss": 1.4301, "step": 7531 }, { "epoch": 1.3568115289349245, "grad_norm": 1.4158082008361816, "learning_rate": 2.345494842612026e-05, "loss": 1.3088, "step": 7532 }, { "epoch": 1.356991668543121, "grad_norm": 1.5112338066101074, "learning_rate": 2.3442948535839236e-05, "loss": 1.5923, "step": 7533 }, { "epoch": 1.3571718081513173, "grad_norm": 1.4129796028137207, "learning_rate": 2.343095077595193e-05, "loss": 1.3166, "step": 7534 }, { "epoch": 1.3573519477595135, "grad_norm": 1.4962005615234375, "learning_rate": 2.3418955147420813e-05, "loss": 1.4034, "step": 7535 }, { "epoch": 1.35753208736771, "grad_norm": 1.4700889587402344, "learning_rate": 2.3406961651208147e-05, "loss": 1.5653, "step": 7536 }, { "epoch": 1.3577122269759063, "grad_norm": 1.6155070066452026, "learning_rate": 2.3394970288276086e-05, "loss": 1.4549, "step": 7537 }, { "epoch": 1.3578923665841027, "grad_norm": 1.526595115661621, "learning_rate": 2.3382981059586546e-05, "loss": 1.497, "step": 7538 }, { "epoch": 1.3580725061922991, "grad_norm": 1.4552953243255615, "learning_rate": 2.33709939661013e-05, "loss": 1.4246, "step": 7539 }, { "epoch": 1.3582526458004953, "grad_norm": 1.4766429662704468, "learning_rate": 2.3359009008781945e-05, "loss": 1.6351, "step": 7540 }, { "epoch": 1.3584327854086917, "grad_norm": 1.5850279331207275, "learning_rate": 2.33470261885899e-05, "loss": 1.4329, "step": 7541 }, { "epoch": 1.3586129250168881, "grad_norm": 1.4105831384658813, "learning_rate": 2.3335045506486447e-05, "loss": 1.3036, "step": 7542 }, { "epoch": 1.3587930646250843, "grad_norm": 1.6465802192687988, "learning_rate": 2.3323066963432656e-05, "loss": 1.5756, "step": 7543 }, { "epoch": 1.3589732042332807, "grad_norm": 1.5706514120101929, "learning_rate": 2.3311090560389436e-05, "loss": 1.527, "step": 7544 }, { "epoch": 1.3591533438414771, "grad_norm": 1.4764642715454102, "learning_rate": 2.3299116298317553e-05, "loss": 1.3045, "step": 7545 }, { "epoch": 1.3593334834496735, "grad_norm": 1.35222327709198, "learning_rate": 2.3287144178177517e-05, "loss": 1.1929, "step": 7546 }, { "epoch": 1.35951362305787, "grad_norm": 1.383483648300171, "learning_rate": 2.3275174200929784e-05, "loss": 1.1262, "step": 7547 }, { "epoch": 1.3596937626660661, "grad_norm": 1.3822935819625854, "learning_rate": 2.3263206367534556e-05, "loss": 1.0622, "step": 7548 }, { "epoch": 1.3598739022742625, "grad_norm": 1.5210989713668823, "learning_rate": 2.325124067895189e-05, "loss": 1.3703, "step": 7549 }, { "epoch": 1.360054041882459, "grad_norm": 1.4833420515060425, "learning_rate": 2.3239277136141657e-05, "loss": 1.4321, "step": 7550 }, { "epoch": 1.3602341814906553, "grad_norm": 1.4262073040008545, "learning_rate": 2.3227315740063587e-05, "loss": 1.2162, "step": 7551 }, { "epoch": 1.3604143210988515, "grad_norm": 1.285927653312683, "learning_rate": 2.3215356491677216e-05, "loss": 1.6129, "step": 7552 }, { "epoch": 1.360594460707048, "grad_norm": 1.3243283033370972, "learning_rate": 2.3203399391941894e-05, "loss": 1.9194, "step": 7553 }, { "epoch": 1.3607746003152443, "grad_norm": 1.4116501808166504, "learning_rate": 2.3191444441816824e-05, "loss": 1.9162, "step": 7554 }, { "epoch": 1.3609547399234407, "grad_norm": 1.4555381536483765, "learning_rate": 2.317949164226102e-05, "loss": 1.7655, "step": 7555 }, { "epoch": 1.3611348795316371, "grad_norm": 1.432364821434021, "learning_rate": 2.3167540994233332e-05, "loss": 1.8505, "step": 7556 }, { "epoch": 1.3613150191398333, "grad_norm": 1.5342450141906738, "learning_rate": 2.315559249869244e-05, "loss": 1.8613, "step": 7557 }, { "epoch": 1.3614951587480297, "grad_norm": 1.6430089473724365, "learning_rate": 2.3143646156596845e-05, "loss": 2.2923, "step": 7558 }, { "epoch": 1.3616752983562261, "grad_norm": 1.593636393547058, "learning_rate": 2.3131701968904844e-05, "loss": 1.9379, "step": 7559 }, { "epoch": 1.3618554379644223, "grad_norm": 1.5477970838546753, "learning_rate": 2.3119759936574653e-05, "loss": 1.8448, "step": 7560 }, { "epoch": 1.3620355775726187, "grad_norm": 1.6547980308532715, "learning_rate": 2.3107820060564222e-05, "loss": 1.6139, "step": 7561 }, { "epoch": 1.3622157171808151, "grad_norm": 1.6107239723205566, "learning_rate": 2.3095882341831372e-05, "loss": 1.437, "step": 7562 }, { "epoch": 1.3623958567890115, "grad_norm": 1.5189756155014038, "learning_rate": 2.308394678133373e-05, "loss": 1.6508, "step": 7563 }, { "epoch": 1.362575996397208, "grad_norm": 1.6922158002853394, "learning_rate": 2.307201338002875e-05, "loss": 1.8281, "step": 7564 }, { "epoch": 1.3627561360054041, "grad_norm": 1.3784329891204834, "learning_rate": 2.306008213887378e-05, "loss": 1.4116, "step": 7565 }, { "epoch": 1.3629362756136005, "grad_norm": 1.5929391384124756, "learning_rate": 2.3048153058825878e-05, "loss": 1.7402, "step": 7566 }, { "epoch": 1.363116415221797, "grad_norm": 1.3289282321929932, "learning_rate": 2.303622614084201e-05, "loss": 1.3152, "step": 7567 }, { "epoch": 1.3632965548299931, "grad_norm": 1.4627569913864136, "learning_rate": 2.302430138587895e-05, "loss": 1.5799, "step": 7568 }, { "epoch": 1.3634766944381895, "grad_norm": 1.4813125133514404, "learning_rate": 2.3012378794893275e-05, "loss": 1.3834, "step": 7569 }, { "epoch": 1.363656834046386, "grad_norm": 1.4358408451080322, "learning_rate": 2.3000458368841442e-05, "loss": 1.3643, "step": 7570 }, { "epoch": 1.3638369736545823, "grad_norm": 1.3621000051498413, "learning_rate": 2.2988540108679692e-05, "loss": 1.4555, "step": 7571 }, { "epoch": 1.3640171132627787, "grad_norm": 1.3113750219345093, "learning_rate": 2.2976624015364095e-05, "loss": 1.1542, "step": 7572 }, { "epoch": 1.364197252870975, "grad_norm": 1.494115948677063, "learning_rate": 2.296471008985054e-05, "loss": 1.5979, "step": 7573 }, { "epoch": 1.3643773924791713, "grad_norm": 1.272529125213623, "learning_rate": 2.2952798333094788e-05, "loss": 1.2037, "step": 7574 }, { "epoch": 1.3645575320873677, "grad_norm": 1.3943688869476318, "learning_rate": 2.2940888746052386e-05, "loss": 1.2098, "step": 7575 }, { "epoch": 1.364737671695564, "grad_norm": 1.3633955717086792, "learning_rate": 2.29289813296787e-05, "loss": 1.4044, "step": 7576 }, { "epoch": 1.3649178113037603, "grad_norm": 1.6230963468551636, "learning_rate": 2.291707608492895e-05, "loss": 1.4544, "step": 7577 }, { "epoch": 1.3650979509119567, "grad_norm": 1.3644753694534302, "learning_rate": 2.2905173012758168e-05, "loss": 1.1211, "step": 7578 }, { "epoch": 1.3652780905201531, "grad_norm": 1.446577548980713, "learning_rate": 2.2893272114121206e-05, "loss": 1.4278, "step": 7579 }, { "epoch": 1.3654582301283495, "grad_norm": 1.5131093263626099, "learning_rate": 2.2881373389972753e-05, "loss": 1.3811, "step": 7580 }, { "epoch": 1.365638369736546, "grad_norm": 1.3555972576141357, "learning_rate": 2.286947684126732e-05, "loss": 1.2185, "step": 7581 }, { "epoch": 1.3658185093447421, "grad_norm": 1.4976664781570435, "learning_rate": 2.285758246895924e-05, "loss": 1.4818, "step": 7582 }, { "epoch": 1.3659986489529385, "grad_norm": 1.4231165647506714, "learning_rate": 2.2845690274002653e-05, "loss": 1.3611, "step": 7583 }, { "epoch": 1.366178788561135, "grad_norm": 1.4566525220870972, "learning_rate": 2.2833800257351586e-05, "loss": 1.3703, "step": 7584 }, { "epoch": 1.3663589281693311, "grad_norm": 1.4260296821594238, "learning_rate": 2.2821912419959825e-05, "loss": 1.3481, "step": 7585 }, { "epoch": 1.3665390677775275, "grad_norm": 1.4992929697036743, "learning_rate": 2.2810026762781017e-05, "loss": 1.3258, "step": 7586 }, { "epoch": 1.366719207385724, "grad_norm": 1.5576725006103516, "learning_rate": 2.2798143286768614e-05, "loss": 1.5788, "step": 7587 }, { "epoch": 1.3668993469939204, "grad_norm": 1.5120137929916382, "learning_rate": 2.2786261992875913e-05, "loss": 1.4573, "step": 7588 }, { "epoch": 1.3670794866021168, "grad_norm": 1.7288219928741455, "learning_rate": 2.2774382882056012e-05, "loss": 1.5498, "step": 7589 }, { "epoch": 1.367259626210313, "grad_norm": 1.5634130239486694, "learning_rate": 2.276250595526186e-05, "loss": 1.5107, "step": 7590 }, { "epoch": 1.3674397658185093, "grad_norm": 1.546922206878662, "learning_rate": 2.275063121344621e-05, "loss": 1.3574, "step": 7591 }, { "epoch": 1.3676199054267058, "grad_norm": 1.651639699935913, "learning_rate": 2.2738758657561636e-05, "loss": 1.5064, "step": 7592 }, { "epoch": 1.367800045034902, "grad_norm": 1.446218490600586, "learning_rate": 2.2726888288560582e-05, "loss": 1.5245, "step": 7593 }, { "epoch": 1.3679801846430983, "grad_norm": 1.435522437095642, "learning_rate": 2.271502010739527e-05, "loss": 1.325, "step": 7594 }, { "epoch": 1.3681603242512947, "grad_norm": 1.5314617156982422, "learning_rate": 2.270315411501775e-05, "loss": 1.4824, "step": 7595 }, { "epoch": 1.3683404638594912, "grad_norm": 1.4876232147216797, "learning_rate": 2.269129031237991e-05, "loss": 1.3691, "step": 7596 }, { "epoch": 1.3685206034676876, "grad_norm": 1.5717575550079346, "learning_rate": 2.2679428700433443e-05, "loss": 1.3863, "step": 7597 }, { "epoch": 1.3687007430758837, "grad_norm": 1.6156270503997803, "learning_rate": 2.266756928012993e-05, "loss": 1.2923, "step": 7598 }, { "epoch": 1.3688808826840801, "grad_norm": 1.5814160108566284, "learning_rate": 2.2655712052420687e-05, "loss": 1.5252, "step": 7599 }, { "epoch": 1.3690610222922766, "grad_norm": 1.4465832710266113, "learning_rate": 2.2643857018256898e-05, "loss": 1.2232, "step": 7600 }, { "epoch": 1.3692411619004727, "grad_norm": 1.3768585920333862, "learning_rate": 2.2632004178589555e-05, "loss": 1.0957, "step": 7601 }, { "epoch": 1.3694213015086691, "grad_norm": 1.2455940246582031, "learning_rate": 2.2620153534369528e-05, "loss": 1.6452, "step": 7602 }, { "epoch": 1.3696014411168655, "grad_norm": 1.3670068979263306, "learning_rate": 2.260830508654745e-05, "loss": 1.8486, "step": 7603 }, { "epoch": 1.369781580725062, "grad_norm": 1.342037320137024, "learning_rate": 2.2596458836073792e-05, "loss": 1.6132, "step": 7604 }, { "epoch": 1.3699617203332584, "grad_norm": 1.4381548166275024, "learning_rate": 2.2584614783898854e-05, "loss": 1.8604, "step": 7605 }, { "epoch": 1.3701418599414548, "grad_norm": 1.346655011177063, "learning_rate": 2.257277293097276e-05, "loss": 1.788, "step": 7606 }, { "epoch": 1.370321999549651, "grad_norm": 1.3799290657043457, "learning_rate": 2.2560933278245468e-05, "loss": 1.4876, "step": 7607 }, { "epoch": 1.3705021391578474, "grad_norm": 1.3988481760025024, "learning_rate": 2.254909582666675e-05, "loss": 1.7434, "step": 7608 }, { "epoch": 1.3706822787660438, "grad_norm": 1.4916144609451294, "learning_rate": 2.2537260577186186e-05, "loss": 1.7952, "step": 7609 }, { "epoch": 1.37086241837424, "grad_norm": 1.638384222984314, "learning_rate": 2.252542753075323e-05, "loss": 1.9852, "step": 7610 }, { "epoch": 1.3710425579824363, "grad_norm": 1.7106362581253052, "learning_rate": 2.2513596688317052e-05, "loss": 1.7957, "step": 7611 }, { "epoch": 1.3712226975906328, "grad_norm": 1.400754690170288, "learning_rate": 2.2501768050826782e-05, "loss": 1.4118, "step": 7612 }, { "epoch": 1.3714028371988292, "grad_norm": 1.4144703149795532, "learning_rate": 2.248994161923128e-05, "loss": 1.4364, "step": 7613 }, { "epoch": 1.3715829768070256, "grad_norm": 1.5763905048370361, "learning_rate": 2.2478117394479263e-05, "loss": 1.7449, "step": 7614 }, { "epoch": 1.3717631164152218, "grad_norm": 1.326772928237915, "learning_rate": 2.246629537751925e-05, "loss": 1.341, "step": 7615 }, { "epoch": 1.3719432560234182, "grad_norm": 1.3690229654312134, "learning_rate": 2.2454475569299627e-05, "loss": 1.4092, "step": 7616 }, { "epoch": 1.3721233956316146, "grad_norm": 1.367480993270874, "learning_rate": 2.2442657970768554e-05, "loss": 1.3212, "step": 7617 }, { "epoch": 1.3723035352398107, "grad_norm": 1.364949345588684, "learning_rate": 2.2430842582874034e-05, "loss": 1.2856, "step": 7618 }, { "epoch": 1.3724836748480072, "grad_norm": 1.5036259889602661, "learning_rate": 2.2419029406563895e-05, "loss": 1.7055, "step": 7619 }, { "epoch": 1.3726638144562036, "grad_norm": 1.3882821798324585, "learning_rate": 2.240721844278579e-05, "loss": 1.3846, "step": 7620 }, { "epoch": 1.3728439540644, "grad_norm": 1.9397857189178467, "learning_rate": 2.2395409692487175e-05, "loss": 1.4153, "step": 7621 }, { "epoch": 1.3730240936725964, "grad_norm": 1.4227429628372192, "learning_rate": 2.238360315661535e-05, "loss": 1.4628, "step": 7622 }, { "epoch": 1.3732042332807926, "grad_norm": 1.4572449922561646, "learning_rate": 2.237179883611743e-05, "loss": 1.4179, "step": 7623 }, { "epoch": 1.373384372888989, "grad_norm": 1.4244329929351807, "learning_rate": 2.235999673194035e-05, "loss": 1.1817, "step": 7624 }, { "epoch": 1.3735645124971854, "grad_norm": 1.6989593505859375, "learning_rate": 2.2348196845030846e-05, "loss": 1.4607, "step": 7625 }, { "epoch": 1.3737446521053815, "grad_norm": 1.462368130683899, "learning_rate": 2.233639917633554e-05, "loss": 1.5085, "step": 7626 }, { "epoch": 1.373924791713578, "grad_norm": 1.441068410873413, "learning_rate": 2.2324603726800823e-05, "loss": 1.2522, "step": 7627 }, { "epoch": 1.3741049313217744, "grad_norm": 1.4737187623977661, "learning_rate": 2.2312810497372917e-05, "loss": 1.3806, "step": 7628 }, { "epoch": 1.3742850709299708, "grad_norm": 1.33798086643219, "learning_rate": 2.2301019488997844e-05, "loss": 1.2479, "step": 7629 }, { "epoch": 1.3744652105381672, "grad_norm": 1.5182621479034424, "learning_rate": 2.2289230702621533e-05, "loss": 1.5805, "step": 7630 }, { "epoch": 1.3746453501463634, "grad_norm": 1.3349831104278564, "learning_rate": 2.227744413918962e-05, "loss": 1.244, "step": 7631 }, { "epoch": 1.3748254897545598, "grad_norm": 1.5687263011932373, "learning_rate": 2.226565979964763e-05, "loss": 1.5459, "step": 7632 }, { "epoch": 1.3750056293627562, "grad_norm": 1.5157266855239868, "learning_rate": 2.2253877684940912e-05, "loss": 1.5024, "step": 7633 }, { "epoch": 1.3751857689709526, "grad_norm": 1.406571626663208, "learning_rate": 2.2242097796014588e-05, "loss": 1.3794, "step": 7634 }, { "epoch": 1.3753659085791488, "grad_norm": 1.4581559896469116, "learning_rate": 2.2230320133813676e-05, "loss": 1.2482, "step": 7635 }, { "epoch": 1.3755460481873452, "grad_norm": 1.5845005512237549, "learning_rate": 2.2218544699282955e-05, "loss": 1.5547, "step": 7636 }, { "epoch": 1.3757261877955416, "grad_norm": 1.4608479738235474, "learning_rate": 2.2206771493367052e-05, "loss": 1.3299, "step": 7637 }, { "epoch": 1.375906327403738, "grad_norm": 1.4873682260513306, "learning_rate": 2.2195000517010395e-05, "loss": 1.3317, "step": 7638 }, { "epoch": 1.3760864670119344, "grad_norm": 1.5904486179351807, "learning_rate": 2.2183231771157243e-05, "loss": 1.5558, "step": 7639 }, { "epoch": 1.3762666066201306, "grad_norm": 1.5699273347854614, "learning_rate": 2.21714652567517e-05, "loss": 1.4763, "step": 7640 }, { "epoch": 1.376446746228327, "grad_norm": 1.5878998041152954, "learning_rate": 2.2159700974737662e-05, "loss": 1.5285, "step": 7641 }, { "epoch": 1.3766268858365234, "grad_norm": 1.5779407024383545, "learning_rate": 2.214793892605887e-05, "loss": 1.5131, "step": 7642 }, { "epoch": 1.3768070254447196, "grad_norm": 1.655317783355713, "learning_rate": 2.2136179111658816e-05, "loss": 1.3801, "step": 7643 }, { "epoch": 1.376987165052916, "grad_norm": 1.5089733600616455, "learning_rate": 2.212442153248092e-05, "loss": 1.5128, "step": 7644 }, { "epoch": 1.3771673046611124, "grad_norm": 1.4732587337493896, "learning_rate": 2.211266618946835e-05, "loss": 1.236, "step": 7645 }, { "epoch": 1.3773474442693088, "grad_norm": 1.4909825325012207, "learning_rate": 2.2100913083564113e-05, "loss": 1.3546, "step": 7646 }, { "epoch": 1.3775275838775052, "grad_norm": 1.6412643194198608, "learning_rate": 2.208916221571104e-05, "loss": 1.4825, "step": 7647 }, { "epoch": 1.3777077234857014, "grad_norm": 1.5115211009979248, "learning_rate": 2.2077413586851763e-05, "loss": 1.2195, "step": 7648 }, { "epoch": 1.3778878630938978, "grad_norm": 1.4692808389663696, "learning_rate": 2.2065667197928785e-05, "loss": 1.2303, "step": 7649 }, { "epoch": 1.3780680027020942, "grad_norm": 1.536497712135315, "learning_rate": 2.2053923049884378e-05, "loss": 1.2947, "step": 7650 }, { "epoch": 1.3782481423102904, "grad_norm": 1.413273811340332, "learning_rate": 2.2042181143660652e-05, "loss": 1.2069, "step": 7651 }, { "epoch": 1.3784282819184868, "grad_norm": 1.3353500366210938, "learning_rate": 2.2030441480199536e-05, "loss": 1.7804, "step": 7652 }, { "epoch": 1.3786084215266832, "grad_norm": 1.3828232288360596, "learning_rate": 2.201870406044278e-05, "loss": 1.7261, "step": 7653 }, { "epoch": 1.3787885611348796, "grad_norm": 1.3390487432479858, "learning_rate": 2.200696888533196e-05, "loss": 1.7177, "step": 7654 }, { "epoch": 1.378968700743076, "grad_norm": 1.2968525886535645, "learning_rate": 2.1995235955808452e-05, "loss": 1.4678, "step": 7655 }, { "epoch": 1.3791488403512722, "grad_norm": 1.4880366325378418, "learning_rate": 2.198350527281348e-05, "loss": 1.8316, "step": 7656 }, { "epoch": 1.3793289799594686, "grad_norm": 1.4430006742477417, "learning_rate": 2.1971776837288056e-05, "loss": 1.7934, "step": 7657 }, { "epoch": 1.379509119567665, "grad_norm": 1.4531675577163696, "learning_rate": 2.1960050650173054e-05, "loss": 1.8011, "step": 7658 }, { "epoch": 1.3796892591758612, "grad_norm": 1.6168450117111206, "learning_rate": 2.1948326712409128e-05, "loss": 1.7135, "step": 7659 }, { "epoch": 1.3798693987840576, "grad_norm": 1.6906057596206665, "learning_rate": 2.1936605024936775e-05, "loss": 1.882, "step": 7660 }, { "epoch": 1.380049538392254, "grad_norm": 1.670449137687683, "learning_rate": 2.1924885588696297e-05, "loss": 1.7769, "step": 7661 }, { "epoch": 1.3802296780004504, "grad_norm": 1.4827964305877686, "learning_rate": 2.191316840462782e-05, "loss": 1.4404, "step": 7662 }, { "epoch": 1.3804098176086468, "grad_norm": 1.3820644617080688, "learning_rate": 2.1901453473671295e-05, "loss": 1.4777, "step": 7663 }, { "epoch": 1.3805899572168432, "grad_norm": 1.4032403230667114, "learning_rate": 2.188974079676649e-05, "loss": 1.4918, "step": 7664 }, { "epoch": 1.3807700968250394, "grad_norm": 1.4772493839263916, "learning_rate": 2.1878030374852982e-05, "loss": 1.3491, "step": 7665 }, { "epoch": 1.3809502364332358, "grad_norm": 1.542166829109192, "learning_rate": 2.186632220887016e-05, "loss": 1.576, "step": 7666 }, { "epoch": 1.3811303760414322, "grad_norm": 1.3230671882629395, "learning_rate": 2.1854616299757285e-05, "loss": 1.2233, "step": 7667 }, { "epoch": 1.3813105156496284, "grad_norm": 1.523544192314148, "learning_rate": 2.1842912648453384e-05, "loss": 1.5538, "step": 7668 }, { "epoch": 1.3814906552578248, "grad_norm": 1.3466370105743408, "learning_rate": 2.1831211255897316e-05, "loss": 1.1174, "step": 7669 }, { "epoch": 1.3816707948660212, "grad_norm": 1.5028636455535889, "learning_rate": 2.1819512123027757e-05, "loss": 1.6191, "step": 7670 }, { "epoch": 1.3818509344742176, "grad_norm": 1.4414459466934204, "learning_rate": 2.1807815250783192e-05, "loss": 1.3771, "step": 7671 }, { "epoch": 1.382031074082414, "grad_norm": 1.4875317811965942, "learning_rate": 2.1796120640101976e-05, "loss": 1.5545, "step": 7672 }, { "epoch": 1.3822112136906102, "grad_norm": 1.4802964925765991, "learning_rate": 2.1784428291922222e-05, "loss": 1.6311, "step": 7673 }, { "epoch": 1.3823913532988066, "grad_norm": 1.5269854068756104, "learning_rate": 2.177273820718189e-05, "loss": 1.4111, "step": 7674 }, { "epoch": 1.382571492907003, "grad_norm": 1.4962408542633057, "learning_rate": 2.1761050386818765e-05, "loss": 1.4957, "step": 7675 }, { "epoch": 1.3827516325151992, "grad_norm": 1.3393136262893677, "learning_rate": 2.174936483177039e-05, "loss": 1.2472, "step": 7676 }, { "epoch": 1.3829317721233956, "grad_norm": 1.3940626382827759, "learning_rate": 2.1737681542974226e-05, "loss": 1.5632, "step": 7677 }, { "epoch": 1.383111911731592, "grad_norm": 1.353214144706726, "learning_rate": 2.172600052136748e-05, "loss": 1.301, "step": 7678 }, { "epoch": 1.3832920513397884, "grad_norm": 1.4809770584106445, "learning_rate": 2.1714321767887202e-05, "loss": 1.4772, "step": 7679 }, { "epoch": 1.3834721909479848, "grad_norm": 1.5792030096054077, "learning_rate": 2.1702645283470236e-05, "loss": 1.4207, "step": 7680 }, { "epoch": 1.383652330556181, "grad_norm": 1.4190375804901123, "learning_rate": 2.16909710690533e-05, "loss": 1.2509, "step": 7681 }, { "epoch": 1.3838324701643774, "grad_norm": 1.5044050216674805, "learning_rate": 2.1679299125572877e-05, "loss": 1.4268, "step": 7682 }, { "epoch": 1.3840126097725738, "grad_norm": 1.4418790340423584, "learning_rate": 2.1667629453965284e-05, "loss": 1.3362, "step": 7683 }, { "epoch": 1.38419274938077, "grad_norm": 1.4872791767120361, "learning_rate": 2.165596205516665e-05, "loss": 1.5002, "step": 7684 }, { "epoch": 1.3843728889889664, "grad_norm": 1.5594156980514526, "learning_rate": 2.164429693011294e-05, "loss": 1.5615, "step": 7685 }, { "epoch": 1.3845530285971628, "grad_norm": 1.5328456163406372, "learning_rate": 2.1632634079739922e-05, "loss": 1.5734, "step": 7686 }, { "epoch": 1.3847331682053592, "grad_norm": 1.5469337701797485, "learning_rate": 2.1620973504983172e-05, "loss": 1.5205, "step": 7687 }, { "epoch": 1.3849133078135556, "grad_norm": 1.5033612251281738, "learning_rate": 2.1609315206778108e-05, "loss": 1.1651, "step": 7688 }, { "epoch": 1.3850934474217518, "grad_norm": 1.5737769603729248, "learning_rate": 2.1597659186059955e-05, "loss": 1.5629, "step": 7689 }, { "epoch": 1.3852735870299482, "grad_norm": 1.4151990413665771, "learning_rate": 2.1586005443763722e-05, "loss": 1.2574, "step": 7690 }, { "epoch": 1.3854537266381446, "grad_norm": 1.4447829723358154, "learning_rate": 2.1574353980824315e-05, "loss": 1.4316, "step": 7691 }, { "epoch": 1.385633866246341, "grad_norm": 1.4259389638900757, "learning_rate": 2.1562704798176385e-05, "loss": 1.3603, "step": 7692 }, { "epoch": 1.3858140058545372, "grad_norm": 1.6830917596817017, "learning_rate": 2.1551057896754422e-05, "loss": 1.582, "step": 7693 }, { "epoch": 1.3859941454627336, "grad_norm": 1.476889729499817, "learning_rate": 2.153941327749272e-05, "loss": 1.2161, "step": 7694 }, { "epoch": 1.38617428507093, "grad_norm": 1.7358243465423584, "learning_rate": 2.1527770941325463e-05, "loss": 1.6931, "step": 7695 }, { "epoch": 1.3863544246791264, "grad_norm": 1.5765501260757446, "learning_rate": 2.1516130889186532e-05, "loss": 1.3892, "step": 7696 }, { "epoch": 1.3865345642873228, "grad_norm": 1.5216618776321411, "learning_rate": 2.1504493122009707e-05, "loss": 1.457, "step": 7697 }, { "epoch": 1.386714703895519, "grad_norm": 1.5311753749847412, "learning_rate": 2.1492857640728564e-05, "loss": 1.4901, "step": 7698 }, { "epoch": 1.3868948435037154, "grad_norm": 1.5865960121154785, "learning_rate": 2.1481224446276475e-05, "loss": 1.3783, "step": 7699 }, { "epoch": 1.3870749831119118, "grad_norm": 1.4798338413238525, "learning_rate": 2.1469593539586695e-05, "loss": 1.1062, "step": 7700 }, { "epoch": 1.387255122720108, "grad_norm": 1.4690207242965698, "learning_rate": 2.145796492159222e-05, "loss": 1.3676, "step": 7701 }, { "epoch": 1.3874352623283044, "grad_norm": 1.4369398355484009, "learning_rate": 2.1446338593225897e-05, "loss": 1.964, "step": 7702 }, { "epoch": 1.3876154019365008, "grad_norm": 1.4196062088012695, "learning_rate": 2.1434714555420383e-05, "loss": 2.0065, "step": 7703 }, { "epoch": 1.3877955415446972, "grad_norm": 1.4292179346084595, "learning_rate": 2.1423092809108138e-05, "loss": 1.974, "step": 7704 }, { "epoch": 1.3879756811528936, "grad_norm": 1.4649410247802734, "learning_rate": 2.1411473355221484e-05, "loss": 1.8785, "step": 7705 }, { "epoch": 1.3881558207610898, "grad_norm": 1.6712722778320312, "learning_rate": 2.1399856194692513e-05, "loss": 1.9159, "step": 7706 }, { "epoch": 1.3883359603692862, "grad_norm": 1.5062775611877441, "learning_rate": 2.1388241328453168e-05, "loss": 1.8301, "step": 7707 }, { "epoch": 1.3885160999774826, "grad_norm": 1.4207886457443237, "learning_rate": 2.137662875743513e-05, "loss": 1.6508, "step": 7708 }, { "epoch": 1.3886962395856788, "grad_norm": 1.4575504064559937, "learning_rate": 2.136501848257001e-05, "loss": 1.6889, "step": 7709 }, { "epoch": 1.3888763791938752, "grad_norm": 1.6852158308029175, "learning_rate": 2.1353410504789155e-05, "loss": 1.996, "step": 7710 }, { "epoch": 1.3890565188020716, "grad_norm": 1.7862882614135742, "learning_rate": 2.134180482502376e-05, "loss": 2.0916, "step": 7711 }, { "epoch": 1.389236658410268, "grad_norm": 1.588097333908081, "learning_rate": 2.1330201444204824e-05, "loss": 1.7535, "step": 7712 }, { "epoch": 1.3894167980184644, "grad_norm": 1.5554462671279907, "learning_rate": 2.131860036326314e-05, "loss": 1.5682, "step": 7713 }, { "epoch": 1.3895969376266606, "grad_norm": 1.4419959783554077, "learning_rate": 2.1307001583129387e-05, "loss": 1.3517, "step": 7714 }, { "epoch": 1.389777077234857, "grad_norm": 1.3815621137619019, "learning_rate": 2.1295405104733994e-05, "loss": 1.525, "step": 7715 }, { "epoch": 1.3899572168430534, "grad_norm": 1.4243804216384888, "learning_rate": 2.128381092900722e-05, "loss": 1.4571, "step": 7716 }, { "epoch": 1.3901373564512496, "grad_norm": 1.5259944200515747, "learning_rate": 2.1272219056879145e-05, "loss": 1.6857, "step": 7717 }, { "epoch": 1.390317496059446, "grad_norm": 1.5324000120162964, "learning_rate": 2.126062948927966e-05, "loss": 1.5487, "step": 7718 }, { "epoch": 1.3904976356676424, "grad_norm": 1.4288406372070312, "learning_rate": 2.1249042227138482e-05, "loss": 1.3615, "step": 7719 }, { "epoch": 1.3906777752758388, "grad_norm": 1.6133449077606201, "learning_rate": 2.1237457271385135e-05, "loss": 1.6416, "step": 7720 }, { "epoch": 1.3908579148840352, "grad_norm": 1.3956414461135864, "learning_rate": 2.1225874622948953e-05, "loss": 1.3475, "step": 7721 }, { "epoch": 1.3910380544922316, "grad_norm": 1.3993265628814697, "learning_rate": 2.1214294282759078e-05, "loss": 1.4546, "step": 7722 }, { "epoch": 1.3912181941004278, "grad_norm": 1.4617594480514526, "learning_rate": 2.1202716251744514e-05, "loss": 1.3715, "step": 7723 }, { "epoch": 1.3913983337086242, "grad_norm": 1.438252568244934, "learning_rate": 2.1191140530834024e-05, "loss": 1.4534, "step": 7724 }, { "epoch": 1.3915784733168206, "grad_norm": 1.406053900718689, "learning_rate": 2.1179567120956206e-05, "loss": 1.2718, "step": 7725 }, { "epoch": 1.3917586129250168, "grad_norm": 1.5558401346206665, "learning_rate": 2.1167996023039476e-05, "loss": 1.414, "step": 7726 }, { "epoch": 1.3919387525332132, "grad_norm": 1.377637267112732, "learning_rate": 2.1156427238012067e-05, "loss": 1.3197, "step": 7727 }, { "epoch": 1.3921188921414096, "grad_norm": 1.4500179290771484, "learning_rate": 2.114486076680201e-05, "loss": 1.3714, "step": 7728 }, { "epoch": 1.392299031749606, "grad_norm": 1.4364067316055298, "learning_rate": 2.1133296610337168e-05, "loss": 1.5057, "step": 7729 }, { "epoch": 1.3924791713578024, "grad_norm": 1.4090951681137085, "learning_rate": 2.1121734769545216e-05, "loss": 1.3518, "step": 7730 }, { "epoch": 1.3926593109659986, "grad_norm": 1.2920494079589844, "learning_rate": 2.1110175245353632e-05, "loss": 1.1892, "step": 7731 }, { "epoch": 1.392839450574195, "grad_norm": 1.4387471675872803, "learning_rate": 2.1098618038689694e-05, "loss": 1.3064, "step": 7732 }, { "epoch": 1.3930195901823914, "grad_norm": 1.4395453929901123, "learning_rate": 2.1087063150480562e-05, "loss": 1.4981, "step": 7733 }, { "epoch": 1.3931997297905876, "grad_norm": 1.7355272769927979, "learning_rate": 2.1075510581653146e-05, "loss": 1.6893, "step": 7734 }, { "epoch": 1.393379869398784, "grad_norm": 1.5270005464553833, "learning_rate": 2.1063960333134176e-05, "loss": 1.2634, "step": 7735 }, { "epoch": 1.3935600090069804, "grad_norm": 1.7191475629806519, "learning_rate": 2.1052412405850197e-05, "loss": 1.508, "step": 7736 }, { "epoch": 1.3937401486151768, "grad_norm": 1.381528377532959, "learning_rate": 2.1040866800727615e-05, "loss": 1.1787, "step": 7737 }, { "epoch": 1.3939202882233732, "grad_norm": 1.5310136079788208, "learning_rate": 2.1029323518692594e-05, "loss": 1.3795, "step": 7738 }, { "epoch": 1.3941004278315694, "grad_norm": 1.6061034202575684, "learning_rate": 2.1017782560671123e-05, "loss": 1.4098, "step": 7739 }, { "epoch": 1.3942805674397658, "grad_norm": 1.6720610857009888, "learning_rate": 2.1006243927589043e-05, "loss": 1.4564, "step": 7740 }, { "epoch": 1.3944607070479622, "grad_norm": 1.5601205825805664, "learning_rate": 2.099470762037191e-05, "loss": 1.3781, "step": 7741 }, { "epoch": 1.3946408466561584, "grad_norm": 1.4646220207214355, "learning_rate": 2.0983173639945232e-05, "loss": 1.3786, "step": 7742 }, { "epoch": 1.3948209862643548, "grad_norm": 1.5586062669754028, "learning_rate": 2.0971641987234227e-05, "loss": 1.4233, "step": 7743 }, { "epoch": 1.3950011258725512, "grad_norm": 1.6915987730026245, "learning_rate": 2.0960112663163957e-05, "loss": 1.4978, "step": 7744 }, { "epoch": 1.3951812654807476, "grad_norm": 1.725235939025879, "learning_rate": 2.0948585668659308e-05, "loss": 1.7309, "step": 7745 }, { "epoch": 1.395361405088944, "grad_norm": 1.4718632698059082, "learning_rate": 2.093706100464495e-05, "loss": 1.3467, "step": 7746 }, { "epoch": 1.3955415446971402, "grad_norm": 1.59532630443573, "learning_rate": 2.0925538672045412e-05, "loss": 1.3294, "step": 7747 }, { "epoch": 1.3957216843053366, "grad_norm": 1.705466628074646, "learning_rate": 2.0914018671785008e-05, "loss": 1.3364, "step": 7748 }, { "epoch": 1.395901823913533, "grad_norm": 1.4785093069076538, "learning_rate": 2.0902501004787862e-05, "loss": 1.5218, "step": 7749 }, { "epoch": 1.3960819635217294, "grad_norm": 1.6198146343231201, "learning_rate": 2.0890985671977904e-05, "loss": 1.3571, "step": 7750 }, { "epoch": 1.3962621031299256, "grad_norm": 1.4123060703277588, "learning_rate": 2.08794726742789e-05, "loss": 1.1225, "step": 7751 }, { "epoch": 1.396442242738122, "grad_norm": 1.4546451568603516, "learning_rate": 2.0867962012614418e-05, "loss": 1.6534, "step": 7752 }, { "epoch": 1.3966223823463184, "grad_norm": 1.234961748123169, "learning_rate": 2.0856453687907827e-05, "loss": 1.5735, "step": 7753 }, { "epoch": 1.3968025219545148, "grad_norm": 1.319766640663147, "learning_rate": 2.084494770108233e-05, "loss": 1.8374, "step": 7754 }, { "epoch": 1.3969826615627112, "grad_norm": 1.3299893140792847, "learning_rate": 2.083344405306091e-05, "loss": 1.8847, "step": 7755 }, { "epoch": 1.3971628011709074, "grad_norm": 1.3398196697235107, "learning_rate": 2.0821942744766414e-05, "loss": 1.683, "step": 7756 }, { "epoch": 1.3973429407791038, "grad_norm": 1.4054995775222778, "learning_rate": 2.0810443777121464e-05, "loss": 2.0796, "step": 7757 }, { "epoch": 1.3975230803873002, "grad_norm": 1.5112829208374023, "learning_rate": 2.07989471510485e-05, "loss": 1.8288, "step": 7758 }, { "epoch": 1.3977032199954964, "grad_norm": 1.4432308673858643, "learning_rate": 2.0787452867469764e-05, "loss": 1.8768, "step": 7759 }, { "epoch": 1.3978833596036928, "grad_norm": 1.4694452285766602, "learning_rate": 2.0775960927307337e-05, "loss": 2.0351, "step": 7760 }, { "epoch": 1.3980634992118892, "grad_norm": 1.7296438217163086, "learning_rate": 2.0764471331483083e-05, "loss": 2.1114, "step": 7761 }, { "epoch": 1.3982436388200856, "grad_norm": 1.6175496578216553, "learning_rate": 2.0752984080918703e-05, "loss": 1.753, "step": 7762 }, { "epoch": 1.398423778428282, "grad_norm": 1.4557983875274658, "learning_rate": 2.0741499176535695e-05, "loss": 1.6097, "step": 7763 }, { "epoch": 1.3986039180364782, "grad_norm": 1.279748558998108, "learning_rate": 2.073001661925535e-05, "loss": 1.408, "step": 7764 }, { "epoch": 1.3987840576446746, "grad_norm": 1.370500087738037, "learning_rate": 2.0718536409998834e-05, "loss": 1.6582, "step": 7765 }, { "epoch": 1.398964197252871, "grad_norm": 1.5277808904647827, "learning_rate": 2.0707058549687068e-05, "loss": 1.5586, "step": 7766 }, { "epoch": 1.3991443368610672, "grad_norm": 1.3419978618621826, "learning_rate": 2.0695583039240796e-05, "loss": 1.2688, "step": 7767 }, { "epoch": 1.3993244764692636, "grad_norm": 1.29177987575531, "learning_rate": 2.0684109879580577e-05, "loss": 1.3396, "step": 7768 }, { "epoch": 1.39950461607746, "grad_norm": 1.367904782295227, "learning_rate": 2.0672639071626765e-05, "loss": 1.41, "step": 7769 }, { "epoch": 1.3996847556856564, "grad_norm": 1.4549157619476318, "learning_rate": 2.0661170616299586e-05, "loss": 1.4626, "step": 7770 }, { "epoch": 1.3998648952938528, "grad_norm": 1.330974817276001, "learning_rate": 2.0649704514519004e-05, "loss": 1.3388, "step": 7771 }, { "epoch": 1.400045034902049, "grad_norm": 1.4854329824447632, "learning_rate": 2.0638240767204847e-05, "loss": 1.504, "step": 7772 }, { "epoch": 1.4002251745102454, "grad_norm": 1.4392707347869873, "learning_rate": 2.06267793752767e-05, "loss": 1.4376, "step": 7773 }, { "epoch": 1.4004053141184418, "grad_norm": 1.3751403093338013, "learning_rate": 2.0615320339653988e-05, "loss": 1.3028, "step": 7774 }, { "epoch": 1.400585453726638, "grad_norm": 1.3719784021377563, "learning_rate": 2.060386366125598e-05, "loss": 1.2964, "step": 7775 }, { "epoch": 1.4007655933348344, "grad_norm": 1.4615179300308228, "learning_rate": 2.0592409341001706e-05, "loss": 1.4476, "step": 7776 }, { "epoch": 1.4009457329430308, "grad_norm": 1.4169727563858032, "learning_rate": 2.058095737981004e-05, "loss": 1.1946, "step": 7777 }, { "epoch": 1.4011258725512272, "grad_norm": 1.5135514736175537, "learning_rate": 2.0569507778599617e-05, "loss": 1.3585, "step": 7778 }, { "epoch": 1.4013060121594236, "grad_norm": 1.5087056159973145, "learning_rate": 2.055806053828897e-05, "loss": 1.3662, "step": 7779 }, { "epoch": 1.40148615176762, "grad_norm": 1.3473104238510132, "learning_rate": 2.0546615659796364e-05, "loss": 1.3774, "step": 7780 }, { "epoch": 1.4016662913758162, "grad_norm": 1.464226484298706, "learning_rate": 2.0535173144039903e-05, "loss": 1.522, "step": 7781 }, { "epoch": 1.4018464309840126, "grad_norm": 1.3973422050476074, "learning_rate": 2.0523732991937498e-05, "loss": 1.2291, "step": 7782 }, { "epoch": 1.402026570592209, "grad_norm": 1.606042504310608, "learning_rate": 2.0512295204406878e-05, "loss": 1.6585, "step": 7783 }, { "epoch": 1.4022067102004052, "grad_norm": 1.6926218271255493, "learning_rate": 2.0500859782365573e-05, "loss": 1.7902, "step": 7784 }, { "epoch": 1.4023868498086016, "grad_norm": 1.49717116355896, "learning_rate": 2.048942672673093e-05, "loss": 1.5828, "step": 7785 }, { "epoch": 1.402566989416798, "grad_norm": 1.4541187286376953, "learning_rate": 2.04779960384201e-05, "loss": 1.326, "step": 7786 }, { "epoch": 1.4027471290249944, "grad_norm": 1.4698036909103394, "learning_rate": 2.0466567718350026e-05, "loss": 1.168, "step": 7787 }, { "epoch": 1.4029272686331908, "grad_norm": 1.5216681957244873, "learning_rate": 2.0455141767437524e-05, "loss": 1.5124, "step": 7788 }, { "epoch": 1.403107408241387, "grad_norm": 1.5490877628326416, "learning_rate": 2.0443718186599164e-05, "loss": 1.4346, "step": 7789 }, { "epoch": 1.4032875478495834, "grad_norm": 1.5008727312088013, "learning_rate": 2.043229697675133e-05, "loss": 1.2558, "step": 7790 }, { "epoch": 1.4034676874577798, "grad_norm": 1.8694827556610107, "learning_rate": 2.0420878138810227e-05, "loss": 1.7307, "step": 7791 }, { "epoch": 1.403647827065976, "grad_norm": 1.5814460515975952, "learning_rate": 2.040946167369188e-05, "loss": 1.4703, "step": 7792 }, { "epoch": 1.4038279666741724, "grad_norm": 1.6565017700195312, "learning_rate": 2.03980475823121e-05, "loss": 1.3891, "step": 7793 }, { "epoch": 1.4040081062823688, "grad_norm": 1.4405680894851685, "learning_rate": 2.0386635865586528e-05, "loss": 1.3573, "step": 7794 }, { "epoch": 1.4041882458905652, "grad_norm": 1.5605217218399048, "learning_rate": 2.0375226524430595e-05, "loss": 1.3605, "step": 7795 }, { "epoch": 1.4043683854987616, "grad_norm": 1.5561670064926147, "learning_rate": 2.0363819559759567e-05, "loss": 1.3627, "step": 7796 }, { "epoch": 1.4045485251069578, "grad_norm": 1.5371930599212646, "learning_rate": 2.035241497248848e-05, "loss": 1.3571, "step": 7797 }, { "epoch": 1.4047286647151542, "grad_norm": 1.4775609970092773, "learning_rate": 2.0341012763532243e-05, "loss": 1.4113, "step": 7798 }, { "epoch": 1.4049088043233506, "grad_norm": 1.5602045059204102, "learning_rate": 2.0329612933805515e-05, "loss": 1.2775, "step": 7799 }, { "epoch": 1.4050889439315468, "grad_norm": 1.3970675468444824, "learning_rate": 2.031821548422278e-05, "loss": 1.015, "step": 7800 }, { "epoch": 1.4052690835397432, "grad_norm": 1.5551159381866455, "learning_rate": 2.030682041569833e-05, "loss": 1.502, "step": 7801 }, { "epoch": 1.4054492231479396, "grad_norm": 1.2985392808914185, "learning_rate": 2.0295427729146293e-05, "loss": 1.5912, "step": 7802 }, { "epoch": 1.405629362756136, "grad_norm": 1.272081732749939, "learning_rate": 2.0284037425480583e-05, "loss": 1.6425, "step": 7803 }, { "epoch": 1.4058095023643324, "grad_norm": 1.3881675004959106, "learning_rate": 2.0272649505614915e-05, "loss": 2.0241, "step": 7804 }, { "epoch": 1.4059896419725288, "grad_norm": 1.5090227127075195, "learning_rate": 2.026126397046284e-05, "loss": 1.7893, "step": 7805 }, { "epoch": 1.406169781580725, "grad_norm": 1.481903314590454, "learning_rate": 2.024988082093765e-05, "loss": 1.6751, "step": 7806 }, { "epoch": 1.4063499211889214, "grad_norm": 1.3776445388793945, "learning_rate": 2.0238500057952547e-05, "loss": 1.8501, "step": 7807 }, { "epoch": 1.4065300607971178, "grad_norm": 1.4470198154449463, "learning_rate": 2.022712168242047e-05, "loss": 1.6271, "step": 7808 }, { "epoch": 1.406710200405314, "grad_norm": 1.6375354528427124, "learning_rate": 2.0215745695254194e-05, "loss": 1.9413, "step": 7809 }, { "epoch": 1.4068903400135104, "grad_norm": 1.6327897310256958, "learning_rate": 2.0204372097366287e-05, "loss": 1.878, "step": 7810 }, { "epoch": 1.4070704796217068, "grad_norm": 1.7835969924926758, "learning_rate": 2.019300088966912e-05, "loss": 1.8846, "step": 7811 }, { "epoch": 1.4072506192299032, "grad_norm": 1.527450442314148, "learning_rate": 2.0181632073074926e-05, "loss": 1.6527, "step": 7812 }, { "epoch": 1.4074307588380996, "grad_norm": 1.3308790922164917, "learning_rate": 2.017026564849568e-05, "loss": 1.4574, "step": 7813 }, { "epoch": 1.4076108984462958, "grad_norm": 1.3878746032714844, "learning_rate": 2.0158901616843196e-05, "loss": 1.5306, "step": 7814 }, { "epoch": 1.4077910380544922, "grad_norm": 1.296222448348999, "learning_rate": 2.0147539979029083e-05, "loss": 1.2255, "step": 7815 }, { "epoch": 1.4079711776626886, "grad_norm": 1.4993832111358643, "learning_rate": 2.0136180735964784e-05, "loss": 1.3738, "step": 7816 }, { "epoch": 1.4081513172708848, "grad_norm": 1.3295066356658936, "learning_rate": 2.0124823888561513e-05, "loss": 1.3389, "step": 7817 }, { "epoch": 1.4083314568790812, "grad_norm": 1.3483281135559082, "learning_rate": 2.0113469437730315e-05, "loss": 1.266, "step": 7818 }, { "epoch": 1.4085115964872776, "grad_norm": 1.450639009475708, "learning_rate": 2.0102117384382042e-05, "loss": 1.3115, "step": 7819 }, { "epoch": 1.408691736095474, "grad_norm": 1.4307678937911987, "learning_rate": 2.0090767729427336e-05, "loss": 1.2492, "step": 7820 }, { "epoch": 1.4088718757036705, "grad_norm": 1.321068286895752, "learning_rate": 2.007942047377669e-05, "loss": 1.1637, "step": 7821 }, { "epoch": 1.4090520153118666, "grad_norm": 1.4131584167480469, "learning_rate": 2.006807561834036e-05, "loss": 1.4678, "step": 7822 }, { "epoch": 1.409232154920063, "grad_norm": 1.5661684274673462, "learning_rate": 2.0056733164028422e-05, "loss": 1.4491, "step": 7823 }, { "epoch": 1.4094122945282594, "grad_norm": 1.3830509185791016, "learning_rate": 2.004539311175077e-05, "loss": 1.4107, "step": 7824 }, { "epoch": 1.4095924341364556, "grad_norm": 1.5468964576721191, "learning_rate": 2.003405546241709e-05, "loss": 1.6252, "step": 7825 }, { "epoch": 1.409772573744652, "grad_norm": 1.552641749382019, "learning_rate": 2.0022720216936886e-05, "loss": 1.6328, "step": 7826 }, { "epoch": 1.4099527133528484, "grad_norm": 1.480945110321045, "learning_rate": 2.0011387376219466e-05, "loss": 1.3122, "step": 7827 }, { "epoch": 1.4101328529610448, "grad_norm": 1.5036386251449585, "learning_rate": 2.000005694117394e-05, "loss": 1.6018, "step": 7828 }, { "epoch": 1.4103129925692413, "grad_norm": 1.361175537109375, "learning_rate": 1.9988728912709216e-05, "loss": 1.2247, "step": 7829 }, { "epoch": 1.4104931321774374, "grad_norm": 1.6424611806869507, "learning_rate": 1.997740329173406e-05, "loss": 1.6818, "step": 7830 }, { "epoch": 1.4106732717856338, "grad_norm": 1.5645921230316162, "learning_rate": 1.996608007915699e-05, "loss": 1.5336, "step": 7831 }, { "epoch": 1.4108534113938302, "grad_norm": 1.4245039224624634, "learning_rate": 1.995475927588634e-05, "loss": 1.3254, "step": 7832 }, { "epoch": 1.4110335510020267, "grad_norm": 1.4106066226959229, "learning_rate": 1.9943440882830273e-05, "loss": 1.4345, "step": 7833 }, { "epoch": 1.4112136906102228, "grad_norm": 1.5403735637664795, "learning_rate": 1.9932124900896722e-05, "loss": 1.5772, "step": 7834 }, { "epoch": 1.4113938302184192, "grad_norm": 1.509137749671936, "learning_rate": 1.9920811330993477e-05, "loss": 1.4109, "step": 7835 }, { "epoch": 1.4115739698266156, "grad_norm": 1.4068652391433716, "learning_rate": 1.99095001740281e-05, "loss": 1.2621, "step": 7836 }, { "epoch": 1.411754109434812, "grad_norm": 1.4332618713378906, "learning_rate": 1.9898191430907982e-05, "loss": 1.3474, "step": 7837 }, { "epoch": 1.4119342490430085, "grad_norm": 1.524052619934082, "learning_rate": 1.9886885102540264e-05, "loss": 1.329, "step": 7838 }, { "epoch": 1.4121143886512046, "grad_norm": 1.625367522239685, "learning_rate": 1.9875581189831943e-05, "loss": 1.3351, "step": 7839 }, { "epoch": 1.412294528259401, "grad_norm": 1.3909921646118164, "learning_rate": 1.986427969368984e-05, "loss": 1.2213, "step": 7840 }, { "epoch": 1.4124746678675975, "grad_norm": 1.5296363830566406, "learning_rate": 1.985298061502054e-05, "loss": 1.3934, "step": 7841 }, { "epoch": 1.4126548074757936, "grad_norm": 1.659833312034607, "learning_rate": 1.984168395473045e-05, "loss": 1.6676, "step": 7842 }, { "epoch": 1.41283494708399, "grad_norm": 1.5220714807510376, "learning_rate": 1.9830389713725773e-05, "loss": 1.3332, "step": 7843 }, { "epoch": 1.4130150866921865, "grad_norm": 1.4969030618667603, "learning_rate": 1.9819097892912548e-05, "loss": 1.3891, "step": 7844 }, { "epoch": 1.4131952263003829, "grad_norm": 1.4387528896331787, "learning_rate": 1.9807808493196594e-05, "loss": 1.3337, "step": 7845 }, { "epoch": 1.4133753659085793, "grad_norm": 1.5578863620758057, "learning_rate": 1.9796521515483533e-05, "loss": 1.4219, "step": 7846 }, { "epoch": 1.4135555055167754, "grad_norm": 1.5699931383132935, "learning_rate": 1.978523696067881e-05, "loss": 1.4139, "step": 7847 }, { "epoch": 1.4137356451249719, "grad_norm": 1.4896538257598877, "learning_rate": 1.977395482968766e-05, "loss": 1.3393, "step": 7848 }, { "epoch": 1.4139157847331683, "grad_norm": 1.4160312414169312, "learning_rate": 1.9762675123415124e-05, "loss": 1.0709, "step": 7849 }, { "epoch": 1.4140959243413644, "grad_norm": 1.5764027833938599, "learning_rate": 1.9751397842766067e-05, "loss": 1.2805, "step": 7850 }, { "epoch": 1.4142760639495608, "grad_norm": 1.6334166526794434, "learning_rate": 1.9740122988645138e-05, "loss": 1.2337, "step": 7851 }, { "epoch": 1.4144562035577573, "grad_norm": 1.3740922212600708, "learning_rate": 1.972885056195681e-05, "loss": 1.8473, "step": 7852 }, { "epoch": 1.4146363431659537, "grad_norm": 1.3389655351638794, "learning_rate": 1.971758056360532e-05, "loss": 1.9542, "step": 7853 }, { "epoch": 1.41481648277415, "grad_norm": 1.3268479108810425, "learning_rate": 1.970631299449479e-05, "loss": 1.7906, "step": 7854 }, { "epoch": 1.4149966223823462, "grad_norm": 1.448621392250061, "learning_rate": 1.9695047855529076e-05, "loss": 1.7253, "step": 7855 }, { "epoch": 1.4151767619905427, "grad_norm": 1.3105289936065674, "learning_rate": 1.968378514761186e-05, "loss": 1.6009, "step": 7856 }, { "epoch": 1.415356901598739, "grad_norm": 1.4124598503112793, "learning_rate": 1.967252487164663e-05, "loss": 1.6538, "step": 7857 }, { "epoch": 1.4155370412069352, "grad_norm": 1.8643112182617188, "learning_rate": 1.966126702853669e-05, "loss": 1.8717, "step": 7858 }, { "epoch": 1.4157171808151316, "grad_norm": 1.5061299800872803, "learning_rate": 1.9650011619185126e-05, "loss": 1.7982, "step": 7859 }, { "epoch": 1.415897320423328, "grad_norm": 1.7730798721313477, "learning_rate": 1.9638758644494848e-05, "loss": 1.9192, "step": 7860 }, { "epoch": 1.4160774600315245, "grad_norm": 1.5577809810638428, "learning_rate": 1.9627508105368564e-05, "loss": 1.5209, "step": 7861 }, { "epoch": 1.4162575996397209, "grad_norm": 1.4238172769546509, "learning_rate": 1.961626000270877e-05, "loss": 1.5473, "step": 7862 }, { "epoch": 1.4164377392479173, "grad_norm": 1.4410889148712158, "learning_rate": 1.9605014337417815e-05, "loss": 1.512, "step": 7863 }, { "epoch": 1.4166178788561135, "grad_norm": 1.244889736175537, "learning_rate": 1.9593771110397808e-05, "loss": 1.3998, "step": 7864 }, { "epoch": 1.4167980184643099, "grad_norm": 1.4741644859313965, "learning_rate": 1.9582530322550668e-05, "loss": 1.8002, "step": 7865 }, { "epoch": 1.4169781580725063, "grad_norm": 1.4130538702011108, "learning_rate": 1.9571291974778132e-05, "loss": 1.3553, "step": 7866 }, { "epoch": 1.4171582976807025, "grad_norm": 1.3103638887405396, "learning_rate": 1.9560056067981715e-05, "loss": 1.3527, "step": 7867 }, { "epoch": 1.4173384372888989, "grad_norm": 1.4565075635910034, "learning_rate": 1.9548822603062788e-05, "loss": 1.4666, "step": 7868 }, { "epoch": 1.4175185768970953, "grad_norm": 1.4128637313842773, "learning_rate": 1.95375915809225e-05, "loss": 1.3565, "step": 7869 }, { "epoch": 1.4176987165052917, "grad_norm": 1.4785748720169067, "learning_rate": 1.9526363002461758e-05, "loss": 1.3128, "step": 7870 }, { "epoch": 1.417878856113488, "grad_norm": 1.4779640436172485, "learning_rate": 1.9515136868581317e-05, "loss": 1.4045, "step": 7871 }, { "epoch": 1.4180589957216843, "grad_norm": 1.352829098701477, "learning_rate": 1.9503913180181767e-05, "loss": 1.2457, "step": 7872 }, { "epoch": 1.4182391353298807, "grad_norm": 1.3769781589508057, "learning_rate": 1.9492691938163443e-05, "loss": 1.2114, "step": 7873 }, { "epoch": 1.418419274938077, "grad_norm": 1.3709018230438232, "learning_rate": 1.9481473143426516e-05, "loss": 1.3453, "step": 7874 }, { "epoch": 1.4185994145462733, "grad_norm": 1.4437788724899292, "learning_rate": 1.9470256796870947e-05, "loss": 1.3438, "step": 7875 }, { "epoch": 1.4187795541544697, "grad_norm": 1.4368840456008911, "learning_rate": 1.9459042899396486e-05, "loss": 1.6159, "step": 7876 }, { "epoch": 1.418959693762666, "grad_norm": 1.4402023553848267, "learning_rate": 1.944783145190275e-05, "loss": 1.2944, "step": 7877 }, { "epoch": 1.4191398333708625, "grad_norm": 1.479618787765503, "learning_rate": 1.9436622455289093e-05, "loss": 1.2564, "step": 7878 }, { "epoch": 1.4193199729790589, "grad_norm": 1.4730867147445679, "learning_rate": 1.942541591045469e-05, "loss": 1.388, "step": 7879 }, { "epoch": 1.419500112587255, "grad_norm": 1.4355342388153076, "learning_rate": 1.941421181829854e-05, "loss": 1.4469, "step": 7880 }, { "epoch": 1.4196802521954515, "grad_norm": 1.4885388612747192, "learning_rate": 1.9403010179719417e-05, "loss": 1.2546, "step": 7881 }, { "epoch": 1.4198603918036479, "grad_norm": 1.3629711866378784, "learning_rate": 1.939181099561591e-05, "loss": 1.2152, "step": 7882 }, { "epoch": 1.420040531411844, "grad_norm": 1.5354400873184204, "learning_rate": 1.938061426688642e-05, "loss": 1.6531, "step": 7883 }, { "epoch": 1.4202206710200405, "grad_norm": 1.5731403827667236, "learning_rate": 1.9369419994429138e-05, "loss": 1.4817, "step": 7884 }, { "epoch": 1.4204008106282369, "grad_norm": 1.4022859334945679, "learning_rate": 1.9358228179142048e-05, "loss": 1.3337, "step": 7885 }, { "epoch": 1.4205809502364333, "grad_norm": 1.6270155906677246, "learning_rate": 1.9347038821922984e-05, "loss": 1.6178, "step": 7886 }, { "epoch": 1.4207610898446297, "grad_norm": 1.361730933189392, "learning_rate": 1.9335851923669536e-05, "loss": 1.2329, "step": 7887 }, { "epoch": 1.4209412294528259, "grad_norm": 1.5242886543273926, "learning_rate": 1.932466748527911e-05, "loss": 1.452, "step": 7888 }, { "epoch": 1.4211213690610223, "grad_norm": 1.4447822570800781, "learning_rate": 1.9313485507648922e-05, "loss": 1.5588, "step": 7889 }, { "epoch": 1.4213015086692187, "grad_norm": 1.4923052787780762, "learning_rate": 1.9302305991675973e-05, "loss": 1.5019, "step": 7890 }, { "epoch": 1.421481648277415, "grad_norm": 1.5655585527420044, "learning_rate": 1.9291128938257087e-05, "loss": 1.378, "step": 7891 }, { "epoch": 1.4216617878856113, "grad_norm": 1.5809687376022339, "learning_rate": 1.927995434828888e-05, "loss": 1.5549, "step": 7892 }, { "epoch": 1.4218419274938077, "grad_norm": 1.546939492225647, "learning_rate": 1.926878222266777e-05, "loss": 1.4146, "step": 7893 }, { "epoch": 1.422022067102004, "grad_norm": 1.3812103271484375, "learning_rate": 1.9257612562289967e-05, "loss": 1.231, "step": 7894 }, { "epoch": 1.4222022067102005, "grad_norm": 1.5994952917099, "learning_rate": 1.924644536805153e-05, "loss": 1.4235, "step": 7895 }, { "epoch": 1.4223823463183969, "grad_norm": 1.6997147798538208, "learning_rate": 1.9235280640848262e-05, "loss": 1.5143, "step": 7896 }, { "epoch": 1.422562485926593, "grad_norm": 1.5993354320526123, "learning_rate": 1.9224118381575797e-05, "loss": 1.1977, "step": 7897 }, { "epoch": 1.4227426255347895, "grad_norm": 1.4947186708450317, "learning_rate": 1.9212958591129565e-05, "loss": 1.3158, "step": 7898 }, { "epoch": 1.4229227651429859, "grad_norm": 1.6370208263397217, "learning_rate": 1.9201801270404778e-05, "loss": 1.398, "step": 7899 }, { "epoch": 1.423102904751182, "grad_norm": 1.4944663047790527, "learning_rate": 1.9190646420296517e-05, "loss": 1.216, "step": 7900 }, { "epoch": 1.4232830443593785, "grad_norm": 1.5353169441223145, "learning_rate": 1.9179494041699586e-05, "loss": 1.4229, "step": 7901 }, { "epoch": 1.4234631839675749, "grad_norm": 1.3661359548568726, "learning_rate": 1.9168344135508654e-05, "loss": 1.7543, "step": 7902 }, { "epoch": 1.4236433235757713, "grad_norm": 1.4645752906799316, "learning_rate": 1.915719670261812e-05, "loss": 1.8437, "step": 7903 }, { "epoch": 1.4238234631839677, "grad_norm": 1.3700424432754517, "learning_rate": 1.9146051743922223e-05, "loss": 1.8131, "step": 7904 }, { "epoch": 1.4240036027921639, "grad_norm": 1.4430216550827026, "learning_rate": 1.9134909260315044e-05, "loss": 1.6698, "step": 7905 }, { "epoch": 1.4241837424003603, "grad_norm": 1.344657063484192, "learning_rate": 1.912376925269041e-05, "loss": 1.5202, "step": 7906 }, { "epoch": 1.4243638820085567, "grad_norm": 1.4975886344909668, "learning_rate": 1.9112631721941972e-05, "loss": 1.9506, "step": 7907 }, { "epoch": 1.4245440216167529, "grad_norm": 1.504418969154358, "learning_rate": 1.9101496668963148e-05, "loss": 1.8791, "step": 7908 }, { "epoch": 1.4247241612249493, "grad_norm": 1.7046763896942139, "learning_rate": 1.9090364094647235e-05, "loss": 2.0661, "step": 7909 }, { "epoch": 1.4249043008331457, "grad_norm": 1.9011411666870117, "learning_rate": 1.907923399988725e-05, "loss": 2.2164, "step": 7910 }, { "epoch": 1.425084440441342, "grad_norm": 1.4955675601959229, "learning_rate": 1.9068106385576063e-05, "loss": 1.4977, "step": 7911 }, { "epoch": 1.4252645800495385, "grad_norm": 1.605182409286499, "learning_rate": 1.905698125260631e-05, "loss": 1.6462, "step": 7912 }, { "epoch": 1.4254447196577347, "grad_norm": 1.4227882623672485, "learning_rate": 1.9045858601870454e-05, "loss": 1.6901, "step": 7913 }, { "epoch": 1.425624859265931, "grad_norm": 1.2416832447052002, "learning_rate": 1.903473843426074e-05, "loss": 1.2759, "step": 7914 }, { "epoch": 1.4258049988741275, "grad_norm": 1.4357246160507202, "learning_rate": 1.9023620750669224e-05, "loss": 1.3224, "step": 7915 }, { "epoch": 1.4259851384823237, "grad_norm": 1.2815290689468384, "learning_rate": 1.9012505551987765e-05, "loss": 1.3066, "step": 7916 }, { "epoch": 1.42616527809052, "grad_norm": 1.4816241264343262, "learning_rate": 1.9001392839108018e-05, "loss": 1.4395, "step": 7917 }, { "epoch": 1.4263454176987165, "grad_norm": 1.3726794719696045, "learning_rate": 1.8990282612921424e-05, "loss": 1.3944, "step": 7918 }, { "epoch": 1.4265255573069129, "grad_norm": 1.4431177377700806, "learning_rate": 1.8979174874319265e-05, "loss": 1.4747, "step": 7919 }, { "epoch": 1.4267056969151093, "grad_norm": 1.4041320085525513, "learning_rate": 1.8968069624192593e-05, "loss": 1.3927, "step": 7920 }, { "epoch": 1.4268858365233057, "grad_norm": 1.5095077753067017, "learning_rate": 1.8956966863432256e-05, "loss": 1.5318, "step": 7921 }, { "epoch": 1.4270659761315019, "grad_norm": 1.327257513999939, "learning_rate": 1.8945866592928922e-05, "loss": 1.3714, "step": 7922 }, { "epoch": 1.4272461157396983, "grad_norm": 1.2723485231399536, "learning_rate": 1.8934768813573045e-05, "loss": 1.2151, "step": 7923 }, { "epoch": 1.4274262553478947, "grad_norm": 1.5590929985046387, "learning_rate": 1.8923673526254874e-05, "loss": 1.5145, "step": 7924 }, { "epoch": 1.4276063949560909, "grad_norm": 1.3956323862075806, "learning_rate": 1.8912580731864487e-05, "loss": 1.3634, "step": 7925 }, { "epoch": 1.4277865345642873, "grad_norm": 1.4058585166931152, "learning_rate": 1.8901490431291726e-05, "loss": 1.4765, "step": 7926 }, { "epoch": 1.4279666741724837, "grad_norm": 1.505709171295166, "learning_rate": 1.8890402625426235e-05, "loss": 1.4271, "step": 7927 }, { "epoch": 1.42814681378068, "grad_norm": 1.4091767072677612, "learning_rate": 1.8879317315157514e-05, "loss": 1.1701, "step": 7928 }, { "epoch": 1.4283269533888765, "grad_norm": 1.4743874073028564, "learning_rate": 1.88682345013748e-05, "loss": 1.3883, "step": 7929 }, { "epoch": 1.4285070929970727, "grad_norm": 1.495491862297058, "learning_rate": 1.8857154184967148e-05, "loss": 1.5767, "step": 7930 }, { "epoch": 1.428687232605269, "grad_norm": 1.622897744178772, "learning_rate": 1.8846076366823417e-05, "loss": 1.5282, "step": 7931 }, { "epoch": 1.4288673722134655, "grad_norm": 1.4698102474212646, "learning_rate": 1.883500104783225e-05, "loss": 1.2853, "step": 7932 }, { "epoch": 1.4290475118216617, "grad_norm": 1.4751147031784058, "learning_rate": 1.8823928228882137e-05, "loss": 1.3005, "step": 7933 }, { "epoch": 1.429227651429858, "grad_norm": 1.612786054611206, "learning_rate": 1.881285791086133e-05, "loss": 1.4869, "step": 7934 }, { "epoch": 1.4294077910380545, "grad_norm": 1.468007206916809, "learning_rate": 1.8801790094657856e-05, "loss": 1.3834, "step": 7935 }, { "epoch": 1.429587930646251, "grad_norm": 1.5922496318817139, "learning_rate": 1.879072478115957e-05, "loss": 1.5296, "step": 7936 }, { "epoch": 1.4297680702544473, "grad_norm": 1.5834506750106812, "learning_rate": 1.8779661971254153e-05, "loss": 1.4453, "step": 7937 }, { "epoch": 1.4299482098626435, "grad_norm": 1.535792350769043, "learning_rate": 1.876860166582905e-05, "loss": 1.1589, "step": 7938 }, { "epoch": 1.4301283494708399, "grad_norm": 1.5236412286758423, "learning_rate": 1.8757543865771504e-05, "loss": 1.2876, "step": 7939 }, { "epoch": 1.4303084890790363, "grad_norm": 1.5519678592681885, "learning_rate": 1.8746488571968574e-05, "loss": 1.4189, "step": 7940 }, { "epoch": 1.4304886286872325, "grad_norm": 1.5685690641403198, "learning_rate": 1.8735435785307094e-05, "loss": 1.4846, "step": 7941 }, { "epoch": 1.4306687682954289, "grad_norm": 1.5382126569747925, "learning_rate": 1.8724385506673735e-05, "loss": 1.5148, "step": 7942 }, { "epoch": 1.4308489079036253, "grad_norm": 1.5386821031570435, "learning_rate": 1.8713337736954945e-05, "loss": 1.3786, "step": 7943 }, { "epoch": 1.4310290475118217, "grad_norm": 1.5386455059051514, "learning_rate": 1.870229247703696e-05, "loss": 1.2948, "step": 7944 }, { "epoch": 1.431209187120018, "grad_norm": 1.5205636024475098, "learning_rate": 1.8691249727805832e-05, "loss": 1.3204, "step": 7945 }, { "epoch": 1.4313893267282143, "grad_norm": 1.6280258893966675, "learning_rate": 1.8680209490147395e-05, "loss": 1.3852, "step": 7946 }, { "epoch": 1.4315694663364107, "grad_norm": 1.5553555488586426, "learning_rate": 1.8669171764947297e-05, "loss": 1.4096, "step": 7947 }, { "epoch": 1.431749605944607, "grad_norm": 1.4723114967346191, "learning_rate": 1.865813655309098e-05, "loss": 1.2935, "step": 7948 }, { "epoch": 1.4319297455528035, "grad_norm": 1.5613638162612915, "learning_rate": 1.8647103855463688e-05, "loss": 1.3377, "step": 7949 }, { "epoch": 1.4321098851609997, "grad_norm": 1.4356449842453003, "learning_rate": 1.8636073672950433e-05, "loss": 1.38, "step": 7950 }, { "epoch": 1.432290024769196, "grad_norm": 1.3043460845947266, "learning_rate": 1.8625046006436086e-05, "loss": 1.018, "step": 7951 }, { "epoch": 1.4324701643773925, "grad_norm": 1.4227840900421143, "learning_rate": 1.861402085680527e-05, "loss": 1.9708, "step": 7952 }, { "epoch": 1.432650303985589, "grad_norm": 1.3337846994400024, "learning_rate": 1.860299822494241e-05, "loss": 1.9448, "step": 7953 }, { "epoch": 1.4328304435937853, "grad_norm": 1.4255822896957397, "learning_rate": 1.859197811173174e-05, "loss": 1.7514, "step": 7954 }, { "epoch": 1.4330105832019815, "grad_norm": 1.3270381689071655, "learning_rate": 1.8580960518057288e-05, "loss": 1.8815, "step": 7955 }, { "epoch": 1.433190722810178, "grad_norm": 1.4608460664749146, "learning_rate": 1.8569945444802882e-05, "loss": 1.8592, "step": 7956 }, { "epoch": 1.4333708624183743, "grad_norm": 1.541144609451294, "learning_rate": 1.8558932892852145e-05, "loss": 1.7285, "step": 7957 }, { "epoch": 1.4335510020265705, "grad_norm": 1.4751203060150146, "learning_rate": 1.854792286308849e-05, "loss": 1.6398, "step": 7958 }, { "epoch": 1.433731141634767, "grad_norm": 1.4892191886901855, "learning_rate": 1.8536915356395147e-05, "loss": 1.7831, "step": 7959 }, { "epoch": 1.4339112812429633, "grad_norm": 1.4279849529266357, "learning_rate": 1.852591037365511e-05, "loss": 1.5828, "step": 7960 }, { "epoch": 1.4340914208511597, "grad_norm": 1.8383522033691406, "learning_rate": 1.851490791575123e-05, "loss": 2.109, "step": 7961 }, { "epoch": 1.434271560459356, "grad_norm": 1.5862197875976562, "learning_rate": 1.8503907983566103e-05, "loss": 1.5968, "step": 7962 }, { "epoch": 1.4344517000675523, "grad_norm": 1.5333952903747559, "learning_rate": 1.8492910577982138e-05, "loss": 1.7282, "step": 7963 }, { "epoch": 1.4346318396757487, "grad_norm": 1.3018234968185425, "learning_rate": 1.8481915699881513e-05, "loss": 1.32, "step": 7964 }, { "epoch": 1.434811979283945, "grad_norm": 1.5543546676635742, "learning_rate": 1.8470923350146286e-05, "loss": 1.798, "step": 7965 }, { "epoch": 1.4349921188921413, "grad_norm": 1.4211477041244507, "learning_rate": 1.845993352965823e-05, "loss": 1.5113, "step": 7966 }, { "epoch": 1.4351722585003377, "grad_norm": 1.4054722785949707, "learning_rate": 1.8448946239298952e-05, "loss": 1.6904, "step": 7967 }, { "epoch": 1.435352398108534, "grad_norm": 1.287160038948059, "learning_rate": 1.8437961479949832e-05, "loss": 1.2892, "step": 7968 }, { "epoch": 1.4355325377167305, "grad_norm": 1.432945966720581, "learning_rate": 1.8426979252492045e-05, "loss": 1.3424, "step": 7969 }, { "epoch": 1.435712677324927, "grad_norm": 1.5032153129577637, "learning_rate": 1.8415999557806624e-05, "loss": 1.5446, "step": 7970 }, { "epoch": 1.435892816933123, "grad_norm": 1.3255146741867065, "learning_rate": 1.8405022396774337e-05, "loss": 1.2592, "step": 7971 }, { "epoch": 1.4360729565413195, "grad_norm": 1.4102063179016113, "learning_rate": 1.839404777027576e-05, "loss": 1.367, "step": 7972 }, { "epoch": 1.436253096149516, "grad_norm": 1.4789421558380127, "learning_rate": 1.8383075679191273e-05, "loss": 1.4415, "step": 7973 }, { "epoch": 1.436433235757712, "grad_norm": 1.4706363677978516, "learning_rate": 1.8372106124401045e-05, "loss": 1.4336, "step": 7974 }, { "epoch": 1.4366133753659085, "grad_norm": 1.3293424844741821, "learning_rate": 1.836113910678507e-05, "loss": 1.327, "step": 7975 }, { "epoch": 1.436793514974105, "grad_norm": 1.2669920921325684, "learning_rate": 1.8350174627223105e-05, "loss": 1.1628, "step": 7976 }, { "epoch": 1.4369736545823013, "grad_norm": 1.499607801437378, "learning_rate": 1.833921268659472e-05, "loss": 1.5666, "step": 7977 }, { "epoch": 1.4371537941904977, "grad_norm": 1.4095861911773682, "learning_rate": 1.8328253285779267e-05, "loss": 1.3281, "step": 7978 }, { "epoch": 1.4373339337986941, "grad_norm": 1.3976359367370605, "learning_rate": 1.831729642565591e-05, "loss": 1.377, "step": 7979 }, { "epoch": 1.4375140734068903, "grad_norm": 1.5039989948272705, "learning_rate": 1.830634210710361e-05, "loss": 1.5736, "step": 7980 }, { "epoch": 1.4376942130150867, "grad_norm": 1.4818403720855713, "learning_rate": 1.8295390331001096e-05, "loss": 1.487, "step": 7981 }, { "epoch": 1.4378743526232831, "grad_norm": 1.472092866897583, "learning_rate": 1.8284441098226934e-05, "loss": 1.5115, "step": 7982 }, { "epoch": 1.4380544922314793, "grad_norm": 1.629014253616333, "learning_rate": 1.8273494409659435e-05, "loss": 1.583, "step": 7983 }, { "epoch": 1.4382346318396757, "grad_norm": 1.5817917585372925, "learning_rate": 1.8262550266176787e-05, "loss": 1.3209, "step": 7984 }, { "epoch": 1.438414771447872, "grad_norm": 1.271173357963562, "learning_rate": 1.8251608668656894e-05, "loss": 1.1632, "step": 7985 }, { "epoch": 1.4385949110560685, "grad_norm": 1.4208099842071533, "learning_rate": 1.824066961797749e-05, "loss": 1.5138, "step": 7986 }, { "epoch": 1.438775050664265, "grad_norm": 1.5053945779800415, "learning_rate": 1.82297331150161e-05, "loss": 1.4114, "step": 7987 }, { "epoch": 1.438955190272461, "grad_norm": 1.5340497493743896, "learning_rate": 1.8218799160650047e-05, "loss": 1.4193, "step": 7988 }, { "epoch": 1.4391353298806575, "grad_norm": 1.4334325790405273, "learning_rate": 1.8207867755756442e-05, "loss": 1.4446, "step": 7989 }, { "epoch": 1.439315469488854, "grad_norm": 1.4936779737472534, "learning_rate": 1.8196938901212196e-05, "loss": 1.3615, "step": 7990 }, { "epoch": 1.43949560909705, "grad_norm": 1.3821886777877808, "learning_rate": 1.8186012597894026e-05, "loss": 1.1397, "step": 7991 }, { "epoch": 1.4396757487052465, "grad_norm": 1.6085041761398315, "learning_rate": 1.8175088846678412e-05, "loss": 1.3564, "step": 7992 }, { "epoch": 1.439855888313443, "grad_norm": 1.6198670864105225, "learning_rate": 1.8164167648441686e-05, "loss": 1.536, "step": 7993 }, { "epoch": 1.4400360279216393, "grad_norm": 1.6322762966156006, "learning_rate": 1.8153249004059925e-05, "loss": 1.454, "step": 7994 }, { "epoch": 1.4402161675298357, "grad_norm": 1.3694039583206177, "learning_rate": 1.8142332914409016e-05, "loss": 1.2246, "step": 7995 }, { "epoch": 1.440396307138032, "grad_norm": 1.5592377185821533, "learning_rate": 1.8131419380364644e-05, "loss": 1.352, "step": 7996 }, { "epoch": 1.4405764467462283, "grad_norm": 1.4743783473968506, "learning_rate": 1.812050840280227e-05, "loss": 1.3983, "step": 7997 }, { "epoch": 1.4407565863544247, "grad_norm": 1.5487103462219238, "learning_rate": 1.8109599982597203e-05, "loss": 1.2405, "step": 7998 }, { "epoch": 1.440936725962621, "grad_norm": 1.5841177701950073, "learning_rate": 1.8098694120624505e-05, "loss": 1.2875, "step": 7999 }, { "epoch": 1.4411168655708173, "grad_norm": 1.50828218460083, "learning_rate": 1.808779081775901e-05, "loss": 1.3283, "step": 8000 }, { "epoch": 1.4412970051790137, "grad_norm": 1.606026291847229, "learning_rate": 1.8076890074875374e-05, "loss": 1.3342, "step": 8001 }, { "epoch": 1.4414771447872101, "grad_norm": 1.2386553287506104, "learning_rate": 1.806599189284809e-05, "loss": 1.5013, "step": 8002 }, { "epoch": 1.4416572843954065, "grad_norm": 1.337441086769104, "learning_rate": 1.8055096272551375e-05, "loss": 1.7192, "step": 8003 }, { "epoch": 1.441837424003603, "grad_norm": 1.2560787200927734, "learning_rate": 1.804420321485928e-05, "loss": 1.5599, "step": 8004 }, { "epoch": 1.4420175636117991, "grad_norm": 1.3062278032302856, "learning_rate": 1.8033312720645635e-05, "loss": 1.6616, "step": 8005 }, { "epoch": 1.4421977032199955, "grad_norm": 1.3483303785324097, "learning_rate": 1.802242479078406e-05, "loss": 1.6165, "step": 8006 }, { "epoch": 1.442377842828192, "grad_norm": 1.520615577697754, "learning_rate": 1.801153942614801e-05, "loss": 1.7802, "step": 8007 }, { "epoch": 1.442557982436388, "grad_norm": 1.624891996383667, "learning_rate": 1.8000656627610683e-05, "loss": 1.9756, "step": 8008 }, { "epoch": 1.4427381220445845, "grad_norm": 1.5343197584152222, "learning_rate": 1.79897763960451e-05, "loss": 1.7663, "step": 8009 }, { "epoch": 1.442918261652781, "grad_norm": 1.9937756061553955, "learning_rate": 1.7978898732324055e-05, "loss": 1.8343, "step": 8010 }, { "epoch": 1.4430984012609773, "grad_norm": 1.3978335857391357, "learning_rate": 1.7968023637320164e-05, "loss": 1.3927, "step": 8011 }, { "epoch": 1.4432785408691737, "grad_norm": 1.4265486001968384, "learning_rate": 1.7957151111905813e-05, "loss": 1.36, "step": 8012 }, { "epoch": 1.44345868047737, "grad_norm": 1.3184529542922974, "learning_rate": 1.794628115695319e-05, "loss": 1.4875, "step": 8013 }, { "epoch": 1.4436388200855663, "grad_norm": 1.3715662956237793, "learning_rate": 1.793541377333428e-05, "loss": 1.3782, "step": 8014 }, { "epoch": 1.4438189596937627, "grad_norm": 1.4320085048675537, "learning_rate": 1.792454896192084e-05, "loss": 1.3684, "step": 8015 }, { "epoch": 1.443999099301959, "grad_norm": 1.4699143171310425, "learning_rate": 1.7913686723584478e-05, "loss": 1.4742, "step": 8016 }, { "epoch": 1.4441792389101553, "grad_norm": 1.3905134201049805, "learning_rate": 1.7902827059196542e-05, "loss": 1.572, "step": 8017 }, { "epoch": 1.4443593785183517, "grad_norm": 1.3693205118179321, "learning_rate": 1.7891969969628182e-05, "loss": 1.2682, "step": 8018 }, { "epoch": 1.4445395181265481, "grad_norm": 1.5497798919677734, "learning_rate": 1.788111545575035e-05, "loss": 1.5367, "step": 8019 }, { "epoch": 1.4447196577347445, "grad_norm": 1.3003859519958496, "learning_rate": 1.7870263518433793e-05, "loss": 1.1805, "step": 8020 }, { "epoch": 1.4448997973429407, "grad_norm": 1.261290431022644, "learning_rate": 1.785941415854905e-05, "loss": 1.2311, "step": 8021 }, { "epoch": 1.4450799369511371, "grad_norm": 1.5115928649902344, "learning_rate": 1.7848567376966447e-05, "loss": 1.6328, "step": 8022 }, { "epoch": 1.4452600765593335, "grad_norm": 1.4348713159561157, "learning_rate": 1.7837723174556102e-05, "loss": 1.3894, "step": 8023 }, { "epoch": 1.4454402161675297, "grad_norm": 1.4605377912521362, "learning_rate": 1.7826881552187946e-05, "loss": 1.4607, "step": 8024 }, { "epoch": 1.4456203557757261, "grad_norm": 1.4186657667160034, "learning_rate": 1.7816042510731663e-05, "loss": 1.2278, "step": 8025 }, { "epoch": 1.4458004953839225, "grad_norm": 1.4589608907699585, "learning_rate": 1.7805206051056788e-05, "loss": 1.2834, "step": 8026 }, { "epoch": 1.445980634992119, "grad_norm": 1.4106963872909546, "learning_rate": 1.7794372174032603e-05, "loss": 1.3708, "step": 8027 }, { "epoch": 1.4461607746003153, "grad_norm": 1.5547178983688354, "learning_rate": 1.7783540880528194e-05, "loss": 1.4757, "step": 8028 }, { "epoch": 1.4463409142085115, "grad_norm": 1.4539644718170166, "learning_rate": 1.7772712171412424e-05, "loss": 1.2575, "step": 8029 }, { "epoch": 1.446521053816708, "grad_norm": 1.3594244718551636, "learning_rate": 1.776188604755401e-05, "loss": 1.3355, "step": 8030 }, { "epoch": 1.4467011934249043, "grad_norm": 1.402726173400879, "learning_rate": 1.7751062509821392e-05, "loss": 1.4013, "step": 8031 }, { "epoch": 1.4468813330331007, "grad_norm": 1.6147027015686035, "learning_rate": 1.774024155908285e-05, "loss": 1.4934, "step": 8032 }, { "epoch": 1.447061472641297, "grad_norm": 1.512836217880249, "learning_rate": 1.7729423196206396e-05, "loss": 1.4415, "step": 8033 }, { "epoch": 1.4472416122494933, "grad_norm": 1.3208867311477661, "learning_rate": 1.771860742205988e-05, "loss": 1.2731, "step": 8034 }, { "epoch": 1.4474217518576897, "grad_norm": 1.4216922521591187, "learning_rate": 1.7707794237510973e-05, "loss": 1.3592, "step": 8035 }, { "epoch": 1.4476018914658861, "grad_norm": 1.3850549459457397, "learning_rate": 1.7696983643427078e-05, "loss": 1.3191, "step": 8036 }, { "epoch": 1.4477820310740825, "grad_norm": 1.3908941745758057, "learning_rate": 1.768617564067542e-05, "loss": 1.3333, "step": 8037 }, { "epoch": 1.4479621706822787, "grad_norm": 1.4673216342926025, "learning_rate": 1.7675370230123017e-05, "loss": 1.3204, "step": 8038 }, { "epoch": 1.4481423102904751, "grad_norm": 1.5468907356262207, "learning_rate": 1.766456741263665e-05, "loss": 1.3236, "step": 8039 }, { "epoch": 1.4483224498986715, "grad_norm": 1.6482319831848145, "learning_rate": 1.7653767189082955e-05, "loss": 1.5268, "step": 8040 }, { "epoch": 1.4485025895068677, "grad_norm": 1.6609482765197754, "learning_rate": 1.76429695603283e-05, "loss": 1.5336, "step": 8041 }, { "epoch": 1.4486827291150641, "grad_norm": 1.6685922145843506, "learning_rate": 1.7632174527238866e-05, "loss": 1.4974, "step": 8042 }, { "epoch": 1.4488628687232605, "grad_norm": 1.558226227760315, "learning_rate": 1.7621382090680628e-05, "loss": 1.4893, "step": 8043 }, { "epoch": 1.449043008331457, "grad_norm": 1.8582466840744019, "learning_rate": 1.761059225151934e-05, "loss": 1.6828, "step": 8044 }, { "epoch": 1.4492231479396533, "grad_norm": 1.4842251539230347, "learning_rate": 1.7599805010620574e-05, "loss": 1.4216, "step": 8045 }, { "epoch": 1.4494032875478495, "grad_norm": 1.576616644859314, "learning_rate": 1.7589020368849664e-05, "loss": 1.3209, "step": 8046 }, { "epoch": 1.449583427156046, "grad_norm": 1.6401101350784302, "learning_rate": 1.7578238327071748e-05, "loss": 1.3026, "step": 8047 }, { "epoch": 1.4497635667642423, "grad_norm": 1.5385409593582153, "learning_rate": 1.7567458886151754e-05, "loss": 1.3966, "step": 8048 }, { "epoch": 1.4499437063724385, "grad_norm": 1.4719061851501465, "learning_rate": 1.755668204695442e-05, "loss": 1.1243, "step": 8049 }, { "epoch": 1.450123845980635, "grad_norm": 1.4997024536132812, "learning_rate": 1.7545907810344254e-05, "loss": 1.3566, "step": 8050 }, { "epoch": 1.4503039855888313, "grad_norm": 1.4546380043029785, "learning_rate": 1.7535136177185556e-05, "loss": 1.3899, "step": 8051 }, { "epoch": 1.4504841251970277, "grad_norm": 1.4713420867919922, "learning_rate": 1.752436714834242e-05, "loss": 2.0031, "step": 8052 }, { "epoch": 1.4506642648052241, "grad_norm": 1.2983648777008057, "learning_rate": 1.751360072467873e-05, "loss": 1.857, "step": 8053 }, { "epoch": 1.4508444044134203, "grad_norm": 1.4595049619674683, "learning_rate": 1.7502836907058174e-05, "loss": 1.9324, "step": 8054 }, { "epoch": 1.4510245440216167, "grad_norm": 1.3958483934402466, "learning_rate": 1.7492075696344207e-05, "loss": 1.7987, "step": 8055 }, { "epoch": 1.4512046836298131, "grad_norm": 1.489006757736206, "learning_rate": 1.748131709340009e-05, "loss": 1.9766, "step": 8056 }, { "epoch": 1.4513848232380093, "grad_norm": 1.471548318862915, "learning_rate": 1.7470561099088867e-05, "loss": 1.6195, "step": 8057 }, { "epoch": 1.4515649628462057, "grad_norm": 1.4130973815917969, "learning_rate": 1.7459807714273402e-05, "loss": 1.677, "step": 8058 }, { "epoch": 1.4517451024544021, "grad_norm": 1.848110318183899, "learning_rate": 1.744905693981631e-05, "loss": 1.92, "step": 8059 }, { "epoch": 1.4519252420625985, "grad_norm": 1.632275104522705, "learning_rate": 1.743830877658002e-05, "loss": 1.9258, "step": 8060 }, { "epoch": 1.452105381670795, "grad_norm": 1.9830628633499146, "learning_rate": 1.7427563225426734e-05, "loss": 1.7233, "step": 8061 }, { "epoch": 1.4522855212789914, "grad_norm": 1.3974878787994385, "learning_rate": 1.741682028721845e-05, "loss": 1.3587, "step": 8062 }, { "epoch": 1.4524656608871875, "grad_norm": 1.3284026384353638, "learning_rate": 1.7406079962816986e-05, "loss": 1.3569, "step": 8063 }, { "epoch": 1.452645800495384, "grad_norm": 1.5110300779342651, "learning_rate": 1.7395342253083925e-05, "loss": 1.3713, "step": 8064 }, { "epoch": 1.4528259401035803, "grad_norm": 1.3864331245422363, "learning_rate": 1.738460715888061e-05, "loss": 1.5585, "step": 8065 }, { "epoch": 1.4530060797117765, "grad_norm": 1.436155080795288, "learning_rate": 1.737387468106823e-05, "loss": 1.359, "step": 8066 }, { "epoch": 1.453186219319973, "grad_norm": 1.6059166193008423, "learning_rate": 1.736314482050771e-05, "loss": 1.7009, "step": 8067 }, { "epoch": 1.4533663589281693, "grad_norm": 1.3695902824401855, "learning_rate": 1.735241757805983e-05, "loss": 1.3248, "step": 8068 }, { "epoch": 1.4535464985363657, "grad_norm": 1.3821109533309937, "learning_rate": 1.7341692954585113e-05, "loss": 1.2743, "step": 8069 }, { "epoch": 1.4537266381445622, "grad_norm": 1.43107008934021, "learning_rate": 1.733097095094388e-05, "loss": 1.4647, "step": 8070 }, { "epoch": 1.4539067777527583, "grad_norm": 1.3265044689178467, "learning_rate": 1.7320251567996232e-05, "loss": 1.3245, "step": 8071 }, { "epoch": 1.4540869173609547, "grad_norm": 1.2950725555419922, "learning_rate": 1.7309534806602097e-05, "loss": 1.3646, "step": 8072 }, { "epoch": 1.4542670569691512, "grad_norm": 1.5401910543441772, "learning_rate": 1.7298820667621156e-05, "loss": 1.7558, "step": 8073 }, { "epoch": 1.4544471965773473, "grad_norm": 1.3882255554199219, "learning_rate": 1.7288109151912895e-05, "loss": 1.3777, "step": 8074 }, { "epoch": 1.4546273361855437, "grad_norm": 1.4878950119018555, "learning_rate": 1.7277400260336584e-05, "loss": 1.3309, "step": 8075 }, { "epoch": 1.4548074757937401, "grad_norm": 1.3252559900283813, "learning_rate": 1.726669399375128e-05, "loss": 1.1664, "step": 8076 }, { "epoch": 1.4549876154019366, "grad_norm": 1.6111396551132202, "learning_rate": 1.725599035301585e-05, "loss": 1.5897, "step": 8077 }, { "epoch": 1.455167755010133, "grad_norm": 1.436450719833374, "learning_rate": 1.7245289338988917e-05, "loss": 1.341, "step": 8078 }, { "epoch": 1.4553478946183291, "grad_norm": 1.3740235567092896, "learning_rate": 1.7234590952528924e-05, "loss": 1.2053, "step": 8079 }, { "epoch": 1.4555280342265255, "grad_norm": 1.5159800052642822, "learning_rate": 1.722389519449408e-05, "loss": 1.5903, "step": 8080 }, { "epoch": 1.455708173834722, "grad_norm": 1.4350194931030273, "learning_rate": 1.721320206574239e-05, "loss": 1.2955, "step": 8081 }, { "epoch": 1.4558883134429181, "grad_norm": 1.4336975812911987, "learning_rate": 1.7202511567131672e-05, "loss": 1.3889, "step": 8082 }, { "epoch": 1.4560684530511145, "grad_norm": 1.5354095697402954, "learning_rate": 1.7191823699519506e-05, "loss": 1.4385, "step": 8083 }, { "epoch": 1.456248592659311, "grad_norm": 1.5346239805221558, "learning_rate": 1.718113846376326e-05, "loss": 1.5583, "step": 8084 }, { "epoch": 1.4564287322675074, "grad_norm": 1.5364880561828613, "learning_rate": 1.7170455860720096e-05, "loss": 1.5527, "step": 8085 }, { "epoch": 1.4566088718757038, "grad_norm": 1.5669749975204468, "learning_rate": 1.7159775891246976e-05, "loss": 1.4579, "step": 8086 }, { "epoch": 1.4567890114839, "grad_norm": 1.5393072366714478, "learning_rate": 1.7149098556200637e-05, "loss": 1.3381, "step": 8087 }, { "epoch": 1.4569691510920963, "grad_norm": 1.3815019130706787, "learning_rate": 1.7138423856437606e-05, "loss": 1.3856, "step": 8088 }, { "epoch": 1.4571492907002928, "grad_norm": 1.5448921918869019, "learning_rate": 1.7127751792814217e-05, "loss": 1.6371, "step": 8089 }, { "epoch": 1.4573294303084892, "grad_norm": 1.5260528326034546, "learning_rate": 1.7117082366186543e-05, "loss": 1.4921, "step": 8090 }, { "epoch": 1.4575095699166853, "grad_norm": 1.4143253564834595, "learning_rate": 1.710641557741053e-05, "loss": 1.2475, "step": 8091 }, { "epoch": 1.4576897095248817, "grad_norm": 1.579866886138916, "learning_rate": 1.7095751427341826e-05, "loss": 1.4775, "step": 8092 }, { "epoch": 1.4578698491330782, "grad_norm": 1.6168063879013062, "learning_rate": 1.7085089916835923e-05, "loss": 1.4495, "step": 8093 }, { "epoch": 1.4580499887412746, "grad_norm": 1.4802608489990234, "learning_rate": 1.7074431046748075e-05, "loss": 1.4819, "step": 8094 }, { "epoch": 1.458230128349471, "grad_norm": 1.4431006908416748, "learning_rate": 1.7063774817933313e-05, "loss": 1.4312, "step": 8095 }, { "epoch": 1.4584102679576672, "grad_norm": 1.596966028213501, "learning_rate": 1.705312123124653e-05, "loss": 1.4235, "step": 8096 }, { "epoch": 1.4585904075658636, "grad_norm": 1.4923477172851562, "learning_rate": 1.7042470287542288e-05, "loss": 1.2696, "step": 8097 }, { "epoch": 1.45877054717406, "grad_norm": 1.5950254201889038, "learning_rate": 1.703182198767503e-05, "loss": 1.3189, "step": 8098 }, { "epoch": 1.4589506867822561, "grad_norm": 1.7292513847351074, "learning_rate": 1.7021176332498938e-05, "loss": 1.7945, "step": 8099 }, { "epoch": 1.4591308263904526, "grad_norm": 1.3566813468933105, "learning_rate": 1.7010533322868034e-05, "loss": 1.2219, "step": 8100 }, { "epoch": 1.459310965998649, "grad_norm": 1.4696025848388672, "learning_rate": 1.6999892959636082e-05, "loss": 1.3622, "step": 8101 }, { "epoch": 1.4594911056068454, "grad_norm": 1.4046486616134644, "learning_rate": 1.6989255243656637e-05, "loss": 1.7419, "step": 8102 }, { "epoch": 1.4596712452150418, "grad_norm": 1.257022738456726, "learning_rate": 1.6978620175783062e-05, "loss": 1.725, "step": 8103 }, { "epoch": 1.459851384823238, "grad_norm": 1.4738199710845947, "learning_rate": 1.696798775686847e-05, "loss": 2.1063, "step": 8104 }, { "epoch": 1.4600315244314344, "grad_norm": 1.4084230661392212, "learning_rate": 1.6957357987765832e-05, "loss": 1.6651, "step": 8105 }, { "epoch": 1.4602116640396308, "grad_norm": 1.3796098232269287, "learning_rate": 1.6946730869327844e-05, "loss": 1.7655, "step": 8106 }, { "epoch": 1.460391803647827, "grad_norm": 1.490235686302185, "learning_rate": 1.6936106402406998e-05, "loss": 1.7923, "step": 8107 }, { "epoch": 1.4605719432560234, "grad_norm": 1.4591264724731445, "learning_rate": 1.69254845878556e-05, "loss": 1.6397, "step": 8108 }, { "epoch": 1.4607520828642198, "grad_norm": 1.6688789129257202, "learning_rate": 1.691486542652571e-05, "loss": 1.7579, "step": 8109 }, { "epoch": 1.4609322224724162, "grad_norm": 1.699428677558899, "learning_rate": 1.6904248919269212e-05, "loss": 1.9655, "step": 8110 }, { "epoch": 1.4611123620806126, "grad_norm": 1.4834977388381958, "learning_rate": 1.6893635066937737e-05, "loss": 1.7917, "step": 8111 }, { "epoch": 1.4612925016888088, "grad_norm": 1.4167309999465942, "learning_rate": 1.688302387038273e-05, "loss": 1.6533, "step": 8112 }, { "epoch": 1.4614726412970052, "grad_norm": 1.3667678833007812, "learning_rate": 1.6872415330455406e-05, "loss": 1.3328, "step": 8113 }, { "epoch": 1.4616527809052016, "grad_norm": 1.3453348875045776, "learning_rate": 1.6861809448006798e-05, "loss": 1.4076, "step": 8114 }, { "epoch": 1.4618329205133977, "grad_norm": 1.3253042697906494, "learning_rate": 1.68512062238877e-05, "loss": 1.3529, "step": 8115 }, { "epoch": 1.4620130601215942, "grad_norm": 1.3924373388290405, "learning_rate": 1.6840605658948695e-05, "loss": 1.4758, "step": 8116 }, { "epoch": 1.4621931997297906, "grad_norm": 1.377272367477417, "learning_rate": 1.683000775404015e-05, "loss": 1.4557, "step": 8117 }, { "epoch": 1.462373339337987, "grad_norm": 1.3477051258087158, "learning_rate": 1.6819412510012222e-05, "loss": 1.303, "step": 8118 }, { "epoch": 1.4625534789461834, "grad_norm": 1.280418038368225, "learning_rate": 1.680881992771487e-05, "loss": 1.3418, "step": 8119 }, { "epoch": 1.4627336185543798, "grad_norm": 1.3980178833007812, "learning_rate": 1.679823000799781e-05, "loss": 1.323, "step": 8120 }, { "epoch": 1.462913758162576, "grad_norm": 1.5017340183258057, "learning_rate": 1.678764275171057e-05, "loss": 1.4848, "step": 8121 }, { "epoch": 1.4630938977707724, "grad_norm": 1.4132311344146729, "learning_rate": 1.677705815970243e-05, "loss": 1.4837, "step": 8122 }, { "epoch": 1.4632740373789688, "grad_norm": 1.404493808746338, "learning_rate": 1.676647623282252e-05, "loss": 1.4461, "step": 8123 }, { "epoch": 1.463454176987165, "grad_norm": 1.4357677698135376, "learning_rate": 1.6755896971919704e-05, "loss": 1.3661, "step": 8124 }, { "epoch": 1.4636343165953614, "grad_norm": 1.599934697151184, "learning_rate": 1.674532037784264e-05, "loss": 1.6575, "step": 8125 }, { "epoch": 1.4638144562035578, "grad_norm": 1.3421075344085693, "learning_rate": 1.673474645143977e-05, "loss": 1.2776, "step": 8126 }, { "epoch": 1.4639945958117542, "grad_norm": 1.392279028892517, "learning_rate": 1.6724175193559327e-05, "loss": 1.3085, "step": 8127 }, { "epoch": 1.4641747354199506, "grad_norm": 1.3773834705352783, "learning_rate": 1.6713606605049355e-05, "loss": 1.2416, "step": 8128 }, { "epoch": 1.4643548750281468, "grad_norm": 1.3469650745391846, "learning_rate": 1.6703040686757666e-05, "loss": 1.2943, "step": 8129 }, { "epoch": 1.4645350146363432, "grad_norm": 1.4431020021438599, "learning_rate": 1.6692477439531817e-05, "loss": 1.2578, "step": 8130 }, { "epoch": 1.4647151542445396, "grad_norm": 1.6385438442230225, "learning_rate": 1.6681916864219206e-05, "loss": 1.6553, "step": 8131 }, { "epoch": 1.4648952938527358, "grad_norm": 1.5582624673843384, "learning_rate": 1.6671358961666977e-05, "loss": 1.3421, "step": 8132 }, { "epoch": 1.4650754334609322, "grad_norm": 1.5187559127807617, "learning_rate": 1.6660803732722114e-05, "loss": 1.484, "step": 8133 }, { "epoch": 1.4652555730691286, "grad_norm": 1.604551911354065, "learning_rate": 1.665025117823134e-05, "loss": 1.4912, "step": 8134 }, { "epoch": 1.465435712677325, "grad_norm": 1.59600830078125, "learning_rate": 1.6639701299041172e-05, "loss": 1.3918, "step": 8135 }, { "epoch": 1.4656158522855214, "grad_norm": 1.6133476495742798, "learning_rate": 1.6629154095997894e-05, "loss": 1.5449, "step": 8136 }, { "epoch": 1.4657959918937176, "grad_norm": 1.5654728412628174, "learning_rate": 1.6618609569947645e-05, "loss": 1.4359, "step": 8137 }, { "epoch": 1.465976131501914, "grad_norm": 1.5909291505813599, "learning_rate": 1.6608067721736277e-05, "loss": 1.4428, "step": 8138 }, { "epoch": 1.4661562711101104, "grad_norm": 1.531416654586792, "learning_rate": 1.6597528552209447e-05, "loss": 1.3344, "step": 8139 }, { "epoch": 1.4663364107183066, "grad_norm": 1.518561601638794, "learning_rate": 1.658699206221261e-05, "loss": 1.4462, "step": 8140 }, { "epoch": 1.466516550326503, "grad_norm": 1.4122933149337769, "learning_rate": 1.6576458252590986e-05, "loss": 1.0855, "step": 8141 }, { "epoch": 1.4666966899346994, "grad_norm": 1.4264907836914062, "learning_rate": 1.6565927124189613e-05, "loss": 1.346, "step": 8142 }, { "epoch": 1.4668768295428958, "grad_norm": 1.615363597869873, "learning_rate": 1.6555398677853273e-05, "loss": 1.5272, "step": 8143 }, { "epoch": 1.4670569691510922, "grad_norm": 1.4662690162658691, "learning_rate": 1.6544872914426563e-05, "loss": 1.4121, "step": 8144 }, { "epoch": 1.4672371087592884, "grad_norm": 1.3685420751571655, "learning_rate": 1.6534349834753855e-05, "loss": 1.1788, "step": 8145 }, { "epoch": 1.4674172483674848, "grad_norm": 1.6929986476898193, "learning_rate": 1.6523829439679284e-05, "loss": 1.3447, "step": 8146 }, { "epoch": 1.4675973879756812, "grad_norm": 1.7316865921020508, "learning_rate": 1.6513311730046828e-05, "loss": 1.5403, "step": 8147 }, { "epoch": 1.4677775275838776, "grad_norm": 1.5148394107818604, "learning_rate": 1.650279670670019e-05, "loss": 1.1846, "step": 8148 }, { "epoch": 1.4679576671920738, "grad_norm": 1.6269383430480957, "learning_rate": 1.6492284370482884e-05, "loss": 1.4486, "step": 8149 }, { "epoch": 1.4681378068002702, "grad_norm": 1.5044218301773071, "learning_rate": 1.648177472223821e-05, "loss": 1.1348, "step": 8150 }, { "epoch": 1.4683179464084666, "grad_norm": 1.5331660509109497, "learning_rate": 1.6471267762809238e-05, "loss": 1.1829, "step": 8151 }, { "epoch": 1.468498086016663, "grad_norm": 1.3183190822601318, "learning_rate": 1.646076349303884e-05, "loss": 1.6851, "step": 8152 }, { "epoch": 1.4686782256248594, "grad_norm": 1.2717913389205933, "learning_rate": 1.645026191376965e-05, "loss": 1.71, "step": 8153 }, { "epoch": 1.4688583652330556, "grad_norm": 1.273877501487732, "learning_rate": 1.6439763025844114e-05, "loss": 1.807, "step": 8154 }, { "epoch": 1.469038504841252, "grad_norm": 1.4363099336624146, "learning_rate": 1.6429266830104424e-05, "loss": 1.7865, "step": 8155 }, { "epoch": 1.4692186444494484, "grad_norm": 1.4654121398925781, "learning_rate": 1.6418773327392612e-05, "loss": 1.9925, "step": 8156 }, { "epoch": 1.4693987840576446, "grad_norm": 1.468063473701477, "learning_rate": 1.6408282518550444e-05, "loss": 1.7062, "step": 8157 }, { "epoch": 1.469578923665841, "grad_norm": 1.6224817037582397, "learning_rate": 1.6397794404419497e-05, "loss": 1.9611, "step": 8158 }, { "epoch": 1.4697590632740374, "grad_norm": 1.5204607248306274, "learning_rate": 1.638730898584111e-05, "loss": 1.7028, "step": 8159 }, { "epoch": 1.4699392028822338, "grad_norm": 1.6457748413085938, "learning_rate": 1.6376826263656413e-05, "loss": 1.6574, "step": 8160 }, { "epoch": 1.4701193424904302, "grad_norm": 1.8649173974990845, "learning_rate": 1.6366346238706365e-05, "loss": 1.9153, "step": 8161 }, { "epoch": 1.4702994820986264, "grad_norm": 1.559179663658142, "learning_rate": 1.6355868911831625e-05, "loss": 1.4524, "step": 8162 }, { "epoch": 1.4704796217068228, "grad_norm": 1.5441402196884155, "learning_rate": 1.634539428387269e-05, "loss": 1.663, "step": 8163 }, { "epoch": 1.4706597613150192, "grad_norm": 1.3922497034072876, "learning_rate": 1.6334922355669818e-05, "loss": 1.4285, "step": 8164 }, { "epoch": 1.4708399009232154, "grad_norm": 1.4064722061157227, "learning_rate": 1.6324453128063095e-05, "loss": 1.4873, "step": 8165 }, { "epoch": 1.4710200405314118, "grad_norm": 1.421367883682251, "learning_rate": 1.631398660189234e-05, "loss": 1.5008, "step": 8166 }, { "epoch": 1.4712001801396082, "grad_norm": 1.622439980506897, "learning_rate": 1.630352277799717e-05, "loss": 1.6112, "step": 8167 }, { "epoch": 1.4713803197478046, "grad_norm": 1.567834496498108, "learning_rate": 1.629306165721699e-05, "loss": 1.4846, "step": 8168 }, { "epoch": 1.471560459356001, "grad_norm": 1.425642728805542, "learning_rate": 1.628260324039097e-05, "loss": 1.2851, "step": 8169 }, { "epoch": 1.4717405989641972, "grad_norm": 1.3928115367889404, "learning_rate": 1.627214752835811e-05, "loss": 1.3105, "step": 8170 }, { "epoch": 1.4719207385723936, "grad_norm": 1.5022599697113037, "learning_rate": 1.626169452195715e-05, "loss": 1.4828, "step": 8171 }, { "epoch": 1.47210087818059, "grad_norm": 1.4822044372558594, "learning_rate": 1.625124422202662e-05, "loss": 1.4494, "step": 8172 }, { "epoch": 1.4722810177887862, "grad_norm": 1.4153070449829102, "learning_rate": 1.624079662940484e-05, "loss": 1.3555, "step": 8173 }, { "epoch": 1.4724611573969826, "grad_norm": 1.4269487857818604, "learning_rate": 1.623035174492991e-05, "loss": 1.2848, "step": 8174 }, { "epoch": 1.472641297005179, "grad_norm": 1.3872058391571045, "learning_rate": 1.6219909569439717e-05, "loss": 1.3322, "step": 8175 }, { "epoch": 1.4728214366133754, "grad_norm": 1.395554542541504, "learning_rate": 1.6209470103771923e-05, "loss": 1.1155, "step": 8176 }, { "epoch": 1.4730015762215718, "grad_norm": 1.4002515077590942, "learning_rate": 1.6199033348763975e-05, "loss": 1.271, "step": 8177 }, { "epoch": 1.4731817158297682, "grad_norm": 1.5238114595413208, "learning_rate": 1.61885993052531e-05, "loss": 1.5608, "step": 8178 }, { "epoch": 1.4733618554379644, "grad_norm": 1.462597370147705, "learning_rate": 1.617816797407633e-05, "loss": 1.3474, "step": 8179 }, { "epoch": 1.4735419950461608, "grad_norm": 1.5667364597320557, "learning_rate": 1.616773935607045e-05, "loss": 1.4376, "step": 8180 }, { "epoch": 1.4737221346543572, "grad_norm": 1.6095837354660034, "learning_rate": 1.6157313452072042e-05, "loss": 1.5954, "step": 8181 }, { "epoch": 1.4739022742625534, "grad_norm": 1.4854878187179565, "learning_rate": 1.614689026291747e-05, "loss": 1.3969, "step": 8182 }, { "epoch": 1.4740824138707498, "grad_norm": 1.3541326522827148, "learning_rate": 1.6136469789442864e-05, "loss": 1.3586, "step": 8183 }, { "epoch": 1.4742625534789462, "grad_norm": 1.4171067476272583, "learning_rate": 1.6126052032484157e-05, "loss": 1.3603, "step": 8184 }, { "epoch": 1.4744426930871426, "grad_norm": 1.4077274799346924, "learning_rate": 1.611563699287706e-05, "loss": 1.2536, "step": 8185 }, { "epoch": 1.474622832695339, "grad_norm": 1.596418023109436, "learning_rate": 1.610522467145706e-05, "loss": 1.3672, "step": 8186 }, { "epoch": 1.4748029723035352, "grad_norm": 1.4944181442260742, "learning_rate": 1.6094815069059428e-05, "loss": 1.3574, "step": 8187 }, { "epoch": 1.4749831119117316, "grad_norm": 1.5643279552459717, "learning_rate": 1.6084408186519196e-05, "loss": 1.5955, "step": 8188 }, { "epoch": 1.475163251519928, "grad_norm": 1.5402911901474, "learning_rate": 1.6074004024671234e-05, "loss": 1.5891, "step": 8189 }, { "epoch": 1.4753433911281242, "grad_norm": 1.412166714668274, "learning_rate": 1.606360258435015e-05, "loss": 1.4377, "step": 8190 }, { "epoch": 1.4755235307363206, "grad_norm": 1.6369582414627075, "learning_rate": 1.6053203866390336e-05, "loss": 1.4425, "step": 8191 }, { "epoch": 1.475703670344517, "grad_norm": 1.497342824935913, "learning_rate": 1.6042807871625953e-05, "loss": 1.2867, "step": 8192 }, { "epoch": 1.4758838099527134, "grad_norm": 1.5337201356887817, "learning_rate": 1.6032414600891005e-05, "loss": 1.408, "step": 8193 }, { "epoch": 1.4760639495609098, "grad_norm": 1.6361474990844727, "learning_rate": 1.6022024055019224e-05, "loss": 1.523, "step": 8194 }, { "epoch": 1.476244089169106, "grad_norm": 1.427764654159546, "learning_rate": 1.6011636234844106e-05, "loss": 1.243, "step": 8195 }, { "epoch": 1.4764242287773024, "grad_norm": 1.5448062419891357, "learning_rate": 1.600125114119898e-05, "loss": 1.3187, "step": 8196 }, { "epoch": 1.4766043683854988, "grad_norm": 1.6024812459945679, "learning_rate": 1.5990868774916906e-05, "loss": 1.5284, "step": 8197 }, { "epoch": 1.476784507993695, "grad_norm": 1.610843300819397, "learning_rate": 1.5980489136830794e-05, "loss": 1.3388, "step": 8198 }, { "epoch": 1.4769646476018914, "grad_norm": 1.6666816473007202, "learning_rate": 1.597011222777327e-05, "loss": 1.4057, "step": 8199 }, { "epoch": 1.4771447872100878, "grad_norm": 1.4728418588638306, "learning_rate": 1.5959738048576767e-05, "loss": 1.1331, "step": 8200 }, { "epoch": 1.4773249268182842, "grad_norm": 1.389033317565918, "learning_rate": 1.5949366600073502e-05, "loss": 1.0723, "step": 8201 }, { "epoch": 1.4775050664264806, "grad_norm": 1.417446255683899, "learning_rate": 1.5938997883095442e-05, "loss": 2.1171, "step": 8202 }, { "epoch": 1.477685206034677, "grad_norm": 1.3437901735305786, "learning_rate": 1.5928631898474393e-05, "loss": 1.7456, "step": 8203 }, { "epoch": 1.4778653456428732, "grad_norm": 1.3137058019638062, "learning_rate": 1.5918268647041908e-05, "loss": 1.7551, "step": 8204 }, { "epoch": 1.4780454852510696, "grad_norm": 1.312812089920044, "learning_rate": 1.5907908129629305e-05, "loss": 1.5694, "step": 8205 }, { "epoch": 1.478225624859266, "grad_norm": 1.5139602422714233, "learning_rate": 1.5897550347067706e-05, "loss": 1.7434, "step": 8206 }, { "epoch": 1.4784057644674622, "grad_norm": 1.526865005493164, "learning_rate": 1.588719530018801e-05, "loss": 1.6474, "step": 8207 }, { "epoch": 1.4785859040756586, "grad_norm": 1.5279940366744995, "learning_rate": 1.5876842989820895e-05, "loss": 1.6849, "step": 8208 }, { "epoch": 1.478766043683855, "grad_norm": 1.6920865774154663, "learning_rate": 1.5866493416796808e-05, "loss": 2.2204, "step": 8209 }, { "epoch": 1.4789461832920514, "grad_norm": 1.884311318397522, "learning_rate": 1.5856146581945995e-05, "loss": 1.8946, "step": 8210 }, { "epoch": 1.4791263229002478, "grad_norm": 1.6508115530014038, "learning_rate": 1.584580248609846e-05, "loss": 1.7426, "step": 8211 }, { "epoch": 1.479306462508444, "grad_norm": 1.573927879333496, "learning_rate": 1.583546113008402e-05, "loss": 1.6406, "step": 8212 }, { "epoch": 1.4794866021166404, "grad_norm": 1.3276898860931396, "learning_rate": 1.582512251473226e-05, "loss": 1.3468, "step": 8213 }, { "epoch": 1.4796667417248368, "grad_norm": 1.588862657546997, "learning_rate": 1.581478664087252e-05, "loss": 1.5124, "step": 8214 }, { "epoch": 1.479846881333033, "grad_norm": 1.2988438606262207, "learning_rate": 1.5804453509333944e-05, "loss": 1.3068, "step": 8215 }, { "epoch": 1.4800270209412294, "grad_norm": 1.3855034112930298, "learning_rate": 1.5794123120945447e-05, "loss": 1.4494, "step": 8216 }, { "epoch": 1.4802071605494258, "grad_norm": 1.3087507486343384, "learning_rate": 1.578379547653574e-05, "loss": 1.312, "step": 8217 }, { "epoch": 1.4803873001576222, "grad_norm": 1.395614504814148, "learning_rate": 1.5773470576933292e-05, "loss": 1.3759, "step": 8218 }, { "epoch": 1.4805674397658186, "grad_norm": 1.3604282140731812, "learning_rate": 1.5763148422966363e-05, "loss": 1.1742, "step": 8219 }, { "epoch": 1.4807475793740148, "grad_norm": 1.4736851453781128, "learning_rate": 1.575282901546297e-05, "loss": 1.5942, "step": 8220 }, { "epoch": 1.4809277189822112, "grad_norm": 1.3666789531707764, "learning_rate": 1.574251235525097e-05, "loss": 1.2418, "step": 8221 }, { "epoch": 1.4811078585904076, "grad_norm": 1.466553807258606, "learning_rate": 1.5732198443157947e-05, "loss": 1.4776, "step": 8222 }, { "epoch": 1.4812879981986038, "grad_norm": 1.5183740854263306, "learning_rate": 1.572188728001127e-05, "loss": 1.4796, "step": 8223 }, { "epoch": 1.4814681378068002, "grad_norm": 1.3407610654830933, "learning_rate": 1.5711578866638104e-05, "loss": 1.25, "step": 8224 }, { "epoch": 1.4816482774149966, "grad_norm": 1.4137070178985596, "learning_rate": 1.5701273203865358e-05, "loss": 1.3589, "step": 8225 }, { "epoch": 1.481828417023193, "grad_norm": 1.4186859130859375, "learning_rate": 1.5690970292519807e-05, "loss": 1.4058, "step": 8226 }, { "epoch": 1.4820085566313894, "grad_norm": 1.342305064201355, "learning_rate": 1.5680670133427888e-05, "loss": 1.2409, "step": 8227 }, { "epoch": 1.4821886962395856, "grad_norm": 1.4955202341079712, "learning_rate": 1.5670372727415895e-05, "loss": 1.525, "step": 8228 }, { "epoch": 1.482368835847782, "grad_norm": 1.556486964225769, "learning_rate": 1.5660078075309865e-05, "loss": 1.5135, "step": 8229 }, { "epoch": 1.4825489754559784, "grad_norm": 1.4578251838684082, "learning_rate": 1.564978617793566e-05, "loss": 1.3671, "step": 8230 }, { "epoch": 1.4827291150641748, "grad_norm": 1.5932451486587524, "learning_rate": 1.5639497036118877e-05, "loss": 1.4459, "step": 8231 }, { "epoch": 1.482909254672371, "grad_norm": 1.3448258638381958, "learning_rate": 1.5629210650684906e-05, "loss": 1.2717, "step": 8232 }, { "epoch": 1.4830893942805674, "grad_norm": 1.6008577346801758, "learning_rate": 1.5618927022458917e-05, "loss": 1.6105, "step": 8233 }, { "epoch": 1.4832695338887638, "grad_norm": 1.4718765020370483, "learning_rate": 1.560864615226584e-05, "loss": 1.3592, "step": 8234 }, { "epoch": 1.4834496734969602, "grad_norm": 1.5544114112854004, "learning_rate": 1.5598368040930426e-05, "loss": 1.5255, "step": 8235 }, { "epoch": 1.4836298131051566, "grad_norm": 1.5295480489730835, "learning_rate": 1.558809268927718e-05, "loss": 1.2372, "step": 8236 }, { "epoch": 1.4838099527133528, "grad_norm": 1.587200403213501, "learning_rate": 1.557782009813037e-05, "loss": 1.3634, "step": 8237 }, { "epoch": 1.4839900923215492, "grad_norm": 1.5351425409317017, "learning_rate": 1.5567550268314084e-05, "loss": 1.4644, "step": 8238 }, { "epoch": 1.4841702319297456, "grad_norm": 1.5293402671813965, "learning_rate": 1.5557283200652118e-05, "loss": 1.3356, "step": 8239 }, { "epoch": 1.4843503715379418, "grad_norm": 1.566402792930603, "learning_rate": 1.5547018895968126e-05, "loss": 1.4628, "step": 8240 }, { "epoch": 1.4845305111461382, "grad_norm": 1.6386185884475708, "learning_rate": 1.5536757355085497e-05, "loss": 1.4203, "step": 8241 }, { "epoch": 1.4847106507543346, "grad_norm": 1.5085557699203491, "learning_rate": 1.5526498578827407e-05, "loss": 1.5069, "step": 8242 }, { "epoch": 1.484890790362531, "grad_norm": 1.5629349946975708, "learning_rate": 1.55162425680168e-05, "loss": 1.4052, "step": 8243 }, { "epoch": 1.4850709299707274, "grad_norm": 1.669832468032837, "learning_rate": 1.5505989323476428e-05, "loss": 1.4681, "step": 8244 }, { "epoch": 1.4852510695789236, "grad_norm": 1.5910685062408447, "learning_rate": 1.549573884602879e-05, "loss": 1.3384, "step": 8245 }, { "epoch": 1.48543120918712, "grad_norm": 1.6272332668304443, "learning_rate": 1.5485491136496176e-05, "loss": 1.4858, "step": 8246 }, { "epoch": 1.4856113487953164, "grad_norm": 1.501037836074829, "learning_rate": 1.5475246195700653e-05, "loss": 1.2718, "step": 8247 }, { "epoch": 1.4857914884035126, "grad_norm": 1.5413844585418701, "learning_rate": 1.5465004024464063e-05, "loss": 1.2205, "step": 8248 }, { "epoch": 1.485971628011709, "grad_norm": 1.4610179662704468, "learning_rate": 1.5454764623608032e-05, "loss": 1.2234, "step": 8249 }, { "epoch": 1.4861517676199054, "grad_norm": 1.7109169960021973, "learning_rate": 1.544452799395395e-05, "loss": 1.4434, "step": 8250 }, { "epoch": 1.4863319072281018, "grad_norm": 1.5412770509719849, "learning_rate": 1.5434294136323006e-05, "loss": 1.537, "step": 8251 }, { "epoch": 1.4865120468362982, "grad_norm": 1.4056484699249268, "learning_rate": 1.5424063051536147e-05, "loss": 1.9708, "step": 8252 }, { "epoch": 1.4866921864444944, "grad_norm": 1.4181978702545166, "learning_rate": 1.5413834740414096e-05, "loss": 1.8486, "step": 8253 }, { "epoch": 1.4868723260526908, "grad_norm": 1.3953505754470825, "learning_rate": 1.540360920377739e-05, "loss": 1.8603, "step": 8254 }, { "epoch": 1.4870524656608872, "grad_norm": 1.4219179153442383, "learning_rate": 1.53933864424463e-05, "loss": 1.663, "step": 8255 }, { "epoch": 1.4872326052690834, "grad_norm": 1.4097083806991577, "learning_rate": 1.53831664572409e-05, "loss": 1.7531, "step": 8256 }, { "epoch": 1.4874127448772798, "grad_norm": 1.4414830207824707, "learning_rate": 1.5372949248981005e-05, "loss": 1.7384, "step": 8257 }, { "epoch": 1.4875928844854762, "grad_norm": 1.6061363220214844, "learning_rate": 1.5362734818486284e-05, "loss": 1.9557, "step": 8258 }, { "epoch": 1.4877730240936726, "grad_norm": 1.4928147792816162, "learning_rate": 1.5352523166576095e-05, "loss": 1.6007, "step": 8259 }, { "epoch": 1.487953163701869, "grad_norm": 1.912382960319519, "learning_rate": 1.5342314294069614e-05, "loss": 2.0942, "step": 8260 }, { "epoch": 1.4881333033100654, "grad_norm": 1.760590672492981, "learning_rate": 1.5332108201785807e-05, "loss": 1.8964, "step": 8261 }, { "epoch": 1.4883134429182616, "grad_norm": 1.6435855627059937, "learning_rate": 1.5321904890543377e-05, "loss": 1.8126, "step": 8262 }, { "epoch": 1.488493582526458, "grad_norm": 1.5011520385742188, "learning_rate": 1.5311704361160854e-05, "loss": 1.4877, "step": 8263 }, { "epoch": 1.4886737221346544, "grad_norm": 1.3183709383010864, "learning_rate": 1.5301506614456516e-05, "loss": 1.1734, "step": 8264 }, { "epoch": 1.4888538617428506, "grad_norm": 1.4418071508407593, "learning_rate": 1.5291311651248417e-05, "loss": 1.4261, "step": 8265 }, { "epoch": 1.489034001351047, "grad_norm": 1.5169801712036133, "learning_rate": 1.5281119472354395e-05, "loss": 1.533, "step": 8266 }, { "epoch": 1.4892141409592434, "grad_norm": 1.3048747777938843, "learning_rate": 1.527093007859204e-05, "loss": 1.3612, "step": 8267 }, { "epoch": 1.4893942805674398, "grad_norm": 1.378918170928955, "learning_rate": 1.5260743470778766e-05, "loss": 1.3104, "step": 8268 }, { "epoch": 1.4895744201756362, "grad_norm": 1.417015552520752, "learning_rate": 1.5250559649731739e-05, "loss": 1.4097, "step": 8269 }, { "epoch": 1.4897545597838324, "grad_norm": 1.5416224002838135, "learning_rate": 1.5240378616267886e-05, "loss": 1.4881, "step": 8270 }, { "epoch": 1.4899346993920288, "grad_norm": 1.4898866415023804, "learning_rate": 1.5230200371203934e-05, "loss": 1.5489, "step": 8271 }, { "epoch": 1.4901148390002252, "grad_norm": 1.4133309125900269, "learning_rate": 1.5220024915356369e-05, "loss": 1.2889, "step": 8272 }, { "epoch": 1.4902949786084214, "grad_norm": 1.4658639430999756, "learning_rate": 1.5209852249541468e-05, "loss": 1.5949, "step": 8273 }, { "epoch": 1.4904751182166178, "grad_norm": 1.3493504524230957, "learning_rate": 1.519968237457527e-05, "loss": 1.2713, "step": 8274 }, { "epoch": 1.4906552578248142, "grad_norm": 1.4170231819152832, "learning_rate": 1.5189515291273604e-05, "loss": 1.3645, "step": 8275 }, { "epoch": 1.4908353974330106, "grad_norm": 1.4936116933822632, "learning_rate": 1.5179351000452046e-05, "loss": 1.4379, "step": 8276 }, { "epoch": 1.491015537041207, "grad_norm": 1.4865164756774902, "learning_rate": 1.5169189502926008e-05, "loss": 1.5397, "step": 8277 }, { "epoch": 1.4911956766494032, "grad_norm": 1.4955211877822876, "learning_rate": 1.5159030799510627e-05, "loss": 1.3052, "step": 8278 }, { "epoch": 1.4913758162575996, "grad_norm": 1.407193899154663, "learning_rate": 1.514887489102082e-05, "loss": 1.3724, "step": 8279 }, { "epoch": 1.491555955865796, "grad_norm": 1.3906277418136597, "learning_rate": 1.5138721778271292e-05, "loss": 1.2316, "step": 8280 }, { "epoch": 1.4917360954739922, "grad_norm": 1.5691041946411133, "learning_rate": 1.5128571462076524e-05, "loss": 1.4848, "step": 8281 }, { "epoch": 1.4919162350821886, "grad_norm": 1.4598407745361328, "learning_rate": 1.5118423943250771e-05, "loss": 1.3737, "step": 8282 }, { "epoch": 1.492096374690385, "grad_norm": 1.4482777118682861, "learning_rate": 1.5108279222608058e-05, "loss": 1.2949, "step": 8283 }, { "epoch": 1.4922765142985814, "grad_norm": 1.4670007228851318, "learning_rate": 1.509813730096219e-05, "loss": 1.4747, "step": 8284 }, { "epoch": 1.4924566539067778, "grad_norm": 1.4364105463027954, "learning_rate": 1.5087998179126733e-05, "loss": 1.2779, "step": 8285 }, { "epoch": 1.492636793514974, "grad_norm": 1.527872085571289, "learning_rate": 1.5077861857915071e-05, "loss": 1.4822, "step": 8286 }, { "epoch": 1.4928169331231704, "grad_norm": 1.6420036554336548, "learning_rate": 1.5067728338140324e-05, "loss": 1.4718, "step": 8287 }, { "epoch": 1.4929970727313668, "grad_norm": 1.4591742753982544, "learning_rate": 1.5057597620615394e-05, "loss": 1.1637, "step": 8288 }, { "epoch": 1.4931772123395632, "grad_norm": 1.4519966840744019, "learning_rate": 1.5047469706152962e-05, "loss": 1.3354, "step": 8289 }, { "epoch": 1.4933573519477594, "grad_norm": 1.5276570320129395, "learning_rate": 1.5037344595565467e-05, "loss": 1.2669, "step": 8290 }, { "epoch": 1.4935374915559558, "grad_norm": 1.4279955625534058, "learning_rate": 1.5027222289665194e-05, "loss": 1.1563, "step": 8291 }, { "epoch": 1.4937176311641522, "grad_norm": 1.5719612836837769, "learning_rate": 1.5017102789264099e-05, "loss": 1.5391, "step": 8292 }, { "epoch": 1.4938977707723486, "grad_norm": 1.5116100311279297, "learning_rate": 1.5006986095173974e-05, "loss": 1.4209, "step": 8293 }, { "epoch": 1.494077910380545, "grad_norm": 1.625718593597412, "learning_rate": 1.499687220820638e-05, "loss": 1.6656, "step": 8294 }, { "epoch": 1.4942580499887412, "grad_norm": 1.6613056659698486, "learning_rate": 1.4986761129172638e-05, "loss": 1.4554, "step": 8295 }, { "epoch": 1.4944381895969376, "grad_norm": 1.6278865337371826, "learning_rate": 1.4976652858883877e-05, "loss": 1.4606, "step": 8296 }, { "epoch": 1.494618329205134, "grad_norm": 1.567407250404358, "learning_rate": 1.4966547398150966e-05, "loss": 1.3358, "step": 8297 }, { "epoch": 1.4947984688133302, "grad_norm": 1.6071711778640747, "learning_rate": 1.4956444747784554e-05, "loss": 1.5608, "step": 8298 }, { "epoch": 1.4949786084215266, "grad_norm": 1.6230558156967163, "learning_rate": 1.4946344908595061e-05, "loss": 1.5254, "step": 8299 }, { "epoch": 1.495158748029723, "grad_norm": 1.419877529144287, "learning_rate": 1.4936247881392723e-05, "loss": 1.1523, "step": 8300 }, { "epoch": 1.4953388876379194, "grad_norm": 1.37412691116333, "learning_rate": 1.4926153666987503e-05, "loss": 1.1959, "step": 8301 }, { "epoch": 1.4955190272461159, "grad_norm": 1.4982872009277344, "learning_rate": 1.4916062266189145e-05, "loss": 2.0673, "step": 8302 }, { "epoch": 1.495699166854312, "grad_norm": 1.4154436588287354, "learning_rate": 1.4905973679807205e-05, "loss": 1.9184, "step": 8303 }, { "epoch": 1.4958793064625084, "grad_norm": 1.2990942001342773, "learning_rate": 1.489588790865093e-05, "loss": 1.8955, "step": 8304 }, { "epoch": 1.4960594460707048, "grad_norm": 1.3932422399520874, "learning_rate": 1.4885804953529443e-05, "loss": 1.8519, "step": 8305 }, { "epoch": 1.496239585678901, "grad_norm": 1.3909327983856201, "learning_rate": 1.487572481525158e-05, "loss": 1.8113, "step": 8306 }, { "epoch": 1.4964197252870974, "grad_norm": 1.4670466184616089, "learning_rate": 1.4865647494625961e-05, "loss": 1.8791, "step": 8307 }, { "epoch": 1.4965998648952938, "grad_norm": 1.5743836164474487, "learning_rate": 1.4855572992460992e-05, "loss": 1.7762, "step": 8308 }, { "epoch": 1.4967800045034902, "grad_norm": 1.6498053073883057, "learning_rate": 1.484550130956482e-05, "loss": 2.1521, "step": 8309 }, { "epoch": 1.4969601441116867, "grad_norm": 1.7136973142623901, "learning_rate": 1.4835432446745428e-05, "loss": 1.7715, "step": 8310 }, { "epoch": 1.4971402837198828, "grad_norm": 2.12508225440979, "learning_rate": 1.4825366404810514e-05, "loss": 2.0838, "step": 8311 }, { "epoch": 1.4973204233280792, "grad_norm": 1.7046000957489014, "learning_rate": 1.4815303184567575e-05, "loss": 1.6362, "step": 8312 }, { "epoch": 1.4975005629362756, "grad_norm": 1.3992172479629517, "learning_rate": 1.480524278682388e-05, "loss": 1.4775, "step": 8313 }, { "epoch": 1.4976807025444718, "grad_norm": 1.513905644416809, "learning_rate": 1.4795185212386465e-05, "loss": 1.6337, "step": 8314 }, { "epoch": 1.4978608421526682, "grad_norm": 1.4843629598617554, "learning_rate": 1.4785130462062142e-05, "loss": 1.4203, "step": 8315 }, { "epoch": 1.4980409817608646, "grad_norm": 1.4706205129623413, "learning_rate": 1.47750785366575e-05, "loss": 1.1799, "step": 8316 }, { "epoch": 1.498221121369061, "grad_norm": 1.5224649906158447, "learning_rate": 1.47650294369789e-05, "loss": 1.3566, "step": 8317 }, { "epoch": 1.4984012609772575, "grad_norm": 1.3990535736083984, "learning_rate": 1.4754983163832464e-05, "loss": 1.4349, "step": 8318 }, { "epoch": 1.4985814005854539, "grad_norm": 1.396898865699768, "learning_rate": 1.4744939718024126e-05, "loss": 1.2324, "step": 8319 }, { "epoch": 1.49876154019365, "grad_norm": 1.420369267463684, "learning_rate": 1.4734899100359551e-05, "loss": 1.4519, "step": 8320 }, { "epoch": 1.4989416798018464, "grad_norm": 1.3734114170074463, "learning_rate": 1.4724861311644189e-05, "loss": 1.4056, "step": 8321 }, { "epoch": 1.4991218194100429, "grad_norm": 1.3760509490966797, "learning_rate": 1.4714826352683275e-05, "loss": 1.3771, "step": 8322 }, { "epoch": 1.499301959018239, "grad_norm": 1.2371797561645508, "learning_rate": 1.470479422428181e-05, "loss": 1.2243, "step": 8323 }, { "epoch": 1.4994820986264354, "grad_norm": 1.5186550617218018, "learning_rate": 1.4694764927244553e-05, "loss": 1.6891, "step": 8324 }, { "epoch": 1.4996622382346319, "grad_norm": 1.4722535610198975, "learning_rate": 1.4684738462376057e-05, "loss": 1.3139, "step": 8325 }, { "epoch": 1.4998423778428283, "grad_norm": 1.4951199293136597, "learning_rate": 1.467471483048064e-05, "loss": 1.4969, "step": 8326 }, { "epoch": 1.5000225174510247, "grad_norm": 1.3578238487243652, "learning_rate": 1.466469403236238e-05, "loss": 1.3798, "step": 8327 }, { "epoch": 1.5002026570592208, "grad_norm": 1.335727334022522, "learning_rate": 1.4654676068825169e-05, "loss": 1.2296, "step": 8328 }, { "epoch": 1.5003827966674173, "grad_norm": 1.554796576499939, "learning_rate": 1.4644660940672627e-05, "loss": 1.2578, "step": 8329 }, { "epoch": 1.5005629362756134, "grad_norm": 1.4066554307937622, "learning_rate": 1.4634648648708165e-05, "loss": 1.3122, "step": 8330 }, { "epoch": 1.5007430758838098, "grad_norm": 1.4520604610443115, "learning_rate": 1.4624639193734962e-05, "loss": 1.5269, "step": 8331 }, { "epoch": 1.5009232154920062, "grad_norm": 1.507185697555542, "learning_rate": 1.4614632576555954e-05, "loss": 1.3771, "step": 8332 }, { "epoch": 1.5011033551002027, "grad_norm": 1.4310791492462158, "learning_rate": 1.46046287979739e-05, "loss": 1.2709, "step": 8333 }, { "epoch": 1.501283494708399, "grad_norm": 1.4253461360931396, "learning_rate": 1.4594627858791287e-05, "loss": 1.3773, "step": 8334 }, { "epoch": 1.5014636343165955, "grad_norm": 1.449432134628296, "learning_rate": 1.458462975981038e-05, "loss": 1.4128, "step": 8335 }, { "epoch": 1.5016437739247919, "grad_norm": 1.4495176076889038, "learning_rate": 1.4574634501833224e-05, "loss": 1.3953, "step": 8336 }, { "epoch": 1.501823913532988, "grad_norm": 1.549580693244934, "learning_rate": 1.4564642085661634e-05, "loss": 1.5367, "step": 8337 }, { "epoch": 1.5020040531411845, "grad_norm": 1.4680416584014893, "learning_rate": 1.4554652512097194e-05, "loss": 1.3608, "step": 8338 }, { "epoch": 1.5021841927493806, "grad_norm": 1.7308632135391235, "learning_rate": 1.4544665781941263e-05, "loss": 1.5143, "step": 8339 }, { "epoch": 1.502364332357577, "grad_norm": 1.5684367418289185, "learning_rate": 1.4534681895994978e-05, "loss": 1.2402, "step": 8340 }, { "epoch": 1.5025444719657735, "grad_norm": 1.4874870777130127, "learning_rate": 1.452470085505922e-05, "loss": 1.3738, "step": 8341 }, { "epoch": 1.5027246115739699, "grad_norm": 1.5809177160263062, "learning_rate": 1.4514722659934693e-05, "loss": 1.3483, "step": 8342 }, { "epoch": 1.5029047511821663, "grad_norm": 1.3894163370132446, "learning_rate": 1.4504747311421835e-05, "loss": 1.3323, "step": 8343 }, { "epoch": 1.5030848907903627, "grad_norm": 1.480005145072937, "learning_rate": 1.4494774810320855e-05, "loss": 1.3526, "step": 8344 }, { "epoch": 1.5032650303985589, "grad_norm": 1.4905613660812378, "learning_rate": 1.4484805157431747e-05, "loss": 1.2468, "step": 8345 }, { "epoch": 1.5034451700067553, "grad_norm": 1.3716827630996704, "learning_rate": 1.4474838353554266e-05, "loss": 1.3311, "step": 8346 }, { "epoch": 1.5036253096149514, "grad_norm": 1.5514984130859375, "learning_rate": 1.4464874399487955e-05, "loss": 1.3753, "step": 8347 }, { "epoch": 1.5038054492231478, "grad_norm": 1.5787135362625122, "learning_rate": 1.4454913296032108e-05, "loss": 1.4103, "step": 8348 }, { "epoch": 1.5039855888313443, "grad_norm": 1.5407532453536987, "learning_rate": 1.4444955043985803e-05, "loss": 1.3342, "step": 8349 }, { "epoch": 1.5041657284395407, "grad_norm": 1.5277503728866577, "learning_rate": 1.4434999644147867e-05, "loss": 1.1601, "step": 8350 }, { "epoch": 1.504345868047737, "grad_norm": 1.4100755453109741, "learning_rate": 1.4425047097316957e-05, "loss": 1.1101, "step": 8351 }, { "epoch": 1.5045260076559335, "grad_norm": 1.469811201095581, "learning_rate": 1.4415097404291438e-05, "loss": 1.9185, "step": 8352 }, { "epoch": 1.5047061472641297, "grad_norm": 1.3146687746047974, "learning_rate": 1.4405150565869473e-05, "loss": 1.6853, "step": 8353 }, { "epoch": 1.504886286872326, "grad_norm": 1.4314987659454346, "learning_rate": 1.4395206582848997e-05, "loss": 2.0995, "step": 8354 }, { "epoch": 1.5050664264805222, "grad_norm": 1.3999438285827637, "learning_rate": 1.4385265456027685e-05, "loss": 1.7929, "step": 8355 }, { "epoch": 1.5052465660887187, "grad_norm": 1.340320348739624, "learning_rate": 1.4375327186203063e-05, "loss": 1.7079, "step": 8356 }, { "epoch": 1.505426705696915, "grad_norm": 1.5397825241088867, "learning_rate": 1.4365391774172327e-05, "loss": 1.5989, "step": 8357 }, { "epoch": 1.5056068453051115, "grad_norm": 1.6149042844772339, "learning_rate": 1.435545922073251e-05, "loss": 1.9178, "step": 8358 }, { "epoch": 1.5057869849133079, "grad_norm": 1.5382217168807983, "learning_rate": 1.4345529526680385e-05, "loss": 1.7802, "step": 8359 }, { "epoch": 1.5059671245215043, "grad_norm": 1.7813255786895752, "learning_rate": 1.4335602692812506e-05, "loss": 2.0355, "step": 8360 }, { "epoch": 1.5061472641297007, "grad_norm": 1.8131059408187866, "learning_rate": 1.4325678719925217e-05, "loss": 1.9474, "step": 8361 }, { "epoch": 1.5063274037378969, "grad_norm": 1.4389322996139526, "learning_rate": 1.4315757608814613e-05, "loss": 1.4792, "step": 8362 }, { "epoch": 1.5065075433460933, "grad_norm": 1.405253291130066, "learning_rate": 1.4305839360276541e-05, "loss": 1.6627, "step": 8363 }, { "epoch": 1.5066876829542895, "grad_norm": 1.3129521608352661, "learning_rate": 1.429592397510664e-05, "loss": 1.4173, "step": 8364 }, { "epoch": 1.5068678225624859, "grad_norm": 1.51478910446167, "learning_rate": 1.4286011454100345e-05, "loss": 1.7013, "step": 8365 }, { "epoch": 1.5070479621706823, "grad_norm": 1.4286258220672607, "learning_rate": 1.427610179805281e-05, "loss": 1.5803, "step": 8366 }, { "epoch": 1.5072281017788787, "grad_norm": 1.5034891366958618, "learning_rate": 1.4266195007758992e-05, "loss": 1.4936, "step": 8367 }, { "epoch": 1.507408241387075, "grad_norm": 1.4833680391311646, "learning_rate": 1.4256291084013623e-05, "loss": 1.498, "step": 8368 }, { "epoch": 1.5075883809952715, "grad_norm": 1.495717167854309, "learning_rate": 1.424639002761114e-05, "loss": 1.5051, "step": 8369 }, { "epoch": 1.5077685206034677, "grad_norm": 1.3419215679168701, "learning_rate": 1.4236491839345844e-05, "loss": 1.3533, "step": 8370 }, { "epoch": 1.507948660211664, "grad_norm": 1.4875454902648926, "learning_rate": 1.4226596520011759e-05, "loss": 1.6129, "step": 8371 }, { "epoch": 1.5081287998198603, "grad_norm": 1.600865125656128, "learning_rate": 1.4216704070402675e-05, "loss": 1.6304, "step": 8372 }, { "epoch": 1.5083089394280567, "grad_norm": 1.4899908304214478, "learning_rate": 1.420681449131216e-05, "loss": 1.2069, "step": 8373 }, { "epoch": 1.508489079036253, "grad_norm": 1.4615834951400757, "learning_rate": 1.4196927783533537e-05, "loss": 1.4899, "step": 8374 }, { "epoch": 1.5086692186444495, "grad_norm": 1.5631928443908691, "learning_rate": 1.4187043947859945e-05, "loss": 1.5518, "step": 8375 }, { "epoch": 1.5088493582526459, "grad_norm": 1.4729647636413574, "learning_rate": 1.4177162985084242e-05, "loss": 1.4203, "step": 8376 }, { "epoch": 1.5090294978608423, "grad_norm": 1.4309210777282715, "learning_rate": 1.4167284895999079e-05, "loss": 1.4526, "step": 8377 }, { "epoch": 1.5092096374690385, "grad_norm": 1.4715665578842163, "learning_rate": 1.4157409681396866e-05, "loss": 1.3414, "step": 8378 }, { "epoch": 1.5093897770772349, "grad_norm": 1.4777817726135254, "learning_rate": 1.414753734206979e-05, "loss": 1.5413, "step": 8379 }, { "epoch": 1.509569916685431, "grad_norm": 1.5771071910858154, "learning_rate": 1.4137667878809802e-05, "loss": 1.3945, "step": 8380 }, { "epoch": 1.5097500562936275, "grad_norm": 1.687414526939392, "learning_rate": 1.4127801292408633e-05, "loss": 1.5659, "step": 8381 }, { "epoch": 1.5099301959018239, "grad_norm": 1.4822789430618286, "learning_rate": 1.411793758365777e-05, "loss": 1.3918, "step": 8382 }, { "epoch": 1.5101103355100203, "grad_norm": 1.4594035148620605, "learning_rate": 1.4108076753348465e-05, "loss": 1.4652, "step": 8383 }, { "epoch": 1.5102904751182167, "grad_norm": 1.484213948249817, "learning_rate": 1.4098218802271773e-05, "loss": 1.4436, "step": 8384 }, { "epoch": 1.510470614726413, "grad_norm": 1.4155118465423584, "learning_rate": 1.4088363731218479e-05, "loss": 1.2711, "step": 8385 }, { "epoch": 1.5106507543346093, "grad_norm": 1.5056843757629395, "learning_rate": 1.4078511540979156e-05, "loss": 1.4301, "step": 8386 }, { "epoch": 1.5108308939428057, "grad_norm": 1.61029851436615, "learning_rate": 1.4068662232344143e-05, "loss": 1.5778, "step": 8387 }, { "epoch": 1.511011033551002, "grad_norm": 1.5662527084350586, "learning_rate": 1.4058815806103542e-05, "loss": 1.4208, "step": 8388 }, { "epoch": 1.5111911731591983, "grad_norm": 1.535174012184143, "learning_rate": 1.4048972263047228e-05, "loss": 1.6055, "step": 8389 }, { "epoch": 1.5113713127673947, "grad_norm": 1.526949405670166, "learning_rate": 1.403913160396485e-05, "loss": 1.2925, "step": 8390 }, { "epoch": 1.511551452375591, "grad_norm": 1.6564172506332397, "learning_rate": 1.4029293829645823e-05, "loss": 1.5368, "step": 8391 }, { "epoch": 1.5117315919837875, "grad_norm": 1.6053402423858643, "learning_rate": 1.4019458940879304e-05, "loss": 1.477, "step": 8392 }, { "epoch": 1.5119117315919839, "grad_norm": 1.4639637470245361, "learning_rate": 1.4009626938454279e-05, "loss": 1.2375, "step": 8393 }, { "epoch": 1.5120918712001803, "grad_norm": 1.4958629608154297, "learning_rate": 1.3999797823159455e-05, "loss": 1.4628, "step": 8394 }, { "epoch": 1.5122720108083765, "grad_norm": 1.4583030939102173, "learning_rate": 1.3989971595783313e-05, "loss": 1.2396, "step": 8395 }, { "epoch": 1.5124521504165729, "grad_norm": 1.4834370613098145, "learning_rate": 1.3980148257114107e-05, "loss": 1.2876, "step": 8396 }, { "epoch": 1.512632290024769, "grad_norm": 1.7045390605926514, "learning_rate": 1.3970327807939849e-05, "loss": 1.5498, "step": 8397 }, { "epoch": 1.5128124296329655, "grad_norm": 1.590372920036316, "learning_rate": 1.3960510249048358e-05, "loss": 1.3776, "step": 8398 }, { "epoch": 1.5129925692411619, "grad_norm": 1.6044691801071167, "learning_rate": 1.3950695581227186e-05, "loss": 1.2668, "step": 8399 }, { "epoch": 1.5131727088493583, "grad_norm": 1.6583843231201172, "learning_rate": 1.394088380526365e-05, "loss": 1.6151, "step": 8400 }, { "epoch": 1.5133528484575547, "grad_norm": 1.5915814638137817, "learning_rate": 1.393107492194487e-05, "loss": 1.6831, "step": 8401 }, { "epoch": 1.513532988065751, "grad_norm": 1.2983510494232178, "learning_rate": 1.3921268932057663e-05, "loss": 1.7787, "step": 8402 }, { "epoch": 1.5137131276739473, "grad_norm": 1.352957844734192, "learning_rate": 1.3911465836388699e-05, "loss": 1.7723, "step": 8403 }, { "epoch": 1.5138932672821437, "grad_norm": 1.4373608827590942, "learning_rate": 1.390166563572437e-05, "loss": 1.7958, "step": 8404 }, { "epoch": 1.5140734068903399, "grad_norm": 1.3368573188781738, "learning_rate": 1.3891868330850838e-05, "loss": 1.8473, "step": 8405 }, { "epoch": 1.5142535464985363, "grad_norm": 1.392109751701355, "learning_rate": 1.3882073922554029e-05, "loss": 1.8177, "step": 8406 }, { "epoch": 1.5144336861067327, "grad_norm": 1.4237622022628784, "learning_rate": 1.3872282411619669e-05, "loss": 1.5074, "step": 8407 }, { "epoch": 1.514613825714929, "grad_norm": 1.5282012224197388, "learning_rate": 1.386249379883322e-05, "loss": 2.0453, "step": 8408 }, { "epoch": 1.5147939653231255, "grad_norm": 1.5520250797271729, "learning_rate": 1.3852708084979921e-05, "loss": 1.5891, "step": 8409 }, { "epoch": 1.514974104931322, "grad_norm": 1.579027771949768, "learning_rate": 1.3842925270844764e-05, "loss": 1.7107, "step": 8410 }, { "epoch": 1.515154244539518, "grad_norm": 1.7695492506027222, "learning_rate": 1.3833145357212535e-05, "loss": 1.8077, "step": 8411 }, { "epoch": 1.5153343841477145, "grad_norm": 1.5642077922821045, "learning_rate": 1.3823368344867765e-05, "loss": 1.4737, "step": 8412 }, { "epoch": 1.5155145237559107, "grad_norm": 1.3876491785049438, "learning_rate": 1.3813594234594773e-05, "loss": 1.3701, "step": 8413 }, { "epoch": 1.515694663364107, "grad_norm": 1.4011150598526, "learning_rate": 1.380382302717762e-05, "loss": 1.4581, "step": 8414 }, { "epoch": 1.5158748029723035, "grad_norm": 1.4258620738983154, "learning_rate": 1.3794054723400152e-05, "loss": 1.4507, "step": 8415 }, { "epoch": 1.5160549425804999, "grad_norm": 1.419507622718811, "learning_rate": 1.3784289324045969e-05, "loss": 1.2879, "step": 8416 }, { "epoch": 1.5162350821886963, "grad_norm": 1.3497275114059448, "learning_rate": 1.3774526829898465e-05, "loss": 1.1851, "step": 8417 }, { "epoch": 1.5164152217968927, "grad_norm": 1.3190562725067139, "learning_rate": 1.3764767241740778e-05, "loss": 1.3401, "step": 8418 }, { "epoch": 1.516595361405089, "grad_norm": 1.534015417098999, "learning_rate": 1.3755010560355814e-05, "loss": 1.5609, "step": 8419 }, { "epoch": 1.5167755010132853, "grad_norm": 1.3188953399658203, "learning_rate": 1.3745256786526235e-05, "loss": 1.256, "step": 8420 }, { "epoch": 1.5169556406214817, "grad_norm": 1.4393165111541748, "learning_rate": 1.3735505921034526e-05, "loss": 1.2934, "step": 8421 }, { "epoch": 1.5171357802296779, "grad_norm": 1.4774068593978882, "learning_rate": 1.372575796466285e-05, "loss": 1.5429, "step": 8422 }, { "epoch": 1.5173159198378743, "grad_norm": 1.5270869731903076, "learning_rate": 1.3716012918193205e-05, "loss": 1.6515, "step": 8423 }, { "epoch": 1.5174960594460707, "grad_norm": 1.541034460067749, "learning_rate": 1.3706270782407326e-05, "loss": 1.6425, "step": 8424 }, { "epoch": 1.517676199054267, "grad_norm": 1.4326303005218506, "learning_rate": 1.3696531558086717e-05, "loss": 1.3795, "step": 8425 }, { "epoch": 1.5178563386624635, "grad_norm": 1.4946346282958984, "learning_rate": 1.3686795246012673e-05, "loss": 1.4344, "step": 8426 }, { "epoch": 1.51803647827066, "grad_norm": 1.567612648010254, "learning_rate": 1.3677061846966233e-05, "loss": 1.6448, "step": 8427 }, { "epoch": 1.518216617878856, "grad_norm": 1.353171944618225, "learning_rate": 1.3667331361728197e-05, "loss": 1.1792, "step": 8428 }, { "epoch": 1.5183967574870525, "grad_norm": 1.4202461242675781, "learning_rate": 1.3657603791079143e-05, "loss": 1.2336, "step": 8429 }, { "epoch": 1.5185768970952487, "grad_norm": 1.4545245170593262, "learning_rate": 1.3647879135799396e-05, "loss": 1.4073, "step": 8430 }, { "epoch": 1.518757036703445, "grad_norm": 1.5074716806411743, "learning_rate": 1.3638157396669088e-05, "loss": 1.5228, "step": 8431 }, { "epoch": 1.5189371763116415, "grad_norm": 1.3752111196517944, "learning_rate": 1.362843857446809e-05, "loss": 1.4161, "step": 8432 }, { "epoch": 1.519117315919838, "grad_norm": 1.363057255744934, "learning_rate": 1.3618722669976047e-05, "loss": 1.286, "step": 8433 }, { "epoch": 1.5192974555280343, "grad_norm": 1.432416319847107, "learning_rate": 1.3609009683972318e-05, "loss": 1.1755, "step": 8434 }, { "epoch": 1.5194775951362307, "grad_norm": 1.5536136627197266, "learning_rate": 1.3599299617236122e-05, "loss": 1.5197, "step": 8435 }, { "epoch": 1.519657734744427, "grad_norm": 1.5242769718170166, "learning_rate": 1.3589592470546381e-05, "loss": 1.4136, "step": 8436 }, { "epoch": 1.5198378743526233, "grad_norm": 1.5510059595108032, "learning_rate": 1.3579888244681798e-05, "loss": 1.3505, "step": 8437 }, { "epoch": 1.5200180139608195, "grad_norm": 1.4648244380950928, "learning_rate": 1.3570186940420836e-05, "loss": 1.2745, "step": 8438 }, { "epoch": 1.5201981535690159, "grad_norm": 1.4273651838302612, "learning_rate": 1.3560488558541724e-05, "loss": 1.2076, "step": 8439 }, { "epoch": 1.5203782931772123, "grad_norm": 1.5484436750411987, "learning_rate": 1.3550793099822484e-05, "loss": 1.452, "step": 8440 }, { "epoch": 1.5205584327854087, "grad_norm": 1.5063382387161255, "learning_rate": 1.3541100565040871e-05, "loss": 1.246, "step": 8441 }, { "epoch": 1.520738572393605, "grad_norm": 1.4981944561004639, "learning_rate": 1.3531410954974406e-05, "loss": 1.2674, "step": 8442 }, { "epoch": 1.5209187120018015, "grad_norm": 1.6496658325195312, "learning_rate": 1.3521724270400393e-05, "loss": 1.5407, "step": 8443 }, { "epoch": 1.5210988516099977, "grad_norm": 1.4470162391662598, "learning_rate": 1.3512040512095891e-05, "loss": 1.412, "step": 8444 }, { "epoch": 1.521278991218194, "grad_norm": 1.5615785121917725, "learning_rate": 1.3502359680837728e-05, "loss": 1.4588, "step": 8445 }, { "epoch": 1.5214591308263905, "grad_norm": 1.448725938796997, "learning_rate": 1.3492681777402494e-05, "loss": 1.1237, "step": 8446 }, { "epoch": 1.5216392704345867, "grad_norm": 1.5661933422088623, "learning_rate": 1.3483006802566544e-05, "loss": 1.2942, "step": 8447 }, { "epoch": 1.521819410042783, "grad_norm": 1.6213743686676025, "learning_rate": 1.3473334757105988e-05, "loss": 1.3139, "step": 8448 }, { "epoch": 1.5219995496509795, "grad_norm": 1.6604522466659546, "learning_rate": 1.3463665641796741e-05, "loss": 1.2844, "step": 8449 }, { "epoch": 1.522179689259176, "grad_norm": 1.5329192876815796, "learning_rate": 1.345399945741444e-05, "loss": 1.2786, "step": 8450 }, { "epoch": 1.5223598288673723, "grad_norm": 1.6042360067367554, "learning_rate": 1.3444336204734504e-05, "loss": 1.3243, "step": 8451 }, { "epoch": 1.5225399684755687, "grad_norm": 1.5694210529327393, "learning_rate": 1.343467588453211e-05, "loss": 2.1818, "step": 8452 }, { "epoch": 1.522720108083765, "grad_norm": 1.4153255224227905, "learning_rate": 1.3425018497582204e-05, "loss": 1.6971, "step": 8453 }, { "epoch": 1.5229002476919613, "grad_norm": 1.4332348108291626, "learning_rate": 1.34153640446595e-05, "loss": 1.9152, "step": 8454 }, { "epoch": 1.5230803873001575, "grad_norm": 1.4661483764648438, "learning_rate": 1.3405712526538478e-05, "loss": 1.9448, "step": 8455 }, { "epoch": 1.523260526908354, "grad_norm": 1.3632382154464722, "learning_rate": 1.3396063943993365e-05, "loss": 1.4259, "step": 8456 }, { "epoch": 1.5234406665165503, "grad_norm": 1.4935468435287476, "learning_rate": 1.3386418297798165e-05, "loss": 1.5784, "step": 8457 }, { "epoch": 1.5236208061247467, "grad_norm": 1.4813027381896973, "learning_rate": 1.3376775588726664e-05, "loss": 2.0049, "step": 8458 }, { "epoch": 1.5238009457329431, "grad_norm": 1.437522530555725, "learning_rate": 1.336713581755239e-05, "loss": 1.6885, "step": 8459 }, { "epoch": 1.5239810853411395, "grad_norm": 1.6935616731643677, "learning_rate": 1.3357498985048638e-05, "loss": 2.0892, "step": 8460 }, { "epoch": 1.5241612249493357, "grad_norm": 1.7641370296478271, "learning_rate": 1.3347865091988465e-05, "loss": 1.8358, "step": 8461 }, { "epoch": 1.524341364557532, "grad_norm": 1.5443445444107056, "learning_rate": 1.333823413914469e-05, "loss": 1.4988, "step": 8462 }, { "epoch": 1.5245215041657283, "grad_norm": 1.286849856376648, "learning_rate": 1.3328606127289934e-05, "loss": 1.2725, "step": 8463 }, { "epoch": 1.5247016437739247, "grad_norm": 1.5175888538360596, "learning_rate": 1.3318981057196529e-05, "loss": 1.5116, "step": 8464 }, { "epoch": 1.524881783382121, "grad_norm": 1.5164495706558228, "learning_rate": 1.3309358929636612e-05, "loss": 1.4129, "step": 8465 }, { "epoch": 1.5250619229903175, "grad_norm": 1.4837632179260254, "learning_rate": 1.3299739745382033e-05, "loss": 1.2452, "step": 8466 }, { "epoch": 1.525242062598514, "grad_norm": 1.3592618703842163, "learning_rate": 1.3290123505204444e-05, "loss": 1.3376, "step": 8467 }, { "epoch": 1.5254222022067103, "grad_norm": 1.4977359771728516, "learning_rate": 1.328051020987528e-05, "loss": 1.5322, "step": 8468 }, { "epoch": 1.5256023418149065, "grad_norm": 1.494917631149292, "learning_rate": 1.3270899860165698e-05, "loss": 1.6695, "step": 8469 }, { "epoch": 1.525782481423103, "grad_norm": 1.4956611394882202, "learning_rate": 1.3261292456846647e-05, "loss": 1.4165, "step": 8470 }, { "epoch": 1.525962621031299, "grad_norm": 1.5039005279541016, "learning_rate": 1.32516880006888e-05, "loss": 1.5244, "step": 8471 }, { "epoch": 1.5261427606394955, "grad_norm": 1.2097077369689941, "learning_rate": 1.3242086492462658e-05, "loss": 1.0662, "step": 8472 }, { "epoch": 1.526322900247692, "grad_norm": 1.3735398054122925, "learning_rate": 1.3232487932938431e-05, "loss": 1.3092, "step": 8473 }, { "epoch": 1.5265030398558883, "grad_norm": 1.441636562347412, "learning_rate": 1.3222892322886115e-05, "loss": 1.5211, "step": 8474 }, { "epoch": 1.5266831794640847, "grad_norm": 1.4389063119888306, "learning_rate": 1.3213299663075468e-05, "loss": 1.3159, "step": 8475 }, { "epoch": 1.5268633190722811, "grad_norm": 1.383254051208496, "learning_rate": 1.3203709954276e-05, "loss": 1.2766, "step": 8476 }, { "epoch": 1.5270434586804775, "grad_norm": 1.38627290725708, "learning_rate": 1.3194123197256996e-05, "loss": 1.3993, "step": 8477 }, { "epoch": 1.5272235982886737, "grad_norm": 1.471824049949646, "learning_rate": 1.3184539392787505e-05, "loss": 1.4703, "step": 8478 }, { "epoch": 1.5274037378968701, "grad_norm": 1.5234025716781616, "learning_rate": 1.3174958541636328e-05, "loss": 1.4725, "step": 8479 }, { "epoch": 1.5275838775050663, "grad_norm": 1.4237037897109985, "learning_rate": 1.316538064457204e-05, "loss": 1.4489, "step": 8480 }, { "epoch": 1.5277640171132627, "grad_norm": 1.7419742345809937, "learning_rate": 1.3155805702362961e-05, "loss": 1.6456, "step": 8481 }, { "epoch": 1.5279441567214591, "grad_norm": 1.4707708358764648, "learning_rate": 1.3146233715777218e-05, "loss": 1.3318, "step": 8482 }, { "epoch": 1.5281242963296555, "grad_norm": 1.4871906042099, "learning_rate": 1.3136664685582656e-05, "loss": 1.375, "step": 8483 }, { "epoch": 1.528304435937852, "grad_norm": 1.5011922121047974, "learning_rate": 1.3127098612546895e-05, "loss": 1.5781, "step": 8484 }, { "epoch": 1.5284845755460483, "grad_norm": 1.263919472694397, "learning_rate": 1.3117535497437327e-05, "loss": 1.1398, "step": 8485 }, { "epoch": 1.5286647151542445, "grad_norm": 1.6035559177398682, "learning_rate": 1.3107975341021094e-05, "loss": 1.4106, "step": 8486 }, { "epoch": 1.528844854762441, "grad_norm": 1.5318305492401123, "learning_rate": 1.3098418144065111e-05, "loss": 1.456, "step": 8487 }, { "epoch": 1.529024994370637, "grad_norm": 1.5810279846191406, "learning_rate": 1.3088863907336047e-05, "loss": 1.6726, "step": 8488 }, { "epoch": 1.5292051339788335, "grad_norm": 1.5620367527008057, "learning_rate": 1.3079312631600344e-05, "loss": 1.4859, "step": 8489 }, { "epoch": 1.52938527358703, "grad_norm": 1.5283035039901733, "learning_rate": 1.3069764317624183e-05, "loss": 1.3507, "step": 8490 }, { "epoch": 1.5295654131952263, "grad_norm": 1.5750842094421387, "learning_rate": 1.3060218966173554e-05, "loss": 1.4837, "step": 8491 }, { "epoch": 1.5297455528034227, "grad_norm": 1.3221359252929688, "learning_rate": 1.305067657801417e-05, "loss": 1.1077, "step": 8492 }, { "epoch": 1.5299256924116191, "grad_norm": 1.6749347448349, "learning_rate": 1.3041137153911515e-05, "loss": 1.4105, "step": 8493 }, { "epoch": 1.5301058320198153, "grad_norm": 1.623597264289856, "learning_rate": 1.303160069463083e-05, "loss": 1.3625, "step": 8494 }, { "epoch": 1.5302859716280117, "grad_norm": 1.5514707565307617, "learning_rate": 1.3022067200937116e-05, "loss": 1.5256, "step": 8495 }, { "epoch": 1.530466111236208, "grad_norm": 1.5476737022399902, "learning_rate": 1.3012536673595177e-05, "loss": 1.3902, "step": 8496 }, { "epoch": 1.5306462508444043, "grad_norm": 1.446799635887146, "learning_rate": 1.300300911336953e-05, "loss": 1.3214, "step": 8497 }, { "epoch": 1.5308263904526007, "grad_norm": 1.6114540100097656, "learning_rate": 1.299348452102449e-05, "loss": 1.3176, "step": 8498 }, { "epoch": 1.5310065300607971, "grad_norm": 1.3178235292434692, "learning_rate": 1.2983962897324059e-05, "loss": 1.1009, "step": 8499 }, { "epoch": 1.5311866696689935, "grad_norm": 1.5109543800354004, "learning_rate": 1.2974444243032114e-05, "loss": 1.2304, "step": 8500 }, { "epoch": 1.53136680927719, "grad_norm": 1.3390576839447021, "learning_rate": 1.296492855891222e-05, "loss": 1.1027, "step": 8501 }, { "epoch": 1.5315469488853863, "grad_norm": 1.2594733238220215, "learning_rate": 1.2955415845727714e-05, "loss": 1.5455, "step": 8502 }, { "epoch": 1.5317270884935825, "grad_norm": 1.3651126623153687, "learning_rate": 1.2945906104241706e-05, "loss": 1.8172, "step": 8503 }, { "epoch": 1.531907228101779, "grad_norm": 1.351478099822998, "learning_rate": 1.2936399335217042e-05, "loss": 1.7814, "step": 8504 }, { "epoch": 1.5320873677099751, "grad_norm": 1.3493454456329346, "learning_rate": 1.2926895539416394e-05, "loss": 1.4755, "step": 8505 }, { "epoch": 1.5322675073181715, "grad_norm": 1.450313687324524, "learning_rate": 1.2917394717602121e-05, "loss": 1.7402, "step": 8506 }, { "epoch": 1.532447646926368, "grad_norm": 1.495362639427185, "learning_rate": 1.2907896870536385e-05, "loss": 1.9777, "step": 8507 }, { "epoch": 1.5326277865345643, "grad_norm": 1.3716552257537842, "learning_rate": 1.2898401998981096e-05, "loss": 1.6616, "step": 8508 }, { "epoch": 1.5328079261427607, "grad_norm": 1.6226818561553955, "learning_rate": 1.2888910103697926e-05, "loss": 1.8247, "step": 8509 }, { "epoch": 1.5329880657509571, "grad_norm": 1.6547714471817017, "learning_rate": 1.2879421185448309e-05, "loss": 1.5338, "step": 8510 }, { "epoch": 1.5331682053591533, "grad_norm": 1.6544560194015503, "learning_rate": 1.2869935244993447e-05, "loss": 1.8283, "step": 8511 }, { "epoch": 1.5333483449673497, "grad_norm": 2.0752575397491455, "learning_rate": 1.2860452283094293e-05, "loss": 1.9163, "step": 8512 }, { "epoch": 1.533528484575546, "grad_norm": 1.375159740447998, "learning_rate": 1.285097230051155e-05, "loss": 1.4159, "step": 8513 }, { "epoch": 1.5337086241837423, "grad_norm": 1.5268782377243042, "learning_rate": 1.2841495298005734e-05, "loss": 1.5116, "step": 8514 }, { "epoch": 1.5338887637919387, "grad_norm": 1.589659571647644, "learning_rate": 1.2832021276337065e-05, "loss": 1.5728, "step": 8515 }, { "epoch": 1.5340689034001351, "grad_norm": 1.4498947858810425, "learning_rate": 1.282255023626554e-05, "loss": 1.4522, "step": 8516 }, { "epoch": 1.5342490430083315, "grad_norm": 1.3239405155181885, "learning_rate": 1.2813082178550929e-05, "loss": 1.2979, "step": 8517 }, { "epoch": 1.534429182616528, "grad_norm": 1.4620486497879028, "learning_rate": 1.2803617103952752e-05, "loss": 1.3065, "step": 8518 }, { "epoch": 1.5346093222247241, "grad_norm": 1.4922089576721191, "learning_rate": 1.2794155013230296e-05, "loss": 1.5853, "step": 8519 }, { "epoch": 1.5347894618329205, "grad_norm": 1.3442966938018799, "learning_rate": 1.2784695907142602e-05, "loss": 1.3448, "step": 8520 }, { "epoch": 1.5349696014411167, "grad_norm": 1.5153357982635498, "learning_rate": 1.2775239786448473e-05, "loss": 1.6984, "step": 8521 }, { "epoch": 1.5351497410493131, "grad_norm": 1.4586541652679443, "learning_rate": 1.2765786651906481e-05, "loss": 1.1627, "step": 8522 }, { "epoch": 1.5353298806575095, "grad_norm": 1.424187421798706, "learning_rate": 1.275633650427493e-05, "loss": 1.3935, "step": 8523 }, { "epoch": 1.535510020265706, "grad_norm": 1.5735300779342651, "learning_rate": 1.274688934431194e-05, "loss": 1.3414, "step": 8524 }, { "epoch": 1.5356901598739023, "grad_norm": 1.4543471336364746, "learning_rate": 1.2737445172775342e-05, "loss": 1.3258, "step": 8525 }, { "epoch": 1.5358702994820987, "grad_norm": 1.348741888999939, "learning_rate": 1.2728003990422737e-05, "loss": 1.2971, "step": 8526 }, { "epoch": 1.536050439090295, "grad_norm": 1.3355053663253784, "learning_rate": 1.271856579801149e-05, "loss": 1.2999, "step": 8527 }, { "epoch": 1.5362305786984913, "grad_norm": 1.512103796005249, "learning_rate": 1.2709130596298746e-05, "loss": 1.6508, "step": 8528 }, { "epoch": 1.5364107183066875, "grad_norm": 1.4918079376220703, "learning_rate": 1.269969838604138e-05, "loss": 1.4288, "step": 8529 }, { "epoch": 1.536590857914884, "grad_norm": 1.3475688695907593, "learning_rate": 1.269026916799606e-05, "loss": 1.2065, "step": 8530 }, { "epoch": 1.5367709975230803, "grad_norm": 1.5625334978103638, "learning_rate": 1.2680842942919158e-05, "loss": 1.2757, "step": 8531 }, { "epoch": 1.5369511371312767, "grad_norm": 1.5092414617538452, "learning_rate": 1.267141971156684e-05, "loss": 1.3177, "step": 8532 }, { "epoch": 1.5371312767394731, "grad_norm": 1.4448717832565308, "learning_rate": 1.2661999474695069e-05, "loss": 1.3904, "step": 8533 }, { "epoch": 1.5373114163476695, "grad_norm": 1.5039159059524536, "learning_rate": 1.2652582233059512e-05, "loss": 1.4093, "step": 8534 }, { "epoch": 1.537491555955866, "grad_norm": 1.530164361000061, "learning_rate": 1.2643167987415622e-05, "loss": 1.5026, "step": 8535 }, { "epoch": 1.5376716955640621, "grad_norm": 1.509382963180542, "learning_rate": 1.2633756738518593e-05, "loss": 1.3776, "step": 8536 }, { "epoch": 1.5378518351722585, "grad_norm": 1.6182764768600464, "learning_rate": 1.2624348487123388e-05, "loss": 1.4133, "step": 8537 }, { "epoch": 1.5380319747804547, "grad_norm": 1.5430703163146973, "learning_rate": 1.2614943233984755e-05, "loss": 1.5458, "step": 8538 }, { "epoch": 1.5382121143886511, "grad_norm": 1.3606374263763428, "learning_rate": 1.2605540979857167e-05, "loss": 1.2496, "step": 8539 }, { "epoch": 1.5383922539968475, "grad_norm": 1.6063823699951172, "learning_rate": 1.2596141725494865e-05, "loss": 1.4912, "step": 8540 }, { "epoch": 1.538572393605044, "grad_norm": 1.3954979181289673, "learning_rate": 1.2586745471651856e-05, "loss": 1.2741, "step": 8541 }, { "epoch": 1.5387525332132403, "grad_norm": 1.4321208000183105, "learning_rate": 1.2577352219081905e-05, "loss": 1.3587, "step": 8542 }, { "epoch": 1.5389326728214368, "grad_norm": 1.566691517829895, "learning_rate": 1.2567961968538528e-05, "loss": 1.4978, "step": 8543 }, { "epoch": 1.539112812429633, "grad_norm": 1.4372515678405762, "learning_rate": 1.2558574720775013e-05, "loss": 1.2638, "step": 8544 }, { "epoch": 1.5392929520378293, "grad_norm": 1.4506967067718506, "learning_rate": 1.2549190476544393e-05, "loss": 1.2999, "step": 8545 }, { "epoch": 1.5394730916460255, "grad_norm": 1.5573097467422485, "learning_rate": 1.2539809236599453e-05, "loss": 1.4117, "step": 8546 }, { "epoch": 1.539653231254222, "grad_norm": 1.500807285308838, "learning_rate": 1.2530431001692795e-05, "loss": 1.2566, "step": 8547 }, { "epoch": 1.5398333708624183, "grad_norm": 1.485992670059204, "learning_rate": 1.2521055772576701e-05, "loss": 1.188, "step": 8548 }, { "epoch": 1.5400135104706147, "grad_norm": 1.6053001880645752, "learning_rate": 1.2511683550003261e-05, "loss": 1.2021, "step": 8549 }, { "epoch": 1.5401936500788111, "grad_norm": 1.5648465156555176, "learning_rate": 1.2502314334724313e-05, "loss": 1.3417, "step": 8550 }, { "epoch": 1.5403737896870076, "grad_norm": 1.3934253454208374, "learning_rate": 1.2492948127491438e-05, "loss": 1.0801, "step": 8551 }, { "epoch": 1.5405539292952037, "grad_norm": 1.1661428213119507, "learning_rate": 1.2483584929055997e-05, "loss": 1.5584, "step": 8552 }, { "epoch": 1.5407340689034001, "grad_norm": 1.3349653482437134, "learning_rate": 1.24742247401691e-05, "loss": 1.7135, "step": 8553 }, { "epoch": 1.5409142085115963, "grad_norm": 1.3491472005844116, "learning_rate": 1.2464867561581612e-05, "loss": 1.6945, "step": 8554 }, { "epoch": 1.5410943481197927, "grad_norm": 1.5111083984375, "learning_rate": 1.2455513394044156e-05, "loss": 1.9851, "step": 8555 }, { "epoch": 1.5412744877279891, "grad_norm": 1.4901374578475952, "learning_rate": 1.244616223830714e-05, "loss": 1.8563, "step": 8556 }, { "epoch": 1.5414546273361855, "grad_norm": 1.4646332263946533, "learning_rate": 1.2436814095120696e-05, "loss": 2.0575, "step": 8557 }, { "epoch": 1.541634766944382, "grad_norm": 1.5264774560928345, "learning_rate": 1.2427468965234735e-05, "loss": 1.6241, "step": 8558 }, { "epoch": 1.5418149065525784, "grad_norm": 1.4877851009368896, "learning_rate": 1.2418126849398909e-05, "loss": 1.7371, "step": 8559 }, { "epoch": 1.5419950461607748, "grad_norm": 1.741468906402588, "learning_rate": 1.2408787748362626e-05, "loss": 1.8813, "step": 8560 }, { "epoch": 1.542175185768971, "grad_norm": 1.8243359327316284, "learning_rate": 1.2399451662875094e-05, "loss": 1.8387, "step": 8561 }, { "epoch": 1.5423553253771674, "grad_norm": 1.5647733211517334, "learning_rate": 1.2390118593685235e-05, "loss": 1.3895, "step": 8562 }, { "epoch": 1.5425354649853635, "grad_norm": 1.4561927318572998, "learning_rate": 1.238078854154176e-05, "loss": 1.4689, "step": 8563 }, { "epoch": 1.54271560459356, "grad_norm": 1.5138221979141235, "learning_rate": 1.2371461507193078e-05, "loss": 1.4717, "step": 8564 }, { "epoch": 1.5428957442017563, "grad_norm": 1.284593939781189, "learning_rate": 1.2362137491387432e-05, "loss": 1.2359, "step": 8565 }, { "epoch": 1.5430758838099528, "grad_norm": 1.575137972831726, "learning_rate": 1.2352816494872793e-05, "loss": 1.5824, "step": 8566 }, { "epoch": 1.5432560234181492, "grad_norm": 1.377861499786377, "learning_rate": 1.2343498518396873e-05, "loss": 1.2198, "step": 8567 }, { "epoch": 1.5434361630263456, "grad_norm": 1.2628295421600342, "learning_rate": 1.2334183562707158e-05, "loss": 1.2969, "step": 8568 }, { "epoch": 1.5436163026345417, "grad_norm": 1.365851879119873, "learning_rate": 1.2324871628550877e-05, "loss": 1.3515, "step": 8569 }, { "epoch": 1.5437964422427382, "grad_norm": 1.325805902481079, "learning_rate": 1.2315562716675062e-05, "loss": 1.1759, "step": 8570 }, { "epoch": 1.5439765818509343, "grad_norm": 1.4242055416107178, "learning_rate": 1.2306256827826445e-05, "loss": 1.3285, "step": 8571 }, { "epoch": 1.5441567214591307, "grad_norm": 1.4825645685195923, "learning_rate": 1.229695396275155e-05, "loss": 1.4229, "step": 8572 }, { "epoch": 1.5443368610673271, "grad_norm": 1.4225372076034546, "learning_rate": 1.228765412219664e-05, "loss": 1.3283, "step": 8573 }, { "epoch": 1.5445170006755236, "grad_norm": 1.410843014717102, "learning_rate": 1.227835730690775e-05, "loss": 1.2663, "step": 8574 }, { "epoch": 1.54469714028372, "grad_norm": 1.3840010166168213, "learning_rate": 1.2269063517630663e-05, "loss": 1.3268, "step": 8575 }, { "epoch": 1.5448772798919164, "grad_norm": 1.4351036548614502, "learning_rate": 1.225977275511092e-05, "loss": 1.3796, "step": 8576 }, { "epoch": 1.5450574195001125, "grad_norm": 1.5183186531066895, "learning_rate": 1.2250485020093827e-05, "loss": 1.6787, "step": 8577 }, { "epoch": 1.545237559108309, "grad_norm": 1.4989542961120605, "learning_rate": 1.2241200313324425e-05, "loss": 1.4513, "step": 8578 }, { "epoch": 1.5454176987165051, "grad_norm": 1.4515091180801392, "learning_rate": 1.2231918635547556e-05, "loss": 1.437, "step": 8579 }, { "epoch": 1.5455978383247015, "grad_norm": 1.346286654472351, "learning_rate": 1.2222639987507778e-05, "loss": 1.1592, "step": 8580 }, { "epoch": 1.545777977932898, "grad_norm": 1.5514501333236694, "learning_rate": 1.2213364369949414e-05, "loss": 1.3718, "step": 8581 }, { "epoch": 1.5459581175410944, "grad_norm": 1.404780626296997, "learning_rate": 1.2204091783616562e-05, "loss": 1.3441, "step": 8582 }, { "epoch": 1.5461382571492908, "grad_norm": 1.6831082105636597, "learning_rate": 1.2194822229253051e-05, "loss": 1.5636, "step": 8583 }, { "epoch": 1.5463183967574872, "grad_norm": 1.6924959421157837, "learning_rate": 1.2185555707602492e-05, "loss": 1.5399, "step": 8584 }, { "epoch": 1.5464985363656834, "grad_norm": 1.4593991041183472, "learning_rate": 1.217629221940823e-05, "loss": 1.1899, "step": 8585 }, { "epoch": 1.5466786759738798, "grad_norm": 1.3241924047470093, "learning_rate": 1.216703176541339e-05, "loss": 1.0874, "step": 8586 }, { "epoch": 1.5468588155820762, "grad_norm": 1.5010648965835571, "learning_rate": 1.2157774346360828e-05, "loss": 1.3995, "step": 8587 }, { "epoch": 1.5470389551902723, "grad_norm": 1.7728914022445679, "learning_rate": 1.2148519962993166e-05, "loss": 1.6562, "step": 8588 }, { "epoch": 1.5472190947984688, "grad_norm": 1.4874451160430908, "learning_rate": 1.2139268616052807e-05, "loss": 1.3433, "step": 8589 }, { "epoch": 1.5473992344066652, "grad_norm": 1.477943778038025, "learning_rate": 1.2130020306281881e-05, "loss": 1.3239, "step": 8590 }, { "epoch": 1.5475793740148616, "grad_norm": 1.4550631046295166, "learning_rate": 1.2120775034422276e-05, "loss": 1.3155, "step": 8591 }, { "epoch": 1.547759513623058, "grad_norm": 1.3794419765472412, "learning_rate": 1.2111532801215636e-05, "loss": 1.1849, "step": 8592 }, { "epoch": 1.5479396532312544, "grad_norm": 1.4824434518814087, "learning_rate": 1.2102293607403391e-05, "loss": 1.4421, "step": 8593 }, { "epoch": 1.5481197928394506, "grad_norm": 1.513271689414978, "learning_rate": 1.2093057453726697e-05, "loss": 1.3528, "step": 8594 }, { "epoch": 1.548299932447647, "grad_norm": 1.6666795015335083, "learning_rate": 1.2083824340926486e-05, "loss": 1.5194, "step": 8595 }, { "epoch": 1.5484800720558431, "grad_norm": 1.5944799184799194, "learning_rate": 1.20745942697434e-05, "loss": 1.3517, "step": 8596 }, { "epoch": 1.5486602116640396, "grad_norm": 1.3408061265945435, "learning_rate": 1.2065367240917874e-05, "loss": 1.193, "step": 8597 }, { "epoch": 1.548840351272236, "grad_norm": 1.4287751913070679, "learning_rate": 1.2056143255190128e-05, "loss": 1.138, "step": 8598 }, { "epoch": 1.5490204908804324, "grad_norm": 1.4982671737670898, "learning_rate": 1.2046922313300086e-05, "loss": 1.1075, "step": 8599 }, { "epoch": 1.5492006304886288, "grad_norm": 1.4751111268997192, "learning_rate": 1.2037704415987454e-05, "loss": 1.2775, "step": 8600 }, { "epoch": 1.5493807700968252, "grad_norm": 1.3570735454559326, "learning_rate": 1.2028489563991679e-05, "loss": 1.2004, "step": 8601 }, { "epoch": 1.5495609097050214, "grad_norm": 1.3632498979568481, "learning_rate": 1.201927775805196e-05, "loss": 1.7741, "step": 8602 }, { "epoch": 1.5497410493132178, "grad_norm": 1.2828961610794067, "learning_rate": 1.2010068998907298e-05, "loss": 1.7022, "step": 8603 }, { "epoch": 1.549921188921414, "grad_norm": 1.2892394065856934, "learning_rate": 1.2000863287296394e-05, "loss": 1.6525, "step": 8604 }, { "epoch": 1.5501013285296104, "grad_norm": 1.4127252101898193, "learning_rate": 1.1991660623957728e-05, "loss": 1.8569, "step": 8605 }, { "epoch": 1.5502814681378068, "grad_norm": 1.4736652374267578, "learning_rate": 1.1982461009629536e-05, "loss": 1.7014, "step": 8606 }, { "epoch": 1.5504616077460032, "grad_norm": 1.449845314025879, "learning_rate": 1.1973264445049804e-05, "loss": 1.6665, "step": 8607 }, { "epoch": 1.5506417473541996, "grad_norm": 1.4491453170776367, "learning_rate": 1.1964070930956273e-05, "loss": 1.5762, "step": 8608 }, { "epoch": 1.550821886962396, "grad_norm": 1.5829616785049438, "learning_rate": 1.195488046808645e-05, "loss": 1.7513, "step": 8609 }, { "epoch": 1.5510020265705922, "grad_norm": 1.8279757499694824, "learning_rate": 1.194569305717758e-05, "loss": 1.8266, "step": 8610 }, { "epoch": 1.5511821661787886, "grad_norm": 1.6994513273239136, "learning_rate": 1.1936508698966664e-05, "loss": 1.7551, "step": 8611 }, { "epoch": 1.5513623057869848, "grad_norm": 1.5221192836761475, "learning_rate": 1.1927327394190496e-05, "loss": 1.5015, "step": 8612 }, { "epoch": 1.5515424453951812, "grad_norm": 1.4585553407669067, "learning_rate": 1.1918149143585582e-05, "loss": 1.3764, "step": 8613 }, { "epoch": 1.5517225850033776, "grad_norm": 1.5565168857574463, "learning_rate": 1.1908973947888191e-05, "loss": 1.4802, "step": 8614 }, { "epoch": 1.551902724611574, "grad_norm": 1.4045219421386719, "learning_rate": 1.189980180783436e-05, "loss": 1.2302, "step": 8615 }, { "epoch": 1.5520828642197704, "grad_norm": 1.4543061256408691, "learning_rate": 1.1890632724159866e-05, "loss": 1.4389, "step": 8616 }, { "epoch": 1.5522630038279668, "grad_norm": 1.552564024925232, "learning_rate": 1.1881466697600252e-05, "loss": 1.5942, "step": 8617 }, { "epoch": 1.5524431434361632, "grad_norm": 1.3444478511810303, "learning_rate": 1.1872303728890816e-05, "loss": 1.4607, "step": 8618 }, { "epoch": 1.5526232830443594, "grad_norm": 1.3649972677230835, "learning_rate": 1.1863143818766603e-05, "loss": 1.3506, "step": 8619 }, { "epoch": 1.5528034226525558, "grad_norm": 1.3263585567474365, "learning_rate": 1.18539869679624e-05, "loss": 1.212, "step": 8620 }, { "epoch": 1.552983562260752, "grad_norm": 1.3795740604400635, "learning_rate": 1.18448331772128e-05, "loss": 1.4327, "step": 8621 }, { "epoch": 1.5531637018689484, "grad_norm": 1.6514915227890015, "learning_rate": 1.1835682447252095e-05, "loss": 1.5076, "step": 8622 }, { "epoch": 1.5533438414771448, "grad_norm": 1.4656131267547607, "learning_rate": 1.1826534778814358e-05, "loss": 1.4235, "step": 8623 }, { "epoch": 1.5535239810853412, "grad_norm": 1.4518013000488281, "learning_rate": 1.1817390172633403e-05, "loss": 1.2619, "step": 8624 }, { "epoch": 1.5537041206935376, "grad_norm": 1.571103811264038, "learning_rate": 1.1808248629442803e-05, "loss": 1.3302, "step": 8625 }, { "epoch": 1.553884260301734, "grad_norm": 1.4875376224517822, "learning_rate": 1.17991101499759e-05, "loss": 1.4112, "step": 8626 }, { "epoch": 1.5540643999099302, "grad_norm": 1.4393737316131592, "learning_rate": 1.1789974734965797e-05, "loss": 1.2955, "step": 8627 }, { "epoch": 1.5542445395181266, "grad_norm": 1.4137779474258423, "learning_rate": 1.178084238514529e-05, "loss": 1.384, "step": 8628 }, { "epoch": 1.5544246791263228, "grad_norm": 1.520317792892456, "learning_rate": 1.1771713101246995e-05, "loss": 1.1605, "step": 8629 }, { "epoch": 1.5546048187345192, "grad_norm": 1.4095158576965332, "learning_rate": 1.1762586884003235e-05, "loss": 1.3425, "step": 8630 }, { "epoch": 1.5547849583427156, "grad_norm": 1.4087046384811401, "learning_rate": 1.1753463734146148e-05, "loss": 1.2212, "step": 8631 }, { "epoch": 1.554965097950912, "grad_norm": 1.5679315328598022, "learning_rate": 1.174434365240757e-05, "loss": 1.4374, "step": 8632 }, { "epoch": 1.5551452375591084, "grad_norm": 1.5481170415878296, "learning_rate": 1.173522663951911e-05, "loss": 1.5784, "step": 8633 }, { "epoch": 1.5553253771673048, "grad_norm": 1.4032551050186157, "learning_rate": 1.1726112696212121e-05, "loss": 1.394, "step": 8634 }, { "epoch": 1.555505516775501, "grad_norm": 1.5116862058639526, "learning_rate": 1.1717001823217738e-05, "loss": 1.3542, "step": 8635 }, { "epoch": 1.5556856563836974, "grad_norm": 1.334087610244751, "learning_rate": 1.1707894021266824e-05, "loss": 1.1887, "step": 8636 }, { "epoch": 1.5558657959918936, "grad_norm": 1.5564337968826294, "learning_rate": 1.1698789291090001e-05, "loss": 1.3328, "step": 8637 }, { "epoch": 1.55604593560009, "grad_norm": 1.647221326828003, "learning_rate": 1.168968763341764e-05, "loss": 1.6553, "step": 8638 }, { "epoch": 1.5562260752082864, "grad_norm": 1.248002290725708, "learning_rate": 1.1680589048979885e-05, "loss": 1.0252, "step": 8639 }, { "epoch": 1.5564062148164828, "grad_norm": 1.4508148431777954, "learning_rate": 1.1671493538506606e-05, "loss": 1.395, "step": 8640 }, { "epoch": 1.5565863544246792, "grad_norm": 1.4866935014724731, "learning_rate": 1.166240110272745e-05, "loss": 1.3563, "step": 8641 }, { "epoch": 1.5567664940328756, "grad_norm": 1.5526775121688843, "learning_rate": 1.1653311742371797e-05, "loss": 1.4164, "step": 8642 }, { "epoch": 1.5569466336410718, "grad_norm": 1.5380525588989258, "learning_rate": 1.1644225458168806e-05, "loss": 1.4179, "step": 8643 }, { "epoch": 1.5571267732492682, "grad_norm": 1.6545181274414062, "learning_rate": 1.1635142250847347e-05, "loss": 1.3571, "step": 8644 }, { "epoch": 1.5573069128574646, "grad_norm": 1.5748405456542969, "learning_rate": 1.1626062121136106e-05, "loss": 1.2285, "step": 8645 }, { "epoch": 1.5574870524656608, "grad_norm": 1.5831753015518188, "learning_rate": 1.161698506976347e-05, "loss": 1.2879, "step": 8646 }, { "epoch": 1.5576671920738572, "grad_norm": 1.5573097467422485, "learning_rate": 1.1607911097457596e-05, "loss": 1.2037, "step": 8647 }, { "epoch": 1.5578473316820536, "grad_norm": 1.6150676012039185, "learning_rate": 1.1598840204946394e-05, "loss": 1.4767, "step": 8648 }, { "epoch": 1.55802747129025, "grad_norm": 1.5323644876480103, "learning_rate": 1.1589772392957526e-05, "loss": 1.2217, "step": 8649 }, { "epoch": 1.5582076108984464, "grad_norm": 1.5842088460922241, "learning_rate": 1.158070766221841e-05, "loss": 1.2831, "step": 8650 }, { "epoch": 1.5583877505066428, "grad_norm": 1.4812527894973755, "learning_rate": 1.1571646013456206e-05, "loss": 1.088, "step": 8651 }, { "epoch": 1.558567890114839, "grad_norm": 1.3270530700683594, "learning_rate": 1.1562587447397844e-05, "loss": 1.7395, "step": 8652 }, { "epoch": 1.5587480297230354, "grad_norm": 1.3473864793777466, "learning_rate": 1.1553531964769982e-05, "loss": 1.6648, "step": 8653 }, { "epoch": 1.5589281693312316, "grad_norm": 1.386386513710022, "learning_rate": 1.1544479566299076e-05, "loss": 1.7889, "step": 8654 }, { "epoch": 1.559108308939428, "grad_norm": 1.4458447694778442, "learning_rate": 1.1535430252711282e-05, "loss": 1.7524, "step": 8655 }, { "epoch": 1.5592884485476244, "grad_norm": 1.5281251668930054, "learning_rate": 1.1526384024732539e-05, "loss": 1.477, "step": 8656 }, { "epoch": 1.5594685881558208, "grad_norm": 1.5493708848953247, "learning_rate": 1.1517340883088534e-05, "loss": 1.8962, "step": 8657 }, { "epoch": 1.5596487277640172, "grad_norm": 1.7481637001037598, "learning_rate": 1.150830082850468e-05, "loss": 2.1099, "step": 8658 }, { "epoch": 1.5598288673722136, "grad_norm": 1.6272221803665161, "learning_rate": 1.1499263861706205e-05, "loss": 1.7286, "step": 8659 }, { "epoch": 1.5600090069804098, "grad_norm": 1.947574257850647, "learning_rate": 1.1490229983418038e-05, "loss": 2.13, "step": 8660 }, { "epoch": 1.5601891465886062, "grad_norm": 1.7890394926071167, "learning_rate": 1.1481199194364856e-05, "loss": 1.952, "step": 8661 }, { "epoch": 1.5603692861968024, "grad_norm": 1.5355799198150635, "learning_rate": 1.1472171495271095e-05, "loss": 1.437, "step": 8662 }, { "epoch": 1.5605494258049988, "grad_norm": 1.4334068298339844, "learning_rate": 1.1463146886860981e-05, "loss": 1.6783, "step": 8663 }, { "epoch": 1.5607295654131952, "grad_norm": 1.4627766609191895, "learning_rate": 1.1454125369858459e-05, "loss": 1.5114, "step": 8664 }, { "epoch": 1.5609097050213916, "grad_norm": 1.4727448225021362, "learning_rate": 1.1445106944987222e-05, "loss": 1.4182, "step": 8665 }, { "epoch": 1.561089844629588, "grad_norm": 1.345745325088501, "learning_rate": 1.1436091612970728e-05, "loss": 1.207, "step": 8666 }, { "epoch": 1.5612699842377844, "grad_norm": 1.46124267578125, "learning_rate": 1.1427079374532162e-05, "loss": 1.5473, "step": 8667 }, { "epoch": 1.5614501238459806, "grad_norm": 1.4334505796432495, "learning_rate": 1.1418070230394523e-05, "loss": 1.417, "step": 8668 }, { "epoch": 1.561630263454177, "grad_norm": 1.4410444498062134, "learning_rate": 1.1409064181280493e-05, "loss": 1.2146, "step": 8669 }, { "epoch": 1.5618104030623732, "grad_norm": 1.5506155490875244, "learning_rate": 1.1400061227912534e-05, "loss": 1.5874, "step": 8670 }, { "epoch": 1.5619905426705696, "grad_norm": 1.5400606393814087, "learning_rate": 1.1391061371012867e-05, "loss": 1.5057, "step": 8671 }, { "epoch": 1.562170682278766, "grad_norm": 1.4226422309875488, "learning_rate": 1.138206461130345e-05, "loss": 1.3539, "step": 8672 }, { "epoch": 1.5623508218869624, "grad_norm": 1.448081374168396, "learning_rate": 1.1373070949505998e-05, "loss": 1.2107, "step": 8673 }, { "epoch": 1.5625309614951588, "grad_norm": 1.4487507343292236, "learning_rate": 1.136408038634198e-05, "loss": 1.3585, "step": 8674 }, { "epoch": 1.5627111011033552, "grad_norm": 1.63332200050354, "learning_rate": 1.1355092922532618e-05, "loss": 1.6179, "step": 8675 }, { "epoch": 1.5628912407115516, "grad_norm": 1.4600070714950562, "learning_rate": 1.1346108558798862e-05, "loss": 1.3085, "step": 8676 }, { "epoch": 1.5630713803197478, "grad_norm": 1.3906023502349854, "learning_rate": 1.1337127295861465e-05, "loss": 1.3178, "step": 8677 }, { "epoch": 1.5632515199279442, "grad_norm": 1.3391629457473755, "learning_rate": 1.132814913444088e-05, "loss": 1.2214, "step": 8678 }, { "epoch": 1.5634316595361404, "grad_norm": 1.5805213451385498, "learning_rate": 1.1319174075257338e-05, "loss": 1.5774, "step": 8679 }, { "epoch": 1.5636117991443368, "grad_norm": 1.6098612546920776, "learning_rate": 1.1310202119030815e-05, "loss": 1.5041, "step": 8680 }, { "epoch": 1.5637919387525332, "grad_norm": 1.6560418605804443, "learning_rate": 1.1301233266481025e-05, "loss": 1.5958, "step": 8681 }, { "epoch": 1.5639720783607296, "grad_norm": 1.4953657388687134, "learning_rate": 1.129226751832746e-05, "loss": 1.3866, "step": 8682 }, { "epoch": 1.564152217968926, "grad_norm": 1.575158715248108, "learning_rate": 1.1283304875289336e-05, "loss": 1.3636, "step": 8683 }, { "epoch": 1.5643323575771224, "grad_norm": 1.5307196378707886, "learning_rate": 1.127434533808564e-05, "loss": 1.3651, "step": 8684 }, { "epoch": 1.5645124971853186, "grad_norm": 1.5176597833633423, "learning_rate": 1.1265388907435081e-05, "loss": 1.389, "step": 8685 }, { "epoch": 1.564692636793515, "grad_norm": 1.6231279373168945, "learning_rate": 1.1256435584056173e-05, "loss": 1.3781, "step": 8686 }, { "epoch": 1.5648727764017112, "grad_norm": 1.5850392580032349, "learning_rate": 1.1247485368667132e-05, "loss": 1.583, "step": 8687 }, { "epoch": 1.5650529160099076, "grad_norm": 1.4881261587142944, "learning_rate": 1.1238538261985937e-05, "loss": 1.3389, "step": 8688 }, { "epoch": 1.565233055618104, "grad_norm": 1.3968855142593384, "learning_rate": 1.1229594264730326e-05, "loss": 1.1427, "step": 8689 }, { "epoch": 1.5654131952263004, "grad_norm": 1.5665854215621948, "learning_rate": 1.1220653377617763e-05, "loss": 1.4724, "step": 8690 }, { "epoch": 1.5655933348344968, "grad_norm": 1.4646254777908325, "learning_rate": 1.1211715601365507e-05, "loss": 1.3174, "step": 8691 }, { "epoch": 1.5657734744426932, "grad_norm": 1.413590908050537, "learning_rate": 1.1202780936690549e-05, "loss": 1.3014, "step": 8692 }, { "epoch": 1.5659536140508894, "grad_norm": 1.6356289386749268, "learning_rate": 1.1193849384309595e-05, "loss": 1.5751, "step": 8693 }, { "epoch": 1.5661337536590858, "grad_norm": 1.5420008897781372, "learning_rate": 1.1184920944939143e-05, "loss": 1.3866, "step": 8694 }, { "epoch": 1.566313893267282, "grad_norm": 1.489952802658081, "learning_rate": 1.117599561929541e-05, "loss": 1.2339, "step": 8695 }, { "epoch": 1.5664940328754784, "grad_norm": 1.566988468170166, "learning_rate": 1.1167073408094409e-05, "loss": 1.5127, "step": 8696 }, { "epoch": 1.5666741724836748, "grad_norm": 1.7159370183944702, "learning_rate": 1.1158154312051866e-05, "loss": 1.5014, "step": 8697 }, { "epoch": 1.5668543120918712, "grad_norm": 1.6454249620437622, "learning_rate": 1.1149238331883266e-05, "loss": 1.557, "step": 8698 }, { "epoch": 1.5670344517000676, "grad_norm": 1.5364835262298584, "learning_rate": 1.1140325468303831e-05, "loss": 1.4042, "step": 8699 }, { "epoch": 1.567214591308264, "grad_norm": 1.4747416973114014, "learning_rate": 1.1131415722028571e-05, "loss": 1.1796, "step": 8700 }, { "epoch": 1.5673947309164604, "grad_norm": 1.4206022024154663, "learning_rate": 1.1122509093772209e-05, "loss": 1.24, "step": 8701 }, { "epoch": 1.5675748705246566, "grad_norm": 1.3547396659851074, "learning_rate": 1.1113605584249237e-05, "loss": 1.9937, "step": 8702 }, { "epoch": 1.567755010132853, "grad_norm": 1.353610873222351, "learning_rate": 1.1104705194173876e-05, "loss": 1.7046, "step": 8703 }, { "epoch": 1.5679351497410492, "grad_norm": 1.425837755203247, "learning_rate": 1.1095807924260121e-05, "loss": 1.9371, "step": 8704 }, { "epoch": 1.5681152893492456, "grad_norm": 1.339847445487976, "learning_rate": 1.1086913775221709e-05, "loss": 1.6754, "step": 8705 }, { "epoch": 1.568295428957442, "grad_norm": 1.453599452972412, "learning_rate": 1.1078022747772116e-05, "loss": 1.979, "step": 8706 }, { "epoch": 1.5684755685656384, "grad_norm": 1.5275856256484985, "learning_rate": 1.1069134842624585e-05, "loss": 1.8355, "step": 8707 }, { "epoch": 1.5686557081738348, "grad_norm": 1.4347549676895142, "learning_rate": 1.106025006049209e-05, "loss": 1.5313, "step": 8708 }, { "epoch": 1.5688358477820312, "grad_norm": 1.5533716678619385, "learning_rate": 1.1051368402087353e-05, "loss": 1.6628, "step": 8709 }, { "epoch": 1.5690159873902274, "grad_norm": 1.687168836593628, "learning_rate": 1.1042489868122885e-05, "loss": 1.7631, "step": 8710 }, { "epoch": 1.5691961269984238, "grad_norm": 2.1341562271118164, "learning_rate": 1.1033614459310909e-05, "loss": 2.064, "step": 8711 }, { "epoch": 1.56937626660662, "grad_norm": 1.457038164138794, "learning_rate": 1.1024742176363401e-05, "loss": 1.4305, "step": 8712 }, { "epoch": 1.5695564062148164, "grad_norm": 1.4856160879135132, "learning_rate": 1.1015873019992085e-05, "loss": 1.6931, "step": 8713 }, { "epoch": 1.5697365458230128, "grad_norm": 1.5376152992248535, "learning_rate": 1.1007006990908452e-05, "loss": 1.5286, "step": 8714 }, { "epoch": 1.5699166854312092, "grad_norm": 1.4504339694976807, "learning_rate": 1.0998144089823725e-05, "loss": 1.4434, "step": 8715 }, { "epoch": 1.5700968250394056, "grad_norm": 1.3297656774520874, "learning_rate": 1.0989284317448883e-05, "loss": 1.3496, "step": 8716 }, { "epoch": 1.570276964647602, "grad_norm": 1.4830807447433472, "learning_rate": 1.0980427674494648e-05, "loss": 1.5667, "step": 8717 }, { "epoch": 1.5704571042557982, "grad_norm": 1.1919097900390625, "learning_rate": 1.0971574161671489e-05, "loss": 0.9992, "step": 8718 }, { "epoch": 1.5706372438639946, "grad_norm": 1.2866847515106201, "learning_rate": 1.0962723779689654e-05, "loss": 1.3087, "step": 8719 }, { "epoch": 1.5708173834721908, "grad_norm": 1.39894700050354, "learning_rate": 1.0953876529259099e-05, "loss": 1.4516, "step": 8720 }, { "epoch": 1.5709975230803872, "grad_norm": 1.2938555479049683, "learning_rate": 1.0945032411089557e-05, "loss": 1.295, "step": 8721 }, { "epoch": 1.5711776626885836, "grad_norm": 1.3598833084106445, "learning_rate": 1.0936191425890486e-05, "loss": 1.3623, "step": 8722 }, { "epoch": 1.57135780229678, "grad_norm": 1.4263765811920166, "learning_rate": 1.0927353574371101e-05, "loss": 1.4898, "step": 8723 }, { "epoch": 1.5715379419049764, "grad_norm": 1.3458033800125122, "learning_rate": 1.0918518857240395e-05, "loss": 1.4364, "step": 8724 }, { "epoch": 1.5717180815131728, "grad_norm": 1.3973339796066284, "learning_rate": 1.090968727520708e-05, "loss": 1.3795, "step": 8725 }, { "epoch": 1.571898221121369, "grad_norm": 1.4609380960464478, "learning_rate": 1.0900858828979598e-05, "loss": 1.2824, "step": 8726 }, { "epoch": 1.5720783607295654, "grad_norm": 1.5024406909942627, "learning_rate": 1.0892033519266164e-05, "loss": 1.4705, "step": 8727 }, { "epoch": 1.5722585003377616, "grad_norm": 1.7645328044891357, "learning_rate": 1.0883211346774768e-05, "loss": 1.5015, "step": 8728 }, { "epoch": 1.572438639945958, "grad_norm": 1.6559275388717651, "learning_rate": 1.0874392312213106e-05, "loss": 1.5571, "step": 8729 }, { "epoch": 1.5726187795541544, "grad_norm": 1.4846141338348389, "learning_rate": 1.086557641628863e-05, "loss": 1.3511, "step": 8730 }, { "epoch": 1.5727989191623508, "grad_norm": 1.466202974319458, "learning_rate": 1.0856763659708558e-05, "loss": 1.468, "step": 8731 }, { "epoch": 1.5729790587705472, "grad_norm": 1.507413625717163, "learning_rate": 1.084795404317982e-05, "loss": 1.378, "step": 8732 }, { "epoch": 1.5731591983787436, "grad_norm": 1.5746922492980957, "learning_rate": 1.0839147567409158e-05, "loss": 1.5838, "step": 8733 }, { "epoch": 1.57333933798694, "grad_norm": 1.5736324787139893, "learning_rate": 1.0830344233103002e-05, "loss": 1.5805, "step": 8734 }, { "epoch": 1.5735194775951362, "grad_norm": 1.5431368350982666, "learning_rate": 1.0821544040967546e-05, "loss": 1.4128, "step": 8735 }, { "epoch": 1.5736996172033326, "grad_norm": 1.585395336151123, "learning_rate": 1.081274699170875e-05, "loss": 1.3835, "step": 8736 }, { "epoch": 1.5738797568115288, "grad_norm": 1.4899364709854126, "learning_rate": 1.0803953086032303e-05, "loss": 1.4272, "step": 8737 }, { "epoch": 1.5740598964197252, "grad_norm": 1.4143012762069702, "learning_rate": 1.0795162324643648e-05, "loss": 1.485, "step": 8738 }, { "epoch": 1.5742400360279216, "grad_norm": 1.39919912815094, "learning_rate": 1.078637470824797e-05, "loss": 1.3085, "step": 8739 }, { "epoch": 1.574420175636118, "grad_norm": 1.431362509727478, "learning_rate": 1.0777590237550217e-05, "loss": 1.3317, "step": 8740 }, { "epoch": 1.5746003152443144, "grad_norm": 1.5391736030578613, "learning_rate": 1.0768808913255057e-05, "loss": 1.2625, "step": 8741 }, { "epoch": 1.5747804548525108, "grad_norm": 1.604718804359436, "learning_rate": 1.0760030736066951e-05, "loss": 1.5051, "step": 8742 }, { "epoch": 1.574960594460707, "grad_norm": 1.5461753606796265, "learning_rate": 1.0751255706690066e-05, "loss": 1.2147, "step": 8743 }, { "epoch": 1.5751407340689034, "grad_norm": 1.4858249425888062, "learning_rate": 1.074248382582833e-05, "loss": 1.3766, "step": 8744 }, { "epoch": 1.5753208736770996, "grad_norm": 1.5699418783187866, "learning_rate": 1.073371509418542e-05, "loss": 1.477, "step": 8745 }, { "epoch": 1.575501013285296, "grad_norm": 1.7523880004882812, "learning_rate": 1.072494951246476e-05, "loss": 1.4182, "step": 8746 }, { "epoch": 1.5756811528934924, "grad_norm": 1.4491461515426636, "learning_rate": 1.0716187081369516e-05, "loss": 1.3406, "step": 8747 }, { "epoch": 1.5758612925016888, "grad_norm": 1.5331447124481201, "learning_rate": 1.0707427801602615e-05, "loss": 1.2322, "step": 8748 }, { "epoch": 1.5760414321098852, "grad_norm": 1.5976214408874512, "learning_rate": 1.0698671673866718e-05, "loss": 1.2848, "step": 8749 }, { "epoch": 1.5762215717180816, "grad_norm": 1.499138593673706, "learning_rate": 1.0689918698864232e-05, "loss": 1.2546, "step": 8750 }, { "epoch": 1.5764017113262778, "grad_norm": 1.5307953357696533, "learning_rate": 1.0681168877297303e-05, "loss": 1.3887, "step": 8751 }, { "epoch": 1.5765818509344742, "grad_norm": 1.4458649158477783, "learning_rate": 1.0672422209867878e-05, "loss": 2.1768, "step": 8752 }, { "epoch": 1.5767619905426704, "grad_norm": 1.2190377712249756, "learning_rate": 1.0663678697277586e-05, "loss": 1.6207, "step": 8753 }, { "epoch": 1.5769421301508668, "grad_norm": 1.38332998752594, "learning_rate": 1.0654938340227821e-05, "loss": 1.7679, "step": 8754 }, { "epoch": 1.5771222697590632, "grad_norm": 1.447637677192688, "learning_rate": 1.0646201139419732e-05, "loss": 1.6161, "step": 8755 }, { "epoch": 1.5773024093672596, "grad_norm": 1.3600705862045288, "learning_rate": 1.0637467095554231e-05, "loss": 1.6255, "step": 8756 }, { "epoch": 1.577482548975456, "grad_norm": 1.5435174703598022, "learning_rate": 1.0628736209331962e-05, "loss": 1.7634, "step": 8757 }, { "epoch": 1.5776626885836524, "grad_norm": 1.5020148754119873, "learning_rate": 1.062000848145328e-05, "loss": 1.9926, "step": 8758 }, { "epoch": 1.5778428281918488, "grad_norm": 1.4623502492904663, "learning_rate": 1.0611283912618342e-05, "loss": 1.6484, "step": 8759 }, { "epoch": 1.578022967800045, "grad_norm": 1.5832890272140503, "learning_rate": 1.0602562503527003e-05, "loss": 1.6794, "step": 8760 }, { "epoch": 1.5782031074082414, "grad_norm": 1.8005592823028564, "learning_rate": 1.0593844254878926e-05, "loss": 2.0905, "step": 8761 }, { "epoch": 1.5783832470164376, "grad_norm": 1.6440430879592896, "learning_rate": 1.058512916737347e-05, "loss": 1.7672, "step": 8762 }, { "epoch": 1.578563386624634, "grad_norm": 1.4857702255249023, "learning_rate": 1.0576417241709751e-05, "loss": 1.6237, "step": 8763 }, { "epoch": 1.5787435262328304, "grad_norm": 1.4623029232025146, "learning_rate": 1.0567708478586642e-05, "loss": 1.7061, "step": 8764 }, { "epoch": 1.5789236658410268, "grad_norm": 1.4218295812606812, "learning_rate": 1.0559002878702734e-05, "loss": 1.3973, "step": 8765 }, { "epoch": 1.5791038054492232, "grad_norm": 1.3829760551452637, "learning_rate": 1.0550300442756422e-05, "loss": 1.2975, "step": 8766 }, { "epoch": 1.5792839450574196, "grad_norm": 1.3502843379974365, "learning_rate": 1.0541601171445791e-05, "loss": 1.3248, "step": 8767 }, { "epoch": 1.5794640846656158, "grad_norm": 1.4077351093292236, "learning_rate": 1.0532905065468695e-05, "loss": 1.3455, "step": 8768 }, { "epoch": 1.5796442242738122, "grad_norm": 1.3918960094451904, "learning_rate": 1.0524212125522732e-05, "loss": 1.203, "step": 8769 }, { "epoch": 1.5798243638820084, "grad_norm": 1.37799870967865, "learning_rate": 1.0515522352305246e-05, "loss": 1.5085, "step": 8770 }, { "epoch": 1.5800045034902048, "grad_norm": 1.4105336666107178, "learning_rate": 1.0506835746513321e-05, "loss": 1.1745, "step": 8771 }, { "epoch": 1.5801846430984012, "grad_norm": 1.4574953317642212, "learning_rate": 1.0498152308843796e-05, "loss": 1.4043, "step": 8772 }, { "epoch": 1.5803647827065976, "grad_norm": 1.4460452795028687, "learning_rate": 1.0489472039993254e-05, "loss": 1.6298, "step": 8773 }, { "epoch": 1.580544922314794, "grad_norm": 1.3603572845458984, "learning_rate": 1.0480794940658006e-05, "loss": 1.1927, "step": 8774 }, { "epoch": 1.5807250619229904, "grad_norm": 1.5552875995635986, "learning_rate": 1.0472121011534153e-05, "loss": 1.4476, "step": 8775 }, { "epoch": 1.5809052015311866, "grad_norm": 1.4770456552505493, "learning_rate": 1.0463450253317497e-05, "loss": 1.2685, "step": 8776 }, { "epoch": 1.581085341139383, "grad_norm": 1.4912656545639038, "learning_rate": 1.0454782666703605e-05, "loss": 1.3914, "step": 8777 }, { "epoch": 1.5812654807475792, "grad_norm": 1.3550387620925903, "learning_rate": 1.0446118252387787e-05, "loss": 1.3277, "step": 8778 }, { "epoch": 1.5814456203557756, "grad_norm": 1.4726510047912598, "learning_rate": 1.0437457011065093e-05, "loss": 1.3966, "step": 8779 }, { "epoch": 1.581625759963972, "grad_norm": 1.511576533317566, "learning_rate": 1.0428798943430328e-05, "loss": 1.4058, "step": 8780 }, { "epoch": 1.5818058995721684, "grad_norm": 1.4912424087524414, "learning_rate": 1.0420144050178043e-05, "loss": 1.4979, "step": 8781 }, { "epoch": 1.5819860391803648, "grad_norm": 1.489510416984558, "learning_rate": 1.041149233200252e-05, "loss": 1.5692, "step": 8782 }, { "epoch": 1.5821661787885613, "grad_norm": 1.7289700508117676, "learning_rate": 1.0402843789597783e-05, "loss": 1.5418, "step": 8783 }, { "epoch": 1.5823463183967574, "grad_norm": 1.5123237371444702, "learning_rate": 1.0394198423657648e-05, "loss": 1.3976, "step": 8784 }, { "epoch": 1.5825264580049538, "grad_norm": 1.5076872110366821, "learning_rate": 1.0385556234875622e-05, "loss": 1.3539, "step": 8785 }, { "epoch": 1.5827065976131502, "grad_norm": 1.6992595195770264, "learning_rate": 1.0376917223944981e-05, "loss": 1.6975, "step": 8786 }, { "epoch": 1.5828867372213464, "grad_norm": 1.6513583660125732, "learning_rate": 1.0368281391558737e-05, "loss": 1.6185, "step": 8787 }, { "epoch": 1.5830668768295428, "grad_norm": 1.449255108833313, "learning_rate": 1.0359648738409645e-05, "loss": 1.4475, "step": 8788 }, { "epoch": 1.5832470164377392, "grad_norm": 1.5623551607131958, "learning_rate": 1.0351019265190237e-05, "loss": 1.4849, "step": 8789 }, { "epoch": 1.5834271560459356, "grad_norm": 1.5850846767425537, "learning_rate": 1.0342392972592762e-05, "loss": 1.3448, "step": 8790 }, { "epoch": 1.583607295654132, "grad_norm": 1.5581655502319336, "learning_rate": 1.03337698613092e-05, "loss": 1.3803, "step": 8791 }, { "epoch": 1.5837874352623285, "grad_norm": 1.5412063598632812, "learning_rate": 1.032514993203128e-05, "loss": 1.3988, "step": 8792 }, { "epoch": 1.5839675748705246, "grad_norm": 1.5423779487609863, "learning_rate": 1.0316533185450522e-05, "loss": 1.4385, "step": 8793 }, { "epoch": 1.584147714478721, "grad_norm": 1.5907313823699951, "learning_rate": 1.0307919622258139e-05, "loss": 1.3948, "step": 8794 }, { "epoch": 1.5843278540869172, "grad_norm": 1.5788952112197876, "learning_rate": 1.0299309243145117e-05, "loss": 1.3184, "step": 8795 }, { "epoch": 1.5845079936951136, "grad_norm": 1.5469717979431152, "learning_rate": 1.0290702048802164e-05, "loss": 1.2234, "step": 8796 }, { "epoch": 1.58468813330331, "grad_norm": 1.5656107664108276, "learning_rate": 1.028209803991974e-05, "loss": 1.2106, "step": 8797 }, { "epoch": 1.5848682729115064, "grad_norm": 1.4900894165039062, "learning_rate": 1.0273497217188076e-05, "loss": 1.1863, "step": 8798 }, { "epoch": 1.5850484125197029, "grad_norm": 1.8199594020843506, "learning_rate": 1.0264899581297122e-05, "loss": 1.5803, "step": 8799 }, { "epoch": 1.5852285521278993, "grad_norm": 1.5054301023483276, "learning_rate": 1.0256305132936567e-05, "loss": 1.2635, "step": 8800 }, { "epoch": 1.5854086917360954, "grad_norm": 1.4701454639434814, "learning_rate": 1.024771387279585e-05, "loss": 0.9962, "step": 8801 }, { "epoch": 1.5855888313442918, "grad_norm": 1.417427897453308, "learning_rate": 1.023912580156417e-05, "loss": 1.5873, "step": 8802 }, { "epoch": 1.585768970952488, "grad_norm": 1.3216639757156372, "learning_rate": 1.0230540919930448e-05, "loss": 1.5739, "step": 8803 }, { "epoch": 1.5859491105606844, "grad_norm": 1.4012728929519653, "learning_rate": 1.0221959228583366e-05, "loss": 1.6963, "step": 8804 }, { "epoch": 1.5861292501688808, "grad_norm": 1.4364832639694214, "learning_rate": 1.0213380728211336e-05, "loss": 1.8545, "step": 8805 }, { "epoch": 1.5863093897770773, "grad_norm": 1.3880946636199951, "learning_rate": 1.0204805419502511e-05, "loss": 1.7965, "step": 8806 }, { "epoch": 1.5864895293852737, "grad_norm": 1.4016976356506348, "learning_rate": 1.0196233303144826e-05, "loss": 1.5051, "step": 8807 }, { "epoch": 1.58666966899347, "grad_norm": 1.522895336151123, "learning_rate": 1.0187664379825917e-05, "loss": 1.9196, "step": 8808 }, { "epoch": 1.5868498086016662, "grad_norm": 1.7216529846191406, "learning_rate": 1.0179098650233182e-05, "loss": 2.0875, "step": 8809 }, { "epoch": 1.5870299482098627, "grad_norm": 1.7687259912490845, "learning_rate": 1.0170536115053753e-05, "loss": 1.7505, "step": 8810 }, { "epoch": 1.5872100878180588, "grad_norm": 1.7006326913833618, "learning_rate": 1.0161976774974519e-05, "loss": 1.8563, "step": 8811 }, { "epoch": 1.5873902274262552, "grad_norm": 1.5786316394805908, "learning_rate": 1.015342063068211e-05, "loss": 1.668, "step": 8812 }, { "epoch": 1.5875703670344516, "grad_norm": 1.4506906270980835, "learning_rate": 1.014486768286288e-05, "loss": 1.6047, "step": 8813 }, { "epoch": 1.587750506642648, "grad_norm": 1.423282265663147, "learning_rate": 1.0136317932202955e-05, "loss": 1.4585, "step": 8814 }, { "epoch": 1.5879306462508445, "grad_norm": 1.341646671295166, "learning_rate": 1.012777137938819e-05, "loss": 1.2959, "step": 8815 }, { "epoch": 1.5881107858590409, "grad_norm": 1.3415254354476929, "learning_rate": 1.0119228025104166e-05, "loss": 1.3727, "step": 8816 }, { "epoch": 1.5882909254672373, "grad_norm": 1.4170063734054565, "learning_rate": 1.0110687870036262e-05, "loss": 1.0576, "step": 8817 }, { "epoch": 1.5884710650754335, "grad_norm": 1.4202964305877686, "learning_rate": 1.0102150914869545e-05, "loss": 1.4034, "step": 8818 }, { "epoch": 1.5886512046836299, "grad_norm": 1.484820008277893, "learning_rate": 1.0093617160288853e-05, "loss": 1.5881, "step": 8819 }, { "epoch": 1.588831344291826, "grad_norm": 1.463269829750061, "learning_rate": 1.0085086606978739e-05, "loss": 1.3581, "step": 8820 }, { "epoch": 1.5890114839000224, "grad_norm": 1.4107099771499634, "learning_rate": 1.0076559255623546e-05, "loss": 1.323, "step": 8821 }, { "epoch": 1.5891916235082189, "grad_norm": 1.4247010946273804, "learning_rate": 1.0068035106907342e-05, "loss": 1.3528, "step": 8822 }, { "epoch": 1.5893717631164153, "grad_norm": 1.4731537103652954, "learning_rate": 1.0059514161513895e-05, "loss": 1.4214, "step": 8823 }, { "epoch": 1.5895519027246117, "grad_norm": 1.4545427560806274, "learning_rate": 1.0050996420126768e-05, "loss": 1.3684, "step": 8824 }, { "epoch": 1.589732042332808, "grad_norm": 1.497930884361267, "learning_rate": 1.0042481883429234e-05, "loss": 1.3873, "step": 8825 }, { "epoch": 1.5899121819410043, "grad_norm": 1.3994158506393433, "learning_rate": 1.0033970552104354e-05, "loss": 1.1974, "step": 8826 }, { "epoch": 1.5900923215492007, "grad_norm": 1.3913419246673584, "learning_rate": 1.0025462426834897e-05, "loss": 1.2426, "step": 8827 }, { "epoch": 1.5902724611573968, "grad_norm": 1.438128113746643, "learning_rate": 1.0016957508303365e-05, "loss": 1.4723, "step": 8828 }, { "epoch": 1.5904526007655932, "grad_norm": 1.4592293500900269, "learning_rate": 1.0008455797192029e-05, "loss": 1.3464, "step": 8829 }, { "epoch": 1.5906327403737897, "grad_norm": 1.5633761882781982, "learning_rate": 9.999957294182877e-06, "loss": 1.4671, "step": 8830 }, { "epoch": 1.590812879981986, "grad_norm": 1.4533586502075195, "learning_rate": 9.991461999957686e-06, "loss": 1.4118, "step": 8831 }, { "epoch": 1.5909930195901825, "grad_norm": 1.4530785083770752, "learning_rate": 9.982969915197915e-06, "loss": 1.3829, "step": 8832 }, { "epoch": 1.5911731591983789, "grad_norm": 1.5011948347091675, "learning_rate": 9.974481040584815e-06, "loss": 1.4278, "step": 8833 }, { "epoch": 1.591353298806575, "grad_norm": 1.4997286796569824, "learning_rate": 9.965995376799348e-06, "loss": 1.5595, "step": 8834 }, { "epoch": 1.5915334384147715, "grad_norm": 1.6865862607955933, "learning_rate": 9.957512924522227e-06, "loss": 1.7658, "step": 8835 }, { "epoch": 1.5917135780229676, "grad_norm": 1.48899245262146, "learning_rate": 9.949033684433922e-06, "loss": 1.5466, "step": 8836 }, { "epoch": 1.591893717631164, "grad_norm": 1.644566535949707, "learning_rate": 9.940557657214622e-06, "loss": 1.2391, "step": 8837 }, { "epoch": 1.5920738572393605, "grad_norm": 1.5699700117111206, "learning_rate": 9.932084843544277e-06, "loss": 1.3142, "step": 8838 }, { "epoch": 1.5922539968475569, "grad_norm": 1.5089904069900513, "learning_rate": 9.923615244102558e-06, "loss": 1.3419, "step": 8839 }, { "epoch": 1.5924341364557533, "grad_norm": 1.5472806692123413, "learning_rate": 9.915148859568912e-06, "loss": 1.41, "step": 8840 }, { "epoch": 1.5926142760639497, "grad_norm": 1.543796181678772, "learning_rate": 9.906685690622503e-06, "loss": 1.5795, "step": 8841 }, { "epoch": 1.5927944156721459, "grad_norm": 1.555656909942627, "learning_rate": 9.89822573794224e-06, "loss": 1.3925, "step": 8842 }, { "epoch": 1.5929745552803423, "grad_norm": 1.4736770391464233, "learning_rate": 9.889769002206773e-06, "loss": 1.2486, "step": 8843 }, { "epoch": 1.5931546948885387, "grad_norm": 1.7369734048843384, "learning_rate": 9.8813154840945e-06, "loss": 1.4568, "step": 8844 }, { "epoch": 1.5933348344967349, "grad_norm": 1.5936263799667358, "learning_rate": 9.872865184283559e-06, "loss": 1.4271, "step": 8845 }, { "epoch": 1.5935149741049313, "grad_norm": 1.5042322874069214, "learning_rate": 9.864418103451828e-06, "loss": 1.2519, "step": 8846 }, { "epoch": 1.5936951137131277, "grad_norm": 1.5108041763305664, "learning_rate": 9.855974242276923e-06, "loss": 1.4058, "step": 8847 }, { "epoch": 1.593875253321324, "grad_norm": 1.5147268772125244, "learning_rate": 9.847533601436204e-06, "loss": 1.2817, "step": 8848 }, { "epoch": 1.5940553929295205, "grad_norm": 1.6327966451644897, "learning_rate": 9.839096181606789e-06, "loss": 1.3359, "step": 8849 }, { "epoch": 1.5942355325377169, "grad_norm": 1.4893242120742798, "learning_rate": 9.830661983465522e-06, "loss": 1.2338, "step": 8850 }, { "epoch": 1.594415672145913, "grad_norm": 1.377326250076294, "learning_rate": 9.822231007688981e-06, "loss": 1.1019, "step": 8851 }, { "epoch": 1.5945958117541095, "grad_norm": 1.2911468744277954, "learning_rate": 9.813803254953503e-06, "loss": 1.391, "step": 8852 }, { "epoch": 1.5947759513623057, "grad_norm": 1.2680950164794922, "learning_rate": 9.805378725935133e-06, "loss": 1.4327, "step": 8853 }, { "epoch": 1.594956090970502, "grad_norm": 1.289388656616211, "learning_rate": 9.796957421309738e-06, "loss": 1.7376, "step": 8854 }, { "epoch": 1.5951362305786985, "grad_norm": 1.5124446153640747, "learning_rate": 9.788539341752816e-06, "loss": 2.1833, "step": 8855 }, { "epoch": 1.5953163701868949, "grad_norm": 1.6897313594818115, "learning_rate": 9.780124487939691e-06, "loss": 2.0108, "step": 8856 }, { "epoch": 1.5954965097950913, "grad_norm": 1.4944745302200317, "learning_rate": 9.771712860545379e-06, "loss": 1.4695, "step": 8857 }, { "epoch": 1.5956766494032877, "grad_norm": 1.509650468826294, "learning_rate": 9.76330446024466e-06, "loss": 1.5227, "step": 8858 }, { "epoch": 1.5958567890114839, "grad_norm": 1.6214643716812134, "learning_rate": 9.754899287712072e-06, "loss": 1.9105, "step": 8859 }, { "epoch": 1.5960369286196803, "grad_norm": 1.7892626523971558, "learning_rate": 9.746497343621857e-06, "loss": 1.9426, "step": 8860 }, { "epoch": 1.5962170682278765, "grad_norm": 1.7339506149291992, "learning_rate": 9.738098628648023e-06, "loss": 2.1087, "step": 8861 }, { "epoch": 1.5963972078360729, "grad_norm": 1.6403619050979614, "learning_rate": 9.729703143464292e-06, "loss": 1.6197, "step": 8862 }, { "epoch": 1.5965773474442693, "grad_norm": 1.554610013961792, "learning_rate": 9.721310888744167e-06, "loss": 1.6336, "step": 8863 }, { "epoch": 1.5967574870524657, "grad_norm": 1.4719735383987427, "learning_rate": 9.712921865160867e-06, "loss": 1.5205, "step": 8864 }, { "epoch": 1.596937626660662, "grad_norm": 1.3846261501312256, "learning_rate": 9.704536073387355e-06, "loss": 1.3065, "step": 8865 }, { "epoch": 1.5971177662688585, "grad_norm": 1.4613518714904785, "learning_rate": 9.696153514096329e-06, "loss": 1.4671, "step": 8866 }, { "epoch": 1.5972979058770547, "grad_norm": 1.4590027332305908, "learning_rate": 9.687774187960236e-06, "loss": 1.1502, "step": 8867 }, { "epoch": 1.597478045485251, "grad_norm": 1.4454160928726196, "learning_rate": 9.679398095651265e-06, "loss": 1.3929, "step": 8868 }, { "epoch": 1.5976581850934473, "grad_norm": 1.4497108459472656, "learning_rate": 9.671025237841335e-06, "loss": 1.4831, "step": 8869 }, { "epoch": 1.5978383247016437, "grad_norm": 1.4551922082901, "learning_rate": 9.66265561520212e-06, "loss": 1.4239, "step": 8870 }, { "epoch": 1.59801846430984, "grad_norm": 1.5123260021209717, "learning_rate": 9.654289228405028e-06, "loss": 1.5833, "step": 8871 }, { "epoch": 1.5981986039180365, "grad_norm": 1.4101616144180298, "learning_rate": 9.645926078121181e-06, "loss": 1.296, "step": 8872 }, { "epoch": 1.5983787435262329, "grad_norm": 1.456728219985962, "learning_rate": 9.637566165021506e-06, "loss": 1.5427, "step": 8873 }, { "epoch": 1.5985588831344293, "grad_norm": 1.4670246839523315, "learning_rate": 9.629209489776614e-06, "loss": 1.4958, "step": 8874 }, { "epoch": 1.5987390227426257, "grad_norm": 1.5197744369506836, "learning_rate": 9.620856053056871e-06, "loss": 1.5192, "step": 8875 }, { "epoch": 1.5989191623508219, "grad_norm": 1.3986202478408813, "learning_rate": 9.61250585553239e-06, "loss": 1.4055, "step": 8876 }, { "epoch": 1.5990993019590183, "grad_norm": 1.4559756517410278, "learning_rate": 9.604158897873012e-06, "loss": 1.3765, "step": 8877 }, { "epoch": 1.5992794415672145, "grad_norm": 1.3098924160003662, "learning_rate": 9.595815180748335e-06, "loss": 1.2494, "step": 8878 }, { "epoch": 1.5994595811754109, "grad_norm": 1.5794737339019775, "learning_rate": 9.58747470482768e-06, "loss": 1.522, "step": 8879 }, { "epoch": 1.5996397207836073, "grad_norm": 1.500978946685791, "learning_rate": 9.579137470780125e-06, "loss": 1.554, "step": 8880 }, { "epoch": 1.5998198603918037, "grad_norm": 1.487963080406189, "learning_rate": 9.57080347927446e-06, "loss": 1.3358, "step": 8881 }, { "epoch": 1.6, "grad_norm": 1.5781112909317017, "learning_rate": 9.562472730979261e-06, "loss": 1.4216, "step": 8882 }, { "epoch": 1.6001801396081965, "grad_norm": 1.4555845260620117, "learning_rate": 9.5541452265628e-06, "loss": 1.4076, "step": 8883 }, { "epoch": 1.6003602792163927, "grad_norm": 1.5419166088104248, "learning_rate": 9.545820966693115e-06, "loss": 1.4619, "step": 8884 }, { "epoch": 1.600540418824589, "grad_norm": 1.4731258153915405, "learning_rate": 9.537499952037948e-06, "loss": 1.3367, "step": 8885 }, { "epoch": 1.6007205584327853, "grad_norm": 1.6260346174240112, "learning_rate": 9.52918218326484e-06, "loss": 1.5014, "step": 8886 }, { "epoch": 1.6009006980409817, "grad_norm": 1.6821283102035522, "learning_rate": 9.520867661041038e-06, "loss": 1.5689, "step": 8887 }, { "epoch": 1.601080837649178, "grad_norm": 1.597687005996704, "learning_rate": 9.512556386033505e-06, "loss": 1.5017, "step": 8888 }, { "epoch": 1.6012609772573745, "grad_norm": 1.5506452322006226, "learning_rate": 9.504248358908974e-06, "loss": 1.4065, "step": 8889 }, { "epoch": 1.601441116865571, "grad_norm": 1.5384083986282349, "learning_rate": 9.495943580333905e-06, "loss": 1.4699, "step": 8890 }, { "epoch": 1.6016212564737673, "grad_norm": 1.5358400344848633, "learning_rate": 9.487642050974526e-06, "loss": 1.4864, "step": 8891 }, { "epoch": 1.6018013960819635, "grad_norm": 1.7625243663787842, "learning_rate": 9.479343771496768e-06, "loss": 1.6111, "step": 8892 }, { "epoch": 1.6019815356901599, "grad_norm": 1.5033992528915405, "learning_rate": 9.471048742566313e-06, "loss": 1.2479, "step": 8893 }, { "epoch": 1.602161675298356, "grad_norm": 1.6612857580184937, "learning_rate": 9.462756964848591e-06, "loss": 1.3696, "step": 8894 }, { "epoch": 1.6023418149065525, "grad_norm": 1.4729400873184204, "learning_rate": 9.454468439008745e-06, "loss": 1.3871, "step": 8895 }, { "epoch": 1.6025219545147489, "grad_norm": 1.7338024377822876, "learning_rate": 9.446183165711708e-06, "loss": 1.5381, "step": 8896 }, { "epoch": 1.6027020941229453, "grad_norm": 1.5807453393936157, "learning_rate": 9.437901145622102e-06, "loss": 1.461, "step": 8897 }, { "epoch": 1.6028822337311417, "grad_norm": 1.515012502670288, "learning_rate": 9.429622379404308e-06, "loss": 1.1404, "step": 8898 }, { "epoch": 1.603062373339338, "grad_norm": 1.58295738697052, "learning_rate": 9.421346867722447e-06, "loss": 1.2525, "step": 8899 }, { "epoch": 1.6032425129475345, "grad_norm": 1.4897652864456177, "learning_rate": 9.413074611240374e-06, "loss": 1.2147, "step": 8900 }, { "epoch": 1.6034226525557307, "grad_norm": 1.432344675064087, "learning_rate": 9.404805610621687e-06, "loss": 1.2557, "step": 8901 }, { "epoch": 1.603602792163927, "grad_norm": 1.4904383420944214, "learning_rate": 9.396539866529718e-06, "loss": 2.1437, "step": 8902 }, { "epoch": 1.6037829317721233, "grad_norm": 1.2578002214431763, "learning_rate": 9.388277379627546e-06, "loss": 1.4787, "step": 8903 }, { "epoch": 1.6039630713803197, "grad_norm": 1.3708674907684326, "learning_rate": 9.380018150577975e-06, "loss": 1.8865, "step": 8904 }, { "epoch": 1.604143210988516, "grad_norm": 1.3499674797058105, "learning_rate": 9.37176218004357e-06, "loss": 1.6424, "step": 8905 }, { "epoch": 1.6043233505967125, "grad_norm": 1.3097155094146729, "learning_rate": 9.36350946868661e-06, "loss": 1.6283, "step": 8906 }, { "epoch": 1.604503490204909, "grad_norm": 1.6581426858901978, "learning_rate": 9.355260017169131e-06, "loss": 2.1028, "step": 8907 }, { "epoch": 1.6046836298131053, "grad_norm": 1.4766243696212769, "learning_rate": 9.347013826152895e-06, "loss": 1.7667, "step": 8908 }, { "epoch": 1.6048637694213015, "grad_norm": 1.5135400295257568, "learning_rate": 9.33877089629941e-06, "loss": 1.6915, "step": 8909 }, { "epoch": 1.605043909029498, "grad_norm": 1.5758635997772217, "learning_rate": 9.330531228269917e-06, "loss": 2.1053, "step": 8910 }, { "epoch": 1.605224048637694, "grad_norm": 2.0041654109954834, "learning_rate": 9.322294822725403e-06, "loss": 2.1725, "step": 8911 }, { "epoch": 1.6054041882458905, "grad_norm": 1.4589606523513794, "learning_rate": 9.31406168032658e-06, "loss": 1.6749, "step": 8912 }, { "epoch": 1.605584327854087, "grad_norm": 1.4244757890701294, "learning_rate": 9.3058318017339e-06, "loss": 1.6318, "step": 8913 }, { "epoch": 1.6057644674622833, "grad_norm": 1.3773863315582275, "learning_rate": 9.297605187607584e-06, "loss": 1.239, "step": 8914 }, { "epoch": 1.6059446070704797, "grad_norm": 1.3393505811691284, "learning_rate": 9.289381838607552e-06, "loss": 1.2184, "step": 8915 }, { "epoch": 1.606124746678676, "grad_norm": 1.317397117614746, "learning_rate": 9.281161755393486e-06, "loss": 1.1994, "step": 8916 }, { "epoch": 1.6063048862868723, "grad_norm": 1.5283825397491455, "learning_rate": 9.27294493862479e-06, "loss": 1.2776, "step": 8917 }, { "epoch": 1.6064850258950687, "grad_norm": 1.5297576189041138, "learning_rate": 9.264731388960595e-06, "loss": 1.478, "step": 8918 }, { "epoch": 1.6066651655032649, "grad_norm": 1.592772364616394, "learning_rate": 9.256521107059834e-06, "loss": 1.5737, "step": 8919 }, { "epoch": 1.6068453051114613, "grad_norm": 1.403741478919983, "learning_rate": 9.248314093581095e-06, "loss": 1.2837, "step": 8920 }, { "epoch": 1.6070254447196577, "grad_norm": 1.459051251411438, "learning_rate": 9.240110349182745e-06, "loss": 1.4449, "step": 8921 }, { "epoch": 1.607205584327854, "grad_norm": 1.3451504707336426, "learning_rate": 9.231909874522893e-06, "loss": 1.2766, "step": 8922 }, { "epoch": 1.6073857239360505, "grad_norm": 1.545535922050476, "learning_rate": 9.22371267025936e-06, "loss": 1.5564, "step": 8923 }, { "epoch": 1.607565863544247, "grad_norm": 1.5190519094467163, "learning_rate": 9.21551873704975e-06, "loss": 1.4685, "step": 8924 }, { "epoch": 1.607746003152443, "grad_norm": 1.5024069547653198, "learning_rate": 9.207328075551359e-06, "loss": 1.4392, "step": 8925 }, { "epoch": 1.6079261427606395, "grad_norm": 1.4219722747802734, "learning_rate": 9.199140686421237e-06, "loss": 1.4907, "step": 8926 }, { "epoch": 1.6081062823688357, "grad_norm": 1.3777090311050415, "learning_rate": 9.190956570316167e-06, "loss": 1.4123, "step": 8927 }, { "epoch": 1.608286421977032, "grad_norm": 1.3739651441574097, "learning_rate": 9.182775727892696e-06, "loss": 1.257, "step": 8928 }, { "epoch": 1.6084665615852285, "grad_norm": 1.3471442461013794, "learning_rate": 9.174598159807073e-06, "loss": 1.3525, "step": 8929 }, { "epoch": 1.608646701193425, "grad_norm": 1.5186573266983032, "learning_rate": 9.166423866715302e-06, "loss": 1.4414, "step": 8930 }, { "epoch": 1.6088268408016213, "grad_norm": 1.4564911127090454, "learning_rate": 9.15825284927312e-06, "loss": 1.3723, "step": 8931 }, { "epoch": 1.6090069804098177, "grad_norm": 1.4761847257614136, "learning_rate": 9.150085108136003e-06, "loss": 1.4438, "step": 8932 }, { "epoch": 1.6091871200180141, "grad_norm": 1.5565615892410278, "learning_rate": 9.141920643959156e-06, "loss": 1.5325, "step": 8933 }, { "epoch": 1.6093672596262103, "grad_norm": 1.4822925329208374, "learning_rate": 9.133759457397534e-06, "loss": 1.492, "step": 8934 }, { "epoch": 1.6095473992344067, "grad_norm": 1.4930733442306519, "learning_rate": 9.125601549105827e-06, "loss": 1.3302, "step": 8935 }, { "epoch": 1.609727538842603, "grad_norm": 1.7272508144378662, "learning_rate": 9.117446919738453e-06, "loss": 1.5707, "step": 8936 }, { "epoch": 1.6099076784507993, "grad_norm": 1.5761549472808838, "learning_rate": 9.10929556994956e-06, "loss": 1.3571, "step": 8937 }, { "epoch": 1.6100878180589957, "grad_norm": 1.6181126832962036, "learning_rate": 9.101147500393076e-06, "loss": 1.423, "step": 8938 }, { "epoch": 1.610267957667192, "grad_norm": 1.5674889087677002, "learning_rate": 9.093002711722615e-06, "loss": 1.4229, "step": 8939 }, { "epoch": 1.6104480972753885, "grad_norm": 1.5214717388153076, "learning_rate": 9.084861204591549e-06, "loss": 1.2723, "step": 8940 }, { "epoch": 1.610628236883585, "grad_norm": 1.5210275650024414, "learning_rate": 9.07672297965299e-06, "loss": 1.3522, "step": 8941 }, { "epoch": 1.610808376491781, "grad_norm": 1.4901388883590698, "learning_rate": 9.068588037559782e-06, "loss": 1.1276, "step": 8942 }, { "epoch": 1.6109885160999775, "grad_norm": 1.5613468885421753, "learning_rate": 9.060456378964504e-06, "loss": 1.2203, "step": 8943 }, { "epoch": 1.6111686557081737, "grad_norm": 1.7145702838897705, "learning_rate": 9.052328004519473e-06, "loss": 1.4383, "step": 8944 }, { "epoch": 1.61134879531637, "grad_norm": 1.5754945278167725, "learning_rate": 9.044202914876748e-06, "loss": 1.1826, "step": 8945 }, { "epoch": 1.6115289349245665, "grad_norm": 1.55842924118042, "learning_rate": 9.0360811106881e-06, "loss": 1.2462, "step": 8946 }, { "epoch": 1.611709074532763, "grad_norm": 1.5444365739822388, "learning_rate": 9.027962592605089e-06, "loss": 1.4904, "step": 8947 }, { "epoch": 1.6118892141409593, "grad_norm": 1.647307276725769, "learning_rate": 9.01984736127896e-06, "loss": 1.4093, "step": 8948 }, { "epoch": 1.6120693537491557, "grad_norm": 1.537060260772705, "learning_rate": 9.011735417360712e-06, "loss": 1.2332, "step": 8949 }, { "epoch": 1.612249493357352, "grad_norm": 1.8365521430969238, "learning_rate": 9.003626761501089e-06, "loss": 1.6036, "step": 8950 }, { "epoch": 1.6124296329655483, "grad_norm": 1.367088794708252, "learning_rate": 8.995521394350542e-06, "loss": 1.0866, "step": 8951 }, { "epoch": 1.6126097725737445, "grad_norm": 1.4737699031829834, "learning_rate": 8.987419316559326e-06, "loss": 1.9999, "step": 8952 }, { "epoch": 1.612789912181941, "grad_norm": 1.35165274143219, "learning_rate": 8.979320528777341e-06, "loss": 1.7983, "step": 8953 }, { "epoch": 1.6129700517901373, "grad_norm": 1.4746110439300537, "learning_rate": 8.971225031654278e-06, "loss": 1.7234, "step": 8954 }, { "epoch": 1.6131501913983337, "grad_norm": 1.5031001567840576, "learning_rate": 8.963132825839554e-06, "loss": 2.0904, "step": 8955 }, { "epoch": 1.6133303310065301, "grad_norm": 1.5136126279830933, "learning_rate": 8.95504391198233e-06, "loss": 1.8051, "step": 8956 }, { "epoch": 1.6135104706147265, "grad_norm": 1.6458314657211304, "learning_rate": 8.9469582907315e-06, "loss": 1.9362, "step": 8957 }, { "epoch": 1.613690610222923, "grad_norm": 1.4249788522720337, "learning_rate": 8.938875962735676e-06, "loss": 1.6812, "step": 8958 }, { "epoch": 1.6138707498311191, "grad_norm": 1.5732581615447998, "learning_rate": 8.93079692864322e-06, "loss": 1.8222, "step": 8959 }, { "epoch": 1.6140508894393155, "grad_norm": 1.5990383625030518, "learning_rate": 8.922721189102217e-06, "loss": 1.5322, "step": 8960 }, { "epoch": 1.6142310290475117, "grad_norm": 1.8646399974822998, "learning_rate": 8.914648744760528e-06, "loss": 2.2475, "step": 8961 }, { "epoch": 1.614411168655708, "grad_norm": 1.4296687841415405, "learning_rate": 8.906579596265707e-06, "loss": 1.5818, "step": 8962 }, { "epoch": 1.6145913082639045, "grad_norm": 1.4705822467803955, "learning_rate": 8.898513744265052e-06, "loss": 1.5749, "step": 8963 }, { "epoch": 1.614771447872101, "grad_norm": 1.3082259893417358, "learning_rate": 8.890451189405618e-06, "loss": 1.2823, "step": 8964 }, { "epoch": 1.6149515874802973, "grad_norm": 1.5380620956420898, "learning_rate": 8.882391932334139e-06, "loss": 1.4484, "step": 8965 }, { "epoch": 1.6151317270884937, "grad_norm": 1.3446846008300781, "learning_rate": 8.874335973697168e-06, "loss": 1.0838, "step": 8966 }, { "epoch": 1.61531186669669, "grad_norm": 1.515052080154419, "learning_rate": 8.866283314140927e-06, "loss": 1.5796, "step": 8967 }, { "epoch": 1.6154920063048863, "grad_norm": 1.3485757112503052, "learning_rate": 8.858233954311406e-06, "loss": 1.252, "step": 8968 }, { "epoch": 1.6156721459130825, "grad_norm": 1.3139350414276123, "learning_rate": 8.850187894854306e-06, "loss": 1.1304, "step": 8969 }, { "epoch": 1.615852285521279, "grad_norm": 1.3755433559417725, "learning_rate": 8.842145136415103e-06, "loss": 1.2409, "step": 8970 }, { "epoch": 1.6160324251294753, "grad_norm": 1.446356177330017, "learning_rate": 8.834105679638972e-06, "loss": 1.4494, "step": 8971 }, { "epoch": 1.6162125647376717, "grad_norm": 1.4663615226745605, "learning_rate": 8.826069525170827e-06, "loss": 1.4345, "step": 8972 }, { "epoch": 1.6163927043458681, "grad_norm": 1.426906943321228, "learning_rate": 8.818036673655333e-06, "loss": 1.3279, "step": 8973 }, { "epoch": 1.6165728439540645, "grad_norm": 1.5902485847473145, "learning_rate": 8.810007125736874e-06, "loss": 1.2541, "step": 8974 }, { "epoch": 1.6167529835622607, "grad_norm": 1.474242925643921, "learning_rate": 8.801980882059586e-06, "loss": 1.3267, "step": 8975 }, { "epoch": 1.6169331231704571, "grad_norm": 1.34149169921875, "learning_rate": 8.793957943267323e-06, "loss": 1.1325, "step": 8976 }, { "epoch": 1.6171132627786533, "grad_norm": 1.5356731414794922, "learning_rate": 8.785938310003683e-06, "loss": 1.5951, "step": 8977 }, { "epoch": 1.6172934023868497, "grad_norm": 1.5027077198028564, "learning_rate": 8.777921982911996e-06, "loss": 1.3397, "step": 8978 }, { "epoch": 1.6174735419950461, "grad_norm": 1.3907259702682495, "learning_rate": 8.769908962635315e-06, "loss": 1.1936, "step": 8979 }, { "epoch": 1.6176536816032425, "grad_norm": 1.6222527027130127, "learning_rate": 8.76189924981647e-06, "loss": 1.6759, "step": 8980 }, { "epoch": 1.617833821211439, "grad_norm": 1.7455780506134033, "learning_rate": 8.753892845097978e-06, "loss": 1.6083, "step": 8981 }, { "epoch": 1.6180139608196353, "grad_norm": 1.456418752670288, "learning_rate": 8.745889749122115e-06, "loss": 1.4658, "step": 8982 }, { "epoch": 1.6181941004278315, "grad_norm": 1.4400017261505127, "learning_rate": 8.737889962530865e-06, "loss": 1.4093, "step": 8983 }, { "epoch": 1.618374240036028, "grad_norm": 1.4574759006500244, "learning_rate": 8.729893485966012e-06, "loss": 1.4719, "step": 8984 }, { "epoch": 1.6185543796442243, "grad_norm": 1.6979870796203613, "learning_rate": 8.721900320068992e-06, "loss": 1.3265, "step": 8985 }, { "epoch": 1.6187345192524205, "grad_norm": 1.6142722368240356, "learning_rate": 8.713910465481017e-06, "loss": 1.5635, "step": 8986 }, { "epoch": 1.618914658860617, "grad_norm": 1.4823176860809326, "learning_rate": 8.70592392284304e-06, "loss": 1.3805, "step": 8987 }, { "epoch": 1.6190947984688133, "grad_norm": 1.5214725732803345, "learning_rate": 8.69794069279572e-06, "loss": 1.4628, "step": 8988 }, { "epoch": 1.6192749380770097, "grad_norm": 1.5264830589294434, "learning_rate": 8.689960775979489e-06, "loss": 1.4645, "step": 8989 }, { "epoch": 1.6194550776852061, "grad_norm": 1.5529534816741943, "learning_rate": 8.68198417303448e-06, "loss": 1.506, "step": 8990 }, { "epoch": 1.6196352172934025, "grad_norm": 1.557588815689087, "learning_rate": 8.67401088460058e-06, "loss": 1.4007, "step": 8991 }, { "epoch": 1.6198153569015987, "grad_norm": 1.444764494895935, "learning_rate": 8.666040911317397e-06, "loss": 1.3443, "step": 8992 }, { "epoch": 1.6199954965097951, "grad_norm": 1.6465357542037964, "learning_rate": 8.658074253824266e-06, "loss": 1.421, "step": 8993 }, { "epoch": 1.6201756361179913, "grad_norm": 1.5755488872528076, "learning_rate": 8.650110912760296e-06, "loss": 1.478, "step": 8994 }, { "epoch": 1.6203557757261877, "grad_norm": 1.746640682220459, "learning_rate": 8.642150888764289e-06, "loss": 1.7575, "step": 8995 }, { "epoch": 1.6205359153343841, "grad_norm": 1.4601953029632568, "learning_rate": 8.634194182474786e-06, "loss": 1.3062, "step": 8996 }, { "epoch": 1.6207160549425805, "grad_norm": 1.610750675201416, "learning_rate": 8.626240794530083e-06, "loss": 1.6286, "step": 8997 }, { "epoch": 1.620896194550777, "grad_norm": 1.4666494131088257, "learning_rate": 8.61829072556819e-06, "loss": 1.3697, "step": 8998 }, { "epoch": 1.6210763341589733, "grad_norm": 1.5435643196105957, "learning_rate": 8.610343976226858e-06, "loss": 1.4625, "step": 8999 }, { "epoch": 1.6212564737671695, "grad_norm": 1.5185898542404175, "learning_rate": 8.602400547143574e-06, "loss": 1.2593, "step": 9000 }, { "epoch": 1.621436613375366, "grad_norm": 1.493996024131775, "learning_rate": 8.594460438955548e-06, "loss": 1.1408, "step": 9001 }, { "epoch": 1.6216167529835621, "grad_norm": 1.2784415483474731, "learning_rate": 8.586523652299733e-06, "loss": 1.6844, "step": 9002 }, { "epoch": 1.6217968925917585, "grad_norm": 1.3722058534622192, "learning_rate": 8.57859018781283e-06, "loss": 1.712, "step": 9003 }, { "epoch": 1.621977032199955, "grad_norm": 1.3358838558197021, "learning_rate": 8.57066004613124e-06, "loss": 1.8055, "step": 9004 }, { "epoch": 1.6221571718081513, "grad_norm": 1.402470588684082, "learning_rate": 8.562733227891128e-06, "loss": 1.8546, "step": 9005 }, { "epoch": 1.6223373114163477, "grad_norm": 1.435928225517273, "learning_rate": 8.55480973372837e-06, "loss": 1.861, "step": 9006 }, { "epoch": 1.6225174510245441, "grad_norm": 1.5125575065612793, "learning_rate": 8.546889564278587e-06, "loss": 1.9096, "step": 9007 }, { "epoch": 1.6226975906327403, "grad_norm": 1.5165061950683594, "learning_rate": 8.538972720177135e-06, "loss": 1.6049, "step": 9008 }, { "epoch": 1.6228777302409367, "grad_norm": 1.7191143035888672, "learning_rate": 8.531059202059094e-06, "loss": 2.1135, "step": 9009 }, { "epoch": 1.623057869849133, "grad_norm": 1.563788652420044, "learning_rate": 8.52314901055929e-06, "loss": 1.7805, "step": 9010 }, { "epoch": 1.6232380094573293, "grad_norm": 1.6980916261672974, "learning_rate": 8.515242146312252e-06, "loss": 1.954, "step": 9011 }, { "epoch": 1.6234181490655257, "grad_norm": 1.6332604885101318, "learning_rate": 8.507338609952298e-06, "loss": 1.717, "step": 9012 }, { "epoch": 1.6235982886737221, "grad_norm": 1.4301986694335938, "learning_rate": 8.49943840211343e-06, "loss": 1.4415, "step": 9013 }, { "epoch": 1.6237784282819185, "grad_norm": 1.519019365310669, "learning_rate": 8.491541523429397e-06, "loss": 1.5076, "step": 9014 }, { "epoch": 1.623958567890115, "grad_norm": 1.4183235168457031, "learning_rate": 8.483647974533688e-06, "loss": 1.4486, "step": 9015 }, { "epoch": 1.6241387074983114, "grad_norm": 1.4618350267410278, "learning_rate": 8.4757577560595e-06, "loss": 1.4031, "step": 9016 }, { "epoch": 1.6243188471065075, "grad_norm": 1.3977642059326172, "learning_rate": 8.467870868639826e-06, "loss": 1.248, "step": 9017 }, { "epoch": 1.624498986714704, "grad_norm": 1.4015337228775024, "learning_rate": 8.459987312907308e-06, "loss": 1.3719, "step": 9018 }, { "epoch": 1.6246791263229001, "grad_norm": 1.4100414514541626, "learning_rate": 8.452107089494376e-06, "loss": 1.4763, "step": 9019 }, { "epoch": 1.6248592659310965, "grad_norm": 1.5313351154327393, "learning_rate": 8.444230199033166e-06, "loss": 1.6206, "step": 9020 }, { "epoch": 1.625039405539293, "grad_norm": 1.4793330430984497, "learning_rate": 8.436356642155573e-06, "loss": 1.3862, "step": 9021 }, { "epoch": 1.6252195451474893, "grad_norm": 1.4847387075424194, "learning_rate": 8.428486419493208e-06, "loss": 1.475, "step": 9022 }, { "epoch": 1.6253996847556857, "grad_norm": 1.28648042678833, "learning_rate": 8.42061953167742e-06, "loss": 1.2547, "step": 9023 }, { "epoch": 1.6255798243638822, "grad_norm": 1.4302302598953247, "learning_rate": 8.412755979339275e-06, "loss": 1.3175, "step": 9024 }, { "epoch": 1.6257599639720783, "grad_norm": 1.51126229763031, "learning_rate": 8.404895763109578e-06, "loss": 1.2884, "step": 9025 }, { "epoch": 1.6259401035802747, "grad_norm": 1.4226932525634766, "learning_rate": 8.397038883618896e-06, "loss": 1.2495, "step": 9026 }, { "epoch": 1.626120243188471, "grad_norm": 1.3277056217193604, "learning_rate": 8.38918534149749e-06, "loss": 1.2746, "step": 9027 }, { "epoch": 1.6263003827966673, "grad_norm": 1.2354819774627686, "learning_rate": 8.381335137375367e-06, "loss": 1.116, "step": 9028 }, { "epoch": 1.6264805224048637, "grad_norm": 1.5247491598129272, "learning_rate": 8.37348827188228e-06, "loss": 1.4898, "step": 9029 }, { "epoch": 1.6266606620130601, "grad_norm": 1.4630602598190308, "learning_rate": 8.365644745647666e-06, "loss": 1.5886, "step": 9030 }, { "epoch": 1.6268408016212565, "grad_norm": 1.4480348825454712, "learning_rate": 8.357804559300758e-06, "loss": 1.447, "step": 9031 }, { "epoch": 1.627020941229453, "grad_norm": 1.3790920972824097, "learning_rate": 8.349967713470491e-06, "loss": 1.2481, "step": 9032 }, { "epoch": 1.6272010808376491, "grad_norm": 1.5948432683944702, "learning_rate": 8.342134208785523e-06, "loss": 1.5668, "step": 9033 }, { "epoch": 1.6273812204458455, "grad_norm": 1.5799834728240967, "learning_rate": 8.334304045874247e-06, "loss": 1.407, "step": 9034 }, { "epoch": 1.6275613600540417, "grad_norm": 1.5855259895324707, "learning_rate": 8.326477225364821e-06, "loss": 1.5919, "step": 9035 }, { "epoch": 1.6277414996622381, "grad_norm": 1.464565634727478, "learning_rate": 8.318653747885092e-06, "loss": 1.2988, "step": 9036 }, { "epoch": 1.6279216392704345, "grad_norm": 1.5801305770874023, "learning_rate": 8.310833614062651e-06, "loss": 1.4633, "step": 9037 }, { "epoch": 1.628101778878631, "grad_norm": 1.6051896810531616, "learning_rate": 8.30301682452484e-06, "loss": 1.4006, "step": 9038 }, { "epoch": 1.6282819184868274, "grad_norm": 1.6969918012619019, "learning_rate": 8.295203379898703e-06, "loss": 1.539, "step": 9039 }, { "epoch": 1.6284620580950238, "grad_norm": 1.599530816078186, "learning_rate": 8.28739328081104e-06, "loss": 1.4317, "step": 9040 }, { "epoch": 1.62864219770322, "grad_norm": 1.5828198194503784, "learning_rate": 8.279586527888373e-06, "loss": 1.3214, "step": 9041 }, { "epoch": 1.6288223373114163, "grad_norm": 1.5459054708480835, "learning_rate": 8.271783121756949e-06, "loss": 1.4288, "step": 9042 }, { "epoch": 1.6290024769196128, "grad_norm": 1.5309839248657227, "learning_rate": 8.263983063042757e-06, "loss": 1.4071, "step": 9043 }, { "epoch": 1.629182616527809, "grad_norm": 1.6980780363082886, "learning_rate": 8.256186352371509e-06, "loss": 1.3543, "step": 9044 }, { "epoch": 1.6293627561360053, "grad_norm": 1.4955205917358398, "learning_rate": 8.24839299036867e-06, "loss": 1.325, "step": 9045 }, { "epoch": 1.6295428957442017, "grad_norm": 1.580418348312378, "learning_rate": 8.240602977659406e-06, "loss": 1.334, "step": 9046 }, { "epoch": 1.6297230353523982, "grad_norm": 1.582728385925293, "learning_rate": 8.23281631486863e-06, "loss": 1.5868, "step": 9047 }, { "epoch": 1.6299031749605946, "grad_norm": 1.5436362028121948, "learning_rate": 8.225033002620974e-06, "loss": 1.2998, "step": 9048 }, { "epoch": 1.630083314568791, "grad_norm": 1.5646473169326782, "learning_rate": 8.217253041540845e-06, "loss": 1.3856, "step": 9049 }, { "epoch": 1.6302634541769871, "grad_norm": 1.6701630353927612, "learning_rate": 8.209476432252317e-06, "loss": 1.471, "step": 9050 }, { "epoch": 1.6304435937851836, "grad_norm": 1.4872112274169922, "learning_rate": 8.201703175379234e-06, "loss": 1.1637, "step": 9051 }, { "epoch": 1.6306237333933797, "grad_norm": 1.4328562021255493, "learning_rate": 8.193933271545163e-06, "loss": 1.783, "step": 9052 }, { "epoch": 1.6308038730015761, "grad_norm": 1.2777864933013916, "learning_rate": 8.18616672137339e-06, "loss": 1.631, "step": 9053 }, { "epoch": 1.6309840126097725, "grad_norm": 1.324245810508728, "learning_rate": 8.178403525486966e-06, "loss": 1.6477, "step": 9054 }, { "epoch": 1.631164152217969, "grad_norm": 1.5102808475494385, "learning_rate": 8.170643684508638e-06, "loss": 2.1058, "step": 9055 }, { "epoch": 1.6313442918261654, "grad_norm": 1.4648336172103882, "learning_rate": 8.162887199060903e-06, "loss": 1.8486, "step": 9056 }, { "epoch": 1.6315244314343618, "grad_norm": 1.5705616474151611, "learning_rate": 8.15513406976598e-06, "loss": 1.9165, "step": 9057 }, { "epoch": 1.631704571042558, "grad_norm": 1.4184564352035522, "learning_rate": 8.147384297245803e-06, "loss": 1.7502, "step": 9058 }, { "epoch": 1.6318847106507544, "grad_norm": 1.6010007858276367, "learning_rate": 8.139637882122087e-06, "loss": 1.8379, "step": 9059 }, { "epoch": 1.6320648502589505, "grad_norm": 1.7304044961929321, "learning_rate": 8.131894825016234e-06, "loss": 1.7166, "step": 9060 }, { "epoch": 1.632244989867147, "grad_norm": 1.8300931453704834, "learning_rate": 8.124155126549393e-06, "loss": 1.8421, "step": 9061 }, { "epoch": 1.6324251294753434, "grad_norm": 1.4604002237319946, "learning_rate": 8.116418787342405e-06, "loss": 1.5491, "step": 9062 }, { "epoch": 1.6326052690835398, "grad_norm": 1.4102141857147217, "learning_rate": 8.10868580801592e-06, "loss": 1.5145, "step": 9063 }, { "epoch": 1.6327854086917362, "grad_norm": 1.4036797285079956, "learning_rate": 8.10095618919025e-06, "loss": 1.3797, "step": 9064 }, { "epoch": 1.6329655482999326, "grad_norm": 1.4088903665542603, "learning_rate": 8.093229931485463e-06, "loss": 1.4173, "step": 9065 }, { "epoch": 1.6331456879081288, "grad_norm": 1.513932704925537, "learning_rate": 8.085507035521367e-06, "loss": 1.3483, "step": 9066 }, { "epoch": 1.6333258275163252, "grad_norm": 1.5356560945510864, "learning_rate": 8.07778750191746e-06, "loss": 1.4576, "step": 9067 }, { "epoch": 1.6335059671245213, "grad_norm": 1.4211124181747437, "learning_rate": 8.070071331293039e-06, "loss": 1.3563, "step": 9068 }, { "epoch": 1.6336861067327177, "grad_norm": 1.4256093502044678, "learning_rate": 8.062358524267071e-06, "loss": 1.5777, "step": 9069 }, { "epoch": 1.6338662463409142, "grad_norm": 1.4369701147079468, "learning_rate": 8.05464908145827e-06, "loss": 1.3761, "step": 9070 }, { "epoch": 1.6340463859491106, "grad_norm": 1.608688235282898, "learning_rate": 8.046943003485097e-06, "loss": 1.5038, "step": 9071 }, { "epoch": 1.634226525557307, "grad_norm": 1.4133819341659546, "learning_rate": 8.039240290965716e-06, "loss": 1.2728, "step": 9072 }, { "epoch": 1.6344066651655034, "grad_norm": 1.513113260269165, "learning_rate": 8.031540944518046e-06, "loss": 1.4137, "step": 9073 }, { "epoch": 1.6345868047736998, "grad_norm": 1.3872692584991455, "learning_rate": 8.023844964759713e-06, "loss": 1.3946, "step": 9074 }, { "epoch": 1.634766944381896, "grad_norm": 1.3328146934509277, "learning_rate": 8.016152352308093e-06, "loss": 1.2578, "step": 9075 }, { "epoch": 1.6349470839900924, "grad_norm": 1.6357322931289673, "learning_rate": 8.008463107780274e-06, "loss": 1.6611, "step": 9076 }, { "epoch": 1.6351272235982885, "grad_norm": 1.5515234470367432, "learning_rate": 8.000777231793094e-06, "loss": 1.2876, "step": 9077 }, { "epoch": 1.635307363206485, "grad_norm": 1.6367772817611694, "learning_rate": 7.993094724963113e-06, "loss": 1.6553, "step": 9078 }, { "epoch": 1.6354875028146814, "grad_norm": 1.5051878690719604, "learning_rate": 7.985415587906615e-06, "loss": 1.449, "step": 9079 }, { "epoch": 1.6356676424228778, "grad_norm": 1.4763249158859253, "learning_rate": 7.97773982123961e-06, "loss": 1.3077, "step": 9080 }, { "epoch": 1.6358477820310742, "grad_norm": 1.245093822479248, "learning_rate": 7.970067425577848e-06, "loss": 1.1179, "step": 9081 }, { "epoch": 1.6360279216392706, "grad_norm": 1.5718320608139038, "learning_rate": 7.962398401536809e-06, "loss": 1.4774, "step": 9082 }, { "epoch": 1.6362080612474668, "grad_norm": 1.5183449983596802, "learning_rate": 7.954732749731692e-06, "loss": 1.5541, "step": 9083 }, { "epoch": 1.6363882008556632, "grad_norm": 1.3619437217712402, "learning_rate": 7.94707047077743e-06, "loss": 1.2706, "step": 9084 }, { "epoch": 1.6365683404638594, "grad_norm": 1.5544508695602417, "learning_rate": 7.939411565288695e-06, "loss": 1.5961, "step": 9085 }, { "epoch": 1.6367484800720558, "grad_norm": 1.4584760665893555, "learning_rate": 7.931756033879867e-06, "loss": 1.3377, "step": 9086 }, { "epoch": 1.6369286196802522, "grad_norm": 1.5793532133102417, "learning_rate": 7.92410387716509e-06, "loss": 1.3234, "step": 9087 }, { "epoch": 1.6371087592884486, "grad_norm": 1.4982151985168457, "learning_rate": 7.916455095758208e-06, "loss": 1.1766, "step": 9088 }, { "epoch": 1.637288898896645, "grad_norm": 1.5931265354156494, "learning_rate": 7.908809690272794e-06, "loss": 1.4055, "step": 9089 }, { "epoch": 1.6374690385048414, "grad_norm": 1.537518858909607, "learning_rate": 7.901167661322162e-06, "loss": 1.4299, "step": 9090 }, { "epoch": 1.6376491781130376, "grad_norm": 1.6215969324111938, "learning_rate": 7.893529009519357e-06, "loss": 1.6207, "step": 9091 }, { "epoch": 1.637829317721234, "grad_norm": 1.5307815074920654, "learning_rate": 7.88589373547715e-06, "loss": 1.399, "step": 9092 }, { "epoch": 1.6380094573294302, "grad_norm": 1.554375410079956, "learning_rate": 7.878261839808038e-06, "loss": 1.4142, "step": 9093 }, { "epoch": 1.6381895969376266, "grad_norm": 1.5103033781051636, "learning_rate": 7.870633323124249e-06, "loss": 1.2846, "step": 9094 }, { "epoch": 1.638369736545823, "grad_norm": 1.5561450719833374, "learning_rate": 7.863008186037713e-06, "loss": 1.2516, "step": 9095 }, { "epoch": 1.6385498761540194, "grad_norm": 1.5411745309829712, "learning_rate": 7.85538642916015e-06, "loss": 1.3571, "step": 9096 }, { "epoch": 1.6387300157622158, "grad_norm": 1.5924540758132935, "learning_rate": 7.84776805310296e-06, "loss": 1.5043, "step": 9097 }, { "epoch": 1.6389101553704122, "grad_norm": 1.6135451793670654, "learning_rate": 7.840153058477284e-06, "loss": 1.3234, "step": 9098 }, { "epoch": 1.6390902949786086, "grad_norm": 1.5516250133514404, "learning_rate": 7.832541445893994e-06, "loss": 1.3139, "step": 9099 }, { "epoch": 1.6392704345868048, "grad_norm": 1.586106538772583, "learning_rate": 7.824933215963676e-06, "loss": 1.2976, "step": 9100 }, { "epoch": 1.6394505741950012, "grad_norm": 1.8480298519134521, "learning_rate": 7.817328369296689e-06, "loss": 1.3535, "step": 9101 }, { "epoch": 1.6396307138031974, "grad_norm": 1.4122203588485718, "learning_rate": 7.809726906503073e-06, "loss": 1.8084, "step": 9102 }, { "epoch": 1.6398108534113938, "grad_norm": 1.3309080600738525, "learning_rate": 7.802128828192611e-06, "loss": 1.6608, "step": 9103 }, { "epoch": 1.6399909930195902, "grad_norm": 1.4233009815216064, "learning_rate": 7.794534134974823e-06, "loss": 1.8228, "step": 9104 }, { "epoch": 1.6401711326277866, "grad_norm": 1.4729584455490112, "learning_rate": 7.786942827458949e-06, "loss": 1.8481, "step": 9105 }, { "epoch": 1.640351272235983, "grad_norm": 1.4456026554107666, "learning_rate": 7.77935490625396e-06, "loss": 1.7278, "step": 9106 }, { "epoch": 1.6405314118441794, "grad_norm": 1.4141380786895752, "learning_rate": 7.771770371968556e-06, "loss": 1.4236, "step": 9107 }, { "epoch": 1.6407115514523756, "grad_norm": 1.4544466733932495, "learning_rate": 7.764189225211166e-06, "loss": 1.5494, "step": 9108 }, { "epoch": 1.640891691060572, "grad_norm": 1.5620163679122925, "learning_rate": 7.75661146658993e-06, "loss": 1.7708, "step": 9109 }, { "epoch": 1.6410718306687682, "grad_norm": 1.7449952363967896, "learning_rate": 7.749037096712758e-06, "loss": 2.2241, "step": 9110 }, { "epoch": 1.6412519702769646, "grad_norm": 1.486367106437683, "learning_rate": 7.741466116187246e-06, "loss": 1.5255, "step": 9111 }, { "epoch": 1.641432109885161, "grad_norm": 1.333660364151001, "learning_rate": 7.733898525620747e-06, "loss": 1.1848, "step": 9112 }, { "epoch": 1.6416122494933574, "grad_norm": 1.441994309425354, "learning_rate": 7.726334325620304e-06, "loss": 1.5103, "step": 9113 }, { "epoch": 1.6417923891015538, "grad_norm": 1.3525357246398926, "learning_rate": 7.718773516792754e-06, "loss": 1.3706, "step": 9114 }, { "epoch": 1.6419725287097502, "grad_norm": 1.3716084957122803, "learning_rate": 7.71121609974459e-06, "loss": 1.3065, "step": 9115 }, { "epoch": 1.6421526683179464, "grad_norm": 1.2797802686691284, "learning_rate": 7.70366207508207e-06, "loss": 1.2084, "step": 9116 }, { "epoch": 1.6423328079261428, "grad_norm": 1.4668563604354858, "learning_rate": 7.696111443411174e-06, "loss": 1.6296, "step": 9117 }, { "epoch": 1.642512947534339, "grad_norm": 1.3905410766601562, "learning_rate": 7.688564205337606e-06, "loss": 1.3526, "step": 9118 }, { "epoch": 1.6426930871425354, "grad_norm": 1.450563907623291, "learning_rate": 7.681020361466817e-06, "loss": 1.4415, "step": 9119 }, { "epoch": 1.6428732267507318, "grad_norm": 1.4846205711364746, "learning_rate": 7.673479912403963e-06, "loss": 1.4199, "step": 9120 }, { "epoch": 1.6430533663589282, "grad_norm": 1.2510420083999634, "learning_rate": 7.665942858753932e-06, "loss": 1.2963, "step": 9121 }, { "epoch": 1.6432335059671246, "grad_norm": 1.455659031867981, "learning_rate": 7.658409201121347e-06, "loss": 1.4012, "step": 9122 }, { "epoch": 1.643413645575321, "grad_norm": 1.512078881263733, "learning_rate": 7.65087894011054e-06, "loss": 1.478, "step": 9123 }, { "epoch": 1.6435937851835172, "grad_norm": 1.5768029689788818, "learning_rate": 7.643352076325604e-06, "loss": 1.4387, "step": 9124 }, { "epoch": 1.6437739247917136, "grad_norm": 1.46713125705719, "learning_rate": 7.635828610370338e-06, "loss": 1.2989, "step": 9125 }, { "epoch": 1.6439540643999098, "grad_norm": 1.411866545677185, "learning_rate": 7.628308542848278e-06, "loss": 1.4271, "step": 9126 }, { "epoch": 1.6441342040081062, "grad_norm": 1.4002352952957153, "learning_rate": 7.620791874362643e-06, "loss": 1.278, "step": 9127 }, { "epoch": 1.6443143436163026, "grad_norm": 1.4821597337722778, "learning_rate": 7.613278605516455e-06, "loss": 1.3851, "step": 9128 }, { "epoch": 1.644494483224499, "grad_norm": 1.4351940155029297, "learning_rate": 7.6057687369124105e-06, "loss": 1.3596, "step": 9129 }, { "epoch": 1.6446746228326954, "grad_norm": 1.3682835102081299, "learning_rate": 7.59826226915295e-06, "loss": 1.1133, "step": 9130 }, { "epoch": 1.6448547624408918, "grad_norm": 1.66762113571167, "learning_rate": 7.590759202840237e-06, "loss": 1.678, "step": 9131 }, { "epoch": 1.6450349020490882, "grad_norm": 1.4181160926818848, "learning_rate": 7.583259538576149e-06, "loss": 1.2646, "step": 9132 }, { "epoch": 1.6452150416572844, "grad_norm": 1.5533076524734497, "learning_rate": 7.575763276962333e-06, "loss": 1.3039, "step": 9133 }, { "epoch": 1.6453951812654808, "grad_norm": 1.5733705759048462, "learning_rate": 7.568270418600126e-06, "loss": 1.3135, "step": 9134 }, { "epoch": 1.645575320873677, "grad_norm": 1.5810977220535278, "learning_rate": 7.5607809640905915e-06, "loss": 1.6116, "step": 9135 }, { "epoch": 1.6457554604818734, "grad_norm": 1.513537049293518, "learning_rate": 7.553294914034542e-06, "loss": 1.3003, "step": 9136 }, { "epoch": 1.6459356000900698, "grad_norm": 1.4928573369979858, "learning_rate": 7.545812269032498e-06, "loss": 1.199, "step": 9137 }, { "epoch": 1.6461157396982662, "grad_norm": 1.3583133220672607, "learning_rate": 7.538333029684719e-06, "loss": 1.1891, "step": 9138 }, { "epoch": 1.6462958793064626, "grad_norm": 1.3966648578643799, "learning_rate": 7.530857196591179e-06, "loss": 1.2593, "step": 9139 }, { "epoch": 1.646476018914659, "grad_norm": 1.447985053062439, "learning_rate": 7.523384770351588e-06, "loss": 1.3002, "step": 9140 }, { "epoch": 1.6466561585228552, "grad_norm": 1.4398032426834106, "learning_rate": 7.515915751565367e-06, "loss": 1.3891, "step": 9141 }, { "epoch": 1.6468362981310516, "grad_norm": 1.6720916032791138, "learning_rate": 7.508450140831708e-06, "loss": 1.4786, "step": 9142 }, { "epoch": 1.6470164377392478, "grad_norm": 1.5649749040603638, "learning_rate": 7.500987938749477e-06, "loss": 1.5055, "step": 9143 }, { "epoch": 1.6471965773474442, "grad_norm": 1.571779727935791, "learning_rate": 7.493529145917294e-06, "loss": 1.4743, "step": 9144 }, { "epoch": 1.6473767169556406, "grad_norm": 1.3726614713668823, "learning_rate": 7.4860737629335045e-06, "loss": 1.134, "step": 9145 }, { "epoch": 1.647556856563837, "grad_norm": 1.498897910118103, "learning_rate": 7.478621790396162e-06, "loss": 1.2898, "step": 9146 }, { "epoch": 1.6477369961720334, "grad_norm": 1.6689149141311646, "learning_rate": 7.471173228903072e-06, "loss": 1.6573, "step": 9147 }, { "epoch": 1.6479171357802298, "grad_norm": 1.5871111154556274, "learning_rate": 7.463728079051751e-06, "loss": 1.2576, "step": 9148 }, { "epoch": 1.648097275388426, "grad_norm": 1.6523500680923462, "learning_rate": 7.4562863414394445e-06, "loss": 1.3293, "step": 9149 }, { "epoch": 1.6482774149966224, "grad_norm": 1.5012227296829224, "learning_rate": 7.448848016663124e-06, "loss": 1.4425, "step": 9150 }, { "epoch": 1.6484575546048186, "grad_norm": 1.4702601432800293, "learning_rate": 7.441413105319478e-06, "loss": 1.0992, "step": 9151 }, { "epoch": 1.648637694213015, "grad_norm": 1.3958467245101929, "learning_rate": 7.433981608004958e-06, "loss": 2.1669, "step": 9152 }, { "epoch": 1.6488178338212114, "grad_norm": 1.3880884647369385, "learning_rate": 7.426553525315699e-06, "loss": 1.8119, "step": 9153 }, { "epoch": 1.6489979734294078, "grad_norm": 1.3903489112854004, "learning_rate": 7.419128857847574e-06, "loss": 1.6083, "step": 9154 }, { "epoch": 1.6491781130376042, "grad_norm": 1.4274431467056274, "learning_rate": 7.4117076061961885e-06, "loss": 1.6551, "step": 9155 }, { "epoch": 1.6493582526458006, "grad_norm": 1.4004285335540771, "learning_rate": 7.404289770956879e-06, "loss": 1.5025, "step": 9156 }, { "epoch": 1.649538392253997, "grad_norm": 1.47087824344635, "learning_rate": 7.3968753527247026e-06, "loss": 1.7137, "step": 9157 }, { "epoch": 1.6497185318621932, "grad_norm": 1.6097174882888794, "learning_rate": 7.389464352094428e-06, "loss": 1.7584, "step": 9158 }, { "epoch": 1.6498986714703896, "grad_norm": 1.7657599449157715, "learning_rate": 7.382056769660583e-06, "loss": 2.2776, "step": 9159 }, { "epoch": 1.6500788110785858, "grad_norm": 1.9096273183822632, "learning_rate": 7.374652606017363e-06, "loss": 2.0394, "step": 9160 }, { "epoch": 1.6502589506867822, "grad_norm": 1.5374268293380737, "learning_rate": 7.3672518617587615e-06, "loss": 1.5644, "step": 9161 }, { "epoch": 1.6504390902949786, "grad_norm": 1.4642930030822754, "learning_rate": 7.35985453747845e-06, "loss": 1.5732, "step": 9162 }, { "epoch": 1.650619229903175, "grad_norm": 1.4503756761550903, "learning_rate": 7.352460633769836e-06, "loss": 1.6083, "step": 9163 }, { "epoch": 1.6507993695113714, "grad_norm": 1.3482158184051514, "learning_rate": 7.345070151226058e-06, "loss": 1.3422, "step": 9164 }, { "epoch": 1.6509795091195678, "grad_norm": 1.4234977960586548, "learning_rate": 7.337683090439962e-06, "loss": 1.3128, "step": 9165 }, { "epoch": 1.651159648727764, "grad_norm": 1.351873755455017, "learning_rate": 7.330299452004163e-06, "loss": 1.4488, "step": 9166 }, { "epoch": 1.6513397883359604, "grad_norm": 1.4494565725326538, "learning_rate": 7.322919236510955e-06, "loss": 1.3232, "step": 9167 }, { "epoch": 1.6515199279441566, "grad_norm": 1.4370148181915283, "learning_rate": 7.315542444552381e-06, "loss": 1.4195, "step": 9168 }, { "epoch": 1.651700067552353, "grad_norm": 1.5724514722824097, "learning_rate": 7.308169076720201e-06, "loss": 1.5587, "step": 9169 }, { "epoch": 1.6518802071605494, "grad_norm": 1.5104765892028809, "learning_rate": 7.300799133605907e-06, "loss": 1.4954, "step": 9170 }, { "epoch": 1.6520603467687458, "grad_norm": 1.5433485507965088, "learning_rate": 7.293432615800705e-06, "loss": 1.6514, "step": 9171 }, { "epoch": 1.6522404863769422, "grad_norm": 1.3862031698226929, "learning_rate": 7.286069523895533e-06, "loss": 1.3225, "step": 9172 }, { "epoch": 1.6524206259851386, "grad_norm": 1.4417142868041992, "learning_rate": 7.278709858481064e-06, "loss": 1.4194, "step": 9173 }, { "epoch": 1.6526007655933348, "grad_norm": 1.4928771257400513, "learning_rate": 7.2713536201476695e-06, "loss": 1.3339, "step": 9174 }, { "epoch": 1.6527809052015312, "grad_norm": 1.5620970726013184, "learning_rate": 7.264000809485483e-06, "loss": 1.7744, "step": 9175 }, { "epoch": 1.6529610448097274, "grad_norm": 1.2913447618484497, "learning_rate": 7.256651427084338e-06, "loss": 1.1589, "step": 9176 }, { "epoch": 1.6531411844179238, "grad_norm": 1.6886887550354004, "learning_rate": 7.249305473533796e-06, "loss": 1.4549, "step": 9177 }, { "epoch": 1.6533213240261202, "grad_norm": 1.4848970174789429, "learning_rate": 7.241962949423148e-06, "loss": 1.5128, "step": 9178 }, { "epoch": 1.6535014636343166, "grad_norm": 1.5131266117095947, "learning_rate": 7.234623855341405e-06, "loss": 1.5067, "step": 9179 }, { "epoch": 1.653681603242513, "grad_norm": 1.5141117572784424, "learning_rate": 7.227288191877302e-06, "loss": 1.4605, "step": 9180 }, { "epoch": 1.6538617428507094, "grad_norm": 1.4768322706222534, "learning_rate": 7.219955959619307e-06, "loss": 1.3815, "step": 9181 }, { "epoch": 1.6540418824589056, "grad_norm": 1.3588227033615112, "learning_rate": 7.21262715915561e-06, "loss": 1.3051, "step": 9182 }, { "epoch": 1.654222022067102, "grad_norm": 1.657265067100525, "learning_rate": 7.205301791074109e-06, "loss": 1.6501, "step": 9183 }, { "epoch": 1.6544021616752984, "grad_norm": 1.4423168897628784, "learning_rate": 7.197979855962466e-06, "loss": 1.2638, "step": 9184 }, { "epoch": 1.6545823012834946, "grad_norm": 1.5935659408569336, "learning_rate": 7.190661354408029e-06, "loss": 1.4209, "step": 9185 }, { "epoch": 1.654762440891691, "grad_norm": 1.3804147243499756, "learning_rate": 7.1833462869978885e-06, "loss": 1.015, "step": 9186 }, { "epoch": 1.6549425804998874, "grad_norm": 1.4230194091796875, "learning_rate": 7.176034654318853e-06, "loss": 1.497, "step": 9187 }, { "epoch": 1.6551227201080838, "grad_norm": 1.480778694152832, "learning_rate": 7.168726456957448e-06, "loss": 1.286, "step": 9188 }, { "epoch": 1.6553028597162802, "grad_norm": 1.4998599290847778, "learning_rate": 7.161421695499954e-06, "loss": 1.2958, "step": 9189 }, { "epoch": 1.6554829993244766, "grad_norm": 1.3281853199005127, "learning_rate": 7.1541203705323465e-06, "loss": 1.101, "step": 9190 }, { "epoch": 1.6556631389326728, "grad_norm": 1.5923738479614258, "learning_rate": 7.1468224826403475e-06, "loss": 1.4158, "step": 9191 }, { "epoch": 1.6558432785408692, "grad_norm": 1.5447237491607666, "learning_rate": 7.139528032409359e-06, "loss": 1.5122, "step": 9192 }, { "epoch": 1.6560234181490654, "grad_norm": 1.6370972394943237, "learning_rate": 7.132237020424548e-06, "loss": 1.7458, "step": 9193 }, { "epoch": 1.6562035577572618, "grad_norm": 1.373033046722412, "learning_rate": 7.124949447270812e-06, "loss": 1.2028, "step": 9194 }, { "epoch": 1.6563836973654582, "grad_norm": 1.6207705736160278, "learning_rate": 7.117665313532751e-06, "loss": 1.442, "step": 9195 }, { "epoch": 1.6565638369736546, "grad_norm": 1.4296326637268066, "learning_rate": 7.110384619794685e-06, "loss": 1.0397, "step": 9196 }, { "epoch": 1.656743976581851, "grad_norm": 1.5252666473388672, "learning_rate": 7.103107366640671e-06, "loss": 1.1754, "step": 9197 }, { "epoch": 1.6569241161900474, "grad_norm": 1.6323976516723633, "learning_rate": 7.095833554654497e-06, "loss": 1.2867, "step": 9198 }, { "epoch": 1.6571042557982436, "grad_norm": 1.6161121129989624, "learning_rate": 7.088563184419655e-06, "loss": 1.2683, "step": 9199 }, { "epoch": 1.65728439540644, "grad_norm": 1.4537461996078491, "learning_rate": 7.081296256519376e-06, "loss": 1.1056, "step": 9200 }, { "epoch": 1.6574645350146362, "grad_norm": 1.4482789039611816, "learning_rate": 7.074032771536604e-06, "loss": 1.0953, "step": 9201 }, { "epoch": 1.6576446746228326, "grad_norm": 1.3574049472808838, "learning_rate": 7.066772730054017e-06, "loss": 1.6399, "step": 9202 }, { "epoch": 1.657824814231029, "grad_norm": 1.353438377380371, "learning_rate": 7.059516132654009e-06, "loss": 1.9004, "step": 9203 }, { "epoch": 1.6580049538392254, "grad_norm": 1.368770956993103, "learning_rate": 7.0522629799187e-06, "loss": 1.9645, "step": 9204 }, { "epoch": 1.6581850934474218, "grad_norm": 1.4041920900344849, "learning_rate": 7.045013272429935e-06, "loss": 1.7341, "step": 9205 }, { "epoch": 1.6583652330556182, "grad_norm": 1.4023265838623047, "learning_rate": 7.0377670107692865e-06, "loss": 1.6694, "step": 9206 }, { "epoch": 1.6585453726638144, "grad_norm": 1.4291603565216064, "learning_rate": 7.030524195518024e-06, "loss": 1.6597, "step": 9207 }, { "epoch": 1.6587255122720108, "grad_norm": 1.6233329772949219, "learning_rate": 7.0232848272571985e-06, "loss": 1.9106, "step": 9208 }, { "epoch": 1.658905651880207, "grad_norm": 1.3920167684555054, "learning_rate": 7.0160489065675294e-06, "loss": 1.4505, "step": 9209 }, { "epoch": 1.6590857914884034, "grad_norm": 1.6637388467788696, "learning_rate": 7.008816434029481e-06, "loss": 1.8336, "step": 9210 }, { "epoch": 1.6592659310965998, "grad_norm": 1.675485610961914, "learning_rate": 7.001587410223237e-06, "loss": 1.6661, "step": 9211 }, { "epoch": 1.6594460707047962, "grad_norm": 1.5309358835220337, "learning_rate": 6.994361835728708e-06, "loss": 1.4504, "step": 9212 }, { "epoch": 1.6596262103129926, "grad_norm": 1.3489881753921509, "learning_rate": 6.987139711125524e-06, "loss": 1.3182, "step": 9213 }, { "epoch": 1.659806349921189, "grad_norm": 1.4475346803665161, "learning_rate": 6.979921036993042e-06, "loss": 1.4292, "step": 9214 }, { "epoch": 1.6599864895293854, "grad_norm": 1.2777899503707886, "learning_rate": 6.972705813910341e-06, "loss": 1.1309, "step": 9215 }, { "epoch": 1.6601666291375816, "grad_norm": 1.556312918663025, "learning_rate": 6.965494042456205e-06, "loss": 1.654, "step": 9216 }, { "epoch": 1.660346768745778, "grad_norm": 1.3114372491836548, "learning_rate": 6.958285723209196e-06, "loss": 1.396, "step": 9217 }, { "epoch": 1.6605269083539742, "grad_norm": 1.3989691734313965, "learning_rate": 6.951080856747533e-06, "loss": 1.1988, "step": 9218 }, { "epoch": 1.6607070479621706, "grad_norm": 1.2972607612609863, "learning_rate": 6.943879443649198e-06, "loss": 1.1605, "step": 9219 }, { "epoch": 1.660887187570367, "grad_norm": 1.4657961130142212, "learning_rate": 6.936681484491875e-06, "loss": 1.2247, "step": 9220 }, { "epoch": 1.6610673271785634, "grad_norm": 1.2845828533172607, "learning_rate": 6.929486979852995e-06, "loss": 1.0902, "step": 9221 }, { "epoch": 1.6612474667867598, "grad_norm": 1.448779821395874, "learning_rate": 6.922295930309691e-06, "loss": 1.4701, "step": 9222 }, { "epoch": 1.6614276063949562, "grad_norm": 1.4207104444503784, "learning_rate": 6.915108336438836e-06, "loss": 1.3069, "step": 9223 }, { "epoch": 1.6616077460031524, "grad_norm": 1.3946537971496582, "learning_rate": 6.907924198816995e-06, "loss": 1.2854, "step": 9224 }, { "epoch": 1.6617878856113488, "grad_norm": 1.581763744354248, "learning_rate": 6.900743518020475e-06, "loss": 1.5648, "step": 9225 }, { "epoch": 1.661968025219545, "grad_norm": 1.4212431907653809, "learning_rate": 6.893566294625326e-06, "loss": 1.243, "step": 9226 }, { "epoch": 1.6621481648277414, "grad_norm": 1.6154202222824097, "learning_rate": 6.886392529207292e-06, "loss": 1.438, "step": 9227 }, { "epoch": 1.6623283044359378, "grad_norm": 1.4189852476119995, "learning_rate": 6.879222222341852e-06, "loss": 1.4055, "step": 9228 }, { "epoch": 1.6625084440441342, "grad_norm": 1.5226151943206787, "learning_rate": 6.872055374604197e-06, "loss": 1.6372, "step": 9229 }, { "epoch": 1.6626885836523306, "grad_norm": 1.573737621307373, "learning_rate": 6.86489198656925e-06, "loss": 1.4532, "step": 9230 }, { "epoch": 1.662868723260527, "grad_norm": 1.535461664199829, "learning_rate": 6.857732058811661e-06, "loss": 1.3273, "step": 9231 }, { "epoch": 1.6630488628687232, "grad_norm": 1.455527663230896, "learning_rate": 6.850575591905794e-06, "loss": 1.4525, "step": 9232 }, { "epoch": 1.6632290024769196, "grad_norm": 1.6086820363998413, "learning_rate": 6.843422586425741e-06, "loss": 1.5924, "step": 9233 }, { "epoch": 1.6634091420851158, "grad_norm": 1.5959326028823853, "learning_rate": 6.836273042945302e-06, "loss": 1.3925, "step": 9234 }, { "epoch": 1.6635892816933122, "grad_norm": 1.4982845783233643, "learning_rate": 6.829126962038013e-06, "loss": 1.3325, "step": 9235 }, { "epoch": 1.6637694213015086, "grad_norm": 1.42143976688385, "learning_rate": 6.821984344277138e-06, "loss": 1.3187, "step": 9236 }, { "epoch": 1.663949560909705, "grad_norm": 1.4057400226593018, "learning_rate": 6.814845190235641e-06, "loss": 1.307, "step": 9237 }, { "epoch": 1.6641297005179014, "grad_norm": 1.5301775932312012, "learning_rate": 6.807709500486237e-06, "loss": 1.3539, "step": 9238 }, { "epoch": 1.6643098401260978, "grad_norm": 1.4692734479904175, "learning_rate": 6.800577275601317e-06, "loss": 1.3664, "step": 9239 }, { "epoch": 1.664489979734294, "grad_norm": 1.64423406124115, "learning_rate": 6.793448516153067e-06, "loss": 1.5151, "step": 9240 }, { "epoch": 1.6646701193424904, "grad_norm": 1.59138822555542, "learning_rate": 6.786323222713326e-06, "loss": 1.4887, "step": 9241 }, { "epoch": 1.6648502589506868, "grad_norm": 1.5972872972488403, "learning_rate": 6.779201395853685e-06, "loss": 1.3675, "step": 9242 }, { "epoch": 1.665030398558883, "grad_norm": 1.5561999082565308, "learning_rate": 6.77208303614546e-06, "loss": 1.318, "step": 9243 }, { "epoch": 1.6652105381670794, "grad_norm": 1.621854543685913, "learning_rate": 6.764968144159678e-06, "loss": 1.3312, "step": 9244 }, { "epoch": 1.6653906777752758, "grad_norm": 1.637001633644104, "learning_rate": 6.757856720467093e-06, "loss": 1.425, "step": 9245 }, { "epoch": 1.6655708173834722, "grad_norm": 1.5609912872314453, "learning_rate": 6.750748765638182e-06, "loss": 1.3637, "step": 9246 }, { "epoch": 1.6657509569916686, "grad_norm": 1.51085364818573, "learning_rate": 6.743644280243133e-06, "loss": 1.3939, "step": 9247 }, { "epoch": 1.665931096599865, "grad_norm": 1.4847136735916138, "learning_rate": 6.736543264851864e-06, "loss": 1.394, "step": 9248 }, { "epoch": 1.6661112362080612, "grad_norm": 1.6096235513687134, "learning_rate": 6.729445720034028e-06, "loss": 1.3906, "step": 9249 }, { "epoch": 1.6662913758162576, "grad_norm": 1.6262269020080566, "learning_rate": 6.722351646358987e-06, "loss": 1.2838, "step": 9250 }, { "epoch": 1.6664715154244538, "grad_norm": 1.3291374444961548, "learning_rate": 6.71526104439581e-06, "loss": 0.9427, "step": 9251 }, { "epoch": 1.6666516550326502, "grad_norm": 1.3149293661117554, "learning_rate": 6.7081739147133115e-06, "loss": 1.531, "step": 9252 }, { "epoch": 1.6668317946408466, "grad_norm": 1.3284752368927002, "learning_rate": 6.701090257880005e-06, "loss": 1.8974, "step": 9253 }, { "epoch": 1.667011934249043, "grad_norm": 1.3059722185134888, "learning_rate": 6.694010074464158e-06, "loss": 1.7937, "step": 9254 }, { "epoch": 1.6671920738572394, "grad_norm": 1.4482176303863525, "learning_rate": 6.686933365033726e-06, "loss": 1.9599, "step": 9255 }, { "epoch": 1.6673722134654358, "grad_norm": 1.4542229175567627, "learning_rate": 6.6798601301564175e-06, "loss": 1.9155, "step": 9256 }, { "epoch": 1.667552353073632, "grad_norm": 1.3910964727401733, "learning_rate": 6.672790370399618e-06, "loss": 1.6094, "step": 9257 }, { "epoch": 1.6677324926818284, "grad_norm": 1.4538975954055786, "learning_rate": 6.6657240863304605e-06, "loss": 1.6888, "step": 9258 }, { "epoch": 1.6679126322900246, "grad_norm": 1.408825397491455, "learning_rate": 6.658661278515815e-06, "loss": 1.5511, "step": 9259 }, { "epoch": 1.668092771898221, "grad_norm": 1.6331695318222046, "learning_rate": 6.651601947522257e-06, "loss": 1.8638, "step": 9260 }, { "epoch": 1.6682729115064174, "grad_norm": 1.7563532590866089, "learning_rate": 6.644546093916076e-06, "loss": 1.8483, "step": 9261 }, { "epoch": 1.6684530511146138, "grad_norm": 1.6008085012435913, "learning_rate": 6.637493718263277e-06, "loss": 1.9279, "step": 9262 }, { "epoch": 1.6686331907228102, "grad_norm": 1.3369431495666504, "learning_rate": 6.630444821129622e-06, "loss": 1.4001, "step": 9263 }, { "epoch": 1.6688133303310067, "grad_norm": 1.3782161474227905, "learning_rate": 6.623399403080555e-06, "loss": 1.4676, "step": 9264 }, { "epoch": 1.6689934699392028, "grad_norm": 1.4244213104248047, "learning_rate": 6.616357464681267e-06, "loss": 1.4251, "step": 9265 }, { "epoch": 1.6691736095473992, "grad_norm": 1.4525614976882935, "learning_rate": 6.609319006496645e-06, "loss": 1.4063, "step": 9266 }, { "epoch": 1.6693537491555954, "grad_norm": 1.5354183912277222, "learning_rate": 6.602284029091321e-06, "loss": 1.7287, "step": 9267 }, { "epoch": 1.6695338887637918, "grad_norm": 1.479678750038147, "learning_rate": 6.595252533029633e-06, "loss": 1.3172, "step": 9268 }, { "epoch": 1.6697140283719882, "grad_norm": 1.5796678066253662, "learning_rate": 6.588224518875647e-06, "loss": 1.4513, "step": 9269 }, { "epoch": 1.6698941679801846, "grad_norm": 1.4105284214019775, "learning_rate": 6.581199987193143e-06, "loss": 1.3071, "step": 9270 }, { "epoch": 1.670074307588381, "grad_norm": 1.412529706954956, "learning_rate": 6.574178938545627e-06, "loss": 1.3735, "step": 9271 }, { "epoch": 1.6702544471965775, "grad_norm": 1.3892160654067993, "learning_rate": 6.567161373496316e-06, "loss": 1.4019, "step": 9272 }, { "epoch": 1.6704345868047739, "grad_norm": 1.3655197620391846, "learning_rate": 6.5601472926081766e-06, "loss": 1.2421, "step": 9273 }, { "epoch": 1.67061472641297, "grad_norm": 1.4209932088851929, "learning_rate": 6.553136696443856e-06, "loss": 1.4158, "step": 9274 }, { "epoch": 1.6707948660211664, "grad_norm": 1.394223690032959, "learning_rate": 6.546129585565752e-06, "loss": 1.1676, "step": 9275 }, { "epoch": 1.6709750056293626, "grad_norm": 1.493454933166504, "learning_rate": 6.539125960535969e-06, "loss": 1.4004, "step": 9276 }, { "epoch": 1.671155145237559, "grad_norm": 1.5593960285186768, "learning_rate": 6.53212582191633e-06, "loss": 1.4255, "step": 9277 }, { "epoch": 1.6713352848457554, "grad_norm": 1.4963762760162354, "learning_rate": 6.525129170268385e-06, "loss": 1.5255, "step": 9278 }, { "epoch": 1.6715154244539518, "grad_norm": 1.4560383558273315, "learning_rate": 6.518136006153403e-06, "loss": 1.4282, "step": 9279 }, { "epoch": 1.6716955640621483, "grad_norm": 1.279828429222107, "learning_rate": 6.511146330132367e-06, "loss": 1.1918, "step": 9280 }, { "epoch": 1.6718757036703447, "grad_norm": 1.5695408582687378, "learning_rate": 6.504160142765986e-06, "loss": 1.4093, "step": 9281 }, { "epoch": 1.6720558432785408, "grad_norm": 1.4600454568862915, "learning_rate": 6.4971774446147e-06, "loss": 1.2941, "step": 9282 }, { "epoch": 1.6722359828867372, "grad_norm": 1.4232903718948364, "learning_rate": 6.490198236238648e-06, "loss": 1.2589, "step": 9283 }, { "epoch": 1.6724161224949334, "grad_norm": 1.5326049327850342, "learning_rate": 6.4832225181977015e-06, "loss": 1.4691, "step": 9284 }, { "epoch": 1.6725962621031298, "grad_norm": 1.533703327178955, "learning_rate": 6.47625029105145e-06, "loss": 1.4436, "step": 9285 }, { "epoch": 1.6727764017113262, "grad_norm": 1.6406062841415405, "learning_rate": 6.469281555359185e-06, "loss": 1.4567, "step": 9286 }, { "epoch": 1.6729565413195226, "grad_norm": 1.5250133275985718, "learning_rate": 6.462316311679967e-06, "loss": 1.3538, "step": 9287 }, { "epoch": 1.673136680927719, "grad_norm": 1.650330662727356, "learning_rate": 6.455354560572535e-06, "loss": 1.3333, "step": 9288 }, { "epoch": 1.6733168205359155, "grad_norm": 1.5261865854263306, "learning_rate": 6.448396302595344e-06, "loss": 1.321, "step": 9289 }, { "epoch": 1.6734969601441116, "grad_norm": 1.5919733047485352, "learning_rate": 6.4414415383065755e-06, "loss": 1.4509, "step": 9290 }, { "epoch": 1.673677099752308, "grad_norm": 1.4564836025238037, "learning_rate": 6.43449026826416e-06, "loss": 1.328, "step": 9291 }, { "epoch": 1.6738572393605042, "grad_norm": 1.517279863357544, "learning_rate": 6.427542493025712e-06, "loss": 1.4691, "step": 9292 }, { "epoch": 1.6740373789687006, "grad_norm": 1.5118247270584106, "learning_rate": 6.4205982131485875e-06, "loss": 1.1655, "step": 9293 }, { "epoch": 1.674217518576897, "grad_norm": 1.6690576076507568, "learning_rate": 6.413657429189845e-06, "loss": 1.5493, "step": 9294 }, { "epoch": 1.6743976581850935, "grad_norm": 1.4392532110214233, "learning_rate": 6.406720141706263e-06, "loss": 1.1293, "step": 9295 }, { "epoch": 1.6745777977932899, "grad_norm": 1.528138518333435, "learning_rate": 6.399786351254372e-06, "loss": 1.2934, "step": 9296 }, { "epoch": 1.6747579374014863, "grad_norm": 1.455649971961975, "learning_rate": 6.392856058390379e-06, "loss": 1.377, "step": 9297 }, { "epoch": 1.6749380770096827, "grad_norm": 1.555851697921753, "learning_rate": 6.385929263670232e-06, "loss": 1.3059, "step": 9298 }, { "epoch": 1.6751182166178789, "grad_norm": 1.9327064752578735, "learning_rate": 6.3790059676495984e-06, "loss": 1.7124, "step": 9299 }, { "epoch": 1.6752983562260753, "grad_norm": 1.417898178100586, "learning_rate": 6.372086170883857e-06, "loss": 1.2291, "step": 9300 }, { "epoch": 1.6754784958342714, "grad_norm": 1.3643193244934082, "learning_rate": 6.365169873928112e-06, "loss": 1.142, "step": 9301 }, { "epoch": 1.6756586354424678, "grad_norm": 1.4310061931610107, "learning_rate": 6.3582570773371875e-06, "loss": 1.9516, "step": 9302 }, { "epoch": 1.6758387750506643, "grad_norm": 1.3903883695602417, "learning_rate": 6.351347781665623e-06, "loss": 1.7865, "step": 9303 }, { "epoch": 1.6760189146588607, "grad_norm": 1.3864984512329102, "learning_rate": 6.344441987467664e-06, "loss": 1.7112, "step": 9304 }, { "epoch": 1.676199054267057, "grad_norm": 1.479129672050476, "learning_rate": 6.337539695297323e-06, "loss": 1.7727, "step": 9305 }, { "epoch": 1.6763791938752535, "grad_norm": 1.475406289100647, "learning_rate": 6.330640905708279e-06, "loss": 1.6852, "step": 9306 }, { "epoch": 1.6765593334834497, "grad_norm": 1.3398133516311646, "learning_rate": 6.323745619253946e-06, "loss": 1.4649, "step": 9307 }, { "epoch": 1.676739473091646, "grad_norm": 1.526917815208435, "learning_rate": 6.316853836487469e-06, "loss": 1.8893, "step": 9308 }, { "epoch": 1.6769196126998422, "grad_norm": 1.5974851846694946, "learning_rate": 6.3099655579617e-06, "loss": 2.005, "step": 9309 }, { "epoch": 1.6770997523080386, "grad_norm": 1.6952717304229736, "learning_rate": 6.303080784229215e-06, "loss": 1.6756, "step": 9310 }, { "epoch": 1.677279891916235, "grad_norm": 2.03130841255188, "learning_rate": 6.296199515842299e-06, "loss": 2.1392, "step": 9311 }, { "epoch": 1.6774600315244315, "grad_norm": 1.6839735507965088, "learning_rate": 6.289321753352978e-06, "loss": 1.844, "step": 9312 }, { "epoch": 1.6776401711326279, "grad_norm": 1.39813232421875, "learning_rate": 6.282447497312971e-06, "loss": 1.5295, "step": 9313 }, { "epoch": 1.6778203107408243, "grad_norm": 1.2228069305419922, "learning_rate": 6.275576748273721e-06, "loss": 1.1335, "step": 9314 }, { "epoch": 1.6780004503490205, "grad_norm": 1.3315496444702148, "learning_rate": 6.268709506786419e-06, "loss": 1.3848, "step": 9315 }, { "epoch": 1.6781805899572169, "grad_norm": 1.4042381048202515, "learning_rate": 6.2618457734019364e-06, "loss": 1.46, "step": 9316 }, { "epoch": 1.678360729565413, "grad_norm": 1.5339460372924805, "learning_rate": 6.254985548670889e-06, "loss": 1.4393, "step": 9317 }, { "epoch": 1.6785408691736095, "grad_norm": 1.3979800939559937, "learning_rate": 6.248128833143574e-06, "loss": 1.3633, "step": 9318 }, { "epoch": 1.6787210087818059, "grad_norm": 1.4021457433700562, "learning_rate": 6.241275627370069e-06, "loss": 1.4095, "step": 9319 }, { "epoch": 1.6789011483900023, "grad_norm": 1.347480297088623, "learning_rate": 6.2344259319001175e-06, "loss": 1.2728, "step": 9320 }, { "epoch": 1.6790812879981987, "grad_norm": 1.5199741125106812, "learning_rate": 6.227579747283213e-06, "loss": 1.3622, "step": 9321 }, { "epoch": 1.679261427606395, "grad_norm": 1.4238966703414917, "learning_rate": 6.2207370740685275e-06, "loss": 1.3901, "step": 9322 }, { "epoch": 1.6794415672145913, "grad_norm": 1.5909440517425537, "learning_rate": 6.213897912804984e-06, "loss": 1.6959, "step": 9323 }, { "epoch": 1.6796217068227877, "grad_norm": 1.3264161348342896, "learning_rate": 6.207062264041225e-06, "loss": 1.2573, "step": 9324 }, { "epoch": 1.6798018464309838, "grad_norm": 1.4370753765106201, "learning_rate": 6.200230128325607e-06, "loss": 1.2657, "step": 9325 }, { "epoch": 1.6799819860391803, "grad_norm": 1.4973523616790771, "learning_rate": 6.193401506206192e-06, "loss": 1.3223, "step": 9326 }, { "epoch": 1.6801621256473767, "grad_norm": 1.409073829650879, "learning_rate": 6.18657639823077e-06, "loss": 1.2728, "step": 9327 }, { "epoch": 1.680342265255573, "grad_norm": 1.5397162437438965, "learning_rate": 6.179754804946836e-06, "loss": 1.4265, "step": 9328 }, { "epoch": 1.6805224048637695, "grad_norm": 1.6442248821258545, "learning_rate": 6.1729367269016406e-06, "loss": 1.3897, "step": 9329 }, { "epoch": 1.6807025444719659, "grad_norm": 1.4820566177368164, "learning_rate": 6.166122164642113e-06, "loss": 1.3509, "step": 9330 }, { "epoch": 1.6808826840801623, "grad_norm": 1.413561224937439, "learning_rate": 6.1593111187149165e-06, "loss": 1.2368, "step": 9331 }, { "epoch": 1.6810628236883585, "grad_norm": 1.5177332162857056, "learning_rate": 6.152503589666425e-06, "loss": 1.3211, "step": 9332 }, { "epoch": 1.6812429632965549, "grad_norm": 1.386770486831665, "learning_rate": 6.145699578042741e-06, "loss": 1.2337, "step": 9333 }, { "epoch": 1.681423102904751, "grad_norm": 1.364485502243042, "learning_rate": 6.138899084389677e-06, "loss": 1.2659, "step": 9334 }, { "epoch": 1.6816032425129475, "grad_norm": 1.471837043762207, "learning_rate": 6.132102109252763e-06, "loss": 1.1493, "step": 9335 }, { "epoch": 1.6817833821211439, "grad_norm": 1.4224473237991333, "learning_rate": 6.125308653177247e-06, "loss": 1.3251, "step": 9336 }, { "epoch": 1.6819635217293403, "grad_norm": 1.3569118976593018, "learning_rate": 6.118518716708094e-06, "loss": 1.2716, "step": 9337 }, { "epoch": 1.6821436613375367, "grad_norm": 1.5761306285858154, "learning_rate": 6.1117323003900054e-06, "loss": 1.4294, "step": 9338 }, { "epoch": 1.682323800945733, "grad_norm": 1.5127991437911987, "learning_rate": 6.1049494047673736e-06, "loss": 1.3854, "step": 9339 }, { "epoch": 1.6825039405539293, "grad_norm": 1.5416299104690552, "learning_rate": 6.09817003038432e-06, "loss": 1.4102, "step": 9340 }, { "epoch": 1.6826840801621257, "grad_norm": 1.5679646730422974, "learning_rate": 6.091394177784681e-06, "loss": 1.4769, "step": 9341 }, { "epoch": 1.6828642197703219, "grad_norm": 1.5497963428497314, "learning_rate": 6.084621847512017e-06, "loss": 1.2456, "step": 9342 }, { "epoch": 1.6830443593785183, "grad_norm": 1.6438498497009277, "learning_rate": 6.077853040109593e-06, "loss": 1.4494, "step": 9343 }, { "epoch": 1.6832244989867147, "grad_norm": 1.6951961517333984, "learning_rate": 6.071087756120414e-06, "loss": 1.4121, "step": 9344 }, { "epoch": 1.683404638594911, "grad_norm": 1.3651118278503418, "learning_rate": 6.064325996087172e-06, "loss": 1.1641, "step": 9345 }, { "epoch": 1.6835847782031075, "grad_norm": 1.4344110488891602, "learning_rate": 6.057567760552285e-06, "loss": 1.2485, "step": 9346 }, { "epoch": 1.6837649178113039, "grad_norm": 1.5804636478424072, "learning_rate": 6.050813050057924e-06, "loss": 1.3798, "step": 9347 }, { "epoch": 1.6839450574195, "grad_norm": 1.6897717714309692, "learning_rate": 6.044061865145934e-06, "loss": 1.5196, "step": 9348 }, { "epoch": 1.6841251970276965, "grad_norm": 1.5246527194976807, "learning_rate": 6.037314206357886e-06, "loss": 1.2769, "step": 9349 }, { "epoch": 1.6843053366358927, "grad_norm": 1.5533305406570435, "learning_rate": 6.030570074235081e-06, "loss": 1.3791, "step": 9350 }, { "epoch": 1.684485476244089, "grad_norm": 1.7750823497772217, "learning_rate": 6.023829469318521e-06, "loss": 1.4337, "step": 9351 }, { "epoch": 1.6846656158522855, "grad_norm": 1.0884023904800415, "learning_rate": 6.017092392148943e-06, "loss": 1.3946, "step": 9352 }, { "epoch": 1.6848457554604819, "grad_norm": 1.3627616167068481, "learning_rate": 6.010358843266811e-06, "loss": 1.705, "step": 9353 }, { "epoch": 1.6850258950686783, "grad_norm": 1.3868818283081055, "learning_rate": 6.003628823212248e-06, "loss": 1.5316, "step": 9354 }, { "epoch": 1.6852060346768747, "grad_norm": 1.3360754251480103, "learning_rate": 5.996902332525145e-06, "loss": 1.6387, "step": 9355 }, { "epoch": 1.685386174285071, "grad_norm": 1.4895849227905273, "learning_rate": 5.990179371745108e-06, "loss": 1.9594, "step": 9356 }, { "epoch": 1.6855663138932673, "grad_norm": 1.4065252542495728, "learning_rate": 5.983459941411451e-06, "loss": 1.8091, "step": 9357 }, { "epoch": 1.6857464535014637, "grad_norm": 1.3718338012695312, "learning_rate": 5.976744042063198e-06, "loss": 1.5074, "step": 9358 }, { "epoch": 1.6859265931096599, "grad_norm": 1.6097397804260254, "learning_rate": 5.97003167423909e-06, "loss": 1.6376, "step": 9359 }, { "epoch": 1.6861067327178563, "grad_norm": 1.5765243768692017, "learning_rate": 5.963322838477592e-06, "loss": 1.5906, "step": 9360 }, { "epoch": 1.6862868723260527, "grad_norm": 1.573461890220642, "learning_rate": 5.956617535316888e-06, "loss": 1.8628, "step": 9361 }, { "epoch": 1.686467011934249, "grad_norm": 1.9458311796188354, "learning_rate": 5.9499157652948775e-06, "loss": 2.0262, "step": 9362 }, { "epoch": 1.6866471515424455, "grad_norm": 1.615006923675537, "learning_rate": 5.943217528949168e-06, "loss": 1.7074, "step": 9363 }, { "epoch": 1.686827291150642, "grad_norm": 1.247019648551941, "learning_rate": 5.936522826817092e-06, "loss": 1.0771, "step": 9364 }, { "epoch": 1.687007430758838, "grad_norm": 1.457051396369934, "learning_rate": 5.929831659435686e-06, "loss": 1.3708, "step": 9365 }, { "epoch": 1.6871875703670345, "grad_norm": 1.389910340309143, "learning_rate": 5.923144027341726e-06, "loss": 1.436, "step": 9366 }, { "epoch": 1.6873677099752307, "grad_norm": 1.5140084028244019, "learning_rate": 5.916459931071677e-06, "loss": 1.6493, "step": 9367 }, { "epoch": 1.687547849583427, "grad_norm": 1.4190173149108887, "learning_rate": 5.909779371161739e-06, "loss": 1.3909, "step": 9368 }, { "epoch": 1.6877279891916235, "grad_norm": 1.3805214166641235, "learning_rate": 5.903102348147821e-06, "loss": 1.4369, "step": 9369 }, { "epoch": 1.6879081287998199, "grad_norm": 1.4165334701538086, "learning_rate": 5.896428862565556e-06, "loss": 1.4759, "step": 9370 }, { "epoch": 1.6880882684080163, "grad_norm": 1.440468192100525, "learning_rate": 5.8897589149502886e-06, "loss": 1.2986, "step": 9371 }, { "epoch": 1.6882684080162127, "grad_norm": 1.5241209268569946, "learning_rate": 5.883092505837079e-06, "loss": 1.3565, "step": 9372 }, { "epoch": 1.6884485476244089, "grad_norm": 1.3354606628417969, "learning_rate": 5.876429635760699e-06, "loss": 1.3258, "step": 9373 }, { "epoch": 1.6886286872326053, "grad_norm": 1.402143120765686, "learning_rate": 5.86977030525564e-06, "loss": 1.2903, "step": 9374 }, { "epoch": 1.6888088268408015, "grad_norm": 1.5762059688568115, "learning_rate": 5.86311451485611e-06, "loss": 1.4327, "step": 9375 }, { "epoch": 1.6889889664489979, "grad_norm": 1.5700163841247559, "learning_rate": 5.856462265096041e-06, "loss": 1.422, "step": 9376 }, { "epoch": 1.6891691060571943, "grad_norm": 1.5477482080459595, "learning_rate": 5.849813556509059e-06, "loss": 1.429, "step": 9377 }, { "epoch": 1.6893492456653907, "grad_norm": 1.3903214931488037, "learning_rate": 5.843168389628534e-06, "loss": 1.1545, "step": 9378 }, { "epoch": 1.689529385273587, "grad_norm": 1.381422758102417, "learning_rate": 5.836526764987527e-06, "loss": 1.2447, "step": 9379 }, { "epoch": 1.6897095248817835, "grad_norm": 1.4002543687820435, "learning_rate": 5.829888683118834e-06, "loss": 1.3316, "step": 9380 }, { "epoch": 1.6898896644899797, "grad_norm": 1.3596376180648804, "learning_rate": 5.823254144554963e-06, "loss": 1.2525, "step": 9381 }, { "epoch": 1.690069804098176, "grad_norm": 1.468553900718689, "learning_rate": 5.816623149828121e-06, "loss": 1.4356, "step": 9382 }, { "epoch": 1.6902499437063725, "grad_norm": 1.5850248336791992, "learning_rate": 5.809995699470239e-06, "loss": 1.5161, "step": 9383 }, { "epoch": 1.6904300833145687, "grad_norm": 1.5036015510559082, "learning_rate": 5.80337179401299e-06, "loss": 1.4627, "step": 9384 }, { "epoch": 1.690610222922765, "grad_norm": 1.4955673217773438, "learning_rate": 5.796751433987729e-06, "loss": 1.257, "step": 9385 }, { "epoch": 1.6907903625309615, "grad_norm": 1.4623759984970093, "learning_rate": 5.790134619925547e-06, "loss": 1.3062, "step": 9386 }, { "epoch": 1.690970502139158, "grad_norm": 1.423148512840271, "learning_rate": 5.7835213523572244e-06, "loss": 1.3111, "step": 9387 }, { "epoch": 1.6911506417473543, "grad_norm": 1.6185672283172607, "learning_rate": 5.77691163181327e-06, "loss": 1.4842, "step": 9388 }, { "epoch": 1.6913307813555507, "grad_norm": 1.4737343788146973, "learning_rate": 5.770305458823932e-06, "loss": 1.3992, "step": 9389 }, { "epoch": 1.691510920963747, "grad_norm": 1.5779510736465454, "learning_rate": 5.763702833919149e-06, "loss": 1.3094, "step": 9390 }, { "epoch": 1.6916910605719433, "grad_norm": 1.5953152179718018, "learning_rate": 5.757103757628573e-06, "loss": 1.3171, "step": 9391 }, { "epoch": 1.6918712001801395, "grad_norm": 1.5416433811187744, "learning_rate": 5.750508230481588e-06, "loss": 1.14, "step": 9392 }, { "epoch": 1.6920513397883359, "grad_norm": 1.4649804830551147, "learning_rate": 5.74391625300727e-06, "loss": 1.4061, "step": 9393 }, { "epoch": 1.6922314793965323, "grad_norm": 1.6010898351669312, "learning_rate": 5.737327825734445e-06, "loss": 1.3209, "step": 9394 }, { "epoch": 1.6924116190047287, "grad_norm": 1.6245843172073364, "learning_rate": 5.73074294919162e-06, "loss": 1.475, "step": 9395 }, { "epoch": 1.692591758612925, "grad_norm": 1.4712828397750854, "learning_rate": 5.724161623907032e-06, "loss": 1.3641, "step": 9396 }, { "epoch": 1.6927718982211215, "grad_norm": 1.6046422719955444, "learning_rate": 5.717583850408631e-06, "loss": 1.2939, "step": 9397 }, { "epoch": 1.6929520378293177, "grad_norm": 1.5541064739227295, "learning_rate": 5.71100962922409e-06, "loss": 1.4546, "step": 9398 }, { "epoch": 1.693132177437514, "grad_norm": 1.495434284210205, "learning_rate": 5.704438960880781e-06, "loss": 1.3868, "step": 9399 }, { "epoch": 1.6933123170457103, "grad_norm": 1.4846793413162231, "learning_rate": 5.6978718459058e-06, "loss": 1.3019, "step": 9400 }, { "epoch": 1.6934924566539067, "grad_norm": 1.448927402496338, "learning_rate": 5.691308284825964e-06, "loss": 1.0433, "step": 9401 }, { "epoch": 1.693672596262103, "grad_norm": 1.3860310316085815, "learning_rate": 5.6847482781677845e-06, "loss": 1.638, "step": 9402 }, { "epoch": 1.6938527358702995, "grad_norm": 1.4743907451629639, "learning_rate": 5.678191826457524e-06, "loss": 1.8487, "step": 9403 }, { "epoch": 1.694032875478496, "grad_norm": 1.6351152658462524, "learning_rate": 5.671638930221129e-06, "loss": 2.0537, "step": 9404 }, { "epoch": 1.6942130150866923, "grad_norm": 1.420829176902771, "learning_rate": 5.665089589984263e-06, "loss": 1.8793, "step": 9405 }, { "epoch": 1.6943931546948885, "grad_norm": 1.4770466089248657, "learning_rate": 5.65854380627232e-06, "loss": 1.8737, "step": 9406 }, { "epoch": 1.694573294303085, "grad_norm": 1.6435444355010986, "learning_rate": 5.652001579610389e-06, "loss": 1.8561, "step": 9407 }, { "epoch": 1.694753433911281, "grad_norm": 1.4760105609893799, "learning_rate": 5.645462910523297e-06, "loss": 1.558, "step": 9408 }, { "epoch": 1.6949335735194775, "grad_norm": 1.5246566534042358, "learning_rate": 5.6389277995355595e-06, "loss": 1.648, "step": 9409 }, { "epoch": 1.695113713127674, "grad_norm": 1.4946370124816895, "learning_rate": 5.6323962471714286e-06, "loss": 1.5447, "step": 9410 }, { "epoch": 1.6952938527358703, "grad_norm": 1.8241276741027832, "learning_rate": 5.625868253954852e-06, "loss": 1.982, "step": 9411 }, { "epoch": 1.6954739923440667, "grad_norm": 1.7427071332931519, "learning_rate": 5.619343820409517e-06, "loss": 1.5575, "step": 9412 }, { "epoch": 1.695654131952263, "grad_norm": 1.602441430091858, "learning_rate": 5.612822947058805e-06, "loss": 1.6197, "step": 9413 }, { "epoch": 1.6958342715604595, "grad_norm": 1.5788562297821045, "learning_rate": 5.606305634425818e-06, "loss": 1.6529, "step": 9414 }, { "epoch": 1.6960144111686557, "grad_norm": 1.4368292093276978, "learning_rate": 5.599791883033368e-06, "loss": 1.4649, "step": 9415 }, { "epoch": 1.696194550776852, "grad_norm": 1.597377896308899, "learning_rate": 5.593281693403974e-06, "loss": 1.3605, "step": 9416 }, { "epoch": 1.6963746903850483, "grad_norm": 1.4312036037445068, "learning_rate": 5.586775066059907e-06, "loss": 1.4693, "step": 9417 }, { "epoch": 1.6965548299932447, "grad_norm": 1.3958052396774292, "learning_rate": 5.580272001523118e-06, "loss": 1.3418, "step": 9418 }, { "epoch": 1.696734969601441, "grad_norm": 1.4030656814575195, "learning_rate": 5.573772500315266e-06, "loss": 1.2413, "step": 9419 }, { "epoch": 1.6969151092096375, "grad_norm": 1.4695589542388916, "learning_rate": 5.5672765629577464e-06, "loss": 1.5439, "step": 9420 }, { "epoch": 1.697095248817834, "grad_norm": 1.5302057266235352, "learning_rate": 5.56078418997164e-06, "loss": 1.5623, "step": 9421 }, { "epoch": 1.6972753884260303, "grad_norm": 1.4400105476379395, "learning_rate": 5.5542953818778e-06, "loss": 1.2953, "step": 9422 }, { "epoch": 1.6974555280342265, "grad_norm": 1.526248574256897, "learning_rate": 5.5478101391967294e-06, "loss": 1.5861, "step": 9423 }, { "epoch": 1.697635667642423, "grad_norm": 1.382607102394104, "learning_rate": 5.541328462448681e-06, "loss": 1.3512, "step": 9424 }, { "epoch": 1.697815807250619, "grad_norm": 1.459639549255371, "learning_rate": 5.534850352153598e-06, "loss": 1.4039, "step": 9425 }, { "epoch": 1.6979959468588155, "grad_norm": 1.4033910036087036, "learning_rate": 5.528375808831171e-06, "loss": 1.2526, "step": 9426 }, { "epoch": 1.698176086467012, "grad_norm": 1.4770731925964355, "learning_rate": 5.521904833000774e-06, "loss": 1.4194, "step": 9427 }, { "epoch": 1.6983562260752083, "grad_norm": 1.5288301706314087, "learning_rate": 5.515437425181508e-06, "loss": 1.4453, "step": 9428 }, { "epoch": 1.6985363656834047, "grad_norm": 1.61703622341156, "learning_rate": 5.508973585892185e-06, "loss": 1.41, "step": 9429 }, { "epoch": 1.6987165052916011, "grad_norm": 1.382460117340088, "learning_rate": 5.5025133156513255e-06, "loss": 1.3888, "step": 9430 }, { "epoch": 1.6988966448997973, "grad_norm": 1.3052409887313843, "learning_rate": 5.49605661497718e-06, "loss": 1.1898, "step": 9431 }, { "epoch": 1.6990767845079937, "grad_norm": 1.4444466829299927, "learning_rate": 5.48960348438769e-06, "loss": 1.2514, "step": 9432 }, { "epoch": 1.69925692411619, "grad_norm": 1.455181360244751, "learning_rate": 5.48315392440053e-06, "loss": 1.3771, "step": 9433 }, { "epoch": 1.6994370637243863, "grad_norm": 1.5011643171310425, "learning_rate": 5.476707935533082e-06, "loss": 1.3661, "step": 9434 }, { "epoch": 1.6996172033325827, "grad_norm": 1.4729206562042236, "learning_rate": 5.470265518302425e-06, "loss": 1.2943, "step": 9435 }, { "epoch": 1.699797342940779, "grad_norm": 1.5996304750442505, "learning_rate": 5.463826673225392e-06, "loss": 1.524, "step": 9436 }, { "epoch": 1.6999774825489755, "grad_norm": 1.5848191976547241, "learning_rate": 5.457391400818484e-06, "loss": 1.4143, "step": 9437 }, { "epoch": 1.700157622157172, "grad_norm": 1.4248039722442627, "learning_rate": 5.450959701597946e-06, "loss": 1.2437, "step": 9438 }, { "epoch": 1.700337761765368, "grad_norm": 1.6933271884918213, "learning_rate": 5.44453157607972e-06, "loss": 1.3748, "step": 9439 }, { "epoch": 1.7005179013735645, "grad_norm": 1.5392844676971436, "learning_rate": 5.438107024779471e-06, "loss": 1.2576, "step": 9440 }, { "epoch": 1.700698040981761, "grad_norm": 1.4799362421035767, "learning_rate": 5.43168604821257e-06, "loss": 1.3125, "step": 9441 }, { "epoch": 1.700878180589957, "grad_norm": 1.6582175493240356, "learning_rate": 5.425268646894105e-06, "loss": 1.4644, "step": 9442 }, { "epoch": 1.7010583201981535, "grad_norm": 1.523781657218933, "learning_rate": 5.418854821338876e-06, "loss": 1.2672, "step": 9443 }, { "epoch": 1.70123845980635, "grad_norm": 1.5922198295593262, "learning_rate": 5.412444572061387e-06, "loss": 1.5397, "step": 9444 }, { "epoch": 1.7014185994145463, "grad_norm": 1.6494568586349487, "learning_rate": 5.406037899575894e-06, "loss": 1.4975, "step": 9445 }, { "epoch": 1.7015987390227427, "grad_norm": 1.7342473268508911, "learning_rate": 5.399634804396314e-06, "loss": 1.4612, "step": 9446 }, { "epoch": 1.7017788786309391, "grad_norm": 1.7269192934036255, "learning_rate": 5.393235287036308e-06, "loss": 1.4092, "step": 9447 }, { "epoch": 1.7019590182391353, "grad_norm": 1.401110053062439, "learning_rate": 5.386839348009226e-06, "loss": 1.0819, "step": 9448 }, { "epoch": 1.7021391578473317, "grad_norm": 1.6218016147613525, "learning_rate": 5.38044698782818e-06, "loss": 1.463, "step": 9449 }, { "epoch": 1.702319297455528, "grad_norm": 1.578056812286377, "learning_rate": 5.374058207005944e-06, "loss": 1.4303, "step": 9450 }, { "epoch": 1.7024994370637243, "grad_norm": 1.610072135925293, "learning_rate": 5.367673006055018e-06, "loss": 1.5022, "step": 9451 }, { "epoch": 1.7026795766719207, "grad_norm": 1.4195905923843384, "learning_rate": 5.361291385487621e-06, "loss": 1.6489, "step": 9452 }, { "epoch": 1.7028597162801171, "grad_norm": 1.4838440418243408, "learning_rate": 5.354913345815676e-06, "loss": 1.8911, "step": 9453 }, { "epoch": 1.7030398558883135, "grad_norm": 1.364926815032959, "learning_rate": 5.3485388875508525e-06, "loss": 1.7884, "step": 9454 }, { "epoch": 1.70321999549651, "grad_norm": 1.4396288394927979, "learning_rate": 5.342168011204485e-06, "loss": 1.6244, "step": 9455 }, { "epoch": 1.7034001351047061, "grad_norm": 1.4937666654586792, "learning_rate": 5.335800717287648e-06, "loss": 1.7083, "step": 9456 }, { "epoch": 1.7035802747129025, "grad_norm": 1.5352312326431274, "learning_rate": 5.329437006311122e-06, "loss": 1.8709, "step": 9457 }, { "epoch": 1.7037604143210987, "grad_norm": 1.511246681213379, "learning_rate": 5.323076878785394e-06, "loss": 1.943, "step": 9458 }, { "epoch": 1.703940553929295, "grad_norm": 1.666136384010315, "learning_rate": 5.316720335220682e-06, "loss": 1.7991, "step": 9459 }, { "epoch": 1.7041206935374915, "grad_norm": 1.6682777404785156, "learning_rate": 5.310367376126907e-06, "loss": 1.8095, "step": 9460 }, { "epoch": 1.704300833145688, "grad_norm": 1.6242525577545166, "learning_rate": 5.304018002013688e-06, "loss": 1.7541, "step": 9461 }, { "epoch": 1.7044809727538843, "grad_norm": 1.6742832660675049, "learning_rate": 5.2976722133903755e-06, "loss": 1.6623, "step": 9462 }, { "epoch": 1.7046611123620807, "grad_norm": 1.5368788242340088, "learning_rate": 5.291330010766027e-06, "loss": 1.7078, "step": 9463 }, { "epoch": 1.704841251970277, "grad_norm": 1.4520397186279297, "learning_rate": 5.284991394649408e-06, "loss": 1.583, "step": 9464 }, { "epoch": 1.7050213915784733, "grad_norm": 1.5874533653259277, "learning_rate": 5.278656365548995e-06, "loss": 1.6232, "step": 9465 }, { "epoch": 1.7052015311866695, "grad_norm": 1.264407753944397, "learning_rate": 5.272324923972988e-06, "loss": 1.1589, "step": 9466 }, { "epoch": 1.705381670794866, "grad_norm": 1.468383550643921, "learning_rate": 5.2659970704292795e-06, "loss": 1.306, "step": 9467 }, { "epoch": 1.7055618104030623, "grad_norm": 1.4647568464279175, "learning_rate": 5.259672805425509e-06, "loss": 1.5583, "step": 9468 }, { "epoch": 1.7057419500112587, "grad_norm": 1.4793637990951538, "learning_rate": 5.253352129468991e-06, "loss": 1.343, "step": 9469 }, { "epoch": 1.7059220896194551, "grad_norm": 1.5027028322219849, "learning_rate": 5.2470350430667725e-06, "loss": 1.3803, "step": 9470 }, { "epoch": 1.7061022292276515, "grad_norm": 1.3048604726791382, "learning_rate": 5.240721546725602e-06, "loss": 1.1931, "step": 9471 }, { "epoch": 1.706282368835848, "grad_norm": 1.4758661985397339, "learning_rate": 5.234411640951947e-06, "loss": 1.5332, "step": 9472 }, { "epoch": 1.7064625084440441, "grad_norm": 1.5689239501953125, "learning_rate": 5.228105326251986e-06, "loss": 1.4179, "step": 9473 }, { "epoch": 1.7066426480522405, "grad_norm": 1.4091473817825317, "learning_rate": 5.221802603131609e-06, "loss": 1.3525, "step": 9474 }, { "epoch": 1.7068227876604367, "grad_norm": 1.368336796760559, "learning_rate": 5.215503472096417e-06, "loss": 1.2375, "step": 9475 }, { "epoch": 1.7070029272686331, "grad_norm": 1.6077932119369507, "learning_rate": 5.2092079336517104e-06, "loss": 1.4976, "step": 9476 }, { "epoch": 1.7071830668768295, "grad_norm": 1.5955305099487305, "learning_rate": 5.202915988302542e-06, "loss": 1.4345, "step": 9477 }, { "epoch": 1.707363206485026, "grad_norm": 1.3216801881790161, "learning_rate": 5.19662763655363e-06, "loss": 1.3437, "step": 9478 }, { "epoch": 1.7075433460932223, "grad_norm": 1.474382758140564, "learning_rate": 5.190342878909421e-06, "loss": 1.4399, "step": 9479 }, { "epoch": 1.7077234857014187, "grad_norm": 1.4468094110488892, "learning_rate": 5.184061715874089e-06, "loss": 1.3137, "step": 9480 }, { "epoch": 1.707903625309615, "grad_norm": 1.406829595565796, "learning_rate": 5.177784147951481e-06, "loss": 1.3677, "step": 9481 }, { "epoch": 1.7080837649178113, "grad_norm": 1.3837640285491943, "learning_rate": 5.171510175645211e-06, "loss": 1.1095, "step": 9482 }, { "epoch": 1.7082639045260075, "grad_norm": 1.5724126100540161, "learning_rate": 5.165239799458565e-06, "loss": 1.2619, "step": 9483 }, { "epoch": 1.708444044134204, "grad_norm": 1.479369878768921, "learning_rate": 5.158973019894536e-06, "loss": 1.2666, "step": 9484 }, { "epoch": 1.7086241837424003, "grad_norm": 1.56902015209198, "learning_rate": 5.152709837455849e-06, "loss": 1.3841, "step": 9485 }, { "epoch": 1.7088043233505967, "grad_norm": 1.6762185096740723, "learning_rate": 5.146450252644924e-06, "loss": 1.3552, "step": 9486 }, { "epoch": 1.7089844629587931, "grad_norm": 1.6517653465270996, "learning_rate": 5.140194265963927e-06, "loss": 1.3634, "step": 9487 }, { "epoch": 1.7091646025669895, "grad_norm": 1.5506644248962402, "learning_rate": 5.133941877914694e-06, "loss": 1.3462, "step": 9488 }, { "epoch": 1.7093447421751857, "grad_norm": 1.483857274055481, "learning_rate": 5.127693088998786e-06, "loss": 1.5136, "step": 9489 }, { "epoch": 1.7095248817833821, "grad_norm": 1.7106719017028809, "learning_rate": 5.121447899717474e-06, "loss": 1.3607, "step": 9490 }, { "epoch": 1.7097050213915783, "grad_norm": 1.4973533153533936, "learning_rate": 5.115206310571763e-06, "loss": 1.1919, "step": 9491 }, { "epoch": 1.7098851609997747, "grad_norm": 1.7351492643356323, "learning_rate": 5.1089683220623406e-06, "loss": 1.5578, "step": 9492 }, { "epoch": 1.7100653006079711, "grad_norm": 1.389843463897705, "learning_rate": 5.102733934689613e-06, "loss": 1.1202, "step": 9493 }, { "epoch": 1.7102454402161675, "grad_norm": 1.5885761976242065, "learning_rate": 5.096503148953697e-06, "loss": 1.4481, "step": 9494 }, { "epoch": 1.710425579824364, "grad_norm": 1.4827508926391602, "learning_rate": 5.090275965354429e-06, "loss": 1.2605, "step": 9495 }, { "epoch": 1.7106057194325603, "grad_norm": 1.606197714805603, "learning_rate": 5.084052384391346e-06, "loss": 1.5177, "step": 9496 }, { "epoch": 1.7107858590407568, "grad_norm": 1.622270107269287, "learning_rate": 5.077832406563704e-06, "loss": 1.377, "step": 9497 }, { "epoch": 1.710965998648953, "grad_norm": 1.5996332168579102, "learning_rate": 5.071616032370469e-06, "loss": 1.2593, "step": 9498 }, { "epoch": 1.7111461382571493, "grad_norm": 1.6517282724380493, "learning_rate": 5.065403262310309e-06, "loss": 1.5048, "step": 9499 }, { "epoch": 1.7113262778653455, "grad_norm": 1.6243906021118164, "learning_rate": 5.059194096881603e-06, "loss": 1.1542, "step": 9500 }, { "epoch": 1.711506417473542, "grad_norm": 1.429052472114563, "learning_rate": 5.05298853658247e-06, "loss": 1.0496, "step": 9501 }, { "epoch": 1.7116865570817383, "grad_norm": 1.4577680826187134, "learning_rate": 5.046786581910701e-06, "loss": 1.925, "step": 9502 }, { "epoch": 1.7118666966899347, "grad_norm": 1.3322657346725464, "learning_rate": 5.040588233363813e-06, "loss": 1.6145, "step": 9503 }, { "epoch": 1.7120468362981311, "grad_norm": 1.3788909912109375, "learning_rate": 5.034393491439043e-06, "loss": 1.69, "step": 9504 }, { "epoch": 1.7122269759063276, "grad_norm": 1.3408071994781494, "learning_rate": 5.0282023566333215e-06, "loss": 1.5195, "step": 9505 }, { "epoch": 1.7124071155145237, "grad_norm": 1.4907881021499634, "learning_rate": 5.0220148294433055e-06, "loss": 1.7621, "step": 9506 }, { "epoch": 1.7125872551227201, "grad_norm": 1.6795850992202759, "learning_rate": 5.015830910365349e-06, "loss": 1.6156, "step": 9507 }, { "epoch": 1.7127673947309163, "grad_norm": 1.6554476022720337, "learning_rate": 5.009650599895527e-06, "loss": 2.0536, "step": 9508 }, { "epoch": 1.7129475343391127, "grad_norm": 1.539005994796753, "learning_rate": 5.0034738985296095e-06, "loss": 1.6712, "step": 9509 }, { "epoch": 1.7131276739473091, "grad_norm": 2.006694793701172, "learning_rate": 4.997300806763111e-06, "loss": 1.874, "step": 9510 }, { "epoch": 1.7133078135555055, "grad_norm": 2.0179882049560547, "learning_rate": 4.99113132509122e-06, "loss": 1.8106, "step": 9511 }, { "epoch": 1.713487953163702, "grad_norm": 1.5218029022216797, "learning_rate": 4.98496545400885e-06, "loss": 1.3044, "step": 9512 }, { "epoch": 1.7136680927718984, "grad_norm": 1.4121038913726807, "learning_rate": 4.978803194010623e-06, "loss": 1.4674, "step": 9513 }, { "epoch": 1.7138482323800945, "grad_norm": 1.510233759880066, "learning_rate": 4.972644545590871e-06, "loss": 1.5547, "step": 9514 }, { "epoch": 1.714028371988291, "grad_norm": 1.3907474279403687, "learning_rate": 4.966489509243655e-06, "loss": 1.4112, "step": 9515 }, { "epoch": 1.7142085115964871, "grad_norm": 1.3813812732696533, "learning_rate": 4.960338085462713e-06, "loss": 1.3058, "step": 9516 }, { "epoch": 1.7143886512046835, "grad_norm": 1.4947965145111084, "learning_rate": 4.9541902747415045e-06, "loss": 1.521, "step": 9517 }, { "epoch": 1.71456879081288, "grad_norm": 1.4388364553451538, "learning_rate": 4.948046077573204e-06, "loss": 1.3979, "step": 9518 }, { "epoch": 1.7147489304210763, "grad_norm": 1.4114837646484375, "learning_rate": 4.941905494450716e-06, "loss": 1.2735, "step": 9519 }, { "epoch": 1.7149290700292728, "grad_norm": 1.451129674911499, "learning_rate": 4.935768525866618e-06, "loss": 1.3008, "step": 9520 }, { "epoch": 1.7151092096374692, "grad_norm": 1.3653805255889893, "learning_rate": 4.929635172313219e-06, "loss": 1.2658, "step": 9521 }, { "epoch": 1.7152893492456653, "grad_norm": 1.4697612524032593, "learning_rate": 4.923505434282538e-06, "loss": 1.3208, "step": 9522 }, { "epoch": 1.7154694888538617, "grad_norm": 1.2516428232192993, "learning_rate": 4.917379312266279e-06, "loss": 1.103, "step": 9523 }, { "epoch": 1.715649628462058, "grad_norm": 1.42966628074646, "learning_rate": 4.911256806755909e-06, "loss": 1.3999, "step": 9524 }, { "epoch": 1.7158297680702543, "grad_norm": 1.522367238998413, "learning_rate": 4.9051379182425525e-06, "loss": 1.4984, "step": 9525 }, { "epoch": 1.7160099076784507, "grad_norm": 1.519760251045227, "learning_rate": 4.899022647217066e-06, "loss": 1.5008, "step": 9526 }, { "epoch": 1.7161900472866471, "grad_norm": 1.4056733846664429, "learning_rate": 4.892910994170019e-06, "loss": 1.4095, "step": 9527 }, { "epoch": 1.7163701868948436, "grad_norm": 1.4832727909088135, "learning_rate": 4.886802959591674e-06, "loss": 1.5056, "step": 9528 }, { "epoch": 1.71655032650304, "grad_norm": 1.3613221645355225, "learning_rate": 4.880698543972029e-06, "loss": 1.3821, "step": 9529 }, { "epoch": 1.7167304661112364, "grad_norm": 1.445778727531433, "learning_rate": 4.874597747800769e-06, "loss": 1.2269, "step": 9530 }, { "epoch": 1.7169106057194325, "grad_norm": 1.4002124071121216, "learning_rate": 4.8685005715672915e-06, "loss": 1.1958, "step": 9531 }, { "epoch": 1.717090745327629, "grad_norm": 1.5762535333633423, "learning_rate": 4.862407015760712e-06, "loss": 1.6436, "step": 9532 }, { "epoch": 1.7172708849358251, "grad_norm": 1.7474557161331177, "learning_rate": 4.856317080869865e-06, "loss": 1.6265, "step": 9533 }, { "epoch": 1.7174510245440215, "grad_norm": 1.5225788354873657, "learning_rate": 4.850230767383274e-06, "loss": 1.408, "step": 9534 }, { "epoch": 1.717631164152218, "grad_norm": 1.4675949811935425, "learning_rate": 4.844148075789173e-06, "loss": 1.3727, "step": 9535 }, { "epoch": 1.7178113037604144, "grad_norm": 1.518534541130066, "learning_rate": 4.838069006575524e-06, "loss": 1.5101, "step": 9536 }, { "epoch": 1.7179914433686108, "grad_norm": 1.394788146018982, "learning_rate": 4.8319935602299805e-06, "loss": 1.2311, "step": 9537 }, { "epoch": 1.7181715829768072, "grad_norm": 1.578537106513977, "learning_rate": 4.825921737239914e-06, "loss": 1.3598, "step": 9538 }, { "epoch": 1.7183517225850033, "grad_norm": 1.6075000762939453, "learning_rate": 4.8198535380924e-06, "loss": 1.4754, "step": 9539 }, { "epoch": 1.7185318621931998, "grad_norm": 1.4809800386428833, "learning_rate": 4.81378896327423e-06, "loss": 1.4137, "step": 9540 }, { "epoch": 1.718712001801396, "grad_norm": 1.6020078659057617, "learning_rate": 4.8077280132719006e-06, "loss": 1.3505, "step": 9541 }, { "epoch": 1.7188921414095923, "grad_norm": 1.5865106582641602, "learning_rate": 4.801670688571608e-06, "loss": 1.4025, "step": 9542 }, { "epoch": 1.7190722810177888, "grad_norm": 1.6301887035369873, "learning_rate": 4.795616989659285e-06, "loss": 1.4478, "step": 9543 }, { "epoch": 1.7192524206259852, "grad_norm": 1.5299969911575317, "learning_rate": 4.789566917020549e-06, "loss": 1.3583, "step": 9544 }, { "epoch": 1.7194325602341816, "grad_norm": 1.5433638095855713, "learning_rate": 4.783520471140735e-06, "loss": 1.4669, "step": 9545 }, { "epoch": 1.719612699842378, "grad_norm": 1.5660542249679565, "learning_rate": 4.777477652504875e-06, "loss": 1.3841, "step": 9546 }, { "epoch": 1.7197928394505742, "grad_norm": 1.6740514039993286, "learning_rate": 4.771438461597744e-06, "loss": 1.5447, "step": 9547 }, { "epoch": 1.7199729790587706, "grad_norm": 1.7518460750579834, "learning_rate": 4.765402898903792e-06, "loss": 1.6251, "step": 9548 }, { "epoch": 1.7201531186669667, "grad_norm": 1.6132197380065918, "learning_rate": 4.759370964907184e-06, "loss": 1.3366, "step": 9549 }, { "epoch": 1.7203332582751631, "grad_norm": 1.5110498666763306, "learning_rate": 4.753342660091797e-06, "loss": 1.3389, "step": 9550 }, { "epoch": 1.7205133978833596, "grad_norm": 1.4261059761047363, "learning_rate": 4.747317984941213e-06, "loss": 0.8839, "step": 9551 }, { "epoch": 1.720693537491556, "grad_norm": 1.5000559091567993, "learning_rate": 4.7412969399387495e-06, "loss": 1.8664, "step": 9552 }, { "epoch": 1.7208736770997524, "grad_norm": 1.3451316356658936, "learning_rate": 4.735279525567399e-06, "loss": 1.766, "step": 9553 }, { "epoch": 1.7210538167079488, "grad_norm": 1.648999571800232, "learning_rate": 4.72926574230988e-06, "loss": 1.7428, "step": 9554 }, { "epoch": 1.7212339563161452, "grad_norm": 1.5035638809204102, "learning_rate": 4.723255590648601e-06, "loss": 1.9098, "step": 9555 }, { "epoch": 1.7214140959243414, "grad_norm": 1.4117939472198486, "learning_rate": 4.7172490710657155e-06, "loss": 1.8428, "step": 9556 }, { "epoch": 1.7215942355325378, "grad_norm": 1.565474033355713, "learning_rate": 4.71124618404305e-06, "loss": 1.8322, "step": 9557 }, { "epoch": 1.721774375140734, "grad_norm": 1.5569462776184082, "learning_rate": 4.705246930062163e-06, "loss": 2.1899, "step": 9558 }, { "epoch": 1.7219545147489304, "grad_norm": 1.4860347509384155, "learning_rate": 4.6992513096042965e-06, "loss": 1.7084, "step": 9559 }, { "epoch": 1.7221346543571268, "grad_norm": 1.6857378482818604, "learning_rate": 4.693259323150428e-06, "loss": 1.8986, "step": 9560 }, { "epoch": 1.7223147939653232, "grad_norm": 1.7689216136932373, "learning_rate": 4.687270971181234e-06, "loss": 1.7561, "step": 9561 }, { "epoch": 1.7224949335735196, "grad_norm": 1.583787202835083, "learning_rate": 4.6812862541770865e-06, "loss": 1.7573, "step": 9562 }, { "epoch": 1.722675073181716, "grad_norm": 1.6196471452713013, "learning_rate": 4.675305172618083e-06, "loss": 1.7679, "step": 9563 }, { "epoch": 1.7228552127899122, "grad_norm": 1.5555570125579834, "learning_rate": 4.669327726984019e-06, "loss": 1.7591, "step": 9564 }, { "epoch": 1.7230353523981086, "grad_norm": 1.3879055976867676, "learning_rate": 4.663353917754399e-06, "loss": 1.3522, "step": 9565 }, { "epoch": 1.7232154920063047, "grad_norm": 1.4087027311325073, "learning_rate": 4.65738374540845e-06, "loss": 1.4467, "step": 9566 }, { "epoch": 1.7233956316145012, "grad_norm": 1.3224717378616333, "learning_rate": 4.651417210425097e-06, "loss": 1.2774, "step": 9567 }, { "epoch": 1.7235757712226976, "grad_norm": 1.3739231824874878, "learning_rate": 4.645454313282965e-06, "loss": 1.3561, "step": 9568 }, { "epoch": 1.723755910830894, "grad_norm": 1.5082818269729614, "learning_rate": 4.6394950544603966e-06, "loss": 1.5909, "step": 9569 }, { "epoch": 1.7239360504390904, "grad_norm": 1.4290140867233276, "learning_rate": 4.633539434435441e-06, "loss": 1.3721, "step": 9570 }, { "epoch": 1.7241161900472868, "grad_norm": 1.546866774559021, "learning_rate": 4.62758745368585e-06, "loss": 1.658, "step": 9571 }, { "epoch": 1.724296329655483, "grad_norm": 1.5424435138702393, "learning_rate": 4.621639112689096e-06, "loss": 1.409, "step": 9572 }, { "epoch": 1.7244764692636794, "grad_norm": 1.443482756614685, "learning_rate": 4.615694411922355e-06, "loss": 1.5087, "step": 9573 }, { "epoch": 1.7246566088718756, "grad_norm": 1.4095914363861084, "learning_rate": 4.609753351862489e-06, "loss": 1.3148, "step": 9574 }, { "epoch": 1.724836748480072, "grad_norm": 1.5599360466003418, "learning_rate": 4.603815932986105e-06, "loss": 1.6604, "step": 9575 }, { "epoch": 1.7250168880882684, "grad_norm": 1.382007360458374, "learning_rate": 4.597882155769501e-06, "loss": 1.2242, "step": 9576 }, { "epoch": 1.7251970276964648, "grad_norm": 1.5322506427764893, "learning_rate": 4.591952020688673e-06, "loss": 1.406, "step": 9577 }, { "epoch": 1.7253771673046612, "grad_norm": 1.4547069072723389, "learning_rate": 4.58602552821934e-06, "loss": 1.4651, "step": 9578 }, { "epoch": 1.7255573069128576, "grad_norm": 1.5488733053207397, "learning_rate": 4.580102678836906e-06, "loss": 1.4405, "step": 9579 }, { "epoch": 1.7257374465210538, "grad_norm": 1.599420189857483, "learning_rate": 4.57418347301653e-06, "loss": 1.4575, "step": 9580 }, { "epoch": 1.7259175861292502, "grad_norm": 1.4575682878494263, "learning_rate": 4.568267911233021e-06, "loss": 1.3867, "step": 9581 }, { "epoch": 1.7260977257374466, "grad_norm": 1.4409619569778442, "learning_rate": 4.562355993960931e-06, "loss": 1.2522, "step": 9582 }, { "epoch": 1.7262778653456428, "grad_norm": 1.6051923036575317, "learning_rate": 4.556447721674506e-06, "loss": 1.4629, "step": 9583 }, { "epoch": 1.7264580049538392, "grad_norm": 1.5101816654205322, "learning_rate": 4.5505430948477145e-06, "loss": 1.4694, "step": 9584 }, { "epoch": 1.7266381445620356, "grad_norm": 1.429427981376648, "learning_rate": 4.544642113954223e-06, "loss": 1.2766, "step": 9585 }, { "epoch": 1.726818284170232, "grad_norm": 1.5331566333770752, "learning_rate": 4.538744779467396e-06, "loss": 1.6293, "step": 9586 }, { "epoch": 1.7269984237784284, "grad_norm": 1.4530586004257202, "learning_rate": 4.532851091860324e-06, "loss": 1.2788, "step": 9587 }, { "epoch": 1.7271785633866248, "grad_norm": 1.541965126991272, "learning_rate": 4.526961051605777e-06, "loss": 1.3954, "step": 9588 }, { "epoch": 1.727358702994821, "grad_norm": 1.6654447317123413, "learning_rate": 4.521074659176277e-06, "loss": 1.4566, "step": 9589 }, { "epoch": 1.7275388426030174, "grad_norm": 1.4552208185195923, "learning_rate": 4.515191915044014e-06, "loss": 1.2776, "step": 9590 }, { "epoch": 1.7277189822112136, "grad_norm": 1.4932048320770264, "learning_rate": 4.509312819680905e-06, "loss": 1.4921, "step": 9591 }, { "epoch": 1.72789912181941, "grad_norm": 1.4901301860809326, "learning_rate": 4.5034373735585664e-06, "loss": 1.3097, "step": 9592 }, { "epoch": 1.7280792614276064, "grad_norm": 1.4690343141555786, "learning_rate": 4.49756557714831e-06, "loss": 1.2068, "step": 9593 }, { "epoch": 1.7282594010358028, "grad_norm": 1.6519129276275635, "learning_rate": 4.491697430921182e-06, "loss": 1.3556, "step": 9594 }, { "epoch": 1.7284395406439992, "grad_norm": 1.4559097290039062, "learning_rate": 4.485832935347928e-06, "loss": 1.1919, "step": 9595 }, { "epoch": 1.7286196802521956, "grad_norm": 1.389897346496582, "learning_rate": 4.479972090898982e-06, "loss": 1.2374, "step": 9596 }, { "epoch": 1.7287998198603918, "grad_norm": 1.7157933712005615, "learning_rate": 4.47411489804449e-06, "loss": 1.4309, "step": 9597 }, { "epoch": 1.7289799594685882, "grad_norm": 1.5302042961120605, "learning_rate": 4.468261357254339e-06, "loss": 1.3498, "step": 9598 }, { "epoch": 1.7291600990767844, "grad_norm": 1.4530069828033447, "learning_rate": 4.462411468998085e-06, "loss": 1.2727, "step": 9599 }, { "epoch": 1.7293402386849808, "grad_norm": 1.5920439958572388, "learning_rate": 4.456565233745003e-06, "loss": 1.1456, "step": 9600 }, { "epoch": 1.7295203782931772, "grad_norm": 1.650061845779419, "learning_rate": 4.450722651964073e-06, "loss": 1.2532, "step": 9601 }, { "epoch": 1.7297005179013736, "grad_norm": 1.317950963973999, "learning_rate": 4.444883724123983e-06, "loss": 1.7089, "step": 9602 }, { "epoch": 1.72988065750957, "grad_norm": 1.2714366912841797, "learning_rate": 4.439048450693134e-06, "loss": 1.5815, "step": 9603 }, { "epoch": 1.7300607971177664, "grad_norm": 1.484217643737793, "learning_rate": 4.433216832139625e-06, "loss": 2.1427, "step": 9604 }, { "epoch": 1.7302409367259626, "grad_norm": 1.3989589214324951, "learning_rate": 4.42738886893127e-06, "loss": 1.7924, "step": 9605 }, { "epoch": 1.730421076334159, "grad_norm": 1.5513818264007568, "learning_rate": 4.42156456153558e-06, "loss": 1.8296, "step": 9606 }, { "epoch": 1.7306012159423552, "grad_norm": 1.5463838577270508, "learning_rate": 4.415743910419773e-06, "loss": 2.0924, "step": 9607 }, { "epoch": 1.7307813555505516, "grad_norm": 1.521953821182251, "learning_rate": 4.409926916050794e-06, "loss": 1.6124, "step": 9608 }, { "epoch": 1.730961495158748, "grad_norm": 1.443172812461853, "learning_rate": 4.40411357889527e-06, "loss": 1.6189, "step": 9609 }, { "epoch": 1.7311416347669444, "grad_norm": 1.6563770771026611, "learning_rate": 4.398303899419548e-06, "loss": 1.5947, "step": 9610 }, { "epoch": 1.7313217743751408, "grad_norm": 1.528943657875061, "learning_rate": 4.392497878089669e-06, "loss": 1.4543, "step": 9611 }, { "epoch": 1.7315019139833372, "grad_norm": 1.5132312774658203, "learning_rate": 4.3866955153713975e-06, "loss": 1.6889, "step": 9612 }, { "epoch": 1.7316820535915336, "grad_norm": 1.5697942972183228, "learning_rate": 4.380896811730212e-06, "loss": 1.638, "step": 9613 }, { "epoch": 1.7318621931997298, "grad_norm": 1.4939794540405273, "learning_rate": 4.375101767631251e-06, "loss": 1.3965, "step": 9614 }, { "epoch": 1.7320423328079262, "grad_norm": 1.364015817642212, "learning_rate": 4.369310383539399e-06, "loss": 1.4775, "step": 9615 }, { "epoch": 1.7322224724161224, "grad_norm": 1.4071285724639893, "learning_rate": 4.363522659919239e-06, "loss": 1.3233, "step": 9616 }, { "epoch": 1.7324026120243188, "grad_norm": 1.4012006521224976, "learning_rate": 4.357738597235067e-06, "loss": 1.4234, "step": 9617 }, { "epoch": 1.7325827516325152, "grad_norm": 1.2294416427612305, "learning_rate": 4.351958195950878e-06, "loss": 1.1702, "step": 9618 }, { "epoch": 1.7327628912407116, "grad_norm": 1.31303870677948, "learning_rate": 4.3461814565303625e-06, "loss": 1.3742, "step": 9619 }, { "epoch": 1.732943030848908, "grad_norm": 1.772634744644165, "learning_rate": 4.3404083794369385e-06, "loss": 1.4506, "step": 9620 }, { "epoch": 1.7331231704571044, "grad_norm": 1.32028067111969, "learning_rate": 4.334638965133703e-06, "loss": 1.3303, "step": 9621 }, { "epoch": 1.7333033100653006, "grad_norm": 1.5191190242767334, "learning_rate": 4.328873214083495e-06, "loss": 1.5835, "step": 9622 }, { "epoch": 1.733483449673497, "grad_norm": 1.4485267400741577, "learning_rate": 4.323111126748836e-06, "loss": 1.3661, "step": 9623 }, { "epoch": 1.7336635892816932, "grad_norm": 1.4925565719604492, "learning_rate": 4.317352703591948e-06, "loss": 1.384, "step": 9624 }, { "epoch": 1.7338437288898896, "grad_norm": 1.5818308591842651, "learning_rate": 4.31159794507478e-06, "loss": 1.4439, "step": 9625 }, { "epoch": 1.734023868498086, "grad_norm": 1.4135690927505493, "learning_rate": 4.305846851658968e-06, "loss": 1.2722, "step": 9626 }, { "epoch": 1.7342040081062824, "grad_norm": 1.4261449575424194, "learning_rate": 4.3000994238058644e-06, "loss": 1.3275, "step": 9627 }, { "epoch": 1.7343841477144788, "grad_norm": 1.3457294702529907, "learning_rate": 4.294355661976529e-06, "loss": 1.1279, "step": 9628 }, { "epoch": 1.7345642873226752, "grad_norm": 1.6347578763961792, "learning_rate": 4.288615566631715e-06, "loss": 1.6883, "step": 9629 }, { "epoch": 1.7347444269308714, "grad_norm": 1.4078067541122437, "learning_rate": 4.282879138231888e-06, "loss": 1.2738, "step": 9630 }, { "epoch": 1.7349245665390678, "grad_norm": 1.4295357465744019, "learning_rate": 4.27714637723724e-06, "loss": 1.3309, "step": 9631 }, { "epoch": 1.735104706147264, "grad_norm": 1.5635004043579102, "learning_rate": 4.2714172841076375e-06, "loss": 1.4521, "step": 9632 }, { "epoch": 1.7352848457554604, "grad_norm": 1.458160161972046, "learning_rate": 4.265691859302668e-06, "loss": 1.2881, "step": 9633 }, { "epoch": 1.7354649853636568, "grad_norm": 1.4123752117156982, "learning_rate": 4.2599701032816185e-06, "loss": 1.4002, "step": 9634 }, { "epoch": 1.7356451249718532, "grad_norm": 1.5402745008468628, "learning_rate": 4.2542520165034904e-06, "loss": 1.332, "step": 9635 }, { "epoch": 1.7358252645800496, "grad_norm": 1.4241961240768433, "learning_rate": 4.248537599426982e-06, "loss": 1.2178, "step": 9636 }, { "epoch": 1.736005404188246, "grad_norm": 1.4660217761993408, "learning_rate": 4.242826852510501e-06, "loss": 1.3843, "step": 9637 }, { "epoch": 1.7361855437964422, "grad_norm": 1.7371903657913208, "learning_rate": 4.2371197762121676e-06, "loss": 1.4731, "step": 9638 }, { "epoch": 1.7363656834046386, "grad_norm": 1.4508026838302612, "learning_rate": 4.231416370989782e-06, "loss": 1.1948, "step": 9639 }, { "epoch": 1.736545823012835, "grad_norm": 1.5844625234603882, "learning_rate": 4.225716637300892e-06, "loss": 1.4146, "step": 9640 }, { "epoch": 1.7367259626210312, "grad_norm": 1.5621311664581299, "learning_rate": 4.220020575602718e-06, "loss": 1.4425, "step": 9641 }, { "epoch": 1.7369061022292276, "grad_norm": 1.711195468902588, "learning_rate": 4.2143281863521955e-06, "loss": 1.2537, "step": 9642 }, { "epoch": 1.737086241837424, "grad_norm": 1.542397379875183, "learning_rate": 4.20863947000596e-06, "loss": 1.3599, "step": 9643 }, { "epoch": 1.7372663814456204, "grad_norm": 1.608813762664795, "learning_rate": 4.202954427020356e-06, "loss": 1.315, "step": 9644 }, { "epoch": 1.7374465210538168, "grad_norm": 1.5510209798812866, "learning_rate": 4.197273057851464e-06, "loss": 1.2582, "step": 9645 }, { "epoch": 1.7376266606620132, "grad_norm": 1.5419559478759766, "learning_rate": 4.191595362955003e-06, "loss": 1.3826, "step": 9646 }, { "epoch": 1.7378068002702094, "grad_norm": 1.4365555047988892, "learning_rate": 4.185921342786448e-06, "loss": 1.3205, "step": 9647 }, { "epoch": 1.7379869398784058, "grad_norm": 1.6133428812026978, "learning_rate": 4.180250997800972e-06, "loss": 1.3987, "step": 9648 }, { "epoch": 1.738167079486602, "grad_norm": 1.4892632961273193, "learning_rate": 4.174584328453429e-06, "loss": 1.2221, "step": 9649 }, { "epoch": 1.7383472190947984, "grad_norm": 1.7542753219604492, "learning_rate": 4.168921335198428e-06, "loss": 1.641, "step": 9650 }, { "epoch": 1.7385273587029948, "grad_norm": 1.3664988279342651, "learning_rate": 4.163262018490227e-06, "loss": 1.1056, "step": 9651 }, { "epoch": 1.7387074983111912, "grad_norm": 1.446403980255127, "learning_rate": 4.157606378782819e-06, "loss": 1.7099, "step": 9652 }, { "epoch": 1.7388876379193876, "grad_norm": 1.3117256164550781, "learning_rate": 4.151954416529896e-06, "loss": 1.5486, "step": 9653 }, { "epoch": 1.739067777527584, "grad_norm": 1.3918155431747437, "learning_rate": 4.146306132184863e-06, "loss": 1.9564, "step": 9654 }, { "epoch": 1.7392479171357802, "grad_norm": 1.4384269714355469, "learning_rate": 4.1406615262008175e-06, "loss": 2.0072, "step": 9655 }, { "epoch": 1.7394280567439766, "grad_norm": 1.4911695718765259, "learning_rate": 4.1350205990305644e-06, "loss": 1.9314, "step": 9656 }, { "epoch": 1.7396081963521728, "grad_norm": 1.3657127618789673, "learning_rate": 4.1293833511266315e-06, "loss": 1.7937, "step": 9657 }, { "epoch": 1.7397883359603692, "grad_norm": 1.3657654523849487, "learning_rate": 4.123749782941206e-06, "loss": 1.3959, "step": 9658 }, { "epoch": 1.7399684755685656, "grad_norm": 1.6948521137237549, "learning_rate": 4.118119894926236e-06, "loss": 1.5854, "step": 9659 }, { "epoch": 1.740148615176762, "grad_norm": 1.6691831350326538, "learning_rate": 4.1124936875333365e-06, "loss": 1.6065, "step": 9660 }, { "epoch": 1.7403287547849584, "grad_norm": 1.6761059761047363, "learning_rate": 4.106871161213849e-06, "loss": 1.6582, "step": 9661 }, { "epoch": 1.7405088943931548, "grad_norm": 1.5670331716537476, "learning_rate": 4.101252316418791e-06, "loss": 1.8422, "step": 9662 }, { "epoch": 1.740689034001351, "grad_norm": 1.4325658082962036, "learning_rate": 4.095637153598919e-06, "loss": 1.4125, "step": 9663 }, { "epoch": 1.7408691736095474, "grad_norm": 1.3561815023422241, "learning_rate": 4.090025673204684e-06, "loss": 1.3679, "step": 9664 }, { "epoch": 1.7410493132177436, "grad_norm": 1.2881895303726196, "learning_rate": 4.0844178756862195e-06, "loss": 1.2346, "step": 9665 }, { "epoch": 1.74122945282594, "grad_norm": 1.5560855865478516, "learning_rate": 4.078813761493394e-06, "loss": 1.3696, "step": 9666 }, { "epoch": 1.7414095924341364, "grad_norm": 1.4385424852371216, "learning_rate": 4.073213331075759e-06, "loss": 1.4158, "step": 9667 }, { "epoch": 1.7415897320423328, "grad_norm": 1.4203969240188599, "learning_rate": 4.067616584882583e-06, "loss": 1.6062, "step": 9668 }, { "epoch": 1.7417698716505292, "grad_norm": 1.4013786315917969, "learning_rate": 4.062023523362829e-06, "loss": 1.324, "step": 9669 }, { "epoch": 1.7419500112587256, "grad_norm": 1.30453622341156, "learning_rate": 4.056434146965171e-06, "loss": 1.2226, "step": 9670 }, { "epoch": 1.742130150866922, "grad_norm": 1.3937655687332153, "learning_rate": 4.05084845613799e-06, "loss": 1.3982, "step": 9671 }, { "epoch": 1.7423102904751182, "grad_norm": 1.4252763986587524, "learning_rate": 4.045266451329361e-06, "loss": 1.4014, "step": 9672 }, { "epoch": 1.7424904300833146, "grad_norm": 1.439408779144287, "learning_rate": 4.039688132987074e-06, "loss": 1.372, "step": 9673 }, { "epoch": 1.7426705696915108, "grad_norm": 1.4185879230499268, "learning_rate": 4.034113501558623e-06, "loss": 1.3766, "step": 9674 }, { "epoch": 1.7428507092997072, "grad_norm": 1.472978115081787, "learning_rate": 4.028542557491194e-06, "loss": 1.3818, "step": 9675 }, { "epoch": 1.7430308489079036, "grad_norm": 1.5642225742340088, "learning_rate": 4.022975301231685e-06, "loss": 1.5645, "step": 9676 }, { "epoch": 1.7432109885161, "grad_norm": 1.5769459009170532, "learning_rate": 4.017411733226717e-06, "loss": 1.5387, "step": 9677 }, { "epoch": 1.7433911281242964, "grad_norm": 1.4561134576797485, "learning_rate": 4.011851853922572e-06, "loss": 1.3251, "step": 9678 }, { "epoch": 1.7435712677324928, "grad_norm": 1.4647002220153809, "learning_rate": 4.006295663765275e-06, "loss": 1.5141, "step": 9679 }, { "epoch": 1.743751407340689, "grad_norm": 1.4577445983886719, "learning_rate": 4.000743163200532e-06, "loss": 1.472, "step": 9680 }, { "epoch": 1.7439315469488854, "grad_norm": 1.4081840515136719, "learning_rate": 3.995194352673765e-06, "loss": 1.2726, "step": 9681 }, { "epoch": 1.7441116865570816, "grad_norm": 1.6844600439071655, "learning_rate": 3.989649232630105e-06, "loss": 1.5609, "step": 9682 }, { "epoch": 1.744291826165278, "grad_norm": 1.575162410736084, "learning_rate": 3.984107803514369e-06, "loss": 1.4024, "step": 9683 }, { "epoch": 1.7444719657734744, "grad_norm": 1.460941195487976, "learning_rate": 3.978570065771098e-06, "loss": 1.4437, "step": 9684 }, { "epoch": 1.7446521053816708, "grad_norm": 1.5462037324905396, "learning_rate": 3.9730360198445175e-06, "loss": 1.6569, "step": 9685 }, { "epoch": 1.7448322449898672, "grad_norm": 1.506374716758728, "learning_rate": 3.967505666178556e-06, "loss": 1.4948, "step": 9686 }, { "epoch": 1.7450123845980636, "grad_norm": 1.4880439043045044, "learning_rate": 3.961979005216881e-06, "loss": 1.4177, "step": 9687 }, { "epoch": 1.7451925242062598, "grad_norm": 1.5648796558380127, "learning_rate": 3.956456037402828e-06, "loss": 1.5054, "step": 9688 }, { "epoch": 1.7453726638144562, "grad_norm": 1.5347487926483154, "learning_rate": 3.950936763179442e-06, "loss": 1.4358, "step": 9689 }, { "epoch": 1.7455528034226524, "grad_norm": 1.480939507484436, "learning_rate": 3.945421182989478e-06, "loss": 1.3459, "step": 9690 }, { "epoch": 1.7457329430308488, "grad_norm": 1.677643895149231, "learning_rate": 3.939909297275396e-06, "loss": 1.4973, "step": 9691 }, { "epoch": 1.7459130826390452, "grad_norm": 1.633876085281372, "learning_rate": 3.9344011064793516e-06, "loss": 1.4317, "step": 9692 }, { "epoch": 1.7460932222472416, "grad_norm": 1.576006293296814, "learning_rate": 3.92889661104322e-06, "loss": 1.634, "step": 9693 }, { "epoch": 1.746273361855438, "grad_norm": 1.6767064332962036, "learning_rate": 3.923395811408553e-06, "loss": 1.5312, "step": 9694 }, { "epoch": 1.7464535014636344, "grad_norm": 1.525294303894043, "learning_rate": 3.917898708016627e-06, "loss": 1.3689, "step": 9695 }, { "epoch": 1.7466336410718308, "grad_norm": 1.742080807685852, "learning_rate": 3.912405301308431e-06, "loss": 1.662, "step": 9696 }, { "epoch": 1.746813780680027, "grad_norm": 1.5430021286010742, "learning_rate": 3.90691559172463e-06, "loss": 1.057, "step": 9697 }, { "epoch": 1.7469939202882234, "grad_norm": 1.7851451635360718, "learning_rate": 3.901429579705612e-06, "loss": 1.5243, "step": 9698 }, { "epoch": 1.7471740598964196, "grad_norm": 1.400574803352356, "learning_rate": 3.895947265691458e-06, "loss": 1.177, "step": 9699 }, { "epoch": 1.747354199504616, "grad_norm": 1.47464919090271, "learning_rate": 3.890468650121959e-06, "loss": 1.1998, "step": 9700 }, { "epoch": 1.7475343391128124, "grad_norm": 1.5411518812179565, "learning_rate": 3.884993733436604e-06, "loss": 1.4171, "step": 9701 }, { "epoch": 1.7477144787210088, "grad_norm": 1.4549037218093872, "learning_rate": 3.87952251607459e-06, "loss": 1.9851, "step": 9702 }, { "epoch": 1.7478946183292052, "grad_norm": 1.2915027141571045, "learning_rate": 3.874054998474813e-06, "loss": 1.7468, "step": 9703 }, { "epoch": 1.7480747579374016, "grad_norm": 1.4986714124679565, "learning_rate": 3.86859118107587e-06, "loss": 2.1113, "step": 9704 }, { "epoch": 1.7482548975455978, "grad_norm": 1.4523217678070068, "learning_rate": 3.863131064316083e-06, "loss": 1.7455, "step": 9705 }, { "epoch": 1.7484350371537942, "grad_norm": 1.4350519180297852, "learning_rate": 3.857674648633453e-06, "loss": 1.8288, "step": 9706 }, { "epoch": 1.7486151767619904, "grad_norm": 1.4179859161376953, "learning_rate": 3.852221934465683e-06, "loss": 1.6406, "step": 9707 }, { "epoch": 1.7487953163701868, "grad_norm": 1.5210756063461304, "learning_rate": 3.8467729222501915e-06, "loss": 1.7797, "step": 9708 }, { "epoch": 1.7489754559783832, "grad_norm": 1.595882773399353, "learning_rate": 3.841327612424084e-06, "loss": 1.8956, "step": 9709 }, { "epoch": 1.7491555955865796, "grad_norm": 1.7114648818969727, "learning_rate": 3.835886005424216e-06, "loss": 1.8549, "step": 9710 }, { "epoch": 1.749335735194776, "grad_norm": 1.6545852422714233, "learning_rate": 3.830448101687073e-06, "loss": 2.0044, "step": 9711 }, { "epoch": 1.7495158748029724, "grad_norm": 1.608636498451233, "learning_rate": 3.825013901648899e-06, "loss": 1.6402, "step": 9712 }, { "epoch": 1.7496960144111686, "grad_norm": 1.3891401290893555, "learning_rate": 3.819583405745614e-06, "loss": 1.3771, "step": 9713 }, { "epoch": 1.749876154019365, "grad_norm": 1.3837445974349976, "learning_rate": 3.8141566144128504e-06, "loss": 1.5487, "step": 9714 }, { "epoch": 1.7500562936275612, "grad_norm": 1.4634850025177002, "learning_rate": 3.8087335280859558e-06, "loss": 1.4804, "step": 9715 }, { "epoch": 1.7502364332357576, "grad_norm": 1.568115234375, "learning_rate": 3.8033141471999547e-06, "loss": 1.4828, "step": 9716 }, { "epoch": 1.750416572843954, "grad_norm": 1.5026984214782715, "learning_rate": 3.7978984721895927e-06, "loss": 1.5035, "step": 9717 }, { "epoch": 1.7505967124521504, "grad_norm": 1.4704281091690063, "learning_rate": 3.7924865034893063e-06, "loss": 1.5866, "step": 9718 }, { "epoch": 1.7507768520603468, "grad_norm": 1.4158943891525269, "learning_rate": 3.7870782415332486e-06, "loss": 1.384, "step": 9719 }, { "epoch": 1.7509569916685432, "grad_norm": 1.3173227310180664, "learning_rate": 3.781673686755266e-06, "loss": 1.312, "step": 9720 }, { "epoch": 1.7511371312767394, "grad_norm": 1.3533389568328857, "learning_rate": 3.7762728395889124e-06, "loss": 1.2193, "step": 9721 }, { "epoch": 1.7513172708849358, "grad_norm": 1.4190833568572998, "learning_rate": 3.770875700467441e-06, "loss": 1.4416, "step": 9722 }, { "epoch": 1.751497410493132, "grad_norm": 1.5404233932495117, "learning_rate": 3.7654822698237892e-06, "loss": 1.5274, "step": 9723 }, { "epoch": 1.7516775501013284, "grad_norm": 1.3749133348464966, "learning_rate": 3.7600925480906324e-06, "loss": 1.3512, "step": 9724 }, { "epoch": 1.7518576897095248, "grad_norm": 1.4603270292282104, "learning_rate": 3.7547065357003365e-06, "loss": 1.36, "step": 9725 }, { "epoch": 1.7520378293177212, "grad_norm": 1.504769206047058, "learning_rate": 3.7493242330849552e-06, "loss": 1.3747, "step": 9726 }, { "epoch": 1.7522179689259176, "grad_norm": 1.4987685680389404, "learning_rate": 3.7439456406762497e-06, "loss": 1.4548, "step": 9727 }, { "epoch": 1.752398108534114, "grad_norm": 1.418268084526062, "learning_rate": 3.7385707589056905e-06, "loss": 1.2073, "step": 9728 }, { "epoch": 1.7525782481423104, "grad_norm": 1.525145411491394, "learning_rate": 3.7331995882044614e-06, "loss": 1.4641, "step": 9729 }, { "epoch": 1.7527583877505066, "grad_norm": 1.651598572731018, "learning_rate": 3.727832129003428e-06, "loss": 1.3245, "step": 9730 }, { "epoch": 1.752938527358703, "grad_norm": 1.4429457187652588, "learning_rate": 3.7224683817331573e-06, "loss": 1.4994, "step": 9731 }, { "epoch": 1.7531186669668992, "grad_norm": 1.4865858554840088, "learning_rate": 3.717108346823933e-06, "loss": 1.5121, "step": 9732 }, { "epoch": 1.7532988065750956, "grad_norm": 1.5316241979599, "learning_rate": 3.711752024705739e-06, "loss": 1.3634, "step": 9733 }, { "epoch": 1.753478946183292, "grad_norm": 1.3886276483535767, "learning_rate": 3.7063994158082425e-06, "loss": 1.3008, "step": 9734 }, { "epoch": 1.7536590857914884, "grad_norm": 1.5707337856292725, "learning_rate": 3.7010505205608448e-06, "loss": 1.3854, "step": 9735 }, { "epoch": 1.7538392253996848, "grad_norm": 1.5333585739135742, "learning_rate": 3.695705339392619e-06, "loss": 1.3357, "step": 9736 }, { "epoch": 1.7540193650078812, "grad_norm": 1.5735903978347778, "learning_rate": 3.6903638727323444e-06, "loss": 1.4418, "step": 9737 }, { "epoch": 1.7541995046160774, "grad_norm": 1.4835413694381714, "learning_rate": 3.685026121008539e-06, "loss": 1.3883, "step": 9738 }, { "epoch": 1.7543796442242738, "grad_norm": 1.624558925628662, "learning_rate": 3.679692084649372e-06, "loss": 1.5624, "step": 9739 }, { "epoch": 1.75455978383247, "grad_norm": 1.594003438949585, "learning_rate": 3.674361764082751e-06, "loss": 1.567, "step": 9740 }, { "epoch": 1.7547399234406664, "grad_norm": 1.5150173902511597, "learning_rate": 3.6690351597362615e-06, "loss": 1.3894, "step": 9741 }, { "epoch": 1.7549200630488628, "grad_norm": 1.5619381666183472, "learning_rate": 3.663712272037206e-06, "loss": 1.5707, "step": 9742 }, { "epoch": 1.7551002026570592, "grad_norm": 1.520041584968567, "learning_rate": 3.6583931014125773e-06, "loss": 1.303, "step": 9743 }, { "epoch": 1.7552803422652556, "grad_norm": 1.6436876058578491, "learning_rate": 3.653077648289088e-06, "loss": 1.4566, "step": 9744 }, { "epoch": 1.755460481873452, "grad_norm": 1.525728702545166, "learning_rate": 3.647765913093132e-06, "loss": 1.3305, "step": 9745 }, { "epoch": 1.7556406214816482, "grad_norm": 1.6755917072296143, "learning_rate": 3.6424578962508115e-06, "loss": 1.5057, "step": 9746 }, { "epoch": 1.7558207610898446, "grad_norm": 1.423240303993225, "learning_rate": 3.6371535981879423e-06, "loss": 1.05, "step": 9747 }, { "epoch": 1.7560009006980408, "grad_norm": 1.746389389038086, "learning_rate": 3.631853019330034e-06, "loss": 1.4926, "step": 9748 }, { "epoch": 1.7561810403062372, "grad_norm": 1.4243775606155396, "learning_rate": 3.6265561601022847e-06, "loss": 1.1602, "step": 9749 }, { "epoch": 1.7563611799144336, "grad_norm": 1.49173104763031, "learning_rate": 3.621263020929616e-06, "loss": 1.2521, "step": 9750 }, { "epoch": 1.75654131952263, "grad_norm": 1.4891126155853271, "learning_rate": 3.615973602236633e-06, "loss": 1.1817, "step": 9751 }, { "epoch": 1.7567214591308264, "grad_norm": 1.4392690658569336, "learning_rate": 3.610687904447657e-06, "loss": 1.6769, "step": 9752 }, { "epoch": 1.7569015987390229, "grad_norm": 1.20699942111969, "learning_rate": 3.6054059279867047e-06, "loss": 1.4982, "step": 9753 }, { "epoch": 1.7570817383472193, "grad_norm": 1.3411917686462402, "learning_rate": 3.6001276732774926e-06, "loss": 1.7788, "step": 9754 }, { "epoch": 1.7572618779554154, "grad_norm": 1.2381129264831543, "learning_rate": 3.594853140743443e-06, "loss": 1.5767, "step": 9755 }, { "epoch": 1.7574420175636118, "grad_norm": 1.5269241333007812, "learning_rate": 3.589582330807656e-06, "loss": 1.8997, "step": 9756 }, { "epoch": 1.757622157171808, "grad_norm": 1.4806115627288818, "learning_rate": 3.5843152438929773e-06, "loss": 1.7014, "step": 9757 }, { "epoch": 1.7578022967800044, "grad_norm": 1.5065656900405884, "learning_rate": 3.579051880421924e-06, "loss": 1.9056, "step": 9758 }, { "epoch": 1.7579824363882008, "grad_norm": 1.7371292114257812, "learning_rate": 3.5737922408167145e-06, "loss": 2.4525, "step": 9759 }, { "epoch": 1.7581625759963972, "grad_norm": 1.6146286725997925, "learning_rate": 3.568536325499272e-06, "loss": 1.8608, "step": 9760 }, { "epoch": 1.7583427156045937, "grad_norm": 1.8243255615234375, "learning_rate": 3.563284134891237e-06, "loss": 1.8993, "step": 9761 }, { "epoch": 1.75852285521279, "grad_norm": 1.4930227994918823, "learning_rate": 3.5580356694139282e-06, "loss": 1.5501, "step": 9762 }, { "epoch": 1.7587029948209862, "grad_norm": 1.514678716659546, "learning_rate": 3.5527909294883756e-06, "loss": 1.453, "step": 9763 }, { "epoch": 1.7588831344291826, "grad_norm": 1.4491084814071655, "learning_rate": 3.547549915535314e-06, "loss": 1.3982, "step": 9764 }, { "epoch": 1.7590632740373788, "grad_norm": 1.4310634136199951, "learning_rate": 3.542312627975175e-06, "loss": 1.4546, "step": 9765 }, { "epoch": 1.7592434136455752, "grad_norm": 1.4227977991104126, "learning_rate": 3.537079067228083e-06, "loss": 1.3456, "step": 9766 }, { "epoch": 1.7594235532537716, "grad_norm": 1.3339054584503174, "learning_rate": 3.5318492337138798e-06, "loss": 1.2823, "step": 9767 }, { "epoch": 1.759603692861968, "grad_norm": 1.4997001886367798, "learning_rate": 3.526623127852097e-06, "loss": 1.5565, "step": 9768 }, { "epoch": 1.7597838324701645, "grad_norm": 1.3836314678192139, "learning_rate": 3.5214007500619705e-06, "loss": 1.1732, "step": 9769 }, { "epoch": 1.7599639720783609, "grad_norm": 1.3238518238067627, "learning_rate": 3.5161821007624272e-06, "loss": 1.242, "step": 9770 }, { "epoch": 1.760144111686557, "grad_norm": 1.3837451934814453, "learning_rate": 3.510967180372127e-06, "loss": 1.3414, "step": 9771 }, { "epoch": 1.7603242512947535, "grad_norm": 1.37803053855896, "learning_rate": 3.5057559893093892e-06, "loss": 1.3634, "step": 9772 }, { "epoch": 1.7605043909029496, "grad_norm": 1.4191161394119263, "learning_rate": 3.5005485279922645e-06, "loss": 1.2114, "step": 9773 }, { "epoch": 1.760684530511146, "grad_norm": 1.3937416076660156, "learning_rate": 3.4953447968384734e-06, "loss": 1.299, "step": 9774 }, { "epoch": 1.7608646701193424, "grad_norm": 1.494080901145935, "learning_rate": 3.4901447962654933e-06, "loss": 1.5288, "step": 9775 }, { "epoch": 1.7610448097275389, "grad_norm": 1.2838844060897827, "learning_rate": 3.4849485266904347e-06, "loss": 1.0898, "step": 9776 }, { "epoch": 1.7612249493357353, "grad_norm": 1.4366638660430908, "learning_rate": 3.479755988530148e-06, "loss": 1.2903, "step": 9777 }, { "epoch": 1.7614050889439317, "grad_norm": 1.4605692625045776, "learning_rate": 3.4745671822011715e-06, "loss": 1.285, "step": 9778 }, { "epoch": 1.7615852285521278, "grad_norm": 1.4743674993515015, "learning_rate": 3.4693821081197506e-06, "loss": 1.3517, "step": 9779 }, { "epoch": 1.7617653681603243, "grad_norm": 1.3900632858276367, "learning_rate": 3.4642007667018416e-06, "loss": 1.3849, "step": 9780 }, { "epoch": 1.7619455077685207, "grad_norm": 1.491556167602539, "learning_rate": 3.459023158363084e-06, "loss": 1.3595, "step": 9781 }, { "epoch": 1.7621256473767168, "grad_norm": 1.4030952453613281, "learning_rate": 3.453849283518812e-06, "loss": 1.2637, "step": 9782 }, { "epoch": 1.7623057869849132, "grad_norm": 1.4194015264511108, "learning_rate": 3.4486791425840716e-06, "loss": 1.2555, "step": 9783 }, { "epoch": 1.7624859265931097, "grad_norm": 1.5971589088439941, "learning_rate": 3.4435127359736253e-06, "loss": 1.5054, "step": 9784 }, { "epoch": 1.762666066201306, "grad_norm": 1.571773648262024, "learning_rate": 3.438350064101914e-06, "loss": 1.4324, "step": 9785 }, { "epoch": 1.7628462058095025, "grad_norm": 1.3381184339523315, "learning_rate": 3.4331911273830784e-06, "loss": 1.1895, "step": 9786 }, { "epoch": 1.7630263454176989, "grad_norm": 1.3946419954299927, "learning_rate": 3.4280359262309767e-06, "loss": 1.1587, "step": 9787 }, { "epoch": 1.763206485025895, "grad_norm": 1.4441640377044678, "learning_rate": 3.4228844610591336e-06, "loss": 1.1807, "step": 9788 }, { "epoch": 1.7633866246340915, "grad_norm": 1.522282361984253, "learning_rate": 3.4177367322808183e-06, "loss": 1.4291, "step": 9789 }, { "epoch": 1.7635667642422876, "grad_norm": 1.5009187459945679, "learning_rate": 3.412592740308973e-06, "loss": 1.2038, "step": 9790 }, { "epoch": 1.763746903850484, "grad_norm": 1.5902810096740723, "learning_rate": 3.407452485556245e-06, "loss": 1.2673, "step": 9791 }, { "epoch": 1.7639270434586805, "grad_norm": 1.555992603302002, "learning_rate": 3.402315968434988e-06, "loss": 1.3651, "step": 9792 }, { "epoch": 1.7641071830668769, "grad_norm": 1.6070942878723145, "learning_rate": 3.3971831893572338e-06, "loss": 1.3466, "step": 9793 }, { "epoch": 1.7642873226750733, "grad_norm": 1.5451864004135132, "learning_rate": 3.392054148734758e-06, "loss": 1.3503, "step": 9794 }, { "epoch": 1.7644674622832697, "grad_norm": 1.509764313697815, "learning_rate": 3.386928846978993e-06, "loss": 1.3564, "step": 9795 }, { "epoch": 1.7646476018914659, "grad_norm": 1.5163006782531738, "learning_rate": 3.381807284501093e-06, "loss": 1.332, "step": 9796 }, { "epoch": 1.7648277414996623, "grad_norm": 1.5303181409835815, "learning_rate": 3.376689461711907e-06, "loss": 1.255, "step": 9797 }, { "epoch": 1.7650078811078584, "grad_norm": 1.6993863582611084, "learning_rate": 3.3715753790219794e-06, "loss": 1.601, "step": 9798 }, { "epoch": 1.7651880207160549, "grad_norm": 1.4489957094192505, "learning_rate": 3.366465036841565e-06, "loss": 1.1217, "step": 9799 }, { "epoch": 1.7653681603242513, "grad_norm": 1.4787137508392334, "learning_rate": 3.3613584355806138e-06, "loss": 1.1798, "step": 9800 }, { "epoch": 1.7655482999324477, "grad_norm": 1.288694977760315, "learning_rate": 3.3562555756487703e-06, "loss": 1.0289, "step": 9801 }, { "epoch": 1.765728439540644, "grad_norm": 1.3893877267837524, "learning_rate": 3.3511564574553735e-06, "loss": 1.8824, "step": 9802 }, { "epoch": 1.7659085791488405, "grad_norm": 1.3039926290512085, "learning_rate": 3.346061081409496e-06, "loss": 1.7071, "step": 9803 }, { "epoch": 1.7660887187570367, "grad_norm": 1.3711354732513428, "learning_rate": 3.340969447919873e-06, "loss": 1.9659, "step": 9804 }, { "epoch": 1.766268858365233, "grad_norm": 1.3592208623886108, "learning_rate": 3.335881557394954e-06, "loss": 1.764, "step": 9805 }, { "epoch": 1.7664489979734292, "grad_norm": 1.447879433631897, "learning_rate": 3.3307974102428863e-06, "loss": 1.7053, "step": 9806 }, { "epoch": 1.7666291375816257, "grad_norm": 1.5441514253616333, "learning_rate": 3.3257170068715205e-06, "loss": 1.8472, "step": 9807 }, { "epoch": 1.766809277189822, "grad_norm": 1.4021772146224976, "learning_rate": 3.320640347688397e-06, "loss": 1.6305, "step": 9808 }, { "epoch": 1.7669894167980185, "grad_norm": 1.5168946981430054, "learning_rate": 3.3155674331007745e-06, "loss": 1.8199, "step": 9809 }, { "epoch": 1.7671695564062149, "grad_norm": 1.6763941049575806, "learning_rate": 3.3104982635155932e-06, "loss": 2.0727, "step": 9810 }, { "epoch": 1.7673496960144113, "grad_norm": 1.576102614402771, "learning_rate": 3.3054328393394895e-06, "loss": 1.8597, "step": 9811 }, { "epoch": 1.7675298356226077, "grad_norm": 1.7728216648101807, "learning_rate": 3.300371160978827e-06, "loss": 1.6148, "step": 9812 }, { "epoch": 1.7677099752308039, "grad_norm": 1.4950711727142334, "learning_rate": 3.2953132288396415e-06, "loss": 1.6179, "step": 9813 }, { "epoch": 1.7678901148390003, "grad_norm": 1.4749361276626587, "learning_rate": 3.2902590433276813e-06, "loss": 1.5794, "step": 9814 }, { "epoch": 1.7680702544471965, "grad_norm": 1.4258922338485718, "learning_rate": 3.285208604848389e-06, "loss": 1.3797, "step": 9815 }, { "epoch": 1.7682503940553929, "grad_norm": 1.352752685546875, "learning_rate": 3.2801619138069007e-06, "loss": 1.2688, "step": 9816 }, { "epoch": 1.7684305336635893, "grad_norm": 1.4454792737960815, "learning_rate": 3.2751189706080767e-06, "loss": 1.4482, "step": 9817 }, { "epoch": 1.7686106732717857, "grad_norm": 1.5224086046218872, "learning_rate": 3.2700797756564483e-06, "loss": 1.3349, "step": 9818 }, { "epoch": 1.768790812879982, "grad_norm": 1.6277973651885986, "learning_rate": 3.2650443293562704e-06, "loss": 1.8212, "step": 9819 }, { "epoch": 1.7689709524881785, "grad_norm": 1.555992603302002, "learning_rate": 3.2600126321114587e-06, "loss": 1.4072, "step": 9820 }, { "epoch": 1.7691510920963747, "grad_norm": 1.6057409048080444, "learning_rate": 3.2549846843256627e-06, "loss": 1.5905, "step": 9821 }, { "epoch": 1.769331231704571, "grad_norm": 1.6142410039901733, "learning_rate": 3.249960486402237e-06, "loss": 1.3907, "step": 9822 }, { "epoch": 1.7695113713127673, "grad_norm": 1.4140647649765015, "learning_rate": 3.2449400387442042e-06, "loss": 1.2653, "step": 9823 }, { "epoch": 1.7696915109209637, "grad_norm": 1.6014339923858643, "learning_rate": 3.239923341754314e-06, "loss": 1.4371, "step": 9824 }, { "epoch": 1.76987165052916, "grad_norm": 1.5181461572647095, "learning_rate": 3.2349103958349837e-06, "loss": 1.3349, "step": 9825 }, { "epoch": 1.7700517901373565, "grad_norm": 1.475053310394287, "learning_rate": 3.2299012013883746e-06, "loss": 1.3329, "step": 9826 }, { "epoch": 1.7702319297455529, "grad_norm": 1.5775713920593262, "learning_rate": 3.2248957588163097e-06, "loss": 1.4734, "step": 9827 }, { "epoch": 1.7704120693537493, "grad_norm": 1.482925295829773, "learning_rate": 3.219894068520324e-06, "loss": 1.1697, "step": 9828 }, { "epoch": 1.7705922089619455, "grad_norm": 1.5949487686157227, "learning_rate": 3.2148961309016514e-06, "loss": 1.4629, "step": 9829 }, { "epoch": 1.7707723485701419, "grad_norm": 1.5618815422058105, "learning_rate": 3.209901946361221e-06, "loss": 1.4689, "step": 9830 }, { "epoch": 1.770952488178338, "grad_norm": 1.6027182340621948, "learning_rate": 3.2049115152996633e-06, "loss": 1.5968, "step": 9831 }, { "epoch": 1.7711326277865345, "grad_norm": 1.5154054164886475, "learning_rate": 3.199924838117313e-06, "loss": 1.6709, "step": 9832 }, { "epoch": 1.7713127673947309, "grad_norm": 1.4988441467285156, "learning_rate": 3.1949419152142e-06, "loss": 1.4155, "step": 9833 }, { "epoch": 1.7714929070029273, "grad_norm": 1.5211819410324097, "learning_rate": 3.189962746990044e-06, "loss": 1.3442, "step": 9834 }, { "epoch": 1.7716730466111237, "grad_norm": 1.5229166746139526, "learning_rate": 3.184987333844269e-06, "loss": 1.3498, "step": 9835 }, { "epoch": 1.77185318621932, "grad_norm": 1.5959737300872803, "learning_rate": 3.180015676176018e-06, "loss": 1.3395, "step": 9836 }, { "epoch": 1.7720333258275163, "grad_norm": 1.5616458654403687, "learning_rate": 3.1750477743841044e-06, "loss": 1.2583, "step": 9837 }, { "epoch": 1.7722134654357127, "grad_norm": 1.6109622716903687, "learning_rate": 3.1700836288670486e-06, "loss": 1.403, "step": 9838 }, { "epoch": 1.772393605043909, "grad_norm": 1.4488798379898071, "learning_rate": 3.1651232400230656e-06, "loss": 1.0806, "step": 9839 }, { "epoch": 1.7725737446521053, "grad_norm": 1.4988574981689453, "learning_rate": 3.160166608250098e-06, "loss": 1.2333, "step": 9840 }, { "epoch": 1.7727538842603017, "grad_norm": 1.6694145202636719, "learning_rate": 3.1552137339457445e-06, "loss": 1.2971, "step": 9841 }, { "epoch": 1.772934023868498, "grad_norm": 1.688083291053772, "learning_rate": 3.1502646175073312e-06, "loss": 1.5732, "step": 9842 }, { "epoch": 1.7731141634766945, "grad_norm": 1.4416602849960327, "learning_rate": 3.145319259331869e-06, "loss": 1.3582, "step": 9843 }, { "epoch": 1.7732943030848909, "grad_norm": 1.5128344297409058, "learning_rate": 3.140377659816063e-06, "loss": 1.2716, "step": 9844 }, { "epoch": 1.7734744426930873, "grad_norm": 1.4940046072006226, "learning_rate": 3.135439819356345e-06, "loss": 1.3417, "step": 9845 }, { "epoch": 1.7736545823012835, "grad_norm": 1.8050631284713745, "learning_rate": 3.130505738348821e-06, "loss": 1.418, "step": 9846 }, { "epoch": 1.7738347219094799, "grad_norm": 1.4687649011611938, "learning_rate": 3.1255754171892914e-06, "loss": 1.1303, "step": 9847 }, { "epoch": 1.774014861517676, "grad_norm": 1.7391098737716675, "learning_rate": 3.1206488562732783e-06, "loss": 1.3444, "step": 9848 }, { "epoch": 1.7741950011258725, "grad_norm": 1.6519453525543213, "learning_rate": 3.1157260559959657e-06, "loss": 1.311, "step": 9849 }, { "epoch": 1.7743751407340689, "grad_norm": 1.5089085102081299, "learning_rate": 3.1108070167522817e-06, "loss": 1.1043, "step": 9850 }, { "epoch": 1.7745552803422653, "grad_norm": 1.464198112487793, "learning_rate": 3.1058917389368223e-06, "loss": 1.1819, "step": 9851 }, { "epoch": 1.7747354199504617, "grad_norm": 1.376810908317566, "learning_rate": 3.1009802229438887e-06, "loss": 1.634, "step": 9852 }, { "epoch": 1.774915559558658, "grad_norm": 1.411827564239502, "learning_rate": 3.096072469167466e-06, "loss": 1.7541, "step": 9853 }, { "epoch": 1.7750956991668543, "grad_norm": 1.490149736404419, "learning_rate": 3.0911684780012718e-06, "loss": 1.5701, "step": 9854 }, { "epoch": 1.7752758387750507, "grad_norm": 1.3167155981063843, "learning_rate": 3.0862682498386917e-06, "loss": 1.4447, "step": 9855 }, { "epoch": 1.7754559783832469, "grad_norm": 1.3508093357086182, "learning_rate": 3.081371785072823e-06, "loss": 1.5733, "step": 9856 }, { "epoch": 1.7756361179914433, "grad_norm": 1.58484947681427, "learning_rate": 3.0764790840964563e-06, "loss": 1.9759, "step": 9857 }, { "epoch": 1.7758162575996397, "grad_norm": 1.5745463371276855, "learning_rate": 3.071590147302078e-06, "loss": 1.9436, "step": 9858 }, { "epoch": 1.775996397207836, "grad_norm": 1.5669615268707275, "learning_rate": 3.0667049750818854e-06, "loss": 1.3913, "step": 9859 }, { "epoch": 1.7761765368160325, "grad_norm": 1.794481635093689, "learning_rate": 3.061823567827754e-06, "loss": 1.7634, "step": 9860 }, { "epoch": 1.776356676424229, "grad_norm": 1.576206922531128, "learning_rate": 3.0569459259312818e-06, "loss": 1.4121, "step": 9861 }, { "epoch": 1.776536816032425, "grad_norm": 1.522792935371399, "learning_rate": 3.0520720497837385e-06, "loss": 1.4583, "step": 9862 }, { "epoch": 1.7767169556406215, "grad_norm": 1.2773865461349487, "learning_rate": 3.0472019397761064e-06, "loss": 1.2119, "step": 9863 }, { "epoch": 1.7768970952488177, "grad_norm": 1.3955366611480713, "learning_rate": 3.042335596299062e-06, "loss": 1.3323, "step": 9864 }, { "epoch": 1.777077234857014, "grad_norm": 1.4555565118789673, "learning_rate": 3.037473019742987e-06, "loss": 1.4389, "step": 9865 }, { "epoch": 1.7772573744652105, "grad_norm": 1.4232789278030396, "learning_rate": 3.0326142104979525e-06, "loss": 1.445, "step": 9866 }, { "epoch": 1.7774375140734069, "grad_norm": 1.341517686843872, "learning_rate": 3.0277591689537254e-06, "loss": 1.4613, "step": 9867 }, { "epoch": 1.7776176536816033, "grad_norm": 1.4803911447525024, "learning_rate": 3.0229078954997823e-06, "loss": 1.3171, "step": 9868 }, { "epoch": 1.7777977932897997, "grad_norm": 1.4366044998168945, "learning_rate": 3.0180603905252846e-06, "loss": 1.3199, "step": 9869 }, { "epoch": 1.777977932897996, "grad_norm": 1.4099029302597046, "learning_rate": 3.0132166544190986e-06, "loss": 1.3858, "step": 9870 }, { "epoch": 1.7781580725061923, "grad_norm": 1.508741021156311, "learning_rate": 3.008376687569786e-06, "loss": 1.4026, "step": 9871 }, { "epoch": 1.7783382121143887, "grad_norm": 1.6278587579727173, "learning_rate": 3.0035404903656085e-06, "loss": 1.4795, "step": 9872 }, { "epoch": 1.7785183517225849, "grad_norm": 1.4682656526565552, "learning_rate": 2.998708063194522e-06, "loss": 1.178, "step": 9873 }, { "epoch": 1.7786984913307813, "grad_norm": 1.4815837144851685, "learning_rate": 2.9938794064441776e-06, "loss": 1.3455, "step": 9874 }, { "epoch": 1.7788786309389777, "grad_norm": 1.4454247951507568, "learning_rate": 2.9890545205019328e-06, "loss": 1.3776, "step": 9875 }, { "epoch": 1.779058770547174, "grad_norm": 1.4900965690612793, "learning_rate": 2.984233405754838e-06, "loss": 1.3231, "step": 9876 }, { "epoch": 1.7792389101553705, "grad_norm": 1.458905816078186, "learning_rate": 2.9794160625896296e-06, "loss": 1.21, "step": 9877 }, { "epoch": 1.779419049763567, "grad_norm": 1.3248931169509888, "learning_rate": 2.9746024913927693e-06, "loss": 1.2997, "step": 9878 }, { "epoch": 1.779599189371763, "grad_norm": 1.495949387550354, "learning_rate": 2.9697926925503936e-06, "loss": 1.4589, "step": 9879 }, { "epoch": 1.7797793289799595, "grad_norm": 1.5799137353897095, "learning_rate": 2.9649866664483385e-06, "loss": 1.5057, "step": 9880 }, { "epoch": 1.7799594685881557, "grad_norm": 1.5541681051254272, "learning_rate": 2.960184413472139e-06, "loss": 1.3934, "step": 9881 }, { "epoch": 1.780139608196352, "grad_norm": 1.615139126777649, "learning_rate": 2.9553859340070377e-06, "loss": 1.5931, "step": 9882 }, { "epoch": 1.7803197478045485, "grad_norm": 1.4375929832458496, "learning_rate": 2.9505912284379598e-06, "loss": 1.389, "step": 9883 }, { "epoch": 1.780499887412745, "grad_norm": 1.4612473249435425, "learning_rate": 2.945800297149548e-06, "loss": 1.4184, "step": 9884 }, { "epoch": 1.7806800270209413, "grad_norm": 1.5776232481002808, "learning_rate": 2.941013140526111e-06, "loss": 1.3939, "step": 9885 }, { "epoch": 1.7808601666291377, "grad_norm": 1.5297393798828125, "learning_rate": 2.9362297589516696e-06, "loss": 1.2477, "step": 9886 }, { "epoch": 1.781040306237334, "grad_norm": 1.4318976402282715, "learning_rate": 2.9314501528099615e-06, "loss": 1.0685, "step": 9887 }, { "epoch": 1.7812204458455303, "grad_norm": 1.5197341442108154, "learning_rate": 2.9266743224843907e-06, "loss": 1.383, "step": 9888 }, { "epoch": 1.7814005854537265, "grad_norm": 1.6641758680343628, "learning_rate": 2.9219022683580786e-06, "loss": 1.4712, "step": 9889 }, { "epoch": 1.7815807250619229, "grad_norm": 1.6749756336212158, "learning_rate": 2.9171339908138308e-06, "loss": 1.7038, "step": 9890 }, { "epoch": 1.7817608646701193, "grad_norm": 1.6026268005371094, "learning_rate": 2.912369490234168e-06, "loss": 1.5003, "step": 9891 }, { "epoch": 1.7819410042783157, "grad_norm": 1.505325436592102, "learning_rate": 2.907608767001291e-06, "loss": 1.2014, "step": 9892 }, { "epoch": 1.782121143886512, "grad_norm": 1.4283524751663208, "learning_rate": 2.902851821497099e-06, "loss": 1.12, "step": 9893 }, { "epoch": 1.7823012834947085, "grad_norm": 1.6709409952163696, "learning_rate": 2.898098654103193e-06, "loss": 1.5111, "step": 9894 }, { "epoch": 1.782481423102905, "grad_norm": 1.493607997894287, "learning_rate": 2.8933492652008677e-06, "loss": 1.3331, "step": 9895 }, { "epoch": 1.782661562711101, "grad_norm": 1.562774658203125, "learning_rate": 2.888603655171124e-06, "loss": 1.3574, "step": 9896 }, { "epoch": 1.7828417023192975, "grad_norm": 1.5309172868728638, "learning_rate": 2.883861824394646e-06, "loss": 1.1357, "step": 9897 }, { "epoch": 1.7830218419274937, "grad_norm": 1.5188308954238892, "learning_rate": 2.879123773251824e-06, "loss": 1.2806, "step": 9898 }, { "epoch": 1.78320198153569, "grad_norm": 1.5923372507095337, "learning_rate": 2.874389502122743e-06, "loss": 1.2997, "step": 9899 }, { "epoch": 1.7833821211438865, "grad_norm": 1.5409752130508423, "learning_rate": 2.8696590113871714e-06, "loss": 1.2403, "step": 9900 }, { "epoch": 1.783562260752083, "grad_norm": 1.587744116783142, "learning_rate": 2.8649323014246054e-06, "loss": 1.1927, "step": 9901 }, { "epoch": 1.7837424003602793, "grad_norm": 1.4742180109024048, "learning_rate": 2.860209372614209e-06, "loss": 1.7464, "step": 9902 }, { "epoch": 1.7839225399684757, "grad_norm": 1.4704670906066895, "learning_rate": 2.8554902253348614e-06, "loss": 2.0764, "step": 9903 }, { "epoch": 1.784102679576672, "grad_norm": 1.3590621948242188, "learning_rate": 2.850774859965122e-06, "loss": 1.7817, "step": 9904 }, { "epoch": 1.7842828191848683, "grad_norm": 1.3859045505523682, "learning_rate": 2.8460632768832596e-06, "loss": 1.7231, "step": 9905 }, { "epoch": 1.7844629587930645, "grad_norm": 1.3658157587051392, "learning_rate": 2.841355476467228e-06, "loss": 1.6551, "step": 9906 }, { "epoch": 1.784643098401261, "grad_norm": 1.5751011371612549, "learning_rate": 2.8366514590946967e-06, "loss": 1.7875, "step": 9907 }, { "epoch": 1.7848232380094573, "grad_norm": 1.6245322227478027, "learning_rate": 2.831951225143009e-06, "loss": 1.8343, "step": 9908 }, { "epoch": 1.7850033776176537, "grad_norm": 1.5365079641342163, "learning_rate": 2.827254774989213e-06, "loss": 1.5304, "step": 9909 }, { "epoch": 1.7851835172258501, "grad_norm": 1.5759741067886353, "learning_rate": 2.8225621090100686e-06, "loss": 1.7605, "step": 9910 }, { "epoch": 1.7853636568340465, "grad_norm": 2.3076226711273193, "learning_rate": 2.817873227582013e-06, "loss": 2.274, "step": 9911 }, { "epoch": 1.7855437964422427, "grad_norm": 1.6789138317108154, "learning_rate": 2.8131881310811846e-06, "loss": 1.6764, "step": 9912 }, { "epoch": 1.785723936050439, "grad_norm": 1.4506316184997559, "learning_rate": 2.808506819883422e-06, "loss": 1.4689, "step": 9913 }, { "epoch": 1.7859040756586353, "grad_norm": 1.386644721031189, "learning_rate": 2.8038292943642465e-06, "loss": 1.3536, "step": 9914 }, { "epoch": 1.7860842152668317, "grad_norm": 1.3460304737091064, "learning_rate": 2.7991555548989024e-06, "loss": 1.3028, "step": 9915 }, { "epoch": 1.786264354875028, "grad_norm": 1.4798442125320435, "learning_rate": 2.7944856018623124e-06, "loss": 1.4623, "step": 9916 }, { "epoch": 1.7864444944832245, "grad_norm": 1.450947642326355, "learning_rate": 2.789819435629104e-06, "loss": 1.4857, "step": 9917 }, { "epoch": 1.786624634091421, "grad_norm": 1.4580761194229126, "learning_rate": 2.785157056573567e-06, "loss": 1.5624, "step": 9918 }, { "epoch": 1.7868047736996173, "grad_norm": 1.5128990411758423, "learning_rate": 2.780498465069742e-06, "loss": 1.3527, "step": 9919 }, { "epoch": 1.7869849133078135, "grad_norm": 1.3524466753005981, "learning_rate": 2.7758436614913287e-06, "loss": 1.1834, "step": 9920 }, { "epoch": 1.78716505291601, "grad_norm": 1.5410993099212646, "learning_rate": 2.7711926462117346e-06, "loss": 1.4377, "step": 9921 }, { "epoch": 1.787345192524206, "grad_norm": 1.4928367137908936, "learning_rate": 2.7665454196040664e-06, "loss": 1.4857, "step": 9922 }, { "epoch": 1.7875253321324025, "grad_norm": 1.6126383543014526, "learning_rate": 2.761901982041104e-06, "loss": 1.59, "step": 9923 }, { "epoch": 1.787705471740599, "grad_norm": 1.371206521987915, "learning_rate": 2.7572623338953662e-06, "loss": 1.1243, "step": 9924 }, { "epoch": 1.7878856113487953, "grad_norm": 1.5020833015441895, "learning_rate": 2.752626475539033e-06, "loss": 1.573, "step": 9925 }, { "epoch": 1.7880657509569917, "grad_norm": 1.4009459018707275, "learning_rate": 2.7479944073439957e-06, "loss": 1.3558, "step": 9926 }, { "epoch": 1.7882458905651881, "grad_norm": 1.4487234354019165, "learning_rate": 2.7433661296818235e-06, "loss": 1.3079, "step": 9927 }, { "epoch": 1.7884260301733845, "grad_norm": 1.7979350090026855, "learning_rate": 2.7387416429238086e-06, "loss": 1.686, "step": 9928 }, { "epoch": 1.7886061697815807, "grad_norm": 1.4197977781295776, "learning_rate": 2.734120947440916e-06, "loss": 1.283, "step": 9929 }, { "epoch": 1.7887863093897771, "grad_norm": 1.463405728340149, "learning_rate": 2.7295040436038145e-06, "loss": 1.2608, "step": 9930 }, { "epoch": 1.7889664489979733, "grad_norm": 1.5794692039489746, "learning_rate": 2.724890931782881e-06, "loss": 1.4955, "step": 9931 }, { "epoch": 1.7891465886061697, "grad_norm": 1.5418800115585327, "learning_rate": 2.720281612348158e-06, "loss": 1.4654, "step": 9932 }, { "epoch": 1.7893267282143661, "grad_norm": 1.467890739440918, "learning_rate": 2.7156760856694276e-06, "loss": 1.3422, "step": 9933 }, { "epoch": 1.7895068678225625, "grad_norm": 1.5787708759307861, "learning_rate": 2.7110743521161277e-06, "loss": 1.4593, "step": 9934 }, { "epoch": 1.789687007430759, "grad_norm": 1.5096412897109985, "learning_rate": 2.706476412057407e-06, "loss": 1.3807, "step": 9935 }, { "epoch": 1.7898671470389553, "grad_norm": 1.6212842464447021, "learning_rate": 2.7018822658621155e-06, "loss": 1.4962, "step": 9936 }, { "epoch": 1.7900472866471515, "grad_norm": 1.4657515287399292, "learning_rate": 2.697291913898786e-06, "loss": 1.2455, "step": 9937 }, { "epoch": 1.790227426255348, "grad_norm": 1.503367304801941, "learning_rate": 2.6927053565356632e-06, "loss": 1.3998, "step": 9938 }, { "epoch": 1.790407565863544, "grad_norm": 1.681787371635437, "learning_rate": 2.688122594140674e-06, "loss": 1.4904, "step": 9939 }, { "epoch": 1.7905877054717405, "grad_norm": 1.5369207859039307, "learning_rate": 2.6835436270814475e-06, "loss": 1.3873, "step": 9940 }, { "epoch": 1.790767845079937, "grad_norm": 1.6265339851379395, "learning_rate": 2.6789684557253005e-06, "loss": 1.3087, "step": 9941 }, { "epoch": 1.7909479846881333, "grad_norm": 1.5250691175460815, "learning_rate": 2.6743970804392448e-06, "loss": 1.3881, "step": 9942 }, { "epoch": 1.7911281242963297, "grad_norm": 1.463868260383606, "learning_rate": 2.6698295015900156e-06, "loss": 1.2515, "step": 9943 }, { "epoch": 1.7913082639045261, "grad_norm": 1.5700215101242065, "learning_rate": 2.6652657195440132e-06, "loss": 1.3948, "step": 9944 }, { "epoch": 1.7914884035127223, "grad_norm": 1.5048481225967407, "learning_rate": 2.660705734667335e-06, "loss": 1.3173, "step": 9945 }, { "epoch": 1.7916685431209187, "grad_norm": 1.688209891319275, "learning_rate": 2.6561495473257758e-06, "loss": 1.6279, "step": 9946 }, { "epoch": 1.791848682729115, "grad_norm": 1.4761959314346313, "learning_rate": 2.65159715788485e-06, "loss": 1.1893, "step": 9947 }, { "epoch": 1.7920288223373113, "grad_norm": 1.6242260932922363, "learning_rate": 2.647048566709731e-06, "loss": 1.348, "step": 9948 }, { "epoch": 1.7922089619455077, "grad_norm": 1.7724230289459229, "learning_rate": 2.6425037741653227e-06, "loss": 1.6359, "step": 9949 }, { "epoch": 1.7923891015537041, "grad_norm": 1.585125207901001, "learning_rate": 2.6379627806161877e-06, "loss": 1.2213, "step": 9950 }, { "epoch": 1.7925692411619005, "grad_norm": 1.6363455057144165, "learning_rate": 2.6334255864266076e-06, "loss": 1.379, "step": 9951 }, { "epoch": 1.792749380770097, "grad_norm": 1.349861979484558, "learning_rate": 2.6288921919605524e-06, "loss": 1.8176, "step": 9952 }, { "epoch": 1.7929295203782933, "grad_norm": 1.3355356454849243, "learning_rate": 2.6243625975816977e-06, "loss": 1.7971, "step": 9953 }, { "epoch": 1.7931096599864895, "grad_norm": 1.287998080253601, "learning_rate": 2.6198368036534026e-06, "loss": 1.4331, "step": 9954 }, { "epoch": 1.793289799594686, "grad_norm": 1.4241411685943604, "learning_rate": 2.615314810538716e-06, "loss": 1.755, "step": 9955 }, { "epoch": 1.7934699392028821, "grad_norm": 1.3556983470916748, "learning_rate": 2.610796618600386e-06, "loss": 1.5629, "step": 9956 }, { "epoch": 1.7936500788110785, "grad_norm": 1.5641840696334839, "learning_rate": 2.606282228200879e-06, "loss": 1.8697, "step": 9957 }, { "epoch": 1.793830218419275, "grad_norm": 1.5936095714569092, "learning_rate": 2.6017716397023275e-06, "loss": 1.7644, "step": 9958 }, { "epoch": 1.7940103580274713, "grad_norm": 1.7188745737075806, "learning_rate": 2.597264853466569e-06, "loss": 1.717, "step": 9959 }, { "epoch": 1.7941904976356677, "grad_norm": 1.7600808143615723, "learning_rate": 2.592761869855137e-06, "loss": 1.952, "step": 9960 }, { "epoch": 1.7943706372438641, "grad_norm": 1.7443944215774536, "learning_rate": 2.588262689229254e-06, "loss": 1.8312, "step": 9961 }, { "epoch": 1.7945507768520603, "grad_norm": 1.5954912900924683, "learning_rate": 2.583767311949842e-06, "loss": 1.4167, "step": 9962 }, { "epoch": 1.7947309164602567, "grad_norm": 1.4277865886688232, "learning_rate": 2.5792757383775233e-06, "loss": 1.4405, "step": 9963 }, { "epoch": 1.794911056068453, "grad_norm": 1.5450223684310913, "learning_rate": 2.574787968872605e-06, "loss": 1.5595, "step": 9964 }, { "epoch": 1.7950911956766493, "grad_norm": 1.4645463228225708, "learning_rate": 2.570304003795088e-06, "loss": 1.3917, "step": 9965 }, { "epoch": 1.7952713352848457, "grad_norm": 1.2948360443115234, "learning_rate": 2.5658238435046943e-06, "loss": 1.3636, "step": 9966 }, { "epoch": 1.7954514748930421, "grad_norm": 1.3741048574447632, "learning_rate": 2.5613474883608045e-06, "loss": 1.5765, "step": 9967 }, { "epoch": 1.7956316145012385, "grad_norm": 1.3922216892242432, "learning_rate": 2.556874938722509e-06, "loss": 1.4731, "step": 9968 }, { "epoch": 1.795811754109435, "grad_norm": 1.344000220298767, "learning_rate": 2.5524061949486033e-06, "loss": 1.3584, "step": 9969 }, { "epoch": 1.7959918937176311, "grad_norm": 1.2065109014511108, "learning_rate": 2.547941257397557e-06, "loss": 1.097, "step": 9970 }, { "epoch": 1.7961720333258275, "grad_norm": 1.4392669200897217, "learning_rate": 2.54348012642755e-06, "loss": 1.378, "step": 9971 }, { "epoch": 1.7963521729340237, "grad_norm": 1.4213449954986572, "learning_rate": 2.539022802396457e-06, "loss": 1.3661, "step": 9972 }, { "epoch": 1.7965323125422201, "grad_norm": 1.3919795751571655, "learning_rate": 2.534569285661831e-06, "loss": 1.2925, "step": 9973 }, { "epoch": 1.7967124521504165, "grad_norm": 1.3523578643798828, "learning_rate": 2.530119576580936e-06, "loss": 1.264, "step": 9974 }, { "epoch": 1.796892591758613, "grad_norm": 1.437835931777954, "learning_rate": 2.5256736755107314e-06, "loss": 1.3298, "step": 9975 }, { "epoch": 1.7970727313668093, "grad_norm": 1.4455056190490723, "learning_rate": 2.521231582807859e-06, "loss": 1.2947, "step": 9976 }, { "epoch": 1.7972528709750057, "grad_norm": 1.5071290731430054, "learning_rate": 2.5167932988286623e-06, "loss": 1.309, "step": 9977 }, { "epoch": 1.797433010583202, "grad_norm": 1.5807194709777832, "learning_rate": 2.512358823929184e-06, "loss": 1.4594, "step": 9978 }, { "epoch": 1.7976131501913983, "grad_norm": 1.6512035131454468, "learning_rate": 2.5079281584651393e-06, "loss": 1.5128, "step": 9979 }, { "epoch": 1.7977932897995947, "grad_norm": 1.501667857170105, "learning_rate": 2.5035013027919717e-06, "loss": 1.4507, "step": 9980 }, { "epoch": 1.797973429407791, "grad_norm": 1.4809303283691406, "learning_rate": 2.4990782572647975e-06, "loss": 1.3401, "step": 9981 }, { "epoch": 1.7981535690159873, "grad_norm": 1.4275723695755005, "learning_rate": 2.494659022238438e-06, "loss": 1.3783, "step": 9982 }, { "epoch": 1.7983337086241837, "grad_norm": 1.6030213832855225, "learning_rate": 2.490243598067382e-06, "loss": 1.5512, "step": 9983 }, { "epoch": 1.7985138482323801, "grad_norm": 1.6979684829711914, "learning_rate": 2.4858319851058354e-06, "loss": 1.5776, "step": 9984 }, { "epoch": 1.7986939878405765, "grad_norm": 1.3408842086791992, "learning_rate": 2.4814241837077145e-06, "loss": 1.1557, "step": 9985 }, { "epoch": 1.798874127448773, "grad_norm": 1.671297311782837, "learning_rate": 2.4770201942265927e-06, "loss": 1.5044, "step": 9986 }, { "epoch": 1.7990542670569691, "grad_norm": 1.49205482006073, "learning_rate": 2.47262001701577e-06, "loss": 1.3709, "step": 9987 }, { "epoch": 1.7992344066651655, "grad_norm": 1.4086471796035767, "learning_rate": 2.4682236524282143e-06, "loss": 1.3189, "step": 9988 }, { "epoch": 1.7994145462733617, "grad_norm": 1.503443956375122, "learning_rate": 2.4638311008166047e-06, "loss": 1.3862, "step": 9989 }, { "epoch": 1.7995946858815581, "grad_norm": 1.35723876953125, "learning_rate": 2.459442362533315e-06, "loss": 1.2539, "step": 9990 }, { "epoch": 1.7997748254897545, "grad_norm": 1.5897128582000732, "learning_rate": 2.4550574379304027e-06, "loss": 1.5706, "step": 9991 }, { "epoch": 1.799954965097951, "grad_norm": 1.6506224870681763, "learning_rate": 2.4506763273596247e-06, "loss": 1.4168, "step": 9992 }, { "epoch": 1.8001351047061473, "grad_norm": 1.6067757606506348, "learning_rate": 2.446299031172433e-06, "loss": 1.4711, "step": 9993 }, { "epoch": 1.8003152443143438, "grad_norm": 1.526239037513733, "learning_rate": 2.4419255497199646e-06, "loss": 1.3983, "step": 9994 }, { "epoch": 1.80049538392254, "grad_norm": 1.463149070739746, "learning_rate": 2.4375558833530653e-06, "loss": 1.1949, "step": 9995 }, { "epoch": 1.8006755235307363, "grad_norm": 1.6321347951889038, "learning_rate": 2.4331900324222723e-06, "loss": 1.5065, "step": 9996 }, { "epoch": 1.8008556631389325, "grad_norm": 1.6577446460723877, "learning_rate": 2.428827997277794e-06, "loss": 1.4318, "step": 9997 }, { "epoch": 1.801035802747129, "grad_norm": 1.56265389919281, "learning_rate": 2.4244697782695726e-06, "loss": 1.2484, "step": 9998 }, { "epoch": 1.8012159423553253, "grad_norm": 1.4934924840927124, "learning_rate": 2.420115375747206e-06, "loss": 1.2096, "step": 9999 }, { "epoch": 1.8013960819635217, "grad_norm": 1.493435025215149, "learning_rate": 2.4157647900600154e-06, "loss": 1.2484, "step": 10000 }, { "epoch": 1.8015762215717182, "grad_norm": 1.6239197254180908, "learning_rate": 2.4114180215569936e-06, "loss": 1.3808, "step": 10001 }, { "epoch": 1.8017563611799146, "grad_norm": 1.2958792448043823, "learning_rate": 2.4070750705868394e-06, "loss": 1.5937, "step": 10002 }, { "epoch": 1.8019365007881107, "grad_norm": 1.387211561203003, "learning_rate": 2.4027359374979407e-06, "loss": 1.8858, "step": 10003 }, { "epoch": 1.8021166403963071, "grad_norm": 1.3409655094146729, "learning_rate": 2.3984006226383805e-06, "loss": 1.6068, "step": 10004 }, { "epoch": 1.8022967800045033, "grad_norm": 1.3563859462738037, "learning_rate": 2.3940691263559356e-06, "loss": 1.7366, "step": 10005 }, { "epoch": 1.8024769196126997, "grad_norm": 1.4528411626815796, "learning_rate": 2.3897414489980786e-06, "loss": 1.7807, "step": 10006 }, { "epoch": 1.8026570592208961, "grad_norm": 1.4837846755981445, "learning_rate": 2.3854175909119645e-06, "loss": 1.7435, "step": 10007 }, { "epoch": 1.8028371988290925, "grad_norm": 1.4679380655288696, "learning_rate": 2.381097552444467e-06, "loss": 1.647, "step": 10008 }, { "epoch": 1.803017338437289, "grad_norm": 1.5496721267700195, "learning_rate": 2.3767813339421295e-06, "loss": 1.7539, "step": 10009 }, { "epoch": 1.8031974780454854, "grad_norm": 1.6995376348495483, "learning_rate": 2.3724689357511986e-06, "loss": 1.7397, "step": 10010 }, { "epoch": 1.8033776176536818, "grad_norm": 1.9499925374984741, "learning_rate": 2.368160358217603e-06, "loss": 2.0504, "step": 10011 }, { "epoch": 1.803557757261878, "grad_norm": 1.675574541091919, "learning_rate": 2.3638556016869875e-06, "loss": 1.8564, "step": 10012 }, { "epoch": 1.8037378968700744, "grad_norm": 1.4830348491668701, "learning_rate": 2.3595546665046765e-06, "loss": 1.3947, "step": 10013 }, { "epoch": 1.8039180364782705, "grad_norm": 1.472398281097412, "learning_rate": 2.3552575530156885e-06, "loss": 1.4108, "step": 10014 }, { "epoch": 1.804098176086467, "grad_norm": 1.3872883319854736, "learning_rate": 2.350964261564725e-06, "loss": 1.3408, "step": 10015 }, { "epoch": 1.8042783156946633, "grad_norm": 1.427467703819275, "learning_rate": 2.3466747924961952e-06, "loss": 1.1747, "step": 10016 }, { "epoch": 1.8044584553028598, "grad_norm": 1.5777684450149536, "learning_rate": 2.3423891461542113e-06, "loss": 1.463, "step": 10017 }, { "epoch": 1.8046385949110562, "grad_norm": 1.277117371559143, "learning_rate": 2.3381073228825543e-06, "loss": 1.0313, "step": 10018 }, { "epoch": 1.8048187345192526, "grad_norm": 1.3496284484863281, "learning_rate": 2.3338293230247166e-06, "loss": 1.327, "step": 10019 }, { "epoch": 1.8049988741274487, "grad_norm": 1.3696247339248657, "learning_rate": 2.3295551469238675e-06, "loss": 1.2093, "step": 10020 }, { "epoch": 1.8051790137356452, "grad_norm": 1.4592262506484985, "learning_rate": 2.3252847949228827e-06, "loss": 1.3693, "step": 10021 }, { "epoch": 1.8053591533438413, "grad_norm": 1.3412666320800781, "learning_rate": 2.321018267364333e-06, "loss": 1.1342, "step": 10022 }, { "epoch": 1.8055392929520377, "grad_norm": 1.7362686395645142, "learning_rate": 2.3167555645904783e-06, "loss": 1.4592, "step": 10023 }, { "epoch": 1.8057194325602342, "grad_norm": 1.5533839464187622, "learning_rate": 2.312496686943261e-06, "loss": 1.533, "step": 10024 }, { "epoch": 1.8058995721684306, "grad_norm": 1.3072752952575684, "learning_rate": 2.3082416347643352e-06, "loss": 1.3038, "step": 10025 }, { "epoch": 1.806079711776627, "grad_norm": 1.2663114070892334, "learning_rate": 2.303990408395035e-06, "loss": 1.1934, "step": 10026 }, { "epoch": 1.8062598513848234, "grad_norm": 1.5046476125717163, "learning_rate": 2.2997430081763916e-06, "loss": 1.4549, "step": 10027 }, { "epoch": 1.8064399909930196, "grad_norm": 1.4995585680007935, "learning_rate": 2.295499434449133e-06, "loss": 1.3057, "step": 10028 }, { "epoch": 1.806620130601216, "grad_norm": 1.4494708776474, "learning_rate": 2.2912596875536707e-06, "loss": 1.3853, "step": 10029 }, { "epoch": 1.8068002702094121, "grad_norm": 1.4352213144302368, "learning_rate": 2.2870237678301154e-06, "loss": 1.2069, "step": 10030 }, { "epoch": 1.8069804098176085, "grad_norm": 1.6235201358795166, "learning_rate": 2.282791675618279e-06, "loss": 1.6152, "step": 10031 }, { "epoch": 1.807160549425805, "grad_norm": 1.4555021524429321, "learning_rate": 2.2785634112576505e-06, "loss": 1.292, "step": 10032 }, { "epoch": 1.8073406890340014, "grad_norm": 1.5760633945465088, "learning_rate": 2.27433897508742e-06, "loss": 1.3454, "step": 10033 }, { "epoch": 1.8075208286421978, "grad_norm": 1.4704110622406006, "learning_rate": 2.2701183674464722e-06, "loss": 1.4319, "step": 10034 }, { "epoch": 1.8077009682503942, "grad_norm": 1.3243273496627808, "learning_rate": 2.26590158867338e-06, "loss": 1.0766, "step": 10035 }, { "epoch": 1.8078811078585904, "grad_norm": 1.3688443899154663, "learning_rate": 2.261688639106413e-06, "loss": 1.1436, "step": 10036 }, { "epoch": 1.8080612474667868, "grad_norm": 1.4448790550231934, "learning_rate": 2.2574795190835275e-06, "loss": 1.2796, "step": 10037 }, { "epoch": 1.8082413870749832, "grad_norm": 1.5203858613967896, "learning_rate": 2.2532742289423815e-06, "loss": 1.2263, "step": 10038 }, { "epoch": 1.8084215266831793, "grad_norm": 1.6407034397125244, "learning_rate": 2.249072769020316e-06, "loss": 1.4458, "step": 10039 }, { "epoch": 1.8086016662913758, "grad_norm": 1.4767647981643677, "learning_rate": 2.2448751396543787e-06, "loss": 1.3189, "step": 10040 }, { "epoch": 1.8087818058995722, "grad_norm": 1.5921157598495483, "learning_rate": 2.2406813411812997e-06, "loss": 1.3455, "step": 10041 }, { "epoch": 1.8089619455077686, "grad_norm": 1.569536566734314, "learning_rate": 2.2364913739374995e-06, "loss": 1.4725, "step": 10042 }, { "epoch": 1.809142085115965, "grad_norm": 1.5300394296646118, "learning_rate": 2.232305238259097e-06, "loss": 1.2021, "step": 10043 }, { "epoch": 1.8093222247241614, "grad_norm": 1.6046299934387207, "learning_rate": 2.228122934481891e-06, "loss": 1.3953, "step": 10044 }, { "epoch": 1.8095023643323576, "grad_norm": 1.498541235923767, "learning_rate": 2.223944462941402e-06, "loss": 1.3705, "step": 10045 }, { "epoch": 1.809682503940554, "grad_norm": 1.7935336828231812, "learning_rate": 2.219769823972828e-06, "loss": 1.5541, "step": 10046 }, { "epoch": 1.8098626435487501, "grad_norm": 1.536406397819519, "learning_rate": 2.2155990179110353e-06, "loss": 1.409, "step": 10047 }, { "epoch": 1.8100427831569466, "grad_norm": 1.6193208694458008, "learning_rate": 2.2114320450906166e-06, "loss": 1.2533, "step": 10048 }, { "epoch": 1.810222922765143, "grad_norm": 1.6491739749908447, "learning_rate": 2.2072689058458384e-06, "loss": 1.3188, "step": 10049 }, { "epoch": 1.8104030623733394, "grad_norm": 1.6443055868148804, "learning_rate": 2.203109600510672e-06, "loss": 1.3765, "step": 10050 }, { "epoch": 1.8105832019815358, "grad_norm": 1.5825101137161255, "learning_rate": 2.198954129418773e-06, "loss": 1.1676, "step": 10051 }, { "epoch": 1.8107633415897322, "grad_norm": 1.3962745666503906, "learning_rate": 2.194802492903492e-06, "loss": 1.7069, "step": 10052 }, { "epoch": 1.8109434811979284, "grad_norm": 1.2918355464935303, "learning_rate": 2.190654691297861e-06, "loss": 1.7, "step": 10053 }, { "epoch": 1.8111236208061248, "grad_norm": 1.3591047525405884, "learning_rate": 2.186510724934632e-06, "loss": 1.9171, "step": 10054 }, { "epoch": 1.811303760414321, "grad_norm": 1.430289626121521, "learning_rate": 2.1823705941462215e-06, "loss": 1.5948, "step": 10055 }, { "epoch": 1.8114839000225174, "grad_norm": 1.529053807258606, "learning_rate": 2.178234299264753e-06, "loss": 1.8438, "step": 10056 }, { "epoch": 1.8116640396307138, "grad_norm": 1.5243395566940308, "learning_rate": 2.174101840622034e-06, "loss": 1.8101, "step": 10057 }, { "epoch": 1.8118441792389102, "grad_norm": 1.5169636011123657, "learning_rate": 2.1699732185495757e-06, "loss": 1.9676, "step": 10058 }, { "epoch": 1.8120243188471066, "grad_norm": 1.5018396377563477, "learning_rate": 2.1658484333785643e-06, "loss": 1.5021, "step": 10059 }, { "epoch": 1.812204458455303, "grad_norm": 1.6437705755233765, "learning_rate": 2.1617274854398905e-06, "loss": 1.7939, "step": 10060 }, { "epoch": 1.8123845980634992, "grad_norm": 1.9435685873031616, "learning_rate": 2.15761037506414e-06, "loss": 2.1778, "step": 10061 }, { "epoch": 1.8125647376716956, "grad_norm": 1.6255899667739868, "learning_rate": 2.153497102581581e-06, "loss": 1.7574, "step": 10062 }, { "epoch": 1.8127448772798918, "grad_norm": 1.5100345611572266, "learning_rate": 2.1493876683221727e-06, "loss": 1.5662, "step": 10063 }, { "epoch": 1.8129250168880882, "grad_norm": 1.451920986175537, "learning_rate": 2.1452820726155844e-06, "loss": 1.4867, "step": 10064 }, { "epoch": 1.8131051564962846, "grad_norm": 1.4553110599517822, "learning_rate": 2.141180315791158e-06, "loss": 1.5681, "step": 10065 }, { "epoch": 1.813285296104481, "grad_norm": 1.444446086883545, "learning_rate": 2.1370823981779364e-06, "loss": 1.4385, "step": 10066 }, { "epoch": 1.8134654357126774, "grad_norm": 1.4607234001159668, "learning_rate": 2.1329883201046506e-06, "loss": 1.4436, "step": 10067 }, { "epoch": 1.8136455753208738, "grad_norm": 1.399939775466919, "learning_rate": 2.1288980818997275e-06, "loss": 1.2261, "step": 10068 }, { "epoch": 1.8138257149290702, "grad_norm": 1.4652578830718994, "learning_rate": 2.1248116838912867e-06, "loss": 1.4139, "step": 10069 }, { "epoch": 1.8140058545372664, "grad_norm": 1.2725590467453003, "learning_rate": 2.1207291264071284e-06, "loss": 1.2006, "step": 10070 }, { "epoch": 1.8141859941454628, "grad_norm": 1.3044129610061646, "learning_rate": 2.116650409774762e-06, "loss": 1.1308, "step": 10071 }, { "epoch": 1.814366133753659, "grad_norm": 1.4853923320770264, "learning_rate": 2.112575534321365e-06, "loss": 1.4973, "step": 10072 }, { "epoch": 1.8145462733618554, "grad_norm": 1.2549855709075928, "learning_rate": 2.1085045003738478e-06, "loss": 1.1359, "step": 10073 }, { "epoch": 1.8147264129700518, "grad_norm": 1.3858438730239868, "learning_rate": 2.1044373082587664e-06, "loss": 1.334, "step": 10074 }, { "epoch": 1.8149065525782482, "grad_norm": 1.4075618982315063, "learning_rate": 2.1003739583023984e-06, "loss": 1.3103, "step": 10075 }, { "epoch": 1.8150866921864446, "grad_norm": 1.3848905563354492, "learning_rate": 2.0963144508306944e-06, "loss": 1.3577, "step": 10076 }, { "epoch": 1.815266831794641, "grad_norm": 1.6225776672363281, "learning_rate": 2.09225878616931e-06, "loss": 1.6012, "step": 10077 }, { "epoch": 1.8154469714028372, "grad_norm": 1.4673770666122437, "learning_rate": 2.0882069646435964e-06, "loss": 1.2149, "step": 10078 }, { "epoch": 1.8156271110110336, "grad_norm": 1.5840201377868652, "learning_rate": 2.0841589865785938e-06, "loss": 1.4817, "step": 10079 }, { "epoch": 1.8158072506192298, "grad_norm": 1.5192190408706665, "learning_rate": 2.080114852299009e-06, "loss": 1.4626, "step": 10080 }, { "epoch": 1.8159873902274262, "grad_norm": 1.4348398447036743, "learning_rate": 2.07607456212926e-06, "loss": 1.458, "step": 10081 }, { "epoch": 1.8161675298356226, "grad_norm": 1.342737078666687, "learning_rate": 2.0720381163934765e-06, "loss": 1.2706, "step": 10082 }, { "epoch": 1.816347669443819, "grad_norm": 1.45393967628479, "learning_rate": 2.06800551541545e-06, "loss": 1.2374, "step": 10083 }, { "epoch": 1.8165278090520154, "grad_norm": 1.415820598602295, "learning_rate": 2.0639767595186765e-06, "loss": 1.1386, "step": 10084 }, { "epoch": 1.8167079486602118, "grad_norm": 1.3049286603927612, "learning_rate": 2.0599518490263316e-06, "loss": 1.2418, "step": 10085 }, { "epoch": 1.816888088268408, "grad_norm": 1.3920007944107056, "learning_rate": 2.0559307842613007e-06, "loss": 1.1824, "step": 10086 }, { "epoch": 1.8170682278766044, "grad_norm": 1.5656129121780396, "learning_rate": 2.0519135655461484e-06, "loss": 1.242, "step": 10087 }, { "epoch": 1.8172483674848006, "grad_norm": 1.453320860862732, "learning_rate": 2.0479001932031393e-06, "loss": 1.3428, "step": 10088 }, { "epoch": 1.817428507092997, "grad_norm": 1.6345810890197754, "learning_rate": 2.0438906675542214e-06, "loss": 1.4914, "step": 10089 }, { "epoch": 1.8176086467011934, "grad_norm": 1.692877173423767, "learning_rate": 2.0398849889210313e-06, "loss": 1.446, "step": 10090 }, { "epoch": 1.8177887863093898, "grad_norm": 1.5764058828353882, "learning_rate": 2.0358831576249126e-06, "loss": 1.4218, "step": 10091 }, { "epoch": 1.8179689259175862, "grad_norm": 1.6342402696609497, "learning_rate": 2.0318851739868805e-06, "loss": 1.4039, "step": 10092 }, { "epoch": 1.8181490655257826, "grad_norm": 1.6368536949157715, "learning_rate": 2.027891038327656e-06, "loss": 1.4346, "step": 10093 }, { "epoch": 1.818329205133979, "grad_norm": 1.5874720811843872, "learning_rate": 2.0239007509676443e-06, "loss": 1.4866, "step": 10094 }, { "epoch": 1.8185093447421752, "grad_norm": 1.6649432182312012, "learning_rate": 2.0199143122269394e-06, "loss": 1.3545, "step": 10095 }, { "epoch": 1.8186894843503716, "grad_norm": 1.6224302053451538, "learning_rate": 2.0159317224253517e-06, "loss": 1.2969, "step": 10096 }, { "epoch": 1.8188696239585678, "grad_norm": 1.5880509614944458, "learning_rate": 2.0119529818823423e-06, "loss": 1.4174, "step": 10097 }, { "epoch": 1.8190497635667642, "grad_norm": 1.609045386314392, "learning_rate": 2.00797809091709e-06, "loss": 1.4974, "step": 10098 }, { "epoch": 1.8192299031749606, "grad_norm": 1.6164910793304443, "learning_rate": 2.004007049848461e-06, "loss": 1.2302, "step": 10099 }, { "epoch": 1.819410042783157, "grad_norm": 1.6428788900375366, "learning_rate": 2.000039858995012e-06, "loss": 1.3782, "step": 10100 }, { "epoch": 1.8195901823913534, "grad_norm": 1.3995789289474487, "learning_rate": 1.9960765186749777e-06, "loss": 0.9754, "step": 10101 }, { "epoch": 1.8197703219995498, "grad_norm": 1.344395637512207, "learning_rate": 1.9921170292063086e-06, "loss": 1.6348, "step": 10102 }, { "epoch": 1.819950461607746, "grad_norm": 1.2843656539916992, "learning_rate": 1.9881613909066287e-06, "loss": 1.6399, "step": 10103 }, { "epoch": 1.8201306012159424, "grad_norm": 1.4430584907531738, "learning_rate": 1.9842096040932457e-06, "loss": 1.5001, "step": 10104 }, { "epoch": 1.8203107408241386, "grad_norm": 1.6491752862930298, "learning_rate": 1.980261669083189e-06, "loss": 2.1396, "step": 10105 }, { "epoch": 1.820490880432335, "grad_norm": 1.5357755422592163, "learning_rate": 1.976317586193155e-06, "loss": 1.9067, "step": 10106 }, { "epoch": 1.8206710200405314, "grad_norm": 1.4185600280761719, "learning_rate": 1.9723773557395297e-06, "loss": 1.5255, "step": 10107 }, { "epoch": 1.8208511596487278, "grad_norm": 1.4339519739151, "learning_rate": 1.9684409780383992e-06, "loss": 1.6866, "step": 10108 }, { "epoch": 1.8210312992569242, "grad_norm": 1.5583972930908203, "learning_rate": 1.964508453405528e-06, "loss": 1.7289, "step": 10109 }, { "epoch": 1.8212114388651206, "grad_norm": 1.7545312643051147, "learning_rate": 1.960579782156402e-06, "loss": 1.6404, "step": 10110 }, { "epoch": 1.8213915784733168, "grad_norm": 1.6488924026489258, "learning_rate": 1.956654964606175e-06, "loss": 1.8884, "step": 10111 }, { "epoch": 1.8215717180815132, "grad_norm": 1.586043357849121, "learning_rate": 1.952734001069678e-06, "loss": 1.7718, "step": 10112 }, { "epoch": 1.8217518576897094, "grad_norm": 1.465941071510315, "learning_rate": 1.948816891861455e-06, "loss": 1.4703, "step": 10113 }, { "epoch": 1.8219319972979058, "grad_norm": 1.3636362552642822, "learning_rate": 1.9449036372957307e-06, "loss": 1.5068, "step": 10114 }, { "epoch": 1.8221121369061022, "grad_norm": 1.4096664190292358, "learning_rate": 1.9409942376864333e-06, "loss": 1.4697, "step": 10115 }, { "epoch": 1.8222922765142986, "grad_norm": 1.3681811094284058, "learning_rate": 1.937088693347172e-06, "loss": 1.0556, "step": 10116 }, { "epoch": 1.822472416122495, "grad_norm": 1.343756079673767, "learning_rate": 1.933187004591247e-06, "loss": 1.2496, "step": 10117 }, { "epoch": 1.8226525557306914, "grad_norm": 1.4849203824996948, "learning_rate": 1.929289171731641e-06, "loss": 1.2679, "step": 10118 }, { "epoch": 1.8228326953388876, "grad_norm": 1.2953969240188599, "learning_rate": 1.9253951950810478e-06, "loss": 1.2433, "step": 10119 }, { "epoch": 1.823012834947084, "grad_norm": 1.337950587272644, "learning_rate": 1.9215050749518348e-06, "loss": 1.2363, "step": 10120 }, { "epoch": 1.8231929745552802, "grad_norm": 1.5039666891098022, "learning_rate": 1.9176188116560688e-06, "loss": 1.5663, "step": 10121 }, { "epoch": 1.8233731141634766, "grad_norm": 1.3163189888000488, "learning_rate": 1.9137364055055006e-06, "loss": 1.3075, "step": 10122 }, { "epoch": 1.823553253771673, "grad_norm": 1.4876776933670044, "learning_rate": 1.9098578568115754e-06, "loss": 1.3066, "step": 10123 }, { "epoch": 1.8237333933798694, "grad_norm": 1.507344126701355, "learning_rate": 1.9059831658854278e-06, "loss": 1.5504, "step": 10124 }, { "epoch": 1.8239135329880658, "grad_norm": 1.416939616203308, "learning_rate": 1.9021123330378864e-06, "loss": 1.3996, "step": 10125 }, { "epoch": 1.8240936725962622, "grad_norm": 1.486570119857788, "learning_rate": 1.8982453585794647e-06, "loss": 1.4995, "step": 10126 }, { "epoch": 1.8242738122044586, "grad_norm": 1.489565134048462, "learning_rate": 1.8943822428203751e-06, "loss": 1.3586, "step": 10127 }, { "epoch": 1.8244539518126548, "grad_norm": 1.5417814254760742, "learning_rate": 1.890522986070503e-06, "loss": 1.3543, "step": 10128 }, { "epoch": 1.8246340914208512, "grad_norm": 1.3175015449523926, "learning_rate": 1.8866675886394457e-06, "loss": 1.1671, "step": 10129 }, { "epoch": 1.8248142310290474, "grad_norm": 1.414724588394165, "learning_rate": 1.8828160508364833e-06, "loss": 1.2677, "step": 10130 }, { "epoch": 1.8249943706372438, "grad_norm": 1.4064226150512695, "learning_rate": 1.8789683729705799e-06, "loss": 1.4351, "step": 10131 }, { "epoch": 1.8251745102454402, "grad_norm": 1.5006777048110962, "learning_rate": 1.8751245553503937e-06, "loss": 1.2262, "step": 10132 }, { "epoch": 1.8253546498536366, "grad_norm": 1.5483022928237915, "learning_rate": 1.871284598284273e-06, "loss": 1.4874, "step": 10133 }, { "epoch": 1.825534789461833, "grad_norm": 1.5291799306869507, "learning_rate": 1.86744850208026e-06, "loss": 1.3124, "step": 10134 }, { "epoch": 1.8257149290700294, "grad_norm": 1.542891502380371, "learning_rate": 1.8636162670460867e-06, "loss": 1.3712, "step": 10135 }, { "epoch": 1.8258950686782256, "grad_norm": 1.6982064247131348, "learning_rate": 1.8597878934891677e-06, "loss": 1.5616, "step": 10136 }, { "epoch": 1.826075208286422, "grad_norm": 1.5469884872436523, "learning_rate": 1.8559633817166133e-06, "loss": 1.4207, "step": 10137 }, { "epoch": 1.8262553478946182, "grad_norm": 1.6189945936203003, "learning_rate": 1.8521427320352281e-06, "loss": 1.3411, "step": 10138 }, { "epoch": 1.8264354875028146, "grad_norm": 1.6197736263275146, "learning_rate": 1.8483259447515e-06, "loss": 1.4551, "step": 10139 }, { "epoch": 1.826615627111011, "grad_norm": 1.43265962600708, "learning_rate": 1.8445130201716121e-06, "loss": 1.3722, "step": 10140 }, { "epoch": 1.8267957667192074, "grad_norm": 1.5799026489257812, "learning_rate": 1.8407039586014363e-06, "loss": 1.3739, "step": 10141 }, { "epoch": 1.8269759063274038, "grad_norm": 1.7282644510269165, "learning_rate": 1.8368987603465225e-06, "loss": 1.5924, "step": 10142 }, { "epoch": 1.8271560459356002, "grad_norm": 1.562292456626892, "learning_rate": 1.833097425712138e-06, "loss": 1.436, "step": 10143 }, { "epoch": 1.8273361855437964, "grad_norm": 1.6173843145370483, "learning_rate": 1.8292999550032218e-06, "loss": 1.3085, "step": 10144 }, { "epoch": 1.8275163251519928, "grad_norm": 1.633520483970642, "learning_rate": 1.8255063485243973e-06, "loss": 1.3988, "step": 10145 }, { "epoch": 1.827696464760189, "grad_norm": 1.6982747316360474, "learning_rate": 1.8217166065799818e-06, "loss": 1.5243, "step": 10146 }, { "epoch": 1.8278766043683854, "grad_norm": 1.6105185747146606, "learning_rate": 1.817930729473999e-06, "loss": 1.4113, "step": 10147 }, { "epoch": 1.8280567439765818, "grad_norm": 1.845295786857605, "learning_rate": 1.814148717510139e-06, "loss": 1.4898, "step": 10148 }, { "epoch": 1.8282368835847782, "grad_norm": 1.5970302820205688, "learning_rate": 1.810370570991804e-06, "loss": 1.3528, "step": 10149 }, { "epoch": 1.8284170231929746, "grad_norm": 1.4034368991851807, "learning_rate": 1.806596290222068e-06, "loss": 1.0075, "step": 10150 }, { "epoch": 1.828597162801171, "grad_norm": 1.4156461954116821, "learning_rate": 1.8028258755036997e-06, "loss": 1.2236, "step": 10151 }, { "epoch": 1.8287773024093674, "grad_norm": 1.4610676765441895, "learning_rate": 1.7990593271391688e-06, "loss": 2.1319, "step": 10152 }, { "epoch": 1.8289574420175636, "grad_norm": 1.227811336517334, "learning_rate": 1.7952966454306163e-06, "loss": 1.7249, "step": 10153 }, { "epoch": 1.82913758162576, "grad_norm": 1.4469199180603027, "learning_rate": 1.7915378306798902e-06, "loss": 1.9271, "step": 10154 }, { "epoch": 1.8293177212339562, "grad_norm": 1.3816676139831543, "learning_rate": 1.7877828831885213e-06, "loss": 1.6807, "step": 10155 }, { "epoch": 1.8294978608421526, "grad_norm": 1.3797568082809448, "learning_rate": 1.7840318032577185e-06, "loss": 1.7105, "step": 10156 }, { "epoch": 1.829678000450349, "grad_norm": 1.4409549236297607, "learning_rate": 1.7802845911884081e-06, "loss": 1.7099, "step": 10157 }, { "epoch": 1.8298581400585454, "grad_norm": 1.6754438877105713, "learning_rate": 1.7765412472811771e-06, "loss": 1.8905, "step": 10158 }, { "epoch": 1.8300382796667418, "grad_norm": 1.5919604301452637, "learning_rate": 1.7728017718363133e-06, "loss": 1.7661, "step": 10159 }, { "epoch": 1.8302184192749382, "grad_norm": 1.6055443286895752, "learning_rate": 1.7690661651537989e-06, "loss": 1.7528, "step": 10160 }, { "epoch": 1.8303985588831344, "grad_norm": 1.9046741724014282, "learning_rate": 1.7653344275333105e-06, "loss": 1.8019, "step": 10161 }, { "epoch": 1.8305786984913308, "grad_norm": 1.4081676006317139, "learning_rate": 1.7616065592742038e-06, "loss": 1.4979, "step": 10162 }, { "epoch": 1.830758838099527, "grad_norm": 1.531377911567688, "learning_rate": 1.7578825606755168e-06, "loss": 1.5468, "step": 10163 }, { "epoch": 1.8309389777077234, "grad_norm": 1.4688129425048828, "learning_rate": 1.754162432035994e-06, "loss": 1.5188, "step": 10164 }, { "epoch": 1.8311191173159198, "grad_norm": 1.3672255277633667, "learning_rate": 1.7504461736540635e-06, "loss": 1.3315, "step": 10165 }, { "epoch": 1.8312992569241162, "grad_norm": 1.5113074779510498, "learning_rate": 1.7467337858278421e-06, "loss": 1.4907, "step": 10166 }, { "epoch": 1.8314793965323126, "grad_norm": 1.4541507959365845, "learning_rate": 1.7430252688551251e-06, "loss": 1.4352, "step": 10167 }, { "epoch": 1.831659536140509, "grad_norm": 1.515793800354004, "learning_rate": 1.7393206230334247e-06, "loss": 1.298, "step": 10168 }, { "epoch": 1.8318396757487052, "grad_norm": 1.4644867181777954, "learning_rate": 1.7356198486599085e-06, "loss": 1.4508, "step": 10169 }, { "epoch": 1.8320198153569016, "grad_norm": 1.5751250982284546, "learning_rate": 1.7319229460314612e-06, "loss": 1.6029, "step": 10170 }, { "epoch": 1.8321999549650978, "grad_norm": 1.658138394355774, "learning_rate": 1.7282299154446457e-06, "loss": 1.5106, "step": 10171 }, { "epoch": 1.8323800945732942, "grad_norm": 1.5127851963043213, "learning_rate": 1.7245407571957138e-06, "loss": 1.4804, "step": 10172 }, { "epoch": 1.8325602341814906, "grad_norm": 1.344985842704773, "learning_rate": 1.7208554715806123e-06, "loss": 1.3022, "step": 10173 }, { "epoch": 1.832740373789687, "grad_norm": 1.5787599086761475, "learning_rate": 1.7171740588949604e-06, "loss": 1.3923, "step": 10174 }, { "epoch": 1.8329205133978834, "grad_norm": 1.4782702922821045, "learning_rate": 1.7134965194340935e-06, "loss": 1.3885, "step": 10175 }, { "epoch": 1.8331006530060798, "grad_norm": 1.4548991918563843, "learning_rate": 1.7098228534930205e-06, "loss": 1.3111, "step": 10176 }, { "epoch": 1.833280792614276, "grad_norm": 1.478531837463379, "learning_rate": 1.7061530613664334e-06, "loss": 1.4396, "step": 10177 }, { "epoch": 1.8334609322224724, "grad_norm": 1.427119493484497, "learning_rate": 1.7024871433487244e-06, "loss": 1.2653, "step": 10178 }, { "epoch": 1.8336410718306688, "grad_norm": 1.5624186992645264, "learning_rate": 1.6988250997339638e-06, "loss": 1.4843, "step": 10179 }, { "epoch": 1.833821211438865, "grad_norm": 1.4301177263259888, "learning_rate": 1.6951669308159335e-06, "loss": 1.3281, "step": 10180 }, { "epoch": 1.8340013510470614, "grad_norm": 1.442164421081543, "learning_rate": 1.6915126368880873e-06, "loss": 1.243, "step": 10181 }, { "epoch": 1.8341814906552578, "grad_norm": 1.5827133655548096, "learning_rate": 1.6878622182435632e-06, "loss": 1.4054, "step": 10182 }, { "epoch": 1.8343616302634542, "grad_norm": 1.5139058828353882, "learning_rate": 1.6842156751751991e-06, "loss": 1.3846, "step": 10183 }, { "epoch": 1.8345417698716506, "grad_norm": 1.407804012298584, "learning_rate": 1.6805730079755167e-06, "loss": 1.3686, "step": 10184 }, { "epoch": 1.834721909479847, "grad_norm": 1.5105146169662476, "learning_rate": 1.6769342169367376e-06, "loss": 1.3396, "step": 10185 }, { "epoch": 1.8349020490880432, "grad_norm": 1.4453729391098022, "learning_rate": 1.6732993023507616e-06, "loss": 1.2886, "step": 10186 }, { "epoch": 1.8350821886962396, "grad_norm": 1.4646527767181396, "learning_rate": 1.6696682645091722e-06, "loss": 1.2723, "step": 10187 }, { "epoch": 1.8352623283044358, "grad_norm": 1.4350448846817017, "learning_rate": 1.666041103703253e-06, "loss": 1.2644, "step": 10188 }, { "epoch": 1.8354424679126322, "grad_norm": 1.661260962486267, "learning_rate": 1.6624178202239771e-06, "loss": 1.4881, "step": 10189 }, { "epoch": 1.8356226075208286, "grad_norm": 1.526147723197937, "learning_rate": 1.658798414361995e-06, "loss": 1.2691, "step": 10190 }, { "epoch": 1.835802747129025, "grad_norm": 1.576967477798462, "learning_rate": 1.6551828864076634e-06, "loss": 1.3672, "step": 10191 }, { "epoch": 1.8359828867372214, "grad_norm": 1.7092487812042236, "learning_rate": 1.651571236651006e-06, "loss": 1.4823, "step": 10192 }, { "epoch": 1.8361630263454178, "grad_norm": 1.584929347038269, "learning_rate": 1.647963465381752e-06, "loss": 1.4123, "step": 10193 }, { "epoch": 1.836343165953614, "grad_norm": 1.4314305782318115, "learning_rate": 1.6443595728893257e-06, "loss": 1.2829, "step": 10194 }, { "epoch": 1.8365233055618104, "grad_norm": 1.5391454696655273, "learning_rate": 1.6407595594628177e-06, "loss": 1.4597, "step": 10195 }, { "epoch": 1.8367034451700066, "grad_norm": 1.4913978576660156, "learning_rate": 1.637163425391025e-06, "loss": 1.1647, "step": 10196 }, { "epoch": 1.836883584778203, "grad_norm": 1.5547447204589844, "learning_rate": 1.6335711709624225e-06, "loss": 1.3477, "step": 10197 }, { "epoch": 1.8370637243863994, "grad_norm": 1.6399352550506592, "learning_rate": 1.629982796465185e-06, "loss": 1.1574, "step": 10198 }, { "epoch": 1.8372438639945958, "grad_norm": 1.5328298807144165, "learning_rate": 1.626398302187171e-06, "loss": 1.3126, "step": 10199 }, { "epoch": 1.8374240036027922, "grad_norm": 1.4650849103927612, "learning_rate": 1.6228176884159175e-06, "loss": 1.0858, "step": 10200 }, { "epoch": 1.8376041432109886, "grad_norm": 1.6660605669021606, "learning_rate": 1.6192409554386666e-06, "loss": 1.2572, "step": 10201 }, { "epoch": 1.8377842828191848, "grad_norm": 1.3632694482803345, "learning_rate": 1.6156681035423339e-06, "loss": 1.8665, "step": 10202 }, { "epoch": 1.8379644224273812, "grad_norm": 1.2494187355041504, "learning_rate": 1.6120991330135503e-06, "loss": 1.6396, "step": 10203 }, { "epoch": 1.8381445620355774, "grad_norm": 1.468569278717041, "learning_rate": 1.6085340441385987e-06, "loss": 1.7284, "step": 10204 }, { "epoch": 1.8383247016437738, "grad_norm": 1.3867123126983643, "learning_rate": 1.6049728372034778e-06, "loss": 1.7598, "step": 10205 }, { "epoch": 1.8385048412519702, "grad_norm": 1.3675591945648193, "learning_rate": 1.6014155124938647e-06, "loss": 1.4209, "step": 10206 }, { "epoch": 1.8386849808601666, "grad_norm": 1.4294353723526, "learning_rate": 1.5978620702951197e-06, "loss": 1.4595, "step": 10207 }, { "epoch": 1.838865120468363, "grad_norm": 1.5362040996551514, "learning_rate": 1.594312510892304e-06, "loss": 1.8629, "step": 10208 }, { "epoch": 1.8390452600765594, "grad_norm": 1.6337419748306274, "learning_rate": 1.590766834570173e-06, "loss": 1.6611, "step": 10209 }, { "epoch": 1.8392253996847558, "grad_norm": 1.7228219509124756, "learning_rate": 1.5872250416131385e-06, "loss": 1.8913, "step": 10210 }, { "epoch": 1.839405539292952, "grad_norm": 1.7560341358184814, "learning_rate": 1.5836871323053282e-06, "loss": 1.8003, "step": 10211 }, { "epoch": 1.8395856789011484, "grad_norm": 1.6732127666473389, "learning_rate": 1.5801531069305543e-06, "loss": 1.7057, "step": 10212 }, { "epoch": 1.8397658185093446, "grad_norm": 1.679484486579895, "learning_rate": 1.576622965772312e-06, "loss": 1.652, "step": 10213 }, { "epoch": 1.839945958117541, "grad_norm": 1.478257417678833, "learning_rate": 1.5730967091137972e-06, "loss": 1.3543, "step": 10214 }, { "epoch": 1.8401260977257374, "grad_norm": 1.4780102968215942, "learning_rate": 1.5695743372378724e-06, "loss": 1.3114, "step": 10215 }, { "epoch": 1.8403062373339338, "grad_norm": 1.3429474830627441, "learning_rate": 1.566055850427095e-06, "loss": 1.3006, "step": 10216 }, { "epoch": 1.8404863769421302, "grad_norm": 1.4642975330352783, "learning_rate": 1.5625412489637337e-06, "loss": 1.6605, "step": 10217 }, { "epoch": 1.8406665165503266, "grad_norm": 1.5364854335784912, "learning_rate": 1.5590305331297239e-06, "loss": 1.3993, "step": 10218 }, { "epoch": 1.8408466561585228, "grad_norm": 1.497288465499878, "learning_rate": 1.5555237032066905e-06, "loss": 1.5424, "step": 10219 }, { "epoch": 1.8410267957667192, "grad_norm": 1.3543816804885864, "learning_rate": 1.5520207594759416e-06, "loss": 1.2565, "step": 10220 }, { "epoch": 1.8412069353749154, "grad_norm": 1.4981905221939087, "learning_rate": 1.5485217022184973e-06, "loss": 1.4057, "step": 10221 }, { "epoch": 1.8413870749831118, "grad_norm": 1.4843984842300415, "learning_rate": 1.5450265317150381e-06, "loss": 1.5085, "step": 10222 }, { "epoch": 1.8415672145913082, "grad_norm": 1.5825599431991577, "learning_rate": 1.541535248245951e-06, "loss": 1.6376, "step": 10223 }, { "epoch": 1.8417473541995046, "grad_norm": 1.3740825653076172, "learning_rate": 1.5380478520913011e-06, "loss": 1.2755, "step": 10224 }, { "epoch": 1.841927493807701, "grad_norm": 1.5060142278671265, "learning_rate": 1.5345643435308421e-06, "loss": 1.3943, "step": 10225 }, { "epoch": 1.8421076334158974, "grad_norm": 1.491155743598938, "learning_rate": 1.531084722844034e-06, "loss": 1.4939, "step": 10226 }, { "epoch": 1.8422877730240936, "grad_norm": 1.6226614713668823, "learning_rate": 1.5276089903099977e-06, "loss": 1.4909, "step": 10227 }, { "epoch": 1.84246791263229, "grad_norm": 1.465377926826477, "learning_rate": 1.5241371462075548e-06, "loss": 1.3031, "step": 10228 }, { "epoch": 1.8426480522404862, "grad_norm": 1.575509786605835, "learning_rate": 1.520669190815227e-06, "loss": 1.5349, "step": 10229 }, { "epoch": 1.8428281918486826, "grad_norm": 1.548424243927002, "learning_rate": 1.5172051244111972e-06, "loss": 1.3707, "step": 10230 }, { "epoch": 1.843008331456879, "grad_norm": 1.5996744632720947, "learning_rate": 1.5137449472733546e-06, "loss": 1.6673, "step": 10231 }, { "epoch": 1.8431884710650754, "grad_norm": 1.638129472732544, "learning_rate": 1.5102886596792821e-06, "loss": 1.4558, "step": 10232 }, { "epoch": 1.8433686106732718, "grad_norm": 1.462013840675354, "learning_rate": 1.5068362619062304e-06, "loss": 1.421, "step": 10233 }, { "epoch": 1.8435487502814683, "grad_norm": 1.371056318283081, "learning_rate": 1.5033877542311558e-06, "loss": 1.3634, "step": 10234 }, { "epoch": 1.8437288898896644, "grad_norm": 1.4632117748260498, "learning_rate": 1.4999431369306872e-06, "loss": 1.3429, "step": 10235 }, { "epoch": 1.8439090294978608, "grad_norm": 1.5581333637237549, "learning_rate": 1.4965024102811642e-06, "loss": 1.3619, "step": 10236 }, { "epoch": 1.8440891691060572, "grad_norm": 1.3980218172073364, "learning_rate": 1.4930655745585886e-06, "loss": 1.2804, "step": 10237 }, { "epoch": 1.8442693087142534, "grad_norm": 1.5534332990646362, "learning_rate": 1.4896326300386676e-06, "loss": 1.4238, "step": 10238 }, { "epoch": 1.8444494483224498, "grad_norm": 1.6386970281600952, "learning_rate": 1.4862035769967864e-06, "loss": 1.3799, "step": 10239 }, { "epoch": 1.8446295879306462, "grad_norm": 1.5318262577056885, "learning_rate": 1.4827784157080248e-06, "loss": 1.4185, "step": 10240 }, { "epoch": 1.8448097275388426, "grad_norm": 1.6147505044937134, "learning_rate": 1.4793571464471522e-06, "loss": 1.4969, "step": 10241 }, { "epoch": 1.844989867147039, "grad_norm": 1.628230333328247, "learning_rate": 1.4759397694886157e-06, "loss": 1.448, "step": 10242 }, { "epoch": 1.8451700067552355, "grad_norm": 1.668972134590149, "learning_rate": 1.4725262851065513e-06, "loss": 1.4275, "step": 10243 }, { "epoch": 1.8453501463634316, "grad_norm": 1.4975521564483643, "learning_rate": 1.4691166935747846e-06, "loss": 1.3199, "step": 10244 }, { "epoch": 1.845530285971628, "grad_norm": 1.5816705226898193, "learning_rate": 1.4657109951668468e-06, "loss": 1.5044, "step": 10245 }, { "epoch": 1.8457104255798242, "grad_norm": 1.6212525367736816, "learning_rate": 1.4623091901559306e-06, "loss": 1.4703, "step": 10246 }, { "epoch": 1.8458905651880206, "grad_norm": 1.731925129890442, "learning_rate": 1.458911278814934e-06, "loss": 1.3719, "step": 10247 }, { "epoch": 1.846070704796217, "grad_norm": 1.4780364036560059, "learning_rate": 1.455517261416428e-06, "loss": 1.0591, "step": 10248 }, { "epoch": 1.8462508444044134, "grad_norm": 1.4438854455947876, "learning_rate": 1.452127138232673e-06, "loss": 1.1401, "step": 10249 }, { "epoch": 1.8464309840126099, "grad_norm": 1.4837759733200073, "learning_rate": 1.4487409095356398e-06, "loss": 1.2347, "step": 10250 }, { "epoch": 1.8466111236208063, "grad_norm": 1.4581249952316284, "learning_rate": 1.4453585755969667e-06, "loss": 1.2771, "step": 10251 }, { "epoch": 1.8467912632290024, "grad_norm": 1.3369414806365967, "learning_rate": 1.441980136687976e-06, "loss": 1.8201, "step": 10252 }, { "epoch": 1.8469714028371989, "grad_norm": 1.4064689874649048, "learning_rate": 1.4386055930796837e-06, "loss": 1.9002, "step": 10253 }, { "epoch": 1.847151542445395, "grad_norm": 1.3935283422470093, "learning_rate": 1.4352349450428015e-06, "loss": 1.8927, "step": 10254 }, { "epoch": 1.8473316820535914, "grad_norm": 1.5412906408309937, "learning_rate": 1.431868192847713e-06, "loss": 1.9827, "step": 10255 }, { "epoch": 1.8475118216617878, "grad_norm": 1.3270776271820068, "learning_rate": 1.4285053367645074e-06, "loss": 1.682, "step": 10256 }, { "epoch": 1.8476919612699843, "grad_norm": 1.4654545783996582, "learning_rate": 1.4251463770629414e-06, "loss": 1.7095, "step": 10257 }, { "epoch": 1.8478721008781807, "grad_norm": 1.601901888847351, "learning_rate": 1.4217913140124661e-06, "loss": 1.8171, "step": 10258 }, { "epoch": 1.848052240486377, "grad_norm": 1.5529544353485107, "learning_rate": 1.4184401478822385e-06, "loss": 1.6876, "step": 10259 }, { "epoch": 1.8482323800945732, "grad_norm": 1.6309280395507812, "learning_rate": 1.4150928789410767e-06, "loss": 1.7556, "step": 10260 }, { "epoch": 1.8484125197027697, "grad_norm": 1.6407078504562378, "learning_rate": 1.4117495074574993e-06, "loss": 1.817, "step": 10261 }, { "epoch": 1.8485926593109658, "grad_norm": 1.434386968612671, "learning_rate": 1.4084100336997085e-06, "loss": 1.4728, "step": 10262 }, { "epoch": 1.8487727989191622, "grad_norm": 1.4617233276367188, "learning_rate": 1.4050744579356011e-06, "loss": 1.6581, "step": 10263 }, { "epoch": 1.8489529385273586, "grad_norm": 1.372172236442566, "learning_rate": 1.4017427804327466e-06, "loss": 1.4673, "step": 10264 }, { "epoch": 1.849133078135555, "grad_norm": 1.4211292266845703, "learning_rate": 1.3984150014584197e-06, "loss": 1.3121, "step": 10265 }, { "epoch": 1.8493132177437515, "grad_norm": 1.3662176132202148, "learning_rate": 1.3950911212795682e-06, "loss": 1.2187, "step": 10266 }, { "epoch": 1.8494933573519479, "grad_norm": 1.4869539737701416, "learning_rate": 1.3917711401628231e-06, "loss": 1.51, "step": 10267 }, { "epoch": 1.8496734969601443, "grad_norm": 1.4698007106781006, "learning_rate": 1.388455058374527e-06, "loss": 1.5938, "step": 10268 }, { "epoch": 1.8498536365683405, "grad_norm": 1.5569158792495728, "learning_rate": 1.3851428761806895e-06, "loss": 1.6096, "step": 10269 }, { "epoch": 1.8500337761765369, "grad_norm": 1.3702348470687866, "learning_rate": 1.3818345938470089e-06, "loss": 1.1617, "step": 10270 }, { "epoch": 1.850213915784733, "grad_norm": 1.372243046760559, "learning_rate": 1.3785302116388786e-06, "loss": 1.2194, "step": 10271 }, { "epoch": 1.8503940553929294, "grad_norm": 1.5003570318222046, "learning_rate": 1.3752297298213646e-06, "loss": 1.4444, "step": 10272 }, { "epoch": 1.8505741950011259, "grad_norm": 1.478769063949585, "learning_rate": 1.371933148659249e-06, "loss": 1.4254, "step": 10273 }, { "epoch": 1.8507543346093223, "grad_norm": 1.5099936723709106, "learning_rate": 1.368640468416965e-06, "loss": 1.4137, "step": 10274 }, { "epoch": 1.8509344742175187, "grad_norm": 1.5282105207443237, "learning_rate": 1.3653516893586571e-06, "loss": 1.44, "step": 10275 }, { "epoch": 1.851114613825715, "grad_norm": 1.4914833307266235, "learning_rate": 1.3620668117481472e-06, "loss": 1.4503, "step": 10276 }, { "epoch": 1.8512947534339113, "grad_norm": 1.5417181253433228, "learning_rate": 1.3587858358489413e-06, "loss": 1.3782, "step": 10277 }, { "epoch": 1.8514748930421077, "grad_norm": 1.473596215248108, "learning_rate": 1.3555087619242456e-06, "loss": 1.3942, "step": 10278 }, { "epoch": 1.8516550326503038, "grad_norm": 1.4994829893112183, "learning_rate": 1.3522355902369444e-06, "loss": 1.383, "step": 10279 }, { "epoch": 1.8518351722585003, "grad_norm": 1.4472023248672485, "learning_rate": 1.3489663210496105e-06, "loss": 1.3407, "step": 10280 }, { "epoch": 1.8520153118666967, "grad_norm": 1.6378897428512573, "learning_rate": 1.345700954624496e-06, "loss": 1.472, "step": 10281 }, { "epoch": 1.852195451474893, "grad_norm": 1.3848503828048706, "learning_rate": 1.3424394912235572e-06, "loss": 1.3026, "step": 10282 }, { "epoch": 1.8523755910830895, "grad_norm": 1.7458934783935547, "learning_rate": 1.3391819311084243e-06, "loss": 1.6856, "step": 10283 }, { "epoch": 1.8525557306912859, "grad_norm": 1.5376888513565063, "learning_rate": 1.33592827454041e-06, "loss": 1.3225, "step": 10284 }, { "epoch": 1.852735870299482, "grad_norm": 1.5704020261764526, "learning_rate": 1.3326785217805282e-06, "loss": 1.6235, "step": 10285 }, { "epoch": 1.8529160099076785, "grad_norm": 1.5847327709197998, "learning_rate": 1.3294326730894702e-06, "loss": 1.411, "step": 10286 }, { "epoch": 1.8530961495158746, "grad_norm": 1.4361884593963623, "learning_rate": 1.3261907287276165e-06, "loss": 1.2121, "step": 10287 }, { "epoch": 1.853276289124071, "grad_norm": 1.5476760864257812, "learning_rate": 1.322952688955037e-06, "loss": 1.3703, "step": 10288 }, { "epoch": 1.8534564287322675, "grad_norm": 1.4903324842453003, "learning_rate": 1.3197185540314794e-06, "loss": 1.4153, "step": 10289 }, { "epoch": 1.8536365683404639, "grad_norm": 1.6558328866958618, "learning_rate": 1.3164883242163917e-06, "loss": 1.617, "step": 10290 }, { "epoch": 1.8538167079486603, "grad_norm": 1.6648749113082886, "learning_rate": 1.313261999768889e-06, "loss": 1.4665, "step": 10291 }, { "epoch": 1.8539968475568567, "grad_norm": 1.6215368509292603, "learning_rate": 1.3100395809478027e-06, "loss": 1.5125, "step": 10292 }, { "epoch": 1.854176987165053, "grad_norm": 1.5777924060821533, "learning_rate": 1.306821068011621e-06, "loss": 1.4679, "step": 10293 }, { "epoch": 1.8543571267732493, "grad_norm": 1.4901269674301147, "learning_rate": 1.3036064612185372e-06, "loss": 1.2107, "step": 10294 }, { "epoch": 1.8545372663814457, "grad_norm": 1.6526262760162354, "learning_rate": 1.3003957608264284e-06, "loss": 1.6491, "step": 10295 }, { "epoch": 1.8547174059896419, "grad_norm": 1.4791837930679321, "learning_rate": 1.2971889670928439e-06, "loss": 1.2922, "step": 10296 }, { "epoch": 1.8548975455978383, "grad_norm": 1.5801347494125366, "learning_rate": 1.2939860802750447e-06, "loss": 1.4372, "step": 10297 }, { "epoch": 1.8550776852060347, "grad_norm": 1.7281008958816528, "learning_rate": 1.2907871006299532e-06, "loss": 1.3421, "step": 10298 }, { "epoch": 1.855257824814231, "grad_norm": 1.4185928106307983, "learning_rate": 1.2875920284141973e-06, "loss": 1.2514, "step": 10299 }, { "epoch": 1.8554379644224275, "grad_norm": 1.4465584754943848, "learning_rate": 1.2844008638840777e-06, "loss": 1.2245, "step": 10300 }, { "epoch": 1.8556181040306239, "grad_norm": 1.5240312814712524, "learning_rate": 1.2812136072955948e-06, "loss": 1.5414, "step": 10301 }, { "epoch": 1.85579824363882, "grad_norm": 1.5047955513000488, "learning_rate": 1.278030258904428e-06, "loss": 1.7555, "step": 10302 }, { "epoch": 1.8559783832470165, "grad_norm": 1.3868755102157593, "learning_rate": 1.2748508189659447e-06, "loss": 1.8682, "step": 10303 }, { "epoch": 1.8561585228552127, "grad_norm": 1.3334591388702393, "learning_rate": 1.2716752877351912e-06, "loss": 1.6443, "step": 10304 }, { "epoch": 1.856338662463409, "grad_norm": 1.424626111984253, "learning_rate": 1.268503665466908e-06, "loss": 1.8996, "step": 10305 }, { "epoch": 1.8565188020716055, "grad_norm": 1.4766196012496948, "learning_rate": 1.2653359524155361e-06, "loss": 1.8598, "step": 10306 }, { "epoch": 1.8566989416798019, "grad_norm": 1.4316827058792114, "learning_rate": 1.2621721488351668e-06, "loss": 1.4301, "step": 10307 }, { "epoch": 1.8568790812879983, "grad_norm": 1.541999340057373, "learning_rate": 1.2590122549796134e-06, "loss": 1.6946, "step": 10308 }, { "epoch": 1.8570592208961947, "grad_norm": 1.4967455863952637, "learning_rate": 1.255856271102346e-06, "loss": 1.6284, "step": 10309 }, { "epoch": 1.8572393605043909, "grad_norm": 1.6613128185272217, "learning_rate": 1.2527041974565556e-06, "loss": 1.778, "step": 10310 }, { "epoch": 1.8574195001125873, "grad_norm": 2.0601134300231934, "learning_rate": 1.2495560342950851e-06, "loss": 2.1886, "step": 10311 }, { "epoch": 1.8575996397207835, "grad_norm": 1.4511455297470093, "learning_rate": 1.2464117818704878e-06, "loss": 1.3462, "step": 10312 }, { "epoch": 1.8577797793289799, "grad_norm": 1.3765714168548584, "learning_rate": 1.2432714404349954e-06, "loss": 1.4886, "step": 10313 }, { "epoch": 1.8579599189371763, "grad_norm": 1.4717071056365967, "learning_rate": 1.2401350102405063e-06, "loss": 1.4262, "step": 10314 }, { "epoch": 1.8581400585453727, "grad_norm": 1.4502239227294922, "learning_rate": 1.237002491538647e-06, "loss": 1.457, "step": 10315 }, { "epoch": 1.858320198153569, "grad_norm": 1.3519161939620972, "learning_rate": 1.2338738845806941e-06, "loss": 1.3211, "step": 10316 }, { "epoch": 1.8585003377617655, "grad_norm": 1.4037913084030151, "learning_rate": 1.2307491896176248e-06, "loss": 1.3838, "step": 10317 }, { "epoch": 1.8586804773699617, "grad_norm": 1.3911057710647583, "learning_rate": 1.2276284069001053e-06, "loss": 1.4691, "step": 10318 }, { "epoch": 1.858860616978158, "grad_norm": 1.5189173221588135, "learning_rate": 1.2245115366784687e-06, "loss": 1.403, "step": 10319 }, { "epoch": 1.8590407565863543, "grad_norm": 1.36824369430542, "learning_rate": 1.221398579202765e-06, "loss": 1.318, "step": 10320 }, { "epoch": 1.8592208961945507, "grad_norm": 1.467315912246704, "learning_rate": 1.2182895347227052e-06, "loss": 1.4373, "step": 10321 }, { "epoch": 1.859401035802747, "grad_norm": 1.5092490911483765, "learning_rate": 1.2151844034877013e-06, "loss": 1.4128, "step": 10322 }, { "epoch": 1.8595811754109435, "grad_norm": 1.3840872049331665, "learning_rate": 1.2120831857468373e-06, "loss": 1.2558, "step": 10323 }, { "epoch": 1.8597613150191399, "grad_norm": 1.416157603263855, "learning_rate": 1.208985881748903e-06, "loss": 1.2283, "step": 10324 }, { "epoch": 1.8599414546273363, "grad_norm": 1.3469048738479614, "learning_rate": 1.2058924917423498e-06, "loss": 1.3275, "step": 10325 }, { "epoch": 1.8601215942355327, "grad_norm": 1.4587441682815552, "learning_rate": 1.20280301597534e-06, "loss": 1.3504, "step": 10326 }, { "epoch": 1.8603017338437289, "grad_norm": 1.4653880596160889, "learning_rate": 1.1997174546957034e-06, "loss": 1.6289, "step": 10327 }, { "epoch": 1.8604818734519253, "grad_norm": 1.6527965068817139, "learning_rate": 1.1966358081509588e-06, "loss": 1.3499, "step": 10328 }, { "epoch": 1.8606620130601215, "grad_norm": 1.6877119541168213, "learning_rate": 1.1935580765883248e-06, "loss": 1.4594, "step": 10329 }, { "epoch": 1.8608421526683179, "grad_norm": 1.4581581354141235, "learning_rate": 1.1904842602546873e-06, "loss": 1.3978, "step": 10330 }, { "epoch": 1.8610222922765143, "grad_norm": 1.4317772388458252, "learning_rate": 1.1874143593966214e-06, "loss": 1.1962, "step": 10331 }, { "epoch": 1.8612024318847107, "grad_norm": 1.6028050184249878, "learning_rate": 1.1843483742604022e-06, "loss": 1.3752, "step": 10332 }, { "epoch": 1.861382571492907, "grad_norm": 1.6119439601898193, "learning_rate": 1.1812863050919831e-06, "loss": 1.5033, "step": 10333 }, { "epoch": 1.8615627111011035, "grad_norm": 1.5995889902114868, "learning_rate": 1.1782281521369954e-06, "loss": 1.3692, "step": 10334 }, { "epoch": 1.8617428507092997, "grad_norm": 1.4851839542388916, "learning_rate": 1.1751739156407649e-06, "loss": 1.3061, "step": 10335 }, { "epoch": 1.861922990317496, "grad_norm": 1.6096994876861572, "learning_rate": 1.1721235958483013e-06, "loss": 1.5765, "step": 10336 }, { "epoch": 1.8621031299256923, "grad_norm": 1.521888256072998, "learning_rate": 1.1690771930042975e-06, "loss": 1.4017, "step": 10337 }, { "epoch": 1.8622832695338887, "grad_norm": 1.445374846458435, "learning_rate": 1.1660347073531475e-06, "loss": 1.2434, "step": 10338 }, { "epoch": 1.862463409142085, "grad_norm": 1.6131882667541504, "learning_rate": 1.1629961391388998e-06, "loss": 1.6562, "step": 10339 }, { "epoch": 1.8626435487502815, "grad_norm": 1.5619280338287354, "learning_rate": 1.15996148860531e-06, "loss": 1.5297, "step": 10340 }, { "epoch": 1.862823688358478, "grad_norm": 1.5652689933776855, "learning_rate": 1.1569307559958276e-06, "loss": 1.3402, "step": 10341 }, { "epoch": 1.8630038279666743, "grad_norm": 1.4430828094482422, "learning_rate": 1.153903941553558e-06, "loss": 1.288, "step": 10342 }, { "epoch": 1.8631839675748705, "grad_norm": 1.5014451742172241, "learning_rate": 1.1508810455213348e-06, "loss": 1.4052, "step": 10343 }, { "epoch": 1.8633641071830669, "grad_norm": 1.6292515993118286, "learning_rate": 1.1478620681416363e-06, "loss": 1.4215, "step": 10344 }, { "epoch": 1.863544246791263, "grad_norm": 1.6442686319351196, "learning_rate": 1.1448470096566466e-06, "loss": 1.5864, "step": 10345 }, { "epoch": 1.8637243863994595, "grad_norm": 1.4768873453140259, "learning_rate": 1.1418358703082278e-06, "loss": 1.2326, "step": 10346 }, { "epoch": 1.8639045260076559, "grad_norm": 1.530089020729065, "learning_rate": 1.138828650337942e-06, "loss": 1.2744, "step": 10347 }, { "epoch": 1.8640846656158523, "grad_norm": 2.0019547939300537, "learning_rate": 1.1358253499870241e-06, "loss": 1.481, "step": 10348 }, { "epoch": 1.8642648052240487, "grad_norm": 1.6834994554519653, "learning_rate": 1.1328259694963982e-06, "loss": 1.5595, "step": 10349 }, { "epoch": 1.864444944832245, "grad_norm": 1.7519543170928955, "learning_rate": 1.1298305091066664e-06, "loss": 1.4956, "step": 10350 }, { "epoch": 1.8646250844404415, "grad_norm": 1.3320525884628296, "learning_rate": 1.1268389690581306e-06, "loss": 1.073, "step": 10351 }, { "epoch": 1.8648052240486377, "grad_norm": 1.2492834329605103, "learning_rate": 1.1238513495907654e-06, "loss": 1.6528, "step": 10352 }, { "epoch": 1.864985363656834, "grad_norm": 1.3353514671325684, "learning_rate": 1.1208676509442407e-06, "loss": 1.6936, "step": 10353 }, { "epoch": 1.8651655032650303, "grad_norm": 1.387160301208496, "learning_rate": 1.1178878733579035e-06, "loss": 1.7173, "step": 10354 }, { "epoch": 1.8653456428732267, "grad_norm": 1.4492627382278442, "learning_rate": 1.1149120170707905e-06, "loss": 1.81, "step": 10355 }, { "epoch": 1.865525782481423, "grad_norm": 1.3958799839019775, "learning_rate": 1.111940082321622e-06, "loss": 1.7279, "step": 10356 }, { "epoch": 1.8657059220896195, "grad_norm": 1.4937512874603271, "learning_rate": 1.108972069348807e-06, "loss": 1.7007, "step": 10357 }, { "epoch": 1.865886061697816, "grad_norm": 1.4598029851913452, "learning_rate": 1.1060079783904443e-06, "loss": 1.6331, "step": 10358 }, { "epoch": 1.8660662013060123, "grad_norm": 1.5105829238891602, "learning_rate": 1.1030478096843045e-06, "loss": 1.7168, "step": 10359 }, { "epoch": 1.8662463409142085, "grad_norm": 1.688501000404358, "learning_rate": 1.1000915634678532e-06, "loss": 1.7834, "step": 10360 }, { "epoch": 1.866426480522405, "grad_norm": 1.9013447761535645, "learning_rate": 1.0971392399782343e-06, "loss": 2.3872, "step": 10361 }, { "epoch": 1.866606620130601, "grad_norm": 1.5750701427459717, "learning_rate": 1.0941908394522914e-06, "loss": 1.6268, "step": 10362 }, { "epoch": 1.8667867597387975, "grad_norm": 1.4531400203704834, "learning_rate": 1.0912463621265356e-06, "loss": 1.7259, "step": 10363 }, { "epoch": 1.866966899346994, "grad_norm": 1.500709056854248, "learning_rate": 1.0883058082371721e-06, "loss": 1.4662, "step": 10364 }, { "epoch": 1.8671470389551903, "grad_norm": 1.5037617683410645, "learning_rate": 1.0853691780200904e-06, "loss": 1.3406, "step": 10365 }, { "epoch": 1.8673271785633867, "grad_norm": 1.3896816968917847, "learning_rate": 1.0824364717108737e-06, "loss": 1.3752, "step": 10366 }, { "epoch": 1.867507318171583, "grad_norm": 1.3719779253005981, "learning_rate": 1.079507689544773e-06, "loss": 1.3369, "step": 10367 }, { "epoch": 1.8676874577797793, "grad_norm": 1.5353895425796509, "learning_rate": 1.0765828317567395e-06, "loss": 1.6057, "step": 10368 }, { "epoch": 1.8678675973879757, "grad_norm": 1.48717200756073, "learning_rate": 1.0736618985814018e-06, "loss": 1.3854, "step": 10369 }, { "epoch": 1.8680477369961719, "grad_norm": 1.3276771306991577, "learning_rate": 1.070744890253067e-06, "loss": 1.3142, "step": 10370 }, { "epoch": 1.8682278766043683, "grad_norm": 1.4071564674377441, "learning_rate": 1.0678318070057592e-06, "loss": 1.3445, "step": 10371 }, { "epoch": 1.8684080162125647, "grad_norm": 1.3767555952072144, "learning_rate": 1.0649226490731411e-06, "loss": 1.2885, "step": 10372 }, { "epoch": 1.868588155820761, "grad_norm": 1.540975570678711, "learning_rate": 1.0620174166885931e-06, "loss": 1.5272, "step": 10373 }, { "epoch": 1.8687682954289575, "grad_norm": 1.39052414894104, "learning_rate": 1.0591161100851676e-06, "loss": 1.294, "step": 10374 }, { "epoch": 1.868948435037154, "grad_norm": 1.4286513328552246, "learning_rate": 1.0562187294956172e-06, "loss": 1.2455, "step": 10375 }, { "epoch": 1.86912857464535, "grad_norm": 1.4543054103851318, "learning_rate": 1.0533252751523559e-06, "loss": 1.4353, "step": 10376 }, { "epoch": 1.8693087142535465, "grad_norm": 1.527773141860962, "learning_rate": 1.0504357472874981e-06, "loss": 1.3891, "step": 10377 }, { "epoch": 1.869488853861743, "grad_norm": 1.5044718980789185, "learning_rate": 1.0475501461328473e-06, "loss": 1.4894, "step": 10378 }, { "epoch": 1.869668993469939, "grad_norm": 1.5600088834762573, "learning_rate": 1.044668471919874e-06, "loss": 1.5145, "step": 10379 }, { "epoch": 1.8698491330781355, "grad_norm": 1.5081301927566528, "learning_rate": 1.0417907248797542e-06, "loss": 1.4865, "step": 10380 }, { "epoch": 1.870029272686332, "grad_norm": 1.356370210647583, "learning_rate": 1.0389169052433368e-06, "loss": 1.221, "step": 10381 }, { "epoch": 1.8702094122945283, "grad_norm": 1.578597903251648, "learning_rate": 1.0360470132411593e-06, "loss": 1.3449, "step": 10382 }, { "epoch": 1.8703895519027247, "grad_norm": 1.7410928010940552, "learning_rate": 1.0331810491034434e-06, "loss": 1.5618, "step": 10383 }, { "epoch": 1.8705696915109211, "grad_norm": 1.689487099647522, "learning_rate": 1.0303190130600825e-06, "loss": 1.5637, "step": 10384 }, { "epoch": 1.8707498311191173, "grad_norm": 1.6730750799179077, "learning_rate": 1.0274609053406824e-06, "loss": 1.5663, "step": 10385 }, { "epoch": 1.8709299707273137, "grad_norm": 1.58219313621521, "learning_rate": 1.0246067261745152e-06, "loss": 1.424, "step": 10386 }, { "epoch": 1.87111011033551, "grad_norm": 1.6409006118774414, "learning_rate": 1.0217564757905419e-06, "loss": 1.5234, "step": 10387 }, { "epoch": 1.8712902499437063, "grad_norm": 1.4316788911819458, "learning_rate": 1.0189101544174018e-06, "loss": 1.3608, "step": 10388 }, { "epoch": 1.8714703895519027, "grad_norm": 1.677889108657837, "learning_rate": 1.0160677622834401e-06, "loss": 1.5967, "step": 10389 }, { "epoch": 1.871650529160099, "grad_norm": 1.6832510232925415, "learning_rate": 1.0132292996166582e-06, "loss": 1.2584, "step": 10390 }, { "epoch": 1.8718306687682955, "grad_norm": 1.5509663820266724, "learning_rate": 1.010394766644762e-06, "loss": 1.2566, "step": 10391 }, { "epoch": 1.872010808376492, "grad_norm": 1.6645439863204956, "learning_rate": 1.0075641635951372e-06, "loss": 1.6477, "step": 10392 }, { "epoch": 1.872190947984688, "grad_norm": 1.3653062582015991, "learning_rate": 1.0047374906948516e-06, "loss": 1.1367, "step": 10393 }, { "epoch": 1.8723710875928845, "grad_norm": 1.8081492185592651, "learning_rate": 1.0019147481706625e-06, "loss": 1.7334, "step": 10394 }, { "epoch": 1.8725512272010807, "grad_norm": 1.5390008687973022, "learning_rate": 9.990959362490003e-07, "loss": 1.3011, "step": 10395 }, { "epoch": 1.872731366809277, "grad_norm": 1.559303641319275, "learning_rate": 9.96281055156001e-07, "loss": 1.2945, "step": 10396 }, { "epoch": 1.8729115064174735, "grad_norm": 1.6995283365249634, "learning_rate": 9.93470105117461e-07, "loss": 1.269, "step": 10397 }, { "epoch": 1.87309164602567, "grad_norm": 1.572171688079834, "learning_rate": 9.906630863588784e-07, "loss": 1.5208, "step": 10398 }, { "epoch": 1.8732717856338663, "grad_norm": 1.4155441522598267, "learning_rate": 9.878599991054337e-07, "loss": 1.1741, "step": 10399 }, { "epoch": 1.8734519252420627, "grad_norm": 1.5453004837036133, "learning_rate": 9.850608435819864e-07, "loss": 1.189, "step": 10400 }, { "epoch": 1.873632064850259, "grad_norm": 1.5081183910369873, "learning_rate": 9.822656200130842e-07, "loss": 1.2334, "step": 10401 }, { "epoch": 1.8738122044584553, "grad_norm": 1.2858686447143555, "learning_rate": 9.79474328622959e-07, "loss": 1.4632, "step": 10402 }, { "epoch": 1.8739923440666515, "grad_norm": 1.393790602684021, "learning_rate": 9.76686969635532e-07, "loss": 1.8111, "step": 10403 }, { "epoch": 1.874172483674848, "grad_norm": 1.4437096118927002, "learning_rate": 9.739035432743903e-07, "loss": 2.123, "step": 10404 }, { "epoch": 1.8743526232830443, "grad_norm": 1.3957126140594482, "learning_rate": 9.711240497628281e-07, "loss": 1.5462, "step": 10405 }, { "epoch": 1.8745327628912407, "grad_norm": 1.5514625310897827, "learning_rate": 9.683484893238115e-07, "loss": 2.0841, "step": 10406 }, { "epoch": 1.8747129024994371, "grad_norm": 1.3577128648757935, "learning_rate": 9.655768621799954e-07, "loss": 1.4013, "step": 10407 }, { "epoch": 1.8748930421076335, "grad_norm": 1.664527416229248, "learning_rate": 9.628091685537244e-07, "loss": 2.1026, "step": 10408 }, { "epoch": 1.87507318171583, "grad_norm": 1.5408542156219482, "learning_rate": 9.600454086670152e-07, "loss": 1.7735, "step": 10409 }, { "epoch": 1.8752533213240261, "grad_norm": 1.6988085508346558, "learning_rate": 9.572855827415794e-07, "loss": 1.8174, "step": 10410 }, { "epoch": 1.8754334609322225, "grad_norm": 1.9394913911819458, "learning_rate": 9.54529690998801e-07, "loss": 1.761, "step": 10411 }, { "epoch": 1.8756136005404187, "grad_norm": 1.610665202140808, "learning_rate": 9.517777336597644e-07, "loss": 1.6989, "step": 10412 }, { "epoch": 1.875793740148615, "grad_norm": 1.3886268138885498, "learning_rate": 9.490297109452262e-07, "loss": 1.5696, "step": 10413 }, { "epoch": 1.8759738797568115, "grad_norm": 1.6117889881134033, "learning_rate": 9.462856230756323e-07, "loss": 1.4527, "step": 10414 }, { "epoch": 1.876154019365008, "grad_norm": 1.267108678817749, "learning_rate": 9.435454702711177e-07, "loss": 1.2498, "step": 10415 }, { "epoch": 1.8763341589732043, "grad_norm": 1.3357423543930054, "learning_rate": 9.408092527514845e-07, "loss": 1.2348, "step": 10416 }, { "epoch": 1.8765142985814007, "grad_norm": 1.5065866708755493, "learning_rate": 9.380769707362403e-07, "loss": 1.544, "step": 10417 }, { "epoch": 1.876694438189597, "grad_norm": 1.4108260869979858, "learning_rate": 9.353486244445653e-07, "loss": 1.3682, "step": 10418 }, { "epoch": 1.8768745777977933, "grad_norm": 1.4301656484603882, "learning_rate": 9.326242140953234e-07, "loss": 1.2654, "step": 10419 }, { "epoch": 1.8770547174059895, "grad_norm": 1.4730911254882812, "learning_rate": 9.299037399070676e-07, "loss": 1.3764, "step": 10420 }, { "epoch": 1.877234857014186, "grad_norm": 1.4049005508422852, "learning_rate": 9.271872020980288e-07, "loss": 1.5352, "step": 10421 }, { "epoch": 1.8774149966223823, "grad_norm": 1.3225533962249756, "learning_rate": 9.244746008861327e-07, "loss": 1.1391, "step": 10422 }, { "epoch": 1.8775951362305787, "grad_norm": 1.3799593448638916, "learning_rate": 9.21765936488983e-07, "loss": 1.4217, "step": 10423 }, { "epoch": 1.8777752758387751, "grad_norm": 1.4268847703933716, "learning_rate": 9.19061209123867e-07, "loss": 1.3977, "step": 10424 }, { "epoch": 1.8779554154469715, "grad_norm": 1.5707372426986694, "learning_rate": 9.163604190077502e-07, "loss": 1.4816, "step": 10425 }, { "epoch": 1.8781355550551677, "grad_norm": 1.4575191736221313, "learning_rate": 9.136635663572924e-07, "loss": 1.3698, "step": 10426 }, { "epoch": 1.8783156946633641, "grad_norm": 1.4714783430099487, "learning_rate": 9.109706513888372e-07, "loss": 1.4828, "step": 10427 }, { "epoch": 1.8784958342715603, "grad_norm": 1.539023756980896, "learning_rate": 9.082816743184008e-07, "loss": 1.4209, "step": 10428 }, { "epoch": 1.8786759738797567, "grad_norm": 1.3515119552612305, "learning_rate": 9.055966353616996e-07, "loss": 1.0816, "step": 10429 }, { "epoch": 1.8788561134879531, "grad_norm": 1.5612529516220093, "learning_rate": 9.029155347341223e-07, "loss": 1.385, "step": 10430 }, { "epoch": 1.8790362530961495, "grad_norm": 1.56065034866333, "learning_rate": 9.002383726507469e-07, "loss": 1.4927, "step": 10431 }, { "epoch": 1.879216392704346, "grad_norm": 1.60688054561615, "learning_rate": 8.975651493263349e-07, "loss": 1.6076, "step": 10432 }, { "epoch": 1.8793965323125423, "grad_norm": 1.6395341157913208, "learning_rate": 8.948958649753314e-07, "loss": 1.35, "step": 10433 }, { "epoch": 1.8795766719207385, "grad_norm": 1.517236590385437, "learning_rate": 8.922305198118597e-07, "loss": 1.4386, "step": 10434 }, { "epoch": 1.879756811528935, "grad_norm": 1.5569759607315063, "learning_rate": 8.895691140497376e-07, "loss": 1.2799, "step": 10435 }, { "epoch": 1.8799369511371313, "grad_norm": 1.5908175706863403, "learning_rate": 8.869116479024608e-07, "loss": 1.4303, "step": 10436 }, { "epoch": 1.8801170907453275, "grad_norm": 1.7033532857894897, "learning_rate": 8.84258121583209e-07, "loss": 1.5695, "step": 10437 }, { "epoch": 1.880297230353524, "grad_norm": 1.4639214277267456, "learning_rate": 8.816085353048454e-07, "loss": 1.2193, "step": 10438 }, { "epoch": 1.8804773699617203, "grad_norm": 1.488935112953186, "learning_rate": 8.789628892799218e-07, "loss": 1.1333, "step": 10439 }, { "epoch": 1.8806575095699167, "grad_norm": 1.5773502588272095, "learning_rate": 8.763211837206686e-07, "loss": 1.4225, "step": 10440 }, { "epoch": 1.8808376491781131, "grad_norm": 1.4575687646865845, "learning_rate": 8.736834188390053e-07, "loss": 1.2027, "step": 10441 }, { "epoch": 1.8810177887863095, "grad_norm": 1.5660375356674194, "learning_rate": 8.710495948465292e-07, "loss": 1.4276, "step": 10442 }, { "epoch": 1.8811979283945057, "grad_norm": 1.5975563526153564, "learning_rate": 8.684197119545212e-07, "loss": 1.4262, "step": 10443 }, { "epoch": 1.8813780680027021, "grad_norm": 1.5464791059494019, "learning_rate": 8.657937703739516e-07, "loss": 1.4262, "step": 10444 }, { "epoch": 1.8815582076108983, "grad_norm": 1.4302457571029663, "learning_rate": 8.631717703154796e-07, "loss": 1.2234, "step": 10445 }, { "epoch": 1.8817383472190947, "grad_norm": 1.5488194227218628, "learning_rate": 8.605537119894313e-07, "loss": 1.3303, "step": 10446 }, { "epoch": 1.8819184868272911, "grad_norm": 1.6429537534713745, "learning_rate": 8.579395956058279e-07, "loss": 1.3334, "step": 10447 }, { "epoch": 1.8820986264354875, "grad_norm": 1.6289007663726807, "learning_rate": 8.55329421374379e-07, "loss": 1.3364, "step": 10448 }, { "epoch": 1.882278766043684, "grad_norm": 1.5279297828674316, "learning_rate": 8.527231895044619e-07, "loss": 1.3556, "step": 10449 }, { "epoch": 1.8824589056518803, "grad_norm": 1.5164117813110352, "learning_rate": 8.501209002051535e-07, "loss": 1.2232, "step": 10450 }, { "epoch": 1.8826390452600765, "grad_norm": 1.638219952583313, "learning_rate": 8.475225536852038e-07, "loss": 1.3615, "step": 10451 }, { "epoch": 1.882819184868273, "grad_norm": 1.413591742515564, "learning_rate": 8.44928150153057e-07, "loss": 1.8522, "step": 10452 }, { "epoch": 1.8829993244764691, "grad_norm": 1.4006863832473755, "learning_rate": 8.423376898168245e-07, "loss": 1.9277, "step": 10453 }, { "epoch": 1.8831794640846655, "grad_norm": 1.4539275169372559, "learning_rate": 8.397511728843233e-07, "loss": 1.8151, "step": 10454 }, { "epoch": 1.883359603692862, "grad_norm": 1.541306972503662, "learning_rate": 8.371685995630429e-07, "loss": 1.72, "step": 10455 }, { "epoch": 1.8835397433010583, "grad_norm": 1.3538320064544678, "learning_rate": 8.345899700601456e-07, "loss": 1.5999, "step": 10456 }, { "epoch": 1.8837198829092547, "grad_norm": 1.625203013420105, "learning_rate": 8.320152845824935e-07, "loss": 1.9385, "step": 10457 }, { "epoch": 1.8839000225174511, "grad_norm": 1.4941275119781494, "learning_rate": 8.294445433366327e-07, "loss": 1.4806, "step": 10458 }, { "epoch": 1.8840801621256473, "grad_norm": 1.6255981922149658, "learning_rate": 8.268777465287758e-07, "loss": 1.9224, "step": 10459 }, { "epoch": 1.8842603017338437, "grad_norm": 1.6820573806762695, "learning_rate": 8.243148943648305e-07, "loss": 1.428, "step": 10460 }, { "epoch": 1.88444044134204, "grad_norm": 1.7153754234313965, "learning_rate": 8.217559870503988e-07, "loss": 1.9918, "step": 10461 }, { "epoch": 1.8846205809502363, "grad_norm": 1.9701718091964722, "learning_rate": 8.192010247907445e-07, "loss": 1.9317, "step": 10462 }, { "epoch": 1.8848007205584327, "grad_norm": 1.584980845451355, "learning_rate": 8.166500077908257e-07, "loss": 1.7143, "step": 10463 }, { "epoch": 1.8849808601666291, "grad_norm": 1.4482595920562744, "learning_rate": 8.141029362552899e-07, "loss": 1.5345, "step": 10464 }, { "epoch": 1.8851609997748255, "grad_norm": 1.411331295967102, "learning_rate": 8.115598103884569e-07, "loss": 1.3958, "step": 10465 }, { "epoch": 1.885341139383022, "grad_norm": 1.4702675342559814, "learning_rate": 8.090206303943415e-07, "loss": 1.3084, "step": 10466 }, { "epoch": 1.8855212789912184, "grad_norm": 1.4231278896331787, "learning_rate": 8.06485396476625e-07, "loss": 1.4413, "step": 10467 }, { "epoch": 1.8857014185994145, "grad_norm": 1.5550378561019897, "learning_rate": 8.039541088387004e-07, "loss": 1.5643, "step": 10468 }, { "epoch": 1.885881558207611, "grad_norm": 1.3691565990447998, "learning_rate": 8.014267676836051e-07, "loss": 1.3971, "step": 10469 }, { "epoch": 1.8860616978158071, "grad_norm": 1.6286625862121582, "learning_rate": 7.989033732140938e-07, "loss": 1.7423, "step": 10470 }, { "epoch": 1.8862418374240035, "grad_norm": 1.3730206489562988, "learning_rate": 7.963839256325877e-07, "loss": 1.3751, "step": 10471 }, { "epoch": 1.8864219770322, "grad_norm": 1.4568886756896973, "learning_rate": 7.938684251411921e-07, "loss": 1.3351, "step": 10472 }, { "epoch": 1.8866021166403963, "grad_norm": 1.4766813516616821, "learning_rate": 7.91356871941712e-07, "loss": 1.3378, "step": 10473 }, { "epoch": 1.8867822562485927, "grad_norm": 1.369766116142273, "learning_rate": 7.888492662356084e-07, "loss": 1.3716, "step": 10474 }, { "epoch": 1.8869623958567892, "grad_norm": 1.5298302173614502, "learning_rate": 7.863456082240539e-07, "loss": 1.5975, "step": 10475 }, { "epoch": 1.8871425354649853, "grad_norm": 1.4681299924850464, "learning_rate": 7.838458981078767e-07, "loss": 1.453, "step": 10476 }, { "epoch": 1.8873226750731817, "grad_norm": 1.540401577949524, "learning_rate": 7.813501360876107e-07, "loss": 1.347, "step": 10477 }, { "epoch": 1.887502814681378, "grad_norm": 1.538756012916565, "learning_rate": 7.788583223634627e-07, "loss": 1.4703, "step": 10478 }, { "epoch": 1.8876829542895743, "grad_norm": 1.3124223947525024, "learning_rate": 7.763704571353281e-07, "loss": 1.18, "step": 10479 }, { "epoch": 1.8878630938977707, "grad_norm": 1.5526539087295532, "learning_rate": 7.738865406027862e-07, "loss": 1.6838, "step": 10480 }, { "epoch": 1.8880432335059671, "grad_norm": 1.5394126176834106, "learning_rate": 7.714065729650776e-07, "loss": 1.5818, "step": 10481 }, { "epoch": 1.8882233731141636, "grad_norm": 1.473955512046814, "learning_rate": 7.689305544211601e-07, "loss": 1.3791, "step": 10482 }, { "epoch": 1.88840351272236, "grad_norm": 1.4336568117141724, "learning_rate": 7.664584851696522e-07, "loss": 1.3102, "step": 10483 }, { "epoch": 1.8885836523305561, "grad_norm": 1.5253362655639648, "learning_rate": 7.639903654088676e-07, "loss": 1.4434, "step": 10484 }, { "epoch": 1.8887637919387525, "grad_norm": 1.4720733165740967, "learning_rate": 7.61526195336787e-07, "loss": 1.3012, "step": 10485 }, { "epoch": 1.8889439315469487, "grad_norm": 1.557500958442688, "learning_rate": 7.590659751510965e-07, "loss": 1.4302, "step": 10486 }, { "epoch": 1.8891240711551451, "grad_norm": 1.4172635078430176, "learning_rate": 7.566097050491438e-07, "loss": 1.0922, "step": 10487 }, { "epoch": 1.8893042107633415, "grad_norm": 1.4982826709747314, "learning_rate": 7.541573852279826e-07, "loss": 1.4263, "step": 10488 }, { "epoch": 1.889484350371538, "grad_norm": 1.4319500923156738, "learning_rate": 7.517090158843276e-07, "loss": 1.3974, "step": 10489 }, { "epoch": 1.8896644899797344, "grad_norm": 1.498268485069275, "learning_rate": 7.492645972145829e-07, "loss": 1.3361, "step": 10490 }, { "epoch": 1.8898446295879308, "grad_norm": 1.580480933189392, "learning_rate": 7.46824129414847e-07, "loss": 1.3082, "step": 10491 }, { "epoch": 1.8900247691961272, "grad_norm": 1.388025164604187, "learning_rate": 7.443876126808913e-07, "loss": 1.2081, "step": 10492 }, { "epoch": 1.8902049088043233, "grad_norm": 1.6182637214660645, "learning_rate": 7.419550472081648e-07, "loss": 1.502, "step": 10493 }, { "epoch": 1.8903850484125198, "grad_norm": 1.5960328578948975, "learning_rate": 7.395264331918117e-07, "loss": 1.3649, "step": 10494 }, { "epoch": 1.890565188020716, "grad_norm": 1.561490774154663, "learning_rate": 7.371017708266536e-07, "loss": 1.3794, "step": 10495 }, { "epoch": 1.8907453276289123, "grad_norm": 1.5892928838729858, "learning_rate": 7.346810603072018e-07, "loss": 1.2528, "step": 10496 }, { "epoch": 1.8909254672371087, "grad_norm": 1.6903246641159058, "learning_rate": 7.322643018276343e-07, "loss": 1.5753, "step": 10497 }, { "epoch": 1.8911056068453052, "grad_norm": 1.6042462587356567, "learning_rate": 7.298514955818292e-07, "loss": 1.3472, "step": 10498 }, { "epoch": 1.8912857464535016, "grad_norm": 1.5658302307128906, "learning_rate": 7.274426417633373e-07, "loss": 1.3118, "step": 10499 }, { "epoch": 1.891465886061698, "grad_norm": 1.4369940757751465, "learning_rate": 7.250377405653986e-07, "loss": 1.1206, "step": 10500 }, { "epoch": 1.8916460256698941, "grad_norm": 1.5997005701065063, "learning_rate": 7.226367921809307e-07, "loss": 1.4936, "step": 10501 }, { "epoch": 1.8918261652780906, "grad_norm": 1.4643075466156006, "learning_rate": 7.202397968025354e-07, "loss": 1.9299, "step": 10502 }, { "epoch": 1.8920063048862867, "grad_norm": 1.4001431465148926, "learning_rate": 7.178467546225032e-07, "loss": 1.8039, "step": 10503 }, { "epoch": 1.8921864444944831, "grad_norm": 1.3389837741851807, "learning_rate": 7.154576658327972e-07, "loss": 1.8528, "step": 10504 }, { "epoch": 1.8923665841026795, "grad_norm": 1.4355262517929077, "learning_rate": 7.130725306250696e-07, "loss": 1.7639, "step": 10505 }, { "epoch": 1.892546723710876, "grad_norm": 1.3411816358566284, "learning_rate": 7.106913491906619e-07, "loss": 1.524, "step": 10506 }, { "epoch": 1.8927268633190724, "grad_norm": 1.4855798482894897, "learning_rate": 7.083141217205824e-07, "loss": 1.7806, "step": 10507 }, { "epoch": 1.8929070029272688, "grad_norm": 1.4194215536117554, "learning_rate": 7.059408484055396e-07, "loss": 1.7253, "step": 10508 }, { "epoch": 1.893087142535465, "grad_norm": 1.7397346496582031, "learning_rate": 7.035715294359036e-07, "loss": 1.8814, "step": 10509 }, { "epoch": 1.8932672821436614, "grad_norm": 1.546053409576416, "learning_rate": 7.012061650017555e-07, "loss": 1.8013, "step": 10510 }, { "epoch": 1.8934474217518575, "grad_norm": 1.7077016830444336, "learning_rate": 6.988447552928323e-07, "loss": 2.008, "step": 10511 }, { "epoch": 1.893627561360054, "grad_norm": 1.927232265472412, "learning_rate": 6.964873004985717e-07, "loss": 1.986, "step": 10512 }, { "epoch": 1.8938077009682504, "grad_norm": 1.558251976966858, "learning_rate": 6.941338008080889e-07, "loss": 1.5316, "step": 10513 }, { "epoch": 1.8939878405764468, "grad_norm": 1.4736559391021729, "learning_rate": 6.917842564101662e-07, "loss": 1.401, "step": 10514 }, { "epoch": 1.8941679801846432, "grad_norm": 1.651094675064087, "learning_rate": 6.894386674933029e-07, "loss": 1.864, "step": 10515 }, { "epoch": 1.8943481197928396, "grad_norm": 1.3020305633544922, "learning_rate": 6.870970342456484e-07, "loss": 1.2879, "step": 10516 }, { "epoch": 1.8945282594010358, "grad_norm": 1.5028489828109741, "learning_rate": 6.84759356855047e-07, "loss": 1.44, "step": 10517 }, { "epoch": 1.8947083990092322, "grad_norm": 1.3594685792922974, "learning_rate": 6.824256355090319e-07, "loss": 1.2845, "step": 10518 }, { "epoch": 1.8948885386174283, "grad_norm": 1.4115666151046753, "learning_rate": 6.800958703948091e-07, "loss": 1.4618, "step": 10519 }, { "epoch": 1.8950686782256247, "grad_norm": 1.3464326858520508, "learning_rate": 6.777700616992733e-07, "loss": 1.2171, "step": 10520 }, { "epoch": 1.8952488178338212, "grad_norm": 1.4653886556625366, "learning_rate": 6.754482096089976e-07, "loss": 1.4268, "step": 10521 }, { "epoch": 1.8954289574420176, "grad_norm": 1.4941991567611694, "learning_rate": 6.73130314310244e-07, "loss": 1.597, "step": 10522 }, { "epoch": 1.895609097050214, "grad_norm": 1.4208091497421265, "learning_rate": 6.70816375988953e-07, "loss": 1.4384, "step": 10523 }, { "epoch": 1.8957892366584104, "grad_norm": 1.5423327684402466, "learning_rate": 6.685063948307424e-07, "loss": 1.4755, "step": 10524 }, { "epoch": 1.8959693762666068, "grad_norm": 1.451150894165039, "learning_rate": 6.662003710209197e-07, "loss": 1.3439, "step": 10525 }, { "epoch": 1.896149515874803, "grad_norm": 1.5733636617660522, "learning_rate": 6.638983047444758e-07, "loss": 1.4717, "step": 10526 }, { "epoch": 1.8963296554829994, "grad_norm": 1.4216341972351074, "learning_rate": 6.616001961860741e-07, "loss": 1.5663, "step": 10527 }, { "epoch": 1.8965097950911955, "grad_norm": 1.428039312362671, "learning_rate": 6.593060455300726e-07, "loss": 1.3089, "step": 10528 }, { "epoch": 1.896689934699392, "grad_norm": 1.3801791667938232, "learning_rate": 6.57015852960513e-07, "loss": 1.2624, "step": 10529 }, { "epoch": 1.8968700743075884, "grad_norm": 1.535874605178833, "learning_rate": 6.547296186611041e-07, "loss": 1.4809, "step": 10530 }, { "epoch": 1.8970502139157848, "grad_norm": 1.6675653457641602, "learning_rate": 6.524473428152489e-07, "loss": 1.4368, "step": 10531 }, { "epoch": 1.8972303535239812, "grad_norm": 1.5514951944351196, "learning_rate": 6.501690256060344e-07, "loss": 1.3244, "step": 10532 }, { "epoch": 1.8974104931321776, "grad_norm": 1.6219271421432495, "learning_rate": 6.478946672162256e-07, "loss": 1.5808, "step": 10533 }, { "epoch": 1.8975906327403738, "grad_norm": 1.5591564178466797, "learning_rate": 6.456242678282654e-07, "loss": 1.3291, "step": 10534 }, { "epoch": 1.8977707723485702, "grad_norm": 1.5878000259399414, "learning_rate": 6.433578276242858e-07, "loss": 1.4047, "step": 10535 }, { "epoch": 1.8979509119567664, "grad_norm": 1.5497514009475708, "learning_rate": 6.410953467860969e-07, "loss": 1.5151, "step": 10536 }, { "epoch": 1.8981310515649628, "grad_norm": 1.6520183086395264, "learning_rate": 6.388368254951982e-07, "loss": 1.4225, "step": 10537 }, { "epoch": 1.8983111911731592, "grad_norm": 1.4138176441192627, "learning_rate": 6.365822639327723e-07, "loss": 1.3424, "step": 10538 }, { "epoch": 1.8984913307813556, "grad_norm": 1.5050724744796753, "learning_rate": 6.343316622796691e-07, "loss": 1.3083, "step": 10539 }, { "epoch": 1.898671470389552, "grad_norm": 1.4484436511993408, "learning_rate": 6.320850207164386e-07, "loss": 1.3235, "step": 10540 }, { "epoch": 1.8988516099977484, "grad_norm": 1.8164695501327515, "learning_rate": 6.298423394232978e-07, "loss": 1.6193, "step": 10541 }, { "epoch": 1.8990317496059446, "grad_norm": 1.5286498069763184, "learning_rate": 6.276036185801526e-07, "loss": 1.3495, "step": 10542 }, { "epoch": 1.899211889214141, "grad_norm": 1.5159372091293335, "learning_rate": 6.25368858366604e-07, "loss": 1.2539, "step": 10543 }, { "epoch": 1.8993920288223372, "grad_norm": 1.628412127494812, "learning_rate": 6.23138058961914e-07, "loss": 1.444, "step": 10544 }, { "epoch": 1.8995721684305336, "grad_norm": 1.5929714441299438, "learning_rate": 6.209112205450452e-07, "loss": 1.3879, "step": 10545 }, { "epoch": 1.89975230803873, "grad_norm": 1.566931128501892, "learning_rate": 6.186883432946211e-07, "loss": 1.2622, "step": 10546 }, { "epoch": 1.8999324476469264, "grad_norm": 1.6450026035308838, "learning_rate": 6.164694273889604e-07, "loss": 1.4355, "step": 10547 }, { "epoch": 1.9001125872551228, "grad_norm": 1.5769636631011963, "learning_rate": 6.142544730060706e-07, "loss": 1.2818, "step": 10548 }, { "epoch": 1.9002927268633192, "grad_norm": 1.4638614654541016, "learning_rate": 6.120434803236374e-07, "loss": 1.354, "step": 10549 }, { "epoch": 1.9004728664715156, "grad_norm": 1.5841487646102905, "learning_rate": 6.098364495190135e-07, "loss": 1.4453, "step": 10550 }, { "epoch": 1.9006530060797118, "grad_norm": 1.3609451055526733, "learning_rate": 6.076333807692514e-07, "loss": 1.1144, "step": 10551 }, { "epoch": 1.9008331456879082, "grad_norm": 1.4796746969223022, "learning_rate": 6.054342742510877e-07, "loss": 1.8244, "step": 10552 }, { "epoch": 1.9010132852961044, "grad_norm": 1.2966715097427368, "learning_rate": 6.032391301409257e-07, "loss": 1.7389, "step": 10553 }, { "epoch": 1.9011934249043008, "grad_norm": 1.3886855840682983, "learning_rate": 6.01047948614858e-07, "loss": 1.9183, "step": 10554 }, { "epoch": 1.9013735645124972, "grad_norm": 1.391626000404358, "learning_rate": 5.98860729848666e-07, "loss": 1.9345, "step": 10555 }, { "epoch": 1.9015537041206936, "grad_norm": 1.4676650762557983, "learning_rate": 5.966774740178039e-07, "loss": 1.8951, "step": 10556 }, { "epoch": 1.90173384372889, "grad_norm": 1.3372501134872437, "learning_rate": 5.944981812974093e-07, "loss": 1.6935, "step": 10557 }, { "epoch": 1.9019139833370864, "grad_norm": 1.4513601064682007, "learning_rate": 5.923228518623036e-07, "loss": 1.402, "step": 10558 }, { "epoch": 1.9020941229452826, "grad_norm": 1.8146361112594604, "learning_rate": 5.901514858869972e-07, "loss": 2.262, "step": 10559 }, { "epoch": 1.902274262553479, "grad_norm": 1.6722593307495117, "learning_rate": 5.879840835456674e-07, "loss": 1.7593, "step": 10560 }, { "epoch": 1.9024544021616752, "grad_norm": 1.9109129905700684, "learning_rate": 5.858206450121917e-07, "loss": 1.7727, "step": 10561 }, { "epoch": 1.9026345417698716, "grad_norm": 1.459641695022583, "learning_rate": 5.836611704601147e-07, "loss": 1.3479, "step": 10562 }, { "epoch": 1.902814681378068, "grad_norm": 1.444750189781189, "learning_rate": 5.815056600626645e-07, "loss": 1.4824, "step": 10563 }, { "epoch": 1.9029948209862644, "grad_norm": 1.3514031171798706, "learning_rate": 5.793541139927638e-07, "loss": 1.3809, "step": 10564 }, { "epoch": 1.9031749605944608, "grad_norm": 1.3343899250030518, "learning_rate": 5.772065324230025e-07, "loss": 1.3201, "step": 10565 }, { "epoch": 1.9033551002026572, "grad_norm": 1.4790526628494263, "learning_rate": 5.750629155256593e-07, "loss": 1.3902, "step": 10566 }, { "epoch": 1.9035352398108534, "grad_norm": 1.3490937948226929, "learning_rate": 5.729232634726966e-07, "loss": 1.2023, "step": 10567 }, { "epoch": 1.9037153794190498, "grad_norm": 1.4462461471557617, "learning_rate": 5.707875764357551e-07, "loss": 1.3443, "step": 10568 }, { "epoch": 1.903895519027246, "grad_norm": 1.5272160768508911, "learning_rate": 5.686558545861532e-07, "loss": 1.4835, "step": 10569 }, { "epoch": 1.9040756586354424, "grad_norm": 1.3934524059295654, "learning_rate": 5.66528098094904e-07, "loss": 1.176, "step": 10570 }, { "epoch": 1.9042557982436388, "grad_norm": 1.2959026098251343, "learning_rate": 5.644043071326932e-07, "loss": 1.1733, "step": 10571 }, { "epoch": 1.9044359378518352, "grad_norm": 1.4108245372772217, "learning_rate": 5.622844818698903e-07, "loss": 1.2941, "step": 10572 }, { "epoch": 1.9046160774600316, "grad_norm": 1.5393667221069336, "learning_rate": 5.601686224765479e-07, "loss": 1.3567, "step": 10573 }, { "epoch": 1.904796217068228, "grad_norm": 1.3832629919052124, "learning_rate": 5.580567291223914e-07, "loss": 1.2798, "step": 10574 }, { "epoch": 1.9049763566764242, "grad_norm": 1.4247702360153198, "learning_rate": 5.559488019768466e-07, "loss": 1.2418, "step": 10575 }, { "epoch": 1.9051564962846206, "grad_norm": 1.3773174285888672, "learning_rate": 5.538448412090058e-07, "loss": 1.155, "step": 10576 }, { "epoch": 1.905336635892817, "grad_norm": 1.4633855819702148, "learning_rate": 5.517448469876452e-07, "loss": 1.4682, "step": 10577 }, { "epoch": 1.9055167755010132, "grad_norm": 1.356995940208435, "learning_rate": 5.496488194812355e-07, "loss": 1.1869, "step": 10578 }, { "epoch": 1.9056969151092096, "grad_norm": 1.4500218629837036, "learning_rate": 5.475567588578978e-07, "loss": 1.4127, "step": 10579 }, { "epoch": 1.905877054717406, "grad_norm": 1.528525710105896, "learning_rate": 5.454686652854757e-07, "loss": 1.4854, "step": 10580 }, { "epoch": 1.9060571943256024, "grad_norm": 1.7621057033538818, "learning_rate": 5.433845389314685e-07, "loss": 1.5677, "step": 10581 }, { "epoch": 1.9062373339337988, "grad_norm": 1.508873701095581, "learning_rate": 5.413043799630646e-07, "loss": 1.4941, "step": 10582 }, { "epoch": 1.9064174735419952, "grad_norm": 1.3634029626846313, "learning_rate": 5.392281885471362e-07, "loss": 1.2119, "step": 10583 }, { "epoch": 1.9065976131501914, "grad_norm": 1.4658923149108887, "learning_rate": 5.371559648502223e-07, "loss": 1.328, "step": 10584 }, { "epoch": 1.9067777527583878, "grad_norm": 1.3000259399414062, "learning_rate": 5.350877090385731e-07, "loss": 1.0919, "step": 10585 }, { "epoch": 1.906957892366584, "grad_norm": 1.559252381324768, "learning_rate": 5.330234212780893e-07, "loss": 1.616, "step": 10586 }, { "epoch": 1.9071380319747804, "grad_norm": 1.4746516942977905, "learning_rate": 5.309631017343774e-07, "loss": 1.4716, "step": 10587 }, { "epoch": 1.9073181715829768, "grad_norm": 1.6650214195251465, "learning_rate": 5.289067505727052e-07, "loss": 1.4426, "step": 10588 }, { "epoch": 1.9074983111911732, "grad_norm": 1.5495333671569824, "learning_rate": 5.268543679580407e-07, "loss": 1.4142, "step": 10589 }, { "epoch": 1.9076784507993696, "grad_norm": 1.5035241842269897, "learning_rate": 5.248059540550187e-07, "loss": 1.2558, "step": 10590 }, { "epoch": 1.907858590407566, "grad_norm": 1.639175295829773, "learning_rate": 5.227615090279637e-07, "loss": 1.4712, "step": 10591 }, { "epoch": 1.9080387300157622, "grad_norm": 1.4025806188583374, "learning_rate": 5.207210330408829e-07, "loss": 1.164, "step": 10592 }, { "epoch": 1.9082188696239586, "grad_norm": 1.6409562826156616, "learning_rate": 5.186845262574569e-07, "loss": 1.5255, "step": 10593 }, { "epoch": 1.9083990092321548, "grad_norm": 1.4403104782104492, "learning_rate": 5.166519888410604e-07, "loss": 1.3298, "step": 10594 }, { "epoch": 1.9085791488403512, "grad_norm": 1.6716806888580322, "learning_rate": 5.146234209547351e-07, "loss": 1.3915, "step": 10595 }, { "epoch": 1.9087592884485476, "grad_norm": 1.6240124702453613, "learning_rate": 5.125988227612233e-07, "loss": 1.2375, "step": 10596 }, { "epoch": 1.908939428056744, "grad_norm": 1.4897117614746094, "learning_rate": 5.105781944229226e-07, "loss": 1.2471, "step": 10597 }, { "epoch": 1.9091195676649404, "grad_norm": 1.5866055488586426, "learning_rate": 5.085615361019369e-07, "loss": 1.2568, "step": 10598 }, { "epoch": 1.9092997072731368, "grad_norm": 1.5033975839614868, "learning_rate": 5.065488479600367e-07, "loss": 1.3493, "step": 10599 }, { "epoch": 1.909479846881333, "grad_norm": 1.587776780128479, "learning_rate": 5.045401301586817e-07, "loss": 1.5081, "step": 10600 }, { "epoch": 1.9096599864895294, "grad_norm": 1.5601608753204346, "learning_rate": 5.025353828590095e-07, "loss": 1.5215, "step": 10601 }, { "epoch": 1.9098401260977256, "grad_norm": 1.330634593963623, "learning_rate": 5.005346062218308e-07, "loss": 1.8015, "step": 10602 }, { "epoch": 1.910020265705922, "grad_norm": 1.224668025970459, "learning_rate": 4.985378004076669e-07, "loss": 1.5842, "step": 10603 }, { "epoch": 1.9102004053141184, "grad_norm": 1.403169870376587, "learning_rate": 4.96544965576684e-07, "loss": 1.9326, "step": 10604 }, { "epoch": 1.9103805449223148, "grad_norm": 1.532300591468811, "learning_rate": 4.945561018887546e-07, "loss": 1.9527, "step": 10605 }, { "epoch": 1.9105606845305112, "grad_norm": 1.553962230682373, "learning_rate": 4.925712095034174e-07, "loss": 1.7572, "step": 10606 }, { "epoch": 1.9107408241387076, "grad_norm": 1.4353445768356323, "learning_rate": 4.905902885799063e-07, "loss": 1.6705, "step": 10607 }, { "epoch": 1.910920963746904, "grad_norm": 1.4835301637649536, "learning_rate": 4.886133392771274e-07, "loss": 1.5493, "step": 10608 }, { "epoch": 1.9111011033551002, "grad_norm": 1.726707100868225, "learning_rate": 4.866403617536708e-07, "loss": 1.7632, "step": 10609 }, { "epoch": 1.9112812429632966, "grad_norm": 1.7077559232711792, "learning_rate": 4.846713561678096e-07, "loss": 1.8504, "step": 10610 }, { "epoch": 1.9114613825714928, "grad_norm": 1.9443711042404175, "learning_rate": 4.827063226774897e-07, "loss": 1.7915, "step": 10611 }, { "epoch": 1.9116415221796892, "grad_norm": 1.7511098384857178, "learning_rate": 4.807452614403463e-07, "loss": 1.6048, "step": 10612 }, { "epoch": 1.9118216617878856, "grad_norm": 1.4773104190826416, "learning_rate": 4.787881726137034e-07, "loss": 1.3727, "step": 10613 }, { "epoch": 1.912001801396082, "grad_norm": 1.495373010635376, "learning_rate": 4.7683505635455204e-07, "loss": 1.5537, "step": 10614 }, { "epoch": 1.9121819410042784, "grad_norm": 1.4386756420135498, "learning_rate": 4.748859128195726e-07, "loss": 1.4702, "step": 10615 }, { "epoch": 1.9123620806124748, "grad_norm": 1.4554927349090576, "learning_rate": 4.7294074216511773e-07, "loss": 1.5219, "step": 10616 }, { "epoch": 1.912542220220671, "grad_norm": 1.428983449935913, "learning_rate": 4.709995445472348e-07, "loss": 1.3191, "step": 10617 }, { "epoch": 1.9127223598288674, "grad_norm": 1.515053153038025, "learning_rate": 4.6906232012164377e-07, "loss": 1.5178, "step": 10618 }, { "epoch": 1.9129024994370636, "grad_norm": 1.345089316368103, "learning_rate": 4.6712906904374797e-07, "loss": 1.1412, "step": 10619 }, { "epoch": 1.91308263904526, "grad_norm": 1.5110143423080444, "learning_rate": 4.65199791468629e-07, "loss": 1.502, "step": 10620 }, { "epoch": 1.9132627786534564, "grad_norm": 1.428783655166626, "learning_rate": 4.632744875510631e-07, "loss": 1.2869, "step": 10621 }, { "epoch": 1.9134429182616528, "grad_norm": 1.4827884435653687, "learning_rate": 4.6135315744548216e-07, "loss": 1.5696, "step": 10622 }, { "epoch": 1.9136230578698492, "grad_norm": 1.4285590648651123, "learning_rate": 4.5943580130602406e-07, "loss": 1.4427, "step": 10623 }, { "epoch": 1.9138031974780456, "grad_norm": 1.4759881496429443, "learning_rate": 4.575224192864935e-07, "loss": 1.3399, "step": 10624 }, { "epoch": 1.9139833370862418, "grad_norm": 1.4492740631103516, "learning_rate": 4.5561301154038447e-07, "loss": 1.4786, "step": 10625 }, { "epoch": 1.9141634766944382, "grad_norm": 1.3105170726776123, "learning_rate": 4.537075782208688e-07, "loss": 1.1276, "step": 10626 }, { "epoch": 1.9143436163026344, "grad_norm": 1.3947352170944214, "learning_rate": 4.518061194807965e-07, "loss": 1.2279, "step": 10627 }, { "epoch": 1.9145237559108308, "grad_norm": 1.4282366037368774, "learning_rate": 4.4990863547270114e-07, "loss": 1.4077, "step": 10628 }, { "epoch": 1.9147038955190272, "grad_norm": 1.5841740369796753, "learning_rate": 4.4801512634880527e-07, "loss": 1.4595, "step": 10629 }, { "epoch": 1.9148840351272236, "grad_norm": 1.5349650382995605, "learning_rate": 4.461255922609986e-07, "loss": 1.562, "step": 10630 }, { "epoch": 1.91506417473542, "grad_norm": 1.4783697128295898, "learning_rate": 4.4424003336085427e-07, "loss": 1.3324, "step": 10631 }, { "epoch": 1.9152443143436164, "grad_norm": 1.5419166088104248, "learning_rate": 4.423584497996458e-07, "loss": 1.3299, "step": 10632 }, { "epoch": 1.9154244539518126, "grad_norm": 1.2100845575332642, "learning_rate": 4.4048084172829684e-07, "loss": 1.0755, "step": 10633 }, { "epoch": 1.915604593560009, "grad_norm": 1.5638476610183716, "learning_rate": 4.386072092974369e-07, "loss": 1.6335, "step": 10634 }, { "epoch": 1.9157847331682054, "grad_norm": 1.4543256759643555, "learning_rate": 4.36737552657368e-07, "loss": 1.3808, "step": 10635 }, { "epoch": 1.9159648727764016, "grad_norm": 1.5768142938613892, "learning_rate": 4.348718719580702e-07, "loss": 1.3743, "step": 10636 }, { "epoch": 1.916145012384598, "grad_norm": 1.5087361335754395, "learning_rate": 4.330101673492071e-07, "loss": 1.2882, "step": 10637 }, { "epoch": 1.9163251519927944, "grad_norm": 1.5756261348724365, "learning_rate": 4.311524389801258e-07, "loss": 1.5029, "step": 10638 }, { "epoch": 1.9165052916009908, "grad_norm": 1.4810669422149658, "learning_rate": 4.292986869998517e-07, "loss": 1.3864, "step": 10639 }, { "epoch": 1.9166854312091872, "grad_norm": 1.7460476160049438, "learning_rate": 4.274489115570934e-07, "loss": 1.5851, "step": 10640 }, { "epoch": 1.9168655708173836, "grad_norm": 1.6554853916168213, "learning_rate": 4.256031128002325e-07, "loss": 1.5184, "step": 10641 }, { "epoch": 1.9170457104255798, "grad_norm": 1.4849661588668823, "learning_rate": 4.2376129087735027e-07, "loss": 1.3295, "step": 10642 }, { "epoch": 1.9172258500337762, "grad_norm": 1.5633811950683594, "learning_rate": 4.2192344593618985e-07, "loss": 1.5452, "step": 10643 }, { "epoch": 1.9174059896419724, "grad_norm": 1.5531846284866333, "learning_rate": 4.200895781241776e-07, "loss": 1.5133, "step": 10644 }, { "epoch": 1.9175861292501688, "grad_norm": 1.3759820461273193, "learning_rate": 4.1825968758842925e-07, "loss": 1.1063, "step": 10645 }, { "epoch": 1.9177662688583652, "grad_norm": 1.5527273416519165, "learning_rate": 4.164337744757385e-07, "loss": 1.4813, "step": 10646 }, { "epoch": 1.9179464084665616, "grad_norm": 1.5540648698806763, "learning_rate": 4.146118389325826e-07, "loss": 1.3267, "step": 10647 }, { "epoch": 1.918126548074758, "grad_norm": 1.63435959815979, "learning_rate": 4.127938811051113e-07, "loss": 1.6224, "step": 10648 }, { "epoch": 1.9183066876829544, "grad_norm": 1.5442606210708618, "learning_rate": 4.10979901139158e-07, "loss": 1.3054, "step": 10649 }, { "epoch": 1.9184868272911506, "grad_norm": 1.5588059425354004, "learning_rate": 4.0916989918024507e-07, "loss": 1.4591, "step": 10650 }, { "epoch": 1.918666966899347, "grad_norm": 1.57216477394104, "learning_rate": 4.073638753735731e-07, "loss": 1.2147, "step": 10651 }, { "epoch": 1.9188471065075432, "grad_norm": 1.2488489151000977, "learning_rate": 4.055618298640096e-07, "loss": 1.4645, "step": 10652 }, { "epoch": 1.9190272461157396, "grad_norm": 1.3164516687393188, "learning_rate": 4.0376376279612214e-07, "loss": 1.754, "step": 10653 }, { "epoch": 1.919207385723936, "grad_norm": 1.4535037279129028, "learning_rate": 4.01969674314151e-07, "loss": 1.897, "step": 10654 }, { "epoch": 1.9193875253321324, "grad_norm": 1.4546953439712524, "learning_rate": 4.001795645620088e-07, "loss": 1.8098, "step": 10655 }, { "epoch": 1.9195676649403288, "grad_norm": 1.3311939239501953, "learning_rate": 3.983934336833028e-07, "loss": 1.6202, "step": 10656 }, { "epoch": 1.9197478045485252, "grad_norm": 1.642439842224121, "learning_rate": 3.9661128182131855e-07, "loss": 1.9907, "step": 10657 }, { "epoch": 1.9199279441567214, "grad_norm": 1.6593586206436157, "learning_rate": 3.9483310911901384e-07, "loss": 1.625, "step": 10658 }, { "epoch": 1.9201080837649178, "grad_norm": 1.6188393831253052, "learning_rate": 3.930589157190356e-07, "loss": 1.8402, "step": 10659 }, { "epoch": 1.920288223373114, "grad_norm": 1.7021390199661255, "learning_rate": 3.91288701763709e-07, "loss": 2.1461, "step": 10660 }, { "epoch": 1.9204683629813104, "grad_norm": 1.8295954465866089, "learning_rate": 3.895224673950426e-07, "loss": 1.4995, "step": 10661 }, { "epoch": 1.9206485025895068, "grad_norm": 1.4977326393127441, "learning_rate": 3.8776021275471753e-07, "loss": 1.4413, "step": 10662 }, { "epoch": 1.9208286421977032, "grad_norm": 1.2695984840393066, "learning_rate": 3.8600193798409847e-07, "loss": 1.2023, "step": 10663 }, { "epoch": 1.9210087818058996, "grad_norm": 1.5357704162597656, "learning_rate": 3.8424764322424476e-07, "loss": 1.4993, "step": 10664 }, { "epoch": 1.921188921414096, "grad_norm": 1.516182541847229, "learning_rate": 3.824973286158717e-07, "loss": 1.6087, "step": 10665 }, { "epoch": 1.9213690610222924, "grad_norm": 1.377296805381775, "learning_rate": 3.8075099429940034e-07, "loss": 1.3571, "step": 10666 }, { "epoch": 1.9215492006304886, "grad_norm": 1.4332109689712524, "learning_rate": 3.790086404149129e-07, "loss": 1.5036, "step": 10667 }, { "epoch": 1.921729340238685, "grad_norm": 1.3980786800384521, "learning_rate": 3.7727026710218103e-07, "loss": 1.4707, "step": 10668 }, { "epoch": 1.9219094798468812, "grad_norm": 1.4621315002441406, "learning_rate": 3.7553587450065984e-07, "loss": 1.3877, "step": 10669 }, { "epoch": 1.9220896194550776, "grad_norm": 1.5376617908477783, "learning_rate": 3.738054627494825e-07, "loss": 1.42, "step": 10670 }, { "epoch": 1.922269759063274, "grad_norm": 1.4621822834014893, "learning_rate": 3.720790319874545e-07, "loss": 1.2867, "step": 10671 }, { "epoch": 1.9224498986714704, "grad_norm": 1.4044430255889893, "learning_rate": 3.7035658235307634e-07, "loss": 1.3123, "step": 10672 }, { "epoch": 1.9226300382796668, "grad_norm": 1.7046087980270386, "learning_rate": 3.686381139845152e-07, "loss": 1.5982, "step": 10673 }, { "epoch": 1.9228101778878632, "grad_norm": 1.3808777332305908, "learning_rate": 3.66923627019633e-07, "loss": 1.3092, "step": 10674 }, { "epoch": 1.9229903174960594, "grad_norm": 1.6141752004623413, "learning_rate": 3.652131215959642e-07, "loss": 1.4804, "step": 10675 }, { "epoch": 1.9231704571042558, "grad_norm": 1.6133005619049072, "learning_rate": 3.635065978507213e-07, "loss": 1.6464, "step": 10676 }, { "epoch": 1.923350596712452, "grad_norm": 1.408409595489502, "learning_rate": 3.6180405592080027e-07, "loss": 1.3072, "step": 10677 }, { "epoch": 1.9235307363206484, "grad_norm": 1.482015609741211, "learning_rate": 3.6010549594278073e-07, "loss": 1.6211, "step": 10678 }, { "epoch": 1.9237108759288448, "grad_norm": 1.5925298929214478, "learning_rate": 3.584109180529205e-07, "loss": 1.6316, "step": 10679 }, { "epoch": 1.9238910155370412, "grad_norm": 1.5678397417068481, "learning_rate": 3.567203223871607e-07, "loss": 1.2776, "step": 10680 }, { "epoch": 1.9240711551452376, "grad_norm": 1.5976370573043823, "learning_rate": 3.550337090811096e-07, "loss": 1.5719, "step": 10681 }, { "epoch": 1.924251294753434, "grad_norm": 1.414941430091858, "learning_rate": 3.533510782700755e-07, "loss": 1.323, "step": 10682 }, { "epoch": 1.9244314343616302, "grad_norm": 1.5755207538604736, "learning_rate": 3.5167243008903396e-07, "loss": 1.5761, "step": 10683 }, { "epoch": 1.9246115739698266, "grad_norm": 1.5255422592163086, "learning_rate": 3.499977646726493e-07, "loss": 1.5319, "step": 10684 }, { "epoch": 1.9247917135780228, "grad_norm": 1.579970121383667, "learning_rate": 3.4832708215525865e-07, "loss": 1.4748, "step": 10685 }, { "epoch": 1.9249718531862192, "grad_norm": 1.544723391532898, "learning_rate": 3.4666038267088807e-07, "loss": 1.3649, "step": 10686 }, { "epoch": 1.9251519927944156, "grad_norm": 1.4438368082046509, "learning_rate": 3.449976663532362e-07, "loss": 1.0696, "step": 10687 }, { "epoch": 1.925332132402612, "grad_norm": 1.4790719747543335, "learning_rate": 3.4333893333568514e-07, "loss": 1.22, "step": 10688 }, { "epoch": 1.9255122720108084, "grad_norm": 1.6829428672790527, "learning_rate": 3.416841837512952e-07, "loss": 1.6393, "step": 10689 }, { "epoch": 1.9256924116190048, "grad_norm": 1.6428983211517334, "learning_rate": 3.400334177328157e-07, "loss": 1.4522, "step": 10690 }, { "epoch": 1.9258725512272012, "grad_norm": 1.5570341348648071, "learning_rate": 3.3838663541266304e-07, "loss": 1.2209, "step": 10691 }, { "epoch": 1.9260526908353974, "grad_norm": 1.540089726448059, "learning_rate": 3.367438369229481e-07, "loss": 1.3248, "step": 10692 }, { "epoch": 1.9262328304435938, "grad_norm": 1.6681678295135498, "learning_rate": 3.3510502239544885e-07, "loss": 1.5186, "step": 10693 }, { "epoch": 1.92641297005179, "grad_norm": 1.493974208831787, "learning_rate": 3.3347019196163787e-07, "loss": 1.293, "step": 10694 }, { "epoch": 1.9265931096599864, "grad_norm": 1.546829104423523, "learning_rate": 3.3183934575265473e-07, "loss": 1.4679, "step": 10695 }, { "epoch": 1.9267732492681828, "grad_norm": 1.701459527015686, "learning_rate": 3.302124838993281e-07, "loss": 1.5203, "step": 10696 }, { "epoch": 1.9269533888763792, "grad_norm": 1.6763008832931519, "learning_rate": 3.285896065321592e-07, "loss": 1.3289, "step": 10697 }, { "epoch": 1.9271335284845756, "grad_norm": 1.5818840265274048, "learning_rate": 3.269707137813438e-07, "loss": 1.3309, "step": 10698 }, { "epoch": 1.927313668092772, "grad_norm": 1.643554925918579, "learning_rate": 3.253558057767392e-07, "loss": 1.4619, "step": 10699 }, { "epoch": 1.9274938077009682, "grad_norm": 1.4544090032577515, "learning_rate": 3.237448826478973e-07, "loss": 1.1499, "step": 10700 }, { "epoch": 1.9276739473091646, "grad_norm": 1.4293798208236694, "learning_rate": 3.2213794452404245e-07, "loss": 1.2158, "step": 10701 }, { "epoch": 1.9278540869173608, "grad_norm": 1.4748966693878174, "learning_rate": 3.2053499153408273e-07, "loss": 2.0419, "step": 10702 }, { "epoch": 1.9280342265255572, "grad_norm": 1.3608410358428955, "learning_rate": 3.1893602380661523e-07, "loss": 1.7188, "step": 10703 }, { "epoch": 1.9282143661337536, "grad_norm": 1.4280856847763062, "learning_rate": 3.17341041469893e-07, "loss": 1.6843, "step": 10704 }, { "epoch": 1.92839450574195, "grad_norm": 1.4475690126419067, "learning_rate": 3.1575004465187466e-07, "loss": 1.7292, "step": 10705 }, { "epoch": 1.9285746453501464, "grad_norm": 1.3864495754241943, "learning_rate": 3.141630334801915e-07, "loss": 1.7678, "step": 10706 }, { "epoch": 1.9287547849583428, "grad_norm": 1.4281917810440063, "learning_rate": 3.125800080821528e-07, "loss": 1.78, "step": 10707 }, { "epoch": 1.928934924566539, "grad_norm": 1.5134031772613525, "learning_rate": 3.1100096858473483e-07, "loss": 1.6078, "step": 10708 }, { "epoch": 1.9291150641747354, "grad_norm": 1.6015766859054565, "learning_rate": 3.094259151146195e-07, "loss": 1.9317, "step": 10709 }, { "epoch": 1.9292952037829316, "grad_norm": 1.6528242826461792, "learning_rate": 3.0785484779815576e-07, "loss": 1.6737, "step": 10710 }, { "epoch": 1.929475343391128, "grad_norm": 1.6990431547164917, "learning_rate": 3.062877667613706e-07, "loss": 2.1245, "step": 10711 }, { "epoch": 1.9296554829993244, "grad_norm": 1.7260316610336304, "learning_rate": 3.0472467212997456e-07, "loss": 1.7464, "step": 10712 }, { "epoch": 1.9298356226075208, "grad_norm": 1.4424936771392822, "learning_rate": 3.031655640293618e-07, "loss": 1.4541, "step": 10713 }, { "epoch": 1.9300157622157172, "grad_norm": 1.5798828601837158, "learning_rate": 3.0161044258459337e-07, "loss": 1.4402, "step": 10714 }, { "epoch": 1.9301959018239137, "grad_norm": 1.3193916082382202, "learning_rate": 3.000593079204361e-07, "loss": 1.3844, "step": 10715 }, { "epoch": 1.9303760414321098, "grad_norm": 1.3552300930023193, "learning_rate": 2.9851216016131276e-07, "loss": 1.3721, "step": 10716 }, { "epoch": 1.9305561810403062, "grad_norm": 1.4207758903503418, "learning_rate": 2.969689994313352e-07, "loss": 1.2524, "step": 10717 }, { "epoch": 1.9307363206485024, "grad_norm": 1.4526771306991577, "learning_rate": 2.954298258542931e-07, "loss": 1.4771, "step": 10718 }, { "epoch": 1.9309164602566988, "grad_norm": 1.4651854038238525, "learning_rate": 2.9389463955366016e-07, "loss": 1.4377, "step": 10719 }, { "epoch": 1.9310965998648952, "grad_norm": 1.4527208805084229, "learning_rate": 2.923634406525877e-07, "loss": 1.3331, "step": 10720 }, { "epoch": 1.9312767394730916, "grad_norm": 1.3058305978775024, "learning_rate": 2.908362292739109e-07, "loss": 1.1443, "step": 10721 }, { "epoch": 1.931456879081288, "grad_norm": 1.3540574312210083, "learning_rate": 2.8931300554013717e-07, "loss": 1.3873, "step": 10722 }, { "epoch": 1.9316370186894845, "grad_norm": 1.4709054231643677, "learning_rate": 2.8779376957346335e-07, "loss": 1.438, "step": 10723 }, { "epoch": 1.9318171582976809, "grad_norm": 1.6403948068618774, "learning_rate": 2.8627852149575306e-07, "loss": 1.538, "step": 10724 }, { "epoch": 1.931997297905877, "grad_norm": 1.4392701387405396, "learning_rate": 2.8476726142857013e-07, "loss": 1.4715, "step": 10725 }, { "epoch": 1.9321774375140734, "grad_norm": 1.5689250230789185, "learning_rate": 2.8325998949314536e-07, "loss": 1.4759, "step": 10726 }, { "epoch": 1.9323575771222696, "grad_norm": 1.5069090127944946, "learning_rate": 2.8175670581038206e-07, "loss": 1.2367, "step": 10727 }, { "epoch": 1.932537716730466, "grad_norm": 1.6274744272232056, "learning_rate": 2.8025741050088375e-07, "loss": 1.5639, "step": 10728 }, { "epoch": 1.9327178563386624, "grad_norm": 1.5826215744018555, "learning_rate": 2.787621036849153e-07, "loss": 1.4002, "step": 10729 }, { "epoch": 1.9328979959468588, "grad_norm": 1.3495070934295654, "learning_rate": 2.772707854824308e-07, "loss": 1.2874, "step": 10730 }, { "epoch": 1.9330781355550553, "grad_norm": 1.550771951675415, "learning_rate": 2.7578345601306785e-07, "loss": 1.5333, "step": 10731 }, { "epoch": 1.9332582751632517, "grad_norm": 1.5069234371185303, "learning_rate": 2.7430011539613667e-07, "loss": 1.4189, "step": 10732 }, { "epoch": 1.9334384147714478, "grad_norm": 1.568824052810669, "learning_rate": 2.7282076375062524e-07, "loss": 1.2596, "step": 10733 }, { "epoch": 1.9336185543796442, "grad_norm": 1.6439555883407593, "learning_rate": 2.713454011952166e-07, "loss": 1.5865, "step": 10734 }, { "epoch": 1.9337986939878404, "grad_norm": 1.3567376136779785, "learning_rate": 2.6987402784825476e-07, "loss": 1.1757, "step": 10735 }, { "epoch": 1.9339788335960368, "grad_norm": 1.6360039710998535, "learning_rate": 2.684066438277788e-07, "loss": 1.5079, "step": 10736 }, { "epoch": 1.9341589732042332, "grad_norm": 1.4489951133728027, "learning_rate": 2.669432492514945e-07, "loss": 1.3818, "step": 10737 }, { "epoch": 1.9343391128124297, "grad_norm": 1.4558089971542358, "learning_rate": 2.6548384423679684e-07, "loss": 1.2452, "step": 10738 }, { "epoch": 1.934519252420626, "grad_norm": 1.5748984813690186, "learning_rate": 2.640284289007644e-07, "loss": 1.3351, "step": 10739 }, { "epoch": 1.9346993920288225, "grad_norm": 1.6168311834335327, "learning_rate": 2.6257700336014824e-07, "loss": 1.5136, "step": 10740 }, { "epoch": 1.9348795316370186, "grad_norm": 1.6417700052261353, "learning_rate": 2.611295677313774e-07, "loss": 1.5047, "step": 10741 }, { "epoch": 1.935059671245215, "grad_norm": 1.5892904996871948, "learning_rate": 2.596861221305591e-07, "loss": 1.5453, "step": 10742 }, { "epoch": 1.9352398108534112, "grad_norm": 1.641786813735962, "learning_rate": 2.582466666735006e-07, "loss": 1.5017, "step": 10743 }, { "epoch": 1.9354199504616076, "grad_norm": 1.4978071451187134, "learning_rate": 2.5681120147566517e-07, "loss": 1.3077, "step": 10744 }, { "epoch": 1.935600090069804, "grad_norm": 1.4038392305374146, "learning_rate": 2.5537972665219955e-07, "loss": 1.1902, "step": 10745 }, { "epoch": 1.9357802296780005, "grad_norm": 1.3287270069122314, "learning_rate": 2.5395224231795076e-07, "loss": 1.1484, "step": 10746 }, { "epoch": 1.9359603692861969, "grad_norm": 1.5975278615951538, "learning_rate": 2.5252874858741614e-07, "loss": 1.4272, "step": 10747 }, { "epoch": 1.9361405088943933, "grad_norm": 1.56769597530365, "learning_rate": 2.511092455747932e-07, "loss": 1.4345, "step": 10748 }, { "epoch": 1.9363206485025897, "grad_norm": 1.6250433921813965, "learning_rate": 2.4969373339396307e-07, "loss": 1.2983, "step": 10749 }, { "epoch": 1.9365007881107859, "grad_norm": 1.6011165380477905, "learning_rate": 2.482822121584627e-07, "loss": 1.3162, "step": 10750 }, { "epoch": 1.9366809277189823, "grad_norm": 1.5798112154006958, "learning_rate": 2.4687468198153487e-07, "loss": 1.2486, "step": 10751 }, { "epoch": 1.9368610673271784, "grad_norm": 1.4361294507980347, "learning_rate": 2.454711429760781e-07, "loss": 1.9416, "step": 10752 }, { "epoch": 1.9370412069353748, "grad_norm": 1.3630291223526, "learning_rate": 2.4407159525469683e-07, "loss": 1.6923, "step": 10753 }, { "epoch": 1.9372213465435713, "grad_norm": 1.381807804107666, "learning_rate": 2.426760389296623e-07, "loss": 1.8007, "step": 10754 }, { "epoch": 1.9374014861517677, "grad_norm": 1.340575098991394, "learning_rate": 2.4128447411291275e-07, "loss": 1.6226, "step": 10755 }, { "epoch": 1.937581625759964, "grad_norm": 1.5154651403427124, "learning_rate": 2.3989690091608673e-07, "loss": 1.9511, "step": 10756 }, { "epoch": 1.9377617653681605, "grad_norm": 1.347575306892395, "learning_rate": 2.3851331945049517e-07, "loss": 1.4401, "step": 10757 }, { "epoch": 1.9379419049763567, "grad_norm": 1.4433164596557617, "learning_rate": 2.37133729827127e-07, "loss": 1.8063, "step": 10758 }, { "epoch": 1.938122044584553, "grad_norm": 1.5673457384109497, "learning_rate": 2.3575813215664934e-07, "loss": 1.6251, "step": 10759 }, { "epoch": 1.9383021841927492, "grad_norm": 1.8343981504440308, "learning_rate": 2.3438652654941828e-07, "loss": 1.8027, "step": 10760 }, { "epoch": 1.9384823238009457, "grad_norm": 1.6909205913543701, "learning_rate": 2.3301891311546253e-07, "loss": 1.7972, "step": 10761 }, { "epoch": 1.938662463409142, "grad_norm": 1.730273962020874, "learning_rate": 2.3165529196448877e-07, "loss": 1.4655, "step": 10762 }, { "epoch": 1.9388426030173385, "grad_norm": 1.3355976343154907, "learning_rate": 2.302956632058817e-07, "loss": 1.4093, "step": 10763 }, { "epoch": 1.9390227426255349, "grad_norm": 1.4256105422973633, "learning_rate": 2.289400269487152e-07, "loss": 1.4884, "step": 10764 }, { "epoch": 1.9392028822337313, "grad_norm": 1.3288540840148926, "learning_rate": 2.2758838330174114e-07, "loss": 1.181, "step": 10765 }, { "epoch": 1.9393830218419275, "grad_norm": 1.348707914352417, "learning_rate": 2.262407323733784e-07, "loss": 1.1041, "step": 10766 }, { "epoch": 1.9395631614501239, "grad_norm": 1.5383409261703491, "learning_rate": 2.24897074271746e-07, "loss": 1.5553, "step": 10767 }, { "epoch": 1.93974330105832, "grad_norm": 1.4523781538009644, "learning_rate": 2.2355740910461886e-07, "loss": 1.3102, "step": 10768 }, { "epoch": 1.9399234406665165, "grad_norm": 1.2538048028945923, "learning_rate": 2.2222173697947768e-07, "loss": 0.9925, "step": 10769 }, { "epoch": 1.9401035802747129, "grad_norm": 1.3417613506317139, "learning_rate": 2.2089005800345898e-07, "loss": 1.198, "step": 10770 }, { "epoch": 1.9402837198829093, "grad_norm": 1.3026797771453857, "learning_rate": 2.1956237228339395e-07, "loss": 1.1336, "step": 10771 }, { "epoch": 1.9404638594911057, "grad_norm": 1.6009228229522705, "learning_rate": 2.1823867992579184e-07, "loss": 1.5021, "step": 10772 }, { "epoch": 1.940643999099302, "grad_norm": 1.5452401638031006, "learning_rate": 2.1691898103682883e-07, "loss": 1.4621, "step": 10773 }, { "epoch": 1.9408241387074983, "grad_norm": 1.6131672859191895, "learning_rate": 2.1560327572238138e-07, "loss": 1.4616, "step": 10774 }, { "epoch": 1.9410042783156947, "grad_norm": 1.4504743814468384, "learning_rate": 2.1429156408798722e-07, "loss": 1.2417, "step": 10775 }, { "epoch": 1.941184417923891, "grad_norm": 1.3496614694595337, "learning_rate": 2.1298384623887334e-07, "loss": 1.32, "step": 10776 }, { "epoch": 1.9413645575320873, "grad_norm": 1.4966177940368652, "learning_rate": 2.1168012227995026e-07, "loss": 1.4228, "step": 10777 }, { "epoch": 1.9415446971402837, "grad_norm": 1.5653752088546753, "learning_rate": 2.1038039231578987e-07, "loss": 1.505, "step": 10778 }, { "epoch": 1.94172483674848, "grad_norm": 1.335074782371521, "learning_rate": 2.0908465645066432e-07, "loss": 1.2096, "step": 10779 }, { "epoch": 1.9419049763566765, "grad_norm": 1.5789923667907715, "learning_rate": 2.0779291478851826e-07, "loss": 1.3691, "step": 10780 }, { "epoch": 1.9420851159648729, "grad_norm": 1.6408206224441528, "learning_rate": 2.0650516743297432e-07, "loss": 1.573, "step": 10781 }, { "epoch": 1.9422652555730693, "grad_norm": 1.667970895767212, "learning_rate": 2.0522141448732768e-07, "loss": 1.5264, "step": 10782 }, { "epoch": 1.9424453951812655, "grad_norm": 1.3787291049957275, "learning_rate": 2.0394165605456262e-07, "loss": 1.5143, "step": 10783 }, { "epoch": 1.9426255347894619, "grad_norm": 1.527496099472046, "learning_rate": 2.02665892237347e-07, "loss": 1.5217, "step": 10784 }, { "epoch": 1.942805674397658, "grad_norm": 1.5242236852645874, "learning_rate": 2.013941231380212e-07, "loss": 1.2894, "step": 10785 }, { "epoch": 1.9429858140058545, "grad_norm": 1.5073187351226807, "learning_rate": 2.0012634885859804e-07, "loss": 1.48, "step": 10786 }, { "epoch": 1.9431659536140509, "grad_norm": 1.4628822803497314, "learning_rate": 1.9886256950078507e-07, "loss": 1.3702, "step": 10787 }, { "epoch": 1.9433460932222473, "grad_norm": 1.5349504947662354, "learning_rate": 1.9760278516595677e-07, "loss": 1.3916, "step": 10788 }, { "epoch": 1.9435262328304437, "grad_norm": 1.3617768287658691, "learning_rate": 1.963469959551767e-07, "loss": 1.2469, "step": 10789 }, { "epoch": 1.94370637243864, "grad_norm": 1.4949501752853394, "learning_rate": 1.9509520196918095e-07, "loss": 1.3154, "step": 10790 }, { "epoch": 1.9438865120468363, "grad_norm": 1.5577994585037231, "learning_rate": 1.9384740330838925e-07, "loss": 1.4972, "step": 10791 }, { "epoch": 1.9440666516550327, "grad_norm": 1.5346155166625977, "learning_rate": 1.9260360007289923e-07, "loss": 1.3894, "step": 10792 }, { "epoch": 1.9442467912632289, "grad_norm": 1.5204501152038574, "learning_rate": 1.9136379236249225e-07, "loss": 1.3432, "step": 10793 }, { "epoch": 1.9444269308714253, "grad_norm": 1.6208473443984985, "learning_rate": 1.901279802766165e-07, "loss": 1.4404, "step": 10794 }, { "epoch": 1.9446070704796217, "grad_norm": 1.6193093061447144, "learning_rate": 1.888961639144149e-07, "loss": 1.3629, "step": 10795 }, { "epoch": 1.944787210087818, "grad_norm": 1.4994977712631226, "learning_rate": 1.876683433746973e-07, "loss": 1.3601, "step": 10796 }, { "epoch": 1.9449673496960145, "grad_norm": 1.4976441860198975, "learning_rate": 1.864445187559627e-07, "loss": 1.3416, "step": 10797 }, { "epoch": 1.9451474893042109, "grad_norm": 1.5912138223648071, "learning_rate": 1.852246901563881e-07, "loss": 1.2986, "step": 10798 }, { "epoch": 1.945327628912407, "grad_norm": 1.664944052696228, "learning_rate": 1.8400885767382302e-07, "loss": 1.3124, "step": 10799 }, { "epoch": 1.9455077685206035, "grad_norm": 1.4616655111312866, "learning_rate": 1.8279702140580612e-07, "loss": 1.1692, "step": 10800 }, { "epoch": 1.9456879081287997, "grad_norm": 1.4720104932785034, "learning_rate": 1.8158918144954296e-07, "loss": 1.2591, "step": 10801 }, { "epoch": 1.945868047736996, "grad_norm": 1.329436182975769, "learning_rate": 1.803853379019338e-07, "loss": 1.5564, "step": 10802 }, { "epoch": 1.9460481873451925, "grad_norm": 1.4360246658325195, "learning_rate": 1.7918549085954582e-07, "loss": 1.913, "step": 10803 }, { "epoch": 1.9462283269533889, "grad_norm": 1.4227567911148071, "learning_rate": 1.7798964041862987e-07, "loss": 1.7613, "step": 10804 }, { "epoch": 1.9464084665615853, "grad_norm": 1.3533034324645996, "learning_rate": 1.7679778667512026e-07, "loss": 1.7592, "step": 10805 }, { "epoch": 1.9465886061697817, "grad_norm": 1.3975611925125122, "learning_rate": 1.7560992972461832e-07, "loss": 1.7061, "step": 10806 }, { "epoch": 1.946768745777978, "grad_norm": 1.4783165454864502, "learning_rate": 1.7442606966242004e-07, "loss": 1.8563, "step": 10807 }, { "epoch": 1.9469488853861743, "grad_norm": 1.3704092502593994, "learning_rate": 1.7324620658349945e-07, "loss": 1.3216, "step": 10808 }, { "epoch": 1.9471290249943707, "grad_norm": 1.3841757774353027, "learning_rate": 1.7207034058249195e-07, "loss": 1.3249, "step": 10809 }, { "epoch": 1.9473091646025669, "grad_norm": 1.4620943069458008, "learning_rate": 1.7089847175373874e-07, "loss": 1.6252, "step": 10810 }, { "epoch": 1.9474893042107633, "grad_norm": 1.6428823471069336, "learning_rate": 1.6973060019123132e-07, "loss": 1.807, "step": 10811 }, { "epoch": 1.9476694438189597, "grad_norm": 1.942167043685913, "learning_rate": 1.6856672598866695e-07, "loss": 1.6585, "step": 10812 }, { "epoch": 1.947849583427156, "grad_norm": 1.476116418838501, "learning_rate": 1.6740684923940986e-07, "loss": 1.6478, "step": 10813 }, { "epoch": 1.9480297230353525, "grad_norm": 1.370652437210083, "learning_rate": 1.662509700364967e-07, "loss": 1.298, "step": 10814 }, { "epoch": 1.948209862643549, "grad_norm": 1.313159704208374, "learning_rate": 1.650990884726644e-07, "loss": 1.4107, "step": 10815 }, { "epoch": 1.948390002251745, "grad_norm": 1.7205506563186646, "learning_rate": 1.6395120464030578e-07, "loss": 1.6049, "step": 10816 }, { "epoch": 1.9485701418599415, "grad_norm": 1.349006175994873, "learning_rate": 1.6280731863150822e-07, "loss": 1.3423, "step": 10817 }, { "epoch": 1.9487502814681377, "grad_norm": 1.4364091157913208, "learning_rate": 1.616674305380317e-07, "loss": 1.4075, "step": 10818 }, { "epoch": 1.948930421076334, "grad_norm": 1.4425793886184692, "learning_rate": 1.605315404513197e-07, "loss": 1.3551, "step": 10819 }, { "epoch": 1.9491105606845305, "grad_norm": 1.3886240720748901, "learning_rate": 1.5939964846249378e-07, "loss": 1.336, "step": 10820 }, { "epoch": 1.9492907002927269, "grad_norm": 1.368329405784607, "learning_rate": 1.58271754662348e-07, "loss": 1.4743, "step": 10821 }, { "epoch": 1.9494708399009233, "grad_norm": 1.42078697681427, "learning_rate": 1.5714785914136553e-07, "loss": 1.3425, "step": 10822 }, { "epoch": 1.9496509795091197, "grad_norm": 1.4186526536941528, "learning_rate": 1.560279619897076e-07, "loss": 1.2651, "step": 10823 }, { "epoch": 1.9498311191173159, "grad_norm": 1.5221070051193237, "learning_rate": 1.549120632972023e-07, "loss": 1.5366, "step": 10824 }, { "epoch": 1.9500112587255123, "grad_norm": 1.567468285560608, "learning_rate": 1.5380016315337808e-07, "loss": 1.3349, "step": 10825 }, { "epoch": 1.9501913983337085, "grad_norm": 1.3771719932556152, "learning_rate": 1.5269226164742468e-07, "loss": 1.2236, "step": 10826 }, { "epoch": 1.9503715379419049, "grad_norm": 1.568207025527954, "learning_rate": 1.5158835886821544e-07, "loss": 1.4075, "step": 10827 }, { "epoch": 1.9505516775501013, "grad_norm": 1.3922921419143677, "learning_rate": 1.5048845490431286e-07, "loss": 1.2534, "step": 10828 }, { "epoch": 1.9507318171582977, "grad_norm": 1.7033252716064453, "learning_rate": 1.493925498439408e-07, "loss": 1.6314, "step": 10829 }, { "epoch": 1.950911956766494, "grad_norm": 1.4187091588974, "learning_rate": 1.4830064377501784e-07, "loss": 1.0673, "step": 10830 }, { "epoch": 1.9510920963746905, "grad_norm": 1.4200152158737183, "learning_rate": 1.4721273678513504e-07, "loss": 1.4227, "step": 10831 }, { "epoch": 1.9512722359828867, "grad_norm": 1.5980957746505737, "learning_rate": 1.4612882896156698e-07, "loss": 1.367, "step": 10832 }, { "epoch": 1.951452375591083, "grad_norm": 1.3979201316833496, "learning_rate": 1.4504892039125528e-07, "loss": 1.4113, "step": 10833 }, { "epoch": 1.9516325151992795, "grad_norm": 1.4458646774291992, "learning_rate": 1.439730111608417e-07, "loss": 1.2033, "step": 10834 }, { "epoch": 1.9518126548074757, "grad_norm": 1.5067696571350098, "learning_rate": 1.4290110135662394e-07, "loss": 1.377, "step": 10835 }, { "epoch": 1.951992794415672, "grad_norm": 1.5077192783355713, "learning_rate": 1.418331910645998e-07, "loss": 1.3417, "step": 10836 }, { "epoch": 1.9521729340238685, "grad_norm": 1.4355510473251343, "learning_rate": 1.4076928037043413e-07, "loss": 1.4292, "step": 10837 }, { "epoch": 1.952353073632065, "grad_norm": 1.4707227945327759, "learning_rate": 1.3970936935946976e-07, "loss": 1.2702, "step": 10838 }, { "epoch": 1.9525332132402613, "grad_norm": 1.6267133951187134, "learning_rate": 1.3865345811673314e-07, "loss": 1.4466, "step": 10839 }, { "epoch": 1.9527133528484577, "grad_norm": 1.3773984909057617, "learning_rate": 1.3760154672692316e-07, "loss": 1.2227, "step": 10840 }, { "epoch": 1.952893492456654, "grad_norm": 1.6703267097473145, "learning_rate": 1.3655363527443343e-07, "loss": 1.4158, "step": 10841 }, { "epoch": 1.9530736320648503, "grad_norm": 1.5142369270324707, "learning_rate": 1.3550972384333004e-07, "loss": 1.3778, "step": 10842 }, { "epoch": 1.9532537716730465, "grad_norm": 1.5401586294174194, "learning_rate": 1.3446981251734048e-07, "loss": 1.2658, "step": 10843 }, { "epoch": 1.9534339112812429, "grad_norm": 1.498421549797058, "learning_rate": 1.3343390137989797e-07, "loss": 1.2119, "step": 10844 }, { "epoch": 1.9536140508894393, "grad_norm": 1.5495822429656982, "learning_rate": 1.3240199051409718e-07, "loss": 1.4924, "step": 10845 }, { "epoch": 1.9537941904976357, "grad_norm": 1.6219462156295776, "learning_rate": 1.3137408000272188e-07, "loss": 1.196, "step": 10846 }, { "epoch": 1.953974330105832, "grad_norm": 1.4840922355651855, "learning_rate": 1.303501699282228e-07, "loss": 1.1492, "step": 10847 }, { "epoch": 1.9541544697140285, "grad_norm": 1.5634247064590454, "learning_rate": 1.2933026037274532e-07, "loss": 1.3837, "step": 10848 }, { "epoch": 1.9543346093222247, "grad_norm": 1.4500490427017212, "learning_rate": 1.2831435141810179e-07, "loss": 1.1717, "step": 10849 }, { "epoch": 1.954514748930421, "grad_norm": 1.3409775495529175, "learning_rate": 1.2730244314579364e-07, "loss": 1.0463, "step": 10850 }, { "epoch": 1.9546948885386173, "grad_norm": 1.3928147554397583, "learning_rate": 1.2629453563698933e-07, "loss": 0.9688, "step": 10851 }, { "epoch": 1.9548750281468137, "grad_norm": 1.4606311321258545, "learning_rate": 1.2529062897254084e-07, "loss": 1.9747, "step": 10852 }, { "epoch": 1.95505516775501, "grad_norm": 1.2397841215133667, "learning_rate": 1.242907232329893e-07, "loss": 1.5706, "step": 10853 }, { "epoch": 1.9552353073632065, "grad_norm": 1.363538146018982, "learning_rate": 1.2329481849854273e-07, "loss": 1.7423, "step": 10854 }, { "epoch": 1.955415446971403, "grad_norm": 1.4047261476516724, "learning_rate": 1.2230291484908728e-07, "loss": 1.8596, "step": 10855 }, { "epoch": 1.9555955865795993, "grad_norm": 1.5757447481155396, "learning_rate": 1.213150123641982e-07, "loss": 1.9238, "step": 10856 }, { "epoch": 1.9557757261877955, "grad_norm": 1.3981974124908447, "learning_rate": 1.2033111112312868e-07, "loss": 1.5019, "step": 10857 }, { "epoch": 1.955955865795992, "grad_norm": 1.4073365926742554, "learning_rate": 1.1935121120479898e-07, "loss": 1.6148, "step": 10858 }, { "epoch": 1.956136005404188, "grad_norm": 1.6653339862823486, "learning_rate": 1.1837531268781843e-07, "loss": 1.8307, "step": 10859 }, { "epoch": 1.9563161450123845, "grad_norm": 1.809057354927063, "learning_rate": 1.1740341565047442e-07, "loss": 1.9895, "step": 10860 }, { "epoch": 1.956496284620581, "grad_norm": 1.9179248809814453, "learning_rate": 1.1643552017073233e-07, "loss": 2.0714, "step": 10861 }, { "epoch": 1.9566764242287773, "grad_norm": 1.498245120048523, "learning_rate": 1.1547162632623565e-07, "loss": 1.562, "step": 10862 }, { "epoch": 1.9568565638369737, "grad_norm": 1.3244699239730835, "learning_rate": 1.1451173419430583e-07, "loss": 1.4839, "step": 10863 }, { "epoch": 1.9570367034451701, "grad_norm": 1.4634143114089966, "learning_rate": 1.1355584385194795e-07, "loss": 1.4893, "step": 10864 }, { "epoch": 1.9572168430533665, "grad_norm": 1.5573850870132446, "learning_rate": 1.1260395537583956e-07, "loss": 1.7772, "step": 10865 }, { "epoch": 1.9573969826615627, "grad_norm": 1.4590612649917603, "learning_rate": 1.1165606884234181e-07, "loss": 1.3446, "step": 10866 }, { "epoch": 1.957577122269759, "grad_norm": 1.2843468189239502, "learning_rate": 1.1071218432749941e-07, "loss": 1.2392, "step": 10867 }, { "epoch": 1.9577572618779553, "grad_norm": 1.4397944211959839, "learning_rate": 1.0977230190701848e-07, "loss": 1.4551, "step": 10868 }, { "epoch": 1.9579374014861517, "grad_norm": 1.4734441041946411, "learning_rate": 1.0883642165630536e-07, "loss": 1.2826, "step": 10869 }, { "epoch": 1.958117541094348, "grad_norm": 1.4606026411056519, "learning_rate": 1.079045436504389e-07, "loss": 1.3579, "step": 10870 }, { "epoch": 1.9582976807025445, "grad_norm": 1.4172842502593994, "learning_rate": 1.0697666796415929e-07, "loss": 1.3519, "step": 10871 }, { "epoch": 1.958477820310741, "grad_norm": 1.3345050811767578, "learning_rate": 1.0605279467191254e-07, "loss": 1.3285, "step": 10872 }, { "epoch": 1.9586579599189373, "grad_norm": 1.2070847749710083, "learning_rate": 1.0513292384781159e-07, "loss": 1.0915, "step": 10873 }, { "epoch": 1.9588380995271335, "grad_norm": 1.311422348022461, "learning_rate": 1.0421705556564188e-07, "loss": 1.2526, "step": 10874 }, { "epoch": 1.95901823913533, "grad_norm": 1.4291080236434937, "learning_rate": 1.0330518989887794e-07, "loss": 1.2534, "step": 10875 }, { "epoch": 1.959198378743526, "grad_norm": 1.427431344985962, "learning_rate": 1.0239732692066684e-07, "loss": 1.3307, "step": 10876 }, { "epoch": 1.9593785183517225, "grad_norm": 1.4616867303848267, "learning_rate": 1.0149346670383919e-07, "loss": 1.2988, "step": 10877 }, { "epoch": 1.959558657959919, "grad_norm": 1.517516851425171, "learning_rate": 1.0059360932089812e-07, "loss": 1.5038, "step": 10878 }, { "epoch": 1.9597387975681153, "grad_norm": 1.4344525337219238, "learning_rate": 9.969775484403587e-08, "loss": 1.4157, "step": 10879 }, { "epoch": 1.9599189371763117, "grad_norm": 1.4902291297912598, "learning_rate": 9.880590334511163e-08, "loss": 1.3343, "step": 10880 }, { "epoch": 1.9600990767845081, "grad_norm": 1.385625958442688, "learning_rate": 9.791805489567374e-08, "loss": 1.3547, "step": 10881 }, { "epoch": 1.9602792163927043, "grad_norm": 1.5179256200790405, "learning_rate": 9.703420956694299e-08, "loss": 1.4067, "step": 10882 }, { "epoch": 1.9604593560009007, "grad_norm": 1.357257604598999, "learning_rate": 9.615436742981821e-08, "loss": 1.1885, "step": 10883 }, { "epoch": 1.960639495609097, "grad_norm": 1.3937796354293823, "learning_rate": 9.527852855488739e-08, "loss": 1.2516, "step": 10884 }, { "epoch": 1.9608196352172933, "grad_norm": 1.3831685781478882, "learning_rate": 9.44066930123999e-08, "loss": 1.0506, "step": 10885 }, { "epoch": 1.9609997748254897, "grad_norm": 1.5415778160095215, "learning_rate": 9.353886087230535e-08, "loss": 1.5944, "step": 10886 }, { "epoch": 1.9611799144336861, "grad_norm": 1.6939843893051147, "learning_rate": 9.267503220420915e-08, "loss": 1.4236, "step": 10887 }, { "epoch": 1.9613600540418825, "grad_norm": 1.6533327102661133, "learning_rate": 9.181520707741697e-08, "loss": 1.6249, "step": 10888 }, { "epoch": 1.961540193650079, "grad_norm": 1.6216720342636108, "learning_rate": 9.09593855608959e-08, "loss": 1.5197, "step": 10889 }, { "epoch": 1.9617203332582753, "grad_norm": 1.5947545766830444, "learning_rate": 9.010756772330764e-08, "loss": 1.4806, "step": 10890 }, { "epoch": 1.9619004728664715, "grad_norm": 1.623610258102417, "learning_rate": 8.925975363297534e-08, "loss": 1.3992, "step": 10891 }, { "epoch": 1.962080612474668, "grad_norm": 1.6965842247009277, "learning_rate": 8.841594335791681e-08, "loss": 1.4058, "step": 10892 }, { "epoch": 1.962260752082864, "grad_norm": 1.4621773958206177, "learning_rate": 8.757613696582234e-08, "loss": 1.2826, "step": 10893 }, { "epoch": 1.9624408916910605, "grad_norm": 1.6303375959396362, "learning_rate": 8.674033452405472e-08, "loss": 1.371, "step": 10894 }, { "epoch": 1.962621031299257, "grad_norm": 1.5197093486785889, "learning_rate": 8.590853609966587e-08, "loss": 1.3054, "step": 10895 }, { "epoch": 1.9628011709074533, "grad_norm": 1.6302917003631592, "learning_rate": 8.508074175938574e-08, "loss": 1.4609, "step": 10896 }, { "epoch": 1.9629813105156497, "grad_norm": 1.446485996246338, "learning_rate": 8.425695156961677e-08, "loss": 1.1313, "step": 10897 }, { "epoch": 1.9631614501238461, "grad_norm": 1.534677505493164, "learning_rate": 8.343716559643944e-08, "loss": 1.2485, "step": 10898 }, { "epoch": 1.9633415897320423, "grad_norm": 1.6634607315063477, "learning_rate": 8.26213839056178e-08, "loss": 1.3829, "step": 10899 }, { "epoch": 1.9635217293402387, "grad_norm": 1.5477266311645508, "learning_rate": 8.180960656259395e-08, "loss": 1.4037, "step": 10900 }, { "epoch": 1.963701868948435, "grad_norm": 1.4062843322753906, "learning_rate": 8.100183363249359e-08, "loss": 1.2043, "step": 10901 }, { "epoch": 1.9638820085566313, "grad_norm": 1.351867914199829, "learning_rate": 8.01980651801093e-08, "loss": 1.5283, "step": 10902 }, { "epoch": 1.9640621481648277, "grad_norm": 1.2529815435409546, "learning_rate": 7.939830126992286e-08, "loss": 1.6594, "step": 10903 }, { "epoch": 1.9642422877730241, "grad_norm": 1.3346374034881592, "learning_rate": 7.860254196608296e-08, "loss": 1.7968, "step": 10904 }, { "epoch": 1.9644224273812205, "grad_norm": 1.3766579627990723, "learning_rate": 7.781078733243852e-08, "loss": 1.7012, "step": 10905 }, { "epoch": 1.964602566989417, "grad_norm": 1.4812413454055786, "learning_rate": 7.702303743249428e-08, "loss": 1.7673, "step": 10906 }, { "epoch": 1.9647827065976131, "grad_norm": 1.4670212268829346, "learning_rate": 7.623929232944416e-08, "loss": 1.6641, "step": 10907 }, { "epoch": 1.9649628462058095, "grad_norm": 1.50018310546875, "learning_rate": 7.545955208616006e-08, "loss": 1.7005, "step": 10908 }, { "epoch": 1.9651429858140057, "grad_norm": 1.6693171262741089, "learning_rate": 7.468381676519198e-08, "loss": 1.7138, "step": 10909 }, { "epoch": 1.965323125422202, "grad_norm": 1.7613017559051514, "learning_rate": 7.391208642877345e-08, "loss": 1.9698, "step": 10910 }, { "epoch": 1.9655032650303985, "grad_norm": 1.6921327114105225, "learning_rate": 7.314436113881051e-08, "loss": 1.7468, "step": 10911 }, { "epoch": 1.965683404638595, "grad_norm": 1.4408345222473145, "learning_rate": 7.238064095688724e-08, "loss": 1.452, "step": 10912 }, { "epoch": 1.9658635442467913, "grad_norm": 1.486197590827942, "learning_rate": 7.162092594427128e-08, "loss": 1.3393, "step": 10913 }, { "epoch": 1.9660436838549877, "grad_norm": 1.571014165878296, "learning_rate": 7.086521616190279e-08, "loss": 1.5181, "step": 10914 }, { "epoch": 1.966223823463184, "grad_norm": 1.4618765115737915, "learning_rate": 7.011351167041103e-08, "loss": 1.4172, "step": 10915 }, { "epoch": 1.9664039630713803, "grad_norm": 1.2717589139938354, "learning_rate": 6.936581253008667e-08, "loss": 1.3009, "step": 10916 }, { "epoch": 1.9665841026795765, "grad_norm": 1.3492579460144043, "learning_rate": 6.862211880092061e-08, "loss": 1.2506, "step": 10917 }, { "epoch": 1.966764242287773, "grad_norm": 1.5550481081008911, "learning_rate": 6.788243054257071e-08, "loss": 1.5462, "step": 10918 }, { "epoch": 1.9669443818959693, "grad_norm": 1.3130022287368774, "learning_rate": 6.71467478143617e-08, "loss": 1.0724, "step": 10919 }, { "epoch": 1.9671245215041657, "grad_norm": 1.4852937459945679, "learning_rate": 6.64150706753297e-08, "loss": 1.5518, "step": 10920 }, { "epoch": 1.9673046611123621, "grad_norm": 1.4843921661376953, "learning_rate": 6.568739918415001e-08, "loss": 1.3119, "step": 10921 }, { "epoch": 1.9674848007205585, "grad_norm": 1.416107177734375, "learning_rate": 6.49637333992148e-08, "loss": 1.4588, "step": 10922 }, { "epoch": 1.967664940328755, "grad_norm": 1.555823802947998, "learning_rate": 6.424407337856097e-08, "loss": 1.6191, "step": 10923 }, { "epoch": 1.9678450799369511, "grad_norm": 1.5154531002044678, "learning_rate": 6.352841917992014e-08, "loss": 1.619, "step": 10924 }, { "epoch": 1.9680252195451475, "grad_norm": 1.5035817623138428, "learning_rate": 6.281677086071303e-08, "loss": 1.4539, "step": 10925 }, { "epoch": 1.9682053591533437, "grad_norm": 1.5245845317840576, "learning_rate": 6.210912847802175e-08, "loss": 1.3894, "step": 10926 }, { "epoch": 1.9683854987615401, "grad_norm": 1.4736533164978027, "learning_rate": 6.140549208860646e-08, "loss": 1.4583, "step": 10927 }, { "epoch": 1.9685656383697365, "grad_norm": 1.3531192541122437, "learning_rate": 6.070586174892201e-08, "loss": 1.2688, "step": 10928 }, { "epoch": 1.968745777977933, "grad_norm": 1.503147840499878, "learning_rate": 6.001023751509016e-08, "loss": 1.2753, "step": 10929 }, { "epoch": 1.9689259175861293, "grad_norm": 1.4450464248657227, "learning_rate": 5.9318619442905175e-08, "loss": 1.2475, "step": 10930 }, { "epoch": 1.9691060571943257, "grad_norm": 1.4868841171264648, "learning_rate": 5.8631007587861554e-08, "loss": 1.4115, "step": 10931 }, { "epoch": 1.969286196802522, "grad_norm": 1.5354737043380737, "learning_rate": 5.794740200511517e-08, "loss": 1.6124, "step": 10932 }, { "epoch": 1.9694663364107183, "grad_norm": 1.5318485498428345, "learning_rate": 5.7267802749494394e-08, "loss": 1.4552, "step": 10933 }, { "epoch": 1.9696464760189145, "grad_norm": 1.6651866436004639, "learning_rate": 5.6592209875533374e-08, "loss": 1.3626, "step": 10934 }, { "epoch": 1.969826615627111, "grad_norm": 1.391055703163147, "learning_rate": 5.5920623437411e-08, "loss": 1.2536, "step": 10935 }, { "epoch": 1.9700067552353073, "grad_norm": 1.3962472677230835, "learning_rate": 5.525304348901195e-08, "loss": 1.2328, "step": 10936 }, { "epoch": 1.9701868948435037, "grad_norm": 1.5940313339233398, "learning_rate": 5.458947008388782e-08, "loss": 1.4838, "step": 10937 }, { "epoch": 1.9703670344517001, "grad_norm": 1.5195205211639404, "learning_rate": 5.392990327526826e-08, "loss": 1.4395, "step": 10938 }, { "epoch": 1.9705471740598965, "grad_norm": 1.5199916362762451, "learning_rate": 5.327434311606094e-08, "loss": 1.3739, "step": 10939 }, { "epoch": 1.9707273136680927, "grad_norm": 1.5955090522766113, "learning_rate": 5.2622789658862695e-08, "loss": 1.5875, "step": 10940 }, { "epoch": 1.9709074532762891, "grad_norm": 1.6120820045471191, "learning_rate": 5.19752429559317e-08, "loss": 1.3275, "step": 10941 }, { "epoch": 1.9710875928844853, "grad_norm": 1.4519851207733154, "learning_rate": 5.133170305922086e-08, "loss": 1.352, "step": 10942 }, { "epoch": 1.9712677324926817, "grad_norm": 1.3888740539550781, "learning_rate": 5.069217002034998e-08, "loss": 1.1401, "step": 10943 }, { "epoch": 1.9714478721008781, "grad_norm": 1.6639899015426636, "learning_rate": 5.005664389062248e-08, "loss": 1.399, "step": 10944 }, { "epoch": 1.9716280117090745, "grad_norm": 1.5957750082015991, "learning_rate": 4.942512472102534e-08, "loss": 1.6113, "step": 10945 }, { "epoch": 1.971808151317271, "grad_norm": 1.5716404914855957, "learning_rate": 4.8797612562212494e-08, "loss": 1.3791, "step": 10946 }, { "epoch": 1.9719882909254673, "grad_norm": 1.6756418943405151, "learning_rate": 4.8174107464527e-08, "loss": 1.4122, "step": 10947 }, { "epoch": 1.9721684305336638, "grad_norm": 1.5390774011611938, "learning_rate": 4.755460947797885e-08, "loss": 1.255, "step": 10948 }, { "epoch": 1.97234857014186, "grad_norm": 1.6439656019210815, "learning_rate": 4.693911865227274e-08, "loss": 1.3975, "step": 10949 }, { "epoch": 1.9725287097500563, "grad_norm": 1.3778516054153442, "learning_rate": 4.6327635036780284e-08, "loss": 1.0747, "step": 10950 }, { "epoch": 1.9727088493582525, "grad_norm": 1.6551183462142944, "learning_rate": 4.572015868055113e-08, "loss": 1.1972, "step": 10951 }, { "epoch": 1.972888988966449, "grad_norm": 1.451918125152588, "learning_rate": 4.511668963231852e-08, "loss": 1.7403, "step": 10952 }, { "epoch": 1.9730691285746453, "grad_norm": 1.4560006856918335, "learning_rate": 4.451722794049373e-08, "loss": 2.1494, "step": 10953 }, { "epoch": 1.9732492681828417, "grad_norm": 1.3630493879318237, "learning_rate": 4.3921773653160524e-08, "loss": 1.7216, "step": 10954 }, { "epoch": 1.9734294077910381, "grad_norm": 1.3320837020874023, "learning_rate": 4.3330326818097345e-08, "loss": 1.5358, "step": 10955 }, { "epoch": 1.9736095473992346, "grad_norm": 1.4620627164840698, "learning_rate": 4.274288748273847e-08, "loss": 1.7386, "step": 10956 }, { "epoch": 1.9737896870074307, "grad_norm": 1.380967378616333, "learning_rate": 4.215945569421287e-08, "loss": 1.4616, "step": 10957 }, { "epoch": 1.9739698266156271, "grad_norm": 1.469442367553711, "learning_rate": 4.158003149932199e-08, "loss": 1.419, "step": 10958 }, { "epoch": 1.9741499662238233, "grad_norm": 1.909218668937683, "learning_rate": 4.100461494454533e-08, "loss": 1.896, "step": 10959 }, { "epoch": 1.9743301058320197, "grad_norm": 1.758531093597412, "learning_rate": 4.043320607604595e-08, "loss": 1.667, "step": 10960 }, { "epoch": 1.9745102454402161, "grad_norm": 2.0069632530212402, "learning_rate": 3.986580493965941e-08, "loss": 1.969, "step": 10961 }, { "epoch": 1.9746903850484125, "grad_norm": 1.6590137481689453, "learning_rate": 3.930241158090486e-08, "loss": 1.7043, "step": 10962 }, { "epoch": 1.974870524656609, "grad_norm": 1.4012058973312378, "learning_rate": 3.874302604497393e-08, "loss": 1.3647, "step": 10963 }, { "epoch": 1.9750506642648054, "grad_norm": 1.4960945844650269, "learning_rate": 3.818764837674182e-08, "loss": 1.6016, "step": 10964 }, { "epoch": 1.9752308038730015, "grad_norm": 1.4604511260986328, "learning_rate": 3.76362786207618e-08, "loss": 1.5025, "step": 10965 }, { "epoch": 1.975410943481198, "grad_norm": 1.5214426517486572, "learning_rate": 3.708891682126514e-08, "loss": 1.4757, "step": 10966 }, { "epoch": 1.9755910830893941, "grad_norm": 1.4050499200820923, "learning_rate": 3.6545563022155614e-08, "loss": 1.3559, "step": 10967 }, { "epoch": 1.9757712226975905, "grad_norm": 1.327239751815796, "learning_rate": 3.600621726703168e-08, "loss": 1.4349, "step": 10968 }, { "epoch": 1.975951362305787, "grad_norm": 1.399746060371399, "learning_rate": 3.5470879599147635e-08, "loss": 1.2301, "step": 10969 }, { "epoch": 1.9761315019139833, "grad_norm": 1.3997142314910889, "learning_rate": 3.493955006145244e-08, "loss": 1.3044, "step": 10970 }, { "epoch": 1.9763116415221798, "grad_norm": 1.5404049158096313, "learning_rate": 3.4412228696567573e-08, "loss": 1.5043, "step": 10971 }, { "epoch": 1.9764917811303762, "grad_norm": 1.6258344650268555, "learning_rate": 3.3888915546803624e-08, "loss": 1.6682, "step": 10972 }, { "epoch": 1.9766719207385723, "grad_norm": 1.5348584651947021, "learning_rate": 3.3369610654127025e-08, "loss": 1.4256, "step": 10973 }, { "epoch": 1.9768520603467687, "grad_norm": 1.3409672975540161, "learning_rate": 3.285431406020445e-08, "loss": 1.4082, "step": 10974 }, { "epoch": 1.9770321999549652, "grad_norm": 1.2668845653533936, "learning_rate": 3.23430258063695e-08, "loss": 1.2347, "step": 10975 }, { "epoch": 1.9772123395631613, "grad_norm": 1.4846363067626953, "learning_rate": 3.183574593363936e-08, "loss": 1.3976, "step": 10976 }, { "epoch": 1.9773924791713577, "grad_norm": 1.5372200012207031, "learning_rate": 3.1332474482709265e-08, "loss": 1.4191, "step": 10977 }, { "epoch": 1.9775726187795541, "grad_norm": 1.5466917753219604, "learning_rate": 3.083321149394691e-08, "loss": 1.3708, "step": 10978 }, { "epoch": 1.9777527583877506, "grad_norm": 1.4514127969741821, "learning_rate": 3.0337957007403604e-08, "loss": 1.3012, "step": 10979 }, { "epoch": 1.977932897995947, "grad_norm": 1.435525894165039, "learning_rate": 2.984671106281422e-08, "loss": 1.3044, "step": 10980 }, { "epoch": 1.9781130376041434, "grad_norm": 1.5968031883239746, "learning_rate": 2.9359473699580585e-08, "loss": 1.5017, "step": 10981 }, { "epoch": 1.9782931772123395, "grad_norm": 1.5171699523925781, "learning_rate": 2.887624495678809e-08, "loss": 1.469, "step": 10982 }, { "epoch": 1.978473316820536, "grad_norm": 1.4652512073516846, "learning_rate": 2.8397024873205723e-08, "loss": 1.3148, "step": 10983 }, { "epoch": 1.9786534564287321, "grad_norm": 1.6118950843811035, "learning_rate": 2.792181348726941e-08, "loss": 1.3349, "step": 10984 }, { "epoch": 1.9788335960369285, "grad_norm": 1.5601909160614014, "learning_rate": 2.7450610837109758e-08, "loss": 1.4196, "step": 10985 }, { "epoch": 1.979013735645125, "grad_norm": 1.4142730236053467, "learning_rate": 2.698341696051876e-08, "loss": 1.249, "step": 10986 }, { "epoch": 1.9791938752533214, "grad_norm": 1.5420595407485962, "learning_rate": 2.6520231894977543e-08, "loss": 1.3778, "step": 10987 }, { "epoch": 1.9793740148615178, "grad_norm": 1.5133724212646484, "learning_rate": 2.6061055677639724e-08, "loss": 1.4749, "step": 10988 }, { "epoch": 1.9795541544697142, "grad_norm": 1.6216070652008057, "learning_rate": 2.560588834534805e-08, "loss": 1.482, "step": 10989 }, { "epoch": 1.9797342940779104, "grad_norm": 1.5707279443740845, "learning_rate": 2.5154729934606657e-08, "loss": 1.3134, "step": 10990 }, { "epoch": 1.9799144336861068, "grad_norm": 1.6374320983886719, "learning_rate": 2.4707580481608816e-08, "loss": 1.406, "step": 10991 }, { "epoch": 1.980094573294303, "grad_norm": 1.7445958852767944, "learning_rate": 2.426444002223138e-08, "loss": 1.5866, "step": 10992 }, { "epoch": 1.9802747129024993, "grad_norm": 1.592361569404602, "learning_rate": 2.3825308592018146e-08, "loss": 1.4782, "step": 10993 }, { "epoch": 1.9804548525106958, "grad_norm": 1.3805537223815918, "learning_rate": 2.3390186226190936e-08, "loss": 1.1608, "step": 10994 }, { "epoch": 1.9806349921188922, "grad_norm": 1.4970101118087769, "learning_rate": 2.295907295966626e-08, "loss": 1.2956, "step": 10995 }, { "epoch": 1.9808151317270886, "grad_norm": 1.5703026056289673, "learning_rate": 2.253196882701647e-08, "loss": 1.453, "step": 10996 }, { "epoch": 1.980995271335285, "grad_norm": 1.7089860439300537, "learning_rate": 2.2108873862514144e-08, "loss": 1.6542, "step": 10997 }, { "epoch": 1.9811754109434812, "grad_norm": 1.4807592630386353, "learning_rate": 2.1689788100093254e-08, "loss": 1.3048, "step": 10998 }, { "epoch": 1.9813555505516776, "grad_norm": 1.4494138956069946, "learning_rate": 2.1274711573371353e-08, "loss": 1.1937, "step": 10999 }, { "epoch": 1.9815356901598737, "grad_norm": 1.5827856063842773, "learning_rate": 2.0863644315649577e-08, "loss": 1.3806, "step": 11000 }, { "epoch": 1.9817158297680701, "grad_norm": 1.4371733665466309, "learning_rate": 2.0456586359901553e-08, "loss": 1.2542, "step": 11001 }, { "epoch": 1.9818959693762666, "grad_norm": 1.4399656057357788, "learning_rate": 2.005353773878449e-08, "loss": 1.9351, "step": 11002 }, { "epoch": 1.982076108984463, "grad_norm": 1.3993446826934814, "learning_rate": 1.9654498484628083e-08, "loss": 2.0298, "step": 11003 }, { "epoch": 1.9822562485926594, "grad_norm": 1.2894805669784546, "learning_rate": 1.9259468629440057e-08, "loss": 1.8392, "step": 11004 }, { "epoch": 1.9824363882008558, "grad_norm": 1.357730507850647, "learning_rate": 1.886844820491729e-08, "loss": 1.6466, "step": 11005 }, { "epoch": 1.9826165278090522, "grad_norm": 1.3873244524002075, "learning_rate": 1.848143724241802e-08, "loss": 1.6024, "step": 11006 }, { "epoch": 1.9827966674172484, "grad_norm": 1.4892712831497192, "learning_rate": 1.809843577299519e-08, "loss": 1.8916, "step": 11007 }, { "epoch": 1.9829768070254448, "grad_norm": 1.4920061826705933, "learning_rate": 1.7719443827368677e-08, "loss": 1.6953, "step": 11008 }, { "epoch": 1.983156946633641, "grad_norm": 1.6392779350280762, "learning_rate": 1.7344461435947477e-08, "loss": 1.9488, "step": 11009 }, { "epoch": 1.9833370862418374, "grad_norm": 1.813663125038147, "learning_rate": 1.697348862880199e-08, "loss": 2.1107, "step": 11010 }, { "epoch": 1.9835172258500338, "grad_norm": 1.9045401811599731, "learning_rate": 1.6606525435702848e-08, "loss": 1.8947, "step": 11011 }, { "epoch": 1.9836973654582302, "grad_norm": 1.6111780405044556, "learning_rate": 1.6243571886082055e-08, "loss": 1.5044, "step": 11012 }, { "epoch": 1.9838775050664266, "grad_norm": 1.3917593955993652, "learning_rate": 1.5884628009049664e-08, "loss": 1.4241, "step": 11013 }, { "epoch": 1.984057644674623, "grad_norm": 1.5415117740631104, "learning_rate": 1.552969383341041e-08, "loss": 1.5877, "step": 11014 }, { "epoch": 1.9842377842828192, "grad_norm": 1.4373112916946411, "learning_rate": 1.517876938763041e-08, "loss": 1.4897, "step": 11015 }, { "epoch": 1.9844179238910156, "grad_norm": 1.382867693901062, "learning_rate": 1.4831854699864921e-08, "loss": 1.3131, "step": 11016 }, { "epoch": 1.9845980634992118, "grad_norm": 1.376491904258728, "learning_rate": 1.4488949797941687e-08, "loss": 1.2922, "step": 11017 }, { "epoch": 1.9847782031074082, "grad_norm": 1.4327082633972168, "learning_rate": 1.4150054709366479e-08, "loss": 1.3557, "step": 11018 }, { "epoch": 1.9849583427156046, "grad_norm": 1.3487615585327148, "learning_rate": 1.3815169461323107e-08, "loss": 1.2362, "step": 11019 }, { "epoch": 1.985138482323801, "grad_norm": 1.5588520765304565, "learning_rate": 1.3484294080684523e-08, "loss": 1.5735, "step": 11020 }, { "epoch": 1.9853186219319974, "grad_norm": 1.3944504261016846, "learning_rate": 1.3157428593990607e-08, "loss": 1.3834, "step": 11021 }, { "epoch": 1.9854987615401938, "grad_norm": 1.5337153673171997, "learning_rate": 1.2834573027453723e-08, "loss": 1.2569, "step": 11022 }, { "epoch": 1.98567890114839, "grad_norm": 1.44857919216156, "learning_rate": 1.2515727406980926e-08, "loss": 1.3367, "step": 11023 }, { "epoch": 1.9858590407565864, "grad_norm": 1.6034716367721558, "learning_rate": 1.2200891758151756e-08, "loss": 1.6182, "step": 11024 }, { "epoch": 1.9860391803647826, "grad_norm": 1.4651128053665161, "learning_rate": 1.1890066106212683e-08, "loss": 1.3752, "step": 11025 }, { "epoch": 1.986219319972979, "grad_norm": 1.527335286140442, "learning_rate": 1.1583250476110419e-08, "loss": 1.4843, "step": 11026 }, { "epoch": 1.9863994595811754, "grad_norm": 1.7838940620422363, "learning_rate": 1.1280444892447505e-08, "loss": 1.4359, "step": 11027 }, { "epoch": 1.9865795991893718, "grad_norm": 1.3547061681747437, "learning_rate": 1.0981649379521175e-08, "loss": 1.1103, "step": 11028 }, { "epoch": 1.9867597387975682, "grad_norm": 1.4999009370803833, "learning_rate": 1.068686396129559e-08, "loss": 1.4789, "step": 11029 }, { "epoch": 1.9869398784057646, "grad_norm": 1.4078631401062012, "learning_rate": 1.0396088661424053e-08, "loss": 1.2074, "step": 11030 }, { "epoch": 1.9871200180139608, "grad_norm": 1.4278786182403564, "learning_rate": 1.0109323503226797e-08, "loss": 1.2206, "step": 11031 }, { "epoch": 1.9873001576221572, "grad_norm": 1.4073715209960938, "learning_rate": 9.826568509713197e-09, "loss": 1.2757, "step": 11032 }, { "epoch": 1.9874802972303536, "grad_norm": 1.5547624826431274, "learning_rate": 9.547823703559556e-09, "loss": 1.3254, "step": 11033 }, { "epoch": 1.9876604368385498, "grad_norm": 1.6270281076431274, "learning_rate": 9.273089107131316e-09, "loss": 1.6017, "step": 11034 }, { "epoch": 1.9878405764467462, "grad_norm": 1.5338398218154907, "learning_rate": 9.002364742466408e-09, "loss": 1.4097, "step": 11035 }, { "epoch": 1.9880207160549426, "grad_norm": 1.7316972017288208, "learning_rate": 8.73565063128079e-09, "loss": 1.5932, "step": 11036 }, { "epoch": 1.988200855663139, "grad_norm": 1.4048669338226318, "learning_rate": 8.472946794974013e-09, "loss": 1.1715, "step": 11037 }, { "epoch": 1.9883809952713354, "grad_norm": 1.526852011680603, "learning_rate": 8.21425325461811e-09, "loss": 1.2814, "step": 11038 }, { "epoch": 1.9885611348795318, "grad_norm": 1.5486758947372437, "learning_rate": 7.959570030963149e-09, "loss": 1.3398, "step": 11039 }, { "epoch": 1.988741274487728, "grad_norm": 1.6917542219161987, "learning_rate": 7.708897144437233e-09, "loss": 1.7221, "step": 11040 }, { "epoch": 1.9889214140959244, "grad_norm": 1.8022210597991943, "learning_rate": 7.462234615157605e-09, "loss": 1.6295, "step": 11041 }, { "epoch": 1.9891015537041206, "grad_norm": 1.5328019857406616, "learning_rate": 7.219582462908437e-09, "loss": 1.3111, "step": 11042 }, { "epoch": 1.989281693312317, "grad_norm": 1.5486127138137817, "learning_rate": 6.980940707146389e-09, "loss": 1.4873, "step": 11043 }, { "epoch": 1.9894618329205134, "grad_norm": 1.4653786420822144, "learning_rate": 6.746309367028358e-09, "loss": 1.1981, "step": 11044 }, { "epoch": 1.9896419725287098, "grad_norm": 1.4437733888626099, "learning_rate": 6.515688461372627e-09, "loss": 1.2348, "step": 11045 }, { "epoch": 1.9898221121369062, "grad_norm": 1.507846474647522, "learning_rate": 6.2890780086755085e-09, "loss": 1.2631, "step": 11046 }, { "epoch": 1.9900022517451026, "grad_norm": 1.671010971069336, "learning_rate": 6.066478027116907e-09, "loss": 1.5826, "step": 11047 }, { "epoch": 1.9901823913532988, "grad_norm": 1.4583747386932373, "learning_rate": 5.847888534549206e-09, "loss": 1.0864, "step": 11048 }, { "epoch": 1.9903625309614952, "grad_norm": 1.508095145225525, "learning_rate": 5.633309548519483e-09, "loss": 1.2126, "step": 11049 }, { "epoch": 1.9905426705696914, "grad_norm": 1.6497952938079834, "learning_rate": 5.4227410862306425e-09, "loss": 1.2905, "step": 11050 }, { "epoch": 1.9907228101778878, "grad_norm": 1.4284565448760986, "learning_rate": 5.216183164580279e-09, "loss": 1.0046, "step": 11051 }, { "epoch": 1.9909029497860842, "grad_norm": 1.3137928247451782, "learning_rate": 5.013635800138472e-09, "loss": 1.8234, "step": 11052 }, { "epoch": 1.9910830893942806, "grad_norm": 1.2813129425048828, "learning_rate": 4.8150990091477835e-09, "loss": 1.7937, "step": 11053 }, { "epoch": 1.991263229002477, "grad_norm": 1.3043104410171509, "learning_rate": 4.620572807534362e-09, "loss": 1.6349, "step": 11054 }, { "epoch": 1.9914433686106734, "grad_norm": 1.4180114269256592, "learning_rate": 4.4300572109134966e-09, "loss": 1.9602, "step": 11055 }, { "epoch": 1.9916235082188696, "grad_norm": 1.4721637964248657, "learning_rate": 4.243552234556302e-09, "loss": 1.8934, "step": 11056 }, { "epoch": 1.991803647827066, "grad_norm": 1.333228349685669, "learning_rate": 4.061057893434139e-09, "loss": 1.5447, "step": 11057 }, { "epoch": 1.9919837874352622, "grad_norm": 1.3397862911224365, "learning_rate": 3.882574202174194e-09, "loss": 1.5206, "step": 11058 }, { "epoch": 1.9921639270434586, "grad_norm": 1.6484938859939575, "learning_rate": 3.7081011751094463e-09, "loss": 1.7874, "step": 11059 }, { "epoch": 1.992344066651655, "grad_norm": 1.6707037687301636, "learning_rate": 3.5376388262287063e-09, "loss": 1.5398, "step": 11060 }, { "epoch": 1.9925242062598514, "grad_norm": 1.6993014812469482, "learning_rate": 3.3711871691988196e-09, "loss": 1.7502, "step": 11061 }, { "epoch": 1.9927043458680478, "grad_norm": 1.7855199575424194, "learning_rate": 3.208746217386871e-09, "loss": 1.5786, "step": 11062 }, { "epoch": 1.9928844854762442, "grad_norm": 1.5815858840942383, "learning_rate": 3.050315983815777e-09, "loss": 1.5104, "step": 11063 }, { "epoch": 1.9930646250844406, "grad_norm": 1.4299613237380981, "learning_rate": 2.8958964811920396e-09, "loss": 1.3902, "step": 11064 }, { "epoch": 1.9932447646926368, "grad_norm": 1.2640552520751953, "learning_rate": 2.7454877219112996e-09, "loss": 1.1825, "step": 11065 }, { "epoch": 1.9934249043008332, "grad_norm": 1.384771704673767, "learning_rate": 2.5990897180305785e-09, "loss": 1.3792, "step": 11066 }, { "epoch": 1.9936050439090294, "grad_norm": 1.379815697669983, "learning_rate": 2.456702481301587e-09, "loss": 1.2922, "step": 11067 }, { "epoch": 1.9937851835172258, "grad_norm": 1.4666544198989868, "learning_rate": 2.318326023137418e-09, "loss": 1.535, "step": 11068 }, { "epoch": 1.9939653231254222, "grad_norm": 1.35880446434021, "learning_rate": 2.183960354651404e-09, "loss": 1.2615, "step": 11069 }, { "epoch": 1.9941454627336186, "grad_norm": 1.5199602842330933, "learning_rate": 2.0536054866071573e-09, "loss": 1.5071, "step": 11070 }, { "epoch": 1.994325602341815, "grad_norm": 1.4640576839447021, "learning_rate": 1.927261429474081e-09, "loss": 1.3601, "step": 11071 }, { "epoch": 1.9945057419500114, "grad_norm": 1.38802170753479, "learning_rate": 1.8049281933829597e-09, "loss": 1.415, "step": 11072 }, { "epoch": 1.9946858815582076, "grad_norm": 1.406002163887024, "learning_rate": 1.6866057881481656e-09, "loss": 1.2901, "step": 11073 }, { "epoch": 1.994866021166404, "grad_norm": 1.3924516439437866, "learning_rate": 1.5722942232621052e-09, "loss": 1.446, "step": 11074 }, { "epoch": 1.9950461607746002, "grad_norm": 1.330531120300293, "learning_rate": 1.4619935078896697e-09, "loss": 1.1766, "step": 11075 }, { "epoch": 1.9952263003827966, "grad_norm": 1.377027153968811, "learning_rate": 1.3557036508793363e-09, "loss": 1.1435, "step": 11076 }, { "epoch": 1.995406439990993, "grad_norm": 1.4500690698623657, "learning_rate": 1.2534246607687205e-09, "loss": 1.4477, "step": 11077 }, { "epoch": 1.9955865795991894, "grad_norm": 1.5224453210830688, "learning_rate": 1.1551565457512682e-09, "loss": 1.4601, "step": 11078 }, { "epoch": 1.9957667192073858, "grad_norm": 1.3258839845657349, "learning_rate": 1.0608993137151135e-09, "loss": 1.2186, "step": 11079 }, { "epoch": 1.9959468588155822, "grad_norm": 1.5319766998291016, "learning_rate": 9.706529722153247e-10, "loss": 1.6331, "step": 11080 }, { "epoch": 1.9961269984237784, "grad_norm": 1.6158783435821533, "learning_rate": 8.844175285016576e-10, "loss": 1.6333, "step": 11081 }, { "epoch": 1.9963071380319748, "grad_norm": 1.5090970993041992, "learning_rate": 8.021929894796998e-10, "loss": 1.2663, "step": 11082 }, { "epoch": 1.996487277640171, "grad_norm": 1.4330544471740723, "learning_rate": 7.239793617608293e-10, "loss": 1.5099, "step": 11083 }, { "epoch": 1.9966674172483674, "grad_norm": 1.5570321083068848, "learning_rate": 6.497766516067039e-10, "loss": 1.4506, "step": 11084 }, { "epoch": 1.9968475568565638, "grad_norm": 1.3722858428955078, "learning_rate": 5.79584864968119e-10, "loss": 1.21, "step": 11085 }, { "epoch": 1.9970276964647602, "grad_norm": 1.6175187826156616, "learning_rate": 5.134040074905588e-10, "loss": 1.551, "step": 11086 }, { "epoch": 1.9972078360729566, "grad_norm": 1.7320308685302734, "learning_rate": 4.512340844697871e-10, "loss": 1.41, "step": 11087 }, { "epoch": 1.997387975681153, "grad_norm": 1.6305747032165527, "learning_rate": 3.9307510089625634e-10, "loss": 1.4626, "step": 11088 }, { "epoch": 1.9975681152893494, "grad_norm": 1.5165183544158936, "learning_rate": 3.3892706143845434e-10, "loss": 1.3816, "step": 11089 }, { "epoch": 1.9977482548975456, "grad_norm": 1.546207308769226, "learning_rate": 2.88789970431802e-10, "loss": 1.4033, "step": 11090 }, { "epoch": 1.997928394505742, "grad_norm": 1.57394278049469, "learning_rate": 2.4266383191196007e-10, "loss": 1.1819, "step": 11091 }, { "epoch": 1.9981085341139382, "grad_norm": 1.4905561208724976, "learning_rate": 2.005486495704201e-10, "loss": 1.2591, "step": 11092 }, { "epoch": 1.9982886737221346, "grad_norm": 1.6398800611495972, "learning_rate": 1.6244442678226002e-10, "loss": 1.6037, "step": 11093 }, { "epoch": 1.998468813330331, "grad_norm": 1.5621079206466675, "learning_rate": 1.2835116661169545e-10, "loss": 1.2738, "step": 11094 }, { "epoch": 1.9986489529385274, "grad_norm": 1.5807408094406128, "learning_rate": 9.826887178987499e-11, "loss": 1.4298, "step": 11095 }, { "epoch": 1.9988290925467238, "grad_norm": 1.3767751455307007, "learning_rate": 7.219754473153373e-11, "loss": 1.1201, "step": 11096 }, { "epoch": 1.9990092321549202, "grad_norm": 1.3734205961227417, "learning_rate": 5.0137187529442075e-11, "loss": 1.1917, "step": 11097 }, { "epoch": 1.9991893717631164, "grad_norm": 1.6411293745040894, "learning_rate": 3.2087801948854635e-11, "loss": 1.4585, "step": 11098 }, { "epoch": 1.9993695113713128, "grad_norm": 1.5668938159942627, "learning_rate": 1.8049389438612453e-11, "loss": 1.3119, "step": 11099 }, { "epoch": 1.999549650979509, "grad_norm": 1.656908392906189, "learning_rate": 8.021951131143013e-12, "loss": 1.4597, "step": 11100 }, { "epoch": 1.9997297905877054, "grad_norm": 1.4120724201202393, "learning_rate": 2.0054878202557804e-12, "loss": 1.1435, "step": 11101 }, { "epoch": 1.9999099301959018, "grad_norm": 1.3890012502670288, "learning_rate": 0.0, "loss": 1.6596, "step": 11102 }, { "epoch": 1.9999099301959018, "eval_loss": 1.5088356733322144, "eval_runtime": 184.9011, "eval_samples_per_second": 50.568, "eval_steps_per_second": 12.645, "step": 11102 } ], "logging_steps": 1, "max_steps": 11102, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8.499053016215716e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }