{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.5, "eval_steps": 500, "global_step": 6250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 8e-05, "grad_norm": 8.342381477355957, "learning_rate": 1.0000000000000002e-06, "loss": 0.704, "step": 1 }, { "epoch": 0.00016, "grad_norm": 13.919276237487793, "learning_rate": 2.0000000000000003e-06, "loss": 0.5506, "step": 2 }, { "epoch": 0.00024, "grad_norm": 9.644050598144531, "learning_rate": 3e-06, "loss": 0.5138, "step": 3 }, { "epoch": 0.00032, "grad_norm": 9.632057189941406, "learning_rate": 4.000000000000001e-06, "loss": 0.7317, "step": 4 }, { "epoch": 0.0004, "grad_norm": 8.935699462890625, "learning_rate": 5e-06, "loss": 0.7372, "step": 5 }, { "epoch": 0.00048, "grad_norm": 9.070698738098145, "learning_rate": 6e-06, "loss": 0.7517, "step": 6 }, { "epoch": 0.00056, "grad_norm": 9.226506233215332, "learning_rate": 7e-06, "loss": 0.6523, "step": 7 }, { "epoch": 0.00064, "grad_norm": 11.089458465576172, "learning_rate": 8.000000000000001e-06, "loss": 0.6743, "step": 8 }, { "epoch": 0.00072, "grad_norm": 7.038223743438721, "learning_rate": 9e-06, "loss": 0.6536, "step": 9 }, { "epoch": 0.0008, "grad_norm": 12.974882125854492, "learning_rate": 1e-05, "loss": 0.6122, "step": 10 }, { "epoch": 0.00088, "grad_norm": 8.054683685302734, "learning_rate": 9.999999841833366e-06, "loss": 0.5658, "step": 11 }, { "epoch": 0.00096, "grad_norm": 15.936073303222656, "learning_rate": 9.99999936733347e-06, "loss": 0.8491, "step": 12 }, { "epoch": 0.00104, "grad_norm": 4.686435699462891, "learning_rate": 9.999998576500346e-06, "loss": 0.4673, "step": 13 }, { "epoch": 0.00112, "grad_norm": 9.04387092590332, "learning_rate": 9.999997469334042e-06, "loss": 0.6238, "step": 14 }, { "epoch": 0.0012, "grad_norm": 10.498541831970215, "learning_rate": 9.999996045834626e-06, "loss": 0.6662, "step": 15 }, { "epoch": 0.00128, "grad_norm": 5.166777610778809, "learning_rate": 9.999994306002192e-06, "loss": 0.5575, "step": 16 }, { "epoch": 0.00136, "grad_norm": 7.151340484619141, "learning_rate": 9.99999224983685e-06, "loss": 0.5892, "step": 17 }, { "epoch": 0.00144, "grad_norm": 5.04518985748291, "learning_rate": 9.999989877338725e-06, "loss": 0.6417, "step": 18 }, { "epoch": 0.00152, "grad_norm": 5.931295394897461, "learning_rate": 9.999987188507972e-06, "loss": 0.4939, "step": 19 }, { "epoch": 0.0016, "grad_norm": 5.467217445373535, "learning_rate": 9.99998418334476e-06, "loss": 0.6131, "step": 20 }, { "epoch": 0.00168, "grad_norm": 5.837316989898682, "learning_rate": 9.999980861849277e-06, "loss": 0.5316, "step": 21 }, { "epoch": 0.00176, "grad_norm": 4.237757205963135, "learning_rate": 9.999977224021735e-06, "loss": 0.5499, "step": 22 }, { "epoch": 0.00184, "grad_norm": 6.357263565063477, "learning_rate": 9.999973269862366e-06, "loss": 0.5405, "step": 23 }, { "epoch": 0.00192, "grad_norm": 6.0218939781188965, "learning_rate": 9.999968999371416e-06, "loss": 0.543, "step": 24 }, { "epoch": 0.002, "grad_norm": 5.242684841156006, "learning_rate": 9.99996441254916e-06, "loss": 0.7568, "step": 25 }, { "epoch": 0.00208, "grad_norm": 5.868856906890869, "learning_rate": 9.999959509395884e-06, "loss": 0.5211, "step": 26 }, { "epoch": 0.00216, "grad_norm": 5.401463985443115, "learning_rate": 9.9999542899119e-06, "loss": 0.4879, "step": 27 }, { "epoch": 0.00224, "grad_norm": 4.193491458892822, "learning_rate": 9.999948754097538e-06, "loss": 0.409, "step": 28 }, { "epoch": 0.00232, "grad_norm": 8.131255149841309, "learning_rate": 9.999942901953148e-06, "loss": 0.5406, "step": 29 }, { "epoch": 0.0024, "grad_norm": 8.522489547729492, "learning_rate": 9.9999367334791e-06, "loss": 0.4171, "step": 30 }, { "epoch": 0.00248, "grad_norm": 5.474879264831543, "learning_rate": 9.999930248675784e-06, "loss": 0.3518, "step": 31 }, { "epoch": 0.00256, "grad_norm": 3.1692543029785156, "learning_rate": 9.999923447543614e-06, "loss": 0.3889, "step": 32 }, { "epoch": 0.00264, "grad_norm": 5.542096138000488, "learning_rate": 9.999916330083015e-06, "loss": 0.4787, "step": 33 }, { "epoch": 0.00272, "grad_norm": 4.017666339874268, "learning_rate": 9.99990889629444e-06, "loss": 0.4756, "step": 34 }, { "epoch": 0.0028, "grad_norm": 4.048304080963135, "learning_rate": 9.99990114617836e-06, "loss": 0.4496, "step": 35 }, { "epoch": 0.00288, "grad_norm": 9.737591743469238, "learning_rate": 9.999893079735262e-06, "loss": 0.5132, "step": 36 }, { "epoch": 0.00296, "grad_norm": 5.1411051750183105, "learning_rate": 9.99988469696566e-06, "loss": 0.4311, "step": 37 }, { "epoch": 0.00304, "grad_norm": 7.761783599853516, "learning_rate": 9.999875997870081e-06, "loss": 0.6614, "step": 38 }, { "epoch": 0.00312, "grad_norm": 2.5379042625427246, "learning_rate": 9.99986698244908e-06, "loss": 0.4899, "step": 39 }, { "epoch": 0.0032, "grad_norm": 5.286753177642822, "learning_rate": 9.999857650703224e-06, "loss": 0.4297, "step": 40 }, { "epoch": 0.00328, "grad_norm": 4.113813400268555, "learning_rate": 9.999848002633102e-06, "loss": 0.3484, "step": 41 }, { "epoch": 0.00336, "grad_norm": 4.206110954284668, "learning_rate": 9.999838038239327e-06, "loss": 0.5235, "step": 42 }, { "epoch": 0.00344, "grad_norm": 8.220080375671387, "learning_rate": 9.999827757522531e-06, "loss": 0.5051, "step": 43 }, { "epoch": 0.00352, "grad_norm": 3.16642427444458, "learning_rate": 9.99981716048336e-06, "loss": 0.3753, "step": 44 }, { "epoch": 0.0036, "grad_norm": 2.503070592880249, "learning_rate": 9.999806247122488e-06, "loss": 0.453, "step": 45 }, { "epoch": 0.00368, "grad_norm": 4.834280967712402, "learning_rate": 9.999795017440603e-06, "loss": 0.3499, "step": 46 }, { "epoch": 0.00376, "grad_norm": 2.557096004486084, "learning_rate": 9.999783471438419e-06, "loss": 0.3253, "step": 47 }, { "epoch": 0.00384, "grad_norm": 3.6029672622680664, "learning_rate": 9.999771609116662e-06, "loss": 0.4115, "step": 48 }, { "epoch": 0.00392, "grad_norm": 3.1142241954803467, "learning_rate": 9.999759430476084e-06, "loss": 0.5014, "step": 49 }, { "epoch": 0.004, "grad_norm": 3.7333784103393555, "learning_rate": 9.999746935517457e-06, "loss": 0.3711, "step": 50 }, { "epoch": 0.00408, "grad_norm": 3.2080132961273193, "learning_rate": 9.999734124241571e-06, "loss": 0.3347, "step": 51 }, { "epoch": 0.00416, "grad_norm": 3.805007219314575, "learning_rate": 9.999720996649235e-06, "loss": 0.7125, "step": 52 }, { "epoch": 0.00424, "grad_norm": 3.14184308052063, "learning_rate": 9.999707552741283e-06, "loss": 0.5121, "step": 53 }, { "epoch": 0.00432, "grad_norm": 3.540736436843872, "learning_rate": 9.999693792518562e-06, "loss": 0.4294, "step": 54 }, { "epoch": 0.0044, "grad_norm": 3.0372300148010254, "learning_rate": 9.999679715981942e-06, "loss": 0.4628, "step": 55 }, { "epoch": 0.00448, "grad_norm": 2.61629056930542, "learning_rate": 9.999665323132317e-06, "loss": 0.4936, "step": 56 }, { "epoch": 0.00456, "grad_norm": 3.4021859169006348, "learning_rate": 9.999650613970597e-06, "loss": 0.539, "step": 57 }, { "epoch": 0.00464, "grad_norm": 1.9579095840454102, "learning_rate": 9.99963558849771e-06, "loss": 0.3475, "step": 58 }, { "epoch": 0.00472, "grad_norm": 3.136955976486206, "learning_rate": 9.999620246714607e-06, "loss": 0.389, "step": 59 }, { "epoch": 0.0048, "grad_norm": 3.5431883335113525, "learning_rate": 9.999604588622263e-06, "loss": 0.4231, "step": 60 }, { "epoch": 0.00488, "grad_norm": 4.076716899871826, "learning_rate": 9.999588614221663e-06, "loss": 0.4758, "step": 61 }, { "epoch": 0.00496, "grad_norm": 2.444188117980957, "learning_rate": 9.99957232351382e-06, "loss": 0.4112, "step": 62 }, { "epoch": 0.00504, "grad_norm": 8.321596145629883, "learning_rate": 9.999555716499766e-06, "loss": 0.4371, "step": 63 }, { "epoch": 0.00512, "grad_norm": 2.7783851623535156, "learning_rate": 9.99953879318055e-06, "loss": 0.4137, "step": 64 }, { "epoch": 0.0052, "grad_norm": 3.005558729171753, "learning_rate": 9.999521553557243e-06, "loss": 0.3781, "step": 65 }, { "epoch": 0.00528, "grad_norm": 2.853501558303833, "learning_rate": 9.999503997630934e-06, "loss": 0.4312, "step": 66 }, { "epoch": 0.00536, "grad_norm": 2.694535732269287, "learning_rate": 9.999486125402738e-06, "loss": 0.3815, "step": 67 }, { "epoch": 0.00544, "grad_norm": 4.3195013999938965, "learning_rate": 9.999467936873783e-06, "loss": 0.4915, "step": 68 }, { "epoch": 0.00552, "grad_norm": 2.009920358657837, "learning_rate": 9.999449432045218e-06, "loss": 0.4457, "step": 69 }, { "epoch": 0.0056, "grad_norm": 2.0476291179656982, "learning_rate": 9.999430610918217e-06, "loss": 0.4017, "step": 70 }, { "epoch": 0.00568, "grad_norm": 3.428873300552368, "learning_rate": 9.99941147349397e-06, "loss": 0.4943, "step": 71 }, { "epoch": 0.00576, "grad_norm": 4.138948917388916, "learning_rate": 9.999392019773685e-06, "loss": 0.4353, "step": 72 }, { "epoch": 0.00584, "grad_norm": 3.7678720951080322, "learning_rate": 9.999372249758596e-06, "loss": 0.3594, "step": 73 }, { "epoch": 0.00592, "grad_norm": 2.1992523670196533, "learning_rate": 9.999352163449954e-06, "loss": 0.4057, "step": 74 }, { "epoch": 0.006, "grad_norm": 3.9209282398223877, "learning_rate": 9.999331760849028e-06, "loss": 0.3694, "step": 75 }, { "epoch": 0.00608, "grad_norm": 2.271817684173584, "learning_rate": 9.999311041957109e-06, "loss": 0.3515, "step": 76 }, { "epoch": 0.00616, "grad_norm": 3.229722023010254, "learning_rate": 9.999290006775507e-06, "loss": 0.3017, "step": 77 }, { "epoch": 0.00624, "grad_norm": 2.318446397781372, "learning_rate": 9.999268655305556e-06, "loss": 0.3885, "step": 78 }, { "epoch": 0.00632, "grad_norm": 2.2932703495025635, "learning_rate": 9.999246987548603e-06, "loss": 0.3392, "step": 79 }, { "epoch": 0.0064, "grad_norm": 2.111154794692993, "learning_rate": 9.999225003506021e-06, "loss": 0.3128, "step": 80 }, { "epoch": 0.00648, "grad_norm": 4.121140003204346, "learning_rate": 9.9992027031792e-06, "loss": 0.4418, "step": 81 }, { "epoch": 0.00656, "grad_norm": 1.6190462112426758, "learning_rate": 9.999180086569553e-06, "loss": 0.2905, "step": 82 }, { "epoch": 0.00664, "grad_norm": 3.0115609169006348, "learning_rate": 9.999157153678509e-06, "loss": 0.3379, "step": 83 }, { "epoch": 0.00672, "grad_norm": 3.357482671737671, "learning_rate": 9.999133904507518e-06, "loss": 0.4827, "step": 84 }, { "epoch": 0.0068, "grad_norm": 2.7103617191314697, "learning_rate": 9.99911033905805e-06, "loss": 0.3324, "step": 85 }, { "epoch": 0.00688, "grad_norm": 2.003753662109375, "learning_rate": 9.999086457331603e-06, "loss": 0.4019, "step": 86 }, { "epoch": 0.00696, "grad_norm": 2.259552478790283, "learning_rate": 9.999062259329679e-06, "loss": 0.4511, "step": 87 }, { "epoch": 0.00704, "grad_norm": 2.696129322052002, "learning_rate": 9.999037745053814e-06, "loss": 0.3631, "step": 88 }, { "epoch": 0.00712, "grad_norm": 2.0996644496917725, "learning_rate": 9.999012914505559e-06, "loss": 0.4356, "step": 89 }, { "epoch": 0.0072, "grad_norm": 1.694945216178894, "learning_rate": 9.998987767686482e-06, "loss": 0.3558, "step": 90 }, { "epoch": 0.00728, "grad_norm": 4.748021602630615, "learning_rate": 9.998962304598175e-06, "loss": 0.403, "step": 91 }, { "epoch": 0.00736, "grad_norm": 2.026329278945923, "learning_rate": 9.998936525242251e-06, "loss": 0.3913, "step": 92 }, { "epoch": 0.00744, "grad_norm": 1.9248204231262207, "learning_rate": 9.99891042962034e-06, "loss": 0.2967, "step": 93 }, { "epoch": 0.00752, "grad_norm": 2.36529278755188, "learning_rate": 9.998884017734091e-06, "loss": 0.4356, "step": 94 }, { "epoch": 0.0076, "grad_norm": 2.3117401599884033, "learning_rate": 9.998857289585177e-06, "loss": 0.362, "step": 95 }, { "epoch": 0.00768, "grad_norm": 2.4276554584503174, "learning_rate": 9.998830245175288e-06, "loss": 0.5628, "step": 96 }, { "epoch": 0.00776, "grad_norm": 3.106940984725952, "learning_rate": 9.998802884506136e-06, "loss": 0.3412, "step": 97 }, { "epoch": 0.00784, "grad_norm": 1.9471877813339233, "learning_rate": 9.998775207579452e-06, "loss": 0.506, "step": 98 }, { "epoch": 0.00792, "grad_norm": 2.589988946914673, "learning_rate": 9.998747214396987e-06, "loss": 0.5048, "step": 99 }, { "epoch": 0.008, "grad_norm": 2.1943140029907227, "learning_rate": 9.998718904960511e-06, "loss": 0.3406, "step": 100 }, { "epoch": 0.00808, "grad_norm": 2.6359894275665283, "learning_rate": 9.998690279271815e-06, "loss": 0.4702, "step": 101 }, { "epoch": 0.00816, "grad_norm": 2.8097708225250244, "learning_rate": 9.99866133733271e-06, "loss": 0.2897, "step": 102 }, { "epoch": 0.00824, "grad_norm": 1.7844816446304321, "learning_rate": 9.99863207914503e-06, "loss": 0.3935, "step": 103 }, { "epoch": 0.00832, "grad_norm": 2.048438549041748, "learning_rate": 9.998602504710623e-06, "loss": 0.4605, "step": 104 }, { "epoch": 0.0084, "grad_norm": 2.508671522140503, "learning_rate": 9.99857261403136e-06, "loss": 0.3009, "step": 105 }, { "epoch": 0.00848, "grad_norm": 1.8178949356079102, "learning_rate": 9.998542407109135e-06, "loss": 0.4019, "step": 106 }, { "epoch": 0.00856, "grad_norm": 4.293577671051025, "learning_rate": 9.998511883945855e-06, "loss": 0.3646, "step": 107 }, { "epoch": 0.00864, "grad_norm": 2.485862970352173, "learning_rate": 9.998481044543452e-06, "loss": 0.4891, "step": 108 }, { "epoch": 0.00872, "grad_norm": 2.0601084232330322, "learning_rate": 9.998449888903881e-06, "loss": 0.3481, "step": 109 }, { "epoch": 0.0088, "grad_norm": 2.0768215656280518, "learning_rate": 9.99841841702911e-06, "loss": 0.3108, "step": 110 }, { "epoch": 0.00888, "grad_norm": 1.9751044511795044, "learning_rate": 9.99838662892113e-06, "loss": 0.3349, "step": 111 }, { "epoch": 0.00896, "grad_norm": 2.3447659015655518, "learning_rate": 9.998354524581953e-06, "loss": 0.4507, "step": 112 }, { "epoch": 0.00904, "grad_norm": 2.0622479915618896, "learning_rate": 9.998322104013609e-06, "loss": 0.3494, "step": 113 }, { "epoch": 0.00912, "grad_norm": 2.2754018306732178, "learning_rate": 9.998289367218151e-06, "loss": 0.3852, "step": 114 }, { "epoch": 0.0092, "grad_norm": 2.0593910217285156, "learning_rate": 9.998256314197648e-06, "loss": 0.3999, "step": 115 }, { "epoch": 0.00928, "grad_norm": 2.1019093990325928, "learning_rate": 9.998222944954193e-06, "loss": 0.4743, "step": 116 }, { "epoch": 0.00936, "grad_norm": 1.9430328607559204, "learning_rate": 9.998189259489897e-06, "loss": 0.3751, "step": 117 }, { "epoch": 0.00944, "grad_norm": 1.9289934635162354, "learning_rate": 9.99815525780689e-06, "loss": 0.3586, "step": 118 }, { "epoch": 0.00952, "grad_norm": 2.437016248703003, "learning_rate": 9.998120939907323e-06, "loss": 0.4204, "step": 119 }, { "epoch": 0.0096, "grad_norm": 2.8940718173980713, "learning_rate": 9.998086305793368e-06, "loss": 0.379, "step": 120 }, { "epoch": 0.00968, "grad_norm": 2.258979558944702, "learning_rate": 9.998051355467215e-06, "loss": 0.4011, "step": 121 }, { "epoch": 0.00976, "grad_norm": 1.7911226749420166, "learning_rate": 9.99801608893108e-06, "loss": 0.3489, "step": 122 }, { "epoch": 0.00984, "grad_norm": 3.431490659713745, "learning_rate": 9.997980506187188e-06, "loss": 0.4482, "step": 123 }, { "epoch": 0.00992, "grad_norm": 2.332395315170288, "learning_rate": 9.997944607237791e-06, "loss": 0.3858, "step": 124 }, { "epoch": 0.01, "grad_norm": 2.693103790283203, "learning_rate": 9.997908392085164e-06, "loss": 0.4227, "step": 125 }, { "epoch": 0.01008, "grad_norm": 2.643721580505371, "learning_rate": 9.997871860731596e-06, "loss": 0.3627, "step": 126 }, { "epoch": 0.01016, "grad_norm": 2.5970005989074707, "learning_rate": 9.997835013179397e-06, "loss": 0.5068, "step": 127 }, { "epoch": 0.01024, "grad_norm": 1.9385316371917725, "learning_rate": 9.997797849430902e-06, "loss": 0.2864, "step": 128 }, { "epoch": 0.01032, "grad_norm": 1.7153620719909668, "learning_rate": 9.997760369488458e-06, "loss": 0.3723, "step": 129 }, { "epoch": 0.0104, "grad_norm": 1.6988774538040161, "learning_rate": 9.997722573354438e-06, "loss": 0.3445, "step": 130 }, { "epoch": 0.01048, "grad_norm": 2.6378743648529053, "learning_rate": 9.997684461031235e-06, "loss": 0.5199, "step": 131 }, { "epoch": 0.01056, "grad_norm": 2.355621099472046, "learning_rate": 9.997646032521256e-06, "loss": 0.4259, "step": 132 }, { "epoch": 0.01064, "grad_norm": 2.3169424533843994, "learning_rate": 9.997607287826937e-06, "loss": 0.5117, "step": 133 }, { "epoch": 0.01072, "grad_norm": 2.7013490200042725, "learning_rate": 9.997568226950723e-06, "loss": 0.6017, "step": 134 }, { "epoch": 0.0108, "grad_norm": 1.5648651123046875, "learning_rate": 9.997528849895092e-06, "loss": 0.401, "step": 135 }, { "epoch": 0.01088, "grad_norm": 2.346057415008545, "learning_rate": 9.997489156662532e-06, "loss": 0.6035, "step": 136 }, { "epoch": 0.01096, "grad_norm": 3.0977964401245117, "learning_rate": 9.997449147255556e-06, "loss": 0.5332, "step": 137 }, { "epoch": 0.01104, "grad_norm": 2.7511343955993652, "learning_rate": 9.997408821676693e-06, "loss": 0.3371, "step": 138 }, { "epoch": 0.01112, "grad_norm": 3.267756938934326, "learning_rate": 9.997368179928495e-06, "loss": 0.349, "step": 139 }, { "epoch": 0.0112, "grad_norm": 2.05903959274292, "learning_rate": 9.997327222013533e-06, "loss": 0.3592, "step": 140 }, { "epoch": 0.01128, "grad_norm": 1.7902599573135376, "learning_rate": 9.9972859479344e-06, "loss": 0.3498, "step": 141 }, { "epoch": 0.01136, "grad_norm": 1.8080183267593384, "learning_rate": 9.997244357693704e-06, "loss": 0.3041, "step": 142 }, { "epoch": 0.01144, "grad_norm": 2.1454615592956543, "learning_rate": 9.99720245129408e-06, "loss": 0.2934, "step": 143 }, { "epoch": 0.01152, "grad_norm": 2.059626340866089, "learning_rate": 9.997160228738178e-06, "loss": 0.3284, "step": 144 }, { "epoch": 0.0116, "grad_norm": 2.013929605484009, "learning_rate": 9.997117690028668e-06, "loss": 0.3648, "step": 145 }, { "epoch": 0.01168, "grad_norm": 2.726135015487671, "learning_rate": 9.997074835168243e-06, "loss": 0.5155, "step": 146 }, { "epoch": 0.01176, "grad_norm": 1.494078516960144, "learning_rate": 9.997031664159614e-06, "loss": 0.342, "step": 147 }, { "epoch": 0.01184, "grad_norm": 3.9255640506744385, "learning_rate": 9.996988177005508e-06, "loss": 0.3398, "step": 148 }, { "epoch": 0.01192, "grad_norm": 1.7292317152023315, "learning_rate": 9.996944373708683e-06, "loss": 0.3688, "step": 149 }, { "epoch": 0.012, "grad_norm": 1.9214504957199097, "learning_rate": 9.996900254271909e-06, "loss": 0.3196, "step": 150 }, { "epoch": 0.01208, "grad_norm": 2.378833293914795, "learning_rate": 9.996855818697973e-06, "loss": 0.4263, "step": 151 }, { "epoch": 0.01216, "grad_norm": 2.5661027431488037, "learning_rate": 9.996811066989688e-06, "loss": 0.4251, "step": 152 }, { "epoch": 0.01224, "grad_norm": 2.3186323642730713, "learning_rate": 9.996765999149888e-06, "loss": 0.5136, "step": 153 }, { "epoch": 0.01232, "grad_norm": 4.518758296966553, "learning_rate": 9.996720615181422e-06, "loss": 0.4732, "step": 154 }, { "epoch": 0.0124, "grad_norm": 1.8078404664993286, "learning_rate": 9.996674915087161e-06, "loss": 0.342, "step": 155 }, { "epoch": 0.01248, "grad_norm": 2.0094892978668213, "learning_rate": 9.996628898869998e-06, "loss": 0.3407, "step": 156 }, { "epoch": 0.01256, "grad_norm": 2.363971471786499, "learning_rate": 9.996582566532844e-06, "loss": 0.5517, "step": 157 }, { "epoch": 0.01264, "grad_norm": 1.7961527109146118, "learning_rate": 9.99653591807863e-06, "loss": 0.3353, "step": 158 }, { "epoch": 0.01272, "grad_norm": 2.390897750854492, "learning_rate": 9.996488953510303e-06, "loss": 0.3781, "step": 159 }, { "epoch": 0.0128, "grad_norm": 2.0351173877716064, "learning_rate": 9.996441672830842e-06, "loss": 0.4534, "step": 160 }, { "epoch": 0.01288, "grad_norm": 1.8322193622589111, "learning_rate": 9.996394076043235e-06, "loss": 0.3663, "step": 161 }, { "epoch": 0.01296, "grad_norm": 1.927498459815979, "learning_rate": 9.996346163150489e-06, "loss": 0.4644, "step": 162 }, { "epoch": 0.01304, "grad_norm": 1.8679307699203491, "learning_rate": 9.996297934155642e-06, "loss": 0.3235, "step": 163 }, { "epoch": 0.01312, "grad_norm": 2.3288774490356445, "learning_rate": 9.996249389061742e-06, "loss": 0.3555, "step": 164 }, { "epoch": 0.0132, "grad_norm": 1.7463264465332031, "learning_rate": 9.99620052787186e-06, "loss": 0.3165, "step": 165 }, { "epoch": 0.01328, "grad_norm": 1.7550216913223267, "learning_rate": 9.996151350589089e-06, "loss": 0.3353, "step": 166 }, { "epoch": 0.01336, "grad_norm": 1.5060083866119385, "learning_rate": 9.996101857216538e-06, "loss": 0.3295, "step": 167 }, { "epoch": 0.01344, "grad_norm": 1.9037864208221436, "learning_rate": 9.996052047757342e-06, "loss": 0.4499, "step": 168 }, { "epoch": 0.01352, "grad_norm": 2.601609468460083, "learning_rate": 9.996001922214646e-06, "loss": 0.4094, "step": 169 }, { "epoch": 0.0136, "grad_norm": 1.6243177652359009, "learning_rate": 9.995951480591627e-06, "loss": 0.281, "step": 170 }, { "epoch": 0.01368, "grad_norm": 1.9210420846939087, "learning_rate": 9.995900722891474e-06, "loss": 0.3456, "step": 171 }, { "epoch": 0.01376, "grad_norm": 2.4466614723205566, "learning_rate": 9.995849649117398e-06, "loss": 0.3608, "step": 172 }, { "epoch": 0.01384, "grad_norm": 1.706709384918213, "learning_rate": 9.995798259272633e-06, "loss": 0.368, "step": 173 }, { "epoch": 0.01392, "grad_norm": 2.536444664001465, "learning_rate": 9.995746553360427e-06, "loss": 0.3482, "step": 174 }, { "epoch": 0.014, "grad_norm": 1.7476345300674438, "learning_rate": 9.995694531384051e-06, "loss": 0.3771, "step": 175 }, { "epoch": 0.01408, "grad_norm": 2.676931142807007, "learning_rate": 9.9956421933468e-06, "loss": 0.4398, "step": 176 }, { "epoch": 0.01416, "grad_norm": 2.442253828048706, "learning_rate": 9.99558953925198e-06, "loss": 0.3727, "step": 177 }, { "epoch": 0.01424, "grad_norm": 1.871006965637207, "learning_rate": 9.995536569102927e-06, "loss": 0.4419, "step": 178 }, { "epoch": 0.01432, "grad_norm": 1.8206201791763306, "learning_rate": 9.995483282902992e-06, "loss": 0.3648, "step": 179 }, { "epoch": 0.0144, "grad_norm": 2.077590227127075, "learning_rate": 9.995429680655541e-06, "loss": 0.3741, "step": 180 }, { "epoch": 0.01448, "grad_norm": 2.4338691234588623, "learning_rate": 9.995375762363972e-06, "loss": 0.3764, "step": 181 }, { "epoch": 0.01456, "grad_norm": 1.9777390956878662, "learning_rate": 9.995321528031693e-06, "loss": 0.3789, "step": 182 }, { "epoch": 0.01464, "grad_norm": 1.9252766370773315, "learning_rate": 9.995266977662132e-06, "loss": 0.3909, "step": 183 }, { "epoch": 0.01472, "grad_norm": 1.6491369009017944, "learning_rate": 9.995212111258745e-06, "loss": 0.4554, "step": 184 }, { "epoch": 0.0148, "grad_norm": 1.691046953201294, "learning_rate": 9.995156928825003e-06, "loss": 0.4205, "step": 185 }, { "epoch": 0.01488, "grad_norm": 1.9484405517578125, "learning_rate": 9.995101430364396e-06, "loss": 0.3699, "step": 186 }, { "epoch": 0.01496, "grad_norm": 2.238854169845581, "learning_rate": 9.995045615880434e-06, "loss": 0.374, "step": 187 }, { "epoch": 0.01504, "grad_norm": 1.2045531272888184, "learning_rate": 9.99498948537665e-06, "loss": 0.2368, "step": 188 }, { "epoch": 0.01512, "grad_norm": 1.7461583614349365, "learning_rate": 9.994933038856595e-06, "loss": 0.3133, "step": 189 }, { "epoch": 0.0152, "grad_norm": 1.7523412704467773, "learning_rate": 9.994876276323839e-06, "loss": 0.3396, "step": 190 }, { "epoch": 0.01528, "grad_norm": 3.5537517070770264, "learning_rate": 9.994819197781973e-06, "loss": 0.3683, "step": 191 }, { "epoch": 0.01536, "grad_norm": 1.3427032232284546, "learning_rate": 9.994761803234611e-06, "loss": 0.291, "step": 192 }, { "epoch": 0.01544, "grad_norm": 1.7434067726135254, "learning_rate": 9.994704092685381e-06, "loss": 0.3493, "step": 193 }, { "epoch": 0.01552, "grad_norm": 1.7947523593902588, "learning_rate": 9.994646066137937e-06, "loss": 0.4236, "step": 194 }, { "epoch": 0.0156, "grad_norm": 2.011258125305176, "learning_rate": 9.994587723595946e-06, "loss": 0.3265, "step": 195 }, { "epoch": 0.01568, "grad_norm": 1.2146482467651367, "learning_rate": 9.994529065063103e-06, "loss": 0.2635, "step": 196 }, { "epoch": 0.01576, "grad_norm": 2.2841827869415283, "learning_rate": 9.994470090543118e-06, "loss": 0.4389, "step": 197 }, { "epoch": 0.01584, "grad_norm": 1.4877972602844238, "learning_rate": 9.994410800039721e-06, "loss": 0.4311, "step": 198 }, { "epoch": 0.01592, "grad_norm": 2.1514832973480225, "learning_rate": 9.994351193556666e-06, "loss": 0.3551, "step": 199 }, { "epoch": 0.016, "grad_norm": 1.679862380027771, "learning_rate": 9.99429127109772e-06, "loss": 0.3547, "step": 200 }, { "epoch": 0.01608, "grad_norm": 2.345980167388916, "learning_rate": 9.994231032666677e-06, "loss": 0.3212, "step": 201 }, { "epoch": 0.01616, "grad_norm": 1.6985834836959839, "learning_rate": 9.994170478267348e-06, "loss": 0.4209, "step": 202 }, { "epoch": 0.01624, "grad_norm": 2.21384334564209, "learning_rate": 9.994109607903563e-06, "loss": 0.3629, "step": 203 }, { "epoch": 0.01632, "grad_norm": 2.7619104385375977, "learning_rate": 9.994048421579173e-06, "loss": 0.5192, "step": 204 }, { "epoch": 0.0164, "grad_norm": 2.18821382522583, "learning_rate": 9.993986919298049e-06, "loss": 0.388, "step": 205 }, { "epoch": 0.01648, "grad_norm": 1.5542010068893433, "learning_rate": 9.993925101064084e-06, "loss": 0.3708, "step": 206 }, { "epoch": 0.01656, "grad_norm": 1.6064860820770264, "learning_rate": 9.993862966881188e-06, "loss": 0.3564, "step": 207 }, { "epoch": 0.01664, "grad_norm": 1.946254849433899, "learning_rate": 9.993800516753289e-06, "loss": 0.4394, "step": 208 }, { "epoch": 0.01672, "grad_norm": 1.9601609706878662, "learning_rate": 9.993737750684342e-06, "loss": 0.4078, "step": 209 }, { "epoch": 0.0168, "grad_norm": 1.7054260969161987, "learning_rate": 9.993674668678316e-06, "loss": 0.3253, "step": 210 }, { "epoch": 0.01688, "grad_norm": 1.7017536163330078, "learning_rate": 9.993611270739205e-06, "loss": 0.3473, "step": 211 }, { "epoch": 0.01696, "grad_norm": 2.0936248302459717, "learning_rate": 9.993547556871015e-06, "loss": 0.4329, "step": 212 }, { "epoch": 0.01704, "grad_norm": 1.91551673412323, "learning_rate": 9.993483527077782e-06, "loss": 0.3773, "step": 213 }, { "epoch": 0.01712, "grad_norm": 2.9922895431518555, "learning_rate": 9.99341918136355e-06, "loss": 0.3711, "step": 214 }, { "epoch": 0.0172, "grad_norm": 1.5471163988113403, "learning_rate": 9.993354519732399e-06, "loss": 0.3224, "step": 215 }, { "epoch": 0.01728, "grad_norm": 1.5325666666030884, "learning_rate": 9.993289542188413e-06, "loss": 0.33, "step": 216 }, { "epoch": 0.01736, "grad_norm": 1.9776920080184937, "learning_rate": 9.993224248735706e-06, "loss": 0.3761, "step": 217 }, { "epoch": 0.01744, "grad_norm": 1.6905009746551514, "learning_rate": 9.993158639378408e-06, "loss": 0.3817, "step": 218 }, { "epoch": 0.01752, "grad_norm": 1.6124826669692993, "learning_rate": 9.993092714120671e-06, "loss": 0.3097, "step": 219 }, { "epoch": 0.0176, "grad_norm": 1.5717380046844482, "learning_rate": 9.993026472966664e-06, "loss": 0.3696, "step": 220 }, { "epoch": 0.01768, "grad_norm": 1.8339051008224487, "learning_rate": 9.992959915920579e-06, "loss": 0.3433, "step": 221 }, { "epoch": 0.01776, "grad_norm": 1.8827868700027466, "learning_rate": 9.992893042986627e-06, "loss": 0.3711, "step": 222 }, { "epoch": 0.01784, "grad_norm": 1.704789161682129, "learning_rate": 9.992825854169038e-06, "loss": 0.2893, "step": 223 }, { "epoch": 0.01792, "grad_norm": 1.803351640701294, "learning_rate": 9.992758349472062e-06, "loss": 0.3313, "step": 224 }, { "epoch": 0.018, "grad_norm": 1.406293511390686, "learning_rate": 9.992690528899972e-06, "loss": 0.3326, "step": 225 }, { "epoch": 0.01808, "grad_norm": 3.832010269165039, "learning_rate": 9.992622392457058e-06, "loss": 0.4356, "step": 226 }, { "epoch": 0.01816, "grad_norm": 1.769728660583496, "learning_rate": 9.992553940147631e-06, "loss": 0.3862, "step": 227 }, { "epoch": 0.01824, "grad_norm": 2.6798720359802246, "learning_rate": 9.99248517197602e-06, "loss": 0.3847, "step": 228 }, { "epoch": 0.01832, "grad_norm": 1.8670660257339478, "learning_rate": 9.992416087946579e-06, "loss": 0.3875, "step": 229 }, { "epoch": 0.0184, "grad_norm": 2.3109071254730225, "learning_rate": 9.992346688063676e-06, "loss": 0.4156, "step": 230 }, { "epoch": 0.01848, "grad_norm": 1.5571988821029663, "learning_rate": 9.992276972331702e-06, "loss": 0.3264, "step": 231 }, { "epoch": 0.01856, "grad_norm": 2.017947196960449, "learning_rate": 9.992206940755068e-06, "loss": 0.3828, "step": 232 }, { "epoch": 0.01864, "grad_norm": 1.9046738147735596, "learning_rate": 9.992136593338206e-06, "loss": 0.3952, "step": 233 }, { "epoch": 0.01872, "grad_norm": 1.9752707481384277, "learning_rate": 9.992065930085564e-06, "loss": 0.3992, "step": 234 }, { "epoch": 0.0188, "grad_norm": 2.36694598197937, "learning_rate": 9.991994951001616e-06, "loss": 0.4147, "step": 235 }, { "epoch": 0.01888, "grad_norm": 2.532259464263916, "learning_rate": 9.99192365609085e-06, "loss": 0.4949, "step": 236 }, { "epoch": 0.01896, "grad_norm": 2.2116692066192627, "learning_rate": 9.991852045357776e-06, "loss": 0.3448, "step": 237 }, { "epoch": 0.01904, "grad_norm": 1.5159705877304077, "learning_rate": 9.991780118806927e-06, "loss": 0.3287, "step": 238 }, { "epoch": 0.01912, "grad_norm": 1.7897611856460571, "learning_rate": 9.991707876442851e-06, "loss": 0.331, "step": 239 }, { "epoch": 0.0192, "grad_norm": 1.7827035188674927, "learning_rate": 9.991635318270123e-06, "loss": 0.3524, "step": 240 }, { "epoch": 0.01928, "grad_norm": 2.0052402019500732, "learning_rate": 9.991562444293328e-06, "loss": 0.4706, "step": 241 }, { "epoch": 0.01936, "grad_norm": 3.4093244075775146, "learning_rate": 9.991489254517079e-06, "loss": 0.4795, "step": 242 }, { "epoch": 0.01944, "grad_norm": 1.9298174381256104, "learning_rate": 9.991415748946007e-06, "loss": 0.3301, "step": 243 }, { "epoch": 0.01952, "grad_norm": 1.4808518886566162, "learning_rate": 9.991341927584763e-06, "loss": 0.3292, "step": 244 }, { "epoch": 0.0196, "grad_norm": 1.9954057931900024, "learning_rate": 9.991267790438016e-06, "loss": 0.4463, "step": 245 }, { "epoch": 0.01968, "grad_norm": 2.5434648990631104, "learning_rate": 9.991193337510455e-06, "loss": 0.4003, "step": 246 }, { "epoch": 0.01976, "grad_norm": 1.7687448263168335, "learning_rate": 9.991118568806794e-06, "loss": 0.4522, "step": 247 }, { "epoch": 0.01984, "grad_norm": 1.513317584991455, "learning_rate": 9.99104348433176e-06, "loss": 0.4053, "step": 248 }, { "epoch": 0.01992, "grad_norm": 1.8949499130249023, "learning_rate": 9.990968084090104e-06, "loss": 0.3524, "step": 249 }, { "epoch": 0.02, "grad_norm": 1.4182684421539307, "learning_rate": 9.9908923680866e-06, "loss": 0.3213, "step": 250 }, { "epoch": 0.02008, "grad_norm": 2.0624146461486816, "learning_rate": 9.990816336326034e-06, "loss": 0.4941, "step": 251 }, { "epoch": 0.02016, "grad_norm": 1.8073673248291016, "learning_rate": 9.990739988813219e-06, "loss": 0.342, "step": 252 }, { "epoch": 0.02024, "grad_norm": 1.4276469945907593, "learning_rate": 9.990663325552981e-06, "loss": 0.2914, "step": 253 }, { "epoch": 0.02032, "grad_norm": 1.8346103429794312, "learning_rate": 9.990586346550175e-06, "loss": 0.3763, "step": 254 }, { "epoch": 0.0204, "grad_norm": 2.2009072303771973, "learning_rate": 9.99050905180967e-06, "loss": 0.5039, "step": 255 }, { "epoch": 0.02048, "grad_norm": 1.2374415397644043, "learning_rate": 9.990431441336354e-06, "loss": 0.2549, "step": 256 }, { "epoch": 0.02056, "grad_norm": 2.125685214996338, "learning_rate": 9.99035351513514e-06, "loss": 0.3265, "step": 257 }, { "epoch": 0.02064, "grad_norm": 1.3601418733596802, "learning_rate": 9.990275273210958e-06, "loss": 0.3251, "step": 258 }, { "epoch": 0.02072, "grad_norm": 1.8217127323150635, "learning_rate": 9.990196715568755e-06, "loss": 0.4395, "step": 259 }, { "epoch": 0.0208, "grad_norm": 2.603867292404175, "learning_rate": 9.990117842213504e-06, "loss": 0.3981, "step": 260 }, { "epoch": 0.02088, "grad_norm": 1.8914546966552734, "learning_rate": 9.990038653150194e-06, "loss": 0.3568, "step": 261 }, { "epoch": 0.02096, "grad_norm": 1.4449388980865479, "learning_rate": 9.989959148383834e-06, "loss": 0.4019, "step": 262 }, { "epoch": 0.02104, "grad_norm": 1.4248216152191162, "learning_rate": 9.989879327919456e-06, "loss": 0.3217, "step": 263 }, { "epoch": 0.02112, "grad_norm": 2.652259588241577, "learning_rate": 9.98979919176211e-06, "loss": 0.4723, "step": 264 }, { "epoch": 0.0212, "grad_norm": 1.64741849899292, "learning_rate": 9.989718739916864e-06, "loss": 0.3847, "step": 265 }, { "epoch": 0.02128, "grad_norm": 1.9805387258529663, "learning_rate": 9.989637972388809e-06, "loss": 0.5401, "step": 266 }, { "epoch": 0.02136, "grad_norm": 2.0374245643615723, "learning_rate": 9.989556889183055e-06, "loss": 0.4374, "step": 267 }, { "epoch": 0.02144, "grad_norm": 1.9687358140945435, "learning_rate": 9.989475490304732e-06, "loss": 0.3703, "step": 268 }, { "epoch": 0.02152, "grad_norm": 1.6070175170898438, "learning_rate": 9.98939377575899e-06, "loss": 0.2785, "step": 269 }, { "epoch": 0.0216, "grad_norm": 2.3517184257507324, "learning_rate": 9.989311745550997e-06, "loss": 0.3411, "step": 270 }, { "epoch": 0.02168, "grad_norm": 2.0133581161499023, "learning_rate": 9.989229399685944e-06, "loss": 0.408, "step": 271 }, { "epoch": 0.02176, "grad_norm": 2.088970422744751, "learning_rate": 9.989146738169042e-06, "loss": 0.361, "step": 272 }, { "epoch": 0.02184, "grad_norm": 2.6646909713745117, "learning_rate": 9.98906376100552e-06, "loss": 0.4573, "step": 273 }, { "epoch": 0.02192, "grad_norm": 2.1084513664245605, "learning_rate": 9.988980468200627e-06, "loss": 0.4713, "step": 274 }, { "epoch": 0.022, "grad_norm": 1.7543795108795166, "learning_rate": 9.988896859759632e-06, "loss": 0.3186, "step": 275 }, { "epoch": 0.02208, "grad_norm": 1.4450194835662842, "learning_rate": 9.988812935687826e-06, "loss": 0.3079, "step": 276 }, { "epoch": 0.02216, "grad_norm": 1.684277892112732, "learning_rate": 9.988728695990518e-06, "loss": 0.3892, "step": 277 }, { "epoch": 0.02224, "grad_norm": 1.5748530626296997, "learning_rate": 9.988644140673038e-06, "loss": 0.3742, "step": 278 }, { "epoch": 0.02232, "grad_norm": 1.5772830247879028, "learning_rate": 9.988559269740736e-06, "loss": 0.3195, "step": 279 }, { "epoch": 0.0224, "grad_norm": 1.4628132581710815, "learning_rate": 9.98847408319898e-06, "loss": 0.3627, "step": 280 }, { "epoch": 0.02248, "grad_norm": 2.2950448989868164, "learning_rate": 9.98838858105316e-06, "loss": 0.4156, "step": 281 }, { "epoch": 0.02256, "grad_norm": 1.51676344871521, "learning_rate": 9.988302763308686e-06, "loss": 0.2407, "step": 282 }, { "epoch": 0.02264, "grad_norm": 1.6948258876800537, "learning_rate": 9.988216629970987e-06, "loss": 0.3616, "step": 283 }, { "epoch": 0.02272, "grad_norm": 1.9347060918807983, "learning_rate": 9.988130181045512e-06, "loss": 0.3056, "step": 284 }, { "epoch": 0.0228, "grad_norm": 2.000277280807495, "learning_rate": 9.988043416537731e-06, "loss": 0.402, "step": 285 }, { "epoch": 0.02288, "grad_norm": 1.702262043952942, "learning_rate": 9.987956336453135e-06, "loss": 0.358, "step": 286 }, { "epoch": 0.02296, "grad_norm": 1.9157183170318604, "learning_rate": 9.98786894079723e-06, "loss": 0.414, "step": 287 }, { "epoch": 0.02304, "grad_norm": 2.2817223072052, "learning_rate": 9.987781229575547e-06, "loss": 0.4701, "step": 288 }, { "epoch": 0.02312, "grad_norm": 1.9925824403762817, "learning_rate": 9.987693202793633e-06, "loss": 0.3452, "step": 289 }, { "epoch": 0.0232, "grad_norm": 1.446832299232483, "learning_rate": 9.98760486045706e-06, "loss": 0.3077, "step": 290 }, { "epoch": 0.02328, "grad_norm": 1.2968581914901733, "learning_rate": 9.987516202571417e-06, "loss": 0.2845, "step": 291 }, { "epoch": 0.02336, "grad_norm": 1.7567917108535767, "learning_rate": 9.98742722914231e-06, "loss": 0.4025, "step": 292 }, { "epoch": 0.02344, "grad_norm": 1.824837327003479, "learning_rate": 9.987337940175371e-06, "loss": 0.3339, "step": 293 }, { "epoch": 0.02352, "grad_norm": 1.6700366735458374, "learning_rate": 9.98724833567625e-06, "loss": 0.3338, "step": 294 }, { "epoch": 0.0236, "grad_norm": 1.7511111497879028, "learning_rate": 9.987158415650612e-06, "loss": 0.4037, "step": 295 }, { "epoch": 0.02368, "grad_norm": 1.5636446475982666, "learning_rate": 9.987068180104148e-06, "loss": 0.3424, "step": 296 }, { "epoch": 0.02376, "grad_norm": 1.8241537809371948, "learning_rate": 9.986977629042569e-06, "loss": 0.5058, "step": 297 }, { "epoch": 0.02384, "grad_norm": 2.1625335216522217, "learning_rate": 9.986886762471601e-06, "loss": 0.4052, "step": 298 }, { "epoch": 0.02392, "grad_norm": 1.7909748554229736, "learning_rate": 9.986795580396994e-06, "loss": 0.3581, "step": 299 }, { "epoch": 0.024, "grad_norm": 1.2463935613632202, "learning_rate": 9.986704082824516e-06, "loss": 0.2531, "step": 300 }, { "epoch": 0.02408, "grad_norm": 1.689233422279358, "learning_rate": 9.986612269759956e-06, "loss": 0.4526, "step": 301 }, { "epoch": 0.02416, "grad_norm": 2.223651170730591, "learning_rate": 9.986520141209123e-06, "loss": 0.5223, "step": 302 }, { "epoch": 0.02424, "grad_norm": 2.412198066711426, "learning_rate": 9.986427697177847e-06, "loss": 0.4936, "step": 303 }, { "epoch": 0.02432, "grad_norm": 1.578535795211792, "learning_rate": 9.986334937671974e-06, "loss": 0.3544, "step": 304 }, { "epoch": 0.0244, "grad_norm": 1.6475830078125, "learning_rate": 9.986241862697375e-06, "loss": 0.3465, "step": 305 }, { "epoch": 0.02448, "grad_norm": 1.4819910526275635, "learning_rate": 9.986148472259935e-06, "loss": 0.2884, "step": 306 }, { "epoch": 0.02456, "grad_norm": 1.7936198711395264, "learning_rate": 9.986054766365566e-06, "loss": 0.5354, "step": 307 }, { "epoch": 0.02464, "grad_norm": 1.8793686628341675, "learning_rate": 9.985960745020195e-06, "loss": 0.3592, "step": 308 }, { "epoch": 0.02472, "grad_norm": 1.6589360237121582, "learning_rate": 9.985866408229773e-06, "loss": 0.3107, "step": 309 }, { "epoch": 0.0248, "grad_norm": 1.6323789358139038, "learning_rate": 9.985771756000264e-06, "loss": 0.3276, "step": 310 }, { "epoch": 0.02488, "grad_norm": 1.6884311437606812, "learning_rate": 9.98567678833766e-06, "loss": 0.3694, "step": 311 }, { "epoch": 0.02496, "grad_norm": 1.9537749290466309, "learning_rate": 9.985581505247966e-06, "loss": 0.4009, "step": 312 }, { "epoch": 0.02504, "grad_norm": 1.6910805702209473, "learning_rate": 9.985485906737212e-06, "loss": 0.3655, "step": 313 }, { "epoch": 0.02512, "grad_norm": 1.6935604810714722, "learning_rate": 9.985389992811447e-06, "loss": 0.3912, "step": 314 }, { "epoch": 0.0252, "grad_norm": 1.7497141361236572, "learning_rate": 9.985293763476738e-06, "loss": 0.3744, "step": 315 }, { "epoch": 0.02528, "grad_norm": 1.6273237466812134, "learning_rate": 9.985197218739173e-06, "loss": 0.316, "step": 316 }, { "epoch": 0.02536, "grad_norm": 1.482218623161316, "learning_rate": 9.985100358604861e-06, "loss": 0.3293, "step": 317 }, { "epoch": 0.02544, "grad_norm": 1.6040552854537964, "learning_rate": 9.985003183079929e-06, "loss": 0.3666, "step": 318 }, { "epoch": 0.02552, "grad_norm": 1.7107411623001099, "learning_rate": 9.984905692170525e-06, "loss": 0.3839, "step": 319 }, { "epoch": 0.0256, "grad_norm": 1.7257567644119263, "learning_rate": 9.984807885882819e-06, "loss": 0.3729, "step": 320 }, { "epoch": 0.02568, "grad_norm": 1.6172734498977661, "learning_rate": 9.984709764222997e-06, "loss": 0.3743, "step": 321 }, { "epoch": 0.02576, "grad_norm": 1.929922342300415, "learning_rate": 9.984611327197267e-06, "loss": 0.3688, "step": 322 }, { "epoch": 0.02584, "grad_norm": 1.99301278591156, "learning_rate": 9.984512574811857e-06, "loss": 0.3554, "step": 323 }, { "epoch": 0.02592, "grad_norm": 1.3338180780410767, "learning_rate": 9.984413507073014e-06, "loss": 0.2592, "step": 324 }, { "epoch": 0.026, "grad_norm": 1.341111183166504, "learning_rate": 9.984314123987006e-06, "loss": 0.2646, "step": 325 }, { "epoch": 0.02608, "grad_norm": 1.6769309043884277, "learning_rate": 9.984214425560122e-06, "loss": 0.357, "step": 326 }, { "epoch": 0.02616, "grad_norm": 2.0242223739624023, "learning_rate": 9.984114411798667e-06, "loss": 0.5107, "step": 327 }, { "epoch": 0.02624, "grad_norm": 1.5169055461883545, "learning_rate": 9.984014082708972e-06, "loss": 0.3416, "step": 328 }, { "epoch": 0.02632, "grad_norm": 1.7003381252288818, "learning_rate": 9.983913438297381e-06, "loss": 0.3307, "step": 329 }, { "epoch": 0.0264, "grad_norm": 1.7793561220169067, "learning_rate": 9.983812478570265e-06, "loss": 0.3307, "step": 330 }, { "epoch": 0.02648, "grad_norm": 1.37666916847229, "learning_rate": 9.983711203534008e-06, "loss": 0.2952, "step": 331 }, { "epoch": 0.02656, "grad_norm": 1.3145544528961182, "learning_rate": 9.983609613195018e-06, "loss": 0.2823, "step": 332 }, { "epoch": 0.02664, "grad_norm": 1.7312018871307373, "learning_rate": 9.983507707559724e-06, "loss": 0.3524, "step": 333 }, { "epoch": 0.02672, "grad_norm": 1.6275979280471802, "learning_rate": 9.983405486634572e-06, "loss": 0.3636, "step": 334 }, { "epoch": 0.0268, "grad_norm": 1.8315293788909912, "learning_rate": 9.983302950426028e-06, "loss": 0.3742, "step": 335 }, { "epoch": 0.02688, "grad_norm": 2.2285656929016113, "learning_rate": 9.983200098940582e-06, "loss": 0.5228, "step": 336 }, { "epoch": 0.02696, "grad_norm": 2.5498311519622803, "learning_rate": 9.98309693218474e-06, "loss": 0.3208, "step": 337 }, { "epoch": 0.02704, "grad_norm": 1.8318697214126587, "learning_rate": 9.982993450165028e-06, "loss": 0.3807, "step": 338 }, { "epoch": 0.02712, "grad_norm": 1.5511677265167236, "learning_rate": 9.982889652887992e-06, "loss": 0.3051, "step": 339 }, { "epoch": 0.0272, "grad_norm": 1.9955275058746338, "learning_rate": 9.982785540360202e-06, "loss": 0.4089, "step": 340 }, { "epoch": 0.02728, "grad_norm": 1.8035287857055664, "learning_rate": 9.982681112588244e-06, "loss": 0.4043, "step": 341 }, { "epoch": 0.02736, "grad_norm": 1.9366258382797241, "learning_rate": 9.982576369578724e-06, "loss": 0.3673, "step": 342 }, { "epoch": 0.02744, "grad_norm": 1.7964532375335693, "learning_rate": 9.982471311338268e-06, "loss": 0.3345, "step": 343 }, { "epoch": 0.02752, "grad_norm": 1.6594544649124146, "learning_rate": 9.982365937873523e-06, "loss": 0.4043, "step": 344 }, { "epoch": 0.0276, "grad_norm": 2.0171964168548584, "learning_rate": 9.982260249191159e-06, "loss": 0.4117, "step": 345 }, { "epoch": 0.02768, "grad_norm": 1.921452283859253, "learning_rate": 9.982154245297856e-06, "loss": 0.4141, "step": 346 }, { "epoch": 0.02776, "grad_norm": 1.8650532960891724, "learning_rate": 9.982047926200327e-06, "loss": 0.3625, "step": 347 }, { "epoch": 0.02784, "grad_norm": 1.7884360551834106, "learning_rate": 9.981941291905294e-06, "loss": 0.3626, "step": 348 }, { "epoch": 0.02792, "grad_norm": 1.35147225856781, "learning_rate": 9.981834342419506e-06, "loss": 0.2929, "step": 349 }, { "epoch": 0.028, "grad_norm": 1.7383079528808594, "learning_rate": 9.981727077749727e-06, "loss": 0.378, "step": 350 }, { "epoch": 0.02808, "grad_norm": 1.9508424997329712, "learning_rate": 9.981619497902746e-06, "loss": 0.3706, "step": 351 }, { "epoch": 0.02816, "grad_norm": 2.229902744293213, "learning_rate": 9.981511602885368e-06, "loss": 0.4509, "step": 352 }, { "epoch": 0.02824, "grad_norm": 1.4837419986724854, "learning_rate": 9.981403392704419e-06, "loss": 0.3169, "step": 353 }, { "epoch": 0.02832, "grad_norm": 1.9721978902816772, "learning_rate": 9.981294867366745e-06, "loss": 0.4006, "step": 354 }, { "epoch": 0.0284, "grad_norm": 1.4611529111862183, "learning_rate": 9.981186026879212e-06, "loss": 0.2671, "step": 355 }, { "epoch": 0.02848, "grad_norm": 1.6954723596572876, "learning_rate": 9.981076871248705e-06, "loss": 0.3658, "step": 356 }, { "epoch": 0.02856, "grad_norm": 1.6720842123031616, "learning_rate": 9.980967400482134e-06, "loss": 0.3438, "step": 357 }, { "epoch": 0.02864, "grad_norm": 1.6820980310440063, "learning_rate": 9.98085761458642e-06, "loss": 0.3547, "step": 358 }, { "epoch": 0.02872, "grad_norm": 1.71517813205719, "learning_rate": 9.980747513568511e-06, "loss": 0.3521, "step": 359 }, { "epoch": 0.0288, "grad_norm": 1.6508595943450928, "learning_rate": 9.980637097435372e-06, "loss": 0.369, "step": 360 }, { "epoch": 0.02888, "grad_norm": 1.8108190298080444, "learning_rate": 9.98052636619399e-06, "loss": 0.3725, "step": 361 }, { "epoch": 0.02896, "grad_norm": 1.6566613912582397, "learning_rate": 9.98041531985137e-06, "loss": 0.4799, "step": 362 }, { "epoch": 0.02904, "grad_norm": 1.784568428993225, "learning_rate": 9.980303958414537e-06, "loss": 0.284, "step": 363 }, { "epoch": 0.02912, "grad_norm": 1.7334696054458618, "learning_rate": 9.980192281890535e-06, "loss": 0.3531, "step": 364 }, { "epoch": 0.0292, "grad_norm": 1.55849289894104, "learning_rate": 9.980080290286434e-06, "loss": 0.3523, "step": 365 }, { "epoch": 0.02928, "grad_norm": 1.8714348077774048, "learning_rate": 9.979967983609313e-06, "loss": 0.315, "step": 366 }, { "epoch": 0.02936, "grad_norm": 1.5885354280471802, "learning_rate": 9.979855361866283e-06, "loss": 0.3596, "step": 367 }, { "epoch": 0.02944, "grad_norm": 1.5087720155715942, "learning_rate": 9.979742425064467e-06, "loss": 0.4048, "step": 368 }, { "epoch": 0.02952, "grad_norm": 1.7668178081512451, "learning_rate": 9.97962917321101e-06, "loss": 0.3881, "step": 369 }, { "epoch": 0.0296, "grad_norm": 1.5784231424331665, "learning_rate": 9.979515606313074e-06, "loss": 0.3335, "step": 370 }, { "epoch": 0.02968, "grad_norm": 2.16629695892334, "learning_rate": 9.97940172437785e-06, "loss": 0.5286, "step": 371 }, { "epoch": 0.02976, "grad_norm": 2.021318197250366, "learning_rate": 9.979287527412541e-06, "loss": 0.3768, "step": 372 }, { "epoch": 0.02984, "grad_norm": 2.232651710510254, "learning_rate": 9.979173015424369e-06, "loss": 0.517, "step": 373 }, { "epoch": 0.02992, "grad_norm": 1.7764800786972046, "learning_rate": 9.979058188420581e-06, "loss": 0.3594, "step": 374 }, { "epoch": 0.03, "grad_norm": 1.5262402296066284, "learning_rate": 9.978943046408442e-06, "loss": 0.3362, "step": 375 }, { "epoch": 0.03008, "grad_norm": 1.5017170906066895, "learning_rate": 9.978827589395238e-06, "loss": 0.3163, "step": 376 }, { "epoch": 0.03016, "grad_norm": 1.7568035125732422, "learning_rate": 9.978711817388266e-06, "loss": 0.3264, "step": 377 }, { "epoch": 0.03024, "grad_norm": 1.738759160041809, "learning_rate": 9.978595730394861e-06, "loss": 0.4094, "step": 378 }, { "epoch": 0.03032, "grad_norm": 1.2938789129257202, "learning_rate": 9.978479328422362e-06, "loss": 0.2659, "step": 379 }, { "epoch": 0.0304, "grad_norm": 1.6452093124389648, "learning_rate": 9.978362611478132e-06, "loss": 0.3264, "step": 380 }, { "epoch": 0.03048, "grad_norm": 1.610556721687317, "learning_rate": 9.978245579569558e-06, "loss": 0.3351, "step": 381 }, { "epoch": 0.03056, "grad_norm": 1.4417246580123901, "learning_rate": 9.978128232704044e-06, "loss": 0.3223, "step": 382 }, { "epoch": 0.03064, "grad_norm": 1.9182772636413574, "learning_rate": 9.978010570889013e-06, "loss": 0.3542, "step": 383 }, { "epoch": 0.03072, "grad_norm": 2.3410074710845947, "learning_rate": 9.97789259413191e-06, "loss": 0.3877, "step": 384 }, { "epoch": 0.0308, "grad_norm": 1.9133309125900269, "learning_rate": 9.977774302440199e-06, "loss": 0.43, "step": 385 }, { "epoch": 0.03088, "grad_norm": 1.8875223398208618, "learning_rate": 9.977655695821362e-06, "loss": 0.4885, "step": 386 }, { "epoch": 0.03096, "grad_norm": 1.7378370761871338, "learning_rate": 9.977536774282906e-06, "loss": 0.4246, "step": 387 }, { "epoch": 0.03104, "grad_norm": 1.8936777114868164, "learning_rate": 9.977417537832352e-06, "loss": 0.3571, "step": 388 }, { "epoch": 0.03112, "grad_norm": 1.7290297746658325, "learning_rate": 9.977297986477246e-06, "loss": 0.3553, "step": 389 }, { "epoch": 0.0312, "grad_norm": 1.4241451025009155, "learning_rate": 9.977178120225151e-06, "loss": 0.2923, "step": 390 }, { "epoch": 0.03128, "grad_norm": 1.917623519897461, "learning_rate": 9.977057939083648e-06, "loss": 0.3546, "step": 391 }, { "epoch": 0.03136, "grad_norm": 1.762268304824829, "learning_rate": 9.976937443060343e-06, "loss": 0.386, "step": 392 }, { "epoch": 0.03144, "grad_norm": 1.6032752990722656, "learning_rate": 9.97681663216286e-06, "loss": 0.3461, "step": 393 }, { "epoch": 0.03152, "grad_norm": 1.9459599256515503, "learning_rate": 9.97669550639884e-06, "loss": 0.4753, "step": 394 }, { "epoch": 0.0316, "grad_norm": 2.0758650302886963, "learning_rate": 9.97657406577595e-06, "loss": 0.4122, "step": 395 }, { "epoch": 0.03168, "grad_norm": 1.4958903789520264, "learning_rate": 9.976452310301867e-06, "loss": 0.3139, "step": 396 }, { "epoch": 0.03176, "grad_norm": 1.7065391540527344, "learning_rate": 9.9763302399843e-06, "loss": 0.3258, "step": 397 }, { "epoch": 0.03184, "grad_norm": 1.7780778408050537, "learning_rate": 9.976207854830968e-06, "loss": 0.3458, "step": 398 }, { "epoch": 0.03192, "grad_norm": 1.318264365196228, "learning_rate": 9.976085154849617e-06, "loss": 0.3148, "step": 399 }, { "epoch": 0.032, "grad_norm": 2.011800527572632, "learning_rate": 9.975962140048007e-06, "loss": 0.3416, "step": 400 }, { "epoch": 0.03208, "grad_norm": 1.5037428140640259, "learning_rate": 9.975838810433922e-06, "loss": 0.3242, "step": 401 }, { "epoch": 0.03216, "grad_norm": 1.8600457906723022, "learning_rate": 9.975715166015165e-06, "loss": 0.3643, "step": 402 }, { "epoch": 0.03224, "grad_norm": 1.61443293094635, "learning_rate": 9.975591206799559e-06, "loss": 0.357, "step": 403 }, { "epoch": 0.03232, "grad_norm": 1.9129692316055298, "learning_rate": 9.975466932794943e-06, "loss": 0.4214, "step": 404 }, { "epoch": 0.0324, "grad_norm": 1.9851549863815308, "learning_rate": 9.975342344009186e-06, "loss": 0.4749, "step": 405 }, { "epoch": 0.03248, "grad_norm": 1.711588978767395, "learning_rate": 9.975217440450164e-06, "loss": 0.32, "step": 406 }, { "epoch": 0.03256, "grad_norm": 1.4885379076004028, "learning_rate": 9.975092222125783e-06, "loss": 0.3246, "step": 407 }, { "epoch": 0.03264, "grad_norm": 1.6126595735549927, "learning_rate": 9.974966689043963e-06, "loss": 0.3292, "step": 408 }, { "epoch": 0.03272, "grad_norm": 1.5870492458343506, "learning_rate": 9.974840841212648e-06, "loss": 0.3787, "step": 409 }, { "epoch": 0.0328, "grad_norm": 1.4002914428710938, "learning_rate": 9.974714678639797e-06, "loss": 0.2722, "step": 410 }, { "epoch": 0.03288, "grad_norm": 2.1911635398864746, "learning_rate": 9.974588201333394e-06, "loss": 0.4562, "step": 411 }, { "epoch": 0.03296, "grad_norm": 1.665370225906372, "learning_rate": 9.974461409301442e-06, "loss": 0.4463, "step": 412 }, { "epoch": 0.03304, "grad_norm": 4.2547736167907715, "learning_rate": 9.97433430255196e-06, "loss": 0.4829, "step": 413 }, { "epoch": 0.03312, "grad_norm": 2.003357410430908, "learning_rate": 9.97420688109299e-06, "loss": 0.2167, "step": 414 }, { "epoch": 0.0332, "grad_norm": 5.256060600280762, "learning_rate": 9.974079144932596e-06, "loss": 0.3008, "step": 415 }, { "epoch": 0.03328, "grad_norm": 2.828409433364868, "learning_rate": 9.973951094078857e-06, "loss": 0.3806, "step": 416 }, { "epoch": 0.03336, "grad_norm": 3.722662925720215, "learning_rate": 9.973822728539876e-06, "loss": 0.4364, "step": 417 }, { "epoch": 0.03344, "grad_norm": 2.073556900024414, "learning_rate": 9.973694048323773e-06, "loss": 0.3991, "step": 418 }, { "epoch": 0.03352, "grad_norm": 1.1159542798995972, "learning_rate": 9.97356505343869e-06, "loss": 0.2556, "step": 419 }, { "epoch": 0.0336, "grad_norm": 1.9902396202087402, "learning_rate": 9.973435743892787e-06, "loss": 0.3929, "step": 420 }, { "epoch": 0.03368, "grad_norm": 1.4526017904281616, "learning_rate": 9.973306119694246e-06, "loss": 0.3209, "step": 421 }, { "epoch": 0.03376, "grad_norm": 1.8279871940612793, "learning_rate": 9.973176180851267e-06, "loss": 0.4486, "step": 422 }, { "epoch": 0.03384, "grad_norm": 1.1028552055358887, "learning_rate": 9.973045927372071e-06, "loss": 0.2656, "step": 423 }, { "epoch": 0.03392, "grad_norm": 1.7597603797912598, "learning_rate": 9.972915359264901e-06, "loss": 0.3717, "step": 424 }, { "epoch": 0.034, "grad_norm": 1.9462575912475586, "learning_rate": 9.972784476538014e-06, "loss": 0.3828, "step": 425 }, { "epoch": 0.03408, "grad_norm": 2.132319927215576, "learning_rate": 9.972653279199693e-06, "loss": 0.4109, "step": 426 }, { "epoch": 0.03416, "grad_norm": 1.7720433473587036, "learning_rate": 9.972521767258236e-06, "loss": 0.3781, "step": 427 }, { "epoch": 0.03424, "grad_norm": 1.556762933731079, "learning_rate": 9.972389940721967e-06, "loss": 0.3173, "step": 428 }, { "epoch": 0.03432, "grad_norm": 1.8000061511993408, "learning_rate": 9.972257799599223e-06, "loss": 0.4679, "step": 429 }, { "epoch": 0.0344, "grad_norm": 1.4809861183166504, "learning_rate": 9.972125343898366e-06, "loss": 0.3235, "step": 430 }, { "epoch": 0.03448, "grad_norm": 1.4493865966796875, "learning_rate": 9.971992573627775e-06, "loss": 0.2701, "step": 431 }, { "epoch": 0.03456, "grad_norm": 1.4800419807434082, "learning_rate": 9.971859488795849e-06, "loss": 0.2702, "step": 432 }, { "epoch": 0.03464, "grad_norm": 1.9442113637924194, "learning_rate": 9.97172608941101e-06, "loss": 0.372, "step": 433 }, { "epoch": 0.03472, "grad_norm": 2.0416910648345947, "learning_rate": 9.971592375481697e-06, "loss": 0.4425, "step": 434 }, { "epoch": 0.0348, "grad_norm": 1.7712527513504028, "learning_rate": 9.971458347016369e-06, "loss": 0.4006, "step": 435 }, { "epoch": 0.03488, "grad_norm": 1.808829426765442, "learning_rate": 9.971324004023506e-06, "loss": 0.3774, "step": 436 }, { "epoch": 0.03496, "grad_norm": 1.4706016778945923, "learning_rate": 9.971189346511608e-06, "loss": 0.3434, "step": 437 }, { "epoch": 0.03504, "grad_norm": 1.7320775985717773, "learning_rate": 9.971054374489193e-06, "loss": 0.3452, "step": 438 }, { "epoch": 0.03512, "grad_norm": 1.6511974334716797, "learning_rate": 9.9709190879648e-06, "loss": 0.3614, "step": 439 }, { "epoch": 0.0352, "grad_norm": 1.4348496198654175, "learning_rate": 9.970783486946991e-06, "loss": 0.3445, "step": 440 }, { "epoch": 0.03528, "grad_norm": 1.5991486310958862, "learning_rate": 9.970647571444341e-06, "loss": 0.4998, "step": 441 }, { "epoch": 0.03536, "grad_norm": 1.3900196552276611, "learning_rate": 9.970511341465453e-06, "loss": 0.3307, "step": 442 }, { "epoch": 0.03544, "grad_norm": 1.249131202697754, "learning_rate": 9.970374797018942e-06, "loss": 0.2656, "step": 443 }, { "epoch": 0.03552, "grad_norm": 1.7488571405410767, "learning_rate": 9.97023793811345e-06, "loss": 0.3783, "step": 444 }, { "epoch": 0.0356, "grad_norm": 1.5800598859786987, "learning_rate": 9.970100764757635e-06, "loss": 0.2901, "step": 445 }, { "epoch": 0.03568, "grad_norm": 1.4809678792953491, "learning_rate": 9.969963276960173e-06, "loss": 0.3579, "step": 446 }, { "epoch": 0.03576, "grad_norm": 1.9391019344329834, "learning_rate": 9.969825474729763e-06, "loss": 0.3893, "step": 447 }, { "epoch": 0.03584, "grad_norm": 1.4065037965774536, "learning_rate": 9.969687358075126e-06, "loss": 0.3532, "step": 448 }, { "epoch": 0.03592, "grad_norm": 1.735000729560852, "learning_rate": 9.969548927004998e-06, "loss": 0.3295, "step": 449 }, { "epoch": 0.036, "grad_norm": 1.9082682132720947, "learning_rate": 9.969410181528138e-06, "loss": 0.3841, "step": 450 }, { "epoch": 0.03608, "grad_norm": 1.740655779838562, "learning_rate": 9.969271121653323e-06, "loss": 0.3545, "step": 451 }, { "epoch": 0.03616, "grad_norm": 2.0228629112243652, "learning_rate": 9.96913174738935e-06, "loss": 0.4113, "step": 452 }, { "epoch": 0.03624, "grad_norm": 1.7781933546066284, "learning_rate": 9.96899205874504e-06, "loss": 0.4382, "step": 453 }, { "epoch": 0.03632, "grad_norm": 2.111398696899414, "learning_rate": 9.968852055729229e-06, "loss": 0.416, "step": 454 }, { "epoch": 0.0364, "grad_norm": 1.8158979415893555, "learning_rate": 9.968711738350773e-06, "loss": 0.486, "step": 455 }, { "epoch": 0.03648, "grad_norm": 1.4151747226715088, "learning_rate": 9.968571106618551e-06, "loss": 0.3059, "step": 456 }, { "epoch": 0.03656, "grad_norm": 1.4982035160064697, "learning_rate": 9.968430160541461e-06, "loss": 0.282, "step": 457 }, { "epoch": 0.03664, "grad_norm": 1.3661582469940186, "learning_rate": 9.96828890012842e-06, "loss": 0.3013, "step": 458 }, { "epoch": 0.03672, "grad_norm": 1.747228980064392, "learning_rate": 9.968147325388363e-06, "loss": 0.3509, "step": 459 }, { "epoch": 0.0368, "grad_norm": 1.706491231918335, "learning_rate": 9.968005436330246e-06, "loss": 0.4495, "step": 460 }, { "epoch": 0.03688, "grad_norm": 1.7066677808761597, "learning_rate": 9.96786323296305e-06, "loss": 0.3797, "step": 461 }, { "epoch": 0.03696, "grad_norm": 2.126333475112915, "learning_rate": 9.96772071529577e-06, "loss": 0.4699, "step": 462 }, { "epoch": 0.03704, "grad_norm": 2.0231945514678955, "learning_rate": 9.967577883337421e-06, "loss": 0.4655, "step": 463 }, { "epoch": 0.03712, "grad_norm": 1.7993510961532593, "learning_rate": 9.967434737097043e-06, "loss": 0.3497, "step": 464 }, { "epoch": 0.0372, "grad_norm": 2.108503580093384, "learning_rate": 9.967291276583688e-06, "loss": 0.3369, "step": 465 }, { "epoch": 0.03728, "grad_norm": 1.6675549745559692, "learning_rate": 9.967147501806436e-06, "loss": 0.4097, "step": 466 }, { "epoch": 0.03736, "grad_norm": 1.325800895690918, "learning_rate": 9.967003412774381e-06, "loss": 0.2744, "step": 467 }, { "epoch": 0.03744, "grad_norm": 1.9661016464233398, "learning_rate": 9.966859009496641e-06, "loss": 0.3642, "step": 468 }, { "epoch": 0.03752, "grad_norm": 1.5406103134155273, "learning_rate": 9.966714291982349e-06, "loss": 0.2844, "step": 469 }, { "epoch": 0.0376, "grad_norm": 1.752842903137207, "learning_rate": 9.966569260240664e-06, "loss": 0.5024, "step": 470 }, { "epoch": 0.03768, "grad_norm": 1.2587329149246216, "learning_rate": 9.966423914280758e-06, "loss": 0.2523, "step": 471 }, { "epoch": 0.03776, "grad_norm": 1.629905104637146, "learning_rate": 9.96627825411183e-06, "loss": 0.3229, "step": 472 }, { "epoch": 0.03784, "grad_norm": 1.8891979455947876, "learning_rate": 9.966132279743095e-06, "loss": 0.3933, "step": 473 }, { "epoch": 0.03792, "grad_norm": 1.3782880306243896, "learning_rate": 9.965985991183787e-06, "loss": 0.3091, "step": 474 }, { "epoch": 0.038, "grad_norm": 1.4807788133621216, "learning_rate": 9.96583938844316e-06, "loss": 0.2939, "step": 475 }, { "epoch": 0.03808, "grad_norm": 2.111734390258789, "learning_rate": 9.965692471530492e-06, "loss": 0.3984, "step": 476 }, { "epoch": 0.03816, "grad_norm": 1.6450163125991821, "learning_rate": 9.965545240455077e-06, "loss": 0.3763, "step": 477 }, { "epoch": 0.03824, "grad_norm": 1.8153711557388306, "learning_rate": 9.965397695226228e-06, "loss": 0.5331, "step": 478 }, { "epoch": 0.03832, "grad_norm": 1.3268543481826782, "learning_rate": 9.965249835853281e-06, "loss": 0.3017, "step": 479 }, { "epoch": 0.0384, "grad_norm": 1.573397159576416, "learning_rate": 9.96510166234559e-06, "loss": 0.3174, "step": 480 }, { "epoch": 0.03848, "grad_norm": 1.8132137060165405, "learning_rate": 9.964953174712533e-06, "loss": 0.3517, "step": 481 }, { "epoch": 0.03856, "grad_norm": 1.5276272296905518, "learning_rate": 9.9648043729635e-06, "loss": 0.3427, "step": 482 }, { "epoch": 0.03864, "grad_norm": 1.4888263940811157, "learning_rate": 9.964655257107906e-06, "loss": 0.3782, "step": 483 }, { "epoch": 0.03872, "grad_norm": 1.5089031457901, "learning_rate": 9.964505827155186e-06, "loss": 0.3162, "step": 484 }, { "epoch": 0.0388, "grad_norm": 1.7417253255844116, "learning_rate": 9.964356083114795e-06, "loss": 0.4875, "step": 485 }, { "epoch": 0.03888, "grad_norm": 1.554205298423767, "learning_rate": 9.964206024996203e-06, "loss": 0.3795, "step": 486 }, { "epoch": 0.03896, "grad_norm": 2.758025884628296, "learning_rate": 9.964055652808908e-06, "loss": 0.5799, "step": 487 }, { "epoch": 0.03904, "grad_norm": 1.5787456035614014, "learning_rate": 9.96390496656242e-06, "loss": 0.3235, "step": 488 }, { "epoch": 0.03912, "grad_norm": 1.6550320386886597, "learning_rate": 9.963753966266276e-06, "loss": 0.3013, "step": 489 }, { "epoch": 0.0392, "grad_norm": 1.0665961503982544, "learning_rate": 9.963602651930027e-06, "loss": 0.2318, "step": 490 }, { "epoch": 0.03928, "grad_norm": 1.4649837017059326, "learning_rate": 9.963451023563245e-06, "loss": 0.3583, "step": 491 }, { "epoch": 0.03936, "grad_norm": 1.462937831878662, "learning_rate": 9.963299081175525e-06, "loss": 0.308, "step": 492 }, { "epoch": 0.03944, "grad_norm": 1.6257414817810059, "learning_rate": 9.96314682477648e-06, "loss": 0.3596, "step": 493 }, { "epoch": 0.03952, "grad_norm": 2.3487870693206787, "learning_rate": 9.962994254375742e-06, "loss": 0.4331, "step": 494 }, { "epoch": 0.0396, "grad_norm": 2.4731879234313965, "learning_rate": 9.962841369982962e-06, "loss": 0.4484, "step": 495 }, { "epoch": 0.03968, "grad_norm": 1.8673752546310425, "learning_rate": 9.962688171607817e-06, "loss": 0.3284, "step": 496 }, { "epoch": 0.03976, "grad_norm": 1.7251307964324951, "learning_rate": 9.962534659259995e-06, "loss": 0.2836, "step": 497 }, { "epoch": 0.03984, "grad_norm": 2.203010320663452, "learning_rate": 9.96238083294921e-06, "loss": 0.3871, "step": 498 }, { "epoch": 0.03992, "grad_norm": 1.6219152212142944, "learning_rate": 9.962226692685195e-06, "loss": 0.333, "step": 499 }, { "epoch": 0.04, "grad_norm": 1.6756365299224854, "learning_rate": 9.962072238477699e-06, "loss": 0.2864, "step": 500 }, { "epoch": 0.04008, "grad_norm": 1.6424753665924072, "learning_rate": 9.961917470336496e-06, "loss": 0.3253, "step": 501 }, { "epoch": 0.04016, "grad_norm": 1.5344758033752441, "learning_rate": 9.961762388271378e-06, "loss": 0.3293, "step": 502 }, { "epoch": 0.04024, "grad_norm": 1.1364933252334595, "learning_rate": 9.961606992292155e-06, "loss": 0.2658, "step": 503 }, { "epoch": 0.04032, "grad_norm": 1.397683024406433, "learning_rate": 9.96145128240866e-06, "loss": 0.2682, "step": 504 }, { "epoch": 0.0404, "grad_norm": 1.7218232154846191, "learning_rate": 9.961295258630743e-06, "loss": 0.3327, "step": 505 }, { "epoch": 0.04048, "grad_norm": 1.204432487487793, "learning_rate": 9.961138920968276e-06, "loss": 0.2728, "step": 506 }, { "epoch": 0.04056, "grad_norm": 1.273925542831421, "learning_rate": 9.960982269431149e-06, "loss": 0.2972, "step": 507 }, { "epoch": 0.04064, "grad_norm": 1.682013750076294, "learning_rate": 9.960825304029274e-06, "loss": 0.3276, "step": 508 }, { "epoch": 0.04072, "grad_norm": 1.4792194366455078, "learning_rate": 9.96066802477258e-06, "loss": 0.4057, "step": 509 }, { "epoch": 0.0408, "grad_norm": 2.17596173286438, "learning_rate": 9.96051043167102e-06, "loss": 0.5181, "step": 510 }, { "epoch": 0.04088, "grad_norm": 1.6542383432388306, "learning_rate": 9.960352524734562e-06, "loss": 0.3988, "step": 511 }, { "epoch": 0.04096, "grad_norm": 1.9632936716079712, "learning_rate": 9.960194303973196e-06, "loss": 0.3267, "step": 512 }, { "epoch": 0.04104, "grad_norm": 1.4410243034362793, "learning_rate": 9.960035769396935e-06, "loss": 0.2683, "step": 513 }, { "epoch": 0.04112, "grad_norm": 1.5401276350021362, "learning_rate": 9.959876921015805e-06, "loss": 0.2925, "step": 514 }, { "epoch": 0.0412, "grad_norm": 1.6564226150512695, "learning_rate": 9.95971775883986e-06, "loss": 0.33, "step": 515 }, { "epoch": 0.04128, "grad_norm": 1.4229389429092407, "learning_rate": 9.959558282879167e-06, "loss": 0.2565, "step": 516 }, { "epoch": 0.04136, "grad_norm": 1.524813175201416, "learning_rate": 9.959398493143816e-06, "loss": 0.3145, "step": 517 }, { "epoch": 0.04144, "grad_norm": 1.274613380432129, "learning_rate": 9.959238389643918e-06, "loss": 0.2743, "step": 518 }, { "epoch": 0.04152, "grad_norm": 1.9126328229904175, "learning_rate": 9.9590779723896e-06, "loss": 0.3763, "step": 519 }, { "epoch": 0.0416, "grad_norm": 1.5458030700683594, "learning_rate": 9.95891724139101e-06, "loss": 0.313, "step": 520 }, { "epoch": 0.04168, "grad_norm": 1.80939781665802, "learning_rate": 9.958756196658321e-06, "loss": 0.3907, "step": 521 }, { "epoch": 0.04176, "grad_norm": 1.551069974899292, "learning_rate": 9.958594838201719e-06, "loss": 0.3031, "step": 522 }, { "epoch": 0.04184, "grad_norm": 1.9089337587356567, "learning_rate": 9.958433166031412e-06, "loss": 0.3787, "step": 523 }, { "epoch": 0.04192, "grad_norm": 1.5611118078231812, "learning_rate": 9.958271180157631e-06, "loss": 0.3589, "step": 524 }, { "epoch": 0.042, "grad_norm": 1.8825455904006958, "learning_rate": 9.958108880590623e-06, "loss": 0.4088, "step": 525 }, { "epoch": 0.04208, "grad_norm": 1.9425339698791504, "learning_rate": 9.957946267340655e-06, "loss": 0.2773, "step": 526 }, { "epoch": 0.04216, "grad_norm": 1.547389030456543, "learning_rate": 9.957783340418015e-06, "loss": 0.2913, "step": 527 }, { "epoch": 0.04224, "grad_norm": 1.622795820236206, "learning_rate": 9.957620099833013e-06, "loss": 0.3456, "step": 528 }, { "epoch": 0.04232, "grad_norm": 1.9623744487762451, "learning_rate": 9.957456545595977e-06, "loss": 0.3924, "step": 529 }, { "epoch": 0.0424, "grad_norm": 1.489332675933838, "learning_rate": 9.95729267771725e-06, "loss": 0.3155, "step": 530 }, { "epoch": 0.04248, "grad_norm": 1.6495463848114014, "learning_rate": 9.957128496207205e-06, "loss": 0.3544, "step": 531 }, { "epoch": 0.04256, "grad_norm": 1.5633338689804077, "learning_rate": 9.956964001076224e-06, "loss": 0.3408, "step": 532 }, { "epoch": 0.04264, "grad_norm": 2.2354142665863037, "learning_rate": 9.95679919233472e-06, "loss": 0.4222, "step": 533 }, { "epoch": 0.04272, "grad_norm": 1.6407157182693481, "learning_rate": 9.956634069993114e-06, "loss": 0.4222, "step": 534 }, { "epoch": 0.0428, "grad_norm": 1.805367112159729, "learning_rate": 9.956468634061857e-06, "loss": 0.3485, "step": 535 }, { "epoch": 0.04288, "grad_norm": 1.9591755867004395, "learning_rate": 9.956302884551413e-06, "loss": 0.397, "step": 536 }, { "epoch": 0.04296, "grad_norm": 2.0101656913757324, "learning_rate": 9.956136821472269e-06, "loss": 0.5719, "step": 537 }, { "epoch": 0.04304, "grad_norm": 2.0787875652313232, "learning_rate": 9.955970444834933e-06, "loss": 0.4123, "step": 538 }, { "epoch": 0.04312, "grad_norm": 1.7366102933883667, "learning_rate": 9.955803754649929e-06, "loss": 0.3547, "step": 539 }, { "epoch": 0.0432, "grad_norm": 1.8972814083099365, "learning_rate": 9.955636750927803e-06, "loss": 0.3922, "step": 540 }, { "epoch": 0.04328, "grad_norm": 1.79804265499115, "learning_rate": 9.95546943367912e-06, "loss": 0.4213, "step": 541 }, { "epoch": 0.04336, "grad_norm": 1.6599233150482178, "learning_rate": 9.955301802914471e-06, "loss": 0.3559, "step": 542 }, { "epoch": 0.04344, "grad_norm": 1.4300049543380737, "learning_rate": 9.955133858644455e-06, "loss": 0.3159, "step": 543 }, { "epoch": 0.04352, "grad_norm": 1.2100526094436646, "learning_rate": 9.9549656008797e-06, "loss": 0.2633, "step": 544 }, { "epoch": 0.0436, "grad_norm": 1.8038049936294556, "learning_rate": 9.95479702963085e-06, "loss": 0.4075, "step": 545 }, { "epoch": 0.04368, "grad_norm": 2.2004456520080566, "learning_rate": 9.954628144908573e-06, "loss": 0.516, "step": 546 }, { "epoch": 0.04376, "grad_norm": 1.4208917617797852, "learning_rate": 9.95445894672355e-06, "loss": 0.3567, "step": 547 }, { "epoch": 0.04384, "grad_norm": 1.8718032836914062, "learning_rate": 9.954289435086487e-06, "loss": 0.4532, "step": 548 }, { "epoch": 0.04392, "grad_norm": 1.7566274404525757, "learning_rate": 9.954119610008108e-06, "loss": 0.3248, "step": 549 }, { "epoch": 0.044, "grad_norm": 1.7727574110031128, "learning_rate": 9.953949471499157e-06, "loss": 0.3495, "step": 550 }, { "epoch": 0.04408, "grad_norm": 1.5641764402389526, "learning_rate": 9.953779019570402e-06, "loss": 0.3757, "step": 551 }, { "epoch": 0.04416, "grad_norm": 1.4524203538894653, "learning_rate": 9.953608254232622e-06, "loss": 0.3353, "step": 552 }, { "epoch": 0.04424, "grad_norm": 1.6970480680465698, "learning_rate": 9.953437175496622e-06, "loss": 0.3662, "step": 553 }, { "epoch": 0.04432, "grad_norm": 2.238062858581543, "learning_rate": 9.953265783373227e-06, "loss": 0.4972, "step": 554 }, { "epoch": 0.0444, "grad_norm": 1.676147699356079, "learning_rate": 9.95309407787328e-06, "loss": 0.3475, "step": 555 }, { "epoch": 0.04448, "grad_norm": 1.4633108377456665, "learning_rate": 9.952922059007643e-06, "loss": 0.3406, "step": 556 }, { "epoch": 0.04456, "grad_norm": 1.5241601467132568, "learning_rate": 9.952749726787201e-06, "loss": 0.2829, "step": 557 }, { "epoch": 0.04464, "grad_norm": 2.0277199745178223, "learning_rate": 9.952577081222854e-06, "loss": 0.3451, "step": 558 }, { "epoch": 0.04472, "grad_norm": 1.724655032157898, "learning_rate": 9.952404122325528e-06, "loss": 0.4065, "step": 559 }, { "epoch": 0.0448, "grad_norm": 1.4190020561218262, "learning_rate": 9.952230850106164e-06, "loss": 0.3292, "step": 560 }, { "epoch": 0.04488, "grad_norm": 1.4401750564575195, "learning_rate": 9.952057264575723e-06, "loss": 0.3097, "step": 561 }, { "epoch": 0.04496, "grad_norm": 2.137566328048706, "learning_rate": 9.95188336574519e-06, "loss": 0.4065, "step": 562 }, { "epoch": 0.04504, "grad_norm": 1.5885168313980103, "learning_rate": 9.951709153625564e-06, "loss": 0.2592, "step": 563 }, { "epoch": 0.04512, "grad_norm": 1.431435227394104, "learning_rate": 9.951534628227868e-06, "loss": 0.4273, "step": 564 }, { "epoch": 0.0452, "grad_norm": 1.6501264572143555, "learning_rate": 9.951359789563145e-06, "loss": 0.3777, "step": 565 }, { "epoch": 0.04528, "grad_norm": 2.1749095916748047, "learning_rate": 9.951184637642456e-06, "loss": 0.461, "step": 566 }, { "epoch": 0.04536, "grad_norm": 1.7694364786148071, "learning_rate": 9.95100917247688e-06, "loss": 0.3626, "step": 567 }, { "epoch": 0.04544, "grad_norm": 1.836769938468933, "learning_rate": 9.950833394077522e-06, "loss": 0.3411, "step": 568 }, { "epoch": 0.04552, "grad_norm": 1.6859595775604248, "learning_rate": 9.9506573024555e-06, "loss": 0.3591, "step": 569 }, { "epoch": 0.0456, "grad_norm": 2.330047130584717, "learning_rate": 9.950480897621955e-06, "loss": 0.6001, "step": 570 }, { "epoch": 0.04568, "grad_norm": 1.5222753286361694, "learning_rate": 9.950304179588047e-06, "loss": 0.3455, "step": 571 }, { "epoch": 0.04576, "grad_norm": 1.7251989841461182, "learning_rate": 9.950127148364958e-06, "loss": 0.365, "step": 572 }, { "epoch": 0.04584, "grad_norm": 1.6580973863601685, "learning_rate": 9.949949803963887e-06, "loss": 0.3802, "step": 573 }, { "epoch": 0.04592, "grad_norm": 1.8021025657653809, "learning_rate": 9.949772146396056e-06, "loss": 0.4321, "step": 574 }, { "epoch": 0.046, "grad_norm": 1.5739644765853882, "learning_rate": 9.949594175672703e-06, "loss": 0.3246, "step": 575 }, { "epoch": 0.04608, "grad_norm": 2.065218925476074, "learning_rate": 9.949415891805087e-06, "loss": 0.4126, "step": 576 }, { "epoch": 0.04616, "grad_norm": 1.7216342687606812, "learning_rate": 9.949237294804489e-06, "loss": 0.4876, "step": 577 }, { "epoch": 0.04624, "grad_norm": 1.3691469430923462, "learning_rate": 9.949058384682206e-06, "loss": 0.2763, "step": 578 }, { "epoch": 0.04632, "grad_norm": 1.4548450708389282, "learning_rate": 9.94887916144956e-06, "loss": 0.3798, "step": 579 }, { "epoch": 0.0464, "grad_norm": 1.7984998226165771, "learning_rate": 9.948699625117888e-06, "loss": 0.4941, "step": 580 }, { "epoch": 0.04648, "grad_norm": 1.7592239379882812, "learning_rate": 9.948519775698551e-06, "loss": 0.3491, "step": 581 }, { "epoch": 0.04656, "grad_norm": 1.7368760108947754, "learning_rate": 9.948339613202923e-06, "loss": 0.401, "step": 582 }, { "epoch": 0.04664, "grad_norm": 1.7027363777160645, "learning_rate": 9.948159137642407e-06, "loss": 0.38, "step": 583 }, { "epoch": 0.04672, "grad_norm": 2.045701503753662, "learning_rate": 9.947978349028418e-06, "loss": 0.4475, "step": 584 }, { "epoch": 0.0468, "grad_norm": 1.5346065759658813, "learning_rate": 9.947797247372394e-06, "loss": 0.3696, "step": 585 }, { "epoch": 0.04688, "grad_norm": 1.5219029188156128, "learning_rate": 9.947615832685795e-06, "loss": 0.2784, "step": 586 }, { "epoch": 0.04696, "grad_norm": 1.4893606901168823, "learning_rate": 9.947434104980097e-06, "loss": 0.33, "step": 587 }, { "epoch": 0.04704, "grad_norm": 1.7347660064697266, "learning_rate": 9.947252064266796e-06, "loss": 0.3985, "step": 588 }, { "epoch": 0.04712, "grad_norm": 1.6877951622009277, "learning_rate": 9.947069710557412e-06, "loss": 0.3609, "step": 589 }, { "epoch": 0.0472, "grad_norm": 1.5663824081420898, "learning_rate": 9.946887043863478e-06, "loss": 0.3961, "step": 590 }, { "epoch": 0.04728, "grad_norm": 1.6489958763122559, "learning_rate": 9.946704064196554e-06, "loss": 0.4324, "step": 591 }, { "epoch": 0.04736, "grad_norm": 1.8338820934295654, "learning_rate": 9.946520771568217e-06, "loss": 0.3133, "step": 592 }, { "epoch": 0.04744, "grad_norm": 1.592786192893982, "learning_rate": 9.946337165990061e-06, "loss": 0.3564, "step": 593 }, { "epoch": 0.04752, "grad_norm": 1.8404992818832397, "learning_rate": 9.946153247473702e-06, "loss": 0.3641, "step": 594 }, { "epoch": 0.0476, "grad_norm": 1.990356206893921, "learning_rate": 9.945969016030779e-06, "loss": 0.4124, "step": 595 }, { "epoch": 0.04768, "grad_norm": 1.7861665487289429, "learning_rate": 9.945784471672943e-06, "loss": 0.3725, "step": 596 }, { "epoch": 0.04776, "grad_norm": 2.0100088119506836, "learning_rate": 9.945599614411875e-06, "loss": 0.4765, "step": 597 }, { "epoch": 0.04784, "grad_norm": 1.882002830505371, "learning_rate": 9.945414444259267e-06, "loss": 0.3974, "step": 598 }, { "epoch": 0.04792, "grad_norm": 1.8743993043899536, "learning_rate": 9.945228961226832e-06, "loss": 0.3773, "step": 599 }, { "epoch": 0.048, "grad_norm": 1.5349911451339722, "learning_rate": 9.94504316532631e-06, "loss": 0.2909, "step": 600 }, { "epoch": 0.04808, "grad_norm": 1.8333827257156372, "learning_rate": 9.944857056569452e-06, "loss": 0.3577, "step": 601 }, { "epoch": 0.04816, "grad_norm": 1.9618743658065796, "learning_rate": 9.944670634968033e-06, "loss": 0.4357, "step": 602 }, { "epoch": 0.04824, "grad_norm": 1.9149047136306763, "learning_rate": 9.944483900533848e-06, "loss": 0.4081, "step": 603 }, { "epoch": 0.04832, "grad_norm": 2.0370891094207764, "learning_rate": 9.944296853278712e-06, "loss": 0.3549, "step": 604 }, { "epoch": 0.0484, "grad_norm": 2.002598524093628, "learning_rate": 9.944109493214458e-06, "loss": 0.3807, "step": 605 }, { "epoch": 0.04848, "grad_norm": 1.196305274963379, "learning_rate": 9.943921820352938e-06, "loss": 0.2734, "step": 606 }, { "epoch": 0.04856, "grad_norm": 1.8980247974395752, "learning_rate": 9.943733834706027e-06, "loss": 0.45, "step": 607 }, { "epoch": 0.04864, "grad_norm": 1.8210073709487915, "learning_rate": 9.94354553628562e-06, "loss": 0.4916, "step": 608 }, { "epoch": 0.04872, "grad_norm": 1.4517135620117188, "learning_rate": 9.943356925103625e-06, "loss": 0.3107, "step": 609 }, { "epoch": 0.0488, "grad_norm": 1.8347890377044678, "learning_rate": 9.94316800117198e-06, "loss": 0.4305, "step": 610 }, { "epoch": 0.04888, "grad_norm": 1.664490818977356, "learning_rate": 9.942978764502635e-06, "loss": 0.3694, "step": 611 }, { "epoch": 0.04896, "grad_norm": 1.5147761106491089, "learning_rate": 9.942789215107562e-06, "loss": 0.3826, "step": 612 }, { "epoch": 0.04904, "grad_norm": 1.5997848510742188, "learning_rate": 9.942599352998756e-06, "loss": 0.3296, "step": 613 }, { "epoch": 0.04912, "grad_norm": 1.7910867929458618, "learning_rate": 9.942409178188225e-06, "loss": 0.354, "step": 614 }, { "epoch": 0.0492, "grad_norm": 1.7603079080581665, "learning_rate": 9.942218690688004e-06, "loss": 0.486, "step": 615 }, { "epoch": 0.04928, "grad_norm": 1.4481785297393799, "learning_rate": 9.942027890510142e-06, "loss": 0.4026, "step": 616 }, { "epoch": 0.04936, "grad_norm": 1.7464784383773804, "learning_rate": 9.94183677766671e-06, "loss": 0.3778, "step": 617 }, { "epoch": 0.04944, "grad_norm": 1.5305639505386353, "learning_rate": 9.941645352169804e-06, "loss": 0.3281, "step": 618 }, { "epoch": 0.04952, "grad_norm": 1.8555560111999512, "learning_rate": 9.941453614031528e-06, "loss": 0.3575, "step": 619 }, { "epoch": 0.0496, "grad_norm": 1.9013909101486206, "learning_rate": 9.941261563264019e-06, "loss": 0.3648, "step": 620 }, { "epoch": 0.04968, "grad_norm": 1.7493908405303955, "learning_rate": 9.941069199879424e-06, "loss": 0.3552, "step": 621 }, { "epoch": 0.04976, "grad_norm": 1.6803377866744995, "learning_rate": 9.940876523889911e-06, "loss": 0.4728, "step": 622 }, { "epoch": 0.04984, "grad_norm": 1.9179245233535767, "learning_rate": 9.940683535307675e-06, "loss": 0.357, "step": 623 }, { "epoch": 0.04992, "grad_norm": 2.1097536087036133, "learning_rate": 9.940490234144923e-06, "loss": 0.4825, "step": 624 }, { "epoch": 0.05, "grad_norm": 1.8075186014175415, "learning_rate": 9.940296620413884e-06, "loss": 0.4851, "step": 625 }, { "epoch": 0.05008, "grad_norm": 1.5507980585098267, "learning_rate": 9.940102694126809e-06, "loss": 0.3254, "step": 626 }, { "epoch": 0.05016, "grad_norm": 2.0220351219177246, "learning_rate": 9.939908455295965e-06, "loss": 0.405, "step": 627 }, { "epoch": 0.05024, "grad_norm": 1.5802489519119263, "learning_rate": 9.939713903933644e-06, "loss": 0.3536, "step": 628 }, { "epoch": 0.05032, "grad_norm": 1.7642475366592407, "learning_rate": 9.93951904005215e-06, "loss": 0.3986, "step": 629 }, { "epoch": 0.0504, "grad_norm": 1.5196278095245361, "learning_rate": 9.939323863663817e-06, "loss": 0.3779, "step": 630 }, { "epoch": 0.05048, "grad_norm": 1.995604395866394, "learning_rate": 9.939128374780987e-06, "loss": 0.4138, "step": 631 }, { "epoch": 0.05056, "grad_norm": 1.300185203552246, "learning_rate": 9.938932573416033e-06, "loss": 0.3161, "step": 632 }, { "epoch": 0.05064, "grad_norm": 1.9059289693832397, "learning_rate": 9.938736459581341e-06, "loss": 0.3545, "step": 633 }, { "epoch": 0.05072, "grad_norm": 2.062704563140869, "learning_rate": 9.938540033289317e-06, "loss": 0.3853, "step": 634 }, { "epoch": 0.0508, "grad_norm": 1.7212554216384888, "learning_rate": 9.93834329455239e-06, "loss": 0.3571, "step": 635 }, { "epoch": 0.05088, "grad_norm": 1.2724170684814453, "learning_rate": 9.938146243383006e-06, "loss": 0.3076, "step": 636 }, { "epoch": 0.05096, "grad_norm": 1.4473823308944702, "learning_rate": 9.937948879793634e-06, "loss": 0.328, "step": 637 }, { "epoch": 0.05104, "grad_norm": 1.304731011390686, "learning_rate": 9.937751203796758e-06, "loss": 0.3388, "step": 638 }, { "epoch": 0.05112, "grad_norm": 1.8237764835357666, "learning_rate": 9.937553215404884e-06, "loss": 0.3978, "step": 639 }, { "epoch": 0.0512, "grad_norm": 1.4053940773010254, "learning_rate": 9.937354914630542e-06, "loss": 0.2694, "step": 640 }, { "epoch": 0.05128, "grad_norm": 1.79597806930542, "learning_rate": 9.937156301486273e-06, "loss": 0.4567, "step": 641 }, { "epoch": 0.05136, "grad_norm": 1.2079006433486938, "learning_rate": 9.936957375984644e-06, "loss": 0.2878, "step": 642 }, { "epoch": 0.05144, "grad_norm": 1.547292947769165, "learning_rate": 9.93675813813824e-06, "loss": 0.3161, "step": 643 }, { "epoch": 0.05152, "grad_norm": 1.8033218383789062, "learning_rate": 9.936558587959671e-06, "loss": 0.3995, "step": 644 }, { "epoch": 0.0516, "grad_norm": 1.5613384246826172, "learning_rate": 9.936358725461555e-06, "loss": 0.2709, "step": 645 }, { "epoch": 0.05168, "grad_norm": 1.7098636627197266, "learning_rate": 9.936158550656539e-06, "loss": 0.3536, "step": 646 }, { "epoch": 0.05176, "grad_norm": 1.7437068223953247, "learning_rate": 9.93595806355729e-06, "loss": 0.3261, "step": 647 }, { "epoch": 0.05184, "grad_norm": 1.75038480758667, "learning_rate": 9.93575726417649e-06, "loss": 0.3398, "step": 648 }, { "epoch": 0.05192, "grad_norm": 1.4043598175048828, "learning_rate": 9.93555615252684e-06, "loss": 0.3377, "step": 649 }, { "epoch": 0.052, "grad_norm": 1.877252221107483, "learning_rate": 9.935354728621069e-06, "loss": 0.3839, "step": 650 }, { "epoch": 0.05208, "grad_norm": 1.5518877506256104, "learning_rate": 9.935152992471918e-06, "loss": 0.4235, "step": 651 }, { "epoch": 0.05216, "grad_norm": 1.384873390197754, "learning_rate": 9.93495094409215e-06, "loss": 0.3017, "step": 652 }, { "epoch": 0.05224, "grad_norm": 2.235605001449585, "learning_rate": 9.93474858349455e-06, "loss": 0.3975, "step": 653 }, { "epoch": 0.05232, "grad_norm": 2.175873279571533, "learning_rate": 9.934545910691914e-06, "loss": 0.4333, "step": 654 }, { "epoch": 0.0524, "grad_norm": 2.141580581665039, "learning_rate": 9.934342925697074e-06, "loss": 0.4373, "step": 655 }, { "epoch": 0.05248, "grad_norm": 1.8927725553512573, "learning_rate": 9.934139628522865e-06, "loss": 0.3006, "step": 656 }, { "epoch": 0.05256, "grad_norm": 1.7776702642440796, "learning_rate": 9.933936019182154e-06, "loss": 0.4212, "step": 657 }, { "epoch": 0.05264, "grad_norm": 1.5397083759307861, "learning_rate": 9.933732097687817e-06, "loss": 0.3998, "step": 658 }, { "epoch": 0.05272, "grad_norm": 1.476570963859558, "learning_rate": 9.93352786405276e-06, "loss": 0.2903, "step": 659 }, { "epoch": 0.0528, "grad_norm": 2.0092062950134277, "learning_rate": 9.933323318289902e-06, "loss": 0.3786, "step": 660 }, { "epoch": 0.05288, "grad_norm": 1.8279545307159424, "learning_rate": 9.933118460412186e-06, "loss": 0.3953, "step": 661 }, { "epoch": 0.05296, "grad_norm": 1.9125584363937378, "learning_rate": 9.93291329043257e-06, "loss": 0.3686, "step": 662 }, { "epoch": 0.05304, "grad_norm": 1.951283574104309, "learning_rate": 9.932707808364035e-06, "loss": 0.4019, "step": 663 }, { "epoch": 0.05312, "grad_norm": 1.3012398481369019, "learning_rate": 9.932502014219583e-06, "loss": 0.2866, "step": 664 }, { "epoch": 0.0532, "grad_norm": 1.913560390472412, "learning_rate": 9.932295908012233e-06, "loss": 0.3671, "step": 665 }, { "epoch": 0.05328, "grad_norm": 1.9930202960968018, "learning_rate": 9.932089489755024e-06, "loss": 0.4171, "step": 666 }, { "epoch": 0.05336, "grad_norm": 1.7586143016815186, "learning_rate": 9.931882759461016e-06, "loss": 0.4016, "step": 667 }, { "epoch": 0.05344, "grad_norm": 1.564494013786316, "learning_rate": 9.931675717143288e-06, "loss": 0.3041, "step": 668 }, { "epoch": 0.05352, "grad_norm": 1.6268788576126099, "learning_rate": 9.931468362814937e-06, "loss": 0.3104, "step": 669 }, { "epoch": 0.0536, "grad_norm": 1.5686761140823364, "learning_rate": 9.931260696489085e-06, "loss": 0.2989, "step": 670 }, { "epoch": 0.05368, "grad_norm": 1.2307571172714233, "learning_rate": 9.931052718178869e-06, "loss": 0.2887, "step": 671 }, { "epoch": 0.05376, "grad_norm": 1.586047887802124, "learning_rate": 9.930844427897447e-06, "loss": 0.3362, "step": 672 }, { "epoch": 0.05384, "grad_norm": 1.9724785089492798, "learning_rate": 9.930635825657996e-06, "loss": 0.3575, "step": 673 }, { "epoch": 0.05392, "grad_norm": 1.7712501287460327, "learning_rate": 9.930426911473715e-06, "loss": 0.3853, "step": 674 }, { "epoch": 0.054, "grad_norm": 1.4114841222763062, "learning_rate": 9.93021768535782e-06, "loss": 0.3587, "step": 675 }, { "epoch": 0.05408, "grad_norm": 2.0432698726654053, "learning_rate": 9.93000814732355e-06, "loss": 0.4619, "step": 676 }, { "epoch": 0.05416, "grad_norm": 1.5945602655410767, "learning_rate": 9.92979829738416e-06, "loss": 0.2833, "step": 677 }, { "epoch": 0.05424, "grad_norm": 2.2913811206817627, "learning_rate": 9.929588135552925e-06, "loss": 0.4524, "step": 678 }, { "epoch": 0.05432, "grad_norm": 1.2987146377563477, "learning_rate": 9.929377661843143e-06, "loss": 0.2484, "step": 679 }, { "epoch": 0.0544, "grad_norm": 1.3887112140655518, "learning_rate": 9.929166876268132e-06, "loss": 0.3099, "step": 680 }, { "epoch": 0.05448, "grad_norm": 1.4816182851791382, "learning_rate": 9.928955778841224e-06, "loss": 0.3065, "step": 681 }, { "epoch": 0.05456, "grad_norm": 1.4863247871398926, "learning_rate": 9.928744369575778e-06, "loss": 0.3439, "step": 682 }, { "epoch": 0.05464, "grad_norm": 2.332185745239258, "learning_rate": 9.928532648485168e-06, "loss": 0.5691, "step": 683 }, { "epoch": 0.05472, "grad_norm": 1.8246127367019653, "learning_rate": 9.928320615582784e-06, "loss": 0.3214, "step": 684 }, { "epoch": 0.0548, "grad_norm": 1.643385648727417, "learning_rate": 9.928108270882049e-06, "loss": 0.2813, "step": 685 }, { "epoch": 0.05488, "grad_norm": 1.6711945533752441, "learning_rate": 9.927895614396392e-06, "loss": 0.3232, "step": 686 }, { "epoch": 0.05496, "grad_norm": 1.6612136363983154, "learning_rate": 9.927682646139269e-06, "loss": 0.3504, "step": 687 }, { "epoch": 0.05504, "grad_norm": 1.4387818574905396, "learning_rate": 9.927469366124152e-06, "loss": 0.3185, "step": 688 }, { "epoch": 0.05512, "grad_norm": 1.5262702703475952, "learning_rate": 9.927255774364535e-06, "loss": 0.3075, "step": 689 }, { "epoch": 0.0552, "grad_norm": 1.6111522912979126, "learning_rate": 9.927041870873932e-06, "loss": 0.3691, "step": 690 }, { "epoch": 0.05528, "grad_norm": 1.7556387186050415, "learning_rate": 9.926827655665878e-06, "loss": 0.4307, "step": 691 }, { "epoch": 0.05536, "grad_norm": 1.449609637260437, "learning_rate": 9.926613128753922e-06, "loss": 0.3468, "step": 692 }, { "epoch": 0.05544, "grad_norm": 1.6672128438949585, "learning_rate": 9.926398290151637e-06, "loss": 0.3852, "step": 693 }, { "epoch": 0.05552, "grad_norm": 1.932892084121704, "learning_rate": 9.926183139872616e-06, "loss": 0.5628, "step": 694 }, { "epoch": 0.0556, "grad_norm": 1.3282470703125, "learning_rate": 9.925967677930472e-06, "loss": 0.3317, "step": 695 }, { "epoch": 0.05568, "grad_norm": 1.645140528678894, "learning_rate": 9.925751904338834e-06, "loss": 0.3647, "step": 696 }, { "epoch": 0.05576, "grad_norm": 1.6291844844818115, "learning_rate": 9.925535819111356e-06, "loss": 0.2835, "step": 697 }, { "epoch": 0.05584, "grad_norm": 1.8840759992599487, "learning_rate": 9.925319422261708e-06, "loss": 0.3661, "step": 698 }, { "epoch": 0.05592, "grad_norm": 1.7590316534042358, "learning_rate": 9.925102713803579e-06, "loss": 0.3905, "step": 699 }, { "epoch": 0.056, "grad_norm": 1.4774218797683716, "learning_rate": 9.924885693750681e-06, "loss": 0.491, "step": 700 }, { "epoch": 0.05608, "grad_norm": 1.8233764171600342, "learning_rate": 9.924668362116743e-06, "loss": 0.4349, "step": 701 }, { "epoch": 0.05616, "grad_norm": 2.0210931301116943, "learning_rate": 9.924450718915517e-06, "loss": 0.3602, "step": 702 }, { "epoch": 0.05624, "grad_norm": 1.9228521585464478, "learning_rate": 9.92423276416077e-06, "loss": 0.3999, "step": 703 }, { "epoch": 0.05632, "grad_norm": 1.6303220987319946, "learning_rate": 9.924014497866295e-06, "loss": 0.3443, "step": 704 }, { "epoch": 0.0564, "grad_norm": 1.7699851989746094, "learning_rate": 9.923795920045896e-06, "loss": 0.3392, "step": 705 }, { "epoch": 0.05648, "grad_norm": 1.4135288000106812, "learning_rate": 9.923577030713406e-06, "loss": 0.2955, "step": 706 }, { "epoch": 0.05656, "grad_norm": 1.8755179643630981, "learning_rate": 9.923357829882671e-06, "loss": 0.4674, "step": 707 }, { "epoch": 0.05664, "grad_norm": 1.4635642766952515, "learning_rate": 9.92313831756756e-06, "loss": 0.2897, "step": 708 }, { "epoch": 0.05672, "grad_norm": 1.7681899070739746, "learning_rate": 9.922918493781958e-06, "loss": 0.3768, "step": 709 }, { "epoch": 0.0568, "grad_norm": 1.4016932249069214, "learning_rate": 9.92269835853978e-06, "loss": 0.2945, "step": 710 }, { "epoch": 0.05688, "grad_norm": 2.1971428394317627, "learning_rate": 9.922477911854945e-06, "loss": 0.4537, "step": 711 }, { "epoch": 0.05696, "grad_norm": 1.4524810314178467, "learning_rate": 9.922257153741402e-06, "loss": 0.3269, "step": 712 }, { "epoch": 0.05704, "grad_norm": 1.7311736345291138, "learning_rate": 9.92203608421312e-06, "loss": 0.415, "step": 713 }, { "epoch": 0.05712, "grad_norm": 2.3954567909240723, "learning_rate": 9.921814703284086e-06, "loss": 0.4937, "step": 714 }, { "epoch": 0.0572, "grad_norm": 1.8570671081542969, "learning_rate": 9.921593010968302e-06, "loss": 0.3431, "step": 715 }, { "epoch": 0.05728, "grad_norm": 1.7509444952011108, "learning_rate": 9.921371007279796e-06, "loss": 0.4205, "step": 716 }, { "epoch": 0.05736, "grad_norm": 1.6266485452651978, "learning_rate": 9.921148692232615e-06, "loss": 0.3455, "step": 717 }, { "epoch": 0.05744, "grad_norm": 1.3018132448196411, "learning_rate": 9.920926065840821e-06, "loss": 0.2517, "step": 718 }, { "epoch": 0.05752, "grad_norm": 1.8658397197723389, "learning_rate": 9.9207031281185e-06, "loss": 0.4211, "step": 719 }, { "epoch": 0.0576, "grad_norm": 1.8189882040023804, "learning_rate": 9.920479879079758e-06, "loss": 0.3201, "step": 720 }, { "epoch": 0.05768, "grad_norm": 1.655189871788025, "learning_rate": 9.920256318738717e-06, "loss": 0.3993, "step": 721 }, { "epoch": 0.05776, "grad_norm": 1.7855379581451416, "learning_rate": 9.920032447109522e-06, "loss": 0.3763, "step": 722 }, { "epoch": 0.05784, "grad_norm": 1.5921399593353271, "learning_rate": 9.919808264206339e-06, "loss": 0.3259, "step": 723 }, { "epoch": 0.05792, "grad_norm": 1.2852582931518555, "learning_rate": 9.919583770043347e-06, "loss": 0.2554, "step": 724 }, { "epoch": 0.058, "grad_norm": 1.9973994493484497, "learning_rate": 9.91935896463475e-06, "loss": 0.4433, "step": 725 }, { "epoch": 0.05808, "grad_norm": 1.5259404182434082, "learning_rate": 9.91913384799477e-06, "loss": 0.391, "step": 726 }, { "epoch": 0.05816, "grad_norm": 1.7888422012329102, "learning_rate": 9.918908420137654e-06, "loss": 0.3253, "step": 727 }, { "epoch": 0.05824, "grad_norm": 1.7182378768920898, "learning_rate": 9.91868268107766e-06, "loss": 0.3694, "step": 728 }, { "epoch": 0.05832, "grad_norm": 1.8759191036224365, "learning_rate": 9.918456630829071e-06, "loss": 0.3199, "step": 729 }, { "epoch": 0.0584, "grad_norm": 1.4170511960983276, "learning_rate": 9.918230269406188e-06, "loss": 0.3506, "step": 730 }, { "epoch": 0.05848, "grad_norm": 1.6913896799087524, "learning_rate": 9.918003596823333e-06, "loss": 0.4956, "step": 731 }, { "epoch": 0.05856, "grad_norm": 1.5514379739761353, "learning_rate": 9.917776613094846e-06, "loss": 0.3401, "step": 732 }, { "epoch": 0.05864, "grad_norm": 1.7475535869598389, "learning_rate": 9.917549318235086e-06, "loss": 0.3692, "step": 733 }, { "epoch": 0.05872, "grad_norm": 1.3284653425216675, "learning_rate": 9.917321712258436e-06, "loss": 0.29, "step": 734 }, { "epoch": 0.0588, "grad_norm": 1.3704982995986938, "learning_rate": 9.917093795179294e-06, "loss": 0.288, "step": 735 }, { "epoch": 0.05888, "grad_norm": 1.5144543647766113, "learning_rate": 9.916865567012082e-06, "loss": 0.3907, "step": 736 }, { "epoch": 0.05896, "grad_norm": 1.4970391988754272, "learning_rate": 9.916637027771236e-06, "loss": 0.3982, "step": 737 }, { "epoch": 0.05904, "grad_norm": 1.6594504117965698, "learning_rate": 9.916408177471216e-06, "loss": 0.3547, "step": 738 }, { "epoch": 0.05912, "grad_norm": 1.6561534404754639, "learning_rate": 9.916179016126502e-06, "loss": 0.4931, "step": 739 }, { "epoch": 0.0592, "grad_norm": 1.0301389694213867, "learning_rate": 9.915949543751591e-06, "loss": 0.1981, "step": 740 }, { "epoch": 0.05928, "grad_norm": 1.5365773439407349, "learning_rate": 9.915719760361e-06, "loss": 0.3218, "step": 741 }, { "epoch": 0.05936, "grad_norm": 1.725466251373291, "learning_rate": 9.915489665969269e-06, "loss": 0.3497, "step": 742 }, { "epoch": 0.05944, "grad_norm": 1.3347848653793335, "learning_rate": 9.915259260590954e-06, "loss": 0.29, "step": 743 }, { "epoch": 0.05952, "grad_norm": 2.047433853149414, "learning_rate": 9.915028544240633e-06, "loss": 0.3546, "step": 744 }, { "epoch": 0.0596, "grad_norm": 1.5531580448150635, "learning_rate": 9.914797516932899e-06, "loss": 0.2984, "step": 745 }, { "epoch": 0.05968, "grad_norm": 1.964288353919983, "learning_rate": 9.914566178682373e-06, "loss": 0.3893, "step": 746 }, { "epoch": 0.05976, "grad_norm": 1.6539465188980103, "learning_rate": 9.91433452950369e-06, "loss": 0.3372, "step": 747 }, { "epoch": 0.05984, "grad_norm": 1.79845130443573, "learning_rate": 9.914102569411503e-06, "loss": 0.379, "step": 748 }, { "epoch": 0.05992, "grad_norm": 1.5639078617095947, "learning_rate": 9.91387029842049e-06, "loss": 0.3265, "step": 749 }, { "epoch": 0.06, "grad_norm": 1.675551414489746, "learning_rate": 9.913637716545344e-06, "loss": 0.3333, "step": 750 }, { "epoch": 0.06008, "grad_norm": 1.6994894742965698, "learning_rate": 9.913404823800782e-06, "loss": 0.3565, "step": 751 }, { "epoch": 0.06016, "grad_norm": 1.9031753540039062, "learning_rate": 9.913171620201536e-06, "loss": 0.4082, "step": 752 }, { "epoch": 0.06024, "grad_norm": 1.789406657218933, "learning_rate": 9.912938105762362e-06, "loss": 0.3834, "step": 753 }, { "epoch": 0.06032, "grad_norm": 1.7641888856887817, "learning_rate": 9.912704280498032e-06, "loss": 0.3464, "step": 754 }, { "epoch": 0.0604, "grad_norm": 1.8987987041473389, "learning_rate": 9.91247014442334e-06, "loss": 0.4163, "step": 755 }, { "epoch": 0.06048, "grad_norm": 1.8092420101165771, "learning_rate": 9.912235697553101e-06, "loss": 0.433, "step": 756 }, { "epoch": 0.06056, "grad_norm": 1.20881187915802, "learning_rate": 9.912000939902144e-06, "loss": 0.251, "step": 757 }, { "epoch": 0.06064, "grad_norm": 1.4415931701660156, "learning_rate": 9.911765871485325e-06, "loss": 0.3483, "step": 758 }, { "epoch": 0.06072, "grad_norm": 2.056685209274292, "learning_rate": 9.911530492317511e-06, "loss": 0.4327, "step": 759 }, { "epoch": 0.0608, "grad_norm": 2.1116669178009033, "learning_rate": 9.9112948024136e-06, "loss": 0.549, "step": 760 }, { "epoch": 0.06088, "grad_norm": 1.765018343925476, "learning_rate": 9.911058801788499e-06, "loss": 0.3743, "step": 761 }, { "epoch": 0.06096, "grad_norm": 1.5852882862091064, "learning_rate": 9.910822490457139e-06, "loss": 0.356, "step": 762 }, { "epoch": 0.06104, "grad_norm": 1.6404635906219482, "learning_rate": 9.910585868434473e-06, "loss": 0.3543, "step": 763 }, { "epoch": 0.06112, "grad_norm": 1.5035934448242188, "learning_rate": 9.91034893573547e-06, "loss": 0.3572, "step": 764 }, { "epoch": 0.0612, "grad_norm": 1.9684243202209473, "learning_rate": 9.910111692375118e-06, "loss": 0.4283, "step": 765 }, { "epoch": 0.06128, "grad_norm": 1.9752951860427856, "learning_rate": 9.90987413836843e-06, "loss": 0.4133, "step": 766 }, { "epoch": 0.06136, "grad_norm": 1.6825329065322876, "learning_rate": 9.909636273730434e-06, "loss": 0.4326, "step": 767 }, { "epoch": 0.06144, "grad_norm": 1.7126015424728394, "learning_rate": 9.909398098476177e-06, "loss": 0.3714, "step": 768 }, { "epoch": 0.06152, "grad_norm": 1.39950692653656, "learning_rate": 9.90915961262073e-06, "loss": 0.3208, "step": 769 }, { "epoch": 0.0616, "grad_norm": 1.8275245428085327, "learning_rate": 9.908920816179182e-06, "loss": 0.3871, "step": 770 }, { "epoch": 0.06168, "grad_norm": 1.6578397750854492, "learning_rate": 9.90868170916664e-06, "loss": 0.335, "step": 771 }, { "epoch": 0.06176, "grad_norm": 1.8626042604446411, "learning_rate": 9.908442291598227e-06, "loss": 0.3591, "step": 772 }, { "epoch": 0.06184, "grad_norm": 1.6682721376419067, "learning_rate": 9.908202563489095e-06, "loss": 0.3483, "step": 773 }, { "epoch": 0.06192, "grad_norm": 1.5909526348114014, "learning_rate": 9.907962524854411e-06, "loss": 0.4166, "step": 774 }, { "epoch": 0.062, "grad_norm": 1.7059555053710938, "learning_rate": 9.90772217570936e-06, "loss": 0.3201, "step": 775 }, { "epoch": 0.06208, "grad_norm": 1.5793256759643555, "learning_rate": 9.907481516069149e-06, "loss": 0.3644, "step": 776 }, { "epoch": 0.06216, "grad_norm": 1.5775402784347534, "learning_rate": 9.907240545949001e-06, "loss": 0.2876, "step": 777 }, { "epoch": 0.06224, "grad_norm": 1.9473414421081543, "learning_rate": 9.906999265364163e-06, "loss": 0.367, "step": 778 }, { "epoch": 0.06232, "grad_norm": 1.9545674324035645, "learning_rate": 9.906757674329903e-06, "loss": 0.4359, "step": 779 }, { "epoch": 0.0624, "grad_norm": 1.605450987815857, "learning_rate": 9.906515772861501e-06, "loss": 0.3206, "step": 780 }, { "epoch": 0.06248, "grad_norm": 1.5518044233322144, "learning_rate": 9.906273560974264e-06, "loss": 0.3723, "step": 781 }, { "epoch": 0.06256, "grad_norm": 1.6524393558502197, "learning_rate": 9.906031038683515e-06, "loss": 0.3181, "step": 782 }, { "epoch": 0.06264, "grad_norm": 1.3769612312316895, "learning_rate": 9.9057882060046e-06, "loss": 0.3077, "step": 783 }, { "epoch": 0.06272, "grad_norm": 1.5857384204864502, "learning_rate": 9.905545062952876e-06, "loss": 0.3109, "step": 784 }, { "epoch": 0.0628, "grad_norm": 1.549974799156189, "learning_rate": 9.90530160954373e-06, "loss": 0.3216, "step": 785 }, { "epoch": 0.06288, "grad_norm": 1.836879849433899, "learning_rate": 9.905057845792568e-06, "loss": 0.3937, "step": 786 }, { "epoch": 0.06296, "grad_norm": 1.6906849145889282, "learning_rate": 9.904813771714806e-06, "loss": 0.3045, "step": 787 }, { "epoch": 0.06304, "grad_norm": 1.4917078018188477, "learning_rate": 9.904569387325888e-06, "loss": 0.2654, "step": 788 }, { "epoch": 0.06312, "grad_norm": 1.9071767330169678, "learning_rate": 9.904324692641279e-06, "loss": 0.4201, "step": 789 }, { "epoch": 0.0632, "grad_norm": 1.6818795204162598, "learning_rate": 9.904079687676453e-06, "loss": 0.3577, "step": 790 }, { "epoch": 0.06328, "grad_norm": 1.5190314054489136, "learning_rate": 9.903834372446914e-06, "loss": 0.3156, "step": 791 }, { "epoch": 0.06336, "grad_norm": 1.2045843601226807, "learning_rate": 9.903588746968185e-06, "loss": 0.2759, "step": 792 }, { "epoch": 0.06344, "grad_norm": 1.6211133003234863, "learning_rate": 9.903342811255802e-06, "loss": 0.3412, "step": 793 }, { "epoch": 0.06352, "grad_norm": 1.6240700483322144, "learning_rate": 9.903096565325326e-06, "loss": 0.2906, "step": 794 }, { "epoch": 0.0636, "grad_norm": 1.5824121236801147, "learning_rate": 9.902850009192338e-06, "loss": 0.3956, "step": 795 }, { "epoch": 0.06368, "grad_norm": 1.8506085872650146, "learning_rate": 9.902603142872433e-06, "loss": 0.3616, "step": 796 }, { "epoch": 0.06376, "grad_norm": 1.4106500148773193, "learning_rate": 9.90235596638123e-06, "loss": 0.3068, "step": 797 }, { "epoch": 0.06384, "grad_norm": 1.5966297388076782, "learning_rate": 9.902108479734372e-06, "loss": 0.4436, "step": 798 }, { "epoch": 0.06392, "grad_norm": 1.908096194267273, "learning_rate": 9.90186068294751e-06, "loss": 0.3364, "step": 799 }, { "epoch": 0.064, "grad_norm": 1.6867671012878418, "learning_rate": 9.901612576036326e-06, "loss": 0.377, "step": 800 }, { "epoch": 0.06408, "grad_norm": 1.7174458503723145, "learning_rate": 9.901364159016514e-06, "loss": 0.4152, "step": 801 }, { "epoch": 0.06416, "grad_norm": 1.4476027488708496, "learning_rate": 9.901115431903792e-06, "loss": 0.3267, "step": 802 }, { "epoch": 0.06424, "grad_norm": 1.7989741563796997, "learning_rate": 9.900866394713895e-06, "loss": 0.3113, "step": 803 }, { "epoch": 0.06432, "grad_norm": 2.0357306003570557, "learning_rate": 9.900617047462581e-06, "loss": 0.3554, "step": 804 }, { "epoch": 0.0644, "grad_norm": 1.6617554426193237, "learning_rate": 9.900367390165623e-06, "loss": 0.3521, "step": 805 }, { "epoch": 0.06448, "grad_norm": 1.6434162855148315, "learning_rate": 9.900117422838817e-06, "loss": 0.3676, "step": 806 }, { "epoch": 0.06456, "grad_norm": 1.570143699645996, "learning_rate": 9.899867145497978e-06, "loss": 0.3857, "step": 807 }, { "epoch": 0.06464, "grad_norm": 1.8630397319793701, "learning_rate": 9.89961655815894e-06, "loss": 0.4321, "step": 808 }, { "epoch": 0.06472, "grad_norm": 1.3922876119613647, "learning_rate": 9.899365660837555e-06, "loss": 0.3751, "step": 809 }, { "epoch": 0.0648, "grad_norm": 1.575070858001709, "learning_rate": 9.899114453549699e-06, "loss": 0.3384, "step": 810 }, { "epoch": 0.06488, "grad_norm": 1.494339108467102, "learning_rate": 9.898862936311264e-06, "loss": 0.3632, "step": 811 }, { "epoch": 0.06496, "grad_norm": 1.5550836324691772, "learning_rate": 9.898611109138163e-06, "loss": 0.3129, "step": 812 }, { "epoch": 0.06504, "grad_norm": 1.3468754291534424, "learning_rate": 9.898358972046327e-06, "loss": 0.3435, "step": 813 }, { "epoch": 0.06512, "grad_norm": 1.2275058031082153, "learning_rate": 9.89810652505171e-06, "loss": 0.2591, "step": 814 }, { "epoch": 0.0652, "grad_norm": 1.689713954925537, "learning_rate": 9.897853768170282e-06, "loss": 0.4462, "step": 815 }, { "epoch": 0.06528, "grad_norm": 1.4738130569458008, "learning_rate": 9.897600701418033e-06, "loss": 0.2962, "step": 816 }, { "epoch": 0.06536, "grad_norm": 1.8294364213943481, "learning_rate": 9.897347324810977e-06, "loss": 0.3394, "step": 817 }, { "epoch": 0.06544, "grad_norm": 2.3864986896514893, "learning_rate": 9.89709363836514e-06, "loss": 0.5214, "step": 818 }, { "epoch": 0.06552, "grad_norm": 1.4256991147994995, "learning_rate": 9.896839642096576e-06, "loss": 0.3178, "step": 819 }, { "epoch": 0.0656, "grad_norm": 1.8996535539627075, "learning_rate": 9.896585336021353e-06, "loss": 0.53, "step": 820 }, { "epoch": 0.06568, "grad_norm": 1.7665208578109741, "learning_rate": 9.896330720155558e-06, "loss": 0.4027, "step": 821 }, { "epoch": 0.06576, "grad_norm": 1.2968679666519165, "learning_rate": 9.896075794515304e-06, "loss": 0.2658, "step": 822 }, { "epoch": 0.06584, "grad_norm": 2.179189682006836, "learning_rate": 9.895820559116715e-06, "loss": 0.3901, "step": 823 }, { "epoch": 0.06592, "grad_norm": 1.4469057321548462, "learning_rate": 9.89556501397594e-06, "loss": 0.3504, "step": 824 }, { "epoch": 0.066, "grad_norm": 1.4145433902740479, "learning_rate": 9.89530915910915e-06, "loss": 0.3422, "step": 825 }, { "epoch": 0.06608, "grad_norm": 1.6971393823623657, "learning_rate": 9.895052994532527e-06, "loss": 0.3849, "step": 826 }, { "epoch": 0.06616, "grad_norm": 1.416899561882019, "learning_rate": 9.89479652026228e-06, "loss": 0.2804, "step": 827 }, { "epoch": 0.06624, "grad_norm": 1.758364200592041, "learning_rate": 9.894539736314636e-06, "loss": 0.3886, "step": 828 }, { "epoch": 0.06632, "grad_norm": 1.8403048515319824, "learning_rate": 9.894282642705839e-06, "loss": 0.3514, "step": 829 }, { "epoch": 0.0664, "grad_norm": 2.0701754093170166, "learning_rate": 9.894025239452156e-06, "loss": 0.3829, "step": 830 }, { "epoch": 0.06648, "grad_norm": 1.6465054750442505, "learning_rate": 9.893767526569873e-06, "loss": 0.4353, "step": 831 }, { "epoch": 0.06656, "grad_norm": 1.3121788501739502, "learning_rate": 9.893509504075291e-06, "loss": 0.2722, "step": 832 }, { "epoch": 0.06664, "grad_norm": 1.453292727470398, "learning_rate": 9.893251171984737e-06, "loss": 0.3116, "step": 833 }, { "epoch": 0.06672, "grad_norm": 1.511839747428894, "learning_rate": 9.892992530314556e-06, "loss": 0.3135, "step": 834 }, { "epoch": 0.0668, "grad_norm": 1.5262436866760254, "learning_rate": 9.892733579081108e-06, "loss": 0.321, "step": 835 }, { "epoch": 0.06688, "grad_norm": 2.0322422981262207, "learning_rate": 9.892474318300778e-06, "loss": 0.5337, "step": 836 }, { "epoch": 0.06696, "grad_norm": 1.5687463283538818, "learning_rate": 9.892214747989967e-06, "loss": 0.2962, "step": 837 }, { "epoch": 0.06704, "grad_norm": 2.1740803718566895, "learning_rate": 9.8919548681651e-06, "loss": 0.4359, "step": 838 }, { "epoch": 0.06712, "grad_norm": 1.6020910739898682, "learning_rate": 9.891694678842617e-06, "loss": 0.3122, "step": 839 }, { "epoch": 0.0672, "grad_norm": 1.6646548509597778, "learning_rate": 9.891434180038979e-06, "loss": 0.4588, "step": 840 }, { "epoch": 0.06728, "grad_norm": 1.4084759950637817, "learning_rate": 9.891173371770666e-06, "loss": 0.2731, "step": 841 }, { "epoch": 0.06736, "grad_norm": 1.433922290802002, "learning_rate": 9.890912254054182e-06, "loss": 0.3111, "step": 842 }, { "epoch": 0.06744, "grad_norm": 1.3869508504867554, "learning_rate": 9.890650826906042e-06, "loss": 0.3775, "step": 843 }, { "epoch": 0.06752, "grad_norm": 1.6502546072006226, "learning_rate": 9.890389090342789e-06, "loss": 0.3441, "step": 844 }, { "epoch": 0.0676, "grad_norm": 1.827817678451538, "learning_rate": 9.890127044380983e-06, "loss": 0.4, "step": 845 }, { "epoch": 0.06768, "grad_norm": 1.306138038635254, "learning_rate": 9.889864689037201e-06, "loss": 0.3183, "step": 846 }, { "epoch": 0.06776, "grad_norm": 2.2315001487731934, "learning_rate": 9.88960202432804e-06, "loss": 0.4797, "step": 847 }, { "epoch": 0.06784, "grad_norm": 1.777730941772461, "learning_rate": 9.889339050270122e-06, "loss": 0.4378, "step": 848 }, { "epoch": 0.06792, "grad_norm": 1.909468173980713, "learning_rate": 9.88907576688008e-06, "loss": 0.4438, "step": 849 }, { "epoch": 0.068, "grad_norm": 1.5327777862548828, "learning_rate": 9.888812174174574e-06, "loss": 0.3694, "step": 850 }, { "epoch": 0.06808, "grad_norm": 2.512308120727539, "learning_rate": 9.88854827217028e-06, "loss": 0.5076, "step": 851 }, { "epoch": 0.06816, "grad_norm": 1.4551916122436523, "learning_rate": 9.888284060883892e-06, "loss": 0.3042, "step": 852 }, { "epoch": 0.06824, "grad_norm": 1.8381390571594238, "learning_rate": 9.88801954033213e-06, "loss": 0.4247, "step": 853 }, { "epoch": 0.06832, "grad_norm": 1.5407111644744873, "learning_rate": 9.887754710531727e-06, "loss": 0.3213, "step": 854 }, { "epoch": 0.0684, "grad_norm": 1.478391408920288, "learning_rate": 9.887489571499438e-06, "loss": 0.3527, "step": 855 }, { "epoch": 0.06848, "grad_norm": 1.9624853134155273, "learning_rate": 9.887224123252037e-06, "loss": 0.4039, "step": 856 }, { "epoch": 0.06856, "grad_norm": 1.6501410007476807, "learning_rate": 9.886958365806317e-06, "loss": 0.4169, "step": 857 }, { "epoch": 0.06864, "grad_norm": 1.4560359716415405, "learning_rate": 9.886692299179094e-06, "loss": 0.2993, "step": 858 }, { "epoch": 0.06872, "grad_norm": 1.681654930114746, "learning_rate": 9.8864259233872e-06, "loss": 0.405, "step": 859 }, { "epoch": 0.0688, "grad_norm": 1.7231281995773315, "learning_rate": 9.886159238447488e-06, "loss": 0.3455, "step": 860 }, { "epoch": 0.06888, "grad_norm": 1.5479826927185059, "learning_rate": 9.885892244376831e-06, "loss": 0.3774, "step": 861 }, { "epoch": 0.06896, "grad_norm": 1.5777459144592285, "learning_rate": 9.88562494119212e-06, "loss": 0.3301, "step": 862 }, { "epoch": 0.06904, "grad_norm": 2.2783584594726562, "learning_rate": 9.885357328910265e-06, "loss": 0.4368, "step": 863 }, { "epoch": 0.06912, "grad_norm": 1.8769716024398804, "learning_rate": 9.885089407548198e-06, "loss": 0.3725, "step": 864 }, { "epoch": 0.0692, "grad_norm": 1.3858102560043335, "learning_rate": 9.884821177122871e-06, "loss": 0.2691, "step": 865 }, { "epoch": 0.06928, "grad_norm": 1.477070689201355, "learning_rate": 9.884552637651252e-06, "loss": 0.2952, "step": 866 }, { "epoch": 0.06936, "grad_norm": 1.43630051612854, "learning_rate": 9.884283789150332e-06, "loss": 0.3235, "step": 867 }, { "epoch": 0.06944, "grad_norm": 1.65440833568573, "learning_rate": 9.88401463163712e-06, "loss": 0.3179, "step": 868 }, { "epoch": 0.06952, "grad_norm": 1.6520179510116577, "learning_rate": 9.883745165128642e-06, "loss": 0.3051, "step": 869 }, { "epoch": 0.0696, "grad_norm": 1.9777926206588745, "learning_rate": 9.883475389641952e-06, "loss": 0.4753, "step": 870 }, { "epoch": 0.06968, "grad_norm": 1.5092403888702393, "learning_rate": 9.883205305194112e-06, "loss": 0.3216, "step": 871 }, { "epoch": 0.06976, "grad_norm": 1.4967854022979736, "learning_rate": 9.88293491180221e-06, "loss": 0.3259, "step": 872 }, { "epoch": 0.06984, "grad_norm": 1.7150827646255493, "learning_rate": 9.882664209483356e-06, "loss": 0.4019, "step": 873 }, { "epoch": 0.06992, "grad_norm": 1.9008959531784058, "learning_rate": 9.882393198254676e-06, "loss": 0.5442, "step": 874 }, { "epoch": 0.07, "grad_norm": 1.919569492340088, "learning_rate": 9.882121878133314e-06, "loss": 0.501, "step": 875 }, { "epoch": 0.07008, "grad_norm": 1.6029773950576782, "learning_rate": 9.881850249136438e-06, "loss": 0.3842, "step": 876 }, { "epoch": 0.07016, "grad_norm": 1.4710146188735962, "learning_rate": 9.881578311281229e-06, "loss": 0.3785, "step": 877 }, { "epoch": 0.07024, "grad_norm": 1.135948896408081, "learning_rate": 9.881306064584895e-06, "loss": 0.201, "step": 878 }, { "epoch": 0.07032, "grad_norm": 1.733964443206787, "learning_rate": 9.88103350906466e-06, "loss": 0.327, "step": 879 }, { "epoch": 0.0704, "grad_norm": 1.3882209062576294, "learning_rate": 9.880760644737765e-06, "loss": 0.3169, "step": 880 }, { "epoch": 0.07048, "grad_norm": 1.7301795482635498, "learning_rate": 9.880487471621476e-06, "loss": 0.3786, "step": 881 }, { "epoch": 0.07056, "grad_norm": 1.1793718338012695, "learning_rate": 9.880213989733077e-06, "loss": 0.2577, "step": 882 }, { "epoch": 0.07064, "grad_norm": 1.4494835138320923, "learning_rate": 9.879940199089864e-06, "loss": 0.2908, "step": 883 }, { "epoch": 0.07072, "grad_norm": 1.7559984922409058, "learning_rate": 9.879666099709166e-06, "loss": 0.3739, "step": 884 }, { "epoch": 0.0708, "grad_norm": 1.729697823524475, "learning_rate": 9.87939169160832e-06, "loss": 0.4134, "step": 885 }, { "epoch": 0.07088, "grad_norm": 1.867188572883606, "learning_rate": 9.879116974804688e-06, "loss": 0.4629, "step": 886 }, { "epoch": 0.07096, "grad_norm": 1.5233434438705444, "learning_rate": 9.878841949315652e-06, "loss": 0.3819, "step": 887 }, { "epoch": 0.07104, "grad_norm": 1.6903587579727173, "learning_rate": 9.878566615158609e-06, "loss": 0.3121, "step": 888 }, { "epoch": 0.07112, "grad_norm": 1.3567419052124023, "learning_rate": 9.878290972350981e-06, "loss": 0.2761, "step": 889 }, { "epoch": 0.0712, "grad_norm": 1.6625133752822876, "learning_rate": 9.878015020910205e-06, "loss": 0.3199, "step": 890 }, { "epoch": 0.07128, "grad_norm": 1.4875622987747192, "learning_rate": 9.877738760853741e-06, "loss": 0.3499, "step": 891 }, { "epoch": 0.07136, "grad_norm": 1.3312549591064453, "learning_rate": 9.877462192199068e-06, "loss": 0.2261, "step": 892 }, { "epoch": 0.07144, "grad_norm": 1.9580225944519043, "learning_rate": 9.87718531496368e-06, "loss": 0.3892, "step": 893 }, { "epoch": 0.07152, "grad_norm": 1.421797275543213, "learning_rate": 9.876908129165096e-06, "loss": 0.3268, "step": 894 }, { "epoch": 0.0716, "grad_norm": 2.2238597869873047, "learning_rate": 9.876630634820853e-06, "loss": 0.5997, "step": 895 }, { "epoch": 0.07168, "grad_norm": 1.9498764276504517, "learning_rate": 9.87635283194851e-06, "loss": 0.3721, "step": 896 }, { "epoch": 0.07176, "grad_norm": 1.8909319639205933, "learning_rate": 9.876074720565637e-06, "loss": 0.4237, "step": 897 }, { "epoch": 0.07184, "grad_norm": 1.432112455368042, "learning_rate": 9.875796300689832e-06, "loss": 0.2579, "step": 898 }, { "epoch": 0.07192, "grad_norm": 1.9798771142959595, "learning_rate": 9.875517572338711e-06, "loss": 0.3328, "step": 899 }, { "epoch": 0.072, "grad_norm": 1.692348837852478, "learning_rate": 9.875238535529905e-06, "loss": 0.3862, "step": 900 }, { "epoch": 0.07208, "grad_norm": 1.7320940494537354, "learning_rate": 9.87495919028107e-06, "loss": 0.4359, "step": 901 }, { "epoch": 0.07216, "grad_norm": 1.4372979402542114, "learning_rate": 9.87467953660988e-06, "loss": 0.3045, "step": 902 }, { "epoch": 0.07224, "grad_norm": 1.5734885931015015, "learning_rate": 9.874399574534024e-06, "loss": 0.3442, "step": 903 }, { "epoch": 0.07232, "grad_norm": 1.7863317728042603, "learning_rate": 9.874119304071217e-06, "loss": 0.3815, "step": 904 }, { "epoch": 0.0724, "grad_norm": 1.580787181854248, "learning_rate": 9.873838725239192e-06, "loss": 0.3385, "step": 905 }, { "epoch": 0.07248, "grad_norm": 1.7870442867279053, "learning_rate": 9.873557838055698e-06, "loss": 0.3777, "step": 906 }, { "epoch": 0.07256, "grad_norm": 1.8872705698013306, "learning_rate": 9.873276642538508e-06, "loss": 0.3434, "step": 907 }, { "epoch": 0.07264, "grad_norm": 1.5185067653656006, "learning_rate": 9.872995138705407e-06, "loss": 0.3182, "step": 908 }, { "epoch": 0.07272, "grad_norm": 1.175097942352295, "learning_rate": 9.872713326574212e-06, "loss": 0.2708, "step": 909 }, { "epoch": 0.0728, "grad_norm": 1.4773153066635132, "learning_rate": 9.87243120616275e-06, "loss": 0.3242, "step": 910 }, { "epoch": 0.07288, "grad_norm": 1.9224504232406616, "learning_rate": 9.872148777488865e-06, "loss": 0.5271, "step": 911 }, { "epoch": 0.07296, "grad_norm": 1.5207834243774414, "learning_rate": 9.871866040570432e-06, "loss": 0.3758, "step": 912 }, { "epoch": 0.07304, "grad_norm": 1.730446696281433, "learning_rate": 9.871582995425335e-06, "loss": 0.4164, "step": 913 }, { "epoch": 0.07312, "grad_norm": 1.8168655633926392, "learning_rate": 9.871299642071483e-06, "loss": 0.3849, "step": 914 }, { "epoch": 0.0732, "grad_norm": 2.1133110523223877, "learning_rate": 9.871015980526802e-06, "loss": 0.4274, "step": 915 }, { "epoch": 0.07328, "grad_norm": 1.3025873899459839, "learning_rate": 9.870732010809236e-06, "loss": 0.2901, "step": 916 }, { "epoch": 0.07336, "grad_norm": 1.259355068206787, "learning_rate": 9.870447732936755e-06, "loss": 0.3114, "step": 917 }, { "epoch": 0.07344, "grad_norm": 1.4465082883834839, "learning_rate": 9.870163146927343e-06, "loss": 0.3957, "step": 918 }, { "epoch": 0.07352, "grad_norm": 1.7125508785247803, "learning_rate": 9.869878252799004e-06, "loss": 0.3449, "step": 919 }, { "epoch": 0.0736, "grad_norm": 2.206003189086914, "learning_rate": 9.869593050569761e-06, "loss": 0.4511, "step": 920 }, { "epoch": 0.07368, "grad_norm": 1.9376089572906494, "learning_rate": 9.869307540257663e-06, "loss": 0.3655, "step": 921 }, { "epoch": 0.07376, "grad_norm": 1.8399741649627686, "learning_rate": 9.869021721880765e-06, "loss": 0.4562, "step": 922 }, { "epoch": 0.07384, "grad_norm": 1.5403954982757568, "learning_rate": 9.868735595457157e-06, "loss": 0.4594, "step": 923 }, { "epoch": 0.07392, "grad_norm": 1.8176610469818115, "learning_rate": 9.86844916100494e-06, "loss": 0.3866, "step": 924 }, { "epoch": 0.074, "grad_norm": 1.6104621887207031, "learning_rate": 9.868162418542233e-06, "loss": 0.3379, "step": 925 }, { "epoch": 0.07408, "grad_norm": 1.7758665084838867, "learning_rate": 9.867875368087179e-06, "loss": 0.3828, "step": 926 }, { "epoch": 0.07416, "grad_norm": 1.398337960243225, "learning_rate": 9.867588009657938e-06, "loss": 0.3024, "step": 927 }, { "epoch": 0.07424, "grad_norm": 1.812811017036438, "learning_rate": 9.86730034327269e-06, "loss": 0.5139, "step": 928 }, { "epoch": 0.07432, "grad_norm": 1.4070488214492798, "learning_rate": 9.867012368949637e-06, "loss": 0.325, "step": 929 }, { "epoch": 0.0744, "grad_norm": 1.56523597240448, "learning_rate": 9.866724086706996e-06, "loss": 0.4111, "step": 930 }, { "epoch": 0.07448, "grad_norm": 2.1064064502716064, "learning_rate": 9.866435496563004e-06, "loss": 0.4322, "step": 931 }, { "epoch": 0.07456, "grad_norm": 1.599244236946106, "learning_rate": 9.866146598535925e-06, "loss": 0.3167, "step": 932 }, { "epoch": 0.07464, "grad_norm": 1.8283414840698242, "learning_rate": 9.865857392644029e-06, "loss": 0.4891, "step": 933 }, { "epoch": 0.07472, "grad_norm": 1.8395085334777832, "learning_rate": 9.86556787890562e-06, "loss": 0.4302, "step": 934 }, { "epoch": 0.0748, "grad_norm": 1.6834166049957275, "learning_rate": 9.865278057339011e-06, "loss": 0.3058, "step": 935 }, { "epoch": 0.07488, "grad_norm": 1.350382924079895, "learning_rate": 9.864987927962536e-06, "loss": 0.3224, "step": 936 }, { "epoch": 0.07496, "grad_norm": 2.0382933616638184, "learning_rate": 9.864697490794556e-06, "loss": 0.3888, "step": 937 }, { "epoch": 0.07504, "grad_norm": 1.7430832386016846, "learning_rate": 9.864406745853443e-06, "loss": 0.3893, "step": 938 }, { "epoch": 0.07512, "grad_norm": 2.0541799068450928, "learning_rate": 9.86411569315759e-06, "loss": 0.4582, "step": 939 }, { "epoch": 0.0752, "grad_norm": 1.439474105834961, "learning_rate": 9.863824332725413e-06, "loss": 0.3332, "step": 940 }, { "epoch": 0.07528, "grad_norm": 1.6011489629745483, "learning_rate": 9.863532664575346e-06, "loss": 0.3182, "step": 941 }, { "epoch": 0.07536, "grad_norm": 1.7721103429794312, "learning_rate": 9.863240688725839e-06, "loss": 0.2911, "step": 942 }, { "epoch": 0.07544, "grad_norm": 1.915481448173523, "learning_rate": 9.862948405195367e-06, "loss": 0.4128, "step": 943 }, { "epoch": 0.07552, "grad_norm": 1.525109887123108, "learning_rate": 9.862655814002421e-06, "loss": 0.2976, "step": 944 }, { "epoch": 0.0756, "grad_norm": 1.5049928426742554, "learning_rate": 9.862362915165513e-06, "loss": 0.3732, "step": 945 }, { "epoch": 0.07568, "grad_norm": 1.1610263586044312, "learning_rate": 9.862069708703172e-06, "loss": 0.2572, "step": 946 }, { "epoch": 0.07576, "grad_norm": 1.5599390268325806, "learning_rate": 9.861776194633948e-06, "loss": 0.3857, "step": 947 }, { "epoch": 0.07584, "grad_norm": 1.5780138969421387, "learning_rate": 9.861482372976413e-06, "loss": 0.3915, "step": 948 }, { "epoch": 0.07592, "grad_norm": 1.3370410203933716, "learning_rate": 9.861188243749154e-06, "loss": 0.3541, "step": 949 }, { "epoch": 0.076, "grad_norm": 1.5612949132919312, "learning_rate": 9.86089380697078e-06, "loss": 0.3142, "step": 950 }, { "epoch": 0.07608, "grad_norm": 2.2083637714385986, "learning_rate": 9.860599062659922e-06, "loss": 0.4862, "step": 951 }, { "epoch": 0.07616, "grad_norm": 1.7525696754455566, "learning_rate": 9.860304010835222e-06, "loss": 0.3501, "step": 952 }, { "epoch": 0.07624, "grad_norm": 1.5072181224822998, "learning_rate": 9.860008651515352e-06, "loss": 0.3569, "step": 953 }, { "epoch": 0.07632, "grad_norm": 2.169581890106201, "learning_rate": 9.859712984718994e-06, "loss": 0.4334, "step": 954 }, { "epoch": 0.0764, "grad_norm": 1.740846037864685, "learning_rate": 9.859417010464857e-06, "loss": 0.3597, "step": 955 }, { "epoch": 0.07648, "grad_norm": 1.516270637512207, "learning_rate": 9.859120728771667e-06, "loss": 0.3523, "step": 956 }, { "epoch": 0.07656, "grad_norm": 2.101482629776001, "learning_rate": 9.858824139658166e-06, "loss": 0.4112, "step": 957 }, { "epoch": 0.07664, "grad_norm": 1.6379002332687378, "learning_rate": 9.85852724314312e-06, "loss": 0.3163, "step": 958 }, { "epoch": 0.07672, "grad_norm": 1.6774054765701294, "learning_rate": 9.858230039245312e-06, "loss": 0.2885, "step": 959 }, { "epoch": 0.0768, "grad_norm": 1.9318816661834717, "learning_rate": 9.857932527983544e-06, "loss": 0.4156, "step": 960 }, { "epoch": 0.07688, "grad_norm": 1.6440109014511108, "learning_rate": 9.85763470937664e-06, "loss": 0.397, "step": 961 }, { "epoch": 0.07696, "grad_norm": 1.682336449623108, "learning_rate": 9.857336583443441e-06, "loss": 0.3695, "step": 962 }, { "epoch": 0.07704, "grad_norm": 1.555406093597412, "learning_rate": 9.85703815020281e-06, "loss": 0.304, "step": 963 }, { "epoch": 0.07712, "grad_norm": 1.527746319770813, "learning_rate": 9.856739409673628e-06, "loss": 0.3563, "step": 964 }, { "epoch": 0.0772, "grad_norm": 2.2929794788360596, "learning_rate": 9.856440361874791e-06, "loss": 0.449, "step": 965 }, { "epoch": 0.07728, "grad_norm": 1.6270774602890015, "learning_rate": 9.856141006825225e-06, "loss": 0.3649, "step": 966 }, { "epoch": 0.07736, "grad_norm": 1.5416566133499146, "learning_rate": 9.855841344543865e-06, "loss": 0.3216, "step": 967 }, { "epoch": 0.07744, "grad_norm": 1.646708607673645, "learning_rate": 9.855541375049671e-06, "loss": 0.4936, "step": 968 }, { "epoch": 0.07752, "grad_norm": 1.7581204175949097, "learning_rate": 9.85524109836162e-06, "loss": 0.3516, "step": 969 }, { "epoch": 0.0776, "grad_norm": 1.6109693050384521, "learning_rate": 9.854940514498712e-06, "loss": 0.3182, "step": 970 }, { "epoch": 0.07768, "grad_norm": 1.38651442527771, "learning_rate": 9.854639623479962e-06, "loss": 0.2693, "step": 971 }, { "epoch": 0.07776, "grad_norm": 1.5283217430114746, "learning_rate": 9.854338425324405e-06, "loss": 0.3448, "step": 972 }, { "epoch": 0.07784, "grad_norm": 1.3900861740112305, "learning_rate": 9.854036920051102e-06, "loss": 0.2597, "step": 973 }, { "epoch": 0.07792, "grad_norm": 1.3767924308776855, "learning_rate": 9.85373510767912e-06, "loss": 0.3011, "step": 974 }, { "epoch": 0.078, "grad_norm": 1.4808169603347778, "learning_rate": 9.853432988227563e-06, "loss": 0.3063, "step": 975 }, { "epoch": 0.07808, "grad_norm": 1.500436782836914, "learning_rate": 9.853130561715538e-06, "loss": 0.3745, "step": 976 }, { "epoch": 0.07816, "grad_norm": 1.8564033508300781, "learning_rate": 9.852827828162182e-06, "loss": 0.4051, "step": 977 }, { "epoch": 0.07824, "grad_norm": 2.207338809967041, "learning_rate": 9.852524787586645e-06, "loss": 0.4495, "step": 978 }, { "epoch": 0.07832, "grad_norm": 1.2589378356933594, "learning_rate": 9.852221440008103e-06, "loss": 0.3034, "step": 979 }, { "epoch": 0.0784, "grad_norm": 1.5934374332427979, "learning_rate": 9.851917785445745e-06, "loss": 0.3168, "step": 980 }, { "epoch": 0.07848, "grad_norm": 1.6377482414245605, "learning_rate": 9.851613823918785e-06, "loss": 0.3054, "step": 981 }, { "epoch": 0.07856, "grad_norm": 1.398160696029663, "learning_rate": 9.85130955544645e-06, "loss": 0.3111, "step": 982 }, { "epoch": 0.07864, "grad_norm": 1.5453648567199707, "learning_rate": 9.851004980047993e-06, "loss": 0.3174, "step": 983 }, { "epoch": 0.07872, "grad_norm": 1.4585448503494263, "learning_rate": 9.850700097742683e-06, "loss": 0.3859, "step": 984 }, { "epoch": 0.0788, "grad_norm": 1.838578224182129, "learning_rate": 9.850394908549808e-06, "loss": 0.431, "step": 985 }, { "epoch": 0.07888, "grad_norm": 1.7750165462493896, "learning_rate": 9.850089412488676e-06, "loss": 0.3653, "step": 986 }, { "epoch": 0.07896, "grad_norm": 1.7502179145812988, "learning_rate": 9.849783609578616e-06, "loss": 0.4266, "step": 987 }, { "epoch": 0.07904, "grad_norm": 1.66646409034729, "learning_rate": 9.849477499838974e-06, "loss": 0.3501, "step": 988 }, { "epoch": 0.07912, "grad_norm": 2.072134256362915, "learning_rate": 9.849171083289117e-06, "loss": 0.4357, "step": 989 }, { "epoch": 0.0792, "grad_norm": 1.6113394498825073, "learning_rate": 9.84886435994843e-06, "loss": 0.3318, "step": 990 }, { "epoch": 0.07928, "grad_norm": 1.862763524055481, "learning_rate": 9.84855732983632e-06, "loss": 0.34, "step": 991 }, { "epoch": 0.07936, "grad_norm": 1.8371633291244507, "learning_rate": 9.848249992972212e-06, "loss": 0.3153, "step": 992 }, { "epoch": 0.07944, "grad_norm": 1.5195534229278564, "learning_rate": 9.847942349375549e-06, "loss": 0.3188, "step": 993 }, { "epoch": 0.07952, "grad_norm": 1.642816185951233, "learning_rate": 9.847634399065794e-06, "loss": 0.3539, "step": 994 }, { "epoch": 0.0796, "grad_norm": 1.4042744636535645, "learning_rate": 9.84732614206243e-06, "loss": 0.3128, "step": 995 }, { "epoch": 0.07968, "grad_norm": 1.9033617973327637, "learning_rate": 9.847017578384961e-06, "loss": 0.395, "step": 996 }, { "epoch": 0.07976, "grad_norm": 2.087599039077759, "learning_rate": 9.846708708052908e-06, "loss": 0.3815, "step": 997 }, { "epoch": 0.07984, "grad_norm": 1.8923081159591675, "learning_rate": 9.846399531085812e-06, "loss": 0.3115, "step": 998 }, { "epoch": 0.07992, "grad_norm": 1.654727816581726, "learning_rate": 9.846090047503235e-06, "loss": 0.2935, "step": 999 }, { "epoch": 0.08, "grad_norm": 2.230875015258789, "learning_rate": 9.845780257324755e-06, "loss": 0.3925, "step": 1000 }, { "epoch": 0.08008, "grad_norm": 1.9971858263015747, "learning_rate": 9.845470160569973e-06, "loss": 0.3956, "step": 1001 }, { "epoch": 0.08016, "grad_norm": 1.6523634195327759, "learning_rate": 9.845159757258505e-06, "loss": 0.443, "step": 1002 }, { "epoch": 0.08024, "grad_norm": 2.058262586593628, "learning_rate": 9.844849047409993e-06, "loss": 0.3834, "step": 1003 }, { "epoch": 0.08032, "grad_norm": 1.6032230854034424, "learning_rate": 9.844538031044092e-06, "loss": 0.3474, "step": 1004 }, { "epoch": 0.0804, "grad_norm": 1.7125388383865356, "learning_rate": 9.84422670818048e-06, "loss": 0.4696, "step": 1005 }, { "epoch": 0.08048, "grad_norm": 1.2934495210647583, "learning_rate": 9.843915078838852e-06, "loss": 0.3361, "step": 1006 }, { "epoch": 0.08056, "grad_norm": 1.6751182079315186, "learning_rate": 9.843603143038925e-06, "loss": 0.3279, "step": 1007 }, { "epoch": 0.08064, "grad_norm": 1.739035964012146, "learning_rate": 9.843290900800436e-06, "loss": 0.3179, "step": 1008 }, { "epoch": 0.08072, "grad_norm": 1.8006422519683838, "learning_rate": 9.842978352143133e-06, "loss": 0.3985, "step": 1009 }, { "epoch": 0.0808, "grad_norm": 1.3257513046264648, "learning_rate": 9.842665497086798e-06, "loss": 0.3005, "step": 1010 }, { "epoch": 0.08088, "grad_norm": 1.31377112865448, "learning_rate": 9.84235233565122e-06, "loss": 0.2922, "step": 1011 }, { "epoch": 0.08096, "grad_norm": 1.3837974071502686, "learning_rate": 9.842038867856211e-06, "loss": 0.2946, "step": 1012 }, { "epoch": 0.08104, "grad_norm": 1.7588199377059937, "learning_rate": 9.841725093721606e-06, "loss": 0.4315, "step": 1013 }, { "epoch": 0.08112, "grad_norm": 1.4498939514160156, "learning_rate": 9.841411013267252e-06, "loss": 0.3537, "step": 1014 }, { "epoch": 0.0812, "grad_norm": 1.9055722951889038, "learning_rate": 9.841096626513024e-06, "loss": 0.4242, "step": 1015 }, { "epoch": 0.08128, "grad_norm": 2.0020415782928467, "learning_rate": 9.840781933478813e-06, "loss": 0.4011, "step": 1016 }, { "epoch": 0.08136, "grad_norm": 1.7482478618621826, "learning_rate": 9.840466934184525e-06, "loss": 0.3524, "step": 1017 }, { "epoch": 0.08144, "grad_norm": 1.8049051761627197, "learning_rate": 9.84015162865009e-06, "loss": 0.3151, "step": 1018 }, { "epoch": 0.08152, "grad_norm": 1.8534055948257446, "learning_rate": 9.839836016895457e-06, "loss": 0.3722, "step": 1019 }, { "epoch": 0.0816, "grad_norm": 1.7496310472488403, "learning_rate": 9.839520098940593e-06, "loss": 0.4035, "step": 1020 }, { "epoch": 0.08168, "grad_norm": 2.2628374099731445, "learning_rate": 9.839203874805486e-06, "loss": 0.4903, "step": 1021 }, { "epoch": 0.08176, "grad_norm": 1.4022674560546875, "learning_rate": 9.838887344510139e-06, "loss": 0.314, "step": 1022 }, { "epoch": 0.08184, "grad_norm": 1.7159093618392944, "learning_rate": 9.838570508074584e-06, "loss": 0.4232, "step": 1023 }, { "epoch": 0.08192, "grad_norm": 1.4128509759902954, "learning_rate": 9.838253365518862e-06, "loss": 0.3225, "step": 1024 }, { "epoch": 0.082, "grad_norm": 1.8229515552520752, "learning_rate": 9.837935916863038e-06, "loss": 0.3784, "step": 1025 }, { "epoch": 0.08208, "grad_norm": 1.3460335731506348, "learning_rate": 9.837618162127196e-06, "loss": 0.2683, "step": 1026 }, { "epoch": 0.08216, "grad_norm": 1.5191764831542969, "learning_rate": 9.83730010133144e-06, "loss": 0.3418, "step": 1027 }, { "epoch": 0.08224, "grad_norm": 1.3885241746902466, "learning_rate": 9.836981734495895e-06, "loss": 0.2815, "step": 1028 }, { "epoch": 0.08232, "grad_norm": 2.1471521854400635, "learning_rate": 9.836663061640697e-06, "loss": 0.3671, "step": 1029 }, { "epoch": 0.0824, "grad_norm": 1.2107166051864624, "learning_rate": 9.83634408278601e-06, "loss": 0.2604, "step": 1030 }, { "epoch": 0.08248, "grad_norm": 1.9330739974975586, "learning_rate": 9.836024797952017e-06, "loss": 0.5037, "step": 1031 }, { "epoch": 0.08256, "grad_norm": 1.541945219039917, "learning_rate": 9.835705207158916e-06, "loss": 0.3424, "step": 1032 }, { "epoch": 0.08264, "grad_norm": 2.0090341567993164, "learning_rate": 9.835385310426928e-06, "loss": 0.4266, "step": 1033 }, { "epoch": 0.08272, "grad_norm": 1.8531626462936401, "learning_rate": 9.835065107776289e-06, "loss": 0.4106, "step": 1034 }, { "epoch": 0.0828, "grad_norm": 1.9602084159851074, "learning_rate": 9.83474459922726e-06, "loss": 0.3546, "step": 1035 }, { "epoch": 0.08288, "grad_norm": 1.5967477560043335, "learning_rate": 9.834423784800115e-06, "loss": 0.4163, "step": 1036 }, { "epoch": 0.08296, "grad_norm": 1.9444454908370972, "learning_rate": 9.834102664515155e-06, "loss": 0.396, "step": 1037 }, { "epoch": 0.08304, "grad_norm": 1.4539134502410889, "learning_rate": 9.833781238392695e-06, "loss": 0.3087, "step": 1038 }, { "epoch": 0.08312, "grad_norm": 1.5706698894500732, "learning_rate": 9.833459506453069e-06, "loss": 0.3336, "step": 1039 }, { "epoch": 0.0832, "grad_norm": 1.93034827709198, "learning_rate": 9.833137468716634e-06, "loss": 0.402, "step": 1040 }, { "epoch": 0.08328, "grad_norm": 1.7693796157836914, "learning_rate": 9.832815125203761e-06, "loss": 0.4466, "step": 1041 }, { "epoch": 0.08336, "grad_norm": 1.506583571434021, "learning_rate": 9.832492475934848e-06, "loss": 0.3304, "step": 1042 }, { "epoch": 0.08344, "grad_norm": 1.3750256299972534, "learning_rate": 9.832169520930303e-06, "loss": 0.2565, "step": 1043 }, { "epoch": 0.08352, "grad_norm": 1.999240517616272, "learning_rate": 9.831846260210563e-06, "loss": 0.486, "step": 1044 }, { "epoch": 0.0836, "grad_norm": 1.6762970685958862, "learning_rate": 9.831522693796077e-06, "loss": 0.3922, "step": 1045 }, { "epoch": 0.08368, "grad_norm": 1.6641404628753662, "learning_rate": 9.831198821707316e-06, "loss": 0.3165, "step": 1046 }, { "epoch": 0.08376, "grad_norm": 1.7150557041168213, "learning_rate": 9.83087464396477e-06, "loss": 0.3909, "step": 1047 }, { "epoch": 0.08384, "grad_norm": 1.5680177211761475, "learning_rate": 9.830550160588951e-06, "loss": 0.4403, "step": 1048 }, { "epoch": 0.08392, "grad_norm": 1.7904253005981445, "learning_rate": 9.830225371600386e-06, "loss": 0.3263, "step": 1049 }, { "epoch": 0.084, "grad_norm": 1.3248636722564697, "learning_rate": 9.829900277019624e-06, "loss": 0.2865, "step": 1050 }, { "epoch": 0.08408, "grad_norm": 1.825286865234375, "learning_rate": 9.829574876867232e-06, "loss": 0.4188, "step": 1051 }, { "epoch": 0.08416, "grad_norm": 1.7181260585784912, "learning_rate": 9.829249171163798e-06, "loss": 0.3737, "step": 1052 }, { "epoch": 0.08424, "grad_norm": 2.284151077270508, "learning_rate": 9.828923159929927e-06, "loss": 0.4879, "step": 1053 }, { "epoch": 0.08432, "grad_norm": 1.6576015949249268, "learning_rate": 9.828596843186244e-06, "loss": 0.3157, "step": 1054 }, { "epoch": 0.0844, "grad_norm": 1.5160009860992432, "learning_rate": 9.828270220953398e-06, "loss": 0.2853, "step": 1055 }, { "epoch": 0.08448, "grad_norm": 1.5151618719100952, "learning_rate": 9.827943293252048e-06, "loss": 0.4335, "step": 1056 }, { "epoch": 0.08456, "grad_norm": 1.3789680004119873, "learning_rate": 9.82761606010288e-06, "loss": 0.3956, "step": 1057 }, { "epoch": 0.08464, "grad_norm": 2.0407497882843018, "learning_rate": 9.8272885215266e-06, "loss": 0.4707, "step": 1058 }, { "epoch": 0.08472, "grad_norm": 1.4480390548706055, "learning_rate": 9.826960677543926e-06, "loss": 0.3389, "step": 1059 }, { "epoch": 0.0848, "grad_norm": 1.438524603843689, "learning_rate": 9.8266325281756e-06, "loss": 0.2841, "step": 1060 }, { "epoch": 0.08488, "grad_norm": 1.9115839004516602, "learning_rate": 9.826304073442385e-06, "loss": 0.3926, "step": 1061 }, { "epoch": 0.08496, "grad_norm": 2.102163314819336, "learning_rate": 9.82597531336506e-06, "loss": 0.4392, "step": 1062 }, { "epoch": 0.08504, "grad_norm": 1.6580944061279297, "learning_rate": 9.825646247964425e-06, "loss": 0.3485, "step": 1063 }, { "epoch": 0.08512, "grad_norm": 1.469714879989624, "learning_rate": 9.825316877261298e-06, "loss": 0.3193, "step": 1064 }, { "epoch": 0.0852, "grad_norm": 1.4311915636062622, "learning_rate": 9.824987201276519e-06, "loss": 0.2758, "step": 1065 }, { "epoch": 0.08528, "grad_norm": 1.8289291858673096, "learning_rate": 9.824657220030942e-06, "loss": 0.4056, "step": 1066 }, { "epoch": 0.08536, "grad_norm": 1.6348884105682373, "learning_rate": 9.824326933545448e-06, "loss": 0.2958, "step": 1067 }, { "epoch": 0.08544, "grad_norm": 1.3245985507965088, "learning_rate": 9.823996341840929e-06, "loss": 0.2905, "step": 1068 }, { "epoch": 0.08552, "grad_norm": 1.329889178276062, "learning_rate": 9.823665444938304e-06, "loss": 0.3558, "step": 1069 }, { "epoch": 0.0856, "grad_norm": 1.8176319599151611, "learning_rate": 9.823334242858506e-06, "loss": 0.3617, "step": 1070 }, { "epoch": 0.08568, "grad_norm": 2.28464674949646, "learning_rate": 9.82300273562249e-06, "loss": 0.4581, "step": 1071 }, { "epoch": 0.08576, "grad_norm": 1.3536391258239746, "learning_rate": 9.822670923251228e-06, "loss": 0.3201, "step": 1072 }, { "epoch": 0.08584, "grad_norm": 1.6658986806869507, "learning_rate": 9.822338805765714e-06, "loss": 0.4, "step": 1073 }, { "epoch": 0.08592, "grad_norm": 1.4204604625701904, "learning_rate": 9.82200638318696e-06, "loss": 0.3206, "step": 1074 }, { "epoch": 0.086, "grad_norm": 1.6750638484954834, "learning_rate": 9.821673655535995e-06, "loss": 0.3092, "step": 1075 }, { "epoch": 0.08608, "grad_norm": 1.8198531866073608, "learning_rate": 9.821340622833873e-06, "loss": 0.3966, "step": 1076 }, { "epoch": 0.08616, "grad_norm": 1.710455298423767, "learning_rate": 9.82100728510166e-06, "loss": 0.2862, "step": 1077 }, { "epoch": 0.08624, "grad_norm": 1.3838942050933838, "learning_rate": 9.820673642360448e-06, "loss": 0.3097, "step": 1078 }, { "epoch": 0.08632, "grad_norm": 1.588562250137329, "learning_rate": 9.820339694631345e-06, "loss": 0.4134, "step": 1079 }, { "epoch": 0.0864, "grad_norm": 1.470871925354004, "learning_rate": 9.820005441935479e-06, "loss": 0.2925, "step": 1080 }, { "epoch": 0.08648, "grad_norm": 1.691702127456665, "learning_rate": 9.819670884293994e-06, "loss": 0.3222, "step": 1081 }, { "epoch": 0.08656, "grad_norm": 1.7869762182235718, "learning_rate": 9.819336021728062e-06, "loss": 0.3606, "step": 1082 }, { "epoch": 0.08664, "grad_norm": 1.437152624130249, "learning_rate": 9.819000854258864e-06, "loss": 0.2756, "step": 1083 }, { "epoch": 0.08672, "grad_norm": 1.7799644470214844, "learning_rate": 9.818665381907605e-06, "loss": 0.3988, "step": 1084 }, { "epoch": 0.0868, "grad_norm": 1.8911408185958862, "learning_rate": 9.818329604695513e-06, "loss": 0.4247, "step": 1085 }, { "epoch": 0.08688, "grad_norm": 2.2056448459625244, "learning_rate": 9.817993522643827e-06, "loss": 0.3675, "step": 1086 }, { "epoch": 0.08696, "grad_norm": 1.294008731842041, "learning_rate": 9.817657135773813e-06, "loss": 0.3124, "step": 1087 }, { "epoch": 0.08704, "grad_norm": 1.4826951026916504, "learning_rate": 9.817320444106753e-06, "loss": 0.3155, "step": 1088 }, { "epoch": 0.08712, "grad_norm": 1.5381442308425903, "learning_rate": 9.816983447663946e-06, "loss": 0.4008, "step": 1089 }, { "epoch": 0.0872, "grad_norm": 1.5871357917785645, "learning_rate": 9.816646146466714e-06, "loss": 0.333, "step": 1090 }, { "epoch": 0.08728, "grad_norm": 1.603145718574524, "learning_rate": 9.816308540536396e-06, "loss": 0.3314, "step": 1091 }, { "epoch": 0.08736, "grad_norm": 1.7113319635391235, "learning_rate": 9.815970629894354e-06, "loss": 0.408, "step": 1092 }, { "epoch": 0.08744, "grad_norm": 1.4441734552383423, "learning_rate": 9.815632414561964e-06, "loss": 0.2921, "step": 1093 }, { "epoch": 0.08752, "grad_norm": 1.7782763242721558, "learning_rate": 9.815293894560623e-06, "loss": 0.3528, "step": 1094 }, { "epoch": 0.0876, "grad_norm": 1.4475657939910889, "learning_rate": 9.814955069911752e-06, "loss": 0.3025, "step": 1095 }, { "epoch": 0.08768, "grad_norm": 1.3749080896377563, "learning_rate": 9.814615940636781e-06, "loss": 0.277, "step": 1096 }, { "epoch": 0.08776, "grad_norm": 1.6869237422943115, "learning_rate": 9.814276506757172e-06, "loss": 0.4035, "step": 1097 }, { "epoch": 0.08784, "grad_norm": 1.6214927434921265, "learning_rate": 9.813936768294397e-06, "loss": 0.3524, "step": 1098 }, { "epoch": 0.08792, "grad_norm": 2.009049654006958, "learning_rate": 9.813596725269948e-06, "loss": 0.3494, "step": 1099 }, { "epoch": 0.088, "grad_norm": 1.901945948600769, "learning_rate": 9.813256377705341e-06, "loss": 0.4302, "step": 1100 }, { "epoch": 0.08808, "grad_norm": 2.3681435585021973, "learning_rate": 9.812915725622109e-06, "loss": 0.5687, "step": 1101 }, { "epoch": 0.08816, "grad_norm": 2.2910284996032715, "learning_rate": 9.812574769041805e-06, "loss": 0.4986, "step": 1102 }, { "epoch": 0.08824, "grad_norm": 1.6809210777282715, "learning_rate": 9.812233507985995e-06, "loss": 0.3687, "step": 1103 }, { "epoch": 0.08832, "grad_norm": 1.779154658317566, "learning_rate": 9.811891942476275e-06, "loss": 0.4001, "step": 1104 }, { "epoch": 0.0884, "grad_norm": 1.6831563711166382, "learning_rate": 9.811550072534251e-06, "loss": 0.3435, "step": 1105 }, { "epoch": 0.08848, "grad_norm": 1.9863594770431519, "learning_rate": 9.811207898181555e-06, "loss": 0.3946, "step": 1106 }, { "epoch": 0.08856, "grad_norm": 1.761527419090271, "learning_rate": 9.81086541943983e-06, "loss": 0.316, "step": 1107 }, { "epoch": 0.08864, "grad_norm": 1.4106063842773438, "learning_rate": 9.810522636330751e-06, "loss": 0.2762, "step": 1108 }, { "epoch": 0.08872, "grad_norm": 1.7829062938690186, "learning_rate": 9.810179548875999e-06, "loss": 0.3734, "step": 1109 }, { "epoch": 0.0888, "grad_norm": 1.5982229709625244, "learning_rate": 9.809836157097282e-06, "loss": 0.3701, "step": 1110 }, { "epoch": 0.08888, "grad_norm": 1.4014500379562378, "learning_rate": 9.809492461016326e-06, "loss": 0.3202, "step": 1111 }, { "epoch": 0.08896, "grad_norm": 1.6743066310882568, "learning_rate": 9.809148460654874e-06, "loss": 0.2995, "step": 1112 }, { "epoch": 0.08904, "grad_norm": 1.6122390031814575, "learning_rate": 9.80880415603469e-06, "loss": 0.3507, "step": 1113 }, { "epoch": 0.08912, "grad_norm": 1.8418755531311035, "learning_rate": 9.808459547177559e-06, "loss": 0.4162, "step": 1114 }, { "epoch": 0.0892, "grad_norm": 1.4559040069580078, "learning_rate": 9.808114634105278e-06, "loss": 0.3797, "step": 1115 }, { "epoch": 0.08928, "grad_norm": 1.9068504571914673, "learning_rate": 9.807769416839677e-06, "loss": 0.4154, "step": 1116 }, { "epoch": 0.08936, "grad_norm": 1.5469180345535278, "learning_rate": 9.807423895402587e-06, "loss": 0.4295, "step": 1117 }, { "epoch": 0.08944, "grad_norm": 1.7214394807815552, "learning_rate": 9.807078069815877e-06, "loss": 0.3601, "step": 1118 }, { "epoch": 0.08952, "grad_norm": 1.7878152132034302, "learning_rate": 9.80673194010142e-06, "loss": 0.3575, "step": 1119 }, { "epoch": 0.0896, "grad_norm": 1.857308030128479, "learning_rate": 9.806385506281117e-06, "loss": 0.4363, "step": 1120 }, { "epoch": 0.08968, "grad_norm": 1.6544005870819092, "learning_rate": 9.806038768376885e-06, "loss": 0.3477, "step": 1121 }, { "epoch": 0.08976, "grad_norm": 1.5055336952209473, "learning_rate": 9.80569172641066e-06, "loss": 0.2681, "step": 1122 }, { "epoch": 0.08984, "grad_norm": 1.9073718786239624, "learning_rate": 9.8053443804044e-06, "loss": 0.4285, "step": 1123 }, { "epoch": 0.08992, "grad_norm": 1.2498080730438232, "learning_rate": 9.80499673038008e-06, "loss": 0.2639, "step": 1124 }, { "epoch": 0.09, "grad_norm": 1.5033406019210815, "learning_rate": 9.804648776359695e-06, "loss": 0.3395, "step": 1125 }, { "epoch": 0.09008, "grad_norm": 1.9122231006622314, "learning_rate": 9.80430051836526e-06, "loss": 0.3571, "step": 1126 }, { "epoch": 0.09016, "grad_norm": 2.6971442699432373, "learning_rate": 9.803951956418803e-06, "loss": 0.4251, "step": 1127 }, { "epoch": 0.09024, "grad_norm": 1.4795504808425903, "learning_rate": 9.803603090542381e-06, "loss": 0.3596, "step": 1128 }, { "epoch": 0.09032, "grad_norm": 1.6504251956939697, "learning_rate": 9.803253920758064e-06, "loss": 0.3429, "step": 1129 }, { "epoch": 0.0904, "grad_norm": 2.284956693649292, "learning_rate": 9.802904447087945e-06, "loss": 0.6959, "step": 1130 }, { "epoch": 0.09048, "grad_norm": 1.777529001235962, "learning_rate": 9.802554669554131e-06, "loss": 0.3375, "step": 1131 }, { "epoch": 0.09056, "grad_norm": 1.831652045249939, "learning_rate": 9.802204588178752e-06, "loss": 0.4083, "step": 1132 }, { "epoch": 0.09064, "grad_norm": 2.0457098484039307, "learning_rate": 9.801854202983957e-06, "loss": 0.3995, "step": 1133 }, { "epoch": 0.09072, "grad_norm": 1.3770190477371216, "learning_rate": 9.801503513991914e-06, "loss": 0.2523, "step": 1134 }, { "epoch": 0.0908, "grad_norm": 1.600197434425354, "learning_rate": 9.80115252122481e-06, "loss": 0.3447, "step": 1135 }, { "epoch": 0.09088, "grad_norm": 1.7964802980422974, "learning_rate": 9.800801224704851e-06, "loss": 0.4085, "step": 1136 }, { "epoch": 0.09096, "grad_norm": 1.558449149131775, "learning_rate": 9.800449624454262e-06, "loss": 0.3532, "step": 1137 }, { "epoch": 0.09104, "grad_norm": 1.844244360923767, "learning_rate": 9.800097720495286e-06, "loss": 0.3174, "step": 1138 }, { "epoch": 0.09112, "grad_norm": 1.8442306518554688, "learning_rate": 9.79974551285019e-06, "loss": 0.3615, "step": 1139 }, { "epoch": 0.0912, "grad_norm": 1.5331751108169556, "learning_rate": 9.799393001541255e-06, "loss": 0.3207, "step": 1140 }, { "epoch": 0.09128, "grad_norm": 1.578279972076416, "learning_rate": 9.799040186590782e-06, "loss": 0.4399, "step": 1141 }, { "epoch": 0.09136, "grad_norm": 1.4967174530029297, "learning_rate": 9.798687068021095e-06, "loss": 0.3297, "step": 1142 }, { "epoch": 0.09144, "grad_norm": 2.0132367610931396, "learning_rate": 9.798333645854536e-06, "loss": 0.3207, "step": 1143 }, { "epoch": 0.09152, "grad_norm": 1.6801670789718628, "learning_rate": 9.79797992011346e-06, "loss": 0.3193, "step": 1144 }, { "epoch": 0.0916, "grad_norm": 2.1940245628356934, "learning_rate": 9.797625890820249e-06, "loss": 0.4476, "step": 1145 }, { "epoch": 0.09168, "grad_norm": 1.625232458114624, "learning_rate": 9.7972715579973e-06, "loss": 0.2663, "step": 1146 }, { "epoch": 0.09176, "grad_norm": 2.0687153339385986, "learning_rate": 9.796916921667033e-06, "loss": 0.3539, "step": 1147 }, { "epoch": 0.09184, "grad_norm": 1.9847198724746704, "learning_rate": 9.796561981851882e-06, "loss": 0.4134, "step": 1148 }, { "epoch": 0.09192, "grad_norm": 1.644768238067627, "learning_rate": 9.796206738574303e-06, "loss": 0.2589, "step": 1149 }, { "epoch": 0.092, "grad_norm": 1.431854486465454, "learning_rate": 9.795851191856774e-06, "loss": 0.3136, "step": 1150 }, { "epoch": 0.09208, "grad_norm": 1.3832859992980957, "learning_rate": 9.795495341721784e-06, "loss": 0.3832, "step": 1151 }, { "epoch": 0.09216, "grad_norm": 1.497043251991272, "learning_rate": 9.795139188191851e-06, "loss": 0.367, "step": 1152 }, { "epoch": 0.09224, "grad_norm": 1.3032704591751099, "learning_rate": 9.794782731289507e-06, "loss": 0.2557, "step": 1153 }, { "epoch": 0.09232, "grad_norm": 1.7582857608795166, "learning_rate": 9.794425971037303e-06, "loss": 0.3647, "step": 1154 }, { "epoch": 0.0924, "grad_norm": 1.4522085189819336, "learning_rate": 9.794068907457809e-06, "loss": 0.2847, "step": 1155 }, { "epoch": 0.09248, "grad_norm": 1.3870216608047485, "learning_rate": 9.793711540573616e-06, "loss": 0.2989, "step": 1156 }, { "epoch": 0.09256, "grad_norm": 1.3415980339050293, "learning_rate": 9.793353870407335e-06, "loss": 0.2967, "step": 1157 }, { "epoch": 0.09264, "grad_norm": 1.8516277074813843, "learning_rate": 9.792995896981591e-06, "loss": 0.3464, "step": 1158 }, { "epoch": 0.09272, "grad_norm": 1.5445516109466553, "learning_rate": 9.792637620319037e-06, "loss": 0.3213, "step": 1159 }, { "epoch": 0.0928, "grad_norm": 1.8748204708099365, "learning_rate": 9.792279040442334e-06, "loss": 0.4657, "step": 1160 }, { "epoch": 0.09288, "grad_norm": 1.5904078483581543, "learning_rate": 9.791920157374173e-06, "loss": 0.3865, "step": 1161 }, { "epoch": 0.09296, "grad_norm": 1.2793481349945068, "learning_rate": 9.791560971137257e-06, "loss": 0.3927, "step": 1162 }, { "epoch": 0.09304, "grad_norm": 1.3844211101531982, "learning_rate": 9.791201481754312e-06, "loss": 0.2778, "step": 1163 }, { "epoch": 0.09312, "grad_norm": 1.733962059020996, "learning_rate": 9.790841689248078e-06, "loss": 0.3492, "step": 1164 }, { "epoch": 0.0932, "grad_norm": 1.4930529594421387, "learning_rate": 9.790481593641324e-06, "loss": 0.2962, "step": 1165 }, { "epoch": 0.09328, "grad_norm": 1.9883126020431519, "learning_rate": 9.790121194956825e-06, "loss": 0.3775, "step": 1166 }, { "epoch": 0.09336, "grad_norm": 1.5635162591934204, "learning_rate": 9.789760493217388e-06, "loss": 0.327, "step": 1167 }, { "epoch": 0.09344, "grad_norm": 1.5043566226959229, "learning_rate": 9.78939948844583e-06, "loss": 0.2745, "step": 1168 }, { "epoch": 0.09352, "grad_norm": 1.3067989349365234, "learning_rate": 9.789038180664994e-06, "loss": 0.2482, "step": 1169 }, { "epoch": 0.0936, "grad_norm": 1.678215503692627, "learning_rate": 9.788676569897734e-06, "loss": 0.316, "step": 1170 }, { "epoch": 0.09368, "grad_norm": 1.8970040082931519, "learning_rate": 9.788314656166931e-06, "loss": 0.4448, "step": 1171 }, { "epoch": 0.09376, "grad_norm": 1.6337584257125854, "learning_rate": 9.787952439495481e-06, "loss": 0.402, "step": 1172 }, { "epoch": 0.09384, "grad_norm": 2.054110050201416, "learning_rate": 9.787589919906301e-06, "loss": 0.4844, "step": 1173 }, { "epoch": 0.09392, "grad_norm": 1.683643102645874, "learning_rate": 9.787227097422327e-06, "loss": 0.4237, "step": 1174 }, { "epoch": 0.094, "grad_norm": 1.800292730331421, "learning_rate": 9.786863972066515e-06, "loss": 0.4866, "step": 1175 }, { "epoch": 0.09408, "grad_norm": 1.3618955612182617, "learning_rate": 9.786500543861833e-06, "loss": 0.3326, "step": 1176 }, { "epoch": 0.09416, "grad_norm": 1.6615434885025024, "learning_rate": 9.786136812831276e-06, "loss": 0.3495, "step": 1177 }, { "epoch": 0.09424, "grad_norm": 1.6406981945037842, "learning_rate": 9.78577277899786e-06, "loss": 0.3495, "step": 1178 }, { "epoch": 0.09432, "grad_norm": 1.4927983283996582, "learning_rate": 9.785408442384612e-06, "loss": 0.3832, "step": 1179 }, { "epoch": 0.0944, "grad_norm": 1.31818425655365, "learning_rate": 9.785043803014584e-06, "loss": 0.3047, "step": 1180 }, { "epoch": 0.09448, "grad_norm": 1.7068977355957031, "learning_rate": 9.784678860910846e-06, "loss": 0.3715, "step": 1181 }, { "epoch": 0.09456, "grad_norm": 2.2386581897735596, "learning_rate": 9.784313616096486e-06, "loss": 0.3983, "step": 1182 }, { "epoch": 0.09464, "grad_norm": 1.369645595550537, "learning_rate": 9.783948068594613e-06, "loss": 0.3607, "step": 1183 }, { "epoch": 0.09472, "grad_norm": 1.6752623319625854, "learning_rate": 9.783582218428352e-06, "loss": 0.4052, "step": 1184 }, { "epoch": 0.0948, "grad_norm": 1.740079641342163, "learning_rate": 9.783216065620849e-06, "loss": 0.3649, "step": 1185 }, { "epoch": 0.09488, "grad_norm": 1.514141321182251, "learning_rate": 9.78284961019527e-06, "loss": 0.3816, "step": 1186 }, { "epoch": 0.09496, "grad_norm": 1.8777161836624146, "learning_rate": 9.782482852174802e-06, "loss": 0.3526, "step": 1187 }, { "epoch": 0.09504, "grad_norm": 2.190656900405884, "learning_rate": 9.782115791582644e-06, "loss": 0.4483, "step": 1188 }, { "epoch": 0.09512, "grad_norm": 1.8506275415420532, "learning_rate": 9.781748428442022e-06, "loss": 0.3753, "step": 1189 }, { "epoch": 0.0952, "grad_norm": 1.515964150428772, "learning_rate": 9.781380762776176e-06, "loss": 0.4088, "step": 1190 }, { "epoch": 0.09528, "grad_norm": 1.1206365823745728, "learning_rate": 9.781012794608368e-06, "loss": 0.2067, "step": 1191 }, { "epoch": 0.09536, "grad_norm": 1.286902666091919, "learning_rate": 9.780644523961877e-06, "loss": 0.2787, "step": 1192 }, { "epoch": 0.09544, "grad_norm": 1.765989065170288, "learning_rate": 9.780275950860005e-06, "loss": 0.3091, "step": 1193 }, { "epoch": 0.09552, "grad_norm": 2.0813465118408203, "learning_rate": 9.779907075326066e-06, "loss": 0.3626, "step": 1194 }, { "epoch": 0.0956, "grad_norm": 1.8357547521591187, "learning_rate": 9.779537897383403e-06, "loss": 0.3385, "step": 1195 }, { "epoch": 0.09568, "grad_norm": 2.120861530303955, "learning_rate": 9.779168417055368e-06, "loss": 0.462, "step": 1196 }, { "epoch": 0.09576, "grad_norm": 1.4538586139678955, "learning_rate": 9.778798634365336e-06, "loss": 0.3281, "step": 1197 }, { "epoch": 0.09584, "grad_norm": 2.0800697803497314, "learning_rate": 9.778428549336707e-06, "loss": 0.4369, "step": 1198 }, { "epoch": 0.09592, "grad_norm": 1.2680668830871582, "learning_rate": 9.778058161992892e-06, "loss": 0.2548, "step": 1199 }, { "epoch": 0.096, "grad_norm": 1.8447930812835693, "learning_rate": 9.777687472357324e-06, "loss": 0.5501, "step": 1200 }, { "epoch": 0.09608, "grad_norm": 2.0600554943084717, "learning_rate": 9.777316480453457e-06, "loss": 0.604, "step": 1201 }, { "epoch": 0.09616, "grad_norm": 1.6585683822631836, "learning_rate": 9.77694518630476e-06, "loss": 0.3472, "step": 1202 }, { "epoch": 0.09624, "grad_norm": 1.3858237266540527, "learning_rate": 9.776573589934726e-06, "loss": 0.3476, "step": 1203 }, { "epoch": 0.09632, "grad_norm": 1.6310840845108032, "learning_rate": 9.776201691366863e-06, "loss": 0.3252, "step": 1204 }, { "epoch": 0.0964, "grad_norm": 1.4285833835601807, "learning_rate": 9.775829490624698e-06, "loss": 0.2611, "step": 1205 }, { "epoch": 0.09648, "grad_norm": 1.5389498472213745, "learning_rate": 9.775456987731784e-06, "loss": 0.3895, "step": 1206 }, { "epoch": 0.09656, "grad_norm": 1.5493495464324951, "learning_rate": 9.775084182711683e-06, "loss": 0.3191, "step": 1207 }, { "epoch": 0.09664, "grad_norm": 1.5432543754577637, "learning_rate": 9.774711075587985e-06, "loss": 0.4227, "step": 1208 }, { "epoch": 0.09672, "grad_norm": 1.4613178968429565, "learning_rate": 9.774337666384293e-06, "loss": 0.3943, "step": 1209 }, { "epoch": 0.0968, "grad_norm": 2.0416183471679688, "learning_rate": 9.773963955124232e-06, "loss": 0.5029, "step": 1210 }, { "epoch": 0.09688, "grad_norm": 1.3715733289718628, "learning_rate": 9.773589941831446e-06, "loss": 0.3508, "step": 1211 }, { "epoch": 0.09696, "grad_norm": 1.3506369590759277, "learning_rate": 9.773215626529596e-06, "loss": 0.3365, "step": 1212 }, { "epoch": 0.09704, "grad_norm": 1.867976427078247, "learning_rate": 9.772841009242362e-06, "loss": 0.3752, "step": 1213 }, { "epoch": 0.09712, "grad_norm": 1.6031577587127686, "learning_rate": 9.772466089993451e-06, "loss": 0.407, "step": 1214 }, { "epoch": 0.0972, "grad_norm": 1.4785380363464355, "learning_rate": 9.772090868806578e-06, "loss": 0.2738, "step": 1215 }, { "epoch": 0.09728, "grad_norm": 1.7080274820327759, "learning_rate": 9.771715345705482e-06, "loss": 0.3905, "step": 1216 }, { "epoch": 0.09736, "grad_norm": 1.4409105777740479, "learning_rate": 9.771339520713924e-06, "loss": 0.2903, "step": 1217 }, { "epoch": 0.09744, "grad_norm": 2.0454001426696777, "learning_rate": 9.77096339385568e-06, "loss": 0.4558, "step": 1218 }, { "epoch": 0.09752, "grad_norm": 1.4832075834274292, "learning_rate": 9.770586965154542e-06, "loss": 0.3588, "step": 1219 }, { "epoch": 0.0976, "grad_norm": 1.7369167804718018, "learning_rate": 9.770210234634333e-06, "loss": 0.3038, "step": 1220 }, { "epoch": 0.09768, "grad_norm": 1.412338137626648, "learning_rate": 9.769833202318882e-06, "loss": 0.27, "step": 1221 }, { "epoch": 0.09776, "grad_norm": 2.267824649810791, "learning_rate": 9.769455868232044e-06, "loss": 0.6121, "step": 1222 }, { "epoch": 0.09784, "grad_norm": 1.5539761781692505, "learning_rate": 9.769078232397693e-06, "loss": 0.3358, "step": 1223 }, { "epoch": 0.09792, "grad_norm": 1.4364866018295288, "learning_rate": 9.76870029483972e-06, "loss": 0.3932, "step": 1224 }, { "epoch": 0.098, "grad_norm": 1.2992466688156128, "learning_rate": 9.768322055582034e-06, "loss": 0.2755, "step": 1225 }, { "epoch": 0.09808, "grad_norm": 1.7094968557357788, "learning_rate": 9.767943514648567e-06, "loss": 0.3883, "step": 1226 }, { "epoch": 0.09816, "grad_norm": 1.9164812564849854, "learning_rate": 9.767564672063268e-06, "loss": 0.4088, "step": 1227 }, { "epoch": 0.09824, "grad_norm": 2.1447086334228516, "learning_rate": 9.767185527850103e-06, "loss": 0.4737, "step": 1228 }, { "epoch": 0.09832, "grad_norm": 1.7146050930023193, "learning_rate": 9.766806082033061e-06, "loss": 0.3838, "step": 1229 }, { "epoch": 0.0984, "grad_norm": 1.4320570230484009, "learning_rate": 9.766426334636149e-06, "loss": 0.3106, "step": 1230 }, { "epoch": 0.09848, "grad_norm": 1.5674022436141968, "learning_rate": 9.76604628568339e-06, "loss": 0.3787, "step": 1231 }, { "epoch": 0.09856, "grad_norm": 1.2451162338256836, "learning_rate": 9.765665935198831e-06, "loss": 0.3102, "step": 1232 }, { "epoch": 0.09864, "grad_norm": 1.3635990619659424, "learning_rate": 9.765285283206533e-06, "loss": 0.2954, "step": 1233 }, { "epoch": 0.09872, "grad_norm": 2.4409143924713135, "learning_rate": 9.764904329730583e-06, "loss": 0.3997, "step": 1234 }, { "epoch": 0.0988, "grad_norm": 1.705710530281067, "learning_rate": 9.764523074795077e-06, "loss": 0.4332, "step": 1235 }, { "epoch": 0.09888, "grad_norm": 1.3760895729064941, "learning_rate": 9.764141518424138e-06, "loss": 0.3509, "step": 1236 }, { "epoch": 0.09896, "grad_norm": 1.7472193241119385, "learning_rate": 9.763759660641905e-06, "loss": 0.3773, "step": 1237 }, { "epoch": 0.09904, "grad_norm": 1.6968291997909546, "learning_rate": 9.76337750147254e-06, "loss": 0.3579, "step": 1238 }, { "epoch": 0.09912, "grad_norm": 1.7409708499908447, "learning_rate": 9.762995040940217e-06, "loss": 0.2909, "step": 1239 }, { "epoch": 0.0992, "grad_norm": 1.7769598960876465, "learning_rate": 9.762612279069136e-06, "loss": 0.3435, "step": 1240 }, { "epoch": 0.09928, "grad_norm": 1.4426788091659546, "learning_rate": 9.762229215883511e-06, "loss": 0.3389, "step": 1241 }, { "epoch": 0.09936, "grad_norm": 1.7437098026275635, "learning_rate": 9.76184585140758e-06, "loss": 0.3582, "step": 1242 }, { "epoch": 0.09944, "grad_norm": 1.8013086318969727, "learning_rate": 9.761462185665593e-06, "loss": 0.3392, "step": 1243 }, { "epoch": 0.09952, "grad_norm": 1.4805794954299927, "learning_rate": 9.761078218681827e-06, "loss": 0.2881, "step": 1244 }, { "epoch": 0.0996, "grad_norm": 1.754407525062561, "learning_rate": 9.760693950480572e-06, "loss": 0.3884, "step": 1245 }, { "epoch": 0.09968, "grad_norm": 1.5800998210906982, "learning_rate": 9.760309381086139e-06, "loss": 0.3402, "step": 1246 }, { "epoch": 0.09976, "grad_norm": 1.5772699117660522, "learning_rate": 9.759924510522861e-06, "loss": 0.3164, "step": 1247 }, { "epoch": 0.09984, "grad_norm": 1.4671974182128906, "learning_rate": 9.759539338815085e-06, "loss": 0.3409, "step": 1248 }, { "epoch": 0.09992, "grad_norm": 1.8091286420822144, "learning_rate": 9.75915386598718e-06, "loss": 0.3817, "step": 1249 }, { "epoch": 0.1, "grad_norm": 1.8199232816696167, "learning_rate": 9.758768092063536e-06, "loss": 0.3736, "step": 1250 }, { "epoch": 0.10008, "grad_norm": 1.4588650465011597, "learning_rate": 9.758382017068558e-06, "loss": 0.3103, "step": 1251 }, { "epoch": 0.10016, "grad_norm": 2.0028064250946045, "learning_rate": 9.757995641026669e-06, "loss": 0.4277, "step": 1252 }, { "epoch": 0.10024, "grad_norm": 1.8617825508117676, "learning_rate": 9.757608963962317e-06, "loss": 0.3659, "step": 1253 }, { "epoch": 0.10032, "grad_norm": 1.7988715171813965, "learning_rate": 9.757221985899965e-06, "loss": 0.3815, "step": 1254 }, { "epoch": 0.1004, "grad_norm": 1.3747351169586182, "learning_rate": 9.756834706864096e-06, "loss": 0.3078, "step": 1255 }, { "epoch": 0.10048, "grad_norm": 1.8129006624221802, "learning_rate": 9.756447126879212e-06, "loss": 0.3561, "step": 1256 }, { "epoch": 0.10056, "grad_norm": 1.850455641746521, "learning_rate": 9.756059245969832e-06, "loss": 0.4652, "step": 1257 }, { "epoch": 0.10064, "grad_norm": 1.7385430335998535, "learning_rate": 9.755671064160499e-06, "loss": 0.3239, "step": 1258 }, { "epoch": 0.10072, "grad_norm": 1.8935037851333618, "learning_rate": 9.755282581475769e-06, "loss": 0.4487, "step": 1259 }, { "epoch": 0.1008, "grad_norm": 1.537541389465332, "learning_rate": 9.754893797940222e-06, "loss": 0.2818, "step": 1260 }, { "epoch": 0.10088, "grad_norm": 1.770788550376892, "learning_rate": 9.754504713578453e-06, "loss": 0.3322, "step": 1261 }, { "epoch": 0.10096, "grad_norm": 1.3864850997924805, "learning_rate": 9.75411532841508e-06, "loss": 0.2596, "step": 1262 }, { "epoch": 0.10104, "grad_norm": 1.4967447519302368, "learning_rate": 9.753725642474739e-06, "loss": 0.3948, "step": 1263 }, { "epoch": 0.10112, "grad_norm": 1.707395315170288, "learning_rate": 9.75333565578208e-06, "loss": 0.4174, "step": 1264 }, { "epoch": 0.1012, "grad_norm": 1.876741886138916, "learning_rate": 9.752945368361782e-06, "loss": 0.4069, "step": 1265 }, { "epoch": 0.10128, "grad_norm": 1.4767177104949951, "learning_rate": 9.75255478023853e-06, "loss": 0.3826, "step": 1266 }, { "epoch": 0.10136, "grad_norm": 2.0608136653900146, "learning_rate": 9.752163891437042e-06, "loss": 0.4269, "step": 1267 }, { "epoch": 0.10144, "grad_norm": 1.9878028631210327, "learning_rate": 9.751772701982045e-06, "loss": 0.4665, "step": 1268 }, { "epoch": 0.10152, "grad_norm": 1.6732946634292603, "learning_rate": 9.751381211898288e-06, "loss": 0.3332, "step": 1269 }, { "epoch": 0.1016, "grad_norm": 1.626977562904358, "learning_rate": 9.75098942121054e-06, "loss": 0.4315, "step": 1270 }, { "epoch": 0.10168, "grad_norm": 1.6960049867630005, "learning_rate": 9.750597329943588e-06, "loss": 0.328, "step": 1271 }, { "epoch": 0.10176, "grad_norm": 1.4753273725509644, "learning_rate": 9.75020493812224e-06, "loss": 0.3749, "step": 1272 }, { "epoch": 0.10184, "grad_norm": 1.60029935836792, "learning_rate": 9.749812245771318e-06, "loss": 0.37, "step": 1273 }, { "epoch": 0.10192, "grad_norm": 1.379223346710205, "learning_rate": 9.749419252915668e-06, "loss": 0.2877, "step": 1274 }, { "epoch": 0.102, "grad_norm": 1.588037371635437, "learning_rate": 9.749025959580156e-06, "loss": 0.3533, "step": 1275 }, { "epoch": 0.10208, "grad_norm": 1.8960047960281372, "learning_rate": 9.748632365789658e-06, "loss": 0.5091, "step": 1276 }, { "epoch": 0.10216, "grad_norm": 1.6755867004394531, "learning_rate": 9.748238471569083e-06, "loss": 0.3203, "step": 1277 }, { "epoch": 0.10224, "grad_norm": 1.5891623497009277, "learning_rate": 9.747844276943345e-06, "loss": 0.4389, "step": 1278 }, { "epoch": 0.10232, "grad_norm": 1.7560958862304688, "learning_rate": 9.747449781937388e-06, "loss": 0.281, "step": 1279 }, { "epoch": 0.1024, "grad_norm": 1.9748413562774658, "learning_rate": 9.747054986576165e-06, "loss": 0.5097, "step": 1280 }, { "epoch": 0.10248, "grad_norm": 1.5240323543548584, "learning_rate": 9.74665989088466e-06, "loss": 0.3438, "step": 1281 }, { "epoch": 0.10256, "grad_norm": 1.2884881496429443, "learning_rate": 9.746264494887865e-06, "loss": 0.3211, "step": 1282 }, { "epoch": 0.10264, "grad_norm": 2.2344231605529785, "learning_rate": 9.745868798610796e-06, "loss": 0.3818, "step": 1283 }, { "epoch": 0.10272, "grad_norm": 1.9735519886016846, "learning_rate": 9.745472802078488e-06, "loss": 0.35, "step": 1284 }, { "epoch": 0.1028, "grad_norm": 1.7784732580184937, "learning_rate": 9.745076505315994e-06, "loss": 0.4224, "step": 1285 }, { "epoch": 0.10288, "grad_norm": 1.9323874711990356, "learning_rate": 9.744679908348386e-06, "loss": 0.4125, "step": 1286 }, { "epoch": 0.10296, "grad_norm": 2.1199615001678467, "learning_rate": 9.74428301120076e-06, "loss": 0.4652, "step": 1287 }, { "epoch": 0.10304, "grad_norm": 1.5500082969665527, "learning_rate": 9.743885813898217e-06, "loss": 0.3081, "step": 1288 }, { "epoch": 0.10312, "grad_norm": 1.9076387882232666, "learning_rate": 9.743488316465895e-06, "loss": 0.4051, "step": 1289 }, { "epoch": 0.1032, "grad_norm": 1.48446524143219, "learning_rate": 9.743090518928937e-06, "loss": 0.3234, "step": 1290 }, { "epoch": 0.10328, "grad_norm": 2.2340919971466064, "learning_rate": 9.742692421312515e-06, "loss": 0.4365, "step": 1291 }, { "epoch": 0.10336, "grad_norm": 1.6090425252914429, "learning_rate": 9.74229402364181e-06, "loss": 0.3622, "step": 1292 }, { "epoch": 0.10344, "grad_norm": 1.7896775007247925, "learning_rate": 9.74189532594203e-06, "loss": 0.388, "step": 1293 }, { "epoch": 0.10352, "grad_norm": 1.6529676914215088, "learning_rate": 9.7414963282384e-06, "loss": 0.2979, "step": 1294 }, { "epoch": 0.1036, "grad_norm": 1.5958153009414673, "learning_rate": 9.741097030556162e-06, "loss": 0.3039, "step": 1295 }, { "epoch": 0.10368, "grad_norm": 2.0807502269744873, "learning_rate": 9.740697432920579e-06, "loss": 0.4092, "step": 1296 }, { "epoch": 0.10376, "grad_norm": 1.3676483631134033, "learning_rate": 9.740297535356931e-06, "loss": 0.2693, "step": 1297 }, { "epoch": 0.10384, "grad_norm": 2.016907215118408, "learning_rate": 9.739897337890521e-06, "loss": 0.3837, "step": 1298 }, { "epoch": 0.10392, "grad_norm": 1.2759778499603271, "learning_rate": 9.739496840546663e-06, "loss": 0.2593, "step": 1299 }, { "epoch": 0.104, "grad_norm": 1.7022594213485718, "learning_rate": 9.7390960433507e-06, "loss": 0.3582, "step": 1300 }, { "epoch": 0.10408, "grad_norm": 1.783856987953186, "learning_rate": 9.738694946327988e-06, "loss": 0.4139, "step": 1301 }, { "epoch": 0.10416, "grad_norm": 1.6456480026245117, "learning_rate": 9.738293549503902e-06, "loss": 0.3018, "step": 1302 }, { "epoch": 0.10424, "grad_norm": 1.666909098625183, "learning_rate": 9.737891852903838e-06, "loss": 0.3686, "step": 1303 }, { "epoch": 0.10432, "grad_norm": 1.3941386938095093, "learning_rate": 9.737489856553209e-06, "loss": 0.4, "step": 1304 }, { "epoch": 0.1044, "grad_norm": 1.745068907737732, "learning_rate": 9.737087560477449e-06, "loss": 0.3254, "step": 1305 }, { "epoch": 0.10448, "grad_norm": 1.4899107217788696, "learning_rate": 9.736684964702008e-06, "loss": 0.3617, "step": 1306 }, { "epoch": 0.10456, "grad_norm": 1.8580790758132935, "learning_rate": 9.736282069252358e-06, "loss": 0.368, "step": 1307 }, { "epoch": 0.10464, "grad_norm": 1.2298766374588013, "learning_rate": 9.735878874153993e-06, "loss": 0.293, "step": 1308 }, { "epoch": 0.10472, "grad_norm": 1.4866394996643066, "learning_rate": 9.735475379432414e-06, "loss": 0.3504, "step": 1309 }, { "epoch": 0.1048, "grad_norm": 1.508781909942627, "learning_rate": 9.735071585113153e-06, "loss": 0.3257, "step": 1310 }, { "epoch": 0.10488, "grad_norm": 1.6359803676605225, "learning_rate": 9.734667491221758e-06, "loss": 0.3885, "step": 1311 }, { "epoch": 0.10496, "grad_norm": 1.50149405002594, "learning_rate": 9.734263097783792e-06, "loss": 0.3049, "step": 1312 }, { "epoch": 0.10504, "grad_norm": 1.7158201932907104, "learning_rate": 9.73385840482484e-06, "loss": 0.4445, "step": 1313 }, { "epoch": 0.10512, "grad_norm": 1.7673109769821167, "learning_rate": 9.733453412370508e-06, "loss": 0.3097, "step": 1314 }, { "epoch": 0.1052, "grad_norm": 1.5726984739303589, "learning_rate": 9.733048120446416e-06, "loss": 0.2946, "step": 1315 }, { "epoch": 0.10528, "grad_norm": 1.9193859100341797, "learning_rate": 9.732642529078206e-06, "loss": 0.4489, "step": 1316 }, { "epoch": 0.10536, "grad_norm": 1.6108776330947876, "learning_rate": 9.73223663829154e-06, "loss": 0.3922, "step": 1317 }, { "epoch": 0.10544, "grad_norm": 1.4639272689819336, "learning_rate": 9.731830448112096e-06, "loss": 0.3506, "step": 1318 }, { "epoch": 0.10552, "grad_norm": 1.342962622642517, "learning_rate": 9.731423958565571e-06, "loss": 0.3522, "step": 1319 }, { "epoch": 0.1056, "grad_norm": 1.7002488374710083, "learning_rate": 9.731017169677683e-06, "loss": 0.4434, "step": 1320 }, { "epoch": 0.10568, "grad_norm": 1.669387698173523, "learning_rate": 9.73061008147417e-06, "loss": 0.3224, "step": 1321 }, { "epoch": 0.10576, "grad_norm": 1.4158101081848145, "learning_rate": 9.730202693980786e-06, "loss": 0.3246, "step": 1322 }, { "epoch": 0.10584, "grad_norm": 1.3361846208572388, "learning_rate": 9.729795007223303e-06, "loss": 0.269, "step": 1323 }, { "epoch": 0.10592, "grad_norm": 1.3655201196670532, "learning_rate": 9.729387021227518e-06, "loss": 0.3433, "step": 1324 }, { "epoch": 0.106, "grad_norm": 1.260786771774292, "learning_rate": 9.728978736019238e-06, "loss": 0.2933, "step": 1325 }, { "epoch": 0.10608, "grad_norm": 1.3825486898422241, "learning_rate": 9.7285701516243e-06, "loss": 0.3286, "step": 1326 }, { "epoch": 0.10616, "grad_norm": 1.707014799118042, "learning_rate": 9.72816126806855e-06, "loss": 0.447, "step": 1327 }, { "epoch": 0.10624, "grad_norm": 1.8020687103271484, "learning_rate": 9.727752085377855e-06, "loss": 0.4374, "step": 1328 }, { "epoch": 0.10632, "grad_norm": 1.697378158569336, "learning_rate": 9.727342603578105e-06, "loss": 0.3393, "step": 1329 }, { "epoch": 0.1064, "grad_norm": 1.4379756450653076, "learning_rate": 9.726932822695208e-06, "loss": 0.2879, "step": 1330 }, { "epoch": 0.10648, "grad_norm": 2.371371269226074, "learning_rate": 9.726522742755085e-06, "loss": 0.4398, "step": 1331 }, { "epoch": 0.10656, "grad_norm": 1.6934208869934082, "learning_rate": 9.726112363783684e-06, "loss": 0.3344, "step": 1332 }, { "epoch": 0.10664, "grad_norm": 1.745850682258606, "learning_rate": 9.725701685806968e-06, "loss": 0.4684, "step": 1333 }, { "epoch": 0.10672, "grad_norm": 1.6199334859848022, "learning_rate": 9.725290708850919e-06, "loss": 0.3236, "step": 1334 }, { "epoch": 0.1068, "grad_norm": 1.4844225645065308, "learning_rate": 9.724879432941536e-06, "loss": 0.3124, "step": 1335 }, { "epoch": 0.10688, "grad_norm": 1.5500150918960571, "learning_rate": 9.724467858104843e-06, "loss": 0.3436, "step": 1336 }, { "epoch": 0.10696, "grad_norm": 1.6310945749282837, "learning_rate": 9.724055984366876e-06, "loss": 0.3663, "step": 1337 }, { "epoch": 0.10704, "grad_norm": 1.5722607374191284, "learning_rate": 9.723643811753693e-06, "loss": 0.3498, "step": 1338 }, { "epoch": 0.10712, "grad_norm": 1.2501575946807861, "learning_rate": 9.723231340291372e-06, "loss": 0.2408, "step": 1339 }, { "epoch": 0.1072, "grad_norm": 1.5553503036499023, "learning_rate": 9.722818570006008e-06, "loss": 0.4481, "step": 1340 }, { "epoch": 0.10728, "grad_norm": 2.1203765869140625, "learning_rate": 9.722405500923715e-06, "loss": 0.4438, "step": 1341 }, { "epoch": 0.10736, "grad_norm": 1.671330213546753, "learning_rate": 9.721992133070627e-06, "loss": 0.3637, "step": 1342 }, { "epoch": 0.10744, "grad_norm": 1.697939157485962, "learning_rate": 9.721578466472896e-06, "loss": 0.3393, "step": 1343 }, { "epoch": 0.10752, "grad_norm": 1.2914625406265259, "learning_rate": 9.721164501156697e-06, "loss": 0.2479, "step": 1344 }, { "epoch": 0.1076, "grad_norm": 1.7447216510772705, "learning_rate": 9.720750237148214e-06, "loss": 0.5241, "step": 1345 }, { "epoch": 0.10768, "grad_norm": 1.8008346557617188, "learning_rate": 9.72033567447366e-06, "loss": 0.4449, "step": 1346 }, { "epoch": 0.10776, "grad_norm": 1.2198455333709717, "learning_rate": 9.719920813159262e-06, "loss": 0.2726, "step": 1347 }, { "epoch": 0.10784, "grad_norm": 1.7773081064224243, "learning_rate": 9.719505653231268e-06, "loss": 0.3292, "step": 1348 }, { "epoch": 0.10792, "grad_norm": 1.6153675317764282, "learning_rate": 9.719090194715943e-06, "loss": 0.404, "step": 1349 }, { "epoch": 0.108, "grad_norm": 1.9209851026535034, "learning_rate": 9.71867443763957e-06, "loss": 0.4205, "step": 1350 }, { "epoch": 0.10808, "grad_norm": 1.8615111112594604, "learning_rate": 9.718258382028456e-06, "loss": 0.4368, "step": 1351 }, { "epoch": 0.10816, "grad_norm": 1.647263526916504, "learning_rate": 9.71784202790892e-06, "loss": 0.5561, "step": 1352 }, { "epoch": 0.10824, "grad_norm": 1.804128646850586, "learning_rate": 9.717425375307305e-06, "loss": 0.3763, "step": 1353 }, { "epoch": 0.10832, "grad_norm": 1.5256234407424927, "learning_rate": 9.717008424249973e-06, "loss": 0.3653, "step": 1354 }, { "epoch": 0.1084, "grad_norm": 1.6643016338348389, "learning_rate": 9.716591174763297e-06, "loss": 0.3654, "step": 1355 }, { "epoch": 0.10848, "grad_norm": 1.531507968902588, "learning_rate": 9.716173626873682e-06, "loss": 0.2859, "step": 1356 }, { "epoch": 0.10856, "grad_norm": 1.4687572717666626, "learning_rate": 9.71575578060754e-06, "loss": 0.3194, "step": 1357 }, { "epoch": 0.10864, "grad_norm": 1.7269558906555176, "learning_rate": 9.715337635991312e-06, "loss": 0.4283, "step": 1358 }, { "epoch": 0.10872, "grad_norm": 1.9255378246307373, "learning_rate": 9.714919193051448e-06, "loss": 0.3628, "step": 1359 }, { "epoch": 0.1088, "grad_norm": 1.5584735870361328, "learning_rate": 9.714500451814421e-06, "loss": 0.2875, "step": 1360 }, { "epoch": 0.10888, "grad_norm": 1.3883323669433594, "learning_rate": 9.714081412306728e-06, "loss": 0.2614, "step": 1361 }, { "epoch": 0.10896, "grad_norm": 1.938321590423584, "learning_rate": 9.713662074554875e-06, "loss": 0.3478, "step": 1362 }, { "epoch": 0.10904, "grad_norm": 1.7196362018585205, "learning_rate": 9.713242438585397e-06, "loss": 0.3891, "step": 1363 }, { "epoch": 0.10912, "grad_norm": 1.8739652633666992, "learning_rate": 9.712822504424839e-06, "loss": 0.4158, "step": 1364 }, { "epoch": 0.1092, "grad_norm": 1.669812560081482, "learning_rate": 9.71240227209977e-06, "loss": 0.3483, "step": 1365 }, { "epoch": 0.10928, "grad_norm": 1.673043131828308, "learning_rate": 9.711981741636777e-06, "loss": 0.3858, "step": 1366 }, { "epoch": 0.10936, "grad_norm": 1.8101592063903809, "learning_rate": 9.711560913062465e-06, "loss": 0.3462, "step": 1367 }, { "epoch": 0.10944, "grad_norm": 1.4530932903289795, "learning_rate": 9.711139786403461e-06, "loss": 0.2484, "step": 1368 }, { "epoch": 0.10952, "grad_norm": 1.6506141424179077, "learning_rate": 9.710718361686405e-06, "loss": 0.3754, "step": 1369 }, { "epoch": 0.1096, "grad_norm": 1.7748082876205444, "learning_rate": 9.71029663893796e-06, "loss": 0.3619, "step": 1370 }, { "epoch": 0.10968, "grad_norm": 1.5540876388549805, "learning_rate": 9.709874618184808e-06, "loss": 0.3668, "step": 1371 }, { "epoch": 0.10976, "grad_norm": 1.5610040426254272, "learning_rate": 9.709452299453648e-06, "loss": 0.3208, "step": 1372 }, { "epoch": 0.10984, "grad_norm": 1.334820032119751, "learning_rate": 9.709029682771198e-06, "loss": 0.2853, "step": 1373 }, { "epoch": 0.10992, "grad_norm": 1.774716854095459, "learning_rate": 9.708606768164199e-06, "loss": 0.406, "step": 1374 }, { "epoch": 0.11, "grad_norm": 1.4674115180969238, "learning_rate": 9.708183555659404e-06, "loss": 0.3442, "step": 1375 }, { "epoch": 0.11008, "grad_norm": 1.6923162937164307, "learning_rate": 9.707760045283587e-06, "loss": 0.4355, "step": 1376 }, { "epoch": 0.11016, "grad_norm": 1.6160029172897339, "learning_rate": 9.707336237063546e-06, "loss": 0.3239, "step": 1377 }, { "epoch": 0.11024, "grad_norm": 1.5265469551086426, "learning_rate": 9.70691213102609e-06, "loss": 0.3529, "step": 1378 }, { "epoch": 0.11032, "grad_norm": 1.5923312902450562, "learning_rate": 9.706487727198055e-06, "loss": 0.3371, "step": 1379 }, { "epoch": 0.1104, "grad_norm": 1.9004793167114258, "learning_rate": 9.706063025606288e-06, "loss": 0.3516, "step": 1380 }, { "epoch": 0.11048, "grad_norm": 1.440619945526123, "learning_rate": 9.70563802627766e-06, "loss": 0.2806, "step": 1381 }, { "epoch": 0.11056, "grad_norm": 1.896283507347107, "learning_rate": 9.705212729239061e-06, "loss": 0.3541, "step": 1382 }, { "epoch": 0.11064, "grad_norm": 2.1123244762420654, "learning_rate": 9.704787134517396e-06, "loss": 0.3817, "step": 1383 }, { "epoch": 0.11072, "grad_norm": 2.1019601821899414, "learning_rate": 9.704361242139589e-06, "loss": 0.3864, "step": 1384 }, { "epoch": 0.1108, "grad_norm": 1.6103894710540771, "learning_rate": 9.703935052132589e-06, "loss": 0.3911, "step": 1385 }, { "epoch": 0.11088, "grad_norm": 1.6342370510101318, "learning_rate": 9.703508564523356e-06, "loss": 0.3514, "step": 1386 }, { "epoch": 0.11096, "grad_norm": 1.655190348625183, "learning_rate": 9.703081779338877e-06, "loss": 0.3249, "step": 1387 }, { "epoch": 0.11104, "grad_norm": 1.3274317979812622, "learning_rate": 9.702654696606147e-06, "loss": 0.3422, "step": 1388 }, { "epoch": 0.11112, "grad_norm": 1.903498888015747, "learning_rate": 9.702227316352192e-06, "loss": 0.4509, "step": 1389 }, { "epoch": 0.1112, "grad_norm": 1.5915570259094238, "learning_rate": 9.701799638604048e-06, "loss": 0.3718, "step": 1390 }, { "epoch": 0.11128, "grad_norm": 1.9381896257400513, "learning_rate": 9.701371663388771e-06, "loss": 0.3438, "step": 1391 }, { "epoch": 0.11136, "grad_norm": 1.6756565570831299, "learning_rate": 9.700943390733442e-06, "loss": 0.3565, "step": 1392 }, { "epoch": 0.11144, "grad_norm": 1.537050485610962, "learning_rate": 9.700514820665153e-06, "loss": 0.2721, "step": 1393 }, { "epoch": 0.11152, "grad_norm": 1.410021424293518, "learning_rate": 9.70008595321102e-06, "loss": 0.2962, "step": 1394 }, { "epoch": 0.1116, "grad_norm": 1.8756940364837646, "learning_rate": 9.699656788398175e-06, "loss": 0.4885, "step": 1395 }, { "epoch": 0.11168, "grad_norm": 1.6304752826690674, "learning_rate": 9.69922732625377e-06, "loss": 0.382, "step": 1396 }, { "epoch": 0.11176, "grad_norm": 1.274293303489685, "learning_rate": 9.698797566804976e-06, "loss": 0.2796, "step": 1397 }, { "epoch": 0.11184, "grad_norm": 1.7127177715301514, "learning_rate": 9.69836751007898e-06, "loss": 0.3734, "step": 1398 }, { "epoch": 0.11192, "grad_norm": 1.886913776397705, "learning_rate": 9.697937156102997e-06, "loss": 0.3908, "step": 1399 }, { "epoch": 0.112, "grad_norm": 1.5846264362335205, "learning_rate": 9.697506504904246e-06, "loss": 0.3985, "step": 1400 }, { "epoch": 0.11208, "grad_norm": 1.3665478229522705, "learning_rate": 9.697075556509978e-06, "loss": 0.2967, "step": 1401 }, { "epoch": 0.11216, "grad_norm": 1.4394631385803223, "learning_rate": 9.696644310947453e-06, "loss": 0.3561, "step": 1402 }, { "epoch": 0.11224, "grad_norm": 1.8952381610870361, "learning_rate": 9.69621276824396e-06, "loss": 0.5919, "step": 1403 }, { "epoch": 0.11232, "grad_norm": 1.3404344320297241, "learning_rate": 9.6957809284268e-06, "loss": 0.3089, "step": 1404 }, { "epoch": 0.1124, "grad_norm": 1.6374577283859253, "learning_rate": 9.69534879152329e-06, "loss": 0.3373, "step": 1405 }, { "epoch": 0.11248, "grad_norm": 1.6670361757278442, "learning_rate": 9.694916357560774e-06, "loss": 0.4697, "step": 1406 }, { "epoch": 0.11256, "grad_norm": 2.1049678325653076, "learning_rate": 9.694483626566609e-06, "loss": 0.3929, "step": 1407 }, { "epoch": 0.11264, "grad_norm": 2.02200984954834, "learning_rate": 9.694050598568173e-06, "loss": 0.503, "step": 1408 }, { "epoch": 0.11272, "grad_norm": 1.2237039804458618, "learning_rate": 9.693617273592861e-06, "loss": 0.2909, "step": 1409 }, { "epoch": 0.1128, "grad_norm": 1.4623360633850098, "learning_rate": 9.69318365166809e-06, "loss": 0.2837, "step": 1410 }, { "epoch": 0.11288, "grad_norm": 1.7014158964157104, "learning_rate": 9.692749732821293e-06, "loss": 0.2977, "step": 1411 }, { "epoch": 0.11296, "grad_norm": 1.7711082696914673, "learning_rate": 9.692315517079922e-06, "loss": 0.3118, "step": 1412 }, { "epoch": 0.11304, "grad_norm": 1.5991653203964233, "learning_rate": 9.691881004471449e-06, "loss": 0.3468, "step": 1413 }, { "epoch": 0.11312, "grad_norm": 1.7637615203857422, "learning_rate": 9.691446195023364e-06, "loss": 0.3192, "step": 1414 }, { "epoch": 0.1132, "grad_norm": 1.9800078868865967, "learning_rate": 9.691011088763175e-06, "loss": 0.3971, "step": 1415 }, { "epoch": 0.11328, "grad_norm": 1.2780556678771973, "learning_rate": 9.69057568571841e-06, "loss": 0.2921, "step": 1416 }, { "epoch": 0.11336, "grad_norm": 1.2494962215423584, "learning_rate": 9.690139985916619e-06, "loss": 0.283, "step": 1417 }, { "epoch": 0.11344, "grad_norm": 1.7476698160171509, "learning_rate": 9.689703989385362e-06, "loss": 0.3595, "step": 1418 }, { "epoch": 0.11352, "grad_norm": 1.494022011756897, "learning_rate": 9.689267696152226e-06, "loss": 0.2728, "step": 1419 }, { "epoch": 0.1136, "grad_norm": 1.702022671699524, "learning_rate": 9.688831106244814e-06, "loss": 0.3349, "step": 1420 }, { "epoch": 0.11368, "grad_norm": 1.6980208158493042, "learning_rate": 9.688394219690745e-06, "loss": 0.3074, "step": 1421 }, { "epoch": 0.11376, "grad_norm": 1.3088799715042114, "learning_rate": 9.687957036517662e-06, "loss": 0.2807, "step": 1422 }, { "epoch": 0.11384, "grad_norm": 1.4283772706985474, "learning_rate": 9.687519556753225e-06, "loss": 0.3256, "step": 1423 }, { "epoch": 0.11392, "grad_norm": 1.6759427785873413, "learning_rate": 9.687081780425108e-06, "loss": 0.3734, "step": 1424 }, { "epoch": 0.114, "grad_norm": 1.7384825944900513, "learning_rate": 9.68664370756101e-06, "loss": 0.396, "step": 1425 }, { "epoch": 0.11408, "grad_norm": 1.8031843900680542, "learning_rate": 9.686205338188645e-06, "loss": 0.4733, "step": 1426 }, { "epoch": 0.11416, "grad_norm": 1.8591822385787964, "learning_rate": 9.68576667233575e-06, "loss": 0.4139, "step": 1427 }, { "epoch": 0.11424, "grad_norm": 1.8857415914535522, "learning_rate": 9.685327710030077e-06, "loss": 0.3974, "step": 1428 }, { "epoch": 0.11432, "grad_norm": 1.1718003749847412, "learning_rate": 9.684888451299396e-06, "loss": 0.2668, "step": 1429 }, { "epoch": 0.1144, "grad_norm": 1.7708724737167358, "learning_rate": 9.684448896171498e-06, "loss": 0.3957, "step": 1430 }, { "epoch": 0.11448, "grad_norm": 1.4460155963897705, "learning_rate": 9.684009044674193e-06, "loss": 0.3137, "step": 1431 }, { "epoch": 0.11456, "grad_norm": 1.4484257698059082, "learning_rate": 9.683568896835309e-06, "loss": 0.33, "step": 1432 }, { "epoch": 0.11464, "grad_norm": 2.073709487915039, "learning_rate": 9.683128452682692e-06, "loss": 0.4239, "step": 1433 }, { "epoch": 0.11472, "grad_norm": 1.4383269548416138, "learning_rate": 9.682687712244205e-06, "loss": 0.269, "step": 1434 }, { "epoch": 0.1148, "grad_norm": 1.5897904634475708, "learning_rate": 9.682246675547737e-06, "loss": 0.3171, "step": 1435 }, { "epoch": 0.11488, "grad_norm": 1.4807348251342773, "learning_rate": 9.68180534262119e-06, "loss": 0.2884, "step": 1436 }, { "epoch": 0.11496, "grad_norm": 1.8860735893249512, "learning_rate": 9.681363713492483e-06, "loss": 0.449, "step": 1437 }, { "epoch": 0.11504, "grad_norm": 1.6362406015396118, "learning_rate": 9.680921788189556e-06, "loss": 0.3038, "step": 1438 }, { "epoch": 0.11512, "grad_norm": 1.7273122072219849, "learning_rate": 9.680479566740373e-06, "loss": 0.3497, "step": 1439 }, { "epoch": 0.1152, "grad_norm": 1.6315315961837769, "learning_rate": 9.680037049172907e-06, "loss": 0.3365, "step": 1440 }, { "epoch": 0.11528, "grad_norm": 1.381567358970642, "learning_rate": 9.679594235515158e-06, "loss": 0.2703, "step": 1441 }, { "epoch": 0.11536, "grad_norm": 2.0206964015960693, "learning_rate": 9.679151125795136e-06, "loss": 0.4859, "step": 1442 }, { "epoch": 0.11544, "grad_norm": 1.7309236526489258, "learning_rate": 9.678707720040882e-06, "loss": 0.4465, "step": 1443 }, { "epoch": 0.11552, "grad_norm": 1.8960282802581787, "learning_rate": 9.678264018280445e-06, "loss": 0.4757, "step": 1444 }, { "epoch": 0.1156, "grad_norm": 1.276365041732788, "learning_rate": 9.677820020541898e-06, "loss": 0.2635, "step": 1445 }, { "epoch": 0.11568, "grad_norm": 1.4210036993026733, "learning_rate": 9.677375726853327e-06, "loss": 0.3031, "step": 1446 }, { "epoch": 0.11576, "grad_norm": 1.6796081066131592, "learning_rate": 9.676931137242846e-06, "loss": 0.4549, "step": 1447 }, { "epoch": 0.11584, "grad_norm": 1.4270731210708618, "learning_rate": 9.676486251738581e-06, "loss": 0.3278, "step": 1448 }, { "epoch": 0.11592, "grad_norm": 1.5556656122207642, "learning_rate": 9.67604107036868e-06, "loss": 0.2951, "step": 1449 }, { "epoch": 0.116, "grad_norm": 1.3497978448867798, "learning_rate": 9.675595593161305e-06, "loss": 0.357, "step": 1450 }, { "epoch": 0.11608, "grad_norm": 1.3413703441619873, "learning_rate": 9.675149820144643e-06, "loss": 0.3007, "step": 1451 }, { "epoch": 0.11616, "grad_norm": 1.4101148843765259, "learning_rate": 9.674703751346893e-06, "loss": 0.3115, "step": 1452 }, { "epoch": 0.11624, "grad_norm": 1.480543851852417, "learning_rate": 9.67425738679628e-06, "loss": 0.3169, "step": 1453 }, { "epoch": 0.11632, "grad_norm": 2.082014799118042, "learning_rate": 9.67381072652104e-06, "loss": 0.4067, "step": 1454 }, { "epoch": 0.1164, "grad_norm": 1.5257582664489746, "learning_rate": 9.673363770549435e-06, "loss": 0.3861, "step": 1455 }, { "epoch": 0.11648, "grad_norm": 1.7837907075881958, "learning_rate": 9.672916518909743e-06, "loss": 0.4652, "step": 1456 }, { "epoch": 0.11656, "grad_norm": 1.7206987142562866, "learning_rate": 9.672468971630256e-06, "loss": 0.4114, "step": 1457 }, { "epoch": 0.11664, "grad_norm": 1.51250159740448, "learning_rate": 9.672021128739293e-06, "loss": 0.4297, "step": 1458 }, { "epoch": 0.11672, "grad_norm": 1.856751799583435, "learning_rate": 9.671572990265186e-06, "loss": 0.4375, "step": 1459 }, { "epoch": 0.1168, "grad_norm": 2.219356060028076, "learning_rate": 9.671124556236284e-06, "loss": 0.5495, "step": 1460 }, { "epoch": 0.11688, "grad_norm": 1.8340235948562622, "learning_rate": 9.670675826680963e-06, "loss": 0.361, "step": 1461 }, { "epoch": 0.11696, "grad_norm": 1.3916457891464233, "learning_rate": 9.670226801627611e-06, "loss": 0.3085, "step": 1462 }, { "epoch": 0.11704, "grad_norm": 1.3472083806991577, "learning_rate": 9.669777481104637e-06, "loss": 0.3098, "step": 1463 }, { "epoch": 0.11712, "grad_norm": 1.739112138748169, "learning_rate": 9.669327865140465e-06, "loss": 0.3132, "step": 1464 }, { "epoch": 0.1172, "grad_norm": 1.6176772117614746, "learning_rate": 9.668877953763544e-06, "loss": 0.4131, "step": 1465 }, { "epoch": 0.11728, "grad_norm": 1.2607818841934204, "learning_rate": 9.668427747002337e-06, "loss": 0.3161, "step": 1466 }, { "epoch": 0.11736, "grad_norm": 1.6247892379760742, "learning_rate": 9.667977244885324e-06, "loss": 0.3631, "step": 1467 }, { "epoch": 0.11744, "grad_norm": 1.8417178392410278, "learning_rate": 9.667526447441012e-06, "loss": 0.3967, "step": 1468 }, { "epoch": 0.11752, "grad_norm": 1.448082447052002, "learning_rate": 9.667075354697919e-06, "loss": 0.2738, "step": 1469 }, { "epoch": 0.1176, "grad_norm": 1.4578620195388794, "learning_rate": 9.666623966684585e-06, "loss": 0.437, "step": 1470 }, { "epoch": 0.11768, "grad_norm": 1.3342931270599365, "learning_rate": 9.666172283429566e-06, "loss": 0.3525, "step": 1471 }, { "epoch": 0.11776, "grad_norm": 1.410962700843811, "learning_rate": 9.66572030496144e-06, "loss": 0.3757, "step": 1472 }, { "epoch": 0.11784, "grad_norm": 1.3203762769699097, "learning_rate": 9.665268031308804e-06, "loss": 0.2784, "step": 1473 }, { "epoch": 0.11792, "grad_norm": 1.5164921283721924, "learning_rate": 9.664815462500268e-06, "loss": 0.3143, "step": 1474 }, { "epoch": 0.118, "grad_norm": 1.555022120475769, "learning_rate": 9.664362598564466e-06, "loss": 0.3576, "step": 1475 }, { "epoch": 0.11808, "grad_norm": 1.5586203336715698, "learning_rate": 9.66390943953005e-06, "loss": 0.2553, "step": 1476 }, { "epoch": 0.11816, "grad_norm": 1.8795398473739624, "learning_rate": 9.66345598542569e-06, "loss": 0.3442, "step": 1477 }, { "epoch": 0.11824, "grad_norm": 1.5754092931747437, "learning_rate": 9.663002236280072e-06, "loss": 0.2897, "step": 1478 }, { "epoch": 0.11832, "grad_norm": 1.3736915588378906, "learning_rate": 9.662548192121905e-06, "loss": 0.3255, "step": 1479 }, { "epoch": 0.1184, "grad_norm": 1.4949525594711304, "learning_rate": 9.662093852979916e-06, "loss": 0.2846, "step": 1480 }, { "epoch": 0.11848, "grad_norm": 1.5100445747375488, "learning_rate": 9.661639218882849e-06, "loss": 0.3407, "step": 1481 }, { "epoch": 0.11856, "grad_norm": 1.6961963176727295, "learning_rate": 9.661184289859465e-06, "loss": 0.3679, "step": 1482 }, { "epoch": 0.11864, "grad_norm": 1.5878902673721313, "learning_rate": 9.660729065938547e-06, "loss": 0.2542, "step": 1483 }, { "epoch": 0.11872, "grad_norm": 1.5185855627059937, "learning_rate": 9.660273547148897e-06, "loss": 0.3301, "step": 1484 }, { "epoch": 0.1188, "grad_norm": 1.7931679487228394, "learning_rate": 9.659817733519333e-06, "loss": 0.3853, "step": 1485 }, { "epoch": 0.11888, "grad_norm": 1.5024420022964478, "learning_rate": 9.659361625078691e-06, "loss": 0.2664, "step": 1486 }, { "epoch": 0.11896, "grad_norm": 1.6533669233322144, "learning_rate": 9.65890522185583e-06, "loss": 0.3301, "step": 1487 }, { "epoch": 0.11904, "grad_norm": 2.171912908554077, "learning_rate": 9.658448523879626e-06, "loss": 0.396, "step": 1488 }, { "epoch": 0.11912, "grad_norm": 1.7563323974609375, "learning_rate": 9.65799153117897e-06, "loss": 0.4444, "step": 1489 }, { "epoch": 0.1192, "grad_norm": 1.1641733646392822, "learning_rate": 9.657534243782775e-06, "loss": 0.2883, "step": 1490 }, { "epoch": 0.11928, "grad_norm": 1.5933700799942017, "learning_rate": 9.657076661719972e-06, "loss": 0.336, "step": 1491 }, { "epoch": 0.11936, "grad_norm": 1.411896824836731, "learning_rate": 9.656618785019513e-06, "loss": 0.2531, "step": 1492 }, { "epoch": 0.11944, "grad_norm": 1.3056985139846802, "learning_rate": 9.656160613710364e-06, "loss": 0.2732, "step": 1493 }, { "epoch": 0.11952, "grad_norm": 1.6541974544525146, "learning_rate": 9.655702147821514e-06, "loss": 0.3592, "step": 1494 }, { "epoch": 0.1196, "grad_norm": 1.6240873336791992, "learning_rate": 9.655243387381965e-06, "loss": 0.3601, "step": 1495 }, { "epoch": 0.11968, "grad_norm": 1.4387151002883911, "learning_rate": 9.654784332420744e-06, "loss": 0.274, "step": 1496 }, { "epoch": 0.11976, "grad_norm": 1.491258144378662, "learning_rate": 9.654324982966891e-06, "loss": 0.3344, "step": 1497 }, { "epoch": 0.11984, "grad_norm": 1.6981704235076904, "learning_rate": 9.653865339049472e-06, "loss": 0.5099, "step": 1498 }, { "epoch": 0.11992, "grad_norm": 1.6840261220932007, "learning_rate": 9.653405400697567e-06, "loss": 0.3375, "step": 1499 }, { "epoch": 0.12, "grad_norm": 1.4480576515197754, "learning_rate": 9.65294516794027e-06, "loss": 0.3806, "step": 1500 }, { "epoch": 0.12008, "grad_norm": 1.518832802772522, "learning_rate": 9.6524846408067e-06, "loss": 0.3553, "step": 1501 }, { "epoch": 0.12016, "grad_norm": 1.694017767906189, "learning_rate": 9.652023819325998e-06, "loss": 0.3129, "step": 1502 }, { "epoch": 0.12024, "grad_norm": 1.3871276378631592, "learning_rate": 9.651562703527311e-06, "loss": 0.3113, "step": 1503 }, { "epoch": 0.12032, "grad_norm": 1.6968255043029785, "learning_rate": 9.651101293439817e-06, "loss": 0.3511, "step": 1504 }, { "epoch": 0.1204, "grad_norm": 1.6635756492614746, "learning_rate": 9.650639589092708e-06, "loss": 0.354, "step": 1505 }, { "epoch": 0.12048, "grad_norm": 1.5392272472381592, "learning_rate": 9.65017759051519e-06, "loss": 0.3186, "step": 1506 }, { "epoch": 0.12056, "grad_norm": 1.2892400026321411, "learning_rate": 9.649715297736499e-06, "loss": 0.2914, "step": 1507 }, { "epoch": 0.12064, "grad_norm": 1.6253796815872192, "learning_rate": 9.649252710785876e-06, "loss": 0.3478, "step": 1508 }, { "epoch": 0.12072, "grad_norm": 1.8582258224487305, "learning_rate": 9.648789829692594e-06, "loss": 0.4373, "step": 1509 }, { "epoch": 0.1208, "grad_norm": 1.683634877204895, "learning_rate": 9.648326654485931e-06, "loss": 0.3919, "step": 1510 }, { "epoch": 0.12088, "grad_norm": 2.1171162128448486, "learning_rate": 9.647863185195197e-06, "loss": 0.4234, "step": 1511 }, { "epoch": 0.12096, "grad_norm": 1.5578632354736328, "learning_rate": 9.647399421849708e-06, "loss": 0.3344, "step": 1512 }, { "epoch": 0.12104, "grad_norm": 1.7130169868469238, "learning_rate": 9.64693536447881e-06, "loss": 0.4795, "step": 1513 }, { "epoch": 0.12112, "grad_norm": 1.5698732137680054, "learning_rate": 9.64647101311186e-06, "loss": 0.3961, "step": 1514 }, { "epoch": 0.1212, "grad_norm": 1.7023333311080933, "learning_rate": 9.646006367778235e-06, "loss": 0.3476, "step": 1515 }, { "epoch": 0.12128, "grad_norm": 1.8466098308563232, "learning_rate": 9.645541428507334e-06, "loss": 0.4728, "step": 1516 }, { "epoch": 0.12136, "grad_norm": 1.5508915185928345, "learning_rate": 9.64507619532857e-06, "loss": 0.4352, "step": 1517 }, { "epoch": 0.12144, "grad_norm": 1.3814231157302856, "learning_rate": 9.644610668271377e-06, "loss": 0.3032, "step": 1518 }, { "epoch": 0.12152, "grad_norm": 1.73944890499115, "learning_rate": 9.64414484736521e-06, "loss": 0.3277, "step": 1519 }, { "epoch": 0.1216, "grad_norm": 1.2703596353530884, "learning_rate": 9.643678732639537e-06, "loss": 0.2786, "step": 1520 }, { "epoch": 0.12168, "grad_norm": 1.4023946523666382, "learning_rate": 9.643212324123848e-06, "loss": 0.2626, "step": 1521 }, { "epoch": 0.12176, "grad_norm": 1.5729879140853882, "learning_rate": 9.64274562184765e-06, "loss": 0.3552, "step": 1522 }, { "epoch": 0.12184, "grad_norm": 1.6716023683547974, "learning_rate": 9.642278625840473e-06, "loss": 0.3472, "step": 1523 }, { "epoch": 0.12192, "grad_norm": 1.9659554958343506, "learning_rate": 9.64181133613186e-06, "loss": 0.3494, "step": 1524 }, { "epoch": 0.122, "grad_norm": 1.7329281568527222, "learning_rate": 9.641343752751375e-06, "loss": 0.3407, "step": 1525 }, { "epoch": 0.12208, "grad_norm": 1.9919676780700684, "learning_rate": 9.640875875728602e-06, "loss": 0.461, "step": 1526 }, { "epoch": 0.12216, "grad_norm": 1.6600706577301025, "learning_rate": 9.64040770509314e-06, "loss": 0.3863, "step": 1527 }, { "epoch": 0.12224, "grad_norm": 1.8697322607040405, "learning_rate": 9.639939240874609e-06, "loss": 0.4213, "step": 1528 }, { "epoch": 0.12232, "grad_norm": 1.6128782033920288, "learning_rate": 9.639470483102647e-06, "loss": 0.2803, "step": 1529 }, { "epoch": 0.1224, "grad_norm": 1.698266625404358, "learning_rate": 9.639001431806912e-06, "loss": 0.3881, "step": 1530 }, { "epoch": 0.12248, "grad_norm": 1.2810356616973877, "learning_rate": 9.638532087017079e-06, "loss": 0.2839, "step": 1531 }, { "epoch": 0.12256, "grad_norm": 1.382637619972229, "learning_rate": 9.638062448762842e-06, "loss": 0.2794, "step": 1532 }, { "epoch": 0.12264, "grad_norm": 1.7451645135879517, "learning_rate": 9.637592517073911e-06, "loss": 0.3944, "step": 1533 }, { "epoch": 0.12272, "grad_norm": 1.6562174558639526, "learning_rate": 9.63712229198002e-06, "loss": 0.3808, "step": 1534 }, { "epoch": 0.1228, "grad_norm": 1.47275972366333, "learning_rate": 9.636651773510917e-06, "loss": 0.3047, "step": 1535 }, { "epoch": 0.12288, "grad_norm": 1.6594504117965698, "learning_rate": 9.636180961696371e-06, "loss": 0.3467, "step": 1536 }, { "epoch": 0.12296, "grad_norm": 1.4340310096740723, "learning_rate": 9.635709856566167e-06, "loss": 0.2981, "step": 1537 }, { "epoch": 0.12304, "grad_norm": 1.524653673171997, "learning_rate": 9.635238458150114e-06, "loss": 0.2904, "step": 1538 }, { "epoch": 0.12312, "grad_norm": 1.5759027004241943, "learning_rate": 9.634766766478032e-06, "loss": 0.3737, "step": 1539 }, { "epoch": 0.1232, "grad_norm": 1.283862590789795, "learning_rate": 9.634294781579764e-06, "loss": 0.2563, "step": 1540 }, { "epoch": 0.12328, "grad_norm": 1.5947818756103516, "learning_rate": 9.633822503485172e-06, "loss": 0.3229, "step": 1541 }, { "epoch": 0.12336, "grad_norm": 1.2302602529525757, "learning_rate": 9.633349932224135e-06, "loss": 0.2566, "step": 1542 }, { "epoch": 0.12344, "grad_norm": 1.265610694885254, "learning_rate": 9.632877067826552e-06, "loss": 0.25, "step": 1543 }, { "epoch": 0.12352, "grad_norm": 1.6030651330947876, "learning_rate": 9.632403910322337e-06, "loss": 0.3573, "step": 1544 }, { "epoch": 0.1236, "grad_norm": 1.4760074615478516, "learning_rate": 9.631930459741427e-06, "loss": 0.3391, "step": 1545 }, { "epoch": 0.12368, "grad_norm": 1.7136142253875732, "learning_rate": 9.631456716113777e-06, "loss": 0.3732, "step": 1546 }, { "epoch": 0.12376, "grad_norm": 1.8622156381607056, "learning_rate": 9.630982679469355e-06, "loss": 0.3882, "step": 1547 }, { "epoch": 0.12384, "grad_norm": 1.7958600521087646, "learning_rate": 9.630508349838155e-06, "loss": 0.3191, "step": 1548 }, { "epoch": 0.12392, "grad_norm": 1.9841034412384033, "learning_rate": 9.630033727250186e-06, "loss": 0.4149, "step": 1549 }, { "epoch": 0.124, "grad_norm": 1.6318235397338867, "learning_rate": 9.629558811735475e-06, "loss": 0.349, "step": 1550 }, { "epoch": 0.12408, "grad_norm": 1.3617398738861084, "learning_rate": 9.62908360332407e-06, "loss": 0.3016, "step": 1551 }, { "epoch": 0.12416, "grad_norm": 1.6608842611312866, "learning_rate": 9.628608102046032e-06, "loss": 0.3923, "step": 1552 }, { "epoch": 0.12424, "grad_norm": 1.6326426267623901, "learning_rate": 9.628132307931446e-06, "loss": 0.3796, "step": 1553 }, { "epoch": 0.12432, "grad_norm": 1.473404049873352, "learning_rate": 9.627656221010417e-06, "loss": 0.3165, "step": 1554 }, { "epoch": 0.1244, "grad_norm": 1.6658129692077637, "learning_rate": 9.627179841313063e-06, "loss": 0.3675, "step": 1555 }, { "epoch": 0.12448, "grad_norm": 1.455764889717102, "learning_rate": 9.626703168869522e-06, "loss": 0.3633, "step": 1556 }, { "epoch": 0.12456, "grad_norm": 1.4597536325454712, "learning_rate": 9.626226203709954e-06, "loss": 0.3412, "step": 1557 }, { "epoch": 0.12464, "grad_norm": 1.6526191234588623, "learning_rate": 9.625748945864531e-06, "loss": 0.4253, "step": 1558 }, { "epoch": 0.12472, "grad_norm": 1.3261500597000122, "learning_rate": 9.625271395363453e-06, "loss": 0.2575, "step": 1559 }, { "epoch": 0.1248, "grad_norm": 1.5173916816711426, "learning_rate": 9.624793552236927e-06, "loss": 0.3687, "step": 1560 }, { "epoch": 0.12488, "grad_norm": 1.2248857021331787, "learning_rate": 9.62431541651519e-06, "loss": 0.2559, "step": 1561 }, { "epoch": 0.12496, "grad_norm": 1.925079584121704, "learning_rate": 9.623836988228487e-06, "loss": 0.365, "step": 1562 }, { "epoch": 0.12504, "grad_norm": 1.8328197002410889, "learning_rate": 9.623358267407092e-06, "loss": 0.3888, "step": 1563 }, { "epoch": 0.12512, "grad_norm": 1.723613977432251, "learning_rate": 9.622879254081288e-06, "loss": 0.4945, "step": 1564 }, { "epoch": 0.1252, "grad_norm": 1.787847638130188, "learning_rate": 9.622399948281382e-06, "loss": 0.424, "step": 1565 }, { "epoch": 0.12528, "grad_norm": 1.1664930582046509, "learning_rate": 9.621920350037697e-06, "loss": 0.2628, "step": 1566 }, { "epoch": 0.12536, "grad_norm": 1.3959739208221436, "learning_rate": 9.621440459380577e-06, "loss": 0.2896, "step": 1567 }, { "epoch": 0.12544, "grad_norm": 1.3110196590423584, "learning_rate": 9.620960276340383e-06, "loss": 0.2658, "step": 1568 }, { "epoch": 0.12552, "grad_norm": 1.2237164974212646, "learning_rate": 9.620479800947494e-06, "loss": 0.2624, "step": 1569 }, { "epoch": 0.1256, "grad_norm": 1.7535439729690552, "learning_rate": 9.619999033232308e-06, "loss": 0.3613, "step": 1570 }, { "epoch": 0.12568, "grad_norm": 1.5596789121627808, "learning_rate": 9.61951797322524e-06, "loss": 0.3552, "step": 1571 }, { "epoch": 0.12576, "grad_norm": 2.2569661140441895, "learning_rate": 9.61903662095673e-06, "loss": 0.4619, "step": 1572 }, { "epoch": 0.12584, "grad_norm": 1.540496826171875, "learning_rate": 9.618554976457226e-06, "loss": 0.2943, "step": 1573 }, { "epoch": 0.12592, "grad_norm": 1.417845606803894, "learning_rate": 9.618073039757204e-06, "loss": 0.2505, "step": 1574 }, { "epoch": 0.126, "grad_norm": 1.3284822702407837, "learning_rate": 9.617590810887151e-06, "loss": 0.2476, "step": 1575 }, { "epoch": 0.12608, "grad_norm": 1.5151894092559814, "learning_rate": 9.617108289877578e-06, "loss": 0.3971, "step": 1576 }, { "epoch": 0.12616, "grad_norm": 1.6880301237106323, "learning_rate": 9.616625476759014e-06, "loss": 0.3476, "step": 1577 }, { "epoch": 0.12624, "grad_norm": 1.7845243215560913, "learning_rate": 9.616142371562003e-06, "loss": 0.4271, "step": 1578 }, { "epoch": 0.12632, "grad_norm": 1.6483310461044312, "learning_rate": 9.61565897431711e-06, "loss": 0.3685, "step": 1579 }, { "epoch": 0.1264, "grad_norm": 1.4874074459075928, "learning_rate": 9.615175285054916e-06, "loss": 0.3274, "step": 1580 }, { "epoch": 0.12648, "grad_norm": 1.4330966472625732, "learning_rate": 9.614691303806027e-06, "loss": 0.3611, "step": 1581 }, { "epoch": 0.12656, "grad_norm": 1.424462914466858, "learning_rate": 9.614207030601057e-06, "loss": 0.2903, "step": 1582 }, { "epoch": 0.12664, "grad_norm": 1.2430349588394165, "learning_rate": 9.61372246547065e-06, "loss": 0.2505, "step": 1583 }, { "epoch": 0.12672, "grad_norm": 1.5988134145736694, "learning_rate": 9.613237608445458e-06, "loss": 0.3123, "step": 1584 }, { "epoch": 0.1268, "grad_norm": 1.6877517700195312, "learning_rate": 9.612752459556161e-06, "loss": 0.4546, "step": 1585 }, { "epoch": 0.12688, "grad_norm": 1.350681185722351, "learning_rate": 9.612267018833448e-06, "loss": 0.3087, "step": 1586 }, { "epoch": 0.12696, "grad_norm": 1.260632038116455, "learning_rate": 9.611781286308032e-06, "loss": 0.2645, "step": 1587 }, { "epoch": 0.12704, "grad_norm": 1.2421963214874268, "learning_rate": 9.611295262010649e-06, "loss": 0.296, "step": 1588 }, { "epoch": 0.12712, "grad_norm": 1.5727829933166504, "learning_rate": 9.610808945972042e-06, "loss": 0.3084, "step": 1589 }, { "epoch": 0.1272, "grad_norm": 1.3248558044433594, "learning_rate": 9.610322338222982e-06, "loss": 0.3195, "step": 1590 }, { "epoch": 0.12728, "grad_norm": 1.5574604272842407, "learning_rate": 9.60983543879425e-06, "loss": 0.3364, "step": 1591 }, { "epoch": 0.12736, "grad_norm": 1.849393367767334, "learning_rate": 9.609348247716658e-06, "loss": 0.3784, "step": 1592 }, { "epoch": 0.12744, "grad_norm": 1.5725646018981934, "learning_rate": 9.608860765021025e-06, "loss": 0.3448, "step": 1593 }, { "epoch": 0.12752, "grad_norm": 1.3313325643539429, "learning_rate": 9.608372990738193e-06, "loss": 0.2394, "step": 1594 }, { "epoch": 0.1276, "grad_norm": 1.4957760572433472, "learning_rate": 9.60788492489902e-06, "loss": 0.3165, "step": 1595 }, { "epoch": 0.12768, "grad_norm": 1.8721736669540405, "learning_rate": 9.607396567534387e-06, "loss": 0.3877, "step": 1596 }, { "epoch": 0.12776, "grad_norm": 1.5303232669830322, "learning_rate": 9.606907918675189e-06, "loss": 0.3717, "step": 1597 }, { "epoch": 0.12784, "grad_norm": 1.6356817483901978, "learning_rate": 9.606418978352342e-06, "loss": 0.4317, "step": 1598 }, { "epoch": 0.12792, "grad_norm": 1.7951912879943848, "learning_rate": 9.60592974659678e-06, "loss": 0.3572, "step": 1599 }, { "epoch": 0.128, "grad_norm": 1.7281938791275024, "learning_rate": 9.605440223439452e-06, "loss": 0.3721, "step": 1600 }, { "epoch": 0.12808, "grad_norm": 1.4484968185424805, "learning_rate": 9.604950408911334e-06, "loss": 0.2891, "step": 1601 }, { "epoch": 0.12816, "grad_norm": 1.6097825765609741, "learning_rate": 9.604460303043411e-06, "loss": 0.3225, "step": 1602 }, { "epoch": 0.12824, "grad_norm": 1.80912184715271, "learning_rate": 9.60396990586669e-06, "loss": 0.4023, "step": 1603 }, { "epoch": 0.12832, "grad_norm": 1.5157806873321533, "learning_rate": 9.6034792174122e-06, "loss": 0.3546, "step": 1604 }, { "epoch": 0.1284, "grad_norm": 1.6665889024734497, "learning_rate": 9.60298823771098e-06, "loss": 0.3968, "step": 1605 }, { "epoch": 0.12848, "grad_norm": 1.4822949171066284, "learning_rate": 9.602496966794098e-06, "loss": 0.3122, "step": 1606 }, { "epoch": 0.12856, "grad_norm": 1.6601344347000122, "learning_rate": 9.602005404692633e-06, "loss": 0.3554, "step": 1607 }, { "epoch": 0.12864, "grad_norm": 1.8020589351654053, "learning_rate": 9.601513551437685e-06, "loss": 0.4218, "step": 1608 }, { "epoch": 0.12872, "grad_norm": 1.8085490465164185, "learning_rate": 9.60102140706037e-06, "loss": 0.5042, "step": 1609 }, { "epoch": 0.1288, "grad_norm": 1.8596782684326172, "learning_rate": 9.600528971591824e-06, "loss": 0.4824, "step": 1610 }, { "epoch": 0.12888, "grad_norm": 1.5251644849777222, "learning_rate": 9.600036245063206e-06, "loss": 0.298, "step": 1611 }, { "epoch": 0.12896, "grad_norm": 1.4406249523162842, "learning_rate": 9.599543227505685e-06, "loss": 0.286, "step": 1612 }, { "epoch": 0.12904, "grad_norm": 2.1787021160125732, "learning_rate": 9.599049918950456e-06, "loss": 0.4677, "step": 1613 }, { "epoch": 0.12912, "grad_norm": 1.8443048000335693, "learning_rate": 9.598556319428726e-06, "loss": 0.3842, "step": 1614 }, { "epoch": 0.1292, "grad_norm": 2.4744701385498047, "learning_rate": 9.598062428971725e-06, "loss": 0.5183, "step": 1615 }, { "epoch": 0.12928, "grad_norm": 1.6642011404037476, "learning_rate": 9.597568247610699e-06, "loss": 0.3329, "step": 1616 }, { "epoch": 0.12936, "grad_norm": 1.8446506261825562, "learning_rate": 9.597073775376912e-06, "loss": 0.4378, "step": 1617 }, { "epoch": 0.12944, "grad_norm": 2.154025077819824, "learning_rate": 9.596579012301652e-06, "loss": 0.4079, "step": 1618 }, { "epoch": 0.12952, "grad_norm": 1.8335460424423218, "learning_rate": 9.596083958416216e-06, "loss": 0.3403, "step": 1619 }, { "epoch": 0.1296, "grad_norm": 1.4928038120269775, "learning_rate": 9.595588613751927e-06, "loss": 0.3134, "step": 1620 }, { "epoch": 0.12968, "grad_norm": 1.566196322441101, "learning_rate": 9.595092978340124e-06, "loss": 0.3698, "step": 1621 }, { "epoch": 0.12976, "grad_norm": 1.6201937198638916, "learning_rate": 9.594597052212163e-06, "loss": 0.3139, "step": 1622 }, { "epoch": 0.12984, "grad_norm": 1.5580354928970337, "learning_rate": 9.59410083539942e-06, "loss": 0.3374, "step": 1623 }, { "epoch": 0.12992, "grad_norm": 1.472684621810913, "learning_rate": 9.593604327933288e-06, "loss": 0.3058, "step": 1624 }, { "epoch": 0.13, "grad_norm": 1.9004932641983032, "learning_rate": 9.59310752984518e-06, "loss": 0.3661, "step": 1625 }, { "epoch": 0.13008, "grad_norm": 1.7129523754119873, "learning_rate": 9.59261044116653e-06, "loss": 0.4236, "step": 1626 }, { "epoch": 0.13016, "grad_norm": 1.9962881803512573, "learning_rate": 9.592113061928783e-06, "loss": 0.4657, "step": 1627 }, { "epoch": 0.13024, "grad_norm": 1.0669331550598145, "learning_rate": 9.591615392163408e-06, "loss": 0.2477, "step": 1628 }, { "epoch": 0.13032, "grad_norm": 1.2336879968643188, "learning_rate": 9.59111743190189e-06, "loss": 0.2468, "step": 1629 }, { "epoch": 0.1304, "grad_norm": 2.120313882827759, "learning_rate": 9.590619181175736e-06, "loss": 0.423, "step": 1630 }, { "epoch": 0.13048, "grad_norm": 2.0639867782592773, "learning_rate": 9.590120640016463e-06, "loss": 0.3888, "step": 1631 }, { "epoch": 0.13056, "grad_norm": 1.6942673921585083, "learning_rate": 9.589621808455617e-06, "loss": 0.377, "step": 1632 }, { "epoch": 0.13064, "grad_norm": 1.8577876091003418, "learning_rate": 9.589122686524759e-06, "loss": 0.3929, "step": 1633 }, { "epoch": 0.13072, "grad_norm": 1.837852954864502, "learning_rate": 9.588623274255461e-06, "loss": 0.3284, "step": 1634 }, { "epoch": 0.1308, "grad_norm": 1.546615481376648, "learning_rate": 9.588123571679323e-06, "loss": 0.3227, "step": 1635 }, { "epoch": 0.13088, "grad_norm": 1.5524723529815674, "learning_rate": 9.587623578827958e-06, "loss": 0.3375, "step": 1636 }, { "epoch": 0.13096, "grad_norm": 1.36067533493042, "learning_rate": 9.587123295733e-06, "loss": 0.394, "step": 1637 }, { "epoch": 0.13104, "grad_norm": 1.860670804977417, "learning_rate": 9.5866227224261e-06, "loss": 0.563, "step": 1638 }, { "epoch": 0.13112, "grad_norm": 1.4262064695358276, "learning_rate": 9.586121858938926e-06, "loss": 0.3446, "step": 1639 }, { "epoch": 0.1312, "grad_norm": 1.6558672189712524, "learning_rate": 9.585620705303168e-06, "loss": 0.3452, "step": 1640 }, { "epoch": 0.13128, "grad_norm": 1.3890858888626099, "learning_rate": 9.585119261550531e-06, "loss": 0.3047, "step": 1641 }, { "epoch": 0.13136, "grad_norm": 1.5068423748016357, "learning_rate": 9.58461752771274e-06, "loss": 0.3401, "step": 1642 }, { "epoch": 0.13144, "grad_norm": 1.6645478010177612, "learning_rate": 9.584115503821538e-06, "loss": 0.2867, "step": 1643 }, { "epoch": 0.13152, "grad_norm": 1.6663644313812256, "learning_rate": 9.583613189908688e-06, "loss": 0.3418, "step": 1644 }, { "epoch": 0.1316, "grad_norm": 1.356171727180481, "learning_rate": 9.583110586005969e-06, "loss": 0.2561, "step": 1645 }, { "epoch": 0.13168, "grad_norm": 2.0098698139190674, "learning_rate": 9.582607692145176e-06, "loss": 0.3869, "step": 1646 }, { "epoch": 0.13176, "grad_norm": 1.6945312023162842, "learning_rate": 9.582104508358128e-06, "loss": 0.3589, "step": 1647 }, { "epoch": 0.13184, "grad_norm": 1.593902826309204, "learning_rate": 9.58160103467666e-06, "loss": 0.367, "step": 1648 }, { "epoch": 0.13192, "grad_norm": 1.8232020139694214, "learning_rate": 9.581097271132626e-06, "loss": 0.4105, "step": 1649 }, { "epoch": 0.132, "grad_norm": 1.5520724058151245, "learning_rate": 9.580593217757893e-06, "loss": 0.3554, "step": 1650 }, { "epoch": 0.13208, "grad_norm": 1.5328576564788818, "learning_rate": 9.580088874584356e-06, "loss": 0.3405, "step": 1651 }, { "epoch": 0.13216, "grad_norm": 1.6017850637435913, "learning_rate": 9.579584241643923e-06, "loss": 0.3577, "step": 1652 }, { "epoch": 0.13224, "grad_norm": 1.6488410234451294, "learning_rate": 9.579079318968514e-06, "loss": 0.3528, "step": 1653 }, { "epoch": 0.13232, "grad_norm": 2.3161511421203613, "learning_rate": 9.578574106590081e-06, "loss": 0.5027, "step": 1654 }, { "epoch": 0.1324, "grad_norm": 1.536486029624939, "learning_rate": 9.578068604540582e-06, "loss": 0.3476, "step": 1655 }, { "epoch": 0.13248, "grad_norm": 1.761263132095337, "learning_rate": 9.577562812852004e-06, "loss": 0.36, "step": 1656 }, { "epoch": 0.13256, "grad_norm": 1.8455753326416016, "learning_rate": 9.57705673155634e-06, "loss": 0.4576, "step": 1657 }, { "epoch": 0.13264, "grad_norm": 1.7750699520111084, "learning_rate": 9.576550360685613e-06, "loss": 0.3432, "step": 1658 }, { "epoch": 0.13272, "grad_norm": 1.4518996477127075, "learning_rate": 9.576043700271857e-06, "loss": 0.3352, "step": 1659 }, { "epoch": 0.1328, "grad_norm": 1.6674631834030151, "learning_rate": 9.57553675034713e-06, "loss": 0.3836, "step": 1660 }, { "epoch": 0.13288, "grad_norm": 1.7204508781433105, "learning_rate": 9.575029510943501e-06, "loss": 0.3045, "step": 1661 }, { "epoch": 0.13296, "grad_norm": 1.37985098361969, "learning_rate": 9.574521982093063e-06, "loss": 0.2761, "step": 1662 }, { "epoch": 0.13304, "grad_norm": 1.4991388320922852, "learning_rate": 9.574014163827926e-06, "loss": 0.3735, "step": 1663 }, { "epoch": 0.13312, "grad_norm": 1.4861139059066772, "learning_rate": 9.573506056180215e-06, "loss": 0.3132, "step": 1664 }, { "epoch": 0.1332, "grad_norm": 1.6248459815979004, "learning_rate": 9.572997659182081e-06, "loss": 0.3233, "step": 1665 }, { "epoch": 0.13328, "grad_norm": 1.3340996503829956, "learning_rate": 9.572488972865686e-06, "loss": 0.3419, "step": 1666 }, { "epoch": 0.13336, "grad_norm": 1.760284423828125, "learning_rate": 9.571979997263214e-06, "loss": 0.3719, "step": 1667 }, { "epoch": 0.13344, "grad_norm": 1.4957592487335205, "learning_rate": 9.571470732406865e-06, "loss": 0.3708, "step": 1668 }, { "epoch": 0.13352, "grad_norm": 1.7483258247375488, "learning_rate": 9.570961178328859e-06, "loss": 0.4577, "step": 1669 }, { "epoch": 0.1336, "grad_norm": 1.3776280879974365, "learning_rate": 9.570451335061433e-06, "loss": 0.288, "step": 1670 }, { "epoch": 0.13368, "grad_norm": 1.427151083946228, "learning_rate": 9.569941202636846e-06, "loss": 0.3302, "step": 1671 }, { "epoch": 0.13376, "grad_norm": 1.7294217348098755, "learning_rate": 9.569430781087367e-06, "loss": 0.3842, "step": 1672 }, { "epoch": 0.13384, "grad_norm": 1.1407139301300049, "learning_rate": 9.568920070445295e-06, "loss": 0.2215, "step": 1673 }, { "epoch": 0.13392, "grad_norm": 1.2754679918289185, "learning_rate": 9.568409070742936e-06, "loss": 0.2493, "step": 1674 }, { "epoch": 0.134, "grad_norm": 1.5319108963012695, "learning_rate": 9.56789778201262e-06, "loss": 0.3972, "step": 1675 }, { "epoch": 0.13408, "grad_norm": 1.3693424463272095, "learning_rate": 9.567386204286697e-06, "loss": 0.261, "step": 1676 }, { "epoch": 0.13416, "grad_norm": 1.649755597114563, "learning_rate": 9.566874337597533e-06, "loss": 0.3777, "step": 1677 }, { "epoch": 0.13424, "grad_norm": 1.7307039499282837, "learning_rate": 9.566362181977509e-06, "loss": 0.4339, "step": 1678 }, { "epoch": 0.13432, "grad_norm": 1.3721426725387573, "learning_rate": 9.565849737459027e-06, "loss": 0.2845, "step": 1679 }, { "epoch": 0.1344, "grad_norm": 2.235292434692383, "learning_rate": 9.565337004074512e-06, "loss": 0.4784, "step": 1680 }, { "epoch": 0.13448, "grad_norm": 1.6313210725784302, "learning_rate": 9.5648239818564e-06, "loss": 0.3986, "step": 1681 }, { "epoch": 0.13456, "grad_norm": 1.9515478610992432, "learning_rate": 9.564310670837146e-06, "loss": 0.4105, "step": 1682 }, { "epoch": 0.13464, "grad_norm": 1.635183572769165, "learning_rate": 9.563797071049232e-06, "loss": 0.3187, "step": 1683 }, { "epoch": 0.13472, "grad_norm": 1.5895339250564575, "learning_rate": 9.563283182525145e-06, "loss": 0.3844, "step": 1684 }, { "epoch": 0.1348, "grad_norm": 1.718421459197998, "learning_rate": 9.562769005297401e-06, "loss": 0.3469, "step": 1685 }, { "epoch": 0.13488, "grad_norm": 1.5541642904281616, "learning_rate": 9.56225453939853e-06, "loss": 0.3621, "step": 1686 }, { "epoch": 0.13496, "grad_norm": 1.4598628282546997, "learning_rate": 9.561739784861077e-06, "loss": 0.3712, "step": 1687 }, { "epoch": 0.13504, "grad_norm": 1.4597030878067017, "learning_rate": 9.561224741717614e-06, "loss": 0.343, "step": 1688 }, { "epoch": 0.13512, "grad_norm": 1.3350636959075928, "learning_rate": 9.560709410000722e-06, "loss": 0.2609, "step": 1689 }, { "epoch": 0.1352, "grad_norm": 1.1671631336212158, "learning_rate": 9.560193789743006e-06, "loss": 0.2378, "step": 1690 }, { "epoch": 0.13528, "grad_norm": 1.94452965259552, "learning_rate": 9.55967788097709e-06, "loss": 0.3756, "step": 1691 }, { "epoch": 0.13536, "grad_norm": 2.2720422744750977, "learning_rate": 9.559161683735607e-06, "loss": 0.4896, "step": 1692 }, { "epoch": 0.13544, "grad_norm": 1.2150167226791382, "learning_rate": 9.558645198051221e-06, "loss": 0.2944, "step": 1693 }, { "epoch": 0.13552, "grad_norm": 1.321184754371643, "learning_rate": 9.558128423956608e-06, "loss": 0.3323, "step": 1694 }, { "epoch": 0.1356, "grad_norm": 1.6017346382141113, "learning_rate": 9.55761136148446e-06, "loss": 0.3454, "step": 1695 }, { "epoch": 0.13568, "grad_norm": 1.4574471712112427, "learning_rate": 9.55709401066749e-06, "loss": 0.3351, "step": 1696 }, { "epoch": 0.13576, "grad_norm": 1.7760628461837769, "learning_rate": 9.556576371538431e-06, "loss": 0.3757, "step": 1697 }, { "epoch": 0.13584, "grad_norm": 1.3665014505386353, "learning_rate": 9.556058444130032e-06, "loss": 0.2966, "step": 1698 }, { "epoch": 0.13592, "grad_norm": 1.6557552814483643, "learning_rate": 9.555540228475058e-06, "loss": 0.3721, "step": 1699 }, { "epoch": 0.136, "grad_norm": 1.6838151216506958, "learning_rate": 9.555021724606298e-06, "loss": 0.4326, "step": 1700 }, { "epoch": 0.13608, "grad_norm": 1.3219002485275269, "learning_rate": 9.554502932556555e-06, "loss": 0.3573, "step": 1701 }, { "epoch": 0.13616, "grad_norm": 1.4627076387405396, "learning_rate": 9.55398385235865e-06, "loss": 0.3406, "step": 1702 }, { "epoch": 0.13624, "grad_norm": 1.6964737176895142, "learning_rate": 9.553464484045425e-06, "loss": 0.3365, "step": 1703 }, { "epoch": 0.13632, "grad_norm": 1.4238336086273193, "learning_rate": 9.552944827649737e-06, "loss": 0.3474, "step": 1704 }, { "epoch": 0.1364, "grad_norm": 1.3716814517974854, "learning_rate": 9.552424883204465e-06, "loss": 0.3811, "step": 1705 }, { "epoch": 0.13648, "grad_norm": 1.5849741697311401, "learning_rate": 9.551904650742503e-06, "loss": 0.3416, "step": 1706 }, { "epoch": 0.13656, "grad_norm": 1.7011810541152954, "learning_rate": 9.551384130296763e-06, "loss": 0.3839, "step": 1707 }, { "epoch": 0.13664, "grad_norm": 1.6651289463043213, "learning_rate": 9.55086332190018e-06, "loss": 0.3876, "step": 1708 }, { "epoch": 0.13672, "grad_norm": 1.391045331954956, "learning_rate": 9.5503422255857e-06, "loss": 0.2749, "step": 1709 }, { "epoch": 0.1368, "grad_norm": 1.50571870803833, "learning_rate": 9.549820841386295e-06, "loss": 0.3082, "step": 1710 }, { "epoch": 0.13688, "grad_norm": 1.1734155416488647, "learning_rate": 9.549299169334948e-06, "loss": 0.3251, "step": 1711 }, { "epoch": 0.13696, "grad_norm": 1.4512368440628052, "learning_rate": 9.548777209464664e-06, "loss": 0.3287, "step": 1712 }, { "epoch": 0.13704, "grad_norm": 1.5819345712661743, "learning_rate": 9.548254961808467e-06, "loss": 0.3006, "step": 1713 }, { "epoch": 0.13712, "grad_norm": 1.6825000047683716, "learning_rate": 9.547732426399397e-06, "loss": 0.3654, "step": 1714 }, { "epoch": 0.1372, "grad_norm": 2.002753496170044, "learning_rate": 9.547209603270513e-06, "loss": 0.4603, "step": 1715 }, { "epoch": 0.13728, "grad_norm": 1.3878475427627563, "learning_rate": 9.546686492454892e-06, "loss": 0.3205, "step": 1716 }, { "epoch": 0.13736, "grad_norm": 1.9172121286392212, "learning_rate": 9.546163093985631e-06, "loss": 0.4037, "step": 1717 }, { "epoch": 0.13744, "grad_norm": 1.4683332443237305, "learning_rate": 9.545639407895842e-06, "loss": 0.3113, "step": 1718 }, { "epoch": 0.13752, "grad_norm": 1.4814456701278687, "learning_rate": 9.545115434218658e-06, "loss": 0.3195, "step": 1719 }, { "epoch": 0.1376, "grad_norm": 1.6814812421798706, "learning_rate": 9.544591172987227e-06, "loss": 0.3734, "step": 1720 }, { "epoch": 0.13768, "grad_norm": 1.4139302968978882, "learning_rate": 9.54406662423472e-06, "loss": 0.3155, "step": 1721 }, { "epoch": 0.13776, "grad_norm": 1.6127204895019531, "learning_rate": 9.543541787994322e-06, "loss": 0.3476, "step": 1722 }, { "epoch": 0.13784, "grad_norm": 1.9391632080078125, "learning_rate": 9.543016664299237e-06, "loss": 0.4613, "step": 1723 }, { "epoch": 0.13792, "grad_norm": 1.070672869682312, "learning_rate": 9.542491253182689e-06, "loss": 0.2503, "step": 1724 }, { "epoch": 0.138, "grad_norm": 1.7871367931365967, "learning_rate": 9.541965554677918e-06, "loss": 0.4031, "step": 1725 }, { "epoch": 0.13808, "grad_norm": 1.4849225282669067, "learning_rate": 9.541439568818186e-06, "loss": 0.3372, "step": 1726 }, { "epoch": 0.13816, "grad_norm": 1.5674102306365967, "learning_rate": 9.540913295636766e-06, "loss": 0.4731, "step": 1727 }, { "epoch": 0.13824, "grad_norm": 1.6845797300338745, "learning_rate": 9.540386735166957e-06, "loss": 0.4774, "step": 1728 }, { "epoch": 0.13832, "grad_norm": 1.2045738697052002, "learning_rate": 9.539859887442071e-06, "loss": 0.3388, "step": 1729 }, { "epoch": 0.1384, "grad_norm": 1.78014075756073, "learning_rate": 9.53933275249544e-06, "loss": 0.3886, "step": 1730 }, { "epoch": 0.13848, "grad_norm": 1.5145901441574097, "learning_rate": 9.538805330360415e-06, "loss": 0.3352, "step": 1731 }, { "epoch": 0.13856, "grad_norm": 1.681176781654358, "learning_rate": 9.538277621070363e-06, "loss": 0.3815, "step": 1732 }, { "epoch": 0.13864, "grad_norm": 1.7225366830825806, "learning_rate": 9.537749624658671e-06, "loss": 0.3022, "step": 1733 }, { "epoch": 0.13872, "grad_norm": 1.4362283945083618, "learning_rate": 9.537221341158745e-06, "loss": 0.3086, "step": 1734 }, { "epoch": 0.1388, "grad_norm": 1.7611305713653564, "learning_rate": 9.536692770604005e-06, "loss": 0.3078, "step": 1735 }, { "epoch": 0.13888, "grad_norm": 2.2746381759643555, "learning_rate": 9.536163913027894e-06, "loss": 0.4661, "step": 1736 }, { "epoch": 0.13896, "grad_norm": 1.6022064685821533, "learning_rate": 9.535634768463869e-06, "loss": 0.3514, "step": 1737 }, { "epoch": 0.13904, "grad_norm": 2.1435439586639404, "learning_rate": 9.53510533694541e-06, "loss": 0.4979, "step": 1738 }, { "epoch": 0.13912, "grad_norm": 1.5919454097747803, "learning_rate": 9.53457561850601e-06, "loss": 0.387, "step": 1739 }, { "epoch": 0.1392, "grad_norm": 1.475101351737976, "learning_rate": 9.534045613179184e-06, "loss": 0.3359, "step": 1740 }, { "epoch": 0.13928, "grad_norm": 1.5171840190887451, "learning_rate": 9.533515320998462e-06, "loss": 0.3717, "step": 1741 }, { "epoch": 0.13936, "grad_norm": 1.7646937370300293, "learning_rate": 9.532984741997395e-06, "loss": 0.3868, "step": 1742 }, { "epoch": 0.13944, "grad_norm": 2.229182720184326, "learning_rate": 9.532453876209551e-06, "loss": 0.4725, "step": 1743 }, { "epoch": 0.13952, "grad_norm": 1.8388185501098633, "learning_rate": 9.531922723668517e-06, "loss": 0.3709, "step": 1744 }, { "epoch": 0.1396, "grad_norm": 1.9653486013412476, "learning_rate": 9.531391284407896e-06, "loss": 0.4021, "step": 1745 }, { "epoch": 0.13968, "grad_norm": 1.6100776195526123, "learning_rate": 9.530859558461309e-06, "loss": 0.3279, "step": 1746 }, { "epoch": 0.13976, "grad_norm": 1.587827205657959, "learning_rate": 9.530327545862398e-06, "loss": 0.3501, "step": 1747 }, { "epoch": 0.13984, "grad_norm": 1.6959279775619507, "learning_rate": 9.529795246644821e-06, "loss": 0.3558, "step": 1748 }, { "epoch": 0.13992, "grad_norm": 1.7590053081512451, "learning_rate": 9.529262660842257e-06, "loss": 0.3838, "step": 1749 }, { "epoch": 0.14, "grad_norm": 2.2163288593292236, "learning_rate": 9.5287297884884e-06, "loss": 0.4245, "step": 1750 }, { "epoch": 0.14008, "grad_norm": 1.510589599609375, "learning_rate": 9.528196629616963e-06, "loss": 0.3794, "step": 1751 }, { "epoch": 0.14016, "grad_norm": 1.942276954650879, "learning_rate": 9.527663184261674e-06, "loss": 0.4612, "step": 1752 }, { "epoch": 0.14024, "grad_norm": 1.3821239471435547, "learning_rate": 9.527129452456288e-06, "loss": 0.3189, "step": 1753 }, { "epoch": 0.14032, "grad_norm": 2.456770181655884, "learning_rate": 9.526595434234567e-06, "loss": 0.5582, "step": 1754 }, { "epoch": 0.1404, "grad_norm": 1.4780219793319702, "learning_rate": 9.5260611296303e-06, "loss": 0.373, "step": 1755 }, { "epoch": 0.14048, "grad_norm": 1.8656375408172607, "learning_rate": 9.52552653867729e-06, "loss": 0.3918, "step": 1756 }, { "epoch": 0.14056, "grad_norm": 1.6014630794525146, "learning_rate": 9.524991661409356e-06, "loss": 0.3833, "step": 1757 }, { "epoch": 0.14064, "grad_norm": 1.5639004707336426, "learning_rate": 9.524456497860342e-06, "loss": 0.4446, "step": 1758 }, { "epoch": 0.14072, "grad_norm": 1.6596323251724243, "learning_rate": 9.523921048064105e-06, "loss": 0.3756, "step": 1759 }, { "epoch": 0.1408, "grad_norm": 1.7615050077438354, "learning_rate": 9.523385312054519e-06, "loss": 0.355, "step": 1760 }, { "epoch": 0.14088, "grad_norm": 1.968165636062622, "learning_rate": 9.52284928986548e-06, "loss": 0.4518, "step": 1761 }, { "epoch": 0.14096, "grad_norm": 1.9906011819839478, "learning_rate": 9.5223129815309e-06, "loss": 0.4838, "step": 1762 }, { "epoch": 0.14104, "grad_norm": 1.559403419494629, "learning_rate": 9.52177638708471e-06, "loss": 0.3133, "step": 1763 }, { "epoch": 0.14112, "grad_norm": 1.639631748199463, "learning_rate": 9.521239506560856e-06, "loss": 0.3229, "step": 1764 }, { "epoch": 0.1412, "grad_norm": 1.5165354013442993, "learning_rate": 9.520702339993308e-06, "loss": 0.2711, "step": 1765 }, { "epoch": 0.14128, "grad_norm": 1.5883980989456177, "learning_rate": 9.520164887416048e-06, "loss": 0.3004, "step": 1766 }, { "epoch": 0.14136, "grad_norm": 2.106351852416992, "learning_rate": 9.519627148863083e-06, "loss": 0.3873, "step": 1767 }, { "epoch": 0.14144, "grad_norm": 1.6328215599060059, "learning_rate": 9.519089124368428e-06, "loss": 0.3724, "step": 1768 }, { "epoch": 0.14152, "grad_norm": 1.9253731966018677, "learning_rate": 9.518550813966127e-06, "loss": 0.4179, "step": 1769 }, { "epoch": 0.1416, "grad_norm": 1.5602757930755615, "learning_rate": 9.518012217690233e-06, "loss": 0.3443, "step": 1770 }, { "epoch": 0.14168, "grad_norm": 0.9548719525337219, "learning_rate": 9.517473335574826e-06, "loss": 0.2493, "step": 1771 }, { "epoch": 0.14176, "grad_norm": 1.837083339691162, "learning_rate": 9.516934167653995e-06, "loss": 0.4071, "step": 1772 }, { "epoch": 0.14184, "grad_norm": 1.7493815422058105, "learning_rate": 9.516394713961851e-06, "loss": 0.3572, "step": 1773 }, { "epoch": 0.14192, "grad_norm": 1.7509686946868896, "learning_rate": 9.51585497453253e-06, "loss": 0.38, "step": 1774 }, { "epoch": 0.142, "grad_norm": 1.8923393487930298, "learning_rate": 9.515314949400172e-06, "loss": 0.3667, "step": 1775 }, { "epoch": 0.14208, "grad_norm": 1.5569912195205688, "learning_rate": 9.514774638598945e-06, "loss": 0.3337, "step": 1776 }, { "epoch": 0.14216, "grad_norm": 1.260204792022705, "learning_rate": 9.514234042163033e-06, "loss": 0.2805, "step": 1777 }, { "epoch": 0.14224, "grad_norm": 1.447977066040039, "learning_rate": 9.51369316012664e-06, "loss": 0.3253, "step": 1778 }, { "epoch": 0.14232, "grad_norm": 1.598922610282898, "learning_rate": 9.513151992523982e-06, "loss": 0.3362, "step": 1779 }, { "epoch": 0.1424, "grad_norm": 1.7492644786834717, "learning_rate": 9.512610539389297e-06, "loss": 0.3523, "step": 1780 }, { "epoch": 0.14248, "grad_norm": 1.6594542264938354, "learning_rate": 9.512068800756845e-06, "loss": 0.3077, "step": 1781 }, { "epoch": 0.14256, "grad_norm": 1.445451259613037, "learning_rate": 9.511526776660898e-06, "loss": 0.2962, "step": 1782 }, { "epoch": 0.14264, "grad_norm": 1.554978609085083, "learning_rate": 9.510984467135744e-06, "loss": 0.4125, "step": 1783 }, { "epoch": 0.14272, "grad_norm": 1.4923337697982788, "learning_rate": 9.5104418722157e-06, "loss": 0.2499, "step": 1784 }, { "epoch": 0.1428, "grad_norm": 1.4580963850021362, "learning_rate": 9.509898991935088e-06, "loss": 0.3109, "step": 1785 }, { "epoch": 0.14288, "grad_norm": 1.3246641159057617, "learning_rate": 9.50935582632826e-06, "loss": 0.2577, "step": 1786 }, { "epoch": 0.14296, "grad_norm": 1.401862382888794, "learning_rate": 9.508812375429575e-06, "loss": 0.34, "step": 1787 }, { "epoch": 0.14304, "grad_norm": 1.5076367855072021, "learning_rate": 9.508268639273417e-06, "loss": 0.4019, "step": 1788 }, { "epoch": 0.14312, "grad_norm": 1.6799163818359375, "learning_rate": 9.507724617894188e-06, "loss": 0.4145, "step": 1789 }, { "epoch": 0.1432, "grad_norm": 1.2659521102905273, "learning_rate": 9.507180311326306e-06, "loss": 0.2803, "step": 1790 }, { "epoch": 0.14328, "grad_norm": 2.103095054626465, "learning_rate": 9.506635719604207e-06, "loss": 0.4016, "step": 1791 }, { "epoch": 0.14336, "grad_norm": 1.4720572233200073, "learning_rate": 9.506090842762344e-06, "loss": 0.3029, "step": 1792 }, { "epoch": 0.14344, "grad_norm": 1.6554877758026123, "learning_rate": 9.50554568083519e-06, "loss": 0.3414, "step": 1793 }, { "epoch": 0.14352, "grad_norm": 1.4517250061035156, "learning_rate": 9.505000233857238e-06, "loss": 0.423, "step": 1794 }, { "epoch": 0.1436, "grad_norm": 1.9899773597717285, "learning_rate": 9.504454501862994e-06, "loss": 0.3435, "step": 1795 }, { "epoch": 0.14368, "grad_norm": 1.4385102987289429, "learning_rate": 9.503908484886986e-06, "loss": 0.3223, "step": 1796 }, { "epoch": 0.14376, "grad_norm": 1.4701497554779053, "learning_rate": 9.503362182963757e-06, "loss": 0.3363, "step": 1797 }, { "epoch": 0.14384, "grad_norm": 2.0168707370758057, "learning_rate": 9.502815596127874e-06, "loss": 0.5457, "step": 1798 }, { "epoch": 0.14392, "grad_norm": 1.6727696657180786, "learning_rate": 9.502268724413913e-06, "loss": 0.3096, "step": 1799 }, { "epoch": 0.144, "grad_norm": 1.4367340803146362, "learning_rate": 9.501721567856475e-06, "loss": 0.2831, "step": 1800 }, { "epoch": 0.14408, "grad_norm": 1.5396634340286255, "learning_rate": 9.501174126490176e-06, "loss": 0.3791, "step": 1801 }, { "epoch": 0.14416, "grad_norm": 1.8134430646896362, "learning_rate": 9.500626400349651e-06, "loss": 0.5339, "step": 1802 }, { "epoch": 0.14424, "grad_norm": 1.4883620738983154, "learning_rate": 9.500078389469551e-06, "loss": 0.3966, "step": 1803 }, { "epoch": 0.14432, "grad_norm": 1.666069746017456, "learning_rate": 9.49953009388455e-06, "loss": 0.3484, "step": 1804 }, { "epoch": 0.1444, "grad_norm": 1.8850048780441284, "learning_rate": 9.498981513629336e-06, "loss": 0.3768, "step": 1805 }, { "epoch": 0.14448, "grad_norm": 1.885728359222412, "learning_rate": 9.498432648738616e-06, "loss": 0.413, "step": 1806 }, { "epoch": 0.14456, "grad_norm": 1.6938875913619995, "learning_rate": 9.497883499247112e-06, "loss": 0.3417, "step": 1807 }, { "epoch": 0.14464, "grad_norm": 1.6095532178878784, "learning_rate": 9.49733406518957e-06, "loss": 0.3333, "step": 1808 }, { "epoch": 0.14472, "grad_norm": 2.2393977642059326, "learning_rate": 9.496784346600749e-06, "loss": 0.4968, "step": 1809 }, { "epoch": 0.1448, "grad_norm": 1.5077526569366455, "learning_rate": 9.496234343515428e-06, "loss": 0.3411, "step": 1810 }, { "epoch": 0.14488, "grad_norm": 1.496593713760376, "learning_rate": 9.495684055968408e-06, "loss": 0.468, "step": 1811 }, { "epoch": 0.14496, "grad_norm": 1.466931700706482, "learning_rate": 9.495133483994498e-06, "loss": 0.3441, "step": 1812 }, { "epoch": 0.14504, "grad_norm": 1.3516722917556763, "learning_rate": 9.494582627628533e-06, "loss": 0.2619, "step": 1813 }, { "epoch": 0.14512, "grad_norm": 2.093080520629883, "learning_rate": 9.494031486905366e-06, "loss": 0.4629, "step": 1814 }, { "epoch": 0.1452, "grad_norm": 1.777840256690979, "learning_rate": 9.493480061859861e-06, "loss": 0.3284, "step": 1815 }, { "epoch": 0.14528, "grad_norm": 1.5537855625152588, "learning_rate": 9.492928352526908e-06, "loss": 0.3827, "step": 1816 }, { "epoch": 0.14536, "grad_norm": 1.2391389608383179, "learning_rate": 9.492376358941414e-06, "loss": 0.3373, "step": 1817 }, { "epoch": 0.14544, "grad_norm": 1.3341397047042847, "learning_rate": 9.4918240811383e-06, "loss": 0.2814, "step": 1818 }, { "epoch": 0.14552, "grad_norm": 1.5178688764572144, "learning_rate": 9.491271519152503e-06, "loss": 0.3376, "step": 1819 }, { "epoch": 0.1456, "grad_norm": 1.4326900243759155, "learning_rate": 9.490718673018986e-06, "loss": 0.3139, "step": 1820 }, { "epoch": 0.14568, "grad_norm": 1.5329378843307495, "learning_rate": 9.490165542772724e-06, "loss": 0.3327, "step": 1821 }, { "epoch": 0.14576, "grad_norm": 1.8199474811553955, "learning_rate": 9.489612128448714e-06, "loss": 0.4225, "step": 1822 }, { "epoch": 0.14584, "grad_norm": 1.8470107316970825, "learning_rate": 9.489058430081964e-06, "loss": 0.3648, "step": 1823 }, { "epoch": 0.14592, "grad_norm": 1.2022221088409424, "learning_rate": 9.48850444770751e-06, "loss": 0.304, "step": 1824 }, { "epoch": 0.146, "grad_norm": 1.4264217615127563, "learning_rate": 9.487950181360397e-06, "loss": 0.3224, "step": 1825 }, { "epoch": 0.14608, "grad_norm": 2.1026673316955566, "learning_rate": 9.487395631075693e-06, "loss": 0.4528, "step": 1826 }, { "epoch": 0.14616, "grad_norm": 1.5366865396499634, "learning_rate": 9.486840796888483e-06, "loss": 0.2714, "step": 1827 }, { "epoch": 0.14624, "grad_norm": 1.836358666419983, "learning_rate": 9.48628567883387e-06, "loss": 0.3978, "step": 1828 }, { "epoch": 0.14632, "grad_norm": 1.6860939264297485, "learning_rate": 9.48573027694697e-06, "loss": 0.3714, "step": 1829 }, { "epoch": 0.1464, "grad_norm": 1.3524024486541748, "learning_rate": 9.485174591262925e-06, "loss": 0.2514, "step": 1830 }, { "epoch": 0.14648, "grad_norm": 1.6706621646881104, "learning_rate": 9.484618621816892e-06, "loss": 0.3619, "step": 1831 }, { "epoch": 0.14656, "grad_norm": 1.2413554191589355, "learning_rate": 9.484062368644045e-06, "loss": 0.3066, "step": 1832 }, { "epoch": 0.14664, "grad_norm": 1.7254809141159058, "learning_rate": 9.483505831779577e-06, "loss": 0.3962, "step": 1833 }, { "epoch": 0.14672, "grad_norm": 1.8376824855804443, "learning_rate": 9.482949011258693e-06, "loss": 0.5107, "step": 1834 }, { "epoch": 0.1468, "grad_norm": 1.2581253051757812, "learning_rate": 9.482391907116628e-06, "loss": 0.2733, "step": 1835 }, { "epoch": 0.14688, "grad_norm": 1.1894922256469727, "learning_rate": 9.481834519388624e-06, "loss": 0.2422, "step": 1836 }, { "epoch": 0.14696, "grad_norm": 1.6918854713439941, "learning_rate": 9.481276848109947e-06, "loss": 0.3263, "step": 1837 }, { "epoch": 0.14704, "grad_norm": 1.6978294849395752, "learning_rate": 9.480718893315876e-06, "loss": 0.3933, "step": 1838 }, { "epoch": 0.14712, "grad_norm": 1.701881766319275, "learning_rate": 9.480160655041717e-06, "loss": 0.3639, "step": 1839 }, { "epoch": 0.1472, "grad_norm": 1.7398412227630615, "learning_rate": 9.479602133322781e-06, "loss": 0.3151, "step": 1840 }, { "epoch": 0.14728, "grad_norm": 1.8988409042358398, "learning_rate": 9.479043328194409e-06, "loss": 0.4829, "step": 1841 }, { "epoch": 0.14736, "grad_norm": 1.7794743776321411, "learning_rate": 9.47848423969195e-06, "loss": 0.3512, "step": 1842 }, { "epoch": 0.14744, "grad_norm": 1.6518741846084595, "learning_rate": 9.477924867850781e-06, "loss": 0.35, "step": 1843 }, { "epoch": 0.14752, "grad_norm": 2.0679967403411865, "learning_rate": 9.477365212706286e-06, "loss": 0.3591, "step": 1844 }, { "epoch": 0.1476, "grad_norm": 1.650707721710205, "learning_rate": 9.476805274293877e-06, "loss": 0.2558, "step": 1845 }, { "epoch": 0.14768, "grad_norm": 1.2669817209243774, "learning_rate": 9.476245052648978e-06, "loss": 0.2441, "step": 1846 }, { "epoch": 0.14776, "grad_norm": 1.504056692123413, "learning_rate": 9.475684547807032e-06, "loss": 0.2845, "step": 1847 }, { "epoch": 0.14784, "grad_norm": 0.9670877456665039, "learning_rate": 9.4751237598035e-06, "loss": 0.1881, "step": 1848 }, { "epoch": 0.14792, "grad_norm": 1.458354115486145, "learning_rate": 9.474562688673861e-06, "loss": 0.3841, "step": 1849 }, { "epoch": 0.148, "grad_norm": 1.9649925231933594, "learning_rate": 9.474001334453613e-06, "loss": 0.3908, "step": 1850 }, { "epoch": 0.14808, "grad_norm": 1.5312343835830688, "learning_rate": 9.47343969717827e-06, "loss": 0.3213, "step": 1851 }, { "epoch": 0.14816, "grad_norm": 1.8272303342819214, "learning_rate": 9.472877776883365e-06, "loss": 0.3786, "step": 1852 }, { "epoch": 0.14824, "grad_norm": 1.590427041053772, "learning_rate": 9.47231557360445e-06, "loss": 0.3035, "step": 1853 }, { "epoch": 0.14832, "grad_norm": 1.4117902517318726, "learning_rate": 9.471753087377094e-06, "loss": 0.4148, "step": 1854 }, { "epoch": 0.1484, "grad_norm": 2.0782604217529297, "learning_rate": 9.471190318236883e-06, "loss": 0.7431, "step": 1855 }, { "epoch": 0.14848, "grad_norm": 1.819188117980957, "learning_rate": 9.47062726621942e-06, "loss": 0.3289, "step": 1856 }, { "epoch": 0.14856, "grad_norm": 1.4712084531784058, "learning_rate": 9.470063931360329e-06, "loss": 0.3727, "step": 1857 }, { "epoch": 0.14864, "grad_norm": 1.7734615802764893, "learning_rate": 9.46950031369525e-06, "loss": 0.3527, "step": 1858 }, { "epoch": 0.14872, "grad_norm": 1.6210514307022095, "learning_rate": 9.468936413259842e-06, "loss": 0.3714, "step": 1859 }, { "epoch": 0.1488, "grad_norm": 1.4350985288619995, "learning_rate": 9.468372230089779e-06, "loss": 0.3185, "step": 1860 }, { "epoch": 0.14888, "grad_norm": 1.2719461917877197, "learning_rate": 9.467807764220757e-06, "loss": 0.2664, "step": 1861 }, { "epoch": 0.14896, "grad_norm": 1.8364416360855103, "learning_rate": 9.467243015688486e-06, "loss": 0.5466, "step": 1862 }, { "epoch": 0.14904, "grad_norm": 1.5372499227523804, "learning_rate": 9.466677984528698e-06, "loss": 0.3587, "step": 1863 }, { "epoch": 0.14912, "grad_norm": 1.268711805343628, "learning_rate": 9.46611267077714e-06, "loss": 0.2545, "step": 1864 }, { "epoch": 0.1492, "grad_norm": 1.8258861303329468, "learning_rate": 9.465547074469576e-06, "loss": 0.4055, "step": 1865 }, { "epoch": 0.14928, "grad_norm": 1.428105354309082, "learning_rate": 9.46498119564179e-06, "loss": 0.3134, "step": 1866 }, { "epoch": 0.14936, "grad_norm": 1.4180129766464233, "learning_rate": 9.464415034329584e-06, "loss": 0.3236, "step": 1867 }, { "epoch": 0.14944, "grad_norm": 1.5813634395599365, "learning_rate": 9.463848590568776e-06, "loss": 0.3679, "step": 1868 }, { "epoch": 0.14952, "grad_norm": 1.8351691961288452, "learning_rate": 9.463281864395204e-06, "loss": 0.3565, "step": 1869 }, { "epoch": 0.1496, "grad_norm": 1.5723137855529785, "learning_rate": 9.462714855844724e-06, "loss": 0.3112, "step": 1870 }, { "epoch": 0.14968, "grad_norm": 1.319880723953247, "learning_rate": 9.462147564953206e-06, "loss": 0.2729, "step": 1871 }, { "epoch": 0.14976, "grad_norm": 1.9398796558380127, "learning_rate": 9.461579991756543e-06, "loss": 0.4355, "step": 1872 }, { "epoch": 0.14984, "grad_norm": 1.74547278881073, "learning_rate": 9.461012136290641e-06, "loss": 0.4414, "step": 1873 }, { "epoch": 0.14992, "grad_norm": 1.329624891281128, "learning_rate": 9.460443998591429e-06, "loss": 0.3745, "step": 1874 }, { "epoch": 0.15, "grad_norm": 1.7463513612747192, "learning_rate": 9.45987557869485e-06, "loss": 0.5341, "step": 1875 }, { "epoch": 0.15008, "grad_norm": 2.0810306072235107, "learning_rate": 9.459306876636865e-06, "loss": 0.5546, "step": 1876 }, { "epoch": 0.15016, "grad_norm": 1.8790583610534668, "learning_rate": 9.458737892453455e-06, "loss": 0.5832, "step": 1877 }, { "epoch": 0.15024, "grad_norm": 1.392000675201416, "learning_rate": 9.458168626180619e-06, "loss": 0.2942, "step": 1878 }, { "epoch": 0.15032, "grad_norm": 1.9226977825164795, "learning_rate": 9.457599077854369e-06, "loss": 0.3703, "step": 1879 }, { "epoch": 0.1504, "grad_norm": 1.9512839317321777, "learning_rate": 9.457029247510742e-06, "loss": 0.3894, "step": 1880 }, { "epoch": 0.15048, "grad_norm": 1.148292899131775, "learning_rate": 9.456459135185787e-06, "loss": 0.2978, "step": 1881 }, { "epoch": 0.15056, "grad_norm": 1.5737361907958984, "learning_rate": 9.455888740915573e-06, "loss": 0.3149, "step": 1882 }, { "epoch": 0.15064, "grad_norm": 1.5501254796981812, "learning_rate": 9.45531806473619e-06, "loss": 0.3572, "step": 1883 }, { "epoch": 0.15072, "grad_norm": 1.52022385597229, "learning_rate": 9.45474710668374e-06, "loss": 0.2995, "step": 1884 }, { "epoch": 0.1508, "grad_norm": 2.051806926727295, "learning_rate": 9.454175866794344e-06, "loss": 0.4449, "step": 1885 }, { "epoch": 0.15088, "grad_norm": 1.4593029022216797, "learning_rate": 9.453604345104146e-06, "loss": 0.3873, "step": 1886 }, { "epoch": 0.15096, "grad_norm": 1.7019308805465698, "learning_rate": 9.453032541649304e-06, "loss": 0.3382, "step": 1887 }, { "epoch": 0.15104, "grad_norm": 1.7120949029922485, "learning_rate": 9.452460456465991e-06, "loss": 0.3859, "step": 1888 }, { "epoch": 0.15112, "grad_norm": 1.4484901428222656, "learning_rate": 9.451888089590404e-06, "loss": 0.3352, "step": 1889 }, { "epoch": 0.1512, "grad_norm": 1.4985060691833496, "learning_rate": 9.451315441058754e-06, "loss": 0.3172, "step": 1890 }, { "epoch": 0.15128, "grad_norm": 1.5723661184310913, "learning_rate": 9.45074251090727e-06, "loss": 0.394, "step": 1891 }, { "epoch": 0.15136, "grad_norm": 1.131264567375183, "learning_rate": 9.450169299172201e-06, "loss": 0.2274, "step": 1892 }, { "epoch": 0.15144, "grad_norm": 1.2636674642562866, "learning_rate": 9.449595805889809e-06, "loss": 0.3056, "step": 1893 }, { "epoch": 0.15152, "grad_norm": 1.8149745464324951, "learning_rate": 9.449022031096378e-06, "loss": 0.3782, "step": 1894 }, { "epoch": 0.1516, "grad_norm": 2.029224157333374, "learning_rate": 9.448447974828209e-06, "loss": 0.3975, "step": 1895 }, { "epoch": 0.15168, "grad_norm": 1.4619930982589722, "learning_rate": 9.447873637121624e-06, "loss": 0.3157, "step": 1896 }, { "epoch": 0.15176, "grad_norm": 1.570886254310608, "learning_rate": 9.447299018012954e-06, "loss": 0.372, "step": 1897 }, { "epoch": 0.15184, "grad_norm": 1.4011813402175903, "learning_rate": 9.446724117538559e-06, "loss": 0.3174, "step": 1898 }, { "epoch": 0.15192, "grad_norm": 1.6975699663162231, "learning_rate": 9.446148935734804e-06, "loss": 0.3275, "step": 1899 }, { "epoch": 0.152, "grad_norm": 3.675276041030884, "learning_rate": 9.445573472638085e-06, "loss": 0.4014, "step": 1900 }, { "epoch": 0.15208, "grad_norm": 1.3942408561706543, "learning_rate": 9.444997728284808e-06, "loss": 0.3226, "step": 1901 }, { "epoch": 0.15216, "grad_norm": 1.4078660011291504, "learning_rate": 9.444421702711397e-06, "loss": 0.2997, "step": 1902 }, { "epoch": 0.15224, "grad_norm": 1.1963045597076416, "learning_rate": 9.443845395954295e-06, "loss": 0.2498, "step": 1903 }, { "epoch": 0.15232, "grad_norm": 1.53505277633667, "learning_rate": 9.443268808049966e-06, "loss": 0.3383, "step": 1904 }, { "epoch": 0.1524, "grad_norm": 1.4499201774597168, "learning_rate": 9.442691939034885e-06, "loss": 0.2753, "step": 1905 }, { "epoch": 0.15248, "grad_norm": 1.238735556602478, "learning_rate": 9.44211478894555e-06, "loss": 0.2528, "step": 1906 }, { "epoch": 0.15256, "grad_norm": 1.7521767616271973, "learning_rate": 9.441537357818476e-06, "loss": 0.4218, "step": 1907 }, { "epoch": 0.15264, "grad_norm": 1.695391297340393, "learning_rate": 9.440959645690195e-06, "loss": 0.3643, "step": 1908 }, { "epoch": 0.15272, "grad_norm": 1.8289469480514526, "learning_rate": 9.440381652597258e-06, "loss": 0.3741, "step": 1909 }, { "epoch": 0.1528, "grad_norm": 1.3207136392593384, "learning_rate": 9.43980337857623e-06, "loss": 0.3747, "step": 1910 }, { "epoch": 0.15288, "grad_norm": 1.8912609815597534, "learning_rate": 9.439224823663698e-06, "loss": 0.4154, "step": 1911 }, { "epoch": 0.15296, "grad_norm": 1.8537139892578125, "learning_rate": 9.438645987896264e-06, "loss": 0.4575, "step": 1912 }, { "epoch": 0.15304, "grad_norm": 1.393486499786377, "learning_rate": 9.438066871310552e-06, "loss": 0.2722, "step": 1913 }, { "epoch": 0.15312, "grad_norm": 1.4133944511413574, "learning_rate": 9.437487473943198e-06, "loss": 0.335, "step": 1914 }, { "epoch": 0.1532, "grad_norm": 1.4364306926727295, "learning_rate": 9.436907795830861e-06, "loss": 0.302, "step": 1915 }, { "epoch": 0.15328, "grad_norm": 1.730425477027893, "learning_rate": 9.43632783701021e-06, "loss": 0.3615, "step": 1916 }, { "epoch": 0.15336, "grad_norm": 1.5723671913146973, "learning_rate": 9.435747597517943e-06, "loss": 0.2994, "step": 1917 }, { "epoch": 0.15344, "grad_norm": 1.4602887630462646, "learning_rate": 9.435167077390768e-06, "loss": 0.3276, "step": 1918 }, { "epoch": 0.15352, "grad_norm": 1.5619248151779175, "learning_rate": 9.434586276665412e-06, "loss": 0.4369, "step": 1919 }, { "epoch": 0.1536, "grad_norm": 1.581910252571106, "learning_rate": 9.434005195378622e-06, "loss": 0.3914, "step": 1920 }, { "epoch": 0.15368, "grad_norm": 1.3012194633483887, "learning_rate": 9.433423833567156e-06, "loss": 0.2824, "step": 1921 }, { "epoch": 0.15376, "grad_norm": 1.5758806467056274, "learning_rate": 9.432842191267802e-06, "loss": 0.2919, "step": 1922 }, { "epoch": 0.15384, "grad_norm": 2.18058180809021, "learning_rate": 9.432260268517352e-06, "loss": 0.3899, "step": 1923 }, { "epoch": 0.15392, "grad_norm": 1.6730529069900513, "learning_rate": 9.431678065352625e-06, "loss": 0.5427, "step": 1924 }, { "epoch": 0.154, "grad_norm": 1.690724492073059, "learning_rate": 9.431095581810457e-06, "loss": 0.3699, "step": 1925 }, { "epoch": 0.15408, "grad_norm": 1.7529829740524292, "learning_rate": 9.430512817927698e-06, "loss": 0.3121, "step": 1926 }, { "epoch": 0.15416, "grad_norm": 1.5383788347244263, "learning_rate": 9.429929773741216e-06, "loss": 0.389, "step": 1927 }, { "epoch": 0.15424, "grad_norm": 2.017871141433716, "learning_rate": 9.429346449287902e-06, "loss": 0.531, "step": 1928 }, { "epoch": 0.15432, "grad_norm": 1.6736904382705688, "learning_rate": 9.428762844604658e-06, "loss": 0.3876, "step": 1929 }, { "epoch": 0.1544, "grad_norm": 1.5975745916366577, "learning_rate": 9.428178959728406e-06, "loss": 0.408, "step": 1930 }, { "epoch": 0.15448, "grad_norm": 1.3620975017547607, "learning_rate": 9.427594794696089e-06, "loss": 0.2987, "step": 1931 }, { "epoch": 0.15456, "grad_norm": 1.9585412740707397, "learning_rate": 9.427010349544665e-06, "loss": 0.4924, "step": 1932 }, { "epoch": 0.15464, "grad_norm": 1.616124153137207, "learning_rate": 9.426425624311107e-06, "loss": 0.3609, "step": 1933 }, { "epoch": 0.15472, "grad_norm": 1.6203333139419556, "learning_rate": 9.425840619032411e-06, "loss": 0.343, "step": 1934 }, { "epoch": 0.1548, "grad_norm": 1.8410626649856567, "learning_rate": 9.42525533374559e-06, "loss": 0.3722, "step": 1935 }, { "epoch": 0.15488, "grad_norm": 1.4306490421295166, "learning_rate": 9.424669768487668e-06, "loss": 0.3013, "step": 1936 }, { "epoch": 0.15496, "grad_norm": 1.5979857444763184, "learning_rate": 9.424083923295698e-06, "loss": 0.4501, "step": 1937 }, { "epoch": 0.15504, "grad_norm": 1.3215088844299316, "learning_rate": 9.42349779820674e-06, "loss": 0.3814, "step": 1938 }, { "epoch": 0.15512, "grad_norm": 1.462342619895935, "learning_rate": 9.422911393257876e-06, "loss": 0.3256, "step": 1939 }, { "epoch": 0.1552, "grad_norm": 1.2837486267089844, "learning_rate": 9.422324708486208e-06, "loss": 0.3029, "step": 1940 }, { "epoch": 0.15528, "grad_norm": 1.586371660232544, "learning_rate": 9.421737743928854e-06, "loss": 0.4278, "step": 1941 }, { "epoch": 0.15536, "grad_norm": 1.5170694589614868, "learning_rate": 9.421150499622947e-06, "loss": 0.3776, "step": 1942 }, { "epoch": 0.15544, "grad_norm": 1.640691876411438, "learning_rate": 9.42056297560564e-06, "loss": 0.2805, "step": 1943 }, { "epoch": 0.15552, "grad_norm": 1.7329473495483398, "learning_rate": 9.419975171914108e-06, "loss": 0.4477, "step": 1944 }, { "epoch": 0.1556, "grad_norm": 1.7486817836761475, "learning_rate": 9.419387088585534e-06, "loss": 0.3196, "step": 1945 }, { "epoch": 0.15568, "grad_norm": 1.9604579210281372, "learning_rate": 9.418798725657125e-06, "loss": 0.373, "step": 1946 }, { "epoch": 0.15576, "grad_norm": 1.8238966464996338, "learning_rate": 9.418210083166108e-06, "loss": 0.4417, "step": 1947 }, { "epoch": 0.15584, "grad_norm": 1.3136991262435913, "learning_rate": 9.417621161149723e-06, "loss": 0.2475, "step": 1948 }, { "epoch": 0.15592, "grad_norm": 2.024016857147217, "learning_rate": 9.417031959645227e-06, "loss": 0.3924, "step": 1949 }, { "epoch": 0.156, "grad_norm": 1.4279539585113525, "learning_rate": 9.416442478689898e-06, "loss": 0.3131, "step": 1950 }, { "epoch": 0.15608, "grad_norm": 1.5639196634292603, "learning_rate": 9.415852718321032e-06, "loss": 0.3048, "step": 1951 }, { "epoch": 0.15616, "grad_norm": 1.6544080972671509, "learning_rate": 9.41526267857594e-06, "loss": 0.3371, "step": 1952 }, { "epoch": 0.15624, "grad_norm": 1.7706680297851562, "learning_rate": 9.414672359491952e-06, "loss": 0.3639, "step": 1953 }, { "epoch": 0.15632, "grad_norm": 1.6847538948059082, "learning_rate": 9.414081761106413e-06, "loss": 0.5029, "step": 1954 }, { "epoch": 0.1564, "grad_norm": 1.432637095451355, "learning_rate": 9.413490883456694e-06, "loss": 0.2809, "step": 1955 }, { "epoch": 0.15648, "grad_norm": 1.5604770183563232, "learning_rate": 9.412899726580171e-06, "loss": 0.2855, "step": 1956 }, { "epoch": 0.15656, "grad_norm": 1.5877360105514526, "learning_rate": 9.41230829051425e-06, "loss": 0.3401, "step": 1957 }, { "epoch": 0.15664, "grad_norm": 2.027043581008911, "learning_rate": 9.411716575296349e-06, "loss": 0.4226, "step": 1958 }, { "epoch": 0.15672, "grad_norm": 1.7963011264801025, "learning_rate": 9.411124580963897e-06, "loss": 0.3433, "step": 1959 }, { "epoch": 0.1568, "grad_norm": 1.6322389841079712, "learning_rate": 9.410532307554356e-06, "loss": 0.3208, "step": 1960 }, { "epoch": 0.15688, "grad_norm": 1.4425139427185059, "learning_rate": 9.409939755105193e-06, "loss": 0.3305, "step": 1961 }, { "epoch": 0.15696, "grad_norm": 2.181800365447998, "learning_rate": 9.409346923653897e-06, "loss": 0.428, "step": 1962 }, { "epoch": 0.15704, "grad_norm": 1.3570939302444458, "learning_rate": 9.408753813237974e-06, "loss": 0.3384, "step": 1963 }, { "epoch": 0.15712, "grad_norm": 1.302597999572754, "learning_rate": 9.40816042389495e-06, "loss": 0.2947, "step": 1964 }, { "epoch": 0.1572, "grad_norm": 1.9911285638809204, "learning_rate": 9.407566755662366e-06, "loss": 0.4879, "step": 1965 }, { "epoch": 0.15728, "grad_norm": 1.518795132637024, "learning_rate": 9.406972808577782e-06, "loss": 0.4356, "step": 1966 }, { "epoch": 0.15736, "grad_norm": 1.8553831577301025, "learning_rate": 9.406378582678772e-06, "loss": 0.407, "step": 1967 }, { "epoch": 0.15744, "grad_norm": 1.409943699836731, "learning_rate": 9.405784078002932e-06, "loss": 0.3465, "step": 1968 }, { "epoch": 0.15752, "grad_norm": 1.3879483938217163, "learning_rate": 9.405189294587879e-06, "loss": 0.3013, "step": 1969 }, { "epoch": 0.1576, "grad_norm": 1.4256548881530762, "learning_rate": 9.404594232471237e-06, "loss": 0.2841, "step": 1970 }, { "epoch": 0.15768, "grad_norm": 1.50364351272583, "learning_rate": 9.403998891690655e-06, "loss": 0.3085, "step": 1971 }, { "epoch": 0.15776, "grad_norm": 1.7137510776519775, "learning_rate": 9.4034032722838e-06, "loss": 0.3598, "step": 1972 }, { "epoch": 0.15784, "grad_norm": 1.9388797283172607, "learning_rate": 9.402807374288354e-06, "loss": 0.505, "step": 1973 }, { "epoch": 0.15792, "grad_norm": 1.7556933164596558, "learning_rate": 9.402211197742016e-06, "loss": 0.3934, "step": 1974 }, { "epoch": 0.158, "grad_norm": 1.7362114191055298, "learning_rate": 9.401614742682508e-06, "loss": 0.3839, "step": 1975 }, { "epoch": 0.15808, "grad_norm": 1.413800597190857, "learning_rate": 9.40101800914756e-06, "loss": 0.3378, "step": 1976 }, { "epoch": 0.15816, "grad_norm": 1.6335464715957642, "learning_rate": 9.40042099717493e-06, "loss": 0.4107, "step": 1977 }, { "epoch": 0.15824, "grad_norm": 1.9229556322097778, "learning_rate": 9.399823706802386e-06, "loss": 0.5094, "step": 1978 }, { "epoch": 0.15832, "grad_norm": 1.2669042348861694, "learning_rate": 9.399226138067721e-06, "loss": 0.304, "step": 1979 }, { "epoch": 0.1584, "grad_norm": 1.4909025430679321, "learning_rate": 9.398628291008735e-06, "loss": 0.3361, "step": 1980 }, { "epoch": 0.15848, "grad_norm": 1.4694348573684692, "learning_rate": 9.398030165663257e-06, "loss": 0.3228, "step": 1981 }, { "epoch": 0.15856, "grad_norm": 1.7894591093063354, "learning_rate": 9.397431762069124e-06, "loss": 0.4964, "step": 1982 }, { "epoch": 0.15864, "grad_norm": 1.3913213014602661, "learning_rate": 9.396833080264198e-06, "loss": 0.3159, "step": 1983 }, { "epoch": 0.15872, "grad_norm": 1.4307069778442383, "learning_rate": 9.396234120286356e-06, "loss": 0.4041, "step": 1984 }, { "epoch": 0.1588, "grad_norm": 1.663399577140808, "learning_rate": 9.39563488217349e-06, "loss": 0.3734, "step": 1985 }, { "epoch": 0.15888, "grad_norm": 2.0649588108062744, "learning_rate": 9.395035365963514e-06, "loss": 0.4112, "step": 1986 }, { "epoch": 0.15896, "grad_norm": 1.6770638227462769, "learning_rate": 9.394435571694356e-06, "loss": 0.3357, "step": 1987 }, { "epoch": 0.15904, "grad_norm": 2.003188371658325, "learning_rate": 9.393835499403963e-06, "loss": 0.4054, "step": 1988 }, { "epoch": 0.15912, "grad_norm": 1.657409906387329, "learning_rate": 9.393235149130299e-06, "loss": 0.3624, "step": 1989 }, { "epoch": 0.1592, "grad_norm": 1.6382983922958374, "learning_rate": 9.392634520911348e-06, "loss": 0.4006, "step": 1990 }, { "epoch": 0.15928, "grad_norm": 2.041346549987793, "learning_rate": 9.392033614785108e-06, "loss": 0.3929, "step": 1991 }, { "epoch": 0.15936, "grad_norm": 1.2810925245285034, "learning_rate": 9.391432430789597e-06, "loss": 0.2469, "step": 1992 }, { "epoch": 0.15944, "grad_norm": 1.6841486692428589, "learning_rate": 9.390830968962849e-06, "loss": 0.3423, "step": 1993 }, { "epoch": 0.15952, "grad_norm": 1.7549775838851929, "learning_rate": 9.390229229342918e-06, "loss": 0.3976, "step": 1994 }, { "epoch": 0.1596, "grad_norm": 1.5098248720169067, "learning_rate": 9.389627211967874e-06, "loss": 0.3533, "step": 1995 }, { "epoch": 0.15968, "grad_norm": 1.392735242843628, "learning_rate": 9.389024916875805e-06, "loss": 0.4128, "step": 1996 }, { "epoch": 0.15976, "grad_norm": 1.8983796834945679, "learning_rate": 9.388422344104812e-06, "loss": 0.4626, "step": 1997 }, { "epoch": 0.15984, "grad_norm": 1.4891161918640137, "learning_rate": 9.387819493693025e-06, "loss": 0.3736, "step": 1998 }, { "epoch": 0.15992, "grad_norm": 1.8602221012115479, "learning_rate": 9.387216365678578e-06, "loss": 0.4016, "step": 1999 }, { "epoch": 0.16, "grad_norm": 1.5026849508285522, "learning_rate": 9.38661296009963e-06, "loss": 0.3062, "step": 2000 }, { "epoch": 0.16008, "grad_norm": 1.7527680397033691, "learning_rate": 9.38600927699436e-06, "loss": 0.3501, "step": 2001 }, { "epoch": 0.16016, "grad_norm": 1.6725231409072876, "learning_rate": 9.385405316400957e-06, "loss": 0.4831, "step": 2002 }, { "epoch": 0.16024, "grad_norm": 1.6204967498779297, "learning_rate": 9.384801078357635e-06, "loss": 0.3016, "step": 2003 }, { "epoch": 0.16032, "grad_norm": 1.6982675790786743, "learning_rate": 9.38419656290262e-06, "loss": 0.5008, "step": 2004 }, { "epoch": 0.1604, "grad_norm": 1.5643174648284912, "learning_rate": 9.383591770074156e-06, "loss": 0.302, "step": 2005 }, { "epoch": 0.16048, "grad_norm": 1.7776808738708496, "learning_rate": 9.38298669991051e-06, "loss": 0.3796, "step": 2006 }, { "epoch": 0.16056, "grad_norm": 1.6092027425765991, "learning_rate": 9.38238135244996e-06, "loss": 0.3388, "step": 2007 }, { "epoch": 0.16064, "grad_norm": 1.8656328916549683, "learning_rate": 9.381775727730807e-06, "loss": 0.3574, "step": 2008 }, { "epoch": 0.16072, "grad_norm": 1.6534117460250854, "learning_rate": 9.381169825791364e-06, "loss": 0.4165, "step": 2009 }, { "epoch": 0.1608, "grad_norm": 1.7371861934661865, "learning_rate": 9.380563646669967e-06, "loss": 0.41, "step": 2010 }, { "epoch": 0.16088, "grad_norm": 1.6235700845718384, "learning_rate": 9.379957190404966e-06, "loss": 0.3675, "step": 2011 }, { "epoch": 0.16096, "grad_norm": 1.9247312545776367, "learning_rate": 9.379350457034726e-06, "loss": 0.4458, "step": 2012 }, { "epoch": 0.16104, "grad_norm": 1.2080788612365723, "learning_rate": 9.378743446597635e-06, "loss": 0.2824, "step": 2013 }, { "epoch": 0.16112, "grad_norm": 1.8663572072982788, "learning_rate": 9.378136159132101e-06, "loss": 0.3204, "step": 2014 }, { "epoch": 0.1612, "grad_norm": 1.4532349109649658, "learning_rate": 9.37752859467654e-06, "loss": 0.3409, "step": 2015 }, { "epoch": 0.16128, "grad_norm": 1.467343807220459, "learning_rate": 9.376920753269391e-06, "loss": 0.3027, "step": 2016 }, { "epoch": 0.16136, "grad_norm": 1.7154619693756104, "learning_rate": 9.376312634949114e-06, "loss": 0.3916, "step": 2017 }, { "epoch": 0.16144, "grad_norm": 1.4569969177246094, "learning_rate": 9.375704239754178e-06, "loss": 0.316, "step": 2018 }, { "epoch": 0.16152, "grad_norm": 1.6265618801116943, "learning_rate": 9.375095567723076e-06, "loss": 0.29, "step": 2019 }, { "epoch": 0.1616, "grad_norm": 1.7646688222885132, "learning_rate": 9.374486618894316e-06, "loss": 0.4374, "step": 2020 }, { "epoch": 0.16168, "grad_norm": 1.7426738739013672, "learning_rate": 9.373877393306426e-06, "loss": 0.3223, "step": 2021 }, { "epoch": 0.16176, "grad_norm": 1.8202601671218872, "learning_rate": 9.373267890997949e-06, "loss": 0.3962, "step": 2022 }, { "epoch": 0.16184, "grad_norm": 1.5177546739578247, "learning_rate": 9.372658112007442e-06, "loss": 0.2866, "step": 2023 }, { "epoch": 0.16192, "grad_norm": 1.7819550037384033, "learning_rate": 9.37204805637349e-06, "loss": 0.3551, "step": 2024 }, { "epoch": 0.162, "grad_norm": 1.8583236932754517, "learning_rate": 9.371437724134687e-06, "loss": 0.3201, "step": 2025 }, { "epoch": 0.16208, "grad_norm": 1.4678624868392944, "learning_rate": 9.370827115329644e-06, "loss": 0.3144, "step": 2026 }, { "epoch": 0.16216, "grad_norm": 1.5797638893127441, "learning_rate": 9.370216229996995e-06, "loss": 0.2976, "step": 2027 }, { "epoch": 0.16224, "grad_norm": 1.471345067024231, "learning_rate": 9.369605068175388e-06, "loss": 0.2832, "step": 2028 }, { "epoch": 0.16232, "grad_norm": 2.2264575958251953, "learning_rate": 9.368993629903489e-06, "loss": 0.4478, "step": 2029 }, { "epoch": 0.1624, "grad_norm": 2.1455178260803223, "learning_rate": 9.368381915219982e-06, "loss": 0.455, "step": 2030 }, { "epoch": 0.16248, "grad_norm": 1.47684907913208, "learning_rate": 9.367769924163568e-06, "loss": 0.2801, "step": 2031 }, { "epoch": 0.16256, "grad_norm": 1.5183435678482056, "learning_rate": 9.367157656772965e-06, "loss": 0.367, "step": 2032 }, { "epoch": 0.16264, "grad_norm": 1.7493641376495361, "learning_rate": 9.366545113086909e-06, "loss": 0.3876, "step": 2033 }, { "epoch": 0.16272, "grad_norm": 1.9206058979034424, "learning_rate": 9.365932293144156e-06, "loss": 0.3943, "step": 2034 }, { "epoch": 0.1628, "grad_norm": 1.4696857929229736, "learning_rate": 9.365319196983474e-06, "loss": 0.316, "step": 2035 }, { "epoch": 0.16288, "grad_norm": 1.558495044708252, "learning_rate": 9.364705824643653e-06, "loss": 0.3151, "step": 2036 }, { "epoch": 0.16296, "grad_norm": 1.9205808639526367, "learning_rate": 9.364092176163499e-06, "loss": 0.3735, "step": 2037 }, { "epoch": 0.16304, "grad_norm": 2.3546855449676514, "learning_rate": 9.363478251581834e-06, "loss": 0.4612, "step": 2038 }, { "epoch": 0.16312, "grad_norm": 1.5820739269256592, "learning_rate": 9.362864050937503e-06, "loss": 0.3454, "step": 2039 }, { "epoch": 0.1632, "grad_norm": 1.4024479389190674, "learning_rate": 9.36224957426936e-06, "loss": 0.284, "step": 2040 }, { "epoch": 0.16328, "grad_norm": 1.346354603767395, "learning_rate": 9.361634821616284e-06, "loss": 0.3249, "step": 2041 }, { "epoch": 0.16336, "grad_norm": 1.496208667755127, "learning_rate": 9.361019793017164e-06, "loss": 0.4657, "step": 2042 }, { "epoch": 0.16344, "grad_norm": 1.449812650680542, "learning_rate": 9.360404488510916e-06, "loss": 0.332, "step": 2043 }, { "epoch": 0.16352, "grad_norm": 1.5065429210662842, "learning_rate": 9.359788908136464e-06, "loss": 0.3212, "step": 2044 }, { "epoch": 0.1636, "grad_norm": 1.5385485887527466, "learning_rate": 9.359173051932758e-06, "loss": 0.35, "step": 2045 }, { "epoch": 0.16368, "grad_norm": 1.4125738143920898, "learning_rate": 9.358556919938759e-06, "loss": 0.3793, "step": 2046 }, { "epoch": 0.16376, "grad_norm": 1.4307483434677124, "learning_rate": 9.357940512193446e-06, "loss": 0.3367, "step": 2047 }, { "epoch": 0.16384, "grad_norm": 1.469079613685608, "learning_rate": 9.357323828735818e-06, "loss": 0.3201, "step": 2048 }, { "epoch": 0.16392, "grad_norm": 1.462432861328125, "learning_rate": 9.356706869604892e-06, "loss": 0.2956, "step": 2049 }, { "epoch": 0.164, "grad_norm": 1.8476369380950928, "learning_rate": 9.3560896348397e-06, "loss": 0.4874, "step": 2050 }, { "epoch": 0.16408, "grad_norm": 1.6221674680709839, "learning_rate": 9.355472124479292e-06, "loss": 0.2875, "step": 2051 }, { "epoch": 0.16416, "grad_norm": 1.772452473640442, "learning_rate": 9.354854338562735e-06, "loss": 0.3395, "step": 2052 }, { "epoch": 0.16424, "grad_norm": 1.8067185878753662, "learning_rate": 9.354236277129119e-06, "loss": 0.4001, "step": 2053 }, { "epoch": 0.16432, "grad_norm": 1.7058254480361938, "learning_rate": 9.35361794021754e-06, "loss": 0.48, "step": 2054 }, { "epoch": 0.1644, "grad_norm": 1.4677443504333496, "learning_rate": 9.352999327867122e-06, "loss": 0.268, "step": 2055 }, { "epoch": 0.16448, "grad_norm": 1.252163052558899, "learning_rate": 9.352380440117002e-06, "loss": 0.2939, "step": 2056 }, { "epoch": 0.16456, "grad_norm": 1.5425444841384888, "learning_rate": 9.351761277006332e-06, "loss": 0.3803, "step": 2057 }, { "epoch": 0.16464, "grad_norm": 1.7046698331832886, "learning_rate": 9.351141838574291e-06, "loss": 0.3808, "step": 2058 }, { "epoch": 0.16472, "grad_norm": 1.4355201721191406, "learning_rate": 9.350522124860063e-06, "loss": 0.2472, "step": 2059 }, { "epoch": 0.1648, "grad_norm": 1.3044764995574951, "learning_rate": 9.349902135902857e-06, "loss": 0.27, "step": 2060 }, { "epoch": 0.16488, "grad_norm": 1.7062314748764038, "learning_rate": 9.349281871741898e-06, "loss": 0.3372, "step": 2061 }, { "epoch": 0.16496, "grad_norm": 1.5979080200195312, "learning_rate": 9.348661332416428e-06, "loss": 0.3478, "step": 2062 }, { "epoch": 0.16504, "grad_norm": 1.3202630281448364, "learning_rate": 9.348040517965704e-06, "loss": 0.2525, "step": 2063 }, { "epoch": 0.16512, "grad_norm": 1.7079370021820068, "learning_rate": 9.347419428429007e-06, "loss": 0.4255, "step": 2064 }, { "epoch": 0.1652, "grad_norm": 2.1258163452148438, "learning_rate": 9.34679806384563e-06, "loss": 0.4046, "step": 2065 }, { "epoch": 0.16528, "grad_norm": 1.4599392414093018, "learning_rate": 9.346176424254883e-06, "loss": 0.3442, "step": 2066 }, { "epoch": 0.16536, "grad_norm": 1.5100538730621338, "learning_rate": 9.345554509696096e-06, "loss": 0.314, "step": 2067 }, { "epoch": 0.16544, "grad_norm": 1.472424030303955, "learning_rate": 9.344932320208615e-06, "loss": 0.3324, "step": 2068 }, { "epoch": 0.16552, "grad_norm": 1.7998536825180054, "learning_rate": 9.344309855831806e-06, "loss": 0.4959, "step": 2069 }, { "epoch": 0.1656, "grad_norm": 1.8450273275375366, "learning_rate": 9.343687116605045e-06, "loss": 0.3331, "step": 2070 }, { "epoch": 0.16568, "grad_norm": 1.7891483306884766, "learning_rate": 9.343064102567738e-06, "loss": 0.3411, "step": 2071 }, { "epoch": 0.16576, "grad_norm": 1.251073956489563, "learning_rate": 9.342440813759294e-06, "loss": 0.2926, "step": 2072 }, { "epoch": 0.16584, "grad_norm": 1.9193722009658813, "learning_rate": 9.341817250219153e-06, "loss": 0.4983, "step": 2073 }, { "epoch": 0.16592, "grad_norm": 1.5404998064041138, "learning_rate": 9.34119341198676e-06, "loss": 0.4395, "step": 2074 }, { "epoch": 0.166, "grad_norm": 1.6261802911758423, "learning_rate": 9.340569299101584e-06, "loss": 0.382, "step": 2075 }, { "epoch": 0.16608, "grad_norm": 1.4044370651245117, "learning_rate": 9.339944911603116e-06, "loss": 0.3091, "step": 2076 }, { "epoch": 0.16616, "grad_norm": 1.707542061805725, "learning_rate": 9.339320249530851e-06, "loss": 0.3379, "step": 2077 }, { "epoch": 0.16624, "grad_norm": 2.2420706748962402, "learning_rate": 9.338695312924317e-06, "loss": 0.4235, "step": 2078 }, { "epoch": 0.16632, "grad_norm": 1.7224174737930298, "learning_rate": 9.338070101823046e-06, "loss": 0.3715, "step": 2079 }, { "epoch": 0.1664, "grad_norm": 1.4423843622207642, "learning_rate": 9.337444616266595e-06, "loss": 0.3355, "step": 2080 }, { "epoch": 0.16648, "grad_norm": 1.2990514039993286, "learning_rate": 9.336818856294535e-06, "loss": 0.2665, "step": 2081 }, { "epoch": 0.16656, "grad_norm": 1.7929611206054688, "learning_rate": 9.336192821946459e-06, "loss": 0.34, "step": 2082 }, { "epoch": 0.16664, "grad_norm": 1.742226243019104, "learning_rate": 9.33556651326197e-06, "loss": 0.3516, "step": 2083 }, { "epoch": 0.16672, "grad_norm": 1.5436040163040161, "learning_rate": 9.334939930280698e-06, "loss": 0.2721, "step": 2084 }, { "epoch": 0.1668, "grad_norm": 2.2682416439056396, "learning_rate": 9.334313073042279e-06, "loss": 0.4935, "step": 2085 }, { "epoch": 0.16688, "grad_norm": 1.4342129230499268, "learning_rate": 9.333685941586375e-06, "loss": 0.382, "step": 2086 }, { "epoch": 0.16696, "grad_norm": 1.3763701915740967, "learning_rate": 9.333058535952661e-06, "loss": 0.2874, "step": 2087 }, { "epoch": 0.16704, "grad_norm": 1.4731640815734863, "learning_rate": 9.332430856180831e-06, "loss": 0.3329, "step": 2088 }, { "epoch": 0.16712, "grad_norm": 1.6573126316070557, "learning_rate": 9.3318029023106e-06, "loss": 0.4924, "step": 2089 }, { "epoch": 0.1672, "grad_norm": 1.5075602531433105, "learning_rate": 9.331174674381692e-06, "loss": 0.3175, "step": 2090 }, { "epoch": 0.16728, "grad_norm": 1.938504934310913, "learning_rate": 9.330546172433855e-06, "loss": 0.4211, "step": 2091 }, { "epoch": 0.16736, "grad_norm": 1.2835865020751953, "learning_rate": 9.329917396506851e-06, "loss": 0.2607, "step": 2092 }, { "epoch": 0.16744, "grad_norm": 1.6493488550186157, "learning_rate": 9.329288346640462e-06, "loss": 0.3207, "step": 2093 }, { "epoch": 0.16752, "grad_norm": 1.4919483661651611, "learning_rate": 9.328659022874486e-06, "loss": 0.3066, "step": 2094 }, { "epoch": 0.1676, "grad_norm": 1.7278863191604614, "learning_rate": 9.328029425248736e-06, "loss": 0.4222, "step": 2095 }, { "epoch": 0.16768, "grad_norm": 1.574093222618103, "learning_rate": 9.327399553803047e-06, "loss": 0.3571, "step": 2096 }, { "epoch": 0.16776, "grad_norm": 1.94347083568573, "learning_rate": 9.326769408577266e-06, "loss": 0.4714, "step": 2097 }, { "epoch": 0.16784, "grad_norm": 1.8207807540893555, "learning_rate": 9.326138989611265e-06, "loss": 0.3273, "step": 2098 }, { "epoch": 0.16792, "grad_norm": 1.5437902212142944, "learning_rate": 9.325508296944922e-06, "loss": 0.3561, "step": 2099 }, { "epoch": 0.168, "grad_norm": 1.638817310333252, "learning_rate": 9.324877330618143e-06, "loss": 0.3146, "step": 2100 }, { "epoch": 0.16808, "grad_norm": 1.3537306785583496, "learning_rate": 9.324246090670847e-06, "loss": 0.263, "step": 2101 }, { "epoch": 0.16816, "grad_norm": 1.4852831363677979, "learning_rate": 9.32361457714297e-06, "loss": 0.2881, "step": 2102 }, { "epoch": 0.16824, "grad_norm": 1.9877256155014038, "learning_rate": 9.322982790074467e-06, "loss": 0.425, "step": 2103 }, { "epoch": 0.16832, "grad_norm": 1.4754945039749146, "learning_rate": 9.322350729505305e-06, "loss": 0.2753, "step": 2104 }, { "epoch": 0.1684, "grad_norm": 1.820770502090454, "learning_rate": 9.321718395475475e-06, "loss": 0.3354, "step": 2105 }, { "epoch": 0.16848, "grad_norm": 1.3193517923355103, "learning_rate": 9.321085788024983e-06, "loss": 0.2598, "step": 2106 }, { "epoch": 0.16856, "grad_norm": 1.3211908340454102, "learning_rate": 9.320452907193854e-06, "loss": 0.2607, "step": 2107 }, { "epoch": 0.16864, "grad_norm": 2.045539140701294, "learning_rate": 9.319819753022123e-06, "loss": 0.3862, "step": 2108 }, { "epoch": 0.16872, "grad_norm": 1.5828810930252075, "learning_rate": 9.319186325549851e-06, "loss": 0.478, "step": 2109 }, { "epoch": 0.1688, "grad_norm": 1.4170411825180054, "learning_rate": 9.318552624817114e-06, "loss": 0.323, "step": 2110 }, { "epoch": 0.16888, "grad_norm": 1.40409255027771, "learning_rate": 9.317918650864e-06, "loss": 0.2817, "step": 2111 }, { "epoch": 0.16896, "grad_norm": 1.6362119913101196, "learning_rate": 9.317284403730622e-06, "loss": 0.3455, "step": 2112 }, { "epoch": 0.16904, "grad_norm": 1.4493852853775024, "learning_rate": 9.316649883457104e-06, "loss": 0.3114, "step": 2113 }, { "epoch": 0.16912, "grad_norm": 1.9049961566925049, "learning_rate": 9.316015090083595e-06, "loss": 0.3715, "step": 2114 }, { "epoch": 0.1692, "grad_norm": 1.3683356046676636, "learning_rate": 9.315380023650248e-06, "loss": 0.2945, "step": 2115 }, { "epoch": 0.16928, "grad_norm": 2.296933889389038, "learning_rate": 9.31474468419725e-06, "loss": 0.445, "step": 2116 }, { "epoch": 0.16936, "grad_norm": 1.487442970275879, "learning_rate": 9.314109071764793e-06, "loss": 0.2976, "step": 2117 }, { "epoch": 0.16944, "grad_norm": 1.581235647201538, "learning_rate": 9.313473186393087e-06, "loss": 0.3736, "step": 2118 }, { "epoch": 0.16952, "grad_norm": 1.685365080833435, "learning_rate": 9.312837028122368e-06, "loss": 0.3519, "step": 2119 }, { "epoch": 0.1696, "grad_norm": 1.5775948762893677, "learning_rate": 9.312200596992879e-06, "loss": 0.3245, "step": 2120 }, { "epoch": 0.16968, "grad_norm": 1.5085936784744263, "learning_rate": 9.311563893044888e-06, "loss": 0.2838, "step": 2121 }, { "epoch": 0.16976, "grad_norm": 1.522886037826538, "learning_rate": 9.310926916318677e-06, "loss": 0.3361, "step": 2122 }, { "epoch": 0.16984, "grad_norm": 1.5970796346664429, "learning_rate": 9.310289666854543e-06, "loss": 0.4171, "step": 2123 }, { "epoch": 0.16992, "grad_norm": 1.9791122674942017, "learning_rate": 9.309652144692804e-06, "loss": 0.4402, "step": 2124 }, { "epoch": 0.17, "grad_norm": 2.1077051162719727, "learning_rate": 9.309014349873795e-06, "loss": 0.5385, "step": 2125 }, { "epoch": 0.17008, "grad_norm": 1.4200516939163208, "learning_rate": 9.308376282437866e-06, "loss": 0.306, "step": 2126 }, { "epoch": 0.17016, "grad_norm": 1.521388053894043, "learning_rate": 9.307737942425385e-06, "loss": 0.3083, "step": 2127 }, { "epoch": 0.17024, "grad_norm": 2.1909584999084473, "learning_rate": 9.307099329876736e-06, "loss": 0.3912, "step": 2128 }, { "epoch": 0.17032, "grad_norm": 1.6918203830718994, "learning_rate": 9.306460444832327e-06, "loss": 0.3992, "step": 2129 }, { "epoch": 0.1704, "grad_norm": 1.7204002141952515, "learning_rate": 9.305821287332575e-06, "loss": 0.3076, "step": 2130 }, { "epoch": 0.17048, "grad_norm": 1.634256362915039, "learning_rate": 9.305181857417917e-06, "loss": 0.3281, "step": 2131 }, { "epoch": 0.17056, "grad_norm": 1.616892695426941, "learning_rate": 9.304542155128806e-06, "loss": 0.3026, "step": 2132 }, { "epoch": 0.17064, "grad_norm": 1.3723448514938354, "learning_rate": 9.30390218050572e-06, "loss": 0.3969, "step": 2133 }, { "epoch": 0.17072, "grad_norm": 1.7421725988388062, "learning_rate": 9.303261933589141e-06, "loss": 0.3514, "step": 2134 }, { "epoch": 0.1708, "grad_norm": 1.6025031805038452, "learning_rate": 9.302621414419577e-06, "loss": 0.3093, "step": 2135 }, { "epoch": 0.17088, "grad_norm": 1.4779335260391235, "learning_rate": 9.301980623037556e-06, "loss": 0.2883, "step": 2136 }, { "epoch": 0.17096, "grad_norm": 1.6824702024459839, "learning_rate": 9.301339559483614e-06, "loss": 0.3437, "step": 2137 }, { "epoch": 0.17104, "grad_norm": 1.7231637239456177, "learning_rate": 9.30069822379831e-06, "loss": 0.5239, "step": 2138 }, { "epoch": 0.17112, "grad_norm": 1.4851043224334717, "learning_rate": 9.30005661602222e-06, "loss": 0.3925, "step": 2139 }, { "epoch": 0.1712, "grad_norm": 1.611282229423523, "learning_rate": 9.299414736195936e-06, "loss": 0.316, "step": 2140 }, { "epoch": 0.17128, "grad_norm": 1.8075153827667236, "learning_rate": 9.298772584360068e-06, "loss": 0.4018, "step": 2141 }, { "epoch": 0.17136, "grad_norm": 1.6185144186019897, "learning_rate": 9.298130160555241e-06, "loss": 0.3886, "step": 2142 }, { "epoch": 0.17144, "grad_norm": 1.4245575666427612, "learning_rate": 9.297487464822101e-06, "loss": 0.2893, "step": 2143 }, { "epoch": 0.17152, "grad_norm": 1.7352980375289917, "learning_rate": 9.296844497201309e-06, "loss": 0.3129, "step": 2144 }, { "epoch": 0.1716, "grad_norm": 1.7901087999343872, "learning_rate": 9.296201257733542e-06, "loss": 0.4773, "step": 2145 }, { "epoch": 0.17168, "grad_norm": 1.4795407056808472, "learning_rate": 9.295557746459498e-06, "loss": 0.2931, "step": 2146 }, { "epoch": 0.17176, "grad_norm": 1.349307894706726, "learning_rate": 9.294913963419887e-06, "loss": 0.2764, "step": 2147 }, { "epoch": 0.17184, "grad_norm": 1.5399342775344849, "learning_rate": 9.29426990865544e-06, "loss": 0.3619, "step": 2148 }, { "epoch": 0.17192, "grad_norm": 1.2979024648666382, "learning_rate": 9.293625582206907e-06, "loss": 0.2828, "step": 2149 }, { "epoch": 0.172, "grad_norm": 1.999293565750122, "learning_rate": 9.292980984115048e-06, "loss": 0.4908, "step": 2150 }, { "epoch": 0.17208, "grad_norm": 1.910706877708435, "learning_rate": 9.292336114420645e-06, "loss": 0.3672, "step": 2151 }, { "epoch": 0.17216, "grad_norm": 1.7465935945510864, "learning_rate": 9.2916909731645e-06, "loss": 0.447, "step": 2152 }, { "epoch": 0.17224, "grad_norm": 1.3406716585159302, "learning_rate": 9.291045560387428e-06, "loss": 0.3121, "step": 2153 }, { "epoch": 0.17232, "grad_norm": 1.4294350147247314, "learning_rate": 9.290399876130261e-06, "loss": 0.3287, "step": 2154 }, { "epoch": 0.1724, "grad_norm": 1.1393554210662842, "learning_rate": 9.289753920433848e-06, "loss": 0.2707, "step": 2155 }, { "epoch": 0.17248, "grad_norm": 1.8590903282165527, "learning_rate": 9.28910769333906e-06, "loss": 0.4045, "step": 2156 }, { "epoch": 0.17256, "grad_norm": 1.8060451745986938, "learning_rate": 9.288461194886778e-06, "loss": 0.4324, "step": 2157 }, { "epoch": 0.17264, "grad_norm": 1.4243879318237305, "learning_rate": 9.287814425117907e-06, "loss": 0.2985, "step": 2158 }, { "epoch": 0.17272, "grad_norm": 1.8122689723968506, "learning_rate": 9.287167384073364e-06, "loss": 0.4555, "step": 2159 }, { "epoch": 0.1728, "grad_norm": 1.4533016681671143, "learning_rate": 9.286520071794085e-06, "loss": 0.2583, "step": 2160 }, { "epoch": 0.17288, "grad_norm": 1.7077553272247314, "learning_rate": 9.285872488321023e-06, "loss": 0.475, "step": 2161 }, { "epoch": 0.17296, "grad_norm": 1.8486095666885376, "learning_rate": 9.285224633695151e-06, "loss": 0.4598, "step": 2162 }, { "epoch": 0.17304, "grad_norm": 1.693713903427124, "learning_rate": 9.284576507957454e-06, "loss": 0.3301, "step": 2163 }, { "epoch": 0.17312, "grad_norm": 1.5107775926589966, "learning_rate": 9.283928111148937e-06, "loss": 0.3226, "step": 2164 }, { "epoch": 0.1732, "grad_norm": 1.395003318786621, "learning_rate": 9.283279443310623e-06, "loss": 0.3354, "step": 2165 }, { "epoch": 0.17328, "grad_norm": 1.4116548299789429, "learning_rate": 9.28263050448355e-06, "loss": 0.2977, "step": 2166 }, { "epoch": 0.17336, "grad_norm": 1.5333601236343384, "learning_rate": 9.281981294708775e-06, "loss": 0.4171, "step": 2167 }, { "epoch": 0.17344, "grad_norm": 1.510820746421814, "learning_rate": 9.281331814027372e-06, "loss": 0.3506, "step": 2168 }, { "epoch": 0.17352, "grad_norm": 1.3123133182525635, "learning_rate": 9.28068206248043e-06, "loss": 0.2977, "step": 2169 }, { "epoch": 0.1736, "grad_norm": 1.606671929359436, "learning_rate": 9.280032040109057e-06, "loss": 0.3311, "step": 2170 }, { "epoch": 0.17368, "grad_norm": 1.8891907930374146, "learning_rate": 9.279381746954378e-06, "loss": 0.5067, "step": 2171 }, { "epoch": 0.17376, "grad_norm": 1.3156824111938477, "learning_rate": 9.278731183057533e-06, "loss": 0.3077, "step": 2172 }, { "epoch": 0.17384, "grad_norm": 1.8756389617919922, "learning_rate": 9.278080348459684e-06, "loss": 0.3977, "step": 2173 }, { "epoch": 0.17392, "grad_norm": 1.0538018941879272, "learning_rate": 9.277429243202007e-06, "loss": 0.244, "step": 2174 }, { "epoch": 0.174, "grad_norm": 1.7108577489852905, "learning_rate": 9.276777867325693e-06, "loss": 0.4015, "step": 2175 }, { "epoch": 0.17408, "grad_norm": 1.6555908918380737, "learning_rate": 9.276126220871952e-06, "loss": 0.4131, "step": 2176 }, { "epoch": 0.17416, "grad_norm": 1.5826455354690552, "learning_rate": 9.275474303882016e-06, "loss": 0.3234, "step": 2177 }, { "epoch": 0.17424, "grad_norm": 1.338919758796692, "learning_rate": 9.274822116397124e-06, "loss": 0.3157, "step": 2178 }, { "epoch": 0.17432, "grad_norm": 1.8238811492919922, "learning_rate": 9.274169658458543e-06, "loss": 0.3202, "step": 2179 }, { "epoch": 0.1744, "grad_norm": 1.4607664346694946, "learning_rate": 9.273516930107547e-06, "loss": 0.3517, "step": 2180 }, { "epoch": 0.17448, "grad_norm": 1.8293615579605103, "learning_rate": 9.272863931385434e-06, "loss": 0.3754, "step": 2181 }, { "epoch": 0.17456, "grad_norm": 2.1700384616851807, "learning_rate": 9.272210662333518e-06, "loss": 0.5072, "step": 2182 }, { "epoch": 0.17464, "grad_norm": 1.8814268112182617, "learning_rate": 9.27155712299313e-06, "loss": 0.366, "step": 2183 }, { "epoch": 0.17472, "grad_norm": 1.32944655418396, "learning_rate": 9.270903313405612e-06, "loss": 0.287, "step": 2184 }, { "epoch": 0.1748, "grad_norm": 1.4097766876220703, "learning_rate": 9.270249233612334e-06, "loss": 0.3646, "step": 2185 }, { "epoch": 0.17488, "grad_norm": 2.0853865146636963, "learning_rate": 9.269594883654673e-06, "loss": 0.383, "step": 2186 }, { "epoch": 0.17496, "grad_norm": 2.0160350799560547, "learning_rate": 9.268940263574032e-06, "loss": 0.5086, "step": 2187 }, { "epoch": 0.17504, "grad_norm": 1.8854643106460571, "learning_rate": 9.268285373411823e-06, "loss": 0.4704, "step": 2188 }, { "epoch": 0.17512, "grad_norm": 1.7649178504943848, "learning_rate": 9.267630213209482e-06, "loss": 0.3193, "step": 2189 }, { "epoch": 0.1752, "grad_norm": 1.8833215236663818, "learning_rate": 9.266974783008456e-06, "loss": 0.4477, "step": 2190 }, { "epoch": 0.17528, "grad_norm": 1.488447666168213, "learning_rate": 9.266319082850212e-06, "loss": 0.3464, "step": 2191 }, { "epoch": 0.17536, "grad_norm": 1.2886351346969604, "learning_rate": 9.265663112776237e-06, "loss": 0.3003, "step": 2192 }, { "epoch": 0.17544, "grad_norm": 2.210524320602417, "learning_rate": 9.265006872828028e-06, "loss": 0.4336, "step": 2193 }, { "epoch": 0.17552, "grad_norm": 1.7764190435409546, "learning_rate": 9.264350363047105e-06, "loss": 0.3219, "step": 2194 }, { "epoch": 0.1756, "grad_norm": 1.7255445718765259, "learning_rate": 9.263693583475003e-06, "loss": 0.4066, "step": 2195 }, { "epoch": 0.17568, "grad_norm": 1.7819242477416992, "learning_rate": 9.263036534153276e-06, "loss": 0.3992, "step": 2196 }, { "epoch": 0.17576, "grad_norm": 1.5859037637710571, "learning_rate": 9.262379215123489e-06, "loss": 0.4164, "step": 2197 }, { "epoch": 0.17584, "grad_norm": 1.5518395900726318, "learning_rate": 9.261721626427233e-06, "loss": 0.2977, "step": 2198 }, { "epoch": 0.17592, "grad_norm": 1.689637541770935, "learning_rate": 9.26106376810611e-06, "loss": 0.4156, "step": 2199 }, { "epoch": 0.176, "grad_norm": 1.8921533823013306, "learning_rate": 9.260405640201737e-06, "loss": 0.4744, "step": 2200 }, { "epoch": 0.17608, "grad_norm": 1.4645209312438965, "learning_rate": 9.259747242755757e-06, "loss": 0.3006, "step": 2201 }, { "epoch": 0.17616, "grad_norm": 1.5621235370635986, "learning_rate": 9.25908857580982e-06, "loss": 0.3133, "step": 2202 }, { "epoch": 0.17624, "grad_norm": 1.5191184282302856, "learning_rate": 9.258429639405602e-06, "loss": 0.3818, "step": 2203 }, { "epoch": 0.17632, "grad_norm": 1.3126143217086792, "learning_rate": 9.25777043358479e-06, "loss": 0.2928, "step": 2204 }, { "epoch": 0.1764, "grad_norm": 1.6664056777954102, "learning_rate": 9.257110958389088e-06, "loss": 0.3457, "step": 2205 }, { "epoch": 0.17648, "grad_norm": 1.7404066324234009, "learning_rate": 9.25645121386022e-06, "loss": 0.5175, "step": 2206 }, { "epoch": 0.17656, "grad_norm": 1.5736061334609985, "learning_rate": 9.255791200039925e-06, "loss": 0.3344, "step": 2207 }, { "epoch": 0.17664, "grad_norm": 1.7708841562271118, "learning_rate": 9.255130916969962e-06, "loss": 0.3816, "step": 2208 }, { "epoch": 0.17672, "grad_norm": 1.5940806865692139, "learning_rate": 9.254470364692103e-06, "loss": 0.2876, "step": 2209 }, { "epoch": 0.1768, "grad_norm": 1.2727887630462646, "learning_rate": 9.253809543248139e-06, "loss": 0.3499, "step": 2210 }, { "epoch": 0.17688, "grad_norm": 1.7963303327560425, "learning_rate": 9.253148452679878e-06, "loss": 0.4021, "step": 2211 }, { "epoch": 0.17696, "grad_norm": 1.2776786088943481, "learning_rate": 9.252487093029149e-06, "loss": 0.2827, "step": 2212 }, { "epoch": 0.17704, "grad_norm": 1.625144600868225, "learning_rate": 9.251825464337785e-06, "loss": 0.3478, "step": 2213 }, { "epoch": 0.17712, "grad_norm": 1.4690876007080078, "learning_rate": 9.251163566647655e-06, "loss": 0.3588, "step": 2214 }, { "epoch": 0.1772, "grad_norm": 1.4391189813613892, "learning_rate": 9.250501400000628e-06, "loss": 0.3249, "step": 2215 }, { "epoch": 0.17728, "grad_norm": 1.5366137027740479, "learning_rate": 9.249838964438602e-06, "loss": 0.35, "step": 2216 }, { "epoch": 0.17736, "grad_norm": 1.5445330142974854, "learning_rate": 9.249176260003482e-06, "loss": 0.3531, "step": 2217 }, { "epoch": 0.17744, "grad_norm": 1.41277277469635, "learning_rate": 9.248513286737199e-06, "loss": 0.3252, "step": 2218 }, { "epoch": 0.17752, "grad_norm": 1.4927526712417603, "learning_rate": 9.247850044681698e-06, "loss": 0.3272, "step": 2219 }, { "epoch": 0.1776, "grad_norm": 1.9397213459014893, "learning_rate": 9.247186533878936e-06, "loss": 0.4147, "step": 2220 }, { "epoch": 0.17768, "grad_norm": 1.734050989151001, "learning_rate": 9.246522754370893e-06, "loss": 0.3629, "step": 2221 }, { "epoch": 0.17776, "grad_norm": 1.57426118850708, "learning_rate": 9.245858706199565e-06, "loss": 0.3241, "step": 2222 }, { "epoch": 0.17784, "grad_norm": 1.3740696907043457, "learning_rate": 9.245194389406961e-06, "loss": 0.2866, "step": 2223 }, { "epoch": 0.17792, "grad_norm": 1.3346598148345947, "learning_rate": 9.244529804035116e-06, "loss": 0.2516, "step": 2224 }, { "epoch": 0.178, "grad_norm": 1.5931025743484497, "learning_rate": 9.24386495012607e-06, "loss": 0.4599, "step": 2225 }, { "epoch": 0.17808, "grad_norm": 1.608982801437378, "learning_rate": 9.24319982772189e-06, "loss": 0.3688, "step": 2226 }, { "epoch": 0.17816, "grad_norm": 1.6699520349502563, "learning_rate": 9.242534436864654e-06, "loss": 0.3289, "step": 2227 }, { "epoch": 0.17824, "grad_norm": 1.524670958518982, "learning_rate": 9.24186877759646e-06, "loss": 0.3446, "step": 2228 }, { "epoch": 0.17832, "grad_norm": 2.181579113006592, "learning_rate": 9.241202849959422e-06, "loss": 0.4242, "step": 2229 }, { "epoch": 0.1784, "grad_norm": 1.4129284620285034, "learning_rate": 9.240536653995671e-06, "loss": 0.3339, "step": 2230 }, { "epoch": 0.17848, "grad_norm": 1.5664548873901367, "learning_rate": 9.239870189747355e-06, "loss": 0.3472, "step": 2231 }, { "epoch": 0.17856, "grad_norm": 2.093280076980591, "learning_rate": 9.239203457256636e-06, "loss": 0.4444, "step": 2232 }, { "epoch": 0.17864, "grad_norm": 1.6080608367919922, "learning_rate": 9.238536456565702e-06, "loss": 0.4602, "step": 2233 }, { "epoch": 0.17872, "grad_norm": 1.671555519104004, "learning_rate": 9.237869187716747e-06, "loss": 0.4864, "step": 2234 }, { "epoch": 0.1788, "grad_norm": 1.5570496320724487, "learning_rate": 9.237201650751987e-06, "loss": 0.4289, "step": 2235 }, { "epoch": 0.17888, "grad_norm": 1.9579259157180786, "learning_rate": 9.23653384571366e-06, "loss": 0.373, "step": 2236 }, { "epoch": 0.17896, "grad_norm": 1.2836438417434692, "learning_rate": 9.23586577264401e-06, "loss": 0.2913, "step": 2237 }, { "epoch": 0.17904, "grad_norm": 1.651989221572876, "learning_rate": 9.235197431585305e-06, "loss": 0.3415, "step": 2238 }, { "epoch": 0.17912, "grad_norm": 1.5639806985855103, "learning_rate": 9.23452882257983e-06, "loss": 0.4252, "step": 2239 }, { "epoch": 0.1792, "grad_norm": 1.5605380535125732, "learning_rate": 9.233859945669888e-06, "loss": 0.303, "step": 2240 }, { "epoch": 0.17928, "grad_norm": 1.6003406047821045, "learning_rate": 9.23319080089779e-06, "loss": 0.3035, "step": 2241 }, { "epoch": 0.17936, "grad_norm": 1.3260300159454346, "learning_rate": 9.232521388305876e-06, "loss": 0.2764, "step": 2242 }, { "epoch": 0.17944, "grad_norm": 1.5889723300933838, "learning_rate": 9.231851707936495e-06, "loss": 0.3757, "step": 2243 }, { "epoch": 0.17952, "grad_norm": 1.5519212484359741, "learning_rate": 9.231181759832017e-06, "loss": 0.3367, "step": 2244 }, { "epoch": 0.1796, "grad_norm": 1.8289942741394043, "learning_rate": 9.230511544034826e-06, "loss": 0.3846, "step": 2245 }, { "epoch": 0.17968, "grad_norm": 1.6226454973220825, "learning_rate": 9.229841060587326e-06, "loss": 0.3169, "step": 2246 }, { "epoch": 0.17976, "grad_norm": 1.7149393558502197, "learning_rate": 9.229170309531934e-06, "loss": 0.3524, "step": 2247 }, { "epoch": 0.17984, "grad_norm": 1.6193126440048218, "learning_rate": 9.228499290911088e-06, "loss": 0.3369, "step": 2248 }, { "epoch": 0.17992, "grad_norm": 2.0542750358581543, "learning_rate": 9.22782800476724e-06, "loss": 0.3477, "step": 2249 }, { "epoch": 0.18, "grad_norm": 1.3201754093170166, "learning_rate": 9.227156451142863e-06, "loss": 0.3066, "step": 2250 }, { "epoch": 0.18008, "grad_norm": 1.7556428909301758, "learning_rate": 9.226484630080439e-06, "loss": 0.3791, "step": 2251 }, { "epoch": 0.18016, "grad_norm": 1.8350175619125366, "learning_rate": 9.225812541622474e-06, "loss": 0.3668, "step": 2252 }, { "epoch": 0.18024, "grad_norm": 1.2991888523101807, "learning_rate": 9.22514018581149e-06, "loss": 0.2949, "step": 2253 }, { "epoch": 0.18032, "grad_norm": 1.7130646705627441, "learning_rate": 9.224467562690022e-06, "loss": 0.381, "step": 2254 }, { "epoch": 0.1804, "grad_norm": 1.361568570137024, "learning_rate": 9.22379467230063e-06, "loss": 0.2958, "step": 2255 }, { "epoch": 0.18048, "grad_norm": 1.8371490240097046, "learning_rate": 9.22312151468588e-06, "loss": 0.3656, "step": 2256 }, { "epoch": 0.18056, "grad_norm": 1.5410816669464111, "learning_rate": 9.22244808988836e-06, "loss": 0.3638, "step": 2257 }, { "epoch": 0.18064, "grad_norm": 1.7891284227371216, "learning_rate": 9.22177439795068e-06, "loss": 0.4046, "step": 2258 }, { "epoch": 0.18072, "grad_norm": 1.0117267370224, "learning_rate": 9.221100438915462e-06, "loss": 0.2083, "step": 2259 }, { "epoch": 0.1808, "grad_norm": 1.6379694938659668, "learning_rate": 9.22042621282534e-06, "loss": 0.3407, "step": 2260 }, { "epoch": 0.18088, "grad_norm": 1.8293073177337646, "learning_rate": 9.219751719722974e-06, "loss": 0.3578, "step": 2261 }, { "epoch": 0.18096, "grad_norm": 1.6874363422393799, "learning_rate": 9.219076959651037e-06, "loss": 0.438, "step": 2262 }, { "epoch": 0.18104, "grad_norm": 1.3425878286361694, "learning_rate": 9.218401932652217e-06, "loss": 0.2945, "step": 2263 }, { "epoch": 0.18112, "grad_norm": 1.3295488357543945, "learning_rate": 9.21772663876922e-06, "loss": 0.3008, "step": 2264 }, { "epoch": 0.1812, "grad_norm": 1.479942798614502, "learning_rate": 9.217051078044773e-06, "loss": 0.3512, "step": 2265 }, { "epoch": 0.18128, "grad_norm": 1.6754070520401, "learning_rate": 9.216375250521614e-06, "loss": 0.3106, "step": 2266 }, { "epoch": 0.18136, "grad_norm": 1.2535821199417114, "learning_rate": 9.215699156242501e-06, "loss": 0.298, "step": 2267 }, { "epoch": 0.18144, "grad_norm": 1.4116896390914917, "learning_rate": 9.215022795250209e-06, "loss": 0.3316, "step": 2268 }, { "epoch": 0.18152, "grad_norm": 1.704809546470642, "learning_rate": 9.214346167587529e-06, "loss": 0.4056, "step": 2269 }, { "epoch": 0.1816, "grad_norm": 1.3820918798446655, "learning_rate": 9.213669273297266e-06, "loss": 0.3467, "step": 2270 }, { "epoch": 0.18168, "grad_norm": 1.1921701431274414, "learning_rate": 9.212992112422248e-06, "loss": 0.277, "step": 2271 }, { "epoch": 0.18176, "grad_norm": 1.5377929210662842, "learning_rate": 9.212314685005314e-06, "loss": 0.3292, "step": 2272 }, { "epoch": 0.18184, "grad_norm": 1.6128257513046265, "learning_rate": 9.211636991089328e-06, "loss": 0.3684, "step": 2273 }, { "epoch": 0.18192, "grad_norm": 2.1811368465423584, "learning_rate": 9.210959030717158e-06, "loss": 0.4364, "step": 2274 }, { "epoch": 0.182, "grad_norm": 1.9046738147735596, "learning_rate": 9.210280803931702e-06, "loss": 0.3347, "step": 2275 }, { "epoch": 0.18208, "grad_norm": 1.7492754459381104, "learning_rate": 9.209602310775868e-06, "loss": 0.3851, "step": 2276 }, { "epoch": 0.18216, "grad_norm": 1.6498044729232788, "learning_rate": 9.208923551292578e-06, "loss": 0.3078, "step": 2277 }, { "epoch": 0.18224, "grad_norm": 1.6144391298294067, "learning_rate": 9.208244525524782e-06, "loss": 0.4138, "step": 2278 }, { "epoch": 0.18232, "grad_norm": 1.871065616607666, "learning_rate": 9.207565233515434e-06, "loss": 0.4951, "step": 2279 }, { "epoch": 0.1824, "grad_norm": 1.9937913417816162, "learning_rate": 9.20688567530751e-06, "loss": 0.4494, "step": 2280 }, { "epoch": 0.18248, "grad_norm": 1.3968439102172852, "learning_rate": 9.206205850944009e-06, "loss": 0.2798, "step": 2281 }, { "epoch": 0.18256, "grad_norm": 1.554082989692688, "learning_rate": 9.205525760467937e-06, "loss": 0.3656, "step": 2282 }, { "epoch": 0.18264, "grad_norm": 2.032961130142212, "learning_rate": 9.204845403922321e-06, "loss": 0.4478, "step": 2283 }, { "epoch": 0.18272, "grad_norm": 1.621554970741272, "learning_rate": 9.204164781350207e-06, "loss": 0.3502, "step": 2284 }, { "epoch": 0.1828, "grad_norm": 1.4644954204559326, "learning_rate": 9.203483892794652e-06, "loss": 0.3379, "step": 2285 }, { "epoch": 0.18288, "grad_norm": 1.294154167175293, "learning_rate": 9.202802738298738e-06, "loss": 0.2404, "step": 2286 }, { "epoch": 0.18296, "grad_norm": 2.0851991176605225, "learning_rate": 9.202121317905557e-06, "loss": 0.3982, "step": 2287 }, { "epoch": 0.18304, "grad_norm": 1.1855297088623047, "learning_rate": 9.20143963165822e-06, "loss": 0.2492, "step": 2288 }, { "epoch": 0.18312, "grad_norm": 1.5839331150054932, "learning_rate": 9.200757679599857e-06, "loss": 0.2824, "step": 2289 }, { "epoch": 0.1832, "grad_norm": 1.5208096504211426, "learning_rate": 9.20007546177361e-06, "loss": 0.3692, "step": 2290 }, { "epoch": 0.18328, "grad_norm": 2.2178423404693604, "learning_rate": 9.199392978222644e-06, "loss": 0.436, "step": 2291 }, { "epoch": 0.18336, "grad_norm": 1.7429447174072266, "learning_rate": 9.198710228990132e-06, "loss": 0.4171, "step": 2292 }, { "epoch": 0.18344, "grad_norm": 1.6876320838928223, "learning_rate": 9.198027214119275e-06, "loss": 0.4079, "step": 2293 }, { "epoch": 0.18352, "grad_norm": 2.200977087020874, "learning_rate": 9.197343933653283e-06, "loss": 0.4152, "step": 2294 }, { "epoch": 0.1836, "grad_norm": 1.5130945444107056, "learning_rate": 9.196660387635384e-06, "loss": 0.2694, "step": 2295 }, { "epoch": 0.18368, "grad_norm": 1.4931762218475342, "learning_rate": 9.195976576108825e-06, "loss": 0.4342, "step": 2296 }, { "epoch": 0.18376, "grad_norm": 1.4749504327774048, "learning_rate": 9.195292499116868e-06, "loss": 0.3459, "step": 2297 }, { "epoch": 0.18384, "grad_norm": 1.4674580097198486, "learning_rate": 9.19460815670279e-06, "loss": 0.3511, "step": 2298 }, { "epoch": 0.18392, "grad_norm": 1.4509178400039673, "learning_rate": 9.193923548909891e-06, "loss": 0.279, "step": 2299 }, { "epoch": 0.184, "grad_norm": 1.370303988456726, "learning_rate": 9.193238675781482e-06, "loss": 0.2597, "step": 2300 }, { "epoch": 0.18408, "grad_norm": 1.7368767261505127, "learning_rate": 9.19255353736089e-06, "loss": 0.387, "step": 2301 }, { "epoch": 0.18416, "grad_norm": 1.8166148662567139, "learning_rate": 9.191868133691467e-06, "loss": 0.3739, "step": 2302 }, { "epoch": 0.18424, "grad_norm": 1.7067928314208984, "learning_rate": 9.191182464816572e-06, "loss": 0.3617, "step": 2303 }, { "epoch": 0.18432, "grad_norm": 1.0539000034332275, "learning_rate": 9.190496530779587e-06, "loss": 0.2726, "step": 2304 }, { "epoch": 0.1844, "grad_norm": 1.555287480354309, "learning_rate": 9.189810331623908e-06, "loss": 0.332, "step": 2305 }, { "epoch": 0.18448, "grad_norm": 1.7620913982391357, "learning_rate": 9.189123867392947e-06, "loss": 0.4206, "step": 2306 }, { "epoch": 0.18456, "grad_norm": 1.1607882976531982, "learning_rate": 9.188437138130138e-06, "loss": 0.242, "step": 2307 }, { "epoch": 0.18464, "grad_norm": 1.4528453350067139, "learning_rate": 9.187750143878924e-06, "loss": 0.2846, "step": 2308 }, { "epoch": 0.18472, "grad_norm": 1.9288702011108398, "learning_rate": 9.187062884682772e-06, "loss": 0.4231, "step": 2309 }, { "epoch": 0.1848, "grad_norm": 1.4445855617523193, "learning_rate": 9.186375360585159e-06, "loss": 0.3683, "step": 2310 }, { "epoch": 0.18488, "grad_norm": 1.6469016075134277, "learning_rate": 9.185687571629587e-06, "loss": 0.4188, "step": 2311 }, { "epoch": 0.18496, "grad_norm": 1.7436647415161133, "learning_rate": 9.184999517859566e-06, "loss": 0.3646, "step": 2312 }, { "epoch": 0.18504, "grad_norm": 2.044245719909668, "learning_rate": 9.18431119931863e-06, "loss": 0.4894, "step": 2313 }, { "epoch": 0.18512, "grad_norm": 1.396704077720642, "learning_rate": 9.183622616050323e-06, "loss": 0.3058, "step": 2314 }, { "epoch": 0.1852, "grad_norm": 1.5335050821304321, "learning_rate": 9.182933768098213e-06, "loss": 0.3617, "step": 2315 }, { "epoch": 0.18528, "grad_norm": 1.3849726915359497, "learning_rate": 9.18224465550588e-06, "loss": 0.2886, "step": 2316 }, { "epoch": 0.18536, "grad_norm": 1.3755298852920532, "learning_rate": 9.181555278316921e-06, "loss": 0.2749, "step": 2317 }, { "epoch": 0.18544, "grad_norm": 1.4264538288116455, "learning_rate": 9.180865636574951e-06, "loss": 0.369, "step": 2318 }, { "epoch": 0.18552, "grad_norm": 1.5905702114105225, "learning_rate": 9.180175730323602e-06, "loss": 0.3422, "step": 2319 }, { "epoch": 0.1856, "grad_norm": 2.0537757873535156, "learning_rate": 9.17948555960652e-06, "loss": 0.3696, "step": 2320 }, { "epoch": 0.18568, "grad_norm": 1.9900577068328857, "learning_rate": 9.178795124467372e-06, "loss": 0.5154, "step": 2321 }, { "epoch": 0.18576, "grad_norm": 1.8288344144821167, "learning_rate": 9.17810442494984e-06, "loss": 0.3817, "step": 2322 }, { "epoch": 0.18584, "grad_norm": 2.499269723892212, "learning_rate": 9.17741346109762e-06, "loss": 0.4914, "step": 2323 }, { "epoch": 0.18592, "grad_norm": 1.8195888996124268, "learning_rate": 9.176722232954426e-06, "loss": 0.3946, "step": 2324 }, { "epoch": 0.186, "grad_norm": 2.1742544174194336, "learning_rate": 9.176030740563994e-06, "loss": 0.465, "step": 2325 }, { "epoch": 0.18608, "grad_norm": 2.0490312576293945, "learning_rate": 9.175338983970071e-06, "loss": 0.3448, "step": 2326 }, { "epoch": 0.18616, "grad_norm": 1.3807387351989746, "learning_rate": 9.17464696321642e-06, "loss": 0.3351, "step": 2327 }, { "epoch": 0.18624, "grad_norm": 1.89095938205719, "learning_rate": 9.173954678346823e-06, "loss": 0.336, "step": 2328 }, { "epoch": 0.18632, "grad_norm": 1.5240287780761719, "learning_rate": 9.173262129405081e-06, "loss": 0.3218, "step": 2329 }, { "epoch": 0.1864, "grad_norm": 1.6663240194320679, "learning_rate": 9.172569316435008e-06, "loss": 0.3771, "step": 2330 }, { "epoch": 0.18648, "grad_norm": 1.4801119565963745, "learning_rate": 9.171876239480435e-06, "loss": 0.3371, "step": 2331 }, { "epoch": 0.18656, "grad_norm": 1.4088448286056519, "learning_rate": 9.17118289858521e-06, "loss": 0.351, "step": 2332 }, { "epoch": 0.18664, "grad_norm": 1.6157206296920776, "learning_rate": 9.170489293793203e-06, "loss": 0.3401, "step": 2333 }, { "epoch": 0.18672, "grad_norm": 1.7214024066925049, "learning_rate": 9.16979542514829e-06, "loss": 0.3182, "step": 2334 }, { "epoch": 0.1868, "grad_norm": 1.4012519121170044, "learning_rate": 9.169101292694376e-06, "loss": 0.3879, "step": 2335 }, { "epoch": 0.18688, "grad_norm": 1.5938009023666382, "learning_rate": 9.168406896475372e-06, "loss": 0.3759, "step": 2336 }, { "epoch": 0.18696, "grad_norm": 1.4147008657455444, "learning_rate": 9.167712236535209e-06, "loss": 0.2956, "step": 2337 }, { "epoch": 0.18704, "grad_norm": 1.8677122592926025, "learning_rate": 9.16701731291784e-06, "loss": 0.4634, "step": 2338 }, { "epoch": 0.18712, "grad_norm": 1.4628612995147705, "learning_rate": 9.166322125667229e-06, "loss": 0.2652, "step": 2339 }, { "epoch": 0.1872, "grad_norm": 1.6055920124053955, "learning_rate": 9.165626674827355e-06, "loss": 0.4288, "step": 2340 }, { "epoch": 0.18728, "grad_norm": 1.2751909494400024, "learning_rate": 9.164930960442222e-06, "loss": 0.249, "step": 2341 }, { "epoch": 0.18736, "grad_norm": 1.565497875213623, "learning_rate": 9.164234982555841e-06, "loss": 0.3797, "step": 2342 }, { "epoch": 0.18744, "grad_norm": 1.4904969930648804, "learning_rate": 9.163538741212247e-06, "loss": 0.3217, "step": 2343 }, { "epoch": 0.18752, "grad_norm": 1.5334428548812866, "learning_rate": 9.16284223645549e-06, "loss": 0.3089, "step": 2344 }, { "epoch": 0.1876, "grad_norm": 1.3491618633270264, "learning_rate": 9.16214546832963e-06, "loss": 0.3263, "step": 2345 }, { "epoch": 0.18768, "grad_norm": 1.757620930671692, "learning_rate": 9.161448436878755e-06, "loss": 0.3392, "step": 2346 }, { "epoch": 0.18776, "grad_norm": 1.591558814048767, "learning_rate": 9.160751142146962e-06, "loss": 0.3605, "step": 2347 }, { "epoch": 0.18784, "grad_norm": 1.4992103576660156, "learning_rate": 9.160053584178365e-06, "loss": 0.3162, "step": 2348 }, { "epoch": 0.18792, "grad_norm": 1.847265601158142, "learning_rate": 9.159355763017099e-06, "loss": 0.4105, "step": 2349 }, { "epoch": 0.188, "grad_norm": 1.6127371788024902, "learning_rate": 9.15865767870731e-06, "loss": 0.345, "step": 2350 }, { "epoch": 0.18808, "grad_norm": 1.6684445142745972, "learning_rate": 9.157959331293165e-06, "loss": 0.3925, "step": 2351 }, { "epoch": 0.18816, "grad_norm": 1.3808155059814453, "learning_rate": 9.157260720818843e-06, "loss": 0.3334, "step": 2352 }, { "epoch": 0.18824, "grad_norm": 1.176571249961853, "learning_rate": 9.15656184732855e-06, "loss": 0.2693, "step": 2353 }, { "epoch": 0.18832, "grad_norm": 1.5287673473358154, "learning_rate": 9.155862710866493e-06, "loss": 0.3012, "step": 2354 }, { "epoch": 0.1884, "grad_norm": 2.0192978382110596, "learning_rate": 9.15516331147691e-06, "loss": 0.5501, "step": 2355 }, { "epoch": 0.18848, "grad_norm": 1.7251996994018555, "learning_rate": 9.154463649204046e-06, "loss": 0.4322, "step": 2356 }, { "epoch": 0.18856, "grad_norm": 1.4777201414108276, "learning_rate": 9.153763724092169e-06, "loss": 0.3243, "step": 2357 }, { "epoch": 0.18864, "grad_norm": 1.573727011680603, "learning_rate": 9.15306353618556e-06, "loss": 0.3615, "step": 2358 }, { "epoch": 0.18872, "grad_norm": 1.976193904876709, "learning_rate": 9.152363085528516e-06, "loss": 0.3525, "step": 2359 }, { "epoch": 0.1888, "grad_norm": 1.72635817527771, "learning_rate": 9.151662372165354e-06, "loss": 0.357, "step": 2360 }, { "epoch": 0.18888, "grad_norm": 1.5015325546264648, "learning_rate": 9.150961396140405e-06, "loss": 0.3098, "step": 2361 }, { "epoch": 0.18896, "grad_norm": 1.459358811378479, "learning_rate": 9.15026015749802e-06, "loss": 0.3554, "step": 2362 }, { "epoch": 0.18904, "grad_norm": 1.7636947631835938, "learning_rate": 9.149558656282557e-06, "loss": 0.4678, "step": 2363 }, { "epoch": 0.18912, "grad_norm": 1.615004539489746, "learning_rate": 9.148856892538406e-06, "loss": 0.3788, "step": 2364 }, { "epoch": 0.1892, "grad_norm": 1.5762476921081543, "learning_rate": 9.14815486630996e-06, "loss": 0.3102, "step": 2365 }, { "epoch": 0.18928, "grad_norm": 1.6073107719421387, "learning_rate": 9.147452577641635e-06, "loss": 0.3938, "step": 2366 }, { "epoch": 0.18936, "grad_norm": 1.3423787355422974, "learning_rate": 9.146750026577865e-06, "loss": 0.2977, "step": 2367 }, { "epoch": 0.18944, "grad_norm": 1.3909164667129517, "learning_rate": 9.146047213163094e-06, "loss": 0.256, "step": 2368 }, { "epoch": 0.18952, "grad_norm": 1.6559034585952759, "learning_rate": 9.14534413744179e-06, "loss": 0.3731, "step": 2369 }, { "epoch": 0.1896, "grad_norm": 1.2788715362548828, "learning_rate": 9.14464079945843e-06, "loss": 0.2787, "step": 2370 }, { "epoch": 0.18968, "grad_norm": 1.9036865234375, "learning_rate": 9.143937199257518e-06, "loss": 0.3465, "step": 2371 }, { "epoch": 0.18976, "grad_norm": 1.7238149642944336, "learning_rate": 9.143233336883563e-06, "loss": 0.3865, "step": 2372 }, { "epoch": 0.18984, "grad_norm": 1.2368091344833374, "learning_rate": 9.142529212381098e-06, "loss": 0.2267, "step": 2373 }, { "epoch": 0.18992, "grad_norm": 1.7011198997497559, "learning_rate": 9.141824825794672e-06, "loss": 0.3792, "step": 2374 }, { "epoch": 0.19, "grad_norm": 1.9078706502914429, "learning_rate": 9.141120177168846e-06, "loss": 0.3637, "step": 2375 }, { "epoch": 0.19008, "grad_norm": 2.0864484310150146, "learning_rate": 9.140415266548203e-06, "loss": 0.4524, "step": 2376 }, { "epoch": 0.19016, "grad_norm": 1.4254752397537231, "learning_rate": 9.139710093977342e-06, "loss": 0.2662, "step": 2377 }, { "epoch": 0.19024, "grad_norm": 1.6436759233474731, "learning_rate": 9.139004659500874e-06, "loss": 0.3361, "step": 2378 }, { "epoch": 0.19032, "grad_norm": 2.105926752090454, "learning_rate": 9.138298963163429e-06, "loss": 0.3339, "step": 2379 }, { "epoch": 0.1904, "grad_norm": 1.5537519454956055, "learning_rate": 9.137593005009657e-06, "loss": 0.3319, "step": 2380 }, { "epoch": 0.19048, "grad_norm": 2.272524833679199, "learning_rate": 9.136886785084217e-06, "loss": 0.4195, "step": 2381 }, { "epoch": 0.19056, "grad_norm": 1.2556356191635132, "learning_rate": 9.136180303431797e-06, "loss": 0.3392, "step": 2382 }, { "epoch": 0.19064, "grad_norm": 1.5762921571731567, "learning_rate": 9.135473560097086e-06, "loss": 0.3597, "step": 2383 }, { "epoch": 0.19072, "grad_norm": 1.3007557392120361, "learning_rate": 9.1347665551248e-06, "loss": 0.2813, "step": 2384 }, { "epoch": 0.1908, "grad_norm": 1.295035719871521, "learning_rate": 9.134059288559669e-06, "loss": 0.3211, "step": 2385 }, { "epoch": 0.19088, "grad_norm": 1.5391051769256592, "learning_rate": 9.13335176044644e-06, "loss": 0.3058, "step": 2386 }, { "epoch": 0.19096, "grad_norm": 1.6031337976455688, "learning_rate": 9.132643970829876e-06, "loss": 0.3478, "step": 2387 }, { "epoch": 0.19104, "grad_norm": 1.572896122932434, "learning_rate": 9.131935919754755e-06, "loss": 0.4024, "step": 2388 }, { "epoch": 0.19112, "grad_norm": 1.4550867080688477, "learning_rate": 9.131227607265874e-06, "loss": 0.2725, "step": 2389 }, { "epoch": 0.1912, "grad_norm": 2.0746469497680664, "learning_rate": 9.130519033408045e-06, "loss": 0.4477, "step": 2390 }, { "epoch": 0.19128, "grad_norm": 1.5469022989273071, "learning_rate": 9.129810198226099e-06, "loss": 0.3219, "step": 2391 }, { "epoch": 0.19136, "grad_norm": 1.4501341581344604, "learning_rate": 9.12910110176488e-06, "loss": 0.4573, "step": 2392 }, { "epoch": 0.19144, "grad_norm": 1.1760106086730957, "learning_rate": 9.128391744069248e-06, "loss": 0.2149, "step": 2393 }, { "epoch": 0.19152, "grad_norm": 1.8944004774093628, "learning_rate": 9.127682125184085e-06, "loss": 0.4243, "step": 2394 }, { "epoch": 0.1916, "grad_norm": 1.382332682609558, "learning_rate": 9.126972245154287e-06, "loss": 0.2722, "step": 2395 }, { "epoch": 0.19168, "grad_norm": 1.5882385969161987, "learning_rate": 9.12626210402476e-06, "loss": 0.4348, "step": 2396 }, { "epoch": 0.19176, "grad_norm": 1.7748380899429321, "learning_rate": 9.12555170184044e-06, "loss": 0.3083, "step": 2397 }, { "epoch": 0.19184, "grad_norm": 1.7226957082748413, "learning_rate": 9.124841038646268e-06, "loss": 0.3241, "step": 2398 }, { "epoch": 0.19192, "grad_norm": 1.642120599746704, "learning_rate": 9.124130114487203e-06, "loss": 0.3724, "step": 2399 }, { "epoch": 0.192, "grad_norm": 1.916398048400879, "learning_rate": 9.123418929408225e-06, "loss": 0.4925, "step": 2400 }, { "epoch": 0.19208, "grad_norm": 1.2868858575820923, "learning_rate": 9.12270748345433e-06, "loss": 0.3048, "step": 2401 }, { "epoch": 0.19216, "grad_norm": 1.6964247226715088, "learning_rate": 9.121995776670527e-06, "loss": 0.3746, "step": 2402 }, { "epoch": 0.19224, "grad_norm": 1.710965871810913, "learning_rate": 9.121283809101843e-06, "loss": 0.4557, "step": 2403 }, { "epoch": 0.19232, "grad_norm": 1.4680863618850708, "learning_rate": 9.120571580793322e-06, "loss": 0.303, "step": 2404 }, { "epoch": 0.1924, "grad_norm": 1.802053451538086, "learning_rate": 9.119859091790025e-06, "loss": 0.3215, "step": 2405 }, { "epoch": 0.19248, "grad_norm": 1.676236867904663, "learning_rate": 9.119146342137029e-06, "loss": 0.4425, "step": 2406 }, { "epoch": 0.19256, "grad_norm": 1.6125231981277466, "learning_rate": 9.118433331879424e-06, "loss": 0.3079, "step": 2407 }, { "epoch": 0.19264, "grad_norm": 1.313044548034668, "learning_rate": 9.117720061062324e-06, "loss": 0.3317, "step": 2408 }, { "epoch": 0.19272, "grad_norm": 1.5678797960281372, "learning_rate": 9.117006529730853e-06, "loss": 0.311, "step": 2409 }, { "epoch": 0.1928, "grad_norm": 1.5004569292068481, "learning_rate": 9.116292737930156e-06, "loss": 0.3152, "step": 2410 }, { "epoch": 0.19288, "grad_norm": 1.86920964717865, "learning_rate": 9.115578685705391e-06, "loss": 0.3537, "step": 2411 }, { "epoch": 0.19296, "grad_norm": 1.8753119707107544, "learning_rate": 9.114864373101733e-06, "loss": 0.4428, "step": 2412 }, { "epoch": 0.19304, "grad_norm": 1.3418453931808472, "learning_rate": 9.114149800164372e-06, "loss": 0.2649, "step": 2413 }, { "epoch": 0.19312, "grad_norm": 1.6189359426498413, "learning_rate": 9.11343496693852e-06, "loss": 0.3543, "step": 2414 }, { "epoch": 0.1932, "grad_norm": 1.6323224306106567, "learning_rate": 9.112719873469403e-06, "loss": 0.3309, "step": 2415 }, { "epoch": 0.19328, "grad_norm": 1.9282023906707764, "learning_rate": 9.11200451980226e-06, "loss": 0.3991, "step": 2416 }, { "epoch": 0.19336, "grad_norm": 1.5333116054534912, "learning_rate": 9.11128890598235e-06, "loss": 0.3514, "step": 2417 }, { "epoch": 0.19344, "grad_norm": 1.4084820747375488, "learning_rate": 9.110573032054947e-06, "loss": 0.3691, "step": 2418 }, { "epoch": 0.19352, "grad_norm": 1.6476877927780151, "learning_rate": 9.109856898065343e-06, "loss": 0.3209, "step": 2419 }, { "epoch": 0.1936, "grad_norm": 1.7577415704727173, "learning_rate": 9.109140504058843e-06, "loss": 0.3974, "step": 2420 }, { "epoch": 0.19368, "grad_norm": 1.454849123954773, "learning_rate": 9.108423850080774e-06, "loss": 0.2629, "step": 2421 }, { "epoch": 0.19376, "grad_norm": 1.6696566343307495, "learning_rate": 9.107706936176474e-06, "loss": 0.3151, "step": 2422 }, { "epoch": 0.19384, "grad_norm": 2.0093162059783936, "learning_rate": 9.1069897623913e-06, "loss": 0.3786, "step": 2423 }, { "epoch": 0.19392, "grad_norm": 1.7907012701034546, "learning_rate": 9.106272328770627e-06, "loss": 0.3346, "step": 2424 }, { "epoch": 0.194, "grad_norm": 1.5353763103485107, "learning_rate": 9.105554635359843e-06, "loss": 0.3275, "step": 2425 }, { "epoch": 0.19408, "grad_norm": 1.6136126518249512, "learning_rate": 9.104836682204354e-06, "loss": 0.3316, "step": 2426 }, { "epoch": 0.19416, "grad_norm": 1.877288579940796, "learning_rate": 9.104118469349585e-06, "loss": 0.3735, "step": 2427 }, { "epoch": 0.19424, "grad_norm": 1.869982361793518, "learning_rate": 9.103399996840972e-06, "loss": 0.4767, "step": 2428 }, { "epoch": 0.19432, "grad_norm": 1.6566518545150757, "learning_rate": 9.102681264723969e-06, "loss": 0.3472, "step": 2429 }, { "epoch": 0.1944, "grad_norm": 1.631504774093628, "learning_rate": 9.101962273044053e-06, "loss": 0.3334, "step": 2430 }, { "epoch": 0.19448, "grad_norm": 1.7339836359024048, "learning_rate": 9.101243021846705e-06, "loss": 0.4071, "step": 2431 }, { "epoch": 0.19456, "grad_norm": 1.6289665699005127, "learning_rate": 9.10052351117744e-06, "loss": 0.4196, "step": 2432 }, { "epoch": 0.19464, "grad_norm": 1.76044762134552, "learning_rate": 9.099803741081767e-06, "loss": 0.4187, "step": 2433 }, { "epoch": 0.19472, "grad_norm": 1.4212559461593628, "learning_rate": 9.099083711605233e-06, "loss": 0.3407, "step": 2434 }, { "epoch": 0.1948, "grad_norm": 1.439171314239502, "learning_rate": 9.098363422793387e-06, "loss": 0.2902, "step": 2435 }, { "epoch": 0.19488, "grad_norm": 1.539538025856018, "learning_rate": 9.0976428746918e-06, "loss": 0.3686, "step": 2436 }, { "epoch": 0.19496, "grad_norm": 1.2634087800979614, "learning_rate": 9.09692206734606e-06, "loss": 0.2648, "step": 2437 }, { "epoch": 0.19504, "grad_norm": 1.2132208347320557, "learning_rate": 9.096201000801768e-06, "loss": 0.2667, "step": 2438 }, { "epoch": 0.19512, "grad_norm": 1.5921686887741089, "learning_rate": 9.095479675104543e-06, "loss": 0.3618, "step": 2439 }, { "epoch": 0.1952, "grad_norm": 1.5952152013778687, "learning_rate": 9.094758090300026e-06, "loss": 0.3662, "step": 2440 }, { "epoch": 0.19528, "grad_norm": 1.3034418821334839, "learning_rate": 9.094036246433863e-06, "loss": 0.2761, "step": 2441 }, { "epoch": 0.19536, "grad_norm": 1.6235853433609009, "learning_rate": 9.093314143551728e-06, "loss": 0.4082, "step": 2442 }, { "epoch": 0.19544, "grad_norm": 1.4469619989395142, "learning_rate": 9.092591781699302e-06, "loss": 0.3711, "step": 2443 }, { "epoch": 0.19552, "grad_norm": 1.574036717414856, "learning_rate": 9.09186916092229e-06, "loss": 0.399, "step": 2444 }, { "epoch": 0.1956, "grad_norm": 1.363416314125061, "learning_rate": 9.091146281266403e-06, "loss": 0.3059, "step": 2445 }, { "epoch": 0.19568, "grad_norm": 1.4807307720184326, "learning_rate": 9.090423142777383e-06, "loss": 0.3728, "step": 2446 }, { "epoch": 0.19576, "grad_norm": 1.6184738874435425, "learning_rate": 9.089699745500977e-06, "loss": 0.3375, "step": 2447 }, { "epoch": 0.19584, "grad_norm": 1.5262713432312012, "learning_rate": 9.08897608948295e-06, "loss": 0.3389, "step": 2448 }, { "epoch": 0.19592, "grad_norm": 1.2660731077194214, "learning_rate": 9.088252174769092e-06, "loss": 0.2845, "step": 2449 }, { "epoch": 0.196, "grad_norm": 2.0074665546417236, "learning_rate": 9.087528001405194e-06, "loss": 0.3709, "step": 2450 }, { "epoch": 0.19608, "grad_norm": 1.678610920906067, "learning_rate": 9.08680356943708e-06, "loss": 0.3842, "step": 2451 }, { "epoch": 0.19616, "grad_norm": 1.604011058807373, "learning_rate": 9.086078878910576e-06, "loss": 0.305, "step": 2452 }, { "epoch": 0.19624, "grad_norm": 1.7445040941238403, "learning_rate": 9.085353929871534e-06, "loss": 0.3833, "step": 2453 }, { "epoch": 0.19632, "grad_norm": 1.6903281211853027, "learning_rate": 9.084628722365817e-06, "loss": 0.3579, "step": 2454 }, { "epoch": 0.1964, "grad_norm": 1.587699294090271, "learning_rate": 9.08390325643931e-06, "loss": 0.324, "step": 2455 }, { "epoch": 0.19648, "grad_norm": 1.681836724281311, "learning_rate": 9.083177532137909e-06, "loss": 0.3006, "step": 2456 }, { "epoch": 0.19656, "grad_norm": 1.259889006614685, "learning_rate": 9.082451549507528e-06, "loss": 0.3757, "step": 2457 }, { "epoch": 0.19664, "grad_norm": 1.4303218126296997, "learning_rate": 9.081725308594096e-06, "loss": 0.3122, "step": 2458 }, { "epoch": 0.19672, "grad_norm": 1.956416130065918, "learning_rate": 9.080998809443563e-06, "loss": 0.5083, "step": 2459 }, { "epoch": 0.1968, "grad_norm": 1.0173094272613525, "learning_rate": 9.080272052101888e-06, "loss": 0.204, "step": 2460 }, { "epoch": 0.19688, "grad_norm": 1.6032788753509521, "learning_rate": 9.079545036615054e-06, "loss": 0.4081, "step": 2461 }, { "epoch": 0.19696, "grad_norm": 1.881056547164917, "learning_rate": 9.078817763029054e-06, "loss": 0.4511, "step": 2462 }, { "epoch": 0.19704, "grad_norm": 1.7417298555374146, "learning_rate": 9.078090231389904e-06, "loss": 0.307, "step": 2463 }, { "epoch": 0.19712, "grad_norm": 1.7411683797836304, "learning_rate": 9.077362441743632e-06, "loss": 0.3968, "step": 2464 }, { "epoch": 0.1972, "grad_norm": 1.9373418092727661, "learning_rate": 9.076634394136279e-06, "loss": 0.5019, "step": 2465 }, { "epoch": 0.19728, "grad_norm": 1.9146742820739746, "learning_rate": 9.075906088613909e-06, "loss": 0.421, "step": 2466 }, { "epoch": 0.19736, "grad_norm": 1.7911179065704346, "learning_rate": 9.075177525222597e-06, "loss": 0.3551, "step": 2467 }, { "epoch": 0.19744, "grad_norm": 2.1474406719207764, "learning_rate": 9.074448704008441e-06, "loss": 0.4482, "step": 2468 }, { "epoch": 0.19752, "grad_norm": 1.610866665840149, "learning_rate": 9.073719625017548e-06, "loss": 0.4025, "step": 2469 }, { "epoch": 0.1976, "grad_norm": 1.5338889360427856, "learning_rate": 9.072990288296044e-06, "loss": 0.3913, "step": 2470 }, { "epoch": 0.19768, "grad_norm": 1.8097115755081177, "learning_rate": 9.072260693890073e-06, "loss": 0.4809, "step": 2471 }, { "epoch": 0.19776, "grad_norm": 1.3825757503509521, "learning_rate": 9.071530841845794e-06, "loss": 0.3793, "step": 2472 }, { "epoch": 0.19784, "grad_norm": 1.5398048162460327, "learning_rate": 9.070800732209382e-06, "loss": 0.3685, "step": 2473 }, { "epoch": 0.19792, "grad_norm": 1.705269694328308, "learning_rate": 9.070070365027029e-06, "loss": 0.5052, "step": 2474 }, { "epoch": 0.198, "grad_norm": 1.5767617225646973, "learning_rate": 9.069339740344943e-06, "loss": 0.3937, "step": 2475 }, { "epoch": 0.19808, "grad_norm": 1.8536933660507202, "learning_rate": 9.068608858209347e-06, "loss": 0.5109, "step": 2476 }, { "epoch": 0.19816, "grad_norm": 1.6715956926345825, "learning_rate": 9.067877718666482e-06, "loss": 0.3558, "step": 2477 }, { "epoch": 0.19824, "grad_norm": 1.6765884160995483, "learning_rate": 9.067146321762603e-06, "loss": 0.3681, "step": 2478 }, { "epoch": 0.19832, "grad_norm": 1.7231676578521729, "learning_rate": 9.066414667543988e-06, "loss": 0.3274, "step": 2479 }, { "epoch": 0.1984, "grad_norm": 1.4541349411010742, "learning_rate": 9.065682756056922e-06, "loss": 0.3247, "step": 2480 }, { "epoch": 0.19848, "grad_norm": 1.4403581619262695, "learning_rate": 9.064950587347711e-06, "loss": 0.4417, "step": 2481 }, { "epoch": 0.19856, "grad_norm": 1.7160850763320923, "learning_rate": 9.06421816146268e-06, "loss": 0.3751, "step": 2482 }, { "epoch": 0.19864, "grad_norm": 1.7260886430740356, "learning_rate": 9.063485478448164e-06, "loss": 0.3181, "step": 2483 }, { "epoch": 0.19872, "grad_norm": 2.1682159900665283, "learning_rate": 9.062752538350517e-06, "loss": 0.4809, "step": 2484 }, { "epoch": 0.1988, "grad_norm": 1.7218812704086304, "learning_rate": 9.062019341216112e-06, "loss": 0.3777, "step": 2485 }, { "epoch": 0.19888, "grad_norm": 1.914481520652771, "learning_rate": 9.061285887091334e-06, "loss": 0.4097, "step": 2486 }, { "epoch": 0.19896, "grad_norm": 1.4902898073196411, "learning_rate": 9.060552176022587e-06, "loss": 0.3261, "step": 2487 }, { "epoch": 0.19904, "grad_norm": 1.306275725364685, "learning_rate": 9.059818208056293e-06, "loss": 0.2747, "step": 2488 }, { "epoch": 0.19912, "grad_norm": 1.6835832595825195, "learning_rate": 9.059083983238882e-06, "loss": 0.3671, "step": 2489 }, { "epoch": 0.1992, "grad_norm": 1.5236090421676636, "learning_rate": 9.05834950161681e-06, "loss": 0.359, "step": 2490 }, { "epoch": 0.19928, "grad_norm": 1.5683189630508423, "learning_rate": 9.057614763236545e-06, "loss": 0.3818, "step": 2491 }, { "epoch": 0.19936, "grad_norm": 1.9629745483398438, "learning_rate": 9.056879768144572e-06, "loss": 0.4597, "step": 2492 }, { "epoch": 0.19944, "grad_norm": 1.8202420473098755, "learning_rate": 9.056144516387387e-06, "loss": 0.3821, "step": 2493 }, { "epoch": 0.19952, "grad_norm": 2.0477683544158936, "learning_rate": 9.055409008011513e-06, "loss": 0.5704, "step": 2494 }, { "epoch": 0.1996, "grad_norm": 1.5534504652023315, "learning_rate": 9.05467324306348e-06, "loss": 0.2988, "step": 2495 }, { "epoch": 0.19968, "grad_norm": 1.8985226154327393, "learning_rate": 9.053937221589837e-06, "loss": 0.4126, "step": 2496 }, { "epoch": 0.19976, "grad_norm": 1.7892488241195679, "learning_rate": 9.05320094363715e-06, "loss": 0.4152, "step": 2497 }, { "epoch": 0.19984, "grad_norm": 1.563428282737732, "learning_rate": 9.052464409252003e-06, "loss": 0.3499, "step": 2498 }, { "epoch": 0.19992, "grad_norm": 1.7737313508987427, "learning_rate": 9.051727618480992e-06, "loss": 0.4627, "step": 2499 }, { "epoch": 0.2, "grad_norm": 1.461615800857544, "learning_rate": 9.050990571370731e-06, "loss": 0.3934, "step": 2500 }, { "epoch": 0.20008, "grad_norm": 1.1548340320587158, "learning_rate": 9.050253267967852e-06, "loss": 0.2507, "step": 2501 }, { "epoch": 0.20016, "grad_norm": 1.709346055984497, "learning_rate": 9.049515708319001e-06, "loss": 0.3237, "step": 2502 }, { "epoch": 0.20024, "grad_norm": 1.4969236850738525, "learning_rate": 9.048777892470841e-06, "loss": 0.2986, "step": 2503 }, { "epoch": 0.20032, "grad_norm": 1.687313199043274, "learning_rate": 9.048039820470049e-06, "loss": 0.3521, "step": 2504 }, { "epoch": 0.2004, "grad_norm": 1.6837893724441528, "learning_rate": 9.047301492363325e-06, "loss": 0.376, "step": 2505 }, { "epoch": 0.20048, "grad_norm": 1.6461684703826904, "learning_rate": 9.046562908197376e-06, "loss": 0.3472, "step": 2506 }, { "epoch": 0.20056, "grad_norm": 1.2622382640838623, "learning_rate": 9.045824068018934e-06, "loss": 0.259, "step": 2507 }, { "epoch": 0.20064, "grad_norm": 1.7232669591903687, "learning_rate": 9.045084971874738e-06, "loss": 0.4805, "step": 2508 }, { "epoch": 0.20072, "grad_norm": 1.287067174911499, "learning_rate": 9.044345619811552e-06, "loss": 0.2888, "step": 2509 }, { "epoch": 0.2008, "grad_norm": 1.4580618143081665, "learning_rate": 9.04360601187615e-06, "loss": 0.3821, "step": 2510 }, { "epoch": 0.20088, "grad_norm": 1.244520902633667, "learning_rate": 9.042866148115325e-06, "loss": 0.276, "step": 2511 }, { "epoch": 0.20096, "grad_norm": 1.3449219465255737, "learning_rate": 9.042126028575889e-06, "loss": 0.2915, "step": 2512 }, { "epoch": 0.20104, "grad_norm": 1.5387424230575562, "learning_rate": 9.041385653304664e-06, "loss": 0.3284, "step": 2513 }, { "epoch": 0.20112, "grad_norm": 2.1138205528259277, "learning_rate": 9.04064502234849e-06, "loss": 0.4097, "step": 2514 }, { "epoch": 0.2012, "grad_norm": 1.5106236934661865, "learning_rate": 9.039904135754225e-06, "loss": 0.3061, "step": 2515 }, { "epoch": 0.20128, "grad_norm": 1.434637427330017, "learning_rate": 9.039162993568743e-06, "loss": 0.3543, "step": 2516 }, { "epoch": 0.20136, "grad_norm": 1.5437572002410889, "learning_rate": 9.038421595838934e-06, "loss": 0.3407, "step": 2517 }, { "epoch": 0.20144, "grad_norm": 1.3405263423919678, "learning_rate": 9.037679942611704e-06, "loss": 0.2682, "step": 2518 }, { "epoch": 0.20152, "grad_norm": 1.4590741395950317, "learning_rate": 9.036938033933973e-06, "loss": 0.3135, "step": 2519 }, { "epoch": 0.2016, "grad_norm": 1.43552565574646, "learning_rate": 9.03619586985268e-06, "loss": 0.2935, "step": 2520 }, { "epoch": 0.20168, "grad_norm": 1.7763866186141968, "learning_rate": 9.035453450414779e-06, "loss": 0.3757, "step": 2521 }, { "epoch": 0.20176, "grad_norm": 1.8434540033340454, "learning_rate": 9.034710775667242e-06, "loss": 0.4078, "step": 2522 }, { "epoch": 0.20184, "grad_norm": 1.806249737739563, "learning_rate": 9.033967845657054e-06, "loss": 0.4245, "step": 2523 }, { "epoch": 0.20192, "grad_norm": 1.451335072517395, "learning_rate": 9.033224660431219e-06, "loss": 0.3327, "step": 2524 }, { "epoch": 0.202, "grad_norm": 1.4611728191375732, "learning_rate": 9.032481220036754e-06, "loss": 0.3051, "step": 2525 }, { "epoch": 0.20208, "grad_norm": 1.524274230003357, "learning_rate": 9.031737524520697e-06, "loss": 0.3235, "step": 2526 }, { "epoch": 0.20216, "grad_norm": 1.7024296522140503, "learning_rate": 9.030993573930094e-06, "loss": 0.3785, "step": 2527 }, { "epoch": 0.20224, "grad_norm": 0.9294398427009583, "learning_rate": 9.030249368312015e-06, "loss": 0.1819, "step": 2528 }, { "epoch": 0.20232, "grad_norm": 1.809221625328064, "learning_rate": 9.029504907713547e-06, "loss": 0.3686, "step": 2529 }, { "epoch": 0.2024, "grad_norm": 1.8182200193405151, "learning_rate": 9.028760192181785e-06, "loss": 0.3476, "step": 2530 }, { "epoch": 0.20248, "grad_norm": 1.5775073766708374, "learning_rate": 9.028015221763844e-06, "loss": 0.3117, "step": 2531 }, { "epoch": 0.20256, "grad_norm": 1.9079387187957764, "learning_rate": 9.02726999650686e-06, "loss": 0.4509, "step": 2532 }, { "epoch": 0.20264, "grad_norm": 1.451041579246521, "learning_rate": 9.026524516457977e-06, "loss": 0.3606, "step": 2533 }, { "epoch": 0.20272, "grad_norm": 1.1723933219909668, "learning_rate": 9.025778781664361e-06, "loss": 0.2418, "step": 2534 }, { "epoch": 0.2028, "grad_norm": 1.693428635597229, "learning_rate": 9.025032792173193e-06, "loss": 0.3352, "step": 2535 }, { "epoch": 0.20288, "grad_norm": 1.216301441192627, "learning_rate": 9.024286548031666e-06, "loss": 0.334, "step": 2536 }, { "epoch": 0.20296, "grad_norm": 1.6559041738510132, "learning_rate": 9.023540049286996e-06, "loss": 0.3532, "step": 2537 }, { "epoch": 0.20304, "grad_norm": 1.55605149269104, "learning_rate": 9.02279329598641e-06, "loss": 0.3522, "step": 2538 }, { "epoch": 0.20312, "grad_norm": 1.8087921142578125, "learning_rate": 9.022046288177153e-06, "loss": 0.3933, "step": 2539 }, { "epoch": 0.2032, "grad_norm": 1.6193270683288574, "learning_rate": 9.021299025906482e-06, "loss": 0.3471, "step": 2540 }, { "epoch": 0.20328, "grad_norm": 1.2611744403839111, "learning_rate": 9.02055150922168e-06, "loss": 0.2635, "step": 2541 }, { "epoch": 0.20336, "grad_norm": 1.7183457612991333, "learning_rate": 9.019803738170036e-06, "loss": 0.3218, "step": 2542 }, { "epoch": 0.20344, "grad_norm": 1.3315821886062622, "learning_rate": 9.01905571279886e-06, "loss": 0.2472, "step": 2543 }, { "epoch": 0.20352, "grad_norm": 1.7462687492370605, "learning_rate": 9.018307433155477e-06, "loss": 0.3837, "step": 2544 }, { "epoch": 0.2036, "grad_norm": 2.0826213359832764, "learning_rate": 9.017558899287226e-06, "loss": 0.4974, "step": 2545 }, { "epoch": 0.20368, "grad_norm": 1.5876009464263916, "learning_rate": 9.01681011124147e-06, "loss": 0.3448, "step": 2546 }, { "epoch": 0.20376, "grad_norm": 1.6092311143875122, "learning_rate": 9.016061069065576e-06, "loss": 0.2966, "step": 2547 }, { "epoch": 0.20384, "grad_norm": 1.5083656311035156, "learning_rate": 9.015311772806937e-06, "loss": 0.3039, "step": 2548 }, { "epoch": 0.20392, "grad_norm": 1.6449142694473267, "learning_rate": 9.014562222512954e-06, "loss": 0.3372, "step": 2549 }, { "epoch": 0.204, "grad_norm": 1.5135823488235474, "learning_rate": 9.013812418231055e-06, "loss": 0.2915, "step": 2550 }, { "epoch": 0.20408, "grad_norm": 2.0886871814727783, "learning_rate": 9.013062360008675e-06, "loss": 0.3916, "step": 2551 }, { "epoch": 0.20416, "grad_norm": 1.2576608657836914, "learning_rate": 9.012312047893265e-06, "loss": 0.2639, "step": 2552 }, { "epoch": 0.20424, "grad_norm": 1.7450748682022095, "learning_rate": 9.011561481932301e-06, "loss": 0.3559, "step": 2553 }, { "epoch": 0.20432, "grad_norm": 1.6265859603881836, "learning_rate": 9.010810662173262e-06, "loss": 0.3201, "step": 2554 }, { "epoch": 0.2044, "grad_norm": 1.57313072681427, "learning_rate": 9.010059588663651e-06, "loss": 0.4484, "step": 2555 }, { "epoch": 0.20448, "grad_norm": 1.5142831802368164, "learning_rate": 9.00930826145099e-06, "loss": 0.3172, "step": 2556 }, { "epoch": 0.20456, "grad_norm": 1.7187414169311523, "learning_rate": 9.00855668058281e-06, "loss": 0.3237, "step": 2557 }, { "epoch": 0.20464, "grad_norm": 1.5038609504699707, "learning_rate": 9.007804846106662e-06, "loss": 0.3068, "step": 2558 }, { "epoch": 0.20472, "grad_norm": 1.3688982725143433, "learning_rate": 9.007052758070111e-06, "loss": 0.2904, "step": 2559 }, { "epoch": 0.2048, "grad_norm": 1.5706552267074585, "learning_rate": 9.00630041652074e-06, "loss": 0.3624, "step": 2560 }, { "epoch": 0.20488, "grad_norm": 1.5629266500473022, "learning_rate": 9.005547821506145e-06, "loss": 0.3069, "step": 2561 }, { "epoch": 0.20496, "grad_norm": 1.3994823694229126, "learning_rate": 9.004794973073943e-06, "loss": 0.2774, "step": 2562 }, { "epoch": 0.20504, "grad_norm": 1.459088921546936, "learning_rate": 9.004041871271763e-06, "loss": 0.3553, "step": 2563 }, { "epoch": 0.20512, "grad_norm": 1.7317893505096436, "learning_rate": 9.003288516147253e-06, "loss": 0.3853, "step": 2564 }, { "epoch": 0.2052, "grad_norm": 2.0651421546936035, "learning_rate": 9.002534907748071e-06, "loss": 0.6532, "step": 2565 }, { "epoch": 0.20528, "grad_norm": 1.663630485534668, "learning_rate": 9.0017810461219e-06, "loss": 0.3526, "step": 2566 }, { "epoch": 0.20536, "grad_norm": 1.511698842048645, "learning_rate": 9.00102693131643e-06, "loss": 0.2987, "step": 2567 }, { "epoch": 0.20544, "grad_norm": 1.929552674293518, "learning_rate": 9.000272563379375e-06, "loss": 0.4855, "step": 2568 }, { "epoch": 0.20552, "grad_norm": 1.6176848411560059, "learning_rate": 8.99951794235846e-06, "loss": 0.3513, "step": 2569 }, { "epoch": 0.2056, "grad_norm": 1.647544264793396, "learning_rate": 8.998763068301428e-06, "loss": 0.4105, "step": 2570 }, { "epoch": 0.20568, "grad_norm": 1.4232338666915894, "learning_rate": 8.998007941256035e-06, "loss": 0.3615, "step": 2571 }, { "epoch": 0.20576, "grad_norm": 1.769791841506958, "learning_rate": 8.997252561270058e-06, "loss": 0.3636, "step": 2572 }, { "epoch": 0.20584, "grad_norm": 1.6207672357559204, "learning_rate": 8.996496928391285e-06, "loss": 0.3445, "step": 2573 }, { "epoch": 0.20592, "grad_norm": 1.8386582136154175, "learning_rate": 8.995741042667524e-06, "loss": 0.3672, "step": 2574 }, { "epoch": 0.206, "grad_norm": 1.6057279109954834, "learning_rate": 8.994984904146599e-06, "loss": 0.344, "step": 2575 }, { "epoch": 0.20608, "grad_norm": 1.5600706338882446, "learning_rate": 8.994228512876345e-06, "loss": 0.346, "step": 2576 }, { "epoch": 0.20616, "grad_norm": 1.798862338066101, "learning_rate": 8.993471868904617e-06, "loss": 0.3838, "step": 2577 }, { "epoch": 0.20624, "grad_norm": 1.6833524703979492, "learning_rate": 8.992714972279285e-06, "loss": 0.4041, "step": 2578 }, { "epoch": 0.20632, "grad_norm": 1.3995176553726196, "learning_rate": 8.99195782304824e-06, "loss": 0.3753, "step": 2579 }, { "epoch": 0.2064, "grad_norm": 1.242598295211792, "learning_rate": 8.991200421259378e-06, "loss": 0.2893, "step": 2580 }, { "epoch": 0.20648, "grad_norm": 1.356940746307373, "learning_rate": 8.990442766960622e-06, "loss": 0.2868, "step": 2581 }, { "epoch": 0.20656, "grad_norm": 1.8150062561035156, "learning_rate": 8.989684860199903e-06, "loss": 0.4231, "step": 2582 }, { "epoch": 0.20664, "grad_norm": 1.3813666105270386, "learning_rate": 8.988926701025171e-06, "loss": 0.33, "step": 2583 }, { "epoch": 0.20672, "grad_norm": 1.7560433149337769, "learning_rate": 8.988168289484396e-06, "loss": 0.34, "step": 2584 }, { "epoch": 0.2068, "grad_norm": 1.6420555114746094, "learning_rate": 8.987409625625556e-06, "loss": 0.3671, "step": 2585 }, { "epoch": 0.20688, "grad_norm": 1.5787367820739746, "learning_rate": 8.986650709496652e-06, "loss": 0.3619, "step": 2586 }, { "epoch": 0.20696, "grad_norm": 1.6930251121520996, "learning_rate": 8.985891541145696e-06, "loss": 0.3442, "step": 2587 }, { "epoch": 0.20704, "grad_norm": 1.488715410232544, "learning_rate": 8.98513212062072e-06, "loss": 0.4164, "step": 2588 }, { "epoch": 0.20712, "grad_norm": 1.5280767679214478, "learning_rate": 8.98437244796977e-06, "loss": 0.2627, "step": 2589 }, { "epoch": 0.2072, "grad_norm": 1.939414381980896, "learning_rate": 8.983612523240903e-06, "loss": 0.3893, "step": 2590 }, { "epoch": 0.20728, "grad_norm": 1.3220232725143433, "learning_rate": 8.982852346482205e-06, "loss": 0.3041, "step": 2591 }, { "epoch": 0.20736, "grad_norm": 1.585889458656311, "learning_rate": 8.982091917741764e-06, "loss": 0.3324, "step": 2592 }, { "epoch": 0.20744, "grad_norm": 1.9471094608306885, "learning_rate": 8.981331237067691e-06, "loss": 0.4718, "step": 2593 }, { "epoch": 0.20752, "grad_norm": 1.783488392829895, "learning_rate": 8.980570304508114e-06, "loss": 0.4395, "step": 2594 }, { "epoch": 0.2076, "grad_norm": 1.2568445205688477, "learning_rate": 8.97980912011117e-06, "loss": 0.2889, "step": 2595 }, { "epoch": 0.20768, "grad_norm": 1.5438101291656494, "learning_rate": 8.979047683925022e-06, "loss": 0.4005, "step": 2596 }, { "epoch": 0.20776, "grad_norm": 1.58918297290802, "learning_rate": 8.978285995997839e-06, "loss": 0.2668, "step": 2597 }, { "epoch": 0.20784, "grad_norm": 1.4388453960418701, "learning_rate": 8.977524056377814e-06, "loss": 0.2873, "step": 2598 }, { "epoch": 0.20792, "grad_norm": 2.0037522315979004, "learning_rate": 8.97676186511315e-06, "loss": 0.3798, "step": 2599 }, { "epoch": 0.208, "grad_norm": 1.6372380256652832, "learning_rate": 8.975999422252071e-06, "loss": 0.3634, "step": 2600 }, { "epoch": 0.20808, "grad_norm": 1.2802543640136719, "learning_rate": 8.97523672784281e-06, "loss": 0.3416, "step": 2601 }, { "epoch": 0.20816, "grad_norm": 1.6268035173416138, "learning_rate": 8.974473781933623e-06, "loss": 0.3532, "step": 2602 }, { "epoch": 0.20824, "grad_norm": 1.604090690612793, "learning_rate": 8.97371058457278e-06, "loss": 0.3794, "step": 2603 }, { "epoch": 0.20832, "grad_norm": 1.903481125831604, "learning_rate": 8.97294713580856e-06, "loss": 0.4268, "step": 2604 }, { "epoch": 0.2084, "grad_norm": 1.351362705230713, "learning_rate": 8.972183435689273e-06, "loss": 0.2761, "step": 2605 }, { "epoch": 0.20848, "grad_norm": 1.6017656326293945, "learning_rate": 8.97141948426323e-06, "loss": 0.3673, "step": 2606 }, { "epoch": 0.20856, "grad_norm": 1.882969856262207, "learning_rate": 8.970655281578762e-06, "loss": 0.3843, "step": 2607 }, { "epoch": 0.20864, "grad_norm": 2.003662347793579, "learning_rate": 8.969890827684222e-06, "loss": 0.4264, "step": 2608 }, { "epoch": 0.20872, "grad_norm": 1.6131091117858887, "learning_rate": 8.969126122627973e-06, "loss": 0.3647, "step": 2609 }, { "epoch": 0.2088, "grad_norm": 1.922102451324463, "learning_rate": 8.968361166458395e-06, "loss": 0.3897, "step": 2610 }, { "epoch": 0.20888, "grad_norm": 1.5826460123062134, "learning_rate": 8.967595959223882e-06, "loss": 0.3342, "step": 2611 }, { "epoch": 0.20896, "grad_norm": 1.6929792165756226, "learning_rate": 8.966830500972852e-06, "loss": 0.4384, "step": 2612 }, { "epoch": 0.20904, "grad_norm": 1.5595133304595947, "learning_rate": 8.966064791753727e-06, "loss": 0.3149, "step": 2613 }, { "epoch": 0.20912, "grad_norm": 1.4859492778778076, "learning_rate": 8.965298831614952e-06, "loss": 0.2991, "step": 2614 }, { "epoch": 0.2092, "grad_norm": 2.3546998500823975, "learning_rate": 8.96453262060499e-06, "loss": 0.6323, "step": 2615 }, { "epoch": 0.20928, "grad_norm": 1.4650485515594482, "learning_rate": 8.963766158772314e-06, "loss": 0.2931, "step": 2616 }, { "epoch": 0.20936, "grad_norm": 1.8523716926574707, "learning_rate": 8.962999446165417e-06, "loss": 0.3536, "step": 2617 }, { "epoch": 0.20944, "grad_norm": 1.4977569580078125, "learning_rate": 8.962232482832803e-06, "loss": 0.356, "step": 2618 }, { "epoch": 0.20952, "grad_norm": 1.681130051612854, "learning_rate": 8.961465268822997e-06, "loss": 0.2998, "step": 2619 }, { "epoch": 0.2096, "grad_norm": 1.994041919708252, "learning_rate": 8.960697804184541e-06, "loss": 0.4286, "step": 2620 }, { "epoch": 0.20968, "grad_norm": 1.5844112634658813, "learning_rate": 8.959930088965987e-06, "loss": 0.4088, "step": 2621 }, { "epoch": 0.20976, "grad_norm": 1.7651921510696411, "learning_rate": 8.959162123215906e-06, "loss": 0.4029, "step": 2622 }, { "epoch": 0.20984, "grad_norm": 1.6215308904647827, "learning_rate": 8.958393906982885e-06, "loss": 0.4116, "step": 2623 }, { "epoch": 0.20992, "grad_norm": 1.4633090496063232, "learning_rate": 8.957625440315524e-06, "loss": 0.4603, "step": 2624 }, { "epoch": 0.21, "grad_norm": 1.4941697120666504, "learning_rate": 8.956856723262445e-06, "loss": 0.3767, "step": 2625 }, { "epoch": 0.21008, "grad_norm": 2.0093963146209717, "learning_rate": 8.956087755872283e-06, "loss": 0.3451, "step": 2626 }, { "epoch": 0.21016, "grad_norm": 1.8792312145233154, "learning_rate": 8.955318538193684e-06, "loss": 0.3411, "step": 2627 }, { "epoch": 0.21024, "grad_norm": 1.4969063997268677, "learning_rate": 8.954549070275316e-06, "loss": 0.331, "step": 2628 }, { "epoch": 0.21032, "grad_norm": 1.5657908916473389, "learning_rate": 8.953779352165859e-06, "loss": 0.4029, "step": 2629 }, { "epoch": 0.2104, "grad_norm": 1.5333744287490845, "learning_rate": 8.953009383914012e-06, "loss": 0.2624, "step": 2630 }, { "epoch": 0.21048, "grad_norm": 1.338257908821106, "learning_rate": 8.95223916556849e-06, "loss": 0.3209, "step": 2631 }, { "epoch": 0.21056, "grad_norm": 1.6348015069961548, "learning_rate": 8.95146869717802e-06, "loss": 0.3488, "step": 2632 }, { "epoch": 0.21064, "grad_norm": 1.488411784172058, "learning_rate": 8.950697978791345e-06, "loss": 0.3626, "step": 2633 }, { "epoch": 0.21072, "grad_norm": 1.8199793100357056, "learning_rate": 8.94992701045723e-06, "loss": 0.3665, "step": 2634 }, { "epoch": 0.2108, "grad_norm": 1.5185476541519165, "learning_rate": 8.949155792224448e-06, "loss": 0.3566, "step": 2635 }, { "epoch": 0.21088, "grad_norm": 1.959179401397705, "learning_rate": 8.948384324141794e-06, "loss": 0.3719, "step": 2636 }, { "epoch": 0.21096, "grad_norm": 1.65220046043396, "learning_rate": 8.947612606258076e-06, "loss": 0.3149, "step": 2637 }, { "epoch": 0.21104, "grad_norm": 1.392369270324707, "learning_rate": 8.946840638622117e-06, "loss": 0.2671, "step": 2638 }, { "epoch": 0.21112, "grad_norm": 1.6883777379989624, "learning_rate": 8.946068421282754e-06, "loss": 0.4213, "step": 2639 }, { "epoch": 0.2112, "grad_norm": 1.4826557636260986, "learning_rate": 8.945295954288848e-06, "loss": 0.312, "step": 2640 }, { "epoch": 0.21128, "grad_norm": 1.6768203973770142, "learning_rate": 8.944523237689268e-06, "loss": 0.3766, "step": 2641 }, { "epoch": 0.21136, "grad_norm": 1.9561296701431274, "learning_rate": 8.9437502715329e-06, "loss": 0.3921, "step": 2642 }, { "epoch": 0.21144, "grad_norm": 1.2287523746490479, "learning_rate": 8.94297705586865e-06, "loss": 0.3746, "step": 2643 }, { "epoch": 0.21152, "grad_norm": 1.383736252784729, "learning_rate": 8.942203590745433e-06, "loss": 0.3747, "step": 2644 }, { "epoch": 0.2116, "grad_norm": 2.0053927898406982, "learning_rate": 8.941429876212187e-06, "loss": 0.2964, "step": 2645 }, { "epoch": 0.21168, "grad_norm": 1.3449758291244507, "learning_rate": 8.94065591231786e-06, "loss": 0.2413, "step": 2646 }, { "epoch": 0.21176, "grad_norm": 1.6508148908615112, "learning_rate": 8.939881699111418e-06, "loss": 0.3681, "step": 2647 }, { "epoch": 0.21184, "grad_norm": 1.292067050933838, "learning_rate": 8.939107236641845e-06, "loss": 0.3007, "step": 2648 }, { "epoch": 0.21192, "grad_norm": 1.5809190273284912, "learning_rate": 8.938332524958137e-06, "loss": 0.3361, "step": 2649 }, { "epoch": 0.212, "grad_norm": 1.343364953994751, "learning_rate": 8.937557564109307e-06, "loss": 0.2596, "step": 2650 }, { "epoch": 0.21208, "grad_norm": 1.7212300300598145, "learning_rate": 8.936782354144387e-06, "loss": 0.3525, "step": 2651 }, { "epoch": 0.21216, "grad_norm": 2.104867935180664, "learning_rate": 8.93600689511242e-06, "loss": 0.4524, "step": 2652 }, { "epoch": 0.21224, "grad_norm": 1.942893624305725, "learning_rate": 8.935231187062465e-06, "loss": 0.4223, "step": 2653 }, { "epoch": 0.21232, "grad_norm": 1.5138806104660034, "learning_rate": 8.9344552300436e-06, "loss": 0.2939, "step": 2654 }, { "epoch": 0.2124, "grad_norm": 1.6802589893341064, "learning_rate": 8.93367902410492e-06, "loss": 0.3564, "step": 2655 }, { "epoch": 0.21248, "grad_norm": 1.7636982202529907, "learning_rate": 8.932902569295527e-06, "loss": 0.4802, "step": 2656 }, { "epoch": 0.21256, "grad_norm": 1.8842812776565552, "learning_rate": 8.932125865664549e-06, "loss": 0.4784, "step": 2657 }, { "epoch": 0.21264, "grad_norm": 1.495451807975769, "learning_rate": 8.931348913261125e-06, "loss": 0.2886, "step": 2658 }, { "epoch": 0.21272, "grad_norm": 1.4396910667419434, "learning_rate": 8.93057171213441e-06, "loss": 0.2712, "step": 2659 }, { "epoch": 0.2128, "grad_norm": 1.7895922660827637, "learning_rate": 8.929794262333574e-06, "loss": 0.3548, "step": 2660 }, { "epoch": 0.21288, "grad_norm": 2.094775438308716, "learning_rate": 8.929016563907805e-06, "loss": 0.4313, "step": 2661 }, { "epoch": 0.21296, "grad_norm": 1.7981663942337036, "learning_rate": 8.928238616906302e-06, "loss": 0.3359, "step": 2662 }, { "epoch": 0.21304, "grad_norm": 1.6360666751861572, "learning_rate": 8.927460421378287e-06, "loss": 0.3905, "step": 2663 }, { "epoch": 0.21312, "grad_norm": 2.0702054500579834, "learning_rate": 8.926681977372993e-06, "loss": 0.4009, "step": 2664 }, { "epoch": 0.2132, "grad_norm": 1.4153721332550049, "learning_rate": 8.92590328493967e-06, "loss": 0.3303, "step": 2665 }, { "epoch": 0.21328, "grad_norm": 1.5334463119506836, "learning_rate": 8.92512434412758e-06, "loss": 0.3581, "step": 2666 }, { "epoch": 0.21336, "grad_norm": 1.445220708847046, "learning_rate": 8.924345154986008e-06, "loss": 0.2794, "step": 2667 }, { "epoch": 0.21344, "grad_norm": 1.4006540775299072, "learning_rate": 8.923565717564247e-06, "loss": 0.2452, "step": 2668 }, { "epoch": 0.21352, "grad_norm": 1.3207964897155762, "learning_rate": 8.922786031911613e-06, "loss": 0.2685, "step": 2669 }, { "epoch": 0.2136, "grad_norm": 1.587815523147583, "learning_rate": 8.922006098077432e-06, "loss": 0.3331, "step": 2670 }, { "epoch": 0.21368, "grad_norm": 1.743464469909668, "learning_rate": 8.921225916111048e-06, "loss": 0.3479, "step": 2671 }, { "epoch": 0.21376, "grad_norm": 1.6636502742767334, "learning_rate": 8.920445486061822e-06, "loss": 0.3878, "step": 2672 }, { "epoch": 0.21384, "grad_norm": 1.9191046953201294, "learning_rate": 8.919664807979126e-06, "loss": 0.4291, "step": 2673 }, { "epoch": 0.21392, "grad_norm": 1.7954356670379639, "learning_rate": 8.918883881912353e-06, "loss": 0.3258, "step": 2674 }, { "epoch": 0.214, "grad_norm": 1.4741268157958984, "learning_rate": 8.91810270791091e-06, "loss": 0.2867, "step": 2675 }, { "epoch": 0.21408, "grad_norm": 1.7667266130447388, "learning_rate": 8.917321286024218e-06, "loss": 0.4209, "step": 2676 }, { "epoch": 0.21416, "grad_norm": 1.924230694770813, "learning_rate": 8.916539616301718e-06, "loss": 0.3504, "step": 2677 }, { "epoch": 0.21424, "grad_norm": 1.4865533113479614, "learning_rate": 8.91575769879286e-06, "loss": 0.3856, "step": 2678 }, { "epoch": 0.21432, "grad_norm": 1.7114523649215698, "learning_rate": 8.914975533547114e-06, "loss": 0.4292, "step": 2679 }, { "epoch": 0.2144, "grad_norm": 1.5713534355163574, "learning_rate": 8.914193120613966e-06, "loss": 0.3665, "step": 2680 }, { "epoch": 0.21448, "grad_norm": 1.5431712865829468, "learning_rate": 8.913410460042915e-06, "loss": 0.3065, "step": 2681 }, { "epoch": 0.21456, "grad_norm": 1.625646710395813, "learning_rate": 8.91262755188348e-06, "loss": 0.4117, "step": 2682 }, { "epoch": 0.21464, "grad_norm": 1.5041875839233398, "learning_rate": 8.911844396185192e-06, "loss": 0.3816, "step": 2683 }, { "epoch": 0.21472, "grad_norm": 1.8179059028625488, "learning_rate": 8.911060992997596e-06, "loss": 0.3031, "step": 2684 }, { "epoch": 0.2148, "grad_norm": 1.7004882097244263, "learning_rate": 8.910277342370259e-06, "loss": 0.4804, "step": 2685 }, { "epoch": 0.21488, "grad_norm": 2.1051254272460938, "learning_rate": 8.909493444352757e-06, "loss": 0.4295, "step": 2686 }, { "epoch": 0.21496, "grad_norm": 1.3784528970718384, "learning_rate": 8.908709298994686e-06, "loss": 0.3112, "step": 2687 }, { "epoch": 0.21504, "grad_norm": 1.7142976522445679, "learning_rate": 8.907924906345659e-06, "loss": 0.3797, "step": 2688 }, { "epoch": 0.21512, "grad_norm": 1.7396079301834106, "learning_rate": 8.907140266455297e-06, "loss": 0.4081, "step": 2689 }, { "epoch": 0.2152, "grad_norm": 0.8471266627311707, "learning_rate": 8.906355379373243e-06, "loss": 0.2378, "step": 2690 }, { "epoch": 0.21528, "grad_norm": 1.6248892545700073, "learning_rate": 8.905570245149156e-06, "loss": 0.3695, "step": 2691 }, { "epoch": 0.21536, "grad_norm": 1.4380123615264893, "learning_rate": 8.904784863832708e-06, "loss": 0.3377, "step": 2692 }, { "epoch": 0.21544, "grad_norm": 1.8028794527053833, "learning_rate": 8.903999235473586e-06, "loss": 0.5013, "step": 2693 }, { "epoch": 0.21552, "grad_norm": 1.2009968757629395, "learning_rate": 8.903213360121496e-06, "loss": 0.2609, "step": 2694 }, { "epoch": 0.2156, "grad_norm": 1.5965559482574463, "learning_rate": 8.902427237826157e-06, "loss": 0.3731, "step": 2695 }, { "epoch": 0.21568, "grad_norm": 1.466230869293213, "learning_rate": 8.901640868637304e-06, "loss": 0.3941, "step": 2696 }, { "epoch": 0.21576, "grad_norm": 1.538789987564087, "learning_rate": 8.900854252604689e-06, "loss": 0.2974, "step": 2697 }, { "epoch": 0.21584, "grad_norm": 1.5418365001678467, "learning_rate": 8.900067389778075e-06, "loss": 0.38, "step": 2698 }, { "epoch": 0.21592, "grad_norm": 1.4428279399871826, "learning_rate": 8.89928028020725e-06, "loss": 0.3617, "step": 2699 }, { "epoch": 0.216, "grad_norm": 1.5055745840072632, "learning_rate": 8.898492923942007e-06, "loss": 0.2906, "step": 2700 }, { "epoch": 0.21608, "grad_norm": 1.5170435905456543, "learning_rate": 8.897705321032162e-06, "loss": 0.3001, "step": 2701 }, { "epoch": 0.21616, "grad_norm": 1.5084747076034546, "learning_rate": 8.896917471527542e-06, "loss": 0.3537, "step": 2702 }, { "epoch": 0.21624, "grad_norm": 1.590378761291504, "learning_rate": 8.896129375477993e-06, "loss": 0.2969, "step": 2703 }, { "epoch": 0.21632, "grad_norm": 1.5651493072509766, "learning_rate": 8.895341032933376e-06, "loss": 0.3104, "step": 2704 }, { "epoch": 0.2164, "grad_norm": 1.545362949371338, "learning_rate": 8.894552443943564e-06, "loss": 0.3274, "step": 2705 }, { "epoch": 0.21648, "grad_norm": 1.7417821884155273, "learning_rate": 8.893763608558453e-06, "loss": 0.4584, "step": 2706 }, { "epoch": 0.21656, "grad_norm": 1.2758499383926392, "learning_rate": 8.892974526827944e-06, "loss": 0.2997, "step": 2707 }, { "epoch": 0.21664, "grad_norm": 1.4422128200531006, "learning_rate": 8.892185198801963e-06, "loss": 0.3044, "step": 2708 }, { "epoch": 0.21672, "grad_norm": 1.5123414993286133, "learning_rate": 8.891395624530449e-06, "loss": 0.3265, "step": 2709 }, { "epoch": 0.2168, "grad_norm": 1.5700615644454956, "learning_rate": 8.890605804063353e-06, "loss": 0.3064, "step": 2710 }, { "epoch": 0.21688, "grad_norm": 1.971044659614563, "learning_rate": 8.889815737450648e-06, "loss": 0.3514, "step": 2711 }, { "epoch": 0.21696, "grad_norm": 1.5621892213821411, "learning_rate": 8.889025424742314e-06, "loss": 0.3242, "step": 2712 }, { "epoch": 0.21704, "grad_norm": 1.4305284023284912, "learning_rate": 8.888234865988356e-06, "loss": 0.3518, "step": 2713 }, { "epoch": 0.21712, "grad_norm": 1.436153769493103, "learning_rate": 8.887444061238787e-06, "loss": 0.3249, "step": 2714 }, { "epoch": 0.2172, "grad_norm": 1.4724273681640625, "learning_rate": 8.886653010543641e-06, "loss": 0.303, "step": 2715 }, { "epoch": 0.21728, "grad_norm": 2.188133716583252, "learning_rate": 8.885861713952964e-06, "loss": 0.4513, "step": 2716 }, { "epoch": 0.21736, "grad_norm": 1.3821355104446411, "learning_rate": 8.885070171516816e-06, "loss": 0.2599, "step": 2717 }, { "epoch": 0.21744, "grad_norm": 1.943583369255066, "learning_rate": 8.88427838328528e-06, "loss": 0.5347, "step": 2718 }, { "epoch": 0.21752, "grad_norm": 1.9702650308609009, "learning_rate": 8.883486349308446e-06, "loss": 0.387, "step": 2719 }, { "epoch": 0.2176, "grad_norm": 1.7490559816360474, "learning_rate": 8.882694069636426e-06, "loss": 0.3813, "step": 2720 }, { "epoch": 0.21768, "grad_norm": 1.3750137090682983, "learning_rate": 8.881901544319345e-06, "loss": 0.3268, "step": 2721 }, { "epoch": 0.21776, "grad_norm": 1.3257077932357788, "learning_rate": 8.881108773407338e-06, "loss": 0.2837, "step": 2722 }, { "epoch": 0.21784, "grad_norm": 1.7465240955352783, "learning_rate": 8.88031575695057e-06, "loss": 0.3157, "step": 2723 }, { "epoch": 0.21792, "grad_norm": 1.619113564491272, "learning_rate": 8.879522494999204e-06, "loss": 0.3192, "step": 2724 }, { "epoch": 0.218, "grad_norm": 1.9502321481704712, "learning_rate": 8.878728987603433e-06, "loss": 0.3442, "step": 2725 }, { "epoch": 0.21808, "grad_norm": 1.1939575672149658, "learning_rate": 8.877935234813455e-06, "loss": 0.2337, "step": 2726 }, { "epoch": 0.21816, "grad_norm": 1.9584698677062988, "learning_rate": 8.877141236679492e-06, "loss": 0.3656, "step": 2727 }, { "epoch": 0.21824, "grad_norm": 1.442341685295105, "learning_rate": 8.876346993251777e-06, "loss": 0.352, "step": 2728 }, { "epoch": 0.21832, "grad_norm": 1.8673537969589233, "learning_rate": 8.875552504580556e-06, "loss": 0.391, "step": 2729 }, { "epoch": 0.2184, "grad_norm": 1.3063445091247559, "learning_rate": 8.874757770716096e-06, "loss": 0.3345, "step": 2730 }, { "epoch": 0.21848, "grad_norm": 1.615033507347107, "learning_rate": 8.873962791708676e-06, "loss": 0.3744, "step": 2731 }, { "epoch": 0.21856, "grad_norm": 1.8220925331115723, "learning_rate": 8.873167567608594e-06, "loss": 0.3211, "step": 2732 }, { "epoch": 0.21864, "grad_norm": 1.4469846487045288, "learning_rate": 8.872372098466159e-06, "loss": 0.4197, "step": 2733 }, { "epoch": 0.21872, "grad_norm": 1.1662476062774658, "learning_rate": 8.871576384331699e-06, "loss": 0.263, "step": 2734 }, { "epoch": 0.2188, "grad_norm": 1.2721196413040161, "learning_rate": 8.870780425255554e-06, "loss": 0.2601, "step": 2735 }, { "epoch": 0.21888, "grad_norm": 1.9340859651565552, "learning_rate": 8.869984221288085e-06, "loss": 0.3647, "step": 2736 }, { "epoch": 0.21896, "grad_norm": 1.5645490884780884, "learning_rate": 8.869187772479661e-06, "loss": 0.3446, "step": 2737 }, { "epoch": 0.21904, "grad_norm": 1.3666660785675049, "learning_rate": 8.868391078880677e-06, "loss": 0.3243, "step": 2738 }, { "epoch": 0.21912, "grad_norm": 1.340316653251648, "learning_rate": 8.86759414054153e-06, "loss": 0.2668, "step": 2739 }, { "epoch": 0.2192, "grad_norm": 1.5360743999481201, "learning_rate": 8.866796957512642e-06, "loss": 0.3216, "step": 2740 }, { "epoch": 0.21928, "grad_norm": 1.6655339002609253, "learning_rate": 8.865999529844452e-06, "loss": 0.3079, "step": 2741 }, { "epoch": 0.21936, "grad_norm": 1.4910820722579956, "learning_rate": 8.865201857587405e-06, "loss": 0.2896, "step": 2742 }, { "epoch": 0.21944, "grad_norm": 1.905479907989502, "learning_rate": 8.864403940791969e-06, "loss": 0.4271, "step": 2743 }, { "epoch": 0.21952, "grad_norm": 1.5210912227630615, "learning_rate": 8.863605779508627e-06, "loss": 0.319, "step": 2744 }, { "epoch": 0.2196, "grad_norm": 1.7995033264160156, "learning_rate": 8.862807373787876e-06, "loss": 0.3828, "step": 2745 }, { "epoch": 0.21968, "grad_norm": 1.853249430656433, "learning_rate": 8.862008723680225e-06, "loss": 0.374, "step": 2746 }, { "epoch": 0.21976, "grad_norm": 1.831558108329773, "learning_rate": 8.861209829236206e-06, "loss": 0.4955, "step": 2747 }, { "epoch": 0.21984, "grad_norm": 1.945634365081787, "learning_rate": 8.860410690506361e-06, "loss": 0.3997, "step": 2748 }, { "epoch": 0.21992, "grad_norm": 1.5275148153305054, "learning_rate": 8.859611307541247e-06, "loss": 0.3866, "step": 2749 }, { "epoch": 0.22, "grad_norm": 1.7615596055984497, "learning_rate": 8.858811680391442e-06, "loss": 0.3599, "step": 2750 }, { "epoch": 0.22008, "grad_norm": 1.431030511856079, "learning_rate": 8.858011809107532e-06, "loss": 0.2883, "step": 2751 }, { "epoch": 0.22016, "grad_norm": 1.2879825830459595, "learning_rate": 8.857211693740125e-06, "loss": 0.317, "step": 2752 }, { "epoch": 0.22024, "grad_norm": 1.4592652320861816, "learning_rate": 8.856411334339841e-06, "loss": 0.2936, "step": 2753 }, { "epoch": 0.22032, "grad_norm": 1.5514662265777588, "learning_rate": 8.855610730957313e-06, "loss": 0.4303, "step": 2754 }, { "epoch": 0.2204, "grad_norm": 1.5528576374053955, "learning_rate": 8.854809883643197e-06, "loss": 0.3552, "step": 2755 }, { "epoch": 0.22048, "grad_norm": 1.8523775339126587, "learning_rate": 8.854008792448156e-06, "loss": 0.4036, "step": 2756 }, { "epoch": 0.22056, "grad_norm": 1.6846541166305542, "learning_rate": 8.853207457422877e-06, "loss": 0.3821, "step": 2757 }, { "epoch": 0.22064, "grad_norm": 1.7963849306106567, "learning_rate": 8.852405878618052e-06, "loss": 0.3123, "step": 2758 }, { "epoch": 0.22072, "grad_norm": 1.451002597808838, "learning_rate": 8.8516040560844e-06, "loss": 0.3076, "step": 2759 }, { "epoch": 0.2208, "grad_norm": 1.808860421180725, "learning_rate": 8.850801989872644e-06, "loss": 0.3846, "step": 2760 }, { "epoch": 0.22088, "grad_norm": 1.5843877792358398, "learning_rate": 8.849999680033535e-06, "loss": 0.286, "step": 2761 }, { "epoch": 0.22096, "grad_norm": 1.2339478731155396, "learning_rate": 8.849197126617824e-06, "loss": 0.3054, "step": 2762 }, { "epoch": 0.22104, "grad_norm": 1.9607781171798706, "learning_rate": 8.848394329676294e-06, "loss": 0.3739, "step": 2763 }, { "epoch": 0.22112, "grad_norm": 1.4922035932540894, "learning_rate": 8.847591289259729e-06, "loss": 0.3911, "step": 2764 }, { "epoch": 0.2212, "grad_norm": 1.7909282445907593, "learning_rate": 8.846788005418938e-06, "loss": 0.401, "step": 2765 }, { "epoch": 0.22128, "grad_norm": 1.6844979524612427, "learning_rate": 8.845984478204742e-06, "loss": 0.4258, "step": 2766 }, { "epoch": 0.22136, "grad_norm": 1.6638482809066772, "learning_rate": 8.845180707667975e-06, "loss": 0.3188, "step": 2767 }, { "epoch": 0.22144, "grad_norm": 1.4212157726287842, "learning_rate": 8.84437669385949e-06, "loss": 0.3071, "step": 2768 }, { "epoch": 0.22152, "grad_norm": 1.4176979064941406, "learning_rate": 8.843572436830157e-06, "loss": 0.3029, "step": 2769 }, { "epoch": 0.2216, "grad_norm": 1.8087478876113892, "learning_rate": 8.842767936630857e-06, "loss": 0.4011, "step": 2770 }, { "epoch": 0.22168, "grad_norm": 1.3983772993087769, "learning_rate": 8.841963193312487e-06, "loss": 0.347, "step": 2771 }, { "epoch": 0.22176, "grad_norm": 1.5463216304779053, "learning_rate": 8.841158206925959e-06, "loss": 0.3349, "step": 2772 }, { "epoch": 0.22184, "grad_norm": 1.3587409257888794, "learning_rate": 8.840352977522206e-06, "loss": 0.3083, "step": 2773 }, { "epoch": 0.22192, "grad_norm": 1.3786388635635376, "learning_rate": 8.83954750515217e-06, "loss": 0.3388, "step": 2774 }, { "epoch": 0.222, "grad_norm": 1.750858187675476, "learning_rate": 8.83874178986681e-06, "loss": 0.4101, "step": 2775 }, { "epoch": 0.22208, "grad_norm": 1.5796808004379272, "learning_rate": 8.837935831717102e-06, "loss": 0.3572, "step": 2776 }, { "epoch": 0.22216, "grad_norm": 1.397741675376892, "learning_rate": 8.837129630754034e-06, "loss": 0.3428, "step": 2777 }, { "epoch": 0.22224, "grad_norm": 1.4873077869415283, "learning_rate": 8.836323187028615e-06, "loss": 0.3411, "step": 2778 }, { "epoch": 0.22232, "grad_norm": 1.351516604423523, "learning_rate": 8.835516500591863e-06, "loss": 0.3341, "step": 2779 }, { "epoch": 0.2224, "grad_norm": 1.303882360458374, "learning_rate": 8.834709571494817e-06, "loss": 0.3025, "step": 2780 }, { "epoch": 0.22248, "grad_norm": 1.4679828882217407, "learning_rate": 8.833902399788527e-06, "loss": 0.3012, "step": 2781 }, { "epoch": 0.22256, "grad_norm": 1.4130665063858032, "learning_rate": 8.83309498552406e-06, "loss": 0.2994, "step": 2782 }, { "epoch": 0.22264, "grad_norm": 1.885979413986206, "learning_rate": 8.832287328752499e-06, "loss": 0.4008, "step": 2783 }, { "epoch": 0.22272, "grad_norm": 2.2439332008361816, "learning_rate": 8.83147942952494e-06, "loss": 0.5255, "step": 2784 }, { "epoch": 0.2228, "grad_norm": 1.7513844966888428, "learning_rate": 8.8306712878925e-06, "loss": 0.3906, "step": 2785 }, { "epoch": 0.22288, "grad_norm": 1.585735559463501, "learning_rate": 8.829862903906306e-06, "loss": 0.3534, "step": 2786 }, { "epoch": 0.22296, "grad_norm": 1.6071237325668335, "learning_rate": 8.829054277617499e-06, "loss": 0.3327, "step": 2787 }, { "epoch": 0.22304, "grad_norm": 1.4853163957595825, "learning_rate": 8.828245409077241e-06, "loss": 0.3353, "step": 2788 }, { "epoch": 0.22312, "grad_norm": 1.8716695308685303, "learning_rate": 8.827436298336703e-06, "loss": 0.4146, "step": 2789 }, { "epoch": 0.2232, "grad_norm": 1.6231932640075684, "learning_rate": 8.826626945447079e-06, "loss": 0.3451, "step": 2790 }, { "epoch": 0.22328, "grad_norm": 1.5309462547302246, "learning_rate": 8.825817350459571e-06, "loss": 0.3078, "step": 2791 }, { "epoch": 0.22336, "grad_norm": 1.340057134628296, "learning_rate": 8.825007513425401e-06, "loss": 0.2812, "step": 2792 }, { "epoch": 0.22344, "grad_norm": 1.6794257164001465, "learning_rate": 8.824197434395805e-06, "loss": 0.3899, "step": 2793 }, { "epoch": 0.22352, "grad_norm": 1.566836953163147, "learning_rate": 8.823387113422034e-06, "loss": 0.4213, "step": 2794 }, { "epoch": 0.2236, "grad_norm": 1.4850705862045288, "learning_rate": 8.82257655055535e-06, "loss": 0.3569, "step": 2795 }, { "epoch": 0.22368, "grad_norm": 1.5087475776672363, "learning_rate": 8.82176574584704e-06, "loss": 0.3534, "step": 2796 }, { "epoch": 0.22376, "grad_norm": 1.5350658893585205, "learning_rate": 8.820954699348399e-06, "loss": 0.3551, "step": 2797 }, { "epoch": 0.22384, "grad_norm": 1.1860790252685547, "learning_rate": 8.820143411110737e-06, "loss": 0.214, "step": 2798 }, { "epoch": 0.22392, "grad_norm": 2.052689552307129, "learning_rate": 8.819331881185387e-06, "loss": 0.5796, "step": 2799 }, { "epoch": 0.224, "grad_norm": 1.7889010906219482, "learning_rate": 8.818520109623687e-06, "loss": 0.441, "step": 2800 }, { "epoch": 0.22408, "grad_norm": 1.6740270853042603, "learning_rate": 8.817708096476996e-06, "loss": 0.3098, "step": 2801 }, { "epoch": 0.22416, "grad_norm": 1.4243390560150146, "learning_rate": 8.81689584179669e-06, "loss": 0.2933, "step": 2802 }, { "epoch": 0.22424, "grad_norm": 1.7533442974090576, "learning_rate": 8.816083345634153e-06, "loss": 0.3185, "step": 2803 }, { "epoch": 0.22432, "grad_norm": 1.8434312343597412, "learning_rate": 8.815270608040792e-06, "loss": 0.4669, "step": 2804 }, { "epoch": 0.2244, "grad_norm": 1.609260082244873, "learning_rate": 8.814457629068025e-06, "loss": 0.304, "step": 2805 }, { "epoch": 0.22448, "grad_norm": 1.258373737335205, "learning_rate": 8.813644408767287e-06, "loss": 0.2606, "step": 2806 }, { "epoch": 0.22456, "grad_norm": 1.3866522312164307, "learning_rate": 8.812830947190028e-06, "loss": 0.2768, "step": 2807 }, { "epoch": 0.22464, "grad_norm": 1.794108271598816, "learning_rate": 8.812017244387714e-06, "loss": 0.4018, "step": 2808 }, { "epoch": 0.22472, "grad_norm": 1.3862345218658447, "learning_rate": 8.811203300411823e-06, "loss": 0.3217, "step": 2809 }, { "epoch": 0.2248, "grad_norm": 2.0301730632781982, "learning_rate": 8.81038911531385e-06, "loss": 0.3777, "step": 2810 }, { "epoch": 0.22488, "grad_norm": 1.378750205039978, "learning_rate": 8.80957468914531e-06, "loss": 0.3752, "step": 2811 }, { "epoch": 0.22496, "grad_norm": 1.7173678874969482, "learning_rate": 8.808760021957725e-06, "loss": 0.3415, "step": 2812 }, { "epoch": 0.22504, "grad_norm": 1.7305114269256592, "learning_rate": 8.807945113802638e-06, "loss": 0.4127, "step": 2813 }, { "epoch": 0.22512, "grad_norm": 1.6595813035964966, "learning_rate": 8.807129964731604e-06, "loss": 0.3818, "step": 2814 }, { "epoch": 0.2252, "grad_norm": 1.9219270944595337, "learning_rate": 8.806314574796198e-06, "loss": 0.4096, "step": 2815 }, { "epoch": 0.22528, "grad_norm": 1.5241590738296509, "learning_rate": 8.805498944048003e-06, "loss": 0.3071, "step": 2816 }, { "epoch": 0.22536, "grad_norm": 1.5931185483932495, "learning_rate": 8.804683072538623e-06, "loss": 0.3009, "step": 2817 }, { "epoch": 0.22544, "grad_norm": 1.7457307577133179, "learning_rate": 8.803866960319676e-06, "loss": 0.2971, "step": 2818 }, { "epoch": 0.22552, "grad_norm": 1.769567847251892, "learning_rate": 8.803050607442794e-06, "loss": 0.4444, "step": 2819 }, { "epoch": 0.2256, "grad_norm": 2.1283857822418213, "learning_rate": 8.802234013959626e-06, "loss": 0.4656, "step": 2820 }, { "epoch": 0.22568, "grad_norm": 1.2949507236480713, "learning_rate": 8.801417179921834e-06, "loss": 0.2726, "step": 2821 }, { "epoch": 0.22576, "grad_norm": 1.8464694023132324, "learning_rate": 8.800600105381097e-06, "loss": 0.4021, "step": 2822 }, { "epoch": 0.22584, "grad_norm": 1.674856185913086, "learning_rate": 8.799782790389107e-06, "loss": 0.3484, "step": 2823 }, { "epoch": 0.22592, "grad_norm": 1.2917641401290894, "learning_rate": 8.798965234997574e-06, "loss": 0.2873, "step": 2824 }, { "epoch": 0.226, "grad_norm": 1.5360853672027588, "learning_rate": 8.798147439258222e-06, "loss": 0.3182, "step": 2825 }, { "epoch": 0.22608, "grad_norm": 1.5402098894119263, "learning_rate": 8.797329403222791e-06, "loss": 0.3192, "step": 2826 }, { "epoch": 0.22616, "grad_norm": 1.3484301567077637, "learning_rate": 8.796511126943032e-06, "loss": 0.2512, "step": 2827 }, { "epoch": 0.22624, "grad_norm": 1.4177839756011963, "learning_rate": 8.79569261047072e-06, "loss": 0.3263, "step": 2828 }, { "epoch": 0.22632, "grad_norm": 1.4625004529953003, "learning_rate": 8.794873853857638e-06, "loss": 0.3261, "step": 2829 }, { "epoch": 0.2264, "grad_norm": 1.3922655582427979, "learning_rate": 8.794054857155582e-06, "loss": 0.3243, "step": 2830 }, { "epoch": 0.22648, "grad_norm": 1.4068520069122314, "learning_rate": 8.793235620416372e-06, "loss": 0.2632, "step": 2831 }, { "epoch": 0.22656, "grad_norm": 1.9339982271194458, "learning_rate": 8.792416143691836e-06, "loss": 0.4937, "step": 2832 }, { "epoch": 0.22664, "grad_norm": 1.5683056116104126, "learning_rate": 8.791596427033818e-06, "loss": 0.3712, "step": 2833 }, { "epoch": 0.22672, "grad_norm": 1.811155915260315, "learning_rate": 8.790776470494183e-06, "loss": 0.3811, "step": 2834 }, { "epoch": 0.2268, "grad_norm": 1.855803370475769, "learning_rate": 8.789956274124805e-06, "loss": 0.4298, "step": 2835 }, { "epoch": 0.22688, "grad_norm": 2.0151405334472656, "learning_rate": 8.789135837977573e-06, "loss": 0.4112, "step": 2836 }, { "epoch": 0.22696, "grad_norm": 1.684455394744873, "learning_rate": 8.788315162104396e-06, "loss": 0.2819, "step": 2837 }, { "epoch": 0.22704, "grad_norm": 1.5724467039108276, "learning_rate": 8.787494246557195e-06, "loss": 0.3237, "step": 2838 }, { "epoch": 0.22712, "grad_norm": 1.4349114894866943, "learning_rate": 8.786673091387906e-06, "loss": 0.3576, "step": 2839 }, { "epoch": 0.2272, "grad_norm": 1.4084806442260742, "learning_rate": 8.78585169664848e-06, "loss": 0.2697, "step": 2840 }, { "epoch": 0.22728, "grad_norm": 1.615369200706482, "learning_rate": 8.785030062390885e-06, "loss": 0.3478, "step": 2841 }, { "epoch": 0.22736, "grad_norm": 1.2227230072021484, "learning_rate": 8.784208188667102e-06, "loss": 0.2652, "step": 2842 }, { "epoch": 0.22744, "grad_norm": 1.4531816244125366, "learning_rate": 8.78338607552913e-06, "loss": 0.3219, "step": 2843 }, { "epoch": 0.22752, "grad_norm": 1.391142725944519, "learning_rate": 8.782563723028979e-06, "loss": 0.3095, "step": 2844 }, { "epoch": 0.2276, "grad_norm": 1.6999260187149048, "learning_rate": 8.781741131218678e-06, "loss": 0.3403, "step": 2845 }, { "epoch": 0.22768, "grad_norm": 1.1833946704864502, "learning_rate": 8.78091830015027e-06, "loss": 0.2708, "step": 2846 }, { "epoch": 0.22776, "grad_norm": 1.4600259065628052, "learning_rate": 8.780095229875813e-06, "loss": 0.4186, "step": 2847 }, { "epoch": 0.22784, "grad_norm": 1.6836893558502197, "learning_rate": 8.779271920447378e-06, "loss": 0.4746, "step": 2848 }, { "epoch": 0.22792, "grad_norm": 1.6797387599945068, "learning_rate": 8.778448371917055e-06, "loss": 0.3224, "step": 2849 }, { "epoch": 0.228, "grad_norm": 1.6203384399414062, "learning_rate": 8.777624584336944e-06, "loss": 0.4012, "step": 2850 }, { "epoch": 0.22808, "grad_norm": 1.385426640510559, "learning_rate": 8.776800557759167e-06, "loss": 0.3187, "step": 2851 }, { "epoch": 0.22816, "grad_norm": 1.3712928295135498, "learning_rate": 8.775976292235857e-06, "loss": 0.3654, "step": 2852 }, { "epoch": 0.22824, "grad_norm": 1.6508076190948486, "learning_rate": 8.775151787819159e-06, "loss": 0.4184, "step": 2853 }, { "epoch": 0.22832, "grad_norm": 1.793189525604248, "learning_rate": 8.77432704456124e-06, "loss": 0.4786, "step": 2854 }, { "epoch": 0.2284, "grad_norm": 1.7186195850372314, "learning_rate": 8.77350206251428e-06, "loss": 0.3742, "step": 2855 }, { "epoch": 0.22848, "grad_norm": 1.5993332862854004, "learning_rate": 8.772676841730468e-06, "loss": 0.397, "step": 2856 }, { "epoch": 0.22856, "grad_norm": 1.4774996042251587, "learning_rate": 8.771851382262016e-06, "loss": 0.3349, "step": 2857 }, { "epoch": 0.22864, "grad_norm": 1.5428051948547363, "learning_rate": 8.771025684161147e-06, "loss": 0.3738, "step": 2858 }, { "epoch": 0.22872, "grad_norm": 1.4334566593170166, "learning_rate": 8.770199747480105e-06, "loss": 0.2782, "step": 2859 }, { "epoch": 0.2288, "grad_norm": 2.1138930320739746, "learning_rate": 8.769373572271137e-06, "loss": 0.4068, "step": 2860 }, { "epoch": 0.22888, "grad_norm": 1.580213189125061, "learning_rate": 8.768547158586514e-06, "loss": 0.304, "step": 2861 }, { "epoch": 0.22896, "grad_norm": 1.601637840270996, "learning_rate": 8.767720506478523e-06, "loss": 0.3489, "step": 2862 }, { "epoch": 0.22904, "grad_norm": 1.425022840499878, "learning_rate": 8.766893615999463e-06, "loss": 0.2925, "step": 2863 }, { "epoch": 0.22912, "grad_norm": 1.401037573814392, "learning_rate": 8.766066487201648e-06, "loss": 0.3418, "step": 2864 }, { "epoch": 0.2292, "grad_norm": 1.5454732179641724, "learning_rate": 8.765239120137407e-06, "loss": 0.3042, "step": 2865 }, { "epoch": 0.22928, "grad_norm": 1.3919222354888916, "learning_rate": 8.764411514859086e-06, "loss": 0.3628, "step": 2866 }, { "epoch": 0.22936, "grad_norm": 1.6965492963790894, "learning_rate": 8.763583671419045e-06, "loss": 0.3109, "step": 2867 }, { "epoch": 0.22944, "grad_norm": 1.665880799293518, "learning_rate": 8.762755589869655e-06, "loss": 0.3084, "step": 2868 }, { "epoch": 0.22952, "grad_norm": 1.277369737625122, "learning_rate": 8.761927270263313e-06, "loss": 0.2969, "step": 2869 }, { "epoch": 0.2296, "grad_norm": 1.5863888263702393, "learning_rate": 8.761098712652418e-06, "loss": 0.342, "step": 2870 }, { "epoch": 0.22968, "grad_norm": 1.9805275201797485, "learning_rate": 8.760269917089392e-06, "loss": 0.4074, "step": 2871 }, { "epoch": 0.22976, "grad_norm": 1.5686451196670532, "learning_rate": 8.75944088362667e-06, "loss": 0.3272, "step": 2872 }, { "epoch": 0.22984, "grad_norm": 1.4256458282470703, "learning_rate": 8.758611612316704e-06, "loss": 0.2976, "step": 2873 }, { "epoch": 0.22992, "grad_norm": 1.0142706632614136, "learning_rate": 8.757782103211958e-06, "loss": 0.2594, "step": 2874 }, { "epoch": 0.23, "grad_norm": 1.2783145904541016, "learning_rate": 8.756952356364909e-06, "loss": 0.3521, "step": 2875 }, { "epoch": 0.23008, "grad_norm": 1.5316811800003052, "learning_rate": 8.756122371828058e-06, "loss": 0.3306, "step": 2876 }, { "epoch": 0.23016, "grad_norm": 1.5814586877822876, "learning_rate": 8.75529214965391e-06, "loss": 0.2806, "step": 2877 }, { "epoch": 0.23024, "grad_norm": 1.5847948789596558, "learning_rate": 8.754461689894995e-06, "loss": 0.4181, "step": 2878 }, { "epoch": 0.23032, "grad_norm": 1.3571077585220337, "learning_rate": 8.75363099260385e-06, "loss": 0.3639, "step": 2879 }, { "epoch": 0.2304, "grad_norm": 1.354897379875183, "learning_rate": 8.752800057833033e-06, "loss": 0.3231, "step": 2880 }, { "epoch": 0.23048, "grad_norm": 2.0537118911743164, "learning_rate": 8.751968885635115e-06, "loss": 0.3662, "step": 2881 }, { "epoch": 0.23056, "grad_norm": 1.585959792137146, "learning_rate": 8.751137476062677e-06, "loss": 0.392, "step": 2882 }, { "epoch": 0.23064, "grad_norm": 2.2385966777801514, "learning_rate": 8.750305829168321e-06, "loss": 0.4388, "step": 2883 }, { "epoch": 0.23072, "grad_norm": 1.3772207498550415, "learning_rate": 8.749473945004665e-06, "loss": 0.3189, "step": 2884 }, { "epoch": 0.2308, "grad_norm": 1.298640251159668, "learning_rate": 8.74864182362434e-06, "loss": 0.2753, "step": 2885 }, { "epoch": 0.23088, "grad_norm": 1.3288183212280273, "learning_rate": 8.747809465079988e-06, "loss": 0.297, "step": 2886 }, { "epoch": 0.23096, "grad_norm": 1.2760984897613525, "learning_rate": 8.746976869424272e-06, "loss": 0.2857, "step": 2887 }, { "epoch": 0.23104, "grad_norm": 1.6551486253738403, "learning_rate": 8.746144036709867e-06, "loss": 0.3029, "step": 2888 }, { "epoch": 0.23112, "grad_norm": 1.5374372005462646, "learning_rate": 8.745310966989463e-06, "loss": 0.3214, "step": 2889 }, { "epoch": 0.2312, "grad_norm": 2.0101284980773926, "learning_rate": 8.744477660315767e-06, "loss": 0.3779, "step": 2890 }, { "epoch": 0.23128, "grad_norm": 1.6186890602111816, "learning_rate": 8.743644116741497e-06, "loss": 0.3475, "step": 2891 }, { "epoch": 0.23136, "grad_norm": 1.7061872482299805, "learning_rate": 8.742810336319391e-06, "loss": 0.3468, "step": 2892 }, { "epoch": 0.23144, "grad_norm": 1.7789872884750366, "learning_rate": 8.741976319102198e-06, "loss": 0.4333, "step": 2893 }, { "epoch": 0.23152, "grad_norm": 1.6100202798843384, "learning_rate": 8.741142065142683e-06, "loss": 0.39, "step": 2894 }, { "epoch": 0.2316, "grad_norm": 1.5869725942611694, "learning_rate": 8.74030757449363e-06, "loss": 0.3075, "step": 2895 }, { "epoch": 0.23168, "grad_norm": 1.3948324918746948, "learning_rate": 8.73947284720783e-06, "loss": 0.2776, "step": 2896 }, { "epoch": 0.23176, "grad_norm": 2.0415258407592773, "learning_rate": 8.738637883338097e-06, "loss": 0.3917, "step": 2897 }, { "epoch": 0.23184, "grad_norm": 1.994744062423706, "learning_rate": 8.737802682937253e-06, "loss": 0.407, "step": 2898 }, { "epoch": 0.23192, "grad_norm": 1.5355931520462036, "learning_rate": 8.736967246058139e-06, "loss": 0.3687, "step": 2899 }, { "epoch": 0.232, "grad_norm": 1.7060344219207764, "learning_rate": 8.736131572753614e-06, "loss": 0.326, "step": 2900 }, { "epoch": 0.23208, "grad_norm": 1.27048921585083, "learning_rate": 8.735295663076545e-06, "loss": 0.2716, "step": 2901 }, { "epoch": 0.23216, "grad_norm": 1.5977073907852173, "learning_rate": 8.734459517079815e-06, "loss": 0.3342, "step": 2902 }, { "epoch": 0.23224, "grad_norm": 2.4220876693725586, "learning_rate": 8.733623134816329e-06, "loss": 0.4348, "step": 2903 }, { "epoch": 0.23232, "grad_norm": 1.7467727661132812, "learning_rate": 8.732786516339e-06, "loss": 0.346, "step": 2904 }, { "epoch": 0.2324, "grad_norm": 1.4733282327651978, "learning_rate": 8.731949661700759e-06, "loss": 0.3384, "step": 2905 }, { "epoch": 0.23248, "grad_norm": 1.3760995864868164, "learning_rate": 8.731112570954547e-06, "loss": 0.3029, "step": 2906 }, { "epoch": 0.23256, "grad_norm": 1.9718873500823975, "learning_rate": 8.73027524415333e-06, "loss": 0.395, "step": 2907 }, { "epoch": 0.23264, "grad_norm": 1.4080829620361328, "learning_rate": 8.729437681350078e-06, "loss": 0.4309, "step": 2908 }, { "epoch": 0.23272, "grad_norm": 1.7443673610687256, "learning_rate": 8.728599882597784e-06, "loss": 0.373, "step": 2909 }, { "epoch": 0.2328, "grad_norm": 1.4104093313217163, "learning_rate": 8.72776184794945e-06, "loss": 0.2877, "step": 2910 }, { "epoch": 0.23288, "grad_norm": 1.8802305459976196, "learning_rate": 8.726923577458097e-06, "loss": 0.2943, "step": 2911 }, { "epoch": 0.23296, "grad_norm": 1.4886568784713745, "learning_rate": 8.726085071176761e-06, "loss": 0.3797, "step": 2912 }, { "epoch": 0.23304, "grad_norm": 1.259099006652832, "learning_rate": 8.72524632915849e-06, "loss": 0.3082, "step": 2913 }, { "epoch": 0.23312, "grad_norm": 1.6729494333267212, "learning_rate": 8.724407351456348e-06, "loss": 0.3901, "step": 2914 }, { "epoch": 0.2332, "grad_norm": 2.0305817127227783, "learning_rate": 8.723568138123414e-06, "loss": 0.6386, "step": 2915 }, { "epoch": 0.23328, "grad_norm": 1.870473027229309, "learning_rate": 8.722728689212785e-06, "loss": 0.4489, "step": 2916 }, { "epoch": 0.23336, "grad_norm": 1.5763081312179565, "learning_rate": 8.721889004777566e-06, "loss": 0.3733, "step": 2917 }, { "epoch": 0.23344, "grad_norm": 1.4396545886993408, "learning_rate": 8.721049084870883e-06, "loss": 0.3985, "step": 2918 }, { "epoch": 0.23352, "grad_norm": 1.4830892086029053, "learning_rate": 8.720208929545876e-06, "loss": 0.2998, "step": 2919 }, { "epoch": 0.2336, "grad_norm": 1.6304142475128174, "learning_rate": 8.719368538855699e-06, "loss": 0.4229, "step": 2920 }, { "epoch": 0.23368, "grad_norm": 1.3104946613311768, "learning_rate": 8.718527912853518e-06, "loss": 0.2697, "step": 2921 }, { "epoch": 0.23376, "grad_norm": 1.498120665550232, "learning_rate": 8.717687051592518e-06, "loss": 0.3604, "step": 2922 }, { "epoch": 0.23384, "grad_norm": 2.0636587142944336, "learning_rate": 8.716845955125899e-06, "loss": 0.3904, "step": 2923 }, { "epoch": 0.23392, "grad_norm": 1.651100516319275, "learning_rate": 8.716004623506872e-06, "loss": 0.3936, "step": 2924 }, { "epoch": 0.234, "grad_norm": 1.8705461025238037, "learning_rate": 8.715163056788666e-06, "loss": 0.4764, "step": 2925 }, { "epoch": 0.23408, "grad_norm": 1.6736024618148804, "learning_rate": 8.714321255024525e-06, "loss": 0.3558, "step": 2926 }, { "epoch": 0.23416, "grad_norm": 1.4450464248657227, "learning_rate": 8.713479218267707e-06, "loss": 0.3201, "step": 2927 }, { "epoch": 0.23424, "grad_norm": 1.6315438747406006, "learning_rate": 8.712636946571484e-06, "loss": 0.3774, "step": 2928 }, { "epoch": 0.23432, "grad_norm": 2.0019845962524414, "learning_rate": 8.711794439989142e-06, "loss": 0.3845, "step": 2929 }, { "epoch": 0.2344, "grad_norm": 1.9739534854888916, "learning_rate": 8.710951698573987e-06, "loss": 0.3859, "step": 2930 }, { "epoch": 0.23448, "grad_norm": 1.6644498109817505, "learning_rate": 8.710108722379335e-06, "loss": 0.3413, "step": 2931 }, { "epoch": 0.23456, "grad_norm": 1.7164329290390015, "learning_rate": 8.709265511458518e-06, "loss": 0.3603, "step": 2932 }, { "epoch": 0.23464, "grad_norm": 1.5060267448425293, "learning_rate": 8.708422065864884e-06, "loss": 0.3308, "step": 2933 }, { "epoch": 0.23472, "grad_norm": 1.650381326675415, "learning_rate": 8.707578385651795e-06, "loss": 0.3483, "step": 2934 }, { "epoch": 0.2348, "grad_norm": 1.5644901990890503, "learning_rate": 8.706734470872624e-06, "loss": 0.2845, "step": 2935 }, { "epoch": 0.23488, "grad_norm": 1.9320694208145142, "learning_rate": 8.705890321580768e-06, "loss": 0.3227, "step": 2936 }, { "epoch": 0.23496, "grad_norm": 1.542862892150879, "learning_rate": 8.70504593782963e-06, "loss": 0.2855, "step": 2937 }, { "epoch": 0.23504, "grad_norm": 1.6313329935073853, "learning_rate": 8.704201319672635e-06, "loss": 0.3229, "step": 2938 }, { "epoch": 0.23512, "grad_norm": 2.1460344791412354, "learning_rate": 8.703356467163214e-06, "loss": 0.5213, "step": 2939 }, { "epoch": 0.2352, "grad_norm": 1.938667893409729, "learning_rate": 8.702511380354822e-06, "loss": 0.4258, "step": 2940 }, { "epoch": 0.23528, "grad_norm": 1.58048415184021, "learning_rate": 8.701666059300924e-06, "loss": 0.3398, "step": 2941 }, { "epoch": 0.23536, "grad_norm": 1.4565937519073486, "learning_rate": 8.700820504055e-06, "loss": 0.3913, "step": 2942 }, { "epoch": 0.23544, "grad_norm": 1.457868218421936, "learning_rate": 8.699974714670544e-06, "loss": 0.3067, "step": 2943 }, { "epoch": 0.23552, "grad_norm": 1.6874083280563354, "learning_rate": 8.699128691201071e-06, "loss": 0.3866, "step": 2944 }, { "epoch": 0.2356, "grad_norm": 1.3682913780212402, "learning_rate": 8.698282433700102e-06, "loss": 0.2925, "step": 2945 }, { "epoch": 0.23568, "grad_norm": 1.2929877042770386, "learning_rate": 8.697435942221178e-06, "loss": 0.2813, "step": 2946 }, { "epoch": 0.23576, "grad_norm": 1.2776330709457397, "learning_rate": 8.696589216817852e-06, "loss": 0.2464, "step": 2947 }, { "epoch": 0.23584, "grad_norm": 1.6202374696731567, "learning_rate": 8.695742257543697e-06, "loss": 0.3886, "step": 2948 }, { "epoch": 0.23592, "grad_norm": 1.200810432434082, "learning_rate": 8.694895064452294e-06, "loss": 0.3024, "step": 2949 }, { "epoch": 0.236, "grad_norm": 1.6930382251739502, "learning_rate": 8.694047637597245e-06, "loss": 0.3178, "step": 2950 }, { "epoch": 0.23608, "grad_norm": 1.5066219568252563, "learning_rate": 8.693199977032161e-06, "loss": 0.322, "step": 2951 }, { "epoch": 0.23616, "grad_norm": 1.304224967956543, "learning_rate": 8.692352082810673e-06, "loss": 0.3451, "step": 2952 }, { "epoch": 0.23624, "grad_norm": 1.763218641281128, "learning_rate": 8.691503954986422e-06, "loss": 0.4435, "step": 2953 }, { "epoch": 0.23632, "grad_norm": 1.316672444343567, "learning_rate": 8.690655593613068e-06, "loss": 0.3426, "step": 2954 }, { "epoch": 0.2364, "grad_norm": 1.6360975503921509, "learning_rate": 8.689806998744284e-06, "loss": 0.3094, "step": 2955 }, { "epoch": 0.23648, "grad_norm": 1.7397687435150146, "learning_rate": 8.688958170433757e-06, "loss": 0.4201, "step": 2956 }, { "epoch": 0.23656, "grad_norm": 1.3750033378601074, "learning_rate": 8.68810910873519e-06, "loss": 0.2625, "step": 2957 }, { "epoch": 0.23664, "grad_norm": 1.3705966472625732, "learning_rate": 8.687259813702301e-06, "loss": 0.3107, "step": 2958 }, { "epoch": 0.23672, "grad_norm": 1.367472529411316, "learning_rate": 8.686410285388818e-06, "loss": 0.2897, "step": 2959 }, { "epoch": 0.2368, "grad_norm": 1.3883439302444458, "learning_rate": 8.685560523848494e-06, "loss": 0.3076, "step": 2960 }, { "epoch": 0.23688, "grad_norm": 1.2411656379699707, "learning_rate": 8.684710529135088e-06, "loss": 0.3676, "step": 2961 }, { "epoch": 0.23696, "grad_norm": 1.4780466556549072, "learning_rate": 8.683860301302373e-06, "loss": 0.2878, "step": 2962 }, { "epoch": 0.23704, "grad_norm": 1.2274631261825562, "learning_rate": 8.683009840404145e-06, "loss": 0.3174, "step": 2963 }, { "epoch": 0.23712, "grad_norm": 1.504817247390747, "learning_rate": 8.682159146494208e-06, "loss": 0.291, "step": 2964 }, { "epoch": 0.2372, "grad_norm": 1.6749637126922607, "learning_rate": 8.681308219626381e-06, "loss": 0.3828, "step": 2965 }, { "epoch": 0.23728, "grad_norm": 1.835526943206787, "learning_rate": 8.680457059854502e-06, "loss": 0.3636, "step": 2966 }, { "epoch": 0.23736, "grad_norm": 1.364960789680481, "learning_rate": 8.679605667232421e-06, "loss": 0.3038, "step": 2967 }, { "epoch": 0.23744, "grad_norm": 1.8709944486618042, "learning_rate": 8.678754041813996e-06, "loss": 0.4753, "step": 2968 }, { "epoch": 0.23752, "grad_norm": 1.5966829061508179, "learning_rate": 8.677902183653117e-06, "loss": 0.3359, "step": 2969 }, { "epoch": 0.2376, "grad_norm": 1.4219958782196045, "learning_rate": 8.677050092803671e-06, "loss": 0.2726, "step": 2970 }, { "epoch": 0.23768, "grad_norm": 1.6112638711929321, "learning_rate": 8.67619776931957e-06, "loss": 0.4072, "step": 2971 }, { "epoch": 0.23776, "grad_norm": 1.330270767211914, "learning_rate": 8.675345213254739e-06, "loss": 0.3817, "step": 2972 }, { "epoch": 0.23784, "grad_norm": 1.7107291221618652, "learning_rate": 8.67449242466311e-06, "loss": 0.3458, "step": 2973 }, { "epoch": 0.23792, "grad_norm": 1.2672098875045776, "learning_rate": 8.67363940359864e-06, "loss": 0.2455, "step": 2974 }, { "epoch": 0.238, "grad_norm": 1.589351773262024, "learning_rate": 8.6727861501153e-06, "loss": 0.3935, "step": 2975 }, { "epoch": 0.23808, "grad_norm": 1.4220237731933594, "learning_rate": 8.67193266426707e-06, "loss": 0.3488, "step": 2976 }, { "epoch": 0.23816, "grad_norm": 1.346812129020691, "learning_rate": 8.671078946107942e-06, "loss": 0.2996, "step": 2977 }, { "epoch": 0.23824, "grad_norm": 1.5532695055007935, "learning_rate": 8.670224995691937e-06, "loss": 0.2827, "step": 2978 }, { "epoch": 0.23832, "grad_norm": 1.6328967809677124, "learning_rate": 8.669370813073076e-06, "loss": 0.3164, "step": 2979 }, { "epoch": 0.2384, "grad_norm": 1.4983983039855957, "learning_rate": 8.6685163983054e-06, "loss": 0.3014, "step": 2980 }, { "epoch": 0.23848, "grad_norm": 1.8855558633804321, "learning_rate": 8.667661751442967e-06, "loss": 0.3764, "step": 2981 }, { "epoch": 0.23856, "grad_norm": 1.2733697891235352, "learning_rate": 8.666806872539848e-06, "loss": 0.2521, "step": 2982 }, { "epoch": 0.23864, "grad_norm": 1.7222651243209839, "learning_rate": 8.665951761650126e-06, "loss": 0.451, "step": 2983 }, { "epoch": 0.23872, "grad_norm": 1.8912527561187744, "learning_rate": 8.665096418827902e-06, "loss": 0.4282, "step": 2984 }, { "epoch": 0.2388, "grad_norm": 1.8017947673797607, "learning_rate": 8.664240844127294e-06, "loss": 0.414, "step": 2985 }, { "epoch": 0.23888, "grad_norm": 1.7017481327056885, "learning_rate": 8.663385037602425e-06, "loss": 0.3998, "step": 2986 }, { "epoch": 0.23896, "grad_norm": 1.7164137363433838, "learning_rate": 8.662528999307445e-06, "loss": 0.4701, "step": 2987 }, { "epoch": 0.23904, "grad_norm": 1.832329273223877, "learning_rate": 8.661672729296508e-06, "loss": 0.368, "step": 2988 }, { "epoch": 0.23912, "grad_norm": 1.7810895442962646, "learning_rate": 8.660816227623791e-06, "loss": 0.3759, "step": 2989 }, { "epoch": 0.2392, "grad_norm": 1.597602128982544, "learning_rate": 8.65995949434348e-06, "loss": 0.3215, "step": 2990 }, { "epoch": 0.23928, "grad_norm": 1.6633431911468506, "learning_rate": 8.659102529509777e-06, "loss": 0.3171, "step": 2991 }, { "epoch": 0.23936, "grad_norm": 1.16764497756958, "learning_rate": 8.6582453331769e-06, "loss": 0.2655, "step": 2992 }, { "epoch": 0.23944, "grad_norm": 1.6470662355422974, "learning_rate": 8.657387905399085e-06, "loss": 0.3766, "step": 2993 }, { "epoch": 0.23952, "grad_norm": 1.707756519317627, "learning_rate": 8.65653024623057e-06, "loss": 0.3319, "step": 2994 }, { "epoch": 0.2396, "grad_norm": 1.4462788105010986, "learning_rate": 8.655672355725624e-06, "loss": 0.3719, "step": 2995 }, { "epoch": 0.23968, "grad_norm": 1.4448779821395874, "learning_rate": 8.65481423393852e-06, "loss": 0.3045, "step": 2996 }, { "epoch": 0.23976, "grad_norm": 1.3186711072921753, "learning_rate": 8.653955880923548e-06, "loss": 0.2988, "step": 2997 }, { "epoch": 0.23984, "grad_norm": 1.8978686332702637, "learning_rate": 8.653097296735013e-06, "loss": 0.4307, "step": 2998 }, { "epoch": 0.23992, "grad_norm": 1.8591535091400146, "learning_rate": 8.652238481427236e-06, "loss": 0.4093, "step": 2999 }, { "epoch": 0.24, "grad_norm": 1.7549595832824707, "learning_rate": 8.65137943505455e-06, "loss": 0.3208, "step": 3000 }, { "epoch": 0.24008, "grad_norm": 1.3385145664215088, "learning_rate": 8.650520157671305e-06, "loss": 0.3473, "step": 3001 }, { "epoch": 0.24016, "grad_norm": 1.6653660535812378, "learning_rate": 8.649660649331866e-06, "loss": 0.4244, "step": 3002 }, { "epoch": 0.24024, "grad_norm": 1.7460511922836304, "learning_rate": 8.648800910090607e-06, "loss": 0.4825, "step": 3003 }, { "epoch": 0.24032, "grad_norm": 1.650795817375183, "learning_rate": 8.647940940001925e-06, "loss": 0.3525, "step": 3004 }, { "epoch": 0.2404, "grad_norm": 1.116976261138916, "learning_rate": 8.647080739120224e-06, "loss": 0.2398, "step": 3005 }, { "epoch": 0.24048, "grad_norm": 2.5539703369140625, "learning_rate": 8.64622030749993e-06, "loss": 0.5062, "step": 3006 }, { "epoch": 0.24056, "grad_norm": 1.4344453811645508, "learning_rate": 8.645359645195475e-06, "loss": 0.4, "step": 3007 }, { "epoch": 0.24064, "grad_norm": 1.8538168668746948, "learning_rate": 8.644498752261314e-06, "loss": 0.3884, "step": 3008 }, { "epoch": 0.24072, "grad_norm": 1.4812930822372437, "learning_rate": 8.643637628751912e-06, "loss": 0.379, "step": 3009 }, { "epoch": 0.2408, "grad_norm": 2.2699289321899414, "learning_rate": 8.642776274721747e-06, "loss": 0.4227, "step": 3010 }, { "epoch": 0.24088, "grad_norm": 1.7083232402801514, "learning_rate": 8.64191469022532e-06, "loss": 0.4064, "step": 3011 }, { "epoch": 0.24096, "grad_norm": 1.5670645236968994, "learning_rate": 8.641052875317134e-06, "loss": 0.4143, "step": 3012 }, { "epoch": 0.24104, "grad_norm": 1.5622729063034058, "learning_rate": 8.640190830051714e-06, "loss": 0.3417, "step": 3013 }, { "epoch": 0.24112, "grad_norm": 1.4791356325149536, "learning_rate": 8.639328554483602e-06, "loss": 0.3159, "step": 3014 }, { "epoch": 0.2412, "grad_norm": 1.7441151142120361, "learning_rate": 8.63846604866735e-06, "loss": 0.3908, "step": 3015 }, { "epoch": 0.24128, "grad_norm": 1.3369641304016113, "learning_rate": 8.637603312657523e-06, "loss": 0.3106, "step": 3016 }, { "epoch": 0.24136, "grad_norm": 1.8151953220367432, "learning_rate": 8.636740346508708e-06, "loss": 0.3418, "step": 3017 }, { "epoch": 0.24144, "grad_norm": 1.3240052461624146, "learning_rate": 8.635877150275498e-06, "loss": 0.2562, "step": 3018 }, { "epoch": 0.24152, "grad_norm": 1.6432435512542725, "learning_rate": 8.63501372401251e-06, "loss": 0.4784, "step": 3019 }, { "epoch": 0.2416, "grad_norm": 1.8531707525253296, "learning_rate": 8.634150067774363e-06, "loss": 0.4161, "step": 3020 }, { "epoch": 0.24168, "grad_norm": 1.6908509731292725, "learning_rate": 8.633286181615701e-06, "loss": 0.3549, "step": 3021 }, { "epoch": 0.24176, "grad_norm": 1.5330978631973267, "learning_rate": 8.632422065591181e-06, "loss": 0.3241, "step": 3022 }, { "epoch": 0.24184, "grad_norm": 1.633192777633667, "learning_rate": 8.63155771975547e-06, "loss": 0.3212, "step": 3023 }, { "epoch": 0.24192, "grad_norm": 1.6121563911437988, "learning_rate": 8.630693144163255e-06, "loss": 0.3842, "step": 3024 }, { "epoch": 0.242, "grad_norm": 1.674614667892456, "learning_rate": 8.629828338869232e-06, "loss": 0.3152, "step": 3025 }, { "epoch": 0.24208, "grad_norm": 1.8287391662597656, "learning_rate": 8.628963303928115e-06, "loss": 0.5112, "step": 3026 }, { "epoch": 0.24216, "grad_norm": 1.853173017501831, "learning_rate": 8.628098039394632e-06, "loss": 0.3848, "step": 3027 }, { "epoch": 0.24224, "grad_norm": 1.7024120092391968, "learning_rate": 8.627232545323527e-06, "loss": 0.3737, "step": 3028 }, { "epoch": 0.24232, "grad_norm": 1.4395607709884644, "learning_rate": 8.626366821769556e-06, "loss": 0.3088, "step": 3029 }, { "epoch": 0.2424, "grad_norm": 1.6949063539505005, "learning_rate": 8.625500868787488e-06, "loss": 0.4567, "step": 3030 }, { "epoch": 0.24248, "grad_norm": 2.09765887260437, "learning_rate": 8.624634686432112e-06, "loss": 0.2887, "step": 3031 }, { "epoch": 0.24256, "grad_norm": 1.5703338384628296, "learning_rate": 8.623768274758228e-06, "loss": 0.3327, "step": 3032 }, { "epoch": 0.24264, "grad_norm": 1.6307145357131958, "learning_rate": 8.62290163382065e-06, "loss": 0.3305, "step": 3033 }, { "epoch": 0.24272, "grad_norm": 1.5243542194366455, "learning_rate": 8.622034763674207e-06, "loss": 0.3326, "step": 3034 }, { "epoch": 0.2428, "grad_norm": 1.9055639505386353, "learning_rate": 8.621167664373746e-06, "loss": 0.4019, "step": 3035 }, { "epoch": 0.24288, "grad_norm": 1.4484702348709106, "learning_rate": 8.620300335974122e-06, "loss": 0.2865, "step": 3036 }, { "epoch": 0.24296, "grad_norm": 1.721147894859314, "learning_rate": 8.61943277853021e-06, "loss": 0.4413, "step": 3037 }, { "epoch": 0.24304, "grad_norm": 1.7040313482284546, "learning_rate": 8.618564992096896e-06, "loss": 0.2903, "step": 3038 }, { "epoch": 0.24312, "grad_norm": 1.6943026781082153, "learning_rate": 8.617696976729082e-06, "loss": 0.3307, "step": 3039 }, { "epoch": 0.2432, "grad_norm": 1.560989260673523, "learning_rate": 8.616828732481687e-06, "loss": 0.4324, "step": 3040 }, { "epoch": 0.24328, "grad_norm": 1.2807221412658691, "learning_rate": 8.61596025940964e-06, "loss": 0.3329, "step": 3041 }, { "epoch": 0.24336, "grad_norm": 1.504556655883789, "learning_rate": 8.615091557567884e-06, "loss": 0.303, "step": 3042 }, { "epoch": 0.24344, "grad_norm": 1.388826847076416, "learning_rate": 8.614222627011382e-06, "loss": 0.3595, "step": 3043 }, { "epoch": 0.24352, "grad_norm": 1.3632359504699707, "learning_rate": 8.613353467795108e-06, "loss": 0.2863, "step": 3044 }, { "epoch": 0.2436, "grad_norm": 1.6326030492782593, "learning_rate": 8.612484079974053e-06, "loss": 0.3433, "step": 3045 }, { "epoch": 0.24368, "grad_norm": 1.4086010456085205, "learning_rate": 8.611614463603215e-06, "loss": 0.4176, "step": 3046 }, { "epoch": 0.24376, "grad_norm": 1.719990611076355, "learning_rate": 8.610744618737614e-06, "loss": 0.4066, "step": 3047 }, { "epoch": 0.24384, "grad_norm": 1.6791647672653198, "learning_rate": 8.609874545432285e-06, "loss": 0.3398, "step": 3048 }, { "epoch": 0.24392, "grad_norm": 1.7768301963806152, "learning_rate": 8.60900424374227e-06, "loss": 0.4071, "step": 3049 }, { "epoch": 0.244, "grad_norm": 1.4605388641357422, "learning_rate": 8.608133713722631e-06, "loss": 0.3426, "step": 3050 }, { "epoch": 0.24408, "grad_norm": 1.4044535160064697, "learning_rate": 8.607262955428449e-06, "loss": 0.3431, "step": 3051 }, { "epoch": 0.24416, "grad_norm": 1.5170437097549438, "learning_rate": 8.606391968914807e-06, "loss": 0.3865, "step": 3052 }, { "epoch": 0.24424, "grad_norm": 1.5312057733535767, "learning_rate": 8.60552075423681e-06, "loss": 0.391, "step": 3053 }, { "epoch": 0.24432, "grad_norm": 2.062711715698242, "learning_rate": 8.604649311449583e-06, "loss": 0.5652, "step": 3054 }, { "epoch": 0.2444, "grad_norm": 1.4739586114883423, "learning_rate": 8.603777640608254e-06, "loss": 0.3014, "step": 3055 }, { "epoch": 0.24448, "grad_norm": 1.3225152492523193, "learning_rate": 8.60290574176797e-06, "loss": 0.311, "step": 3056 }, { "epoch": 0.24456, "grad_norm": 1.7784897089004517, "learning_rate": 8.602033614983898e-06, "loss": 0.3726, "step": 3057 }, { "epoch": 0.24464, "grad_norm": 1.7232739925384521, "learning_rate": 8.60116126031121e-06, "loss": 0.3162, "step": 3058 }, { "epoch": 0.24472, "grad_norm": 1.1945064067840576, "learning_rate": 8.6002886778051e-06, "loss": 0.2356, "step": 3059 }, { "epoch": 0.2448, "grad_norm": 1.5598511695861816, "learning_rate": 8.59941586752077e-06, "loss": 0.3743, "step": 3060 }, { "epoch": 0.24488, "grad_norm": 1.2169926166534424, "learning_rate": 8.598542829513444e-06, "loss": 0.2614, "step": 3061 }, { "epoch": 0.24496, "grad_norm": 1.6568950414657593, "learning_rate": 8.597669563838353e-06, "loss": 0.3015, "step": 3062 }, { "epoch": 0.24504, "grad_norm": 1.6429921388626099, "learning_rate": 8.596796070550746e-06, "loss": 0.3532, "step": 3063 }, { "epoch": 0.24512, "grad_norm": 1.6444114446640015, "learning_rate": 8.595922349705886e-06, "loss": 0.4508, "step": 3064 }, { "epoch": 0.2452, "grad_norm": 1.5652337074279785, "learning_rate": 8.595048401359052e-06, "loss": 0.2787, "step": 3065 }, { "epoch": 0.24528, "grad_norm": 2.0153632164001465, "learning_rate": 8.594174225565535e-06, "loss": 0.3774, "step": 3066 }, { "epoch": 0.24536, "grad_norm": 1.5180140733718872, "learning_rate": 8.59329982238064e-06, "loss": 0.3183, "step": 3067 }, { "epoch": 0.24544, "grad_norm": 1.8348703384399414, "learning_rate": 8.592425191859687e-06, "loss": 0.4139, "step": 3068 }, { "epoch": 0.24552, "grad_norm": 1.6924543380737305, "learning_rate": 8.591550334058015e-06, "loss": 0.3641, "step": 3069 }, { "epoch": 0.2456, "grad_norm": 1.5413908958435059, "learning_rate": 8.59067524903097e-06, "loss": 0.3124, "step": 3070 }, { "epoch": 0.24568, "grad_norm": 1.7611223459243774, "learning_rate": 8.589799936833916e-06, "loss": 0.397, "step": 3071 }, { "epoch": 0.24576, "grad_norm": 1.5739551782608032, "learning_rate": 8.588924397522231e-06, "loss": 0.2906, "step": 3072 }, { "epoch": 0.24584, "grad_norm": 1.6537935733795166, "learning_rate": 8.58804863115131e-06, "loss": 0.3932, "step": 3073 }, { "epoch": 0.24592, "grad_norm": 1.7853214740753174, "learning_rate": 8.587172637776558e-06, "loss": 0.3289, "step": 3074 }, { "epoch": 0.246, "grad_norm": 1.2469711303710938, "learning_rate": 8.586296417453393e-06, "loss": 0.3285, "step": 3075 }, { "epoch": 0.24608, "grad_norm": 1.7327982187271118, "learning_rate": 8.585419970237255e-06, "loss": 0.5739, "step": 3076 }, { "epoch": 0.24616, "grad_norm": 1.298710823059082, "learning_rate": 8.584543296183591e-06, "loss": 0.2513, "step": 3077 }, { "epoch": 0.24624, "grad_norm": 1.2274887561798096, "learning_rate": 8.583666395347869e-06, "loss": 0.2949, "step": 3078 }, { "epoch": 0.24632, "grad_norm": 1.9516730308532715, "learning_rate": 8.582789267785563e-06, "loss": 0.4306, "step": 3079 }, { "epoch": 0.2464, "grad_norm": 1.424416422843933, "learning_rate": 8.58191191355217e-06, "loss": 0.3194, "step": 3080 }, { "epoch": 0.24648, "grad_norm": 1.8527755737304688, "learning_rate": 8.581034332703194e-06, "loss": 0.3808, "step": 3081 }, { "epoch": 0.24656, "grad_norm": 1.4947212934494019, "learning_rate": 8.58015652529416e-06, "loss": 0.2791, "step": 3082 }, { "epoch": 0.24664, "grad_norm": 1.3371108770370483, "learning_rate": 8.579278491380598e-06, "loss": 0.2267, "step": 3083 }, { "epoch": 0.24672, "grad_norm": 1.5755013227462769, "learning_rate": 8.578400231018064e-06, "loss": 0.3123, "step": 3084 }, { "epoch": 0.2468, "grad_norm": 1.653031349182129, "learning_rate": 8.577521744262123e-06, "loss": 0.4084, "step": 3085 }, { "epoch": 0.24688, "grad_norm": 1.827981948852539, "learning_rate": 8.576643031168349e-06, "loss": 0.3714, "step": 3086 }, { "epoch": 0.24696, "grad_norm": 1.6476799249649048, "learning_rate": 8.575764091792339e-06, "loss": 0.2948, "step": 3087 }, { "epoch": 0.24704, "grad_norm": 1.5807143449783325, "learning_rate": 8.574884926189699e-06, "loss": 0.4274, "step": 3088 }, { "epoch": 0.24712, "grad_norm": 1.801981806755066, "learning_rate": 8.57400553441605e-06, "loss": 0.4747, "step": 3089 }, { "epoch": 0.2472, "grad_norm": 1.1952424049377441, "learning_rate": 8.573125916527031e-06, "loss": 0.2432, "step": 3090 }, { "epoch": 0.24728, "grad_norm": 1.6386176347732544, "learning_rate": 8.572246072578292e-06, "loss": 0.3894, "step": 3091 }, { "epoch": 0.24736, "grad_norm": 1.36851966381073, "learning_rate": 8.571366002625495e-06, "loss": 0.2686, "step": 3092 }, { "epoch": 0.24744, "grad_norm": 1.3977208137512207, "learning_rate": 8.570485706724322e-06, "loss": 0.3077, "step": 3093 }, { "epoch": 0.24752, "grad_norm": 1.9915634393692017, "learning_rate": 8.569605184930466e-06, "loss": 0.3775, "step": 3094 }, { "epoch": 0.2476, "grad_norm": 1.7627999782562256, "learning_rate": 8.568724437299631e-06, "loss": 0.3882, "step": 3095 }, { "epoch": 0.24768, "grad_norm": 1.5895828008651733, "learning_rate": 8.567843463887543e-06, "loss": 0.2905, "step": 3096 }, { "epoch": 0.24776, "grad_norm": 1.7350879907608032, "learning_rate": 8.566962264749938e-06, "loss": 0.3652, "step": 3097 }, { "epoch": 0.24784, "grad_norm": 1.7022144794464111, "learning_rate": 8.566080839942566e-06, "loss": 0.3455, "step": 3098 }, { "epoch": 0.24792, "grad_norm": 1.8170467615127563, "learning_rate": 8.565199189521189e-06, "loss": 0.3867, "step": 3099 }, { "epoch": 0.248, "grad_norm": 1.3393418788909912, "learning_rate": 8.56431731354159e-06, "loss": 0.2957, "step": 3100 }, { "epoch": 0.24808, "grad_norm": 1.4289052486419678, "learning_rate": 8.563435212059561e-06, "loss": 0.3263, "step": 3101 }, { "epoch": 0.24816, "grad_norm": 1.3951023817062378, "learning_rate": 8.56255288513091e-06, "loss": 0.2906, "step": 3102 }, { "epoch": 0.24824, "grad_norm": 1.709173560142517, "learning_rate": 8.561670332811458e-06, "loss": 0.2987, "step": 3103 }, { "epoch": 0.24832, "grad_norm": 2.1143503189086914, "learning_rate": 8.56078755515704e-06, "loss": 0.4867, "step": 3104 }, { "epoch": 0.2484, "grad_norm": 1.202492356300354, "learning_rate": 8.55990455222351e-06, "loss": 0.2769, "step": 3105 }, { "epoch": 0.24848, "grad_norm": 1.666141390800476, "learning_rate": 8.559021324066728e-06, "loss": 0.3279, "step": 3106 }, { "epoch": 0.24856, "grad_norm": 1.3046966791152954, "learning_rate": 8.558137870742578e-06, "loss": 0.3452, "step": 3107 }, { "epoch": 0.24864, "grad_norm": 1.476283073425293, "learning_rate": 8.557254192306948e-06, "loss": 0.3225, "step": 3108 }, { "epoch": 0.24872, "grad_norm": 1.7318915128707886, "learning_rate": 8.55637028881575e-06, "loss": 0.3913, "step": 3109 }, { "epoch": 0.2488, "grad_norm": 1.5203393697738647, "learning_rate": 8.555486160324902e-06, "loss": 0.3077, "step": 3110 }, { "epoch": 0.24888, "grad_norm": 1.610252857208252, "learning_rate": 8.554601806890342e-06, "loss": 0.3474, "step": 3111 }, { "epoch": 0.24896, "grad_norm": 1.8684039115905762, "learning_rate": 8.553717228568018e-06, "loss": 0.361, "step": 3112 }, { "epoch": 0.24904, "grad_norm": 1.8766143321990967, "learning_rate": 8.552832425413897e-06, "loss": 0.3453, "step": 3113 }, { "epoch": 0.24912, "grad_norm": 1.7946739196777344, "learning_rate": 8.551947397483957e-06, "loss": 0.4859, "step": 3114 }, { "epoch": 0.2492, "grad_norm": 1.4358093738555908, "learning_rate": 8.551062144834189e-06, "loss": 0.3495, "step": 3115 }, { "epoch": 0.24928, "grad_norm": 1.4946582317352295, "learning_rate": 8.5501766675206e-06, "loss": 0.361, "step": 3116 }, { "epoch": 0.24936, "grad_norm": 1.362602710723877, "learning_rate": 8.549290965599214e-06, "loss": 0.3362, "step": 3117 }, { "epoch": 0.24944, "grad_norm": 1.9473742246627808, "learning_rate": 8.548405039126064e-06, "loss": 0.4263, "step": 3118 }, { "epoch": 0.24952, "grad_norm": 1.4761608839035034, "learning_rate": 8.5475188881572e-06, "loss": 0.3398, "step": 3119 }, { "epoch": 0.2496, "grad_norm": 1.3862004280090332, "learning_rate": 8.546632512748685e-06, "loss": 0.361, "step": 3120 }, { "epoch": 0.24968, "grad_norm": 1.507232904434204, "learning_rate": 8.5457459129566e-06, "loss": 0.2987, "step": 3121 }, { "epoch": 0.24976, "grad_norm": 1.8091908693313599, "learning_rate": 8.544859088837034e-06, "loss": 0.3306, "step": 3122 }, { "epoch": 0.24984, "grad_norm": 1.4990782737731934, "learning_rate": 8.543972040446094e-06, "loss": 0.2953, "step": 3123 }, { "epoch": 0.24992, "grad_norm": 1.6368714570999146, "learning_rate": 8.543084767839903e-06, "loss": 0.2681, "step": 3124 }, { "epoch": 0.25, "grad_norm": 1.792515516281128, "learning_rate": 8.542197271074593e-06, "loss": 0.3714, "step": 3125 }, { "epoch": 0.25008, "grad_norm": 1.680100679397583, "learning_rate": 8.541309550206314e-06, "loss": 0.4091, "step": 3126 }, { "epoch": 0.25016, "grad_norm": 1.2095801830291748, "learning_rate": 8.540421605291228e-06, "loss": 0.2428, "step": 3127 }, { "epoch": 0.25024, "grad_norm": 1.3905729055404663, "learning_rate": 8.539533436385515e-06, "loss": 0.2728, "step": 3128 }, { "epoch": 0.25032, "grad_norm": 1.7047653198242188, "learning_rate": 8.538645043545364e-06, "loss": 0.3631, "step": 3129 }, { "epoch": 0.2504, "grad_norm": 1.328326940536499, "learning_rate": 8.537756426826981e-06, "loss": 0.2764, "step": 3130 }, { "epoch": 0.25048, "grad_norm": 1.6265760660171509, "learning_rate": 8.536867586286586e-06, "loss": 0.3045, "step": 3131 }, { "epoch": 0.25056, "grad_norm": 1.6370584964752197, "learning_rate": 8.535978521980414e-06, "loss": 0.3782, "step": 3132 }, { "epoch": 0.25064, "grad_norm": 1.4649031162261963, "learning_rate": 8.535089233964712e-06, "loss": 0.3088, "step": 3133 }, { "epoch": 0.25072, "grad_norm": 1.2999662160873413, "learning_rate": 8.534199722295744e-06, "loss": 0.2541, "step": 3134 }, { "epoch": 0.2508, "grad_norm": 1.5978385210037231, "learning_rate": 8.533309987029783e-06, "loss": 0.3905, "step": 3135 }, { "epoch": 0.25088, "grad_norm": 1.4170856475830078, "learning_rate": 8.532420028223122e-06, "loss": 0.3053, "step": 3136 }, { "epoch": 0.25096, "grad_norm": 1.6570608615875244, "learning_rate": 8.531529845932066e-06, "loss": 0.3299, "step": 3137 }, { "epoch": 0.25104, "grad_norm": 1.5469874143600464, "learning_rate": 8.530639440212934e-06, "loss": 0.3261, "step": 3138 }, { "epoch": 0.25112, "grad_norm": 1.5680407285690308, "learning_rate": 8.529748811122056e-06, "loss": 0.3176, "step": 3139 }, { "epoch": 0.2512, "grad_norm": 1.4539591073989868, "learning_rate": 8.528857958715783e-06, "loss": 0.3613, "step": 3140 }, { "epoch": 0.25128, "grad_norm": 1.8934903144836426, "learning_rate": 8.527966883050474e-06, "loss": 0.445, "step": 3141 }, { "epoch": 0.25136, "grad_norm": 1.4876171350479126, "learning_rate": 8.527075584182505e-06, "loss": 0.2891, "step": 3142 }, { "epoch": 0.25144, "grad_norm": 1.2558088302612305, "learning_rate": 8.526184062168264e-06, "loss": 0.3297, "step": 3143 }, { "epoch": 0.25152, "grad_norm": 1.8519783020019531, "learning_rate": 8.525292317064157e-06, "loss": 0.3826, "step": 3144 }, { "epoch": 0.2516, "grad_norm": 1.5815104246139526, "learning_rate": 8.524400348926602e-06, "loss": 0.3588, "step": 3145 }, { "epoch": 0.25168, "grad_norm": 1.5304956436157227, "learning_rate": 8.523508157812029e-06, "loss": 0.3735, "step": 3146 }, { "epoch": 0.25176, "grad_norm": 1.5469937324523926, "learning_rate": 8.522615743776885e-06, "loss": 0.3096, "step": 3147 }, { "epoch": 0.25184, "grad_norm": 1.3774428367614746, "learning_rate": 8.52172310687763e-06, "loss": 0.248, "step": 3148 }, { "epoch": 0.25192, "grad_norm": 1.1401811838150024, "learning_rate": 8.520830247170735e-06, "loss": 0.2834, "step": 3149 }, { "epoch": 0.252, "grad_norm": 1.574325680732727, "learning_rate": 8.519937164712691e-06, "loss": 0.3037, "step": 3150 }, { "epoch": 0.25208, "grad_norm": 1.6470081806182861, "learning_rate": 8.519043859560004e-06, "loss": 0.3799, "step": 3151 }, { "epoch": 0.25216, "grad_norm": 1.6649773120880127, "learning_rate": 8.518150331769184e-06, "loss": 0.3009, "step": 3152 }, { "epoch": 0.25224, "grad_norm": 1.4246584177017212, "learning_rate": 8.517256581396764e-06, "loss": 0.3222, "step": 3153 }, { "epoch": 0.25232, "grad_norm": 1.5069494247436523, "learning_rate": 8.51636260849929e-06, "loss": 0.3699, "step": 3154 }, { "epoch": 0.2524, "grad_norm": 1.4292433261871338, "learning_rate": 8.51546841313332e-06, "loss": 0.4126, "step": 3155 }, { "epoch": 0.25248, "grad_norm": 1.3793959617614746, "learning_rate": 8.514573995355426e-06, "loss": 0.2454, "step": 3156 }, { "epoch": 0.25256, "grad_norm": 1.3602819442749023, "learning_rate": 8.513679355222195e-06, "loss": 0.3414, "step": 3157 }, { "epoch": 0.25264, "grad_norm": 1.6190898418426514, "learning_rate": 8.512784492790227e-06, "loss": 0.3326, "step": 3158 }, { "epoch": 0.25272, "grad_norm": 2.0170743465423584, "learning_rate": 8.511889408116138e-06, "loss": 0.3803, "step": 3159 }, { "epoch": 0.2528, "grad_norm": 1.572218418121338, "learning_rate": 8.510994101256557e-06, "loss": 0.3277, "step": 3160 }, { "epoch": 0.25288, "grad_norm": 1.3365064859390259, "learning_rate": 8.510098572268129e-06, "loss": 0.2734, "step": 3161 }, { "epoch": 0.25296, "grad_norm": 1.7582436800003052, "learning_rate": 8.509202821207508e-06, "loss": 0.4216, "step": 3162 }, { "epoch": 0.25304, "grad_norm": 1.665613055229187, "learning_rate": 8.508306848131367e-06, "loss": 0.3075, "step": 3163 }, { "epoch": 0.25312, "grad_norm": 1.382347822189331, "learning_rate": 8.50741065309639e-06, "loss": 0.3831, "step": 3164 }, { "epoch": 0.2532, "grad_norm": 1.4488331079483032, "learning_rate": 8.506514236159276e-06, "loss": 0.353, "step": 3165 }, { "epoch": 0.25328, "grad_norm": 1.5848164558410645, "learning_rate": 8.505617597376739e-06, "loss": 0.4441, "step": 3166 }, { "epoch": 0.25336, "grad_norm": 1.5356500148773193, "learning_rate": 8.504720736805507e-06, "loss": 0.3197, "step": 3167 }, { "epoch": 0.25344, "grad_norm": 1.2928869724273682, "learning_rate": 8.50382365450232e-06, "loss": 0.2967, "step": 3168 }, { "epoch": 0.25352, "grad_norm": 1.418664813041687, "learning_rate": 8.502926350523937e-06, "loss": 0.2878, "step": 3169 }, { "epoch": 0.2536, "grad_norm": 2.2894961833953857, "learning_rate": 8.502028824927123e-06, "loss": 0.5619, "step": 3170 }, { "epoch": 0.25368, "grad_norm": 1.8666496276855469, "learning_rate": 8.501131077768664e-06, "loss": 0.3883, "step": 3171 }, { "epoch": 0.25376, "grad_norm": 1.9164226055145264, "learning_rate": 8.500233109105354e-06, "loss": 0.3319, "step": 3172 }, { "epoch": 0.25384, "grad_norm": 1.4399014711380005, "learning_rate": 8.499334918994008e-06, "loss": 0.3641, "step": 3173 }, { "epoch": 0.25392, "grad_norm": 1.7369312047958374, "learning_rate": 8.498436507491452e-06, "loss": 0.3709, "step": 3174 }, { "epoch": 0.254, "grad_norm": 1.5380709171295166, "learning_rate": 8.497537874654523e-06, "loss": 0.3893, "step": 3175 }, { "epoch": 0.25408, "grad_norm": 1.6948041915893555, "learning_rate": 8.496639020540074e-06, "loss": 0.3261, "step": 3176 }, { "epoch": 0.25416, "grad_norm": 1.0962127447128296, "learning_rate": 8.495739945204975e-06, "loss": 0.2629, "step": 3177 }, { "epoch": 0.25424, "grad_norm": 1.5826691389083862, "learning_rate": 8.494840648706107e-06, "loss": 0.4263, "step": 3178 }, { "epoch": 0.25432, "grad_norm": 1.7018965482711792, "learning_rate": 8.493941131100365e-06, "loss": 0.471, "step": 3179 }, { "epoch": 0.2544, "grad_norm": 2.042285680770874, "learning_rate": 8.49304139244466e-06, "loss": 0.4853, "step": 3180 }, { "epoch": 0.25448, "grad_norm": 1.3213742971420288, "learning_rate": 8.492141432795911e-06, "loss": 0.2682, "step": 3181 }, { "epoch": 0.25456, "grad_norm": 1.6487033367156982, "learning_rate": 8.491241252211058e-06, "loss": 0.3989, "step": 3182 }, { "epoch": 0.25464, "grad_norm": 1.4631835222244263, "learning_rate": 8.490340850747055e-06, "loss": 0.3634, "step": 3183 }, { "epoch": 0.25472, "grad_norm": 1.5799041986465454, "learning_rate": 8.489440228460864e-06, "loss": 0.4085, "step": 3184 }, { "epoch": 0.2548, "grad_norm": 1.6044838428497314, "learning_rate": 8.488539385409464e-06, "loss": 0.3399, "step": 3185 }, { "epoch": 0.25488, "grad_norm": 1.7360122203826904, "learning_rate": 8.48763832164985e-06, "loss": 0.3473, "step": 3186 }, { "epoch": 0.25496, "grad_norm": 1.6504510641098022, "learning_rate": 8.486737037239032e-06, "loss": 0.3314, "step": 3187 }, { "epoch": 0.25504, "grad_norm": 1.5512864589691162, "learning_rate": 8.485835532234027e-06, "loss": 0.3203, "step": 3188 }, { "epoch": 0.25512, "grad_norm": 1.515463948249817, "learning_rate": 8.48493380669187e-06, "loss": 0.3878, "step": 3189 }, { "epoch": 0.2552, "grad_norm": 1.3933261632919312, "learning_rate": 8.484031860669612e-06, "loss": 0.3515, "step": 3190 }, { "epoch": 0.25528, "grad_norm": 1.280632495880127, "learning_rate": 8.483129694224319e-06, "loss": 0.3116, "step": 3191 }, { "epoch": 0.25536, "grad_norm": 1.7904398441314697, "learning_rate": 8.482227307413063e-06, "loss": 0.4518, "step": 3192 }, { "epoch": 0.25544, "grad_norm": 1.5730119943618774, "learning_rate": 8.481324700292934e-06, "loss": 0.3031, "step": 3193 }, { "epoch": 0.25552, "grad_norm": 1.5001622438430786, "learning_rate": 8.480421872921042e-06, "loss": 0.2442, "step": 3194 }, { "epoch": 0.2556, "grad_norm": 1.639999508857727, "learning_rate": 8.479518825354504e-06, "loss": 0.3146, "step": 3195 }, { "epoch": 0.25568, "grad_norm": 1.664425253868103, "learning_rate": 8.478615557650453e-06, "loss": 0.2826, "step": 3196 }, { "epoch": 0.25576, "grad_norm": 1.624760389328003, "learning_rate": 8.477712069866033e-06, "loss": 0.3324, "step": 3197 }, { "epoch": 0.25584, "grad_norm": 1.328769564628601, "learning_rate": 8.47680836205841e-06, "loss": 0.296, "step": 3198 }, { "epoch": 0.25592, "grad_norm": 1.3259910345077515, "learning_rate": 8.475904434284752e-06, "loss": 0.2968, "step": 3199 }, { "epoch": 0.256, "grad_norm": 1.7926405668258667, "learning_rate": 8.475000286602254e-06, "loss": 0.447, "step": 3200 }, { "epoch": 0.25608, "grad_norm": 1.6081920862197876, "learning_rate": 8.47409591906811e-06, "loss": 0.3946, "step": 3201 }, { "epoch": 0.25616, "grad_norm": 1.7535005807876587, "learning_rate": 8.473191331739547e-06, "loss": 0.4252, "step": 3202 }, { "epoch": 0.25624, "grad_norm": 1.5326111316680908, "learning_rate": 8.472286524673787e-06, "loss": 0.423, "step": 3203 }, { "epoch": 0.25632, "grad_norm": 1.3280184268951416, "learning_rate": 8.471381497928079e-06, "loss": 0.2994, "step": 3204 }, { "epoch": 0.2564, "grad_norm": 1.3200703859329224, "learning_rate": 8.470476251559677e-06, "loss": 0.3823, "step": 3205 }, { "epoch": 0.25648, "grad_norm": 1.8249784708023071, "learning_rate": 8.469570785625856e-06, "loss": 0.4093, "step": 3206 }, { "epoch": 0.25656, "grad_norm": 1.7080583572387695, "learning_rate": 8.4686651001839e-06, "loss": 0.4029, "step": 3207 }, { "epoch": 0.25664, "grad_norm": 1.537379503250122, "learning_rate": 8.467759195291108e-06, "loss": 0.3258, "step": 3208 }, { "epoch": 0.25672, "grad_norm": 1.783140778541565, "learning_rate": 8.466853071004797e-06, "loss": 0.3672, "step": 3209 }, { "epoch": 0.2568, "grad_norm": 1.411805272102356, "learning_rate": 8.465946727382293e-06, "loss": 0.2902, "step": 3210 }, { "epoch": 0.25688, "grad_norm": 1.7587497234344482, "learning_rate": 8.465040164480934e-06, "loss": 0.3364, "step": 3211 }, { "epoch": 0.25696, "grad_norm": 2.055811882019043, "learning_rate": 8.46413338235808e-06, "loss": 0.4274, "step": 3212 }, { "epoch": 0.25704, "grad_norm": 1.577392339706421, "learning_rate": 8.463226381071095e-06, "loss": 0.2775, "step": 3213 }, { "epoch": 0.25712, "grad_norm": 1.8780300617218018, "learning_rate": 8.462319160677368e-06, "loss": 0.3771, "step": 3214 }, { "epoch": 0.2572, "grad_norm": 1.4582608938217163, "learning_rate": 8.461411721234292e-06, "loss": 0.4151, "step": 3215 }, { "epoch": 0.25728, "grad_norm": 1.8028523921966553, "learning_rate": 8.460504062799277e-06, "loss": 0.3596, "step": 3216 }, { "epoch": 0.25736, "grad_norm": 1.2800477743148804, "learning_rate": 8.459596185429751e-06, "loss": 0.2512, "step": 3217 }, { "epoch": 0.25744, "grad_norm": 1.3028494119644165, "learning_rate": 8.458688089183147e-06, "loss": 0.3026, "step": 3218 }, { "epoch": 0.25752, "grad_norm": 1.5475552082061768, "learning_rate": 8.457779774116924e-06, "loss": 0.2819, "step": 3219 }, { "epoch": 0.2576, "grad_norm": 1.6385855674743652, "learning_rate": 8.456871240288542e-06, "loss": 0.4, "step": 3220 }, { "epoch": 0.25768, "grad_norm": 1.2522298097610474, "learning_rate": 8.455962487755484e-06, "loss": 0.3563, "step": 3221 }, { "epoch": 0.25776, "grad_norm": 1.9725730419158936, "learning_rate": 8.455053516575243e-06, "loss": 0.3775, "step": 3222 }, { "epoch": 0.25784, "grad_norm": 1.417087197303772, "learning_rate": 8.454144326805328e-06, "loss": 0.3495, "step": 3223 }, { "epoch": 0.25792, "grad_norm": 1.4763423204421997, "learning_rate": 8.453234918503257e-06, "loss": 0.3413, "step": 3224 }, { "epoch": 0.258, "grad_norm": 1.2496310472488403, "learning_rate": 8.452325291726567e-06, "loss": 0.2642, "step": 3225 }, { "epoch": 0.25808, "grad_norm": 2.0870649814605713, "learning_rate": 8.45141544653281e-06, "loss": 0.4066, "step": 3226 }, { "epoch": 0.25816, "grad_norm": 1.2118961811065674, "learning_rate": 8.450505382979544e-06, "loss": 0.2908, "step": 3227 }, { "epoch": 0.25824, "grad_norm": 1.6424560546875, "learning_rate": 8.449595101124349e-06, "loss": 0.2963, "step": 3228 }, { "epoch": 0.25832, "grad_norm": 1.646569013595581, "learning_rate": 8.448684601024812e-06, "loss": 0.3907, "step": 3229 }, { "epoch": 0.2584, "grad_norm": 1.6771202087402344, "learning_rate": 8.447773882738542e-06, "loss": 0.3657, "step": 3230 }, { "epoch": 0.25848, "grad_norm": 2.0412819385528564, "learning_rate": 8.446862946323154e-06, "loss": 0.4464, "step": 3231 }, { "epoch": 0.25856, "grad_norm": 1.4120683670043945, "learning_rate": 8.44595179183628e-06, "loss": 0.3551, "step": 3232 }, { "epoch": 0.25864, "grad_norm": 1.4363163709640503, "learning_rate": 8.445040419335569e-06, "loss": 0.3243, "step": 3233 }, { "epoch": 0.25872, "grad_norm": 1.8272966146469116, "learning_rate": 8.444128828878676e-06, "loss": 0.3876, "step": 3234 }, { "epoch": 0.2588, "grad_norm": 1.5481266975402832, "learning_rate": 8.443217020523275e-06, "loss": 0.4025, "step": 3235 }, { "epoch": 0.25888, "grad_norm": 1.6646552085876465, "learning_rate": 8.442304994327055e-06, "loss": 0.3478, "step": 3236 }, { "epoch": 0.25896, "grad_norm": 1.440777063369751, "learning_rate": 8.441392750347716e-06, "loss": 0.2645, "step": 3237 }, { "epoch": 0.25904, "grad_norm": 1.4575594663619995, "learning_rate": 8.440480288642972e-06, "loss": 0.3257, "step": 3238 }, { "epoch": 0.25912, "grad_norm": 1.6355897188186646, "learning_rate": 8.439567609270554e-06, "loss": 0.3219, "step": 3239 }, { "epoch": 0.2592, "grad_norm": 1.4267311096191406, "learning_rate": 8.438654712288202e-06, "loss": 0.3609, "step": 3240 }, { "epoch": 0.25928, "grad_norm": 1.911621332168579, "learning_rate": 8.43774159775367e-06, "loss": 0.4765, "step": 3241 }, { "epoch": 0.25936, "grad_norm": 2.092682123184204, "learning_rate": 8.43682826572473e-06, "loss": 0.4551, "step": 3242 }, { "epoch": 0.25944, "grad_norm": 1.7436370849609375, "learning_rate": 8.435914716259166e-06, "loss": 0.3244, "step": 3243 }, { "epoch": 0.25952, "grad_norm": 1.278278112411499, "learning_rate": 8.435000949414775e-06, "loss": 0.2589, "step": 3244 }, { "epoch": 0.2596, "grad_norm": 1.5885848999023438, "learning_rate": 8.434086965249368e-06, "loss": 0.445, "step": 3245 }, { "epoch": 0.25968, "grad_norm": 1.544396996498108, "learning_rate": 8.433172763820767e-06, "loss": 0.406, "step": 3246 }, { "epoch": 0.25976, "grad_norm": 1.5454270839691162, "learning_rate": 8.432258345186815e-06, "loss": 0.3159, "step": 3247 }, { "epoch": 0.25984, "grad_norm": 1.3528130054473877, "learning_rate": 8.43134370940536e-06, "loss": 0.2816, "step": 3248 }, { "epoch": 0.25992, "grad_norm": 1.8098217248916626, "learning_rate": 8.430428856534271e-06, "loss": 0.3795, "step": 3249 }, { "epoch": 0.26, "grad_norm": 1.2492495775222778, "learning_rate": 8.429513786631428e-06, "loss": 0.2786, "step": 3250 }, { "epoch": 0.26008, "grad_norm": 1.5359301567077637, "learning_rate": 8.42859849975472e-06, "loss": 0.3571, "step": 3251 }, { "epoch": 0.26016, "grad_norm": 1.8178164958953857, "learning_rate": 8.427682995962058e-06, "loss": 0.41, "step": 3252 }, { "epoch": 0.26024, "grad_norm": 1.6950284242630005, "learning_rate": 8.426767275311361e-06, "loss": 0.2952, "step": 3253 }, { "epoch": 0.26032, "grad_norm": 1.3474059104919434, "learning_rate": 8.425851337860566e-06, "loss": 0.3301, "step": 3254 }, { "epoch": 0.2604, "grad_norm": 1.5049055814743042, "learning_rate": 8.42493518366762e-06, "loss": 0.3448, "step": 3255 }, { "epoch": 0.26048, "grad_norm": 1.99711275100708, "learning_rate": 8.424018812790484e-06, "loss": 0.3592, "step": 3256 }, { "epoch": 0.26056, "grad_norm": 1.4437679052352905, "learning_rate": 8.423102225287135e-06, "loss": 0.3078, "step": 3257 }, { "epoch": 0.26064, "grad_norm": 2.1142630577087402, "learning_rate": 8.42218542121556e-06, "loss": 0.5487, "step": 3258 }, { "epoch": 0.26072, "grad_norm": 1.680812120437622, "learning_rate": 8.421268400633766e-06, "loss": 0.4302, "step": 3259 }, { "epoch": 0.2608, "grad_norm": 1.1650234460830688, "learning_rate": 8.420351163599768e-06, "loss": 0.2241, "step": 3260 }, { "epoch": 0.26088, "grad_norm": 1.5910313129425049, "learning_rate": 8.419433710171596e-06, "loss": 0.2969, "step": 3261 }, { "epoch": 0.26096, "grad_norm": 1.5094048976898193, "learning_rate": 8.418516040407295e-06, "loss": 0.4483, "step": 3262 }, { "epoch": 0.26104, "grad_norm": 1.2878527641296387, "learning_rate": 8.417598154364923e-06, "loss": 0.2714, "step": 3263 }, { "epoch": 0.26112, "grad_norm": 1.6986520290374756, "learning_rate": 8.416680052102549e-06, "loss": 0.388, "step": 3264 }, { "epoch": 0.2612, "grad_norm": 1.396224021911621, "learning_rate": 8.415761733678262e-06, "loss": 0.3414, "step": 3265 }, { "epoch": 0.26128, "grad_norm": 1.9210736751556396, "learning_rate": 8.414843199150159e-06, "loss": 0.4595, "step": 3266 }, { "epoch": 0.26136, "grad_norm": 1.2947478294372559, "learning_rate": 8.413924448576351e-06, "loss": 0.2695, "step": 3267 }, { "epoch": 0.26144, "grad_norm": 1.2264394760131836, "learning_rate": 8.41300548201497e-06, "loss": 0.3017, "step": 3268 }, { "epoch": 0.26152, "grad_norm": 1.5077235698699951, "learning_rate": 8.412086299524149e-06, "loss": 0.3803, "step": 3269 }, { "epoch": 0.2616, "grad_norm": 1.603265643119812, "learning_rate": 8.411166901162046e-06, "loss": 0.3499, "step": 3270 }, { "epoch": 0.26168, "grad_norm": 1.7666727304458618, "learning_rate": 8.410247286986827e-06, "loss": 0.3375, "step": 3271 }, { "epoch": 0.26176, "grad_norm": 1.5865931510925293, "learning_rate": 8.409327457056673e-06, "loss": 0.3626, "step": 3272 }, { "epoch": 0.26184, "grad_norm": 1.4339234828948975, "learning_rate": 8.408407411429777e-06, "loss": 0.3711, "step": 3273 }, { "epoch": 0.26192, "grad_norm": 1.5342092514038086, "learning_rate": 8.40748715016435e-06, "loss": 0.3039, "step": 3274 }, { "epoch": 0.262, "grad_norm": 1.602797269821167, "learning_rate": 8.406566673318613e-06, "loss": 0.3913, "step": 3275 }, { "epoch": 0.26208, "grad_norm": 1.554508924484253, "learning_rate": 8.405645980950799e-06, "loss": 0.3879, "step": 3276 }, { "epoch": 0.26216, "grad_norm": 1.9122850894927979, "learning_rate": 8.40472507311916e-06, "loss": 0.4212, "step": 3277 }, { "epoch": 0.26224, "grad_norm": 1.8808695077896118, "learning_rate": 8.403803949881958e-06, "loss": 0.4648, "step": 3278 }, { "epoch": 0.26232, "grad_norm": 1.2711478471755981, "learning_rate": 8.402882611297469e-06, "loss": 0.2795, "step": 3279 }, { "epoch": 0.2624, "grad_norm": 1.6705055236816406, "learning_rate": 8.401961057423984e-06, "loss": 0.4656, "step": 3280 }, { "epoch": 0.26248, "grad_norm": 1.4070430994033813, "learning_rate": 8.401039288319805e-06, "loss": 0.3143, "step": 3281 }, { "epoch": 0.26256, "grad_norm": 1.4044073820114136, "learning_rate": 8.400117304043252e-06, "loss": 0.2966, "step": 3282 }, { "epoch": 0.26264, "grad_norm": 1.5478609800338745, "learning_rate": 8.399195104652652e-06, "loss": 0.371, "step": 3283 }, { "epoch": 0.26272, "grad_norm": 1.9816876649856567, "learning_rate": 8.398272690206353e-06, "loss": 0.3998, "step": 3284 }, { "epoch": 0.2628, "grad_norm": 1.6786158084869385, "learning_rate": 8.39735006076271e-06, "loss": 0.3962, "step": 3285 }, { "epoch": 0.26288, "grad_norm": 1.7463001012802124, "learning_rate": 8.396427216380096e-06, "loss": 0.3922, "step": 3286 }, { "epoch": 0.26296, "grad_norm": 1.5134419202804565, "learning_rate": 8.395504157116899e-06, "loss": 0.2912, "step": 3287 }, { "epoch": 0.26304, "grad_norm": 1.6220186948776245, "learning_rate": 8.394580883031512e-06, "loss": 0.4388, "step": 3288 }, { "epoch": 0.26312, "grad_norm": 2.0490458011627197, "learning_rate": 8.393657394182354e-06, "loss": 0.524, "step": 3289 }, { "epoch": 0.2632, "grad_norm": 1.5475538969039917, "learning_rate": 8.392733690627846e-06, "loss": 0.475, "step": 3290 }, { "epoch": 0.26328, "grad_norm": 1.4094207286834717, "learning_rate": 8.391809772426429e-06, "loss": 0.3468, "step": 3291 }, { "epoch": 0.26336, "grad_norm": 1.0840815305709839, "learning_rate": 8.39088563963656e-06, "loss": 0.2516, "step": 3292 }, { "epoch": 0.26344, "grad_norm": 1.739927053451538, "learning_rate": 8.389961292316699e-06, "loss": 0.4647, "step": 3293 }, { "epoch": 0.26352, "grad_norm": 1.712862491607666, "learning_rate": 8.389036730525331e-06, "loss": 0.3649, "step": 3294 }, { "epoch": 0.2636, "grad_norm": 1.7053585052490234, "learning_rate": 8.38811195432095e-06, "loss": 0.4469, "step": 3295 }, { "epoch": 0.26368, "grad_norm": 1.1163685321807861, "learning_rate": 8.38718696376206e-06, "loss": 0.2379, "step": 3296 }, { "epoch": 0.26376, "grad_norm": 1.718429446220398, "learning_rate": 8.386261758907187e-06, "loss": 0.4064, "step": 3297 }, { "epoch": 0.26384, "grad_norm": 1.4358896017074585, "learning_rate": 8.38533633981486e-06, "loss": 0.2584, "step": 3298 }, { "epoch": 0.26392, "grad_norm": 1.6465083360671997, "learning_rate": 8.384410706543632e-06, "loss": 0.3218, "step": 3299 }, { "epoch": 0.264, "grad_norm": 1.5639690160751343, "learning_rate": 8.383484859152062e-06, "loss": 0.4031, "step": 3300 }, { "epoch": 0.26408, "grad_norm": 1.5496549606323242, "learning_rate": 8.382558797698728e-06, "loss": 0.3324, "step": 3301 }, { "epoch": 0.26416, "grad_norm": 1.8462837934494019, "learning_rate": 8.381632522242215e-06, "loss": 0.3972, "step": 3302 }, { "epoch": 0.26424, "grad_norm": 1.6575276851654053, "learning_rate": 8.380706032841128e-06, "loss": 0.4347, "step": 3303 }, { "epoch": 0.26432, "grad_norm": 1.0943249464035034, "learning_rate": 8.379779329554082e-06, "loss": 0.2716, "step": 3304 }, { "epoch": 0.2644, "grad_norm": 2.2363994121551514, "learning_rate": 8.378852412439706e-06, "loss": 0.3581, "step": 3305 }, { "epoch": 0.26448, "grad_norm": 1.4077491760253906, "learning_rate": 8.377925281556645e-06, "loss": 0.2796, "step": 3306 }, { "epoch": 0.26456, "grad_norm": 1.9200001955032349, "learning_rate": 8.376997936963553e-06, "loss": 0.431, "step": 3307 }, { "epoch": 0.26464, "grad_norm": 1.444860577583313, "learning_rate": 8.3760703787191e-06, "loss": 0.2795, "step": 3308 }, { "epoch": 0.26472, "grad_norm": 1.153676152229309, "learning_rate": 8.375142606881973e-06, "loss": 0.2588, "step": 3309 }, { "epoch": 0.2648, "grad_norm": 2.0317533016204834, "learning_rate": 8.374214621510866e-06, "loss": 0.4834, "step": 3310 }, { "epoch": 0.26488, "grad_norm": 1.2731088399887085, "learning_rate": 8.373286422664489e-06, "loss": 0.2232, "step": 3311 }, { "epoch": 0.26496, "grad_norm": 1.506540298461914, "learning_rate": 8.372358010401568e-06, "loss": 0.3614, "step": 3312 }, { "epoch": 0.26504, "grad_norm": 1.827217698097229, "learning_rate": 8.371429384780839e-06, "loss": 0.4667, "step": 3313 }, { "epoch": 0.26512, "grad_norm": 1.9941977262496948, "learning_rate": 8.370500545861053e-06, "loss": 0.413, "step": 3314 }, { "epoch": 0.2652, "grad_norm": 1.5264027118682861, "learning_rate": 8.369571493700976e-06, "loss": 0.3107, "step": 3315 }, { "epoch": 0.26528, "grad_norm": 1.9555363655090332, "learning_rate": 8.368642228359384e-06, "loss": 0.3539, "step": 3316 }, { "epoch": 0.26536, "grad_norm": 1.4721167087554932, "learning_rate": 8.367712749895072e-06, "loss": 0.3781, "step": 3317 }, { "epoch": 0.26544, "grad_norm": 1.2694438695907593, "learning_rate": 8.366783058366843e-06, "loss": 0.279, "step": 3318 }, { "epoch": 0.26552, "grad_norm": 1.8869990110397339, "learning_rate": 8.365853153833511e-06, "loss": 0.3966, "step": 3319 }, { "epoch": 0.2656, "grad_norm": 1.7108945846557617, "learning_rate": 8.364923036353915e-06, "loss": 0.3471, "step": 3320 }, { "epoch": 0.26568, "grad_norm": 1.5163145065307617, "learning_rate": 8.3639927059869e-06, "loss": 0.2747, "step": 3321 }, { "epoch": 0.26576, "grad_norm": 1.5786588191986084, "learning_rate": 8.36306216279132e-06, "loss": 0.3084, "step": 3322 }, { "epoch": 0.26584, "grad_norm": 1.5403141975402832, "learning_rate": 8.362131406826049e-06, "loss": 0.3665, "step": 3323 }, { "epoch": 0.26592, "grad_norm": 1.248185634613037, "learning_rate": 8.361200438149975e-06, "loss": 0.2885, "step": 3324 }, { "epoch": 0.266, "grad_norm": 1.7556192874908447, "learning_rate": 8.360269256821994e-06, "loss": 0.3611, "step": 3325 }, { "epoch": 0.26608, "grad_norm": 1.7010356187820435, "learning_rate": 8.359337862901023e-06, "loss": 0.4007, "step": 3326 }, { "epoch": 0.26616, "grad_norm": 1.3344570398330688, "learning_rate": 8.358406256445985e-06, "loss": 0.2836, "step": 3327 }, { "epoch": 0.26624, "grad_norm": 1.2783137559890747, "learning_rate": 8.357474437515819e-06, "loss": 0.3394, "step": 3328 }, { "epoch": 0.26632, "grad_norm": 1.2508137226104736, "learning_rate": 8.356542406169481e-06, "loss": 0.301, "step": 3329 }, { "epoch": 0.2664, "grad_norm": 1.4016166925430298, "learning_rate": 8.355610162465935e-06, "loss": 0.2951, "step": 3330 }, { "epoch": 0.26648, "grad_norm": 1.7911545038223267, "learning_rate": 8.354677706464162e-06, "loss": 0.4171, "step": 3331 }, { "epoch": 0.26656, "grad_norm": 1.3079147338867188, "learning_rate": 8.353745038223155e-06, "loss": 0.2591, "step": 3332 }, { "epoch": 0.26664, "grad_norm": 1.5401325225830078, "learning_rate": 8.352812157801923e-06, "loss": 0.3765, "step": 3333 }, { "epoch": 0.26672, "grad_norm": 1.556061863899231, "learning_rate": 8.351879065259484e-06, "loss": 0.3329, "step": 3334 }, { "epoch": 0.2668, "grad_norm": 1.2529242038726807, "learning_rate": 8.35094576065487e-06, "loss": 0.2728, "step": 3335 }, { "epoch": 0.26688, "grad_norm": 1.6587908267974854, "learning_rate": 8.350012244047132e-06, "loss": 0.3042, "step": 3336 }, { "epoch": 0.26696, "grad_norm": 1.5234625339508057, "learning_rate": 8.349078515495327e-06, "loss": 0.3707, "step": 3337 }, { "epoch": 0.26704, "grad_norm": 1.4544942378997803, "learning_rate": 8.348144575058531e-06, "loss": 0.3395, "step": 3338 }, { "epoch": 0.26712, "grad_norm": 1.9101989269256592, "learning_rate": 8.347210422795831e-06, "loss": 0.3822, "step": 3339 }, { "epoch": 0.2672, "grad_norm": 1.5211460590362549, "learning_rate": 8.346276058766327e-06, "loss": 0.3944, "step": 3340 }, { "epoch": 0.26728, "grad_norm": 1.3909361362457275, "learning_rate": 8.345341483029134e-06, "loss": 0.2667, "step": 3341 }, { "epoch": 0.26736, "grad_norm": 1.3863193988800049, "learning_rate": 8.344406695643378e-06, "loss": 0.2917, "step": 3342 }, { "epoch": 0.26744, "grad_norm": 1.6209913492202759, "learning_rate": 8.343471696668202e-06, "loss": 0.3255, "step": 3343 }, { "epoch": 0.26752, "grad_norm": 1.6851791143417358, "learning_rate": 8.342536486162758e-06, "loss": 0.4018, "step": 3344 }, { "epoch": 0.2676, "grad_norm": 1.5533232688903809, "learning_rate": 8.341601064186215e-06, "loss": 0.3628, "step": 3345 }, { "epoch": 0.26768, "grad_norm": 1.481484055519104, "learning_rate": 8.340665430797752e-06, "loss": 0.3407, "step": 3346 }, { "epoch": 0.26776, "grad_norm": 1.3775691986083984, "learning_rate": 8.339729586056567e-06, "loss": 0.3287, "step": 3347 }, { "epoch": 0.26784, "grad_norm": 1.6615140438079834, "learning_rate": 8.338793530021866e-06, "loss": 0.4026, "step": 3348 }, { "epoch": 0.26792, "grad_norm": 1.7780884504318237, "learning_rate": 8.337857262752869e-06, "loss": 0.3734, "step": 3349 }, { "epoch": 0.268, "grad_norm": 1.2541722059249878, "learning_rate": 8.336920784308814e-06, "loss": 0.232, "step": 3350 }, { "epoch": 0.26808, "grad_norm": 1.472259521484375, "learning_rate": 8.335984094748944e-06, "loss": 0.2674, "step": 3351 }, { "epoch": 0.26816, "grad_norm": 1.723530888557434, "learning_rate": 8.335047194132522e-06, "loss": 0.3773, "step": 3352 }, { "epoch": 0.26824, "grad_norm": 1.5939034223556519, "learning_rate": 8.334110082518825e-06, "loss": 0.4006, "step": 3353 }, { "epoch": 0.26832, "grad_norm": 1.7733458280563354, "learning_rate": 8.333172759967137e-06, "loss": 0.3455, "step": 3354 }, { "epoch": 0.2684, "grad_norm": 1.562965750694275, "learning_rate": 8.332235226536762e-06, "loss": 0.2928, "step": 3355 }, { "epoch": 0.26848, "grad_norm": 1.6739492416381836, "learning_rate": 8.331297482287016e-06, "loss": 0.3414, "step": 3356 }, { "epoch": 0.26856, "grad_norm": 1.6316815614700317, "learning_rate": 8.330359527277224e-06, "loss": 0.3977, "step": 3357 }, { "epoch": 0.26864, "grad_norm": 1.0808318853378296, "learning_rate": 8.329421361566724e-06, "loss": 0.2338, "step": 3358 }, { "epoch": 0.26872, "grad_norm": 1.3807647228240967, "learning_rate": 8.328482985214879e-06, "loss": 0.246, "step": 3359 }, { "epoch": 0.2688, "grad_norm": 1.602565050125122, "learning_rate": 8.327544398281053e-06, "loss": 0.3755, "step": 3360 }, { "epoch": 0.26888, "grad_norm": 1.6400891542434692, "learning_rate": 8.326605600824625e-06, "loss": 0.3009, "step": 3361 }, { "epoch": 0.26896, "grad_norm": 1.5679333209991455, "learning_rate": 8.325666592904993e-06, "loss": 0.3595, "step": 3362 }, { "epoch": 0.26904, "grad_norm": 1.984908103942871, "learning_rate": 8.324727374581563e-06, "loss": 0.4163, "step": 3363 }, { "epoch": 0.26912, "grad_norm": 1.620694875717163, "learning_rate": 8.323787945913755e-06, "loss": 0.3158, "step": 3364 }, { "epoch": 0.2692, "grad_norm": 1.4381059408187866, "learning_rate": 8.322848306961007e-06, "loss": 0.2875, "step": 3365 }, { "epoch": 0.26928, "grad_norm": 2.2146494388580322, "learning_rate": 8.321908457782764e-06, "loss": 0.419, "step": 3366 }, { "epoch": 0.26936, "grad_norm": 1.2770941257476807, "learning_rate": 8.320968398438487e-06, "loss": 0.3038, "step": 3367 }, { "epoch": 0.26944, "grad_norm": 1.5118519067764282, "learning_rate": 8.320028128987653e-06, "loss": 0.3433, "step": 3368 }, { "epoch": 0.26952, "grad_norm": 1.8498775959014893, "learning_rate": 8.319087649489747e-06, "loss": 0.3517, "step": 3369 }, { "epoch": 0.2696, "grad_norm": 1.4128763675689697, "learning_rate": 8.318146960004273e-06, "loss": 0.2989, "step": 3370 }, { "epoch": 0.26968, "grad_norm": 1.828460931777954, "learning_rate": 8.31720606059074e-06, "loss": 0.4235, "step": 3371 }, { "epoch": 0.26976, "grad_norm": 1.5870622396469116, "learning_rate": 8.316264951308682e-06, "loss": 0.307, "step": 3372 }, { "epoch": 0.26984, "grad_norm": 1.6679601669311523, "learning_rate": 8.315323632217636e-06, "loss": 0.3352, "step": 3373 }, { "epoch": 0.26992, "grad_norm": 1.5351213216781616, "learning_rate": 8.314382103377158e-06, "loss": 0.3193, "step": 3374 }, { "epoch": 0.27, "grad_norm": 1.365196704864502, "learning_rate": 8.313440364846811e-06, "loss": 0.2916, "step": 3375 }, { "epoch": 0.27008, "grad_norm": 1.6591503620147705, "learning_rate": 8.312498416686183e-06, "loss": 0.3194, "step": 3376 }, { "epoch": 0.27016, "grad_norm": 1.4480193853378296, "learning_rate": 8.31155625895486e-06, "loss": 0.3788, "step": 3377 }, { "epoch": 0.27024, "grad_norm": 1.819912314414978, "learning_rate": 8.310613891712455e-06, "loss": 0.3924, "step": 3378 }, { "epoch": 0.27032, "grad_norm": 1.4778650999069214, "learning_rate": 8.309671315018587e-06, "loss": 0.4356, "step": 3379 }, { "epoch": 0.2704, "grad_norm": 1.509982705116272, "learning_rate": 8.308728528932889e-06, "loss": 0.3287, "step": 3380 }, { "epoch": 0.27048, "grad_norm": 1.2625203132629395, "learning_rate": 8.307785533515007e-06, "loss": 0.2483, "step": 3381 }, { "epoch": 0.27056, "grad_norm": 1.5584619045257568, "learning_rate": 8.306842328824602e-06, "loss": 0.3089, "step": 3382 }, { "epoch": 0.27064, "grad_norm": 1.7761331796646118, "learning_rate": 8.305898914921348e-06, "loss": 0.3591, "step": 3383 }, { "epoch": 0.27072, "grad_norm": 1.5493628978729248, "learning_rate": 8.304955291864932e-06, "loss": 0.3878, "step": 3384 }, { "epoch": 0.2708, "grad_norm": 1.6428436040878296, "learning_rate": 8.304011459715052e-06, "loss": 0.3434, "step": 3385 }, { "epoch": 0.27088, "grad_norm": 1.5427204370498657, "learning_rate": 8.303067418531424e-06, "loss": 0.3383, "step": 3386 }, { "epoch": 0.27096, "grad_norm": 1.6140514612197876, "learning_rate": 8.302123168373771e-06, "loss": 0.3603, "step": 3387 }, { "epoch": 0.27104, "grad_norm": 1.098120927810669, "learning_rate": 8.301178709301833e-06, "loss": 0.2703, "step": 3388 }, { "epoch": 0.27112, "grad_norm": 1.3397828340530396, "learning_rate": 8.300234041375366e-06, "loss": 0.3006, "step": 3389 }, { "epoch": 0.2712, "grad_norm": 2.0523359775543213, "learning_rate": 8.299289164654132e-06, "loss": 0.321, "step": 3390 }, { "epoch": 0.27128, "grad_norm": 1.3592901229858398, "learning_rate": 8.298344079197913e-06, "loss": 0.2744, "step": 3391 }, { "epoch": 0.27136, "grad_norm": 1.4901467561721802, "learning_rate": 8.297398785066501e-06, "loss": 0.4224, "step": 3392 }, { "epoch": 0.27144, "grad_norm": 1.6656169891357422, "learning_rate": 8.2964532823197e-06, "loss": 0.3217, "step": 3393 }, { "epoch": 0.27152, "grad_norm": 1.3976407051086426, "learning_rate": 8.29550757101733e-06, "loss": 0.3364, "step": 3394 }, { "epoch": 0.2716, "grad_norm": 1.6198853254318237, "learning_rate": 8.294561651219223e-06, "loss": 0.4076, "step": 3395 }, { "epoch": 0.27168, "grad_norm": 1.8720680475234985, "learning_rate": 8.293615522985224e-06, "loss": 0.3663, "step": 3396 }, { "epoch": 0.27176, "grad_norm": 1.2774734497070312, "learning_rate": 8.292669186375192e-06, "loss": 0.3451, "step": 3397 }, { "epoch": 0.27184, "grad_norm": 1.308482050895691, "learning_rate": 8.291722641448995e-06, "loss": 0.2712, "step": 3398 }, { "epoch": 0.27192, "grad_norm": 1.2496081590652466, "learning_rate": 8.290775888266525e-06, "loss": 0.2932, "step": 3399 }, { "epoch": 0.272, "grad_norm": 1.3944826126098633, "learning_rate": 8.289828926887673e-06, "loss": 0.2683, "step": 3400 }, { "epoch": 0.27208, "grad_norm": 1.0699915885925293, "learning_rate": 8.288881757372352e-06, "loss": 0.2491, "step": 3401 }, { "epoch": 0.27216, "grad_norm": 1.4618251323699951, "learning_rate": 8.287934379780489e-06, "loss": 0.3656, "step": 3402 }, { "epoch": 0.27224, "grad_norm": 1.899168610572815, "learning_rate": 8.286986794172017e-06, "loss": 0.3084, "step": 3403 }, { "epoch": 0.27232, "grad_norm": 1.8475021123886108, "learning_rate": 8.286039000606889e-06, "loss": 0.32, "step": 3404 }, { "epoch": 0.2724, "grad_norm": 1.7992068529129028, "learning_rate": 8.28509099914507e-06, "loss": 0.3602, "step": 3405 }, { "epoch": 0.27248, "grad_norm": 1.4878534078598022, "learning_rate": 8.284142789846535e-06, "loss": 0.3161, "step": 3406 }, { "epoch": 0.27256, "grad_norm": 1.534593105316162, "learning_rate": 8.283194372771274e-06, "loss": 0.3459, "step": 3407 }, { "epoch": 0.27264, "grad_norm": 1.5597976446151733, "learning_rate": 8.28224574797929e-06, "loss": 0.2994, "step": 3408 }, { "epoch": 0.27272, "grad_norm": 1.2504432201385498, "learning_rate": 8.281296915530602e-06, "loss": 0.2811, "step": 3409 }, { "epoch": 0.2728, "grad_norm": 1.392411231994629, "learning_rate": 8.280347875485236e-06, "loss": 0.2981, "step": 3410 }, { "epoch": 0.27288, "grad_norm": 1.4544750452041626, "learning_rate": 8.279398627903235e-06, "loss": 0.2969, "step": 3411 }, { "epoch": 0.27296, "grad_norm": 1.6616743803024292, "learning_rate": 8.278449172844656e-06, "loss": 0.3423, "step": 3412 }, { "epoch": 0.27304, "grad_norm": 1.702860951423645, "learning_rate": 8.27749951036957e-06, "loss": 0.4418, "step": 3413 }, { "epoch": 0.27312, "grad_norm": 1.229211449623108, "learning_rate": 8.276549640538055e-06, "loss": 0.2837, "step": 3414 }, { "epoch": 0.2732, "grad_norm": 1.7604984045028687, "learning_rate": 8.275599563410209e-06, "loss": 0.3789, "step": 3415 }, { "epoch": 0.27328, "grad_norm": 1.5034135580062866, "learning_rate": 8.274649279046137e-06, "loss": 0.2756, "step": 3416 }, { "epoch": 0.27336, "grad_norm": 1.3147361278533936, "learning_rate": 8.273698787505962e-06, "loss": 0.2322, "step": 3417 }, { "epoch": 0.27344, "grad_norm": 1.2072352170944214, "learning_rate": 8.272748088849818e-06, "loss": 0.2772, "step": 3418 }, { "epoch": 0.27352, "grad_norm": 1.5700079202651978, "learning_rate": 8.271797183137855e-06, "loss": 0.3311, "step": 3419 }, { "epoch": 0.2736, "grad_norm": 1.3132052421569824, "learning_rate": 8.27084607043023e-06, "loss": 0.271, "step": 3420 }, { "epoch": 0.27368, "grad_norm": 1.235514760017395, "learning_rate": 8.26989475078712e-06, "loss": 0.2534, "step": 3421 }, { "epoch": 0.27376, "grad_norm": 1.5095601081848145, "learning_rate": 8.26894322426871e-06, "loss": 0.294, "step": 3422 }, { "epoch": 0.27384, "grad_norm": 1.3389074802398682, "learning_rate": 8.267991490935199e-06, "loss": 0.2859, "step": 3423 }, { "epoch": 0.27392, "grad_norm": 1.3496683835983276, "learning_rate": 8.267039550846802e-06, "loss": 0.2212, "step": 3424 }, { "epoch": 0.274, "grad_norm": 1.800253987312317, "learning_rate": 8.266087404063743e-06, "loss": 0.4047, "step": 3425 }, { "epoch": 0.27408, "grad_norm": 1.3180961608886719, "learning_rate": 8.265135050646262e-06, "loss": 0.3051, "step": 3426 }, { "epoch": 0.27416, "grad_norm": 1.7630568742752075, "learning_rate": 8.264182490654614e-06, "loss": 0.3703, "step": 3427 }, { "epoch": 0.27424, "grad_norm": 1.518088459968567, "learning_rate": 8.26322972414906e-06, "loss": 0.3771, "step": 3428 }, { "epoch": 0.27432, "grad_norm": 1.6900966167449951, "learning_rate": 8.262276751189882e-06, "loss": 0.3216, "step": 3429 }, { "epoch": 0.2744, "grad_norm": 1.6469087600708008, "learning_rate": 8.261323571837367e-06, "loss": 0.3232, "step": 3430 }, { "epoch": 0.27448, "grad_norm": 1.3444794416427612, "learning_rate": 8.260370186151822e-06, "loss": 0.2931, "step": 3431 }, { "epoch": 0.27456, "grad_norm": 1.5142230987548828, "learning_rate": 8.259416594193566e-06, "loss": 0.3601, "step": 3432 }, { "epoch": 0.27464, "grad_norm": 1.472582221031189, "learning_rate": 8.25846279602293e-06, "loss": 0.3059, "step": 3433 }, { "epoch": 0.27472, "grad_norm": 1.7719913721084595, "learning_rate": 8.257508791700253e-06, "loss": 0.4288, "step": 3434 }, { "epoch": 0.2748, "grad_norm": 1.7561125755310059, "learning_rate": 8.256554581285895e-06, "loss": 0.3458, "step": 3435 }, { "epoch": 0.27488, "grad_norm": 1.25856614112854, "learning_rate": 8.255600164840226e-06, "loss": 0.2884, "step": 3436 }, { "epoch": 0.27496, "grad_norm": 1.5106571912765503, "learning_rate": 8.254645542423627e-06, "loss": 0.314, "step": 3437 }, { "epoch": 0.27504, "grad_norm": 1.3592396974563599, "learning_rate": 8.253690714096494e-06, "loss": 0.2891, "step": 3438 }, { "epoch": 0.27512, "grad_norm": 1.3193227052688599, "learning_rate": 8.252735679919238e-06, "loss": 0.3048, "step": 3439 }, { "epoch": 0.2752, "grad_norm": 1.72906494140625, "learning_rate": 8.251780439952277e-06, "loss": 0.3843, "step": 3440 }, { "epoch": 0.27528, "grad_norm": 1.8231126070022583, "learning_rate": 8.250824994256048e-06, "loss": 0.4098, "step": 3441 }, { "epoch": 0.27536, "grad_norm": 1.2340192794799805, "learning_rate": 8.249869342891001e-06, "loss": 0.245, "step": 3442 }, { "epoch": 0.27544, "grad_norm": 1.667550802230835, "learning_rate": 8.248913485917593e-06, "loss": 0.4117, "step": 3443 }, { "epoch": 0.27552, "grad_norm": 1.5927138328552246, "learning_rate": 8.247957423396302e-06, "loss": 0.4634, "step": 3444 }, { "epoch": 0.2756, "grad_norm": 1.898964285850525, "learning_rate": 8.24700115538761e-06, "loss": 0.4804, "step": 3445 }, { "epoch": 0.27568, "grad_norm": 1.5059139728546143, "learning_rate": 8.246044681952022e-06, "loss": 0.3067, "step": 3446 }, { "epoch": 0.27576, "grad_norm": 1.2551331520080566, "learning_rate": 8.245088003150047e-06, "loss": 0.2362, "step": 3447 }, { "epoch": 0.27584, "grad_norm": 1.5307579040527344, "learning_rate": 8.244131119042211e-06, "loss": 0.3315, "step": 3448 }, { "epoch": 0.27592, "grad_norm": 1.7814452648162842, "learning_rate": 8.243174029689055e-06, "loss": 0.3452, "step": 3449 }, { "epoch": 0.276, "grad_norm": 1.68953275680542, "learning_rate": 8.242216735151131e-06, "loss": 0.3785, "step": 3450 }, { "epoch": 0.27608, "grad_norm": 1.7366198301315308, "learning_rate": 8.241259235489001e-06, "loss": 0.4651, "step": 3451 }, { "epoch": 0.27616, "grad_norm": 1.8493014574050903, "learning_rate": 8.240301530763244e-06, "loss": 0.3389, "step": 3452 }, { "epoch": 0.27624, "grad_norm": 1.3720508813858032, "learning_rate": 8.239343621034452e-06, "loss": 0.306, "step": 3453 }, { "epoch": 0.27632, "grad_norm": 1.694554328918457, "learning_rate": 8.23838550636323e-06, "loss": 0.4108, "step": 3454 }, { "epoch": 0.2764, "grad_norm": 1.6249841451644897, "learning_rate": 8.23742718681019e-06, "loss": 0.3531, "step": 3455 }, { "epoch": 0.27648, "grad_norm": 1.7144601345062256, "learning_rate": 8.236468662435964e-06, "loss": 0.4359, "step": 3456 }, { "epoch": 0.27656, "grad_norm": 1.3996644020080566, "learning_rate": 8.235509933301197e-06, "loss": 0.3071, "step": 3457 }, { "epoch": 0.27664, "grad_norm": 1.3151053190231323, "learning_rate": 8.234550999466542e-06, "loss": 0.2705, "step": 3458 }, { "epoch": 0.27672, "grad_norm": 1.8970770835876465, "learning_rate": 8.233591860992667e-06, "loss": 0.4059, "step": 3459 }, { "epoch": 0.2768, "grad_norm": 1.183557391166687, "learning_rate": 8.232632517940255e-06, "loss": 0.226, "step": 3460 }, { "epoch": 0.27688, "grad_norm": 2.460853099822998, "learning_rate": 8.23167297037e-06, "loss": 0.5838, "step": 3461 }, { "epoch": 0.27696, "grad_norm": 1.3229994773864746, "learning_rate": 8.230713218342611e-06, "loss": 0.2763, "step": 3462 }, { "epoch": 0.27704, "grad_norm": 1.5857011079788208, "learning_rate": 8.229753261918805e-06, "loss": 0.3172, "step": 3463 }, { "epoch": 0.27712, "grad_norm": 1.5672333240509033, "learning_rate": 8.228793101159318e-06, "loss": 0.3418, "step": 3464 }, { "epoch": 0.2772, "grad_norm": 1.766082763671875, "learning_rate": 8.227832736124895e-06, "loss": 0.4143, "step": 3465 }, { "epoch": 0.27728, "grad_norm": 1.8570321798324585, "learning_rate": 8.226872166876293e-06, "loss": 0.4841, "step": 3466 }, { "epoch": 0.27736, "grad_norm": 1.8656039237976074, "learning_rate": 8.225911393474289e-06, "loss": 0.4092, "step": 3467 }, { "epoch": 0.27744, "grad_norm": 1.710142731666565, "learning_rate": 8.224950415979664e-06, "loss": 0.3453, "step": 3468 }, { "epoch": 0.27752, "grad_norm": 2.0013701915740967, "learning_rate": 8.223989234453215e-06, "loss": 0.4788, "step": 3469 }, { "epoch": 0.2776, "grad_norm": 1.8113597631454468, "learning_rate": 8.223027848955757e-06, "loss": 0.4269, "step": 3470 }, { "epoch": 0.27768, "grad_norm": 1.8451036214828491, "learning_rate": 8.222066259548111e-06, "loss": 0.3931, "step": 3471 }, { "epoch": 0.27776, "grad_norm": 2.501964569091797, "learning_rate": 8.221104466291112e-06, "loss": 0.5903, "step": 3472 }, { "epoch": 0.27784, "grad_norm": 1.5321495532989502, "learning_rate": 8.220142469245613e-06, "loss": 0.3115, "step": 3473 }, { "epoch": 0.27792, "grad_norm": 1.721560001373291, "learning_rate": 8.219180268472476e-06, "loss": 0.304, "step": 3474 }, { "epoch": 0.278, "grad_norm": 1.3853156566619873, "learning_rate": 8.218217864032572e-06, "loss": 0.2682, "step": 3475 }, { "epoch": 0.27808, "grad_norm": 1.5545319318771362, "learning_rate": 8.217255255986794e-06, "loss": 0.3532, "step": 3476 }, { "epoch": 0.27816, "grad_norm": 1.3426564931869507, "learning_rate": 8.216292444396038e-06, "loss": 0.301, "step": 3477 }, { "epoch": 0.27824, "grad_norm": 1.5732641220092773, "learning_rate": 8.215329429321224e-06, "loss": 0.4052, "step": 3478 }, { "epoch": 0.27832, "grad_norm": 1.5441874265670776, "learning_rate": 8.214366210823274e-06, "loss": 0.2982, "step": 3479 }, { "epoch": 0.2784, "grad_norm": 1.5156362056732178, "learning_rate": 8.21340278896313e-06, "loss": 0.4654, "step": 3480 }, { "epoch": 0.27848, "grad_norm": 1.3966670036315918, "learning_rate": 8.212439163801743e-06, "loss": 0.2895, "step": 3481 }, { "epoch": 0.27856, "grad_norm": 1.8723714351654053, "learning_rate": 8.211475335400079e-06, "loss": 0.3717, "step": 3482 }, { "epoch": 0.27864, "grad_norm": 1.6649876832962036, "learning_rate": 8.210511303819116e-06, "loss": 0.3856, "step": 3483 }, { "epoch": 0.27872, "grad_norm": 1.9745384454727173, "learning_rate": 8.209547069119845e-06, "loss": 0.4151, "step": 3484 }, { "epoch": 0.2788, "grad_norm": 1.885003685951233, "learning_rate": 8.20858263136327e-06, "loss": 0.3486, "step": 3485 }, { "epoch": 0.27888, "grad_norm": 1.5459332466125488, "learning_rate": 8.20761799061041e-06, "loss": 0.2786, "step": 3486 }, { "epoch": 0.27896, "grad_norm": 0.8227094411849976, "learning_rate": 8.20665314692229e-06, "loss": 0.1874, "step": 3487 }, { "epoch": 0.27904, "grad_norm": 1.5767573118209839, "learning_rate": 8.205688100359956e-06, "loss": 0.3388, "step": 3488 }, { "epoch": 0.27912, "grad_norm": 1.772367238998413, "learning_rate": 8.20472285098446e-06, "loss": 0.3531, "step": 3489 }, { "epoch": 0.2792, "grad_norm": 1.7118393182754517, "learning_rate": 8.203757398856875e-06, "loss": 0.312, "step": 3490 }, { "epoch": 0.27928, "grad_norm": 1.6346728801727295, "learning_rate": 8.202791744038278e-06, "loss": 0.3631, "step": 3491 }, { "epoch": 0.27936, "grad_norm": 1.87623131275177, "learning_rate": 8.201825886589765e-06, "loss": 0.5274, "step": 3492 }, { "epoch": 0.27944, "grad_norm": 1.3338699340820312, "learning_rate": 8.20085982657244e-06, "loss": 0.2704, "step": 3493 }, { "epoch": 0.27952, "grad_norm": 1.6640794277191162, "learning_rate": 8.199893564047425e-06, "loss": 0.3691, "step": 3494 }, { "epoch": 0.2796, "grad_norm": 1.3115794658660889, "learning_rate": 8.19892709907585e-06, "loss": 0.3016, "step": 3495 }, { "epoch": 0.27968, "grad_norm": 1.4660612344741821, "learning_rate": 8.197960431718862e-06, "loss": 0.3437, "step": 3496 }, { "epoch": 0.27976, "grad_norm": 1.5010005235671997, "learning_rate": 8.196993562037618e-06, "loss": 0.4072, "step": 3497 }, { "epoch": 0.27984, "grad_norm": 1.6829041242599487, "learning_rate": 8.196026490093289e-06, "loss": 0.4016, "step": 3498 }, { "epoch": 0.27992, "grad_norm": 1.4259425401687622, "learning_rate": 8.195059215947057e-06, "loss": 0.3122, "step": 3499 }, { "epoch": 0.28, "grad_norm": 1.4573216438293457, "learning_rate": 8.194091739660119e-06, "loss": 0.3861, "step": 3500 }, { "epoch": 0.28008, "grad_norm": 1.3331818580627441, "learning_rate": 8.193124061293684e-06, "loss": 0.2762, "step": 3501 }, { "epoch": 0.28016, "grad_norm": 1.7647954225540161, "learning_rate": 8.192156180908974e-06, "loss": 0.3346, "step": 3502 }, { "epoch": 0.28024, "grad_norm": 1.65705144405365, "learning_rate": 8.191188098567224e-06, "loss": 0.3713, "step": 3503 }, { "epoch": 0.28032, "grad_norm": 1.42042076587677, "learning_rate": 8.190219814329681e-06, "loss": 0.3316, "step": 3504 }, { "epoch": 0.2804, "grad_norm": 1.3383320569992065, "learning_rate": 8.189251328257604e-06, "loss": 0.3086, "step": 3505 }, { "epoch": 0.28048, "grad_norm": 1.7345651388168335, "learning_rate": 8.188282640412267e-06, "loss": 0.3242, "step": 3506 }, { "epoch": 0.28056, "grad_norm": 1.544132113456726, "learning_rate": 8.187313750854956e-06, "loss": 0.3949, "step": 3507 }, { "epoch": 0.28064, "grad_norm": 1.5500211715698242, "learning_rate": 8.186344659646966e-06, "loss": 0.4493, "step": 3508 }, { "epoch": 0.28072, "grad_norm": 1.221084475517273, "learning_rate": 8.185375366849613e-06, "loss": 0.2511, "step": 3509 }, { "epoch": 0.2808, "grad_norm": 1.6258049011230469, "learning_rate": 8.184405872524219e-06, "loss": 0.3507, "step": 3510 }, { "epoch": 0.28088, "grad_norm": 1.774895191192627, "learning_rate": 8.18343617673212e-06, "loss": 0.3715, "step": 3511 }, { "epoch": 0.28096, "grad_norm": 1.593440055847168, "learning_rate": 8.182466279534666e-06, "loss": 0.3073, "step": 3512 }, { "epoch": 0.28104, "grad_norm": 1.700151801109314, "learning_rate": 8.18149618099322e-06, "loss": 0.3785, "step": 3513 }, { "epoch": 0.28112, "grad_norm": 1.4120454788208008, "learning_rate": 8.180525881169155e-06, "loss": 0.326, "step": 3514 }, { "epoch": 0.2812, "grad_norm": 1.9900215864181519, "learning_rate": 8.17955538012386e-06, "loss": 0.4358, "step": 3515 }, { "epoch": 0.28128, "grad_norm": 1.1549736261367798, "learning_rate": 8.178584677918734e-06, "loss": 0.2993, "step": 3516 }, { "epoch": 0.28136, "grad_norm": 1.4273407459259033, "learning_rate": 8.177613774615193e-06, "loss": 0.322, "step": 3517 }, { "epoch": 0.28144, "grad_norm": 1.7175918817520142, "learning_rate": 8.17664267027466e-06, "loss": 0.3311, "step": 3518 }, { "epoch": 0.28152, "grad_norm": 1.2839045524597168, "learning_rate": 8.175671364958573e-06, "loss": 0.304, "step": 3519 }, { "epoch": 0.2816, "grad_norm": 1.3330825567245483, "learning_rate": 8.174699858728386e-06, "loss": 0.3253, "step": 3520 }, { "epoch": 0.28168, "grad_norm": 1.5143156051635742, "learning_rate": 8.173728151645561e-06, "loss": 0.2915, "step": 3521 }, { "epoch": 0.28176, "grad_norm": 1.783462405204773, "learning_rate": 8.172756243771575e-06, "loss": 0.4024, "step": 3522 }, { "epoch": 0.28184, "grad_norm": 1.8704419136047363, "learning_rate": 8.171784135167917e-06, "loss": 0.3364, "step": 3523 }, { "epoch": 0.28192, "grad_norm": 1.4959416389465332, "learning_rate": 8.17081182589609e-06, "loss": 0.3278, "step": 3524 }, { "epoch": 0.282, "grad_norm": 2.0997822284698486, "learning_rate": 8.169839316017609e-06, "loss": 0.4619, "step": 3525 }, { "epoch": 0.28208, "grad_norm": 1.242011547088623, "learning_rate": 8.168866605594001e-06, "loss": 0.2991, "step": 3526 }, { "epoch": 0.28216, "grad_norm": 1.39487624168396, "learning_rate": 8.167893694686805e-06, "loss": 0.3224, "step": 3527 }, { "epoch": 0.28224, "grad_norm": 1.8157869577407837, "learning_rate": 8.166920583357575e-06, "loss": 0.3669, "step": 3528 }, { "epoch": 0.28232, "grad_norm": 1.2828103303909302, "learning_rate": 8.165947271667875e-06, "loss": 0.2376, "step": 3529 }, { "epoch": 0.2824, "grad_norm": 1.9789408445358276, "learning_rate": 8.164973759679287e-06, "loss": 0.4204, "step": 3530 }, { "epoch": 0.28248, "grad_norm": 1.2703884840011597, "learning_rate": 8.164000047453398e-06, "loss": 0.2719, "step": 3531 }, { "epoch": 0.28256, "grad_norm": 1.5081241130828857, "learning_rate": 8.163026135051813e-06, "loss": 0.3015, "step": 3532 }, { "epoch": 0.28264, "grad_norm": 1.3452731370925903, "learning_rate": 8.162052022536148e-06, "loss": 0.2942, "step": 3533 }, { "epoch": 0.28272, "grad_norm": 1.6602774858474731, "learning_rate": 8.16107770996803e-06, "loss": 0.4434, "step": 3534 }, { "epoch": 0.2828, "grad_norm": 2.201460361480713, "learning_rate": 8.160103197409104e-06, "loss": 0.3866, "step": 3535 }, { "epoch": 0.28288, "grad_norm": 1.59469735622406, "learning_rate": 8.159128484921022e-06, "loss": 0.4352, "step": 3536 }, { "epoch": 0.28296, "grad_norm": 1.86858332157135, "learning_rate": 8.158153572565452e-06, "loss": 0.4111, "step": 3537 }, { "epoch": 0.28304, "grad_norm": 1.9216097593307495, "learning_rate": 8.157178460404071e-06, "loss": 0.4455, "step": 3538 }, { "epoch": 0.28312, "grad_norm": 1.8076080083847046, "learning_rate": 8.156203148498575e-06, "loss": 0.4287, "step": 3539 }, { "epoch": 0.2832, "grad_norm": 1.820548415184021, "learning_rate": 8.155227636910665e-06, "loss": 0.3744, "step": 3540 }, { "epoch": 0.28328, "grad_norm": 1.749031662940979, "learning_rate": 8.15425192570206e-06, "loss": 0.4377, "step": 3541 }, { "epoch": 0.28336, "grad_norm": 1.2342244386672974, "learning_rate": 8.15327601493449e-06, "loss": 0.2596, "step": 3542 }, { "epoch": 0.28344, "grad_norm": 1.6412309408187866, "learning_rate": 8.152299904669698e-06, "loss": 0.344, "step": 3543 }, { "epoch": 0.28352, "grad_norm": 1.5343546867370605, "learning_rate": 8.151323594969438e-06, "loss": 0.3667, "step": 3544 }, { "epoch": 0.2836, "grad_norm": 1.3292542695999146, "learning_rate": 8.150347085895479e-06, "loss": 0.3124, "step": 3545 }, { "epoch": 0.28368, "grad_norm": 1.3630868196487427, "learning_rate": 8.1493703775096e-06, "loss": 0.3347, "step": 3546 }, { "epoch": 0.28376, "grad_norm": 1.5773979425430298, "learning_rate": 8.148393469873596e-06, "loss": 0.3451, "step": 3547 }, { "epoch": 0.28384, "grad_norm": 1.5990095138549805, "learning_rate": 8.147416363049271e-06, "loss": 0.3509, "step": 3548 }, { "epoch": 0.28392, "grad_norm": 1.6553959846496582, "learning_rate": 8.146439057098446e-06, "loss": 0.3553, "step": 3549 }, { "epoch": 0.284, "grad_norm": 1.6501647233963013, "learning_rate": 8.145461552082948e-06, "loss": 0.3922, "step": 3550 }, { "epoch": 0.28408, "grad_norm": 1.6862366199493408, "learning_rate": 8.144483848064621e-06, "loss": 0.5672, "step": 3551 }, { "epoch": 0.28416, "grad_norm": 1.5185807943344116, "learning_rate": 8.143505945105325e-06, "loss": 0.2501, "step": 3552 }, { "epoch": 0.28424, "grad_norm": 1.547339916229248, "learning_rate": 8.142527843266924e-06, "loss": 0.3314, "step": 3553 }, { "epoch": 0.28432, "grad_norm": 1.4977695941925049, "learning_rate": 8.141549542611302e-06, "loss": 0.351, "step": 3554 }, { "epoch": 0.2844, "grad_norm": 1.4256455898284912, "learning_rate": 8.140571043200354e-06, "loss": 0.313, "step": 3555 }, { "epoch": 0.28448, "grad_norm": 1.908646821975708, "learning_rate": 8.139592345095982e-06, "loss": 0.4029, "step": 3556 }, { "epoch": 0.28456, "grad_norm": 1.7634451389312744, "learning_rate": 8.138613448360108e-06, "loss": 0.4724, "step": 3557 }, { "epoch": 0.28464, "grad_norm": 1.4443376064300537, "learning_rate": 8.137634353054664e-06, "loss": 0.2718, "step": 3558 }, { "epoch": 0.28472, "grad_norm": 1.3614927530288696, "learning_rate": 8.13665505924159e-06, "loss": 0.2782, "step": 3559 }, { "epoch": 0.2848, "grad_norm": 1.582389235496521, "learning_rate": 8.13567556698285e-06, "loss": 0.4158, "step": 3560 }, { "epoch": 0.28488, "grad_norm": 1.4175881147384644, "learning_rate": 8.134695876340406e-06, "loss": 0.3379, "step": 3561 }, { "epoch": 0.28496, "grad_norm": 1.4460728168487549, "learning_rate": 8.133715987376245e-06, "loss": 0.2862, "step": 3562 }, { "epoch": 0.28504, "grad_norm": 1.5810394287109375, "learning_rate": 8.132735900152357e-06, "loss": 0.288, "step": 3563 }, { "epoch": 0.28512, "grad_norm": 1.2738404273986816, "learning_rate": 8.131755614730752e-06, "loss": 0.3385, "step": 3564 }, { "epoch": 0.2852, "grad_norm": 1.656475305557251, "learning_rate": 8.130775131173447e-06, "loss": 0.3369, "step": 3565 }, { "epoch": 0.28528, "grad_norm": 1.2260715961456299, "learning_rate": 8.129794449542474e-06, "loss": 0.2934, "step": 3566 }, { "epoch": 0.28536, "grad_norm": 1.6491786241531372, "learning_rate": 8.12881356989988e-06, "loss": 0.4359, "step": 3567 }, { "epoch": 0.28544, "grad_norm": 1.270279049873352, "learning_rate": 8.127832492307722e-06, "loss": 0.277, "step": 3568 }, { "epoch": 0.28552, "grad_norm": 1.3211328983306885, "learning_rate": 8.126851216828065e-06, "loss": 0.2417, "step": 3569 }, { "epoch": 0.2856, "grad_norm": 1.2138761281967163, "learning_rate": 8.125869743522997e-06, "loss": 0.2868, "step": 3570 }, { "epoch": 0.28568, "grad_norm": 1.6802400350570679, "learning_rate": 8.124888072454607e-06, "loss": 0.3408, "step": 3571 }, { "epoch": 0.28576, "grad_norm": 1.6839826107025146, "learning_rate": 8.123906203685007e-06, "loss": 0.3029, "step": 3572 }, { "epoch": 0.28584, "grad_norm": 1.53984534740448, "learning_rate": 8.122924137276311e-06, "loss": 0.3264, "step": 3573 }, { "epoch": 0.28592, "grad_norm": 1.4914592504501343, "learning_rate": 8.121941873290655e-06, "loss": 0.3296, "step": 3574 }, { "epoch": 0.286, "grad_norm": 1.3114702701568604, "learning_rate": 8.120959411790184e-06, "loss": 0.3089, "step": 3575 }, { "epoch": 0.28608, "grad_norm": 1.9156404733657837, "learning_rate": 8.119976752837054e-06, "loss": 0.3684, "step": 3576 }, { "epoch": 0.28616, "grad_norm": 2.184542179107666, "learning_rate": 8.118993896493433e-06, "loss": 0.4615, "step": 3577 }, { "epoch": 0.28624, "grad_norm": 1.3617297410964966, "learning_rate": 8.118010842821504e-06, "loss": 0.3079, "step": 3578 }, { "epoch": 0.28632, "grad_norm": 1.7069450616836548, "learning_rate": 8.117027591883463e-06, "loss": 0.3893, "step": 3579 }, { "epoch": 0.2864, "grad_norm": 1.5273637771606445, "learning_rate": 8.116044143741517e-06, "loss": 0.3337, "step": 3580 }, { "epoch": 0.28648, "grad_norm": 1.6013727188110352, "learning_rate": 8.115060498457882e-06, "loss": 0.3929, "step": 3581 }, { "epoch": 0.28656, "grad_norm": 1.5870674848556519, "learning_rate": 8.114076656094794e-06, "loss": 0.3977, "step": 3582 }, { "epoch": 0.28664, "grad_norm": 1.2662407159805298, "learning_rate": 8.113092616714494e-06, "loss": 0.3117, "step": 3583 }, { "epoch": 0.28672, "grad_norm": 1.407432198524475, "learning_rate": 8.112108380379242e-06, "loss": 0.2418, "step": 3584 }, { "epoch": 0.2868, "grad_norm": 1.5326191186904907, "learning_rate": 8.111123947151305e-06, "loss": 0.3517, "step": 3585 }, { "epoch": 0.28688, "grad_norm": 2.2068679332733154, "learning_rate": 8.110139317092966e-06, "loss": 0.5408, "step": 3586 }, { "epoch": 0.28696, "grad_norm": 1.5989627838134766, "learning_rate": 8.10915449026652e-06, "loss": 0.4202, "step": 3587 }, { "epoch": 0.28704, "grad_norm": 1.3212885856628418, "learning_rate": 8.108169466734271e-06, "loss": 0.2814, "step": 3588 }, { "epoch": 0.28712, "grad_norm": 1.6807013750076294, "learning_rate": 8.107184246558542e-06, "loss": 0.3642, "step": 3589 }, { "epoch": 0.2872, "grad_norm": 1.4167367219924927, "learning_rate": 8.10619882980166e-06, "loss": 0.3045, "step": 3590 }, { "epoch": 0.28728, "grad_norm": 1.8939565420150757, "learning_rate": 8.105213216525974e-06, "loss": 0.3761, "step": 3591 }, { "epoch": 0.28736, "grad_norm": 1.1385818719863892, "learning_rate": 8.104227406793834e-06, "loss": 0.2466, "step": 3592 }, { "epoch": 0.28744, "grad_norm": 1.3388983011245728, "learning_rate": 8.103241400667617e-06, "loss": 0.3011, "step": 3593 }, { "epoch": 0.28752, "grad_norm": 1.2075142860412598, "learning_rate": 8.102255198209696e-06, "loss": 0.263, "step": 3594 }, { "epoch": 0.2876, "grad_norm": 1.6224541664123535, "learning_rate": 8.101268799482472e-06, "loss": 0.3047, "step": 3595 }, { "epoch": 0.28768, "grad_norm": 1.7167912721633911, "learning_rate": 8.100282204548347e-06, "loss": 0.4803, "step": 3596 }, { "epoch": 0.28776, "grad_norm": 1.5466722249984741, "learning_rate": 8.09929541346974e-06, "loss": 0.4716, "step": 3597 }, { "epoch": 0.28784, "grad_norm": 1.9157578945159912, "learning_rate": 8.098308426309082e-06, "loss": 0.3921, "step": 3598 }, { "epoch": 0.28792, "grad_norm": 1.7497869729995728, "learning_rate": 8.097321243128817e-06, "loss": 0.3694, "step": 3599 }, { "epoch": 0.288, "grad_norm": 1.7529637813568115, "learning_rate": 8.096333863991402e-06, "loss": 0.3621, "step": 3600 }, { "epoch": 0.28808, "grad_norm": 1.7688055038452148, "learning_rate": 8.095346288959303e-06, "loss": 0.3573, "step": 3601 }, { "epoch": 0.28816, "grad_norm": 2.178952217102051, "learning_rate": 8.094358518095002e-06, "loss": 0.5357, "step": 3602 }, { "epoch": 0.28824, "grad_norm": 2.0295703411102295, "learning_rate": 8.09337055146099e-06, "loss": 0.4133, "step": 3603 }, { "epoch": 0.28832, "grad_norm": 1.6052531003952026, "learning_rate": 8.092382389119775e-06, "loss": 0.2952, "step": 3604 }, { "epoch": 0.2884, "grad_norm": 1.9326715469360352, "learning_rate": 8.091394031133872e-06, "loss": 0.3914, "step": 3605 }, { "epoch": 0.28848, "grad_norm": 1.7421461343765259, "learning_rate": 8.090405477565814e-06, "loss": 0.3181, "step": 3606 }, { "epoch": 0.28856, "grad_norm": 1.501417875289917, "learning_rate": 8.089416728478141e-06, "loss": 0.314, "step": 3607 }, { "epoch": 0.28864, "grad_norm": 1.6021887063980103, "learning_rate": 8.088427783933408e-06, "loss": 0.3401, "step": 3608 }, { "epoch": 0.28872, "grad_norm": 1.6619521379470825, "learning_rate": 8.087438643994185e-06, "loss": 0.3583, "step": 3609 }, { "epoch": 0.2888, "grad_norm": 1.458363652229309, "learning_rate": 8.086449308723048e-06, "loss": 0.3985, "step": 3610 }, { "epoch": 0.28888, "grad_norm": 1.698525309562683, "learning_rate": 8.085459778182591e-06, "loss": 0.3954, "step": 3611 }, { "epoch": 0.28896, "grad_norm": 1.6027504205703735, "learning_rate": 8.084470052435419e-06, "loss": 0.3274, "step": 3612 }, { "epoch": 0.28904, "grad_norm": 1.7605338096618652, "learning_rate": 8.083480131544146e-06, "loss": 0.3009, "step": 3613 }, { "epoch": 0.28912, "grad_norm": 1.6151217222213745, "learning_rate": 8.082490015571403e-06, "loss": 0.3801, "step": 3614 }, { "epoch": 0.2892, "grad_norm": 1.507580041885376, "learning_rate": 8.08149970457983e-06, "loss": 0.3096, "step": 3615 }, { "epoch": 0.28928, "grad_norm": 1.56270432472229, "learning_rate": 8.080509198632082e-06, "loss": 0.2935, "step": 3616 }, { "epoch": 0.28936, "grad_norm": 1.5426692962646484, "learning_rate": 8.079518497790825e-06, "loss": 0.3052, "step": 3617 }, { "epoch": 0.28944, "grad_norm": 1.736161231994629, "learning_rate": 8.078527602118735e-06, "loss": 0.4002, "step": 3618 }, { "epoch": 0.28952, "grad_norm": 1.4631191492080688, "learning_rate": 8.077536511678506e-06, "loss": 0.2789, "step": 3619 }, { "epoch": 0.2896, "grad_norm": 1.389417052268982, "learning_rate": 8.076545226532839e-06, "loss": 0.2832, "step": 3620 }, { "epoch": 0.28968, "grad_norm": 1.6184295415878296, "learning_rate": 8.07555374674445e-06, "loss": 0.3232, "step": 3621 }, { "epoch": 0.28976, "grad_norm": 1.4686223268508911, "learning_rate": 8.074562072376067e-06, "loss": 0.3091, "step": 3622 }, { "epoch": 0.28984, "grad_norm": 1.418705940246582, "learning_rate": 8.073570203490428e-06, "loss": 0.3433, "step": 3623 }, { "epoch": 0.28992, "grad_norm": 2.270691394805908, "learning_rate": 8.072578140150286e-06, "loss": 0.5489, "step": 3624 }, { "epoch": 0.29, "grad_norm": 2.0474798679351807, "learning_rate": 8.071585882418406e-06, "loss": 0.3902, "step": 3625 }, { "epoch": 0.29008, "grad_norm": 1.4669115543365479, "learning_rate": 8.070593430357565e-06, "loss": 0.3705, "step": 3626 }, { "epoch": 0.29016, "grad_norm": 1.3906588554382324, "learning_rate": 8.069600784030553e-06, "loss": 0.2618, "step": 3627 }, { "epoch": 0.29024, "grad_norm": 2.14176869392395, "learning_rate": 8.068607943500168e-06, "loss": 0.6205, "step": 3628 }, { "epoch": 0.29032, "grad_norm": 1.6090953350067139, "learning_rate": 8.067614908829229e-06, "loss": 0.3588, "step": 3629 }, { "epoch": 0.2904, "grad_norm": 1.9249868392944336, "learning_rate": 8.066621680080557e-06, "loss": 0.3878, "step": 3630 }, { "epoch": 0.29048, "grad_norm": 1.986382007598877, "learning_rate": 8.065628257316993e-06, "loss": 0.4614, "step": 3631 }, { "epoch": 0.29056, "grad_norm": 1.3387824296951294, "learning_rate": 8.064634640601386e-06, "loss": 0.3331, "step": 3632 }, { "epoch": 0.29064, "grad_norm": 1.2659355401992798, "learning_rate": 8.0636408299966e-06, "loss": 0.311, "step": 3633 }, { "epoch": 0.29072, "grad_norm": 1.6108455657958984, "learning_rate": 8.06264682556551e-06, "loss": 0.4001, "step": 3634 }, { "epoch": 0.2908, "grad_norm": 1.5894638299942017, "learning_rate": 8.061652627371003e-06, "loss": 0.291, "step": 3635 }, { "epoch": 0.29088, "grad_norm": 2.1120431423187256, "learning_rate": 8.060658235475978e-06, "loss": 0.5541, "step": 3636 }, { "epoch": 0.29096, "grad_norm": 1.6898366212844849, "learning_rate": 8.059663649943348e-06, "loss": 0.3644, "step": 3637 }, { "epoch": 0.29104, "grad_norm": 1.6504249572753906, "learning_rate": 8.058668870836035e-06, "loss": 0.4423, "step": 3638 }, { "epoch": 0.29112, "grad_norm": 1.3013478517532349, "learning_rate": 8.05767389821698e-06, "loss": 0.3372, "step": 3639 }, { "epoch": 0.2912, "grad_norm": 1.45965576171875, "learning_rate": 8.056678732149125e-06, "loss": 0.2493, "step": 3640 }, { "epoch": 0.29128, "grad_norm": 1.4169219732284546, "learning_rate": 8.055683372695437e-06, "loss": 0.3208, "step": 3641 }, { "epoch": 0.29136, "grad_norm": 1.294084072113037, "learning_rate": 8.054687819918884e-06, "loss": 0.2904, "step": 3642 }, { "epoch": 0.29144, "grad_norm": 1.3888262510299683, "learning_rate": 8.053692073882456e-06, "loss": 0.3211, "step": 3643 }, { "epoch": 0.29152, "grad_norm": 1.5449509620666504, "learning_rate": 8.052696134649147e-06, "loss": 0.2892, "step": 3644 }, { "epoch": 0.2916, "grad_norm": 1.308077096939087, "learning_rate": 8.051700002281967e-06, "loss": 0.2521, "step": 3645 }, { "epoch": 0.29168, "grad_norm": 1.7512633800506592, "learning_rate": 8.05070367684394e-06, "loss": 0.3653, "step": 3646 }, { "epoch": 0.29176, "grad_norm": 2.0131380558013916, "learning_rate": 8.0497071583981e-06, "loss": 0.4186, "step": 3647 }, { "epoch": 0.29184, "grad_norm": 2.0268020629882812, "learning_rate": 8.048710447007491e-06, "loss": 0.4878, "step": 3648 }, { "epoch": 0.29192, "grad_norm": 1.627658486366272, "learning_rate": 8.047713542735173e-06, "loss": 0.4156, "step": 3649 }, { "epoch": 0.292, "grad_norm": 1.7770787477493286, "learning_rate": 8.046716445644217e-06, "loss": 0.3509, "step": 3650 }, { "epoch": 0.29208, "grad_norm": 1.3930143117904663, "learning_rate": 8.045719155797708e-06, "loss": 0.2563, "step": 3651 }, { "epoch": 0.29216, "grad_norm": 1.5242277383804321, "learning_rate": 8.044721673258736e-06, "loss": 0.4064, "step": 3652 }, { "epoch": 0.29224, "grad_norm": 1.174406886100769, "learning_rate": 8.043723998090413e-06, "loss": 0.2569, "step": 3653 }, { "epoch": 0.29232, "grad_norm": 1.1028757095336914, "learning_rate": 8.042726130355856e-06, "loss": 0.2104, "step": 3654 }, { "epoch": 0.2924, "grad_norm": 1.654245138168335, "learning_rate": 8.041728070118198e-06, "loss": 0.4263, "step": 3655 }, { "epoch": 0.29248, "grad_norm": 1.5495033264160156, "learning_rate": 8.040729817440584e-06, "loss": 0.3549, "step": 3656 }, { "epoch": 0.29256, "grad_norm": 1.5318132638931274, "learning_rate": 8.039731372386168e-06, "loss": 0.3307, "step": 3657 }, { "epoch": 0.29264, "grad_norm": 1.3253992795944214, "learning_rate": 8.038732735018118e-06, "loss": 0.3224, "step": 3658 }, { "epoch": 0.29272, "grad_norm": 1.608940601348877, "learning_rate": 8.037733905399616e-06, "loss": 0.3678, "step": 3659 }, { "epoch": 0.2928, "grad_norm": 1.910239577293396, "learning_rate": 8.036734883593852e-06, "loss": 0.3916, "step": 3660 }, { "epoch": 0.29288, "grad_norm": 1.2812817096710205, "learning_rate": 8.035735669664037e-06, "loss": 0.296, "step": 3661 }, { "epoch": 0.29296, "grad_norm": 1.475081205368042, "learning_rate": 8.034736263673382e-06, "loss": 0.3323, "step": 3662 }, { "epoch": 0.29304, "grad_norm": 1.463401436805725, "learning_rate": 8.033736665685119e-06, "loss": 0.3216, "step": 3663 }, { "epoch": 0.29312, "grad_norm": 1.6229404211044312, "learning_rate": 8.032736875762486e-06, "loss": 0.3206, "step": 3664 }, { "epoch": 0.2932, "grad_norm": 1.4625945091247559, "learning_rate": 8.031736893968738e-06, "loss": 0.3063, "step": 3665 }, { "epoch": 0.29328, "grad_norm": 1.762115716934204, "learning_rate": 8.030736720367143e-06, "loss": 0.3884, "step": 3666 }, { "epoch": 0.29336, "grad_norm": 1.6735233068466187, "learning_rate": 8.029736355020975e-06, "loss": 0.4038, "step": 3667 }, { "epoch": 0.29344, "grad_norm": 1.6050176620483398, "learning_rate": 8.028735797993528e-06, "loss": 0.4044, "step": 3668 }, { "epoch": 0.29352, "grad_norm": 1.3272747993469238, "learning_rate": 8.027735049348099e-06, "loss": 0.2959, "step": 3669 }, { "epoch": 0.2936, "grad_norm": 1.7667698860168457, "learning_rate": 8.026734109148005e-06, "loss": 0.423, "step": 3670 }, { "epoch": 0.29368, "grad_norm": 1.7235229015350342, "learning_rate": 8.02573297745657e-06, "loss": 0.3251, "step": 3671 }, { "epoch": 0.29376, "grad_norm": 1.4288824796676636, "learning_rate": 8.024731654337134e-06, "loss": 0.3099, "step": 3672 }, { "epoch": 0.29384, "grad_norm": 1.4889461994171143, "learning_rate": 8.023730139853049e-06, "loss": 0.271, "step": 3673 }, { "epoch": 0.29392, "grad_norm": 1.5079691410064697, "learning_rate": 8.022728434067675e-06, "loss": 0.3027, "step": 3674 }, { "epoch": 0.294, "grad_norm": 1.52550208568573, "learning_rate": 8.021726537044385e-06, "loss": 0.4014, "step": 3675 }, { "epoch": 0.29408, "grad_norm": 1.9863523244857788, "learning_rate": 8.020724448846569e-06, "loss": 0.4531, "step": 3676 }, { "epoch": 0.29416, "grad_norm": 1.5718297958374023, "learning_rate": 8.019722169537624e-06, "loss": 0.4036, "step": 3677 }, { "epoch": 0.29424, "grad_norm": 1.8104145526885986, "learning_rate": 8.018719699180961e-06, "loss": 0.3399, "step": 3678 }, { "epoch": 0.29432, "grad_norm": 1.6687524318695068, "learning_rate": 8.017717037840005e-06, "loss": 0.3931, "step": 3679 }, { "epoch": 0.2944, "grad_norm": 1.5040487051010132, "learning_rate": 8.016714185578189e-06, "loss": 0.3671, "step": 3680 }, { "epoch": 0.29448, "grad_norm": 1.4653910398483276, "learning_rate": 8.01571114245896e-06, "loss": 0.3147, "step": 3681 }, { "epoch": 0.29456, "grad_norm": 1.5103580951690674, "learning_rate": 8.014707908545776e-06, "loss": 0.3386, "step": 3682 }, { "epoch": 0.29464, "grad_norm": 1.5704234838485718, "learning_rate": 8.013704483902112e-06, "loss": 0.2921, "step": 3683 }, { "epoch": 0.29472, "grad_norm": 1.7446131706237793, "learning_rate": 8.012700868591449e-06, "loss": 0.3766, "step": 3684 }, { "epoch": 0.2948, "grad_norm": 1.2436984777450562, "learning_rate": 8.011697062677282e-06, "loss": 0.2697, "step": 3685 }, { "epoch": 0.29488, "grad_norm": 1.496191382408142, "learning_rate": 8.01069306622312e-06, "loss": 0.325, "step": 3686 }, { "epoch": 0.29496, "grad_norm": 1.426172137260437, "learning_rate": 8.00968887929248e-06, "loss": 0.2959, "step": 3687 }, { "epoch": 0.29504, "grad_norm": 1.596839427947998, "learning_rate": 8.008684501948895e-06, "loss": 0.3525, "step": 3688 }, { "epoch": 0.29512, "grad_norm": 1.2332483530044556, "learning_rate": 8.00767993425591e-06, "loss": 0.2837, "step": 3689 }, { "epoch": 0.2952, "grad_norm": 1.593542218208313, "learning_rate": 8.00667517627708e-06, "loss": 0.3293, "step": 3690 }, { "epoch": 0.29528, "grad_norm": 1.504442572593689, "learning_rate": 8.00567022807597e-06, "loss": 0.3502, "step": 3691 }, { "epoch": 0.29536, "grad_norm": 1.763466477394104, "learning_rate": 8.004665089716162e-06, "loss": 0.3728, "step": 3692 }, { "epoch": 0.29544, "grad_norm": 1.6120593547821045, "learning_rate": 8.003659761261248e-06, "loss": 0.314, "step": 3693 }, { "epoch": 0.29552, "grad_norm": 1.3458003997802734, "learning_rate": 8.00265424277483e-06, "loss": 0.3373, "step": 3694 }, { "epoch": 0.2956, "grad_norm": 1.3163591623306274, "learning_rate": 8.001648534320526e-06, "loss": 0.325, "step": 3695 }, { "epoch": 0.29568, "grad_norm": 1.616263747215271, "learning_rate": 8.000642635961963e-06, "loss": 0.4398, "step": 3696 }, { "epoch": 0.29576, "grad_norm": 1.4315654039382935, "learning_rate": 7.99963654776278e-06, "loss": 0.3828, "step": 3697 }, { "epoch": 0.29584, "grad_norm": 1.517011284828186, "learning_rate": 7.99863026978663e-06, "loss": 0.3207, "step": 3698 }, { "epoch": 0.29592, "grad_norm": 1.4280073642730713, "learning_rate": 7.997623802097176e-06, "loss": 0.3207, "step": 3699 }, { "epoch": 0.296, "grad_norm": 1.3090413808822632, "learning_rate": 7.996617144758094e-06, "loss": 0.3191, "step": 3700 }, { "epoch": 0.29608, "grad_norm": 1.6390103101730347, "learning_rate": 7.995610297833072e-06, "loss": 0.3944, "step": 3701 }, { "epoch": 0.29616, "grad_norm": 1.4760853052139282, "learning_rate": 7.994603261385809e-06, "loss": 0.3428, "step": 3702 }, { "epoch": 0.29624, "grad_norm": 1.1745274066925049, "learning_rate": 7.99359603548002e-06, "loss": 0.2517, "step": 3703 }, { "epoch": 0.29632, "grad_norm": 1.4406036138534546, "learning_rate": 7.992588620179424e-06, "loss": 0.3696, "step": 3704 }, { "epoch": 0.2964, "grad_norm": 1.278671383857727, "learning_rate": 7.99158101554776e-06, "loss": 0.249, "step": 3705 }, { "epoch": 0.29648, "grad_norm": 1.420791506767273, "learning_rate": 7.990573221648775e-06, "loss": 0.2905, "step": 3706 }, { "epoch": 0.29656, "grad_norm": 1.7047336101531982, "learning_rate": 7.989565238546228e-06, "loss": 0.4367, "step": 3707 }, { "epoch": 0.29664, "grad_norm": 2.036435127258301, "learning_rate": 7.988557066303892e-06, "loss": 0.3923, "step": 3708 }, { "epoch": 0.29672, "grad_norm": 1.4706825017929077, "learning_rate": 7.987548704985553e-06, "loss": 0.429, "step": 3709 }, { "epoch": 0.2968, "grad_norm": 1.6444703340530396, "learning_rate": 7.986540154655e-06, "loss": 0.3174, "step": 3710 }, { "epoch": 0.29688, "grad_norm": 1.5745984315872192, "learning_rate": 7.985531415376046e-06, "loss": 0.2996, "step": 3711 }, { "epoch": 0.29696, "grad_norm": 1.4535049200057983, "learning_rate": 7.984522487212509e-06, "loss": 0.3336, "step": 3712 }, { "epoch": 0.29704, "grad_norm": 1.7702298164367676, "learning_rate": 7.98351337022822e-06, "loss": 0.388, "step": 3713 }, { "epoch": 0.29712, "grad_norm": 1.5240532159805298, "learning_rate": 7.982504064487022e-06, "loss": 0.3296, "step": 3714 }, { "epoch": 0.2972, "grad_norm": 1.6705554723739624, "learning_rate": 7.981494570052775e-06, "loss": 0.3306, "step": 3715 }, { "epoch": 0.29728, "grad_norm": 1.8875733613967896, "learning_rate": 7.980484886989338e-06, "loss": 0.3438, "step": 3716 }, { "epoch": 0.29736, "grad_norm": 1.5699726343154907, "learning_rate": 7.979475015360599e-06, "loss": 0.3333, "step": 3717 }, { "epoch": 0.29744, "grad_norm": 1.580299973487854, "learning_rate": 7.978464955230442e-06, "loss": 0.3609, "step": 3718 }, { "epoch": 0.29752, "grad_norm": 1.7409504652023315, "learning_rate": 7.977454706662775e-06, "loss": 0.3506, "step": 3719 }, { "epoch": 0.2976, "grad_norm": 2.2885334491729736, "learning_rate": 7.97644426972151e-06, "loss": 0.4091, "step": 3720 }, { "epoch": 0.29768, "grad_norm": 1.6000932455062866, "learning_rate": 7.975433644470576e-06, "loss": 0.3848, "step": 3721 }, { "epoch": 0.29776, "grad_norm": 1.8004785776138306, "learning_rate": 7.974422830973912e-06, "loss": 0.3568, "step": 3722 }, { "epoch": 0.29784, "grad_norm": 1.5973020792007446, "learning_rate": 7.973411829295466e-06, "loss": 0.3497, "step": 3723 }, { "epoch": 0.29792, "grad_norm": 1.4873243570327759, "learning_rate": 7.972400639499204e-06, "loss": 0.3358, "step": 3724 }, { "epoch": 0.298, "grad_norm": 1.824233055114746, "learning_rate": 7.971389261649099e-06, "loss": 0.4382, "step": 3725 }, { "epoch": 0.29808, "grad_norm": 1.824907660484314, "learning_rate": 7.970377695809138e-06, "loss": 0.4416, "step": 3726 }, { "epoch": 0.29816, "grad_norm": 1.4309464693069458, "learning_rate": 7.96936594204332e-06, "loss": 0.2923, "step": 3727 }, { "epoch": 0.29824, "grad_norm": 1.8320714235305786, "learning_rate": 7.968354000415652e-06, "loss": 0.4268, "step": 3728 }, { "epoch": 0.29832, "grad_norm": 1.6846635341644287, "learning_rate": 7.967341870990159e-06, "loss": 0.297, "step": 3729 }, { "epoch": 0.2984, "grad_norm": 1.6068602800369263, "learning_rate": 7.966329553830876e-06, "loss": 0.3604, "step": 3730 }, { "epoch": 0.29848, "grad_norm": 1.7175391912460327, "learning_rate": 7.965317049001847e-06, "loss": 0.394, "step": 3731 }, { "epoch": 0.29856, "grad_norm": 1.2361955642700195, "learning_rate": 7.96430435656713e-06, "loss": 0.2636, "step": 3732 }, { "epoch": 0.29864, "grad_norm": 1.6340537071228027, "learning_rate": 7.963291476590795e-06, "loss": 0.3437, "step": 3733 }, { "epoch": 0.29872, "grad_norm": 1.4597103595733643, "learning_rate": 7.962278409136924e-06, "loss": 0.307, "step": 3734 }, { "epoch": 0.2988, "grad_norm": 1.6404805183410645, "learning_rate": 7.961265154269608e-06, "loss": 0.5112, "step": 3735 }, { "epoch": 0.29888, "grad_norm": 1.6717686653137207, "learning_rate": 7.960251712052955e-06, "loss": 0.3567, "step": 3736 }, { "epoch": 0.29896, "grad_norm": 1.874589204788208, "learning_rate": 7.959238082551081e-06, "loss": 0.4107, "step": 3737 }, { "epoch": 0.29904, "grad_norm": 1.2089720964431763, "learning_rate": 7.958224265828118e-06, "loss": 0.3063, "step": 3738 }, { "epoch": 0.29912, "grad_norm": 1.2900773286819458, "learning_rate": 7.957210261948201e-06, "loss": 0.2518, "step": 3739 }, { "epoch": 0.2992, "grad_norm": 1.737890362739563, "learning_rate": 7.956196070975485e-06, "loss": 0.3588, "step": 3740 }, { "epoch": 0.29928, "grad_norm": 1.681058406829834, "learning_rate": 7.955181692974138e-06, "loss": 0.3226, "step": 3741 }, { "epoch": 0.29936, "grad_norm": 1.6634056568145752, "learning_rate": 7.954167128008332e-06, "loss": 0.3443, "step": 3742 }, { "epoch": 0.29944, "grad_norm": 1.8775358200073242, "learning_rate": 7.953152376142255e-06, "loss": 0.3255, "step": 3743 }, { "epoch": 0.29952, "grad_norm": 1.728934645652771, "learning_rate": 7.95213743744011e-06, "loss": 0.3183, "step": 3744 }, { "epoch": 0.2996, "grad_norm": 1.514907717704773, "learning_rate": 7.951122311966108e-06, "loss": 0.3914, "step": 3745 }, { "epoch": 0.29968, "grad_norm": 1.5943530797958374, "learning_rate": 7.95010699978447e-06, "loss": 0.349, "step": 3746 }, { "epoch": 0.29976, "grad_norm": 1.815433382987976, "learning_rate": 7.949091500959434e-06, "loss": 0.4143, "step": 3747 }, { "epoch": 0.29984, "grad_norm": 1.7097547054290771, "learning_rate": 7.948075815555246e-06, "loss": 0.3884, "step": 3748 }, { "epoch": 0.29992, "grad_norm": 1.619554877281189, "learning_rate": 7.947059943636166e-06, "loss": 0.3996, "step": 3749 }, { "epoch": 0.3, "grad_norm": 1.394123911857605, "learning_rate": 7.946043885266465e-06, "loss": 0.2721, "step": 3750 }, { "epoch": 0.30008, "grad_norm": 1.3874683380126953, "learning_rate": 7.945027640510423e-06, "loss": 0.3412, "step": 3751 }, { "epoch": 0.30016, "grad_norm": 1.5768663883209229, "learning_rate": 7.944011209432336e-06, "loss": 0.3391, "step": 3752 }, { "epoch": 0.30024, "grad_norm": 1.4615979194641113, "learning_rate": 7.942994592096513e-06, "loss": 0.2966, "step": 3753 }, { "epoch": 0.30032, "grad_norm": 1.3654316663742065, "learning_rate": 7.941977788567267e-06, "loss": 0.3807, "step": 3754 }, { "epoch": 0.3004, "grad_norm": 1.8147246837615967, "learning_rate": 7.940960798908933e-06, "loss": 0.3741, "step": 3755 }, { "epoch": 0.30048, "grad_norm": 1.5056862831115723, "learning_rate": 7.939943623185847e-06, "loss": 0.3309, "step": 3756 }, { "epoch": 0.30056, "grad_norm": 1.4294978380203247, "learning_rate": 7.938926261462366e-06, "loss": 0.3607, "step": 3757 }, { "epoch": 0.30064, "grad_norm": 1.4605019092559814, "learning_rate": 7.937908713802855e-06, "loss": 0.3732, "step": 3758 }, { "epoch": 0.30072, "grad_norm": 1.7275772094726562, "learning_rate": 7.936890980271688e-06, "loss": 0.3738, "step": 3759 }, { "epoch": 0.3008, "grad_norm": 1.480759620666504, "learning_rate": 7.935873060933257e-06, "loss": 0.3517, "step": 3760 }, { "epoch": 0.30088, "grad_norm": 1.655974268913269, "learning_rate": 7.934854955851961e-06, "loss": 0.3499, "step": 3761 }, { "epoch": 0.30096, "grad_norm": 1.35233736038208, "learning_rate": 7.933836665092212e-06, "loss": 0.3247, "step": 3762 }, { "epoch": 0.30104, "grad_norm": 1.2412519454956055, "learning_rate": 7.932818188718433e-06, "loss": 0.2889, "step": 3763 }, { "epoch": 0.30112, "grad_norm": 1.5485812425613403, "learning_rate": 7.931799526795062e-06, "loss": 0.3616, "step": 3764 }, { "epoch": 0.3012, "grad_norm": 1.448938250541687, "learning_rate": 7.930780679386542e-06, "loss": 0.3128, "step": 3765 }, { "epoch": 0.30128, "grad_norm": 1.4666680097579956, "learning_rate": 7.929761646557337e-06, "loss": 0.3414, "step": 3766 }, { "epoch": 0.30136, "grad_norm": 1.6032980680465698, "learning_rate": 7.928742428371916e-06, "loss": 0.3667, "step": 3767 }, { "epoch": 0.30144, "grad_norm": 1.5190998315811157, "learning_rate": 7.92772302489476e-06, "loss": 0.3274, "step": 3768 }, { "epoch": 0.30152, "grad_norm": 1.6384193897247314, "learning_rate": 7.926703436190363e-06, "loss": 0.371, "step": 3769 }, { "epoch": 0.3016, "grad_norm": 1.7404835224151611, "learning_rate": 7.925683662323235e-06, "loss": 0.3464, "step": 3770 }, { "epoch": 0.30168, "grad_norm": 1.719647765159607, "learning_rate": 7.924663703357892e-06, "loss": 0.371, "step": 3771 }, { "epoch": 0.30176, "grad_norm": 1.545730471611023, "learning_rate": 7.923643559358858e-06, "loss": 0.3684, "step": 3772 }, { "epoch": 0.30184, "grad_norm": 1.717699646949768, "learning_rate": 7.922623230390682e-06, "loss": 0.3934, "step": 3773 }, { "epoch": 0.30192, "grad_norm": 1.8281782865524292, "learning_rate": 7.921602716517914e-06, "loss": 0.508, "step": 3774 }, { "epoch": 0.302, "grad_norm": 2.0022242069244385, "learning_rate": 7.920582017805114e-06, "loss": 0.36, "step": 3775 }, { "epoch": 0.30208, "grad_norm": 1.4404866695404053, "learning_rate": 7.919561134316865e-06, "loss": 0.2932, "step": 3776 }, { "epoch": 0.30216, "grad_norm": 1.687625527381897, "learning_rate": 7.918540066117752e-06, "loss": 0.3467, "step": 3777 }, { "epoch": 0.30224, "grad_norm": 1.2726261615753174, "learning_rate": 7.917518813272373e-06, "loss": 0.3044, "step": 3778 }, { "epoch": 0.30232, "grad_norm": 2.043890953063965, "learning_rate": 7.916497375845342e-06, "loss": 0.3605, "step": 3779 }, { "epoch": 0.3024, "grad_norm": 1.251383900642395, "learning_rate": 7.91547575390128e-06, "loss": 0.3175, "step": 3780 }, { "epoch": 0.30248, "grad_norm": 1.544699788093567, "learning_rate": 7.914453947504822e-06, "loss": 0.3783, "step": 3781 }, { "epoch": 0.30256, "grad_norm": 1.4141426086425781, "learning_rate": 7.913431956720615e-06, "loss": 0.26, "step": 3782 }, { "epoch": 0.30264, "grad_norm": 1.7382965087890625, "learning_rate": 7.912409781613317e-06, "loss": 0.3664, "step": 3783 }, { "epoch": 0.30272, "grad_norm": 1.1553996801376343, "learning_rate": 7.911387422247596e-06, "loss": 0.2621, "step": 3784 }, { "epoch": 0.3028, "grad_norm": 1.534834623336792, "learning_rate": 7.910364878688135e-06, "loss": 0.3252, "step": 3785 }, { "epoch": 0.30288, "grad_norm": 1.4503859281539917, "learning_rate": 7.909342150999626e-06, "loss": 0.3371, "step": 3786 }, { "epoch": 0.30296, "grad_norm": 1.7136962413787842, "learning_rate": 7.908319239246774e-06, "loss": 0.3998, "step": 3787 }, { "epoch": 0.30304, "grad_norm": 1.6705702543258667, "learning_rate": 7.907296143494293e-06, "loss": 0.4058, "step": 3788 }, { "epoch": 0.30312, "grad_norm": 1.4927536249160767, "learning_rate": 7.906272863806916e-06, "loss": 0.3728, "step": 3789 }, { "epoch": 0.3032, "grad_norm": 1.1272435188293457, "learning_rate": 7.905249400249377e-06, "loss": 0.2479, "step": 3790 }, { "epoch": 0.30328, "grad_norm": 1.3360072374343872, "learning_rate": 7.904225752886433e-06, "loss": 0.269, "step": 3791 }, { "epoch": 0.30336, "grad_norm": 1.445589542388916, "learning_rate": 7.90320192178284e-06, "loss": 0.311, "step": 3792 }, { "epoch": 0.30344, "grad_norm": 2.228759527206421, "learning_rate": 7.902177907003375e-06, "loss": 0.4801, "step": 3793 }, { "epoch": 0.30352, "grad_norm": 1.5473653078079224, "learning_rate": 7.901153708612825e-06, "loss": 0.2992, "step": 3794 }, { "epoch": 0.3036, "grad_norm": 1.85487699508667, "learning_rate": 7.90012932667599e-06, "loss": 0.3764, "step": 3795 }, { "epoch": 0.30368, "grad_norm": 1.6702008247375488, "learning_rate": 7.899104761257673e-06, "loss": 0.3616, "step": 3796 }, { "epoch": 0.30376, "grad_norm": 1.6418166160583496, "learning_rate": 7.8980800124227e-06, "loss": 0.3333, "step": 3797 }, { "epoch": 0.30384, "grad_norm": 1.5101503133773804, "learning_rate": 7.8970550802359e-06, "loss": 0.3233, "step": 3798 }, { "epoch": 0.30392, "grad_norm": 1.378827452659607, "learning_rate": 7.896029964762119e-06, "loss": 0.2467, "step": 3799 }, { "epoch": 0.304, "grad_norm": 1.2998595237731934, "learning_rate": 7.895004666066214e-06, "loss": 0.2874, "step": 3800 }, { "epoch": 0.30408, "grad_norm": 1.455960750579834, "learning_rate": 7.893979184213049e-06, "loss": 0.2777, "step": 3801 }, { "epoch": 0.30416, "grad_norm": 1.5600316524505615, "learning_rate": 7.892953519267506e-06, "loss": 0.3268, "step": 3802 }, { "epoch": 0.30424, "grad_norm": 1.37201726436615, "learning_rate": 7.891927671294472e-06, "loss": 0.3666, "step": 3803 }, { "epoch": 0.30432, "grad_norm": 1.4936875104904175, "learning_rate": 7.890901640358852e-06, "loss": 0.2798, "step": 3804 }, { "epoch": 0.3044, "grad_norm": 1.624596357345581, "learning_rate": 7.889875426525557e-06, "loss": 0.4087, "step": 3805 }, { "epoch": 0.30448, "grad_norm": 2.078230381011963, "learning_rate": 7.888849029859513e-06, "loss": 0.4298, "step": 3806 }, { "epoch": 0.30456, "grad_norm": 1.4350990056991577, "learning_rate": 7.887822450425658e-06, "loss": 0.3688, "step": 3807 }, { "epoch": 0.30464, "grad_norm": 1.392849087715149, "learning_rate": 7.886795688288937e-06, "loss": 0.3813, "step": 3808 }, { "epoch": 0.30472, "grad_norm": 1.645775556564331, "learning_rate": 7.885768743514316e-06, "loss": 0.3152, "step": 3809 }, { "epoch": 0.3048, "grad_norm": 1.6383919715881348, "learning_rate": 7.88474161616676e-06, "loss": 0.3558, "step": 3810 }, { "epoch": 0.30488, "grad_norm": 2.151557445526123, "learning_rate": 7.883714306311255e-06, "loss": 0.4004, "step": 3811 }, { "epoch": 0.30496, "grad_norm": 1.6432582139968872, "learning_rate": 7.882686814012792e-06, "loss": 0.4162, "step": 3812 }, { "epoch": 0.30504, "grad_norm": 1.6074105501174927, "learning_rate": 7.881659139336383e-06, "loss": 0.3813, "step": 3813 }, { "epoch": 0.30512, "grad_norm": 1.7226243019104004, "learning_rate": 7.880631282347042e-06, "loss": 0.446, "step": 3814 }, { "epoch": 0.3052, "grad_norm": 1.3685539960861206, "learning_rate": 7.879603243109799e-06, "loss": 0.2664, "step": 3815 }, { "epoch": 0.30528, "grad_norm": 2.002601146697998, "learning_rate": 7.878575021689693e-06, "loss": 0.402, "step": 3816 }, { "epoch": 0.30536, "grad_norm": 1.7490203380584717, "learning_rate": 7.877546618151776e-06, "loss": 0.3748, "step": 3817 }, { "epoch": 0.30544, "grad_norm": 1.197702169418335, "learning_rate": 7.876518032561113e-06, "loss": 0.2688, "step": 3818 }, { "epoch": 0.30552, "grad_norm": 1.7372758388519287, "learning_rate": 7.875489264982781e-06, "loss": 0.377, "step": 3819 }, { "epoch": 0.3056, "grad_norm": 1.6473854780197144, "learning_rate": 7.874460315481863e-06, "loss": 0.3239, "step": 3820 }, { "epoch": 0.30568, "grad_norm": 1.2986738681793213, "learning_rate": 7.87343118412346e-06, "loss": 0.3873, "step": 3821 }, { "epoch": 0.30576, "grad_norm": 2.0069668292999268, "learning_rate": 7.872401870972679e-06, "loss": 0.3118, "step": 3822 }, { "epoch": 0.30584, "grad_norm": 1.1595829725265503, "learning_rate": 7.871372376094642e-06, "loss": 0.3185, "step": 3823 }, { "epoch": 0.30592, "grad_norm": 1.917394995689392, "learning_rate": 7.870342699554484e-06, "loss": 0.4492, "step": 3824 }, { "epoch": 0.306, "grad_norm": 1.6679039001464844, "learning_rate": 7.869312841417346e-06, "loss": 0.4018, "step": 3825 }, { "epoch": 0.30608, "grad_norm": 1.2022053003311157, "learning_rate": 7.868282801748389e-06, "loss": 0.2811, "step": 3826 }, { "epoch": 0.30616, "grad_norm": 1.3668828010559082, "learning_rate": 7.867252580612775e-06, "loss": 0.2921, "step": 3827 }, { "epoch": 0.30624, "grad_norm": 1.7087875604629517, "learning_rate": 7.866222178075681e-06, "loss": 0.3836, "step": 3828 }, { "epoch": 0.30632, "grad_norm": 1.1265833377838135, "learning_rate": 7.865191594202302e-06, "loss": 0.2809, "step": 3829 }, { "epoch": 0.3064, "grad_norm": 1.4156330823898315, "learning_rate": 7.864160829057838e-06, "loss": 0.3044, "step": 3830 }, { "epoch": 0.30648, "grad_norm": 1.5269126892089844, "learning_rate": 7.863129882707504e-06, "loss": 0.3253, "step": 3831 }, { "epoch": 0.30656, "grad_norm": 1.6441112756729126, "learning_rate": 7.862098755216519e-06, "loss": 0.3304, "step": 3832 }, { "epoch": 0.30664, "grad_norm": 1.804513931274414, "learning_rate": 7.861067446650124e-06, "loss": 0.3199, "step": 3833 }, { "epoch": 0.30672, "grad_norm": 1.4178650379180908, "learning_rate": 7.860035957073566e-06, "loss": 0.326, "step": 3834 }, { "epoch": 0.3068, "grad_norm": 1.4408081769943237, "learning_rate": 7.8590042865521e-06, "loss": 0.3487, "step": 3835 }, { "epoch": 0.30688, "grad_norm": 1.7407159805297852, "learning_rate": 7.857972435151e-06, "loss": 0.3687, "step": 3836 }, { "epoch": 0.30696, "grad_norm": 1.395830512046814, "learning_rate": 7.856940402935547e-06, "loss": 0.2618, "step": 3837 }, { "epoch": 0.30704, "grad_norm": 2.0533602237701416, "learning_rate": 7.855908189971036e-06, "loss": 0.3489, "step": 3838 }, { "epoch": 0.30712, "grad_norm": 1.6558175086975098, "learning_rate": 7.854875796322767e-06, "loss": 0.3476, "step": 3839 }, { "epoch": 0.3072, "grad_norm": 1.524119257926941, "learning_rate": 7.853843222056059e-06, "loss": 0.4397, "step": 3840 }, { "epoch": 0.30728, "grad_norm": 1.5205790996551514, "learning_rate": 7.85281046723624e-06, "loss": 0.3255, "step": 3841 }, { "epoch": 0.30736, "grad_norm": 1.547203779220581, "learning_rate": 7.851777531928648e-06, "loss": 0.369, "step": 3842 }, { "epoch": 0.30744, "grad_norm": 1.3715342283248901, "learning_rate": 7.850744416198635e-06, "loss": 0.2742, "step": 3843 }, { "epoch": 0.30752, "grad_norm": 1.7399414777755737, "learning_rate": 7.849711120111561e-06, "loss": 0.3737, "step": 3844 }, { "epoch": 0.3076, "grad_norm": 1.67813241481781, "learning_rate": 7.848677643732799e-06, "loss": 0.3005, "step": 3845 }, { "epoch": 0.30768, "grad_norm": 1.495720386505127, "learning_rate": 7.847643987127733e-06, "loss": 0.3383, "step": 3846 }, { "epoch": 0.30776, "grad_norm": 1.943218469619751, "learning_rate": 7.846610150361761e-06, "loss": 0.4522, "step": 3847 }, { "epoch": 0.30784, "grad_norm": 2.0549914836883545, "learning_rate": 7.84557613350029e-06, "loss": 0.4285, "step": 3848 }, { "epoch": 0.30792, "grad_norm": 2.2861602306365967, "learning_rate": 7.844541936608738e-06, "loss": 0.5222, "step": 3849 }, { "epoch": 0.308, "grad_norm": 1.814911961555481, "learning_rate": 7.843507559752537e-06, "loss": 0.4032, "step": 3850 }, { "epoch": 0.30808, "grad_norm": 1.2270601987838745, "learning_rate": 7.842473002997127e-06, "loss": 0.2968, "step": 3851 }, { "epoch": 0.30816, "grad_norm": 1.692413091659546, "learning_rate": 7.841438266407959e-06, "loss": 0.4398, "step": 3852 }, { "epoch": 0.30824, "grad_norm": 1.7804385423660278, "learning_rate": 7.8404033500505e-06, "loss": 0.4585, "step": 3853 }, { "epoch": 0.30832, "grad_norm": 1.3115049600601196, "learning_rate": 7.839368253990224e-06, "loss": 0.2864, "step": 3854 }, { "epoch": 0.3084, "grad_norm": 1.490980625152588, "learning_rate": 7.838332978292622e-06, "loss": 0.3292, "step": 3855 }, { "epoch": 0.30848, "grad_norm": 1.78223717212677, "learning_rate": 7.837297523023189e-06, "loss": 0.4908, "step": 3856 }, { "epoch": 0.30856, "grad_norm": 1.4501593112945557, "learning_rate": 7.836261888247434e-06, "loss": 0.3176, "step": 3857 }, { "epoch": 0.30864, "grad_norm": 1.7654818296432495, "learning_rate": 7.83522607403088e-06, "loss": 0.3421, "step": 3858 }, { "epoch": 0.30872, "grad_norm": 1.4273486137390137, "learning_rate": 7.834190080439058e-06, "loss": 0.3251, "step": 3859 }, { "epoch": 0.3088, "grad_norm": 1.3740286827087402, "learning_rate": 7.833153907537515e-06, "loss": 0.3186, "step": 3860 }, { "epoch": 0.30888, "grad_norm": 1.4216959476470947, "learning_rate": 7.832117555391803e-06, "loss": 0.3298, "step": 3861 }, { "epoch": 0.30896, "grad_norm": 1.8324718475341797, "learning_rate": 7.831081024067489e-06, "loss": 0.4795, "step": 3862 }, { "epoch": 0.30904, "grad_norm": 1.479018211364746, "learning_rate": 7.83004431363015e-06, "loss": 0.32, "step": 3863 }, { "epoch": 0.30912, "grad_norm": 1.3187556266784668, "learning_rate": 7.829007424145379e-06, "loss": 0.2529, "step": 3864 }, { "epoch": 0.3092, "grad_norm": 1.7161333560943604, "learning_rate": 7.827970355678774e-06, "loss": 0.3463, "step": 3865 }, { "epoch": 0.30928, "grad_norm": 1.6725596189498901, "learning_rate": 7.826933108295947e-06, "loss": 0.4048, "step": 3866 }, { "epoch": 0.30936, "grad_norm": 1.5978217124938965, "learning_rate": 7.82589568206252e-06, "loss": 0.333, "step": 3867 }, { "epoch": 0.30944, "grad_norm": 1.6118669509887695, "learning_rate": 7.82485807704413e-06, "loss": 0.4502, "step": 3868 }, { "epoch": 0.30952, "grad_norm": 1.6804618835449219, "learning_rate": 7.823820293306421e-06, "loss": 0.2939, "step": 3869 }, { "epoch": 0.3096, "grad_norm": 1.3810745477676392, "learning_rate": 7.82278233091505e-06, "loss": 0.2853, "step": 3870 }, { "epoch": 0.30968, "grad_norm": 1.4669585227966309, "learning_rate": 7.821744189935687e-06, "loss": 0.3352, "step": 3871 }, { "epoch": 0.30976, "grad_norm": 1.5521767139434814, "learning_rate": 7.82070587043401e-06, "loss": 0.3828, "step": 3872 }, { "epoch": 0.30984, "grad_norm": 2.0385186672210693, "learning_rate": 7.81966737247571e-06, "loss": 0.4688, "step": 3873 }, { "epoch": 0.30992, "grad_norm": 1.3120101690292358, "learning_rate": 7.81862869612649e-06, "loss": 0.2976, "step": 3874 }, { "epoch": 0.31, "grad_norm": 1.5607187747955322, "learning_rate": 7.817589841452065e-06, "loss": 0.2853, "step": 3875 }, { "epoch": 0.31008, "grad_norm": 1.4162628650665283, "learning_rate": 7.816550808518158e-06, "loss": 0.3474, "step": 3876 }, { "epoch": 0.31016, "grad_norm": 1.9220107793807983, "learning_rate": 7.815511597390506e-06, "loss": 0.3522, "step": 3877 }, { "epoch": 0.31024, "grad_norm": 1.6877249479293823, "learning_rate": 7.814472208134855e-06, "loss": 0.331, "step": 3878 }, { "epoch": 0.31032, "grad_norm": 1.6581977605819702, "learning_rate": 7.813432640816965e-06, "loss": 0.4292, "step": 3879 }, { "epoch": 0.3104, "grad_norm": 1.860425591468811, "learning_rate": 7.812392895502605e-06, "loss": 0.4297, "step": 3880 }, { "epoch": 0.31048, "grad_norm": 1.4564399719238281, "learning_rate": 7.811352972257558e-06, "loss": 0.3113, "step": 3881 }, { "epoch": 0.31056, "grad_norm": 1.661933183670044, "learning_rate": 7.810312871147614e-06, "loss": 0.381, "step": 3882 }, { "epoch": 0.31064, "grad_norm": 1.430387258529663, "learning_rate": 7.809272592238579e-06, "loss": 0.2592, "step": 3883 }, { "epoch": 0.31072, "grad_norm": 1.3695735931396484, "learning_rate": 7.808232135596267e-06, "loss": 0.3324, "step": 3884 }, { "epoch": 0.3108, "grad_norm": 1.8727529048919678, "learning_rate": 7.807191501286505e-06, "loss": 0.4287, "step": 3885 }, { "epoch": 0.31088, "grad_norm": 1.6684083938598633, "learning_rate": 7.806150689375126e-06, "loss": 0.3635, "step": 3886 }, { "epoch": 0.31096, "grad_norm": 1.6127012968063354, "learning_rate": 7.805109699927985e-06, "loss": 0.3655, "step": 3887 }, { "epoch": 0.31104, "grad_norm": 1.7159547805786133, "learning_rate": 7.804068533010938e-06, "loss": 0.3361, "step": 3888 }, { "epoch": 0.31112, "grad_norm": 1.4053112268447876, "learning_rate": 7.803027188689859e-06, "loss": 0.3538, "step": 3889 }, { "epoch": 0.3112, "grad_norm": 1.524722695350647, "learning_rate": 7.801985667030628e-06, "loss": 0.4273, "step": 3890 }, { "epoch": 0.31128, "grad_norm": 1.4059796333312988, "learning_rate": 7.800943968099139e-06, "loss": 0.3329, "step": 3891 }, { "epoch": 0.31136, "grad_norm": 1.8556325435638428, "learning_rate": 7.799902091961298e-06, "loss": 0.4473, "step": 3892 }, { "epoch": 0.31144, "grad_norm": 1.5466563701629639, "learning_rate": 7.798860038683019e-06, "loss": 0.3742, "step": 3893 }, { "epoch": 0.31152, "grad_norm": 1.2450904846191406, "learning_rate": 7.79781780833023e-06, "loss": 0.2928, "step": 3894 }, { "epoch": 0.3116, "grad_norm": 1.7872172594070435, "learning_rate": 7.796775400968871e-06, "loss": 0.415, "step": 3895 }, { "epoch": 0.31168, "grad_norm": 1.6194071769714355, "learning_rate": 7.79573281666489e-06, "loss": 0.3123, "step": 3896 }, { "epoch": 0.31176, "grad_norm": 1.4827814102172852, "learning_rate": 7.794690055484249e-06, "loss": 0.2888, "step": 3897 }, { "epoch": 0.31184, "grad_norm": 1.2528879642486572, "learning_rate": 7.793647117492919e-06, "loss": 0.2737, "step": 3898 }, { "epoch": 0.31192, "grad_norm": 1.405361294746399, "learning_rate": 7.792604002756882e-06, "loss": 0.3119, "step": 3899 }, { "epoch": 0.312, "grad_norm": 1.7106093168258667, "learning_rate": 7.791560711342134e-06, "loss": 0.3068, "step": 3900 }, { "epoch": 0.31208, "grad_norm": 2.097712755203247, "learning_rate": 7.790517243314682e-06, "loss": 0.5649, "step": 3901 }, { "epoch": 0.31216, "grad_norm": 1.1007894277572632, "learning_rate": 7.78947359874054e-06, "loss": 0.2437, "step": 3902 }, { "epoch": 0.31224, "grad_norm": 1.5791645050048828, "learning_rate": 7.788429777685737e-06, "loss": 0.3363, "step": 3903 }, { "epoch": 0.31232, "grad_norm": 1.4127541780471802, "learning_rate": 7.787385780216313e-06, "loss": 0.3199, "step": 3904 }, { "epoch": 0.3124, "grad_norm": 1.4177106618881226, "learning_rate": 7.786341606398318e-06, "loss": 0.3293, "step": 3905 }, { "epoch": 0.31248, "grad_norm": 1.3515725135803223, "learning_rate": 7.785297256297811e-06, "loss": 0.2774, "step": 3906 }, { "epoch": 0.31256, "grad_norm": 1.4087157249450684, "learning_rate": 7.784252729980866e-06, "loss": 0.3314, "step": 3907 }, { "epoch": 0.31264, "grad_norm": 1.7299154996871948, "learning_rate": 7.783208027513569e-06, "loss": 0.3618, "step": 3908 }, { "epoch": 0.31272, "grad_norm": 1.4551969766616821, "learning_rate": 7.78216314896201e-06, "loss": 0.3251, "step": 3909 }, { "epoch": 0.3128, "grad_norm": 1.9065160751342773, "learning_rate": 7.7811180943923e-06, "loss": 0.5049, "step": 3910 }, { "epoch": 0.31288, "grad_norm": 1.8001066446304321, "learning_rate": 7.780072863870551e-06, "loss": 0.4066, "step": 3911 }, { "epoch": 0.31296, "grad_norm": 1.3444501161575317, "learning_rate": 7.779027457462896e-06, "loss": 0.3117, "step": 3912 }, { "epoch": 0.31304, "grad_norm": 1.6955708265304565, "learning_rate": 7.777981875235472e-06, "loss": 0.4009, "step": 3913 }, { "epoch": 0.31312, "grad_norm": 2.015012264251709, "learning_rate": 7.776936117254432e-06, "loss": 0.4429, "step": 3914 }, { "epoch": 0.3132, "grad_norm": 1.5816140174865723, "learning_rate": 7.775890183585932e-06, "loss": 0.3805, "step": 3915 }, { "epoch": 0.31328, "grad_norm": 1.2359122037887573, "learning_rate": 7.77484407429615e-06, "loss": 0.2843, "step": 3916 }, { "epoch": 0.31336, "grad_norm": 1.453830361366272, "learning_rate": 7.77379778945127e-06, "loss": 0.3051, "step": 3917 }, { "epoch": 0.31344, "grad_norm": 1.4686434268951416, "learning_rate": 7.772751329117481e-06, "loss": 0.3015, "step": 3918 }, { "epoch": 0.31352, "grad_norm": 1.454747200012207, "learning_rate": 7.771704693360996e-06, "loss": 0.3953, "step": 3919 }, { "epoch": 0.3136, "grad_norm": 1.7359517812728882, "learning_rate": 7.77065788224803e-06, "loss": 0.2675, "step": 3920 }, { "epoch": 0.31368, "grad_norm": 1.285766839981079, "learning_rate": 7.769610895844808e-06, "loss": 0.2614, "step": 3921 }, { "epoch": 0.31376, "grad_norm": 1.5808966159820557, "learning_rate": 7.768563734217572e-06, "loss": 0.2836, "step": 3922 }, { "epoch": 0.31384, "grad_norm": 1.6945921182632446, "learning_rate": 7.767516397432574e-06, "loss": 0.3945, "step": 3923 }, { "epoch": 0.31392, "grad_norm": 1.5754956007003784, "learning_rate": 7.766468885556072e-06, "loss": 0.3035, "step": 3924 }, { "epoch": 0.314, "grad_norm": 1.4004344940185547, "learning_rate": 7.76542119865434e-06, "loss": 0.3592, "step": 3925 }, { "epoch": 0.31408, "grad_norm": 1.5656622648239136, "learning_rate": 7.764373336793662e-06, "loss": 0.3183, "step": 3926 }, { "epoch": 0.31416, "grad_norm": 1.9068760871887207, "learning_rate": 7.763325300040333e-06, "loss": 0.3495, "step": 3927 }, { "epoch": 0.31424, "grad_norm": 1.5390478372573853, "learning_rate": 7.762277088460659e-06, "loss": 0.3439, "step": 3928 }, { "epoch": 0.31432, "grad_norm": 1.4874063730239868, "learning_rate": 7.761228702120956e-06, "loss": 0.3257, "step": 3929 }, { "epoch": 0.3144, "grad_norm": 1.8362594842910767, "learning_rate": 7.760180141087552e-06, "loss": 0.5189, "step": 3930 }, { "epoch": 0.31448, "grad_norm": 1.7211016416549683, "learning_rate": 7.759131405426785e-06, "loss": 0.3418, "step": 3931 }, { "epoch": 0.31456, "grad_norm": 1.237994909286499, "learning_rate": 7.758082495205007e-06, "loss": 0.2734, "step": 3932 }, { "epoch": 0.31464, "grad_norm": 1.7199347019195557, "learning_rate": 7.757033410488577e-06, "loss": 0.4034, "step": 3933 }, { "epoch": 0.31472, "grad_norm": 1.5101882219314575, "learning_rate": 7.75598415134387e-06, "loss": 0.288, "step": 3934 }, { "epoch": 0.3148, "grad_norm": 1.4008091688156128, "learning_rate": 7.754934717837267e-06, "loss": 0.3252, "step": 3935 }, { "epoch": 0.31488, "grad_norm": 1.382588505744934, "learning_rate": 7.753885110035161e-06, "loss": 0.2844, "step": 3936 }, { "epoch": 0.31496, "grad_norm": 1.5880348682403564, "learning_rate": 7.75283532800396e-06, "loss": 0.2899, "step": 3937 }, { "epoch": 0.31504, "grad_norm": 1.820833444595337, "learning_rate": 7.751785371810079e-06, "loss": 0.4055, "step": 3938 }, { "epoch": 0.31512, "grad_norm": 1.5325983762741089, "learning_rate": 7.750735241519945e-06, "loss": 0.3185, "step": 3939 }, { "epoch": 0.3152, "grad_norm": 1.4334882497787476, "learning_rate": 7.749684937199995e-06, "loss": 0.3512, "step": 3940 }, { "epoch": 0.31528, "grad_norm": 1.317023754119873, "learning_rate": 7.74863445891668e-06, "loss": 0.2794, "step": 3941 }, { "epoch": 0.31536, "grad_norm": 1.646425724029541, "learning_rate": 7.74758380673646e-06, "loss": 0.4987, "step": 3942 }, { "epoch": 0.31544, "grad_norm": 1.4610693454742432, "learning_rate": 7.746532980725806e-06, "loss": 0.3181, "step": 3943 }, { "epoch": 0.31552, "grad_norm": 1.675247311592102, "learning_rate": 7.7454819809512e-06, "loss": 0.3994, "step": 3944 }, { "epoch": 0.3156, "grad_norm": 1.453763723373413, "learning_rate": 7.744430807479136e-06, "loss": 0.3001, "step": 3945 }, { "epoch": 0.31568, "grad_norm": 1.418670415878296, "learning_rate": 7.743379460376117e-06, "loss": 0.2949, "step": 3946 }, { "epoch": 0.31576, "grad_norm": 1.5007295608520508, "learning_rate": 7.74232793970866e-06, "loss": 0.4157, "step": 3947 }, { "epoch": 0.31584, "grad_norm": 1.3415838479995728, "learning_rate": 7.741276245543287e-06, "loss": 0.3165, "step": 3948 }, { "epoch": 0.31592, "grad_norm": 1.6637834310531616, "learning_rate": 7.740224377946543e-06, "loss": 0.419, "step": 3949 }, { "epoch": 0.316, "grad_norm": 1.9902657270431519, "learning_rate": 7.739172336984969e-06, "loss": 0.3941, "step": 3950 }, { "epoch": 0.31608, "grad_norm": 1.3266291618347168, "learning_rate": 7.738120122725126e-06, "loss": 0.3171, "step": 3951 }, { "epoch": 0.31616, "grad_norm": 1.462837815284729, "learning_rate": 7.737067735233586e-06, "loss": 0.3602, "step": 3952 }, { "epoch": 0.31624, "grad_norm": 1.3161622285842896, "learning_rate": 7.736015174576927e-06, "loss": 0.2974, "step": 3953 }, { "epoch": 0.31632, "grad_norm": 1.4802780151367188, "learning_rate": 7.734962440821742e-06, "loss": 0.3562, "step": 3954 }, { "epoch": 0.3164, "grad_norm": 1.5523854494094849, "learning_rate": 7.733909534034638e-06, "loss": 0.3115, "step": 3955 }, { "epoch": 0.31648, "grad_norm": 1.815574288368225, "learning_rate": 7.732856454282223e-06, "loss": 0.3293, "step": 3956 }, { "epoch": 0.31656, "grad_norm": 2.250521183013916, "learning_rate": 7.731803201631125e-06, "loss": 0.597, "step": 3957 }, { "epoch": 0.31664, "grad_norm": 1.4423999786376953, "learning_rate": 7.730749776147978e-06, "loss": 0.3607, "step": 3958 }, { "epoch": 0.31672, "grad_norm": 1.7350084781646729, "learning_rate": 7.729696177899432e-06, "loss": 0.4186, "step": 3959 }, { "epoch": 0.3168, "grad_norm": 1.1580049991607666, "learning_rate": 7.72864240695214e-06, "loss": 0.2433, "step": 3960 }, { "epoch": 0.31688, "grad_norm": 1.309934139251709, "learning_rate": 7.727588463372775e-06, "loss": 0.2844, "step": 3961 }, { "epoch": 0.31696, "grad_norm": 1.4602081775665283, "learning_rate": 7.726534347228013e-06, "loss": 0.4136, "step": 3962 }, { "epoch": 0.31704, "grad_norm": 1.3389019966125488, "learning_rate": 7.725480058584547e-06, "loss": 0.2797, "step": 3963 }, { "epoch": 0.31712, "grad_norm": 1.8739509582519531, "learning_rate": 7.724425597509077e-06, "loss": 0.4236, "step": 3964 }, { "epoch": 0.3172, "grad_norm": 0.9077721834182739, "learning_rate": 7.723370964068315e-06, "loss": 0.2111, "step": 3965 }, { "epoch": 0.31728, "grad_norm": 1.4916582107543945, "learning_rate": 7.722316158328983e-06, "loss": 0.3819, "step": 3966 }, { "epoch": 0.31736, "grad_norm": 1.4052239656448364, "learning_rate": 7.72126118035782e-06, "loss": 0.2928, "step": 3967 }, { "epoch": 0.31744, "grad_norm": 1.968377947807312, "learning_rate": 7.720206030221567e-06, "loss": 0.3799, "step": 3968 }, { "epoch": 0.31752, "grad_norm": 1.677701473236084, "learning_rate": 7.719150707986977e-06, "loss": 0.4268, "step": 3969 }, { "epoch": 0.3176, "grad_norm": 1.2914515733718872, "learning_rate": 7.718095213720822e-06, "loss": 0.2356, "step": 3970 }, { "epoch": 0.31768, "grad_norm": 1.7517569065093994, "learning_rate": 7.717039547489877e-06, "loss": 0.3818, "step": 3971 }, { "epoch": 0.31776, "grad_norm": 1.708135962486267, "learning_rate": 7.715983709360932e-06, "loss": 0.3412, "step": 3972 }, { "epoch": 0.31784, "grad_norm": 1.5727461576461792, "learning_rate": 7.714927699400785e-06, "loss": 0.3977, "step": 3973 }, { "epoch": 0.31792, "grad_norm": 1.4590837955474854, "learning_rate": 7.713871517676247e-06, "loss": 0.3593, "step": 3974 }, { "epoch": 0.318, "grad_norm": 1.291216492652893, "learning_rate": 7.712815164254138e-06, "loss": 0.2984, "step": 3975 }, { "epoch": 0.31808, "grad_norm": 2.102529525756836, "learning_rate": 7.71175863920129e-06, "loss": 0.392, "step": 3976 }, { "epoch": 0.31816, "grad_norm": 2.003671169281006, "learning_rate": 7.710701942584549e-06, "loss": 0.3816, "step": 3977 }, { "epoch": 0.31824, "grad_norm": 1.6712678670883179, "learning_rate": 7.709645074470764e-06, "loss": 0.3481, "step": 3978 }, { "epoch": 0.31832, "grad_norm": 1.2910213470458984, "learning_rate": 7.708588034926802e-06, "loss": 0.2783, "step": 3979 }, { "epoch": 0.3184, "grad_norm": 2.094609260559082, "learning_rate": 7.70753082401954e-06, "loss": 0.6013, "step": 3980 }, { "epoch": 0.31848, "grad_norm": 1.9006894826889038, "learning_rate": 7.70647344181586e-06, "loss": 0.3544, "step": 3981 }, { "epoch": 0.31856, "grad_norm": 1.320239543914795, "learning_rate": 7.705415888382662e-06, "loss": 0.3105, "step": 3982 }, { "epoch": 0.31864, "grad_norm": 1.9996472597122192, "learning_rate": 7.704358163786852e-06, "loss": 0.4681, "step": 3983 }, { "epoch": 0.31872, "grad_norm": 1.3153249025344849, "learning_rate": 7.70330026809535e-06, "loss": 0.2759, "step": 3984 }, { "epoch": 0.3188, "grad_norm": 1.4559813737869263, "learning_rate": 7.702242201375087e-06, "loss": 0.3541, "step": 3985 }, { "epoch": 0.31888, "grad_norm": 1.5201846361160278, "learning_rate": 7.701183963693e-06, "loss": 0.323, "step": 3986 }, { "epoch": 0.31896, "grad_norm": 1.1571747064590454, "learning_rate": 7.700125555116043e-06, "loss": 0.2244, "step": 3987 }, { "epoch": 0.31904, "grad_norm": 1.2063324451446533, "learning_rate": 7.699066975711176e-06, "loss": 0.3063, "step": 3988 }, { "epoch": 0.31912, "grad_norm": 1.5614107847213745, "learning_rate": 7.698008225545372e-06, "loss": 0.3284, "step": 3989 }, { "epoch": 0.3192, "grad_norm": 1.713163137435913, "learning_rate": 7.696949304685616e-06, "loss": 0.384, "step": 3990 }, { "epoch": 0.31928, "grad_norm": 1.4818403720855713, "learning_rate": 7.695890213198902e-06, "loss": 0.3769, "step": 3991 }, { "epoch": 0.31936, "grad_norm": 1.8636175394058228, "learning_rate": 7.694830951152233e-06, "loss": 0.3745, "step": 3992 }, { "epoch": 0.31944, "grad_norm": 1.5398733615875244, "learning_rate": 7.693771518612627e-06, "loss": 0.3747, "step": 3993 }, { "epoch": 0.31952, "grad_norm": 1.2448610067367554, "learning_rate": 7.69271191564711e-06, "loss": 0.2528, "step": 3994 }, { "epoch": 0.3196, "grad_norm": 1.3927911520004272, "learning_rate": 7.691652142322723e-06, "loss": 0.3104, "step": 3995 }, { "epoch": 0.31968, "grad_norm": 1.3788466453552246, "learning_rate": 7.690592198706508e-06, "loss": 0.3017, "step": 3996 }, { "epoch": 0.31976, "grad_norm": 1.8893440961837769, "learning_rate": 7.68953208486553e-06, "loss": 0.4398, "step": 3997 }, { "epoch": 0.31984, "grad_norm": 1.4432144165039062, "learning_rate": 7.688471800866855e-06, "loss": 0.3526, "step": 3998 }, { "epoch": 0.31992, "grad_norm": 1.7225422859191895, "learning_rate": 7.687411346777564e-06, "loss": 0.3249, "step": 3999 }, { "epoch": 0.32, "grad_norm": 1.5686019659042358, "learning_rate": 7.686350722664751e-06, "loss": 0.2928, "step": 4000 }, { "epoch": 0.32008, "grad_norm": 1.5652544498443604, "learning_rate": 7.685289928595514e-06, "loss": 0.3355, "step": 4001 }, { "epoch": 0.32016, "grad_norm": 1.6727375984191895, "learning_rate": 7.68422896463697e-06, "loss": 0.3482, "step": 4002 }, { "epoch": 0.32024, "grad_norm": 1.4307200908660889, "learning_rate": 7.68316783085624e-06, "loss": 0.34, "step": 4003 }, { "epoch": 0.32032, "grad_norm": 1.5332558155059814, "learning_rate": 7.682106527320458e-06, "loss": 0.3796, "step": 4004 }, { "epoch": 0.3204, "grad_norm": 1.9234296083450317, "learning_rate": 7.681045054096773e-06, "loss": 0.3732, "step": 4005 }, { "epoch": 0.32048, "grad_norm": 1.8026024103164673, "learning_rate": 7.679983411252336e-06, "loss": 0.3543, "step": 4006 }, { "epoch": 0.32056, "grad_norm": 1.5684106349945068, "learning_rate": 7.678921598854316e-06, "loss": 0.3274, "step": 4007 }, { "epoch": 0.32064, "grad_norm": 1.4080421924591064, "learning_rate": 7.677859616969892e-06, "loss": 0.346, "step": 4008 }, { "epoch": 0.32072, "grad_norm": 1.667383074760437, "learning_rate": 7.67679746566625e-06, "loss": 0.3846, "step": 4009 }, { "epoch": 0.3208, "grad_norm": 1.5758278369903564, "learning_rate": 7.67573514501059e-06, "loss": 0.396, "step": 4010 }, { "epoch": 0.32088, "grad_norm": 1.7792264223098755, "learning_rate": 7.674672655070117e-06, "loss": 0.441, "step": 4011 }, { "epoch": 0.32096, "grad_norm": 1.3458983898162842, "learning_rate": 7.673609995912056e-06, "loss": 0.2971, "step": 4012 }, { "epoch": 0.32104, "grad_norm": 1.343664526939392, "learning_rate": 7.672547167603638e-06, "loss": 0.3028, "step": 4013 }, { "epoch": 0.32112, "grad_norm": 1.9842946529388428, "learning_rate": 7.6714841702121e-06, "loss": 0.3945, "step": 4014 }, { "epoch": 0.3212, "grad_norm": 1.2746963500976562, "learning_rate": 7.6704210038047e-06, "loss": 0.2742, "step": 4015 }, { "epoch": 0.32128, "grad_norm": 1.8831450939178467, "learning_rate": 7.669357668448695e-06, "loss": 0.3977, "step": 4016 }, { "epoch": 0.32136, "grad_norm": 1.4678828716278076, "learning_rate": 7.668294164211365e-06, "loss": 0.3188, "step": 4017 }, { "epoch": 0.32144, "grad_norm": 2.1258130073547363, "learning_rate": 7.667230491159992e-06, "loss": 0.3754, "step": 4018 }, { "epoch": 0.32152, "grad_norm": 2.3288698196411133, "learning_rate": 7.666166649361868e-06, "loss": 0.6207, "step": 4019 }, { "epoch": 0.3216, "grad_norm": 1.5426217317581177, "learning_rate": 7.665102638884303e-06, "loss": 0.331, "step": 4020 }, { "epoch": 0.32168, "grad_norm": 1.523314356803894, "learning_rate": 7.664038459794612e-06, "loss": 0.3148, "step": 4021 }, { "epoch": 0.32176, "grad_norm": 1.3950161933898926, "learning_rate": 7.66297411216012e-06, "loss": 0.2575, "step": 4022 }, { "epoch": 0.32184, "grad_norm": 1.7832293510437012, "learning_rate": 7.661909596048167e-06, "loss": 0.3661, "step": 4023 }, { "epoch": 0.32192, "grad_norm": 1.4251043796539307, "learning_rate": 7.6608449115261e-06, "loss": 0.3476, "step": 4024 }, { "epoch": 0.322, "grad_norm": 1.7455947399139404, "learning_rate": 7.659780058661278e-06, "loss": 0.4794, "step": 4025 }, { "epoch": 0.32208, "grad_norm": 1.5899649858474731, "learning_rate": 7.658715037521074e-06, "loss": 0.3369, "step": 4026 }, { "epoch": 0.32216, "grad_norm": 1.148187279701233, "learning_rate": 7.657649848172864e-06, "loss": 0.2667, "step": 4027 }, { "epoch": 0.32224, "grad_norm": 1.3705857992172241, "learning_rate": 7.65658449068404e-06, "loss": 0.2696, "step": 4028 }, { "epoch": 0.32232, "grad_norm": 1.8258371353149414, "learning_rate": 7.655518965122004e-06, "loss": 0.3487, "step": 4029 }, { "epoch": 0.3224, "grad_norm": 1.5610127449035645, "learning_rate": 7.65445327155417e-06, "loss": 0.353, "step": 4030 }, { "epoch": 0.32248, "grad_norm": 1.566694974899292, "learning_rate": 7.653387410047958e-06, "loss": 0.3971, "step": 4031 }, { "epoch": 0.32256, "grad_norm": 1.6996960639953613, "learning_rate": 7.652321380670805e-06, "loss": 0.3876, "step": 4032 }, { "epoch": 0.32264, "grad_norm": 1.460684061050415, "learning_rate": 7.65125518349015e-06, "loss": 0.3302, "step": 4033 }, { "epoch": 0.32272, "grad_norm": 1.5468209981918335, "learning_rate": 7.650188818573452e-06, "loss": 0.3631, "step": 4034 }, { "epoch": 0.3228, "grad_norm": 1.5066543817520142, "learning_rate": 7.649122285988175e-06, "loss": 0.3756, "step": 4035 }, { "epoch": 0.32288, "grad_norm": 1.4689096212387085, "learning_rate": 7.648055585801795e-06, "loss": 0.3488, "step": 4036 }, { "epoch": 0.32296, "grad_norm": 1.2508280277252197, "learning_rate": 7.646988718081799e-06, "loss": 0.2692, "step": 4037 }, { "epoch": 0.32304, "grad_norm": 1.4631690979003906, "learning_rate": 7.645921682895684e-06, "loss": 0.3593, "step": 4038 }, { "epoch": 0.32312, "grad_norm": 1.4271583557128906, "learning_rate": 7.644854480310955e-06, "loss": 0.3081, "step": 4039 }, { "epoch": 0.3232, "grad_norm": 1.613905906677246, "learning_rate": 7.643787110395135e-06, "loss": 0.4458, "step": 4040 }, { "epoch": 0.32328, "grad_norm": 1.3938994407653809, "learning_rate": 7.642719573215748e-06, "loss": 0.3038, "step": 4041 }, { "epoch": 0.32336, "grad_norm": 1.4477261304855347, "learning_rate": 7.641651868840338e-06, "loss": 0.3112, "step": 4042 }, { "epoch": 0.32344, "grad_norm": 1.692185401916504, "learning_rate": 7.640583997336455e-06, "loss": 0.4446, "step": 4043 }, { "epoch": 0.32352, "grad_norm": 1.4064701795578003, "learning_rate": 7.639515958771656e-06, "loss": 0.2536, "step": 4044 }, { "epoch": 0.3236, "grad_norm": 1.7881546020507812, "learning_rate": 7.638447753213516e-06, "loss": 0.4048, "step": 4045 }, { "epoch": 0.32368, "grad_norm": 1.3405356407165527, "learning_rate": 7.637379380729612e-06, "loss": 0.2965, "step": 4046 }, { "epoch": 0.32376, "grad_norm": 1.182118535041809, "learning_rate": 7.636310841387541e-06, "loss": 0.2477, "step": 4047 }, { "epoch": 0.32384, "grad_norm": 1.4148246049880981, "learning_rate": 7.635242135254903e-06, "loss": 0.2628, "step": 4048 }, { "epoch": 0.32392, "grad_norm": 1.527912974357605, "learning_rate": 7.634173262399315e-06, "loss": 0.3746, "step": 4049 }, { "epoch": 0.324, "grad_norm": 1.4282273054122925, "learning_rate": 7.633104222888396e-06, "loss": 0.2963, "step": 4050 }, { "epoch": 0.32408, "grad_norm": 1.5053621530532837, "learning_rate": 7.632035016789785e-06, "loss": 0.3858, "step": 4051 }, { "epoch": 0.32416, "grad_norm": 1.633727788925171, "learning_rate": 7.630965644171124e-06, "loss": 0.4096, "step": 4052 }, { "epoch": 0.32424, "grad_norm": 1.6081035137176514, "learning_rate": 7.62989610510007e-06, "loss": 0.296, "step": 4053 }, { "epoch": 0.32432, "grad_norm": 1.2334622144699097, "learning_rate": 7.628826399644292e-06, "loss": 0.2918, "step": 4054 }, { "epoch": 0.3244, "grad_norm": 1.6431307792663574, "learning_rate": 7.627756527871459e-06, "loss": 0.3395, "step": 4055 }, { "epoch": 0.32448, "grad_norm": 1.6120516061782837, "learning_rate": 7.626686489849266e-06, "loss": 0.3045, "step": 4056 }, { "epoch": 0.32456, "grad_norm": 1.4079047441482544, "learning_rate": 7.625616285645408e-06, "loss": 0.316, "step": 4057 }, { "epoch": 0.32464, "grad_norm": 1.4125021696090698, "learning_rate": 7.624545915327593e-06, "loss": 0.3104, "step": 4058 }, { "epoch": 0.32472, "grad_norm": 1.4613399505615234, "learning_rate": 7.6234753789635374e-06, "loss": 0.4579, "step": 4059 }, { "epoch": 0.3248, "grad_norm": 1.8771039247512817, "learning_rate": 7.622404676620974e-06, "loss": 0.3648, "step": 4060 }, { "epoch": 0.32488, "grad_norm": 1.4824306964874268, "learning_rate": 7.621333808367643e-06, "loss": 0.2957, "step": 4061 }, { "epoch": 0.32496, "grad_norm": 1.4490859508514404, "learning_rate": 7.62026277427129e-06, "loss": 0.2975, "step": 4062 }, { "epoch": 0.32504, "grad_norm": 1.4416332244873047, "learning_rate": 7.61919157439968e-06, "loss": 0.4024, "step": 4063 }, { "epoch": 0.32512, "grad_norm": 1.0941718816757202, "learning_rate": 7.618120208820583e-06, "loss": 0.2395, "step": 4064 }, { "epoch": 0.3252, "grad_norm": 1.8790380954742432, "learning_rate": 7.617048677601781e-06, "loss": 0.4126, "step": 4065 }, { "epoch": 0.32528, "grad_norm": 1.4821059703826904, "learning_rate": 7.615976980811067e-06, "loss": 0.3245, "step": 4066 }, { "epoch": 0.32536, "grad_norm": 1.9037108421325684, "learning_rate": 7.61490511851624e-06, "loss": 0.4843, "step": 4067 }, { "epoch": 0.32544, "grad_norm": 1.4991943836212158, "learning_rate": 7.613833090785117e-06, "loss": 0.3384, "step": 4068 }, { "epoch": 0.32552, "grad_norm": 1.4513674974441528, "learning_rate": 7.612760897685519e-06, "loss": 0.3904, "step": 4069 }, { "epoch": 0.3256, "grad_norm": 1.277267575263977, "learning_rate": 7.611688539285283e-06, "loss": 0.2926, "step": 4070 }, { "epoch": 0.32568, "grad_norm": 1.3457417488098145, "learning_rate": 7.61061601565225e-06, "loss": 0.2733, "step": 4071 }, { "epoch": 0.32576, "grad_norm": 1.9008210897445679, "learning_rate": 7.609543326854278e-06, "loss": 0.4293, "step": 4072 }, { "epoch": 0.32584, "grad_norm": 1.4903104305267334, "learning_rate": 7.608470472959233e-06, "loss": 0.3002, "step": 4073 }, { "epoch": 0.32592, "grad_norm": 1.4304206371307373, "learning_rate": 7.607397454034986e-06, "loss": 0.3073, "step": 4074 }, { "epoch": 0.326, "grad_norm": 1.7637519836425781, "learning_rate": 7.606324270149428e-06, "loss": 0.3062, "step": 4075 }, { "epoch": 0.32608, "grad_norm": 1.7573155164718628, "learning_rate": 7.605250921370454e-06, "loss": 0.3944, "step": 4076 }, { "epoch": 0.32616, "grad_norm": 1.3438637256622314, "learning_rate": 7.604177407765972e-06, "loss": 0.2591, "step": 4077 }, { "epoch": 0.32624, "grad_norm": 1.9403959512710571, "learning_rate": 7.603103729403898e-06, "loss": 0.5459, "step": 4078 }, { "epoch": 0.32632, "grad_norm": 1.421299934387207, "learning_rate": 7.602029886352163e-06, "loss": 0.3475, "step": 4079 }, { "epoch": 0.3264, "grad_norm": 1.635377287864685, "learning_rate": 7.600955878678702e-06, "loss": 0.3548, "step": 4080 }, { "epoch": 0.32648, "grad_norm": 1.75666344165802, "learning_rate": 7.599881706451464e-06, "loss": 0.5445, "step": 4081 }, { "epoch": 0.32656, "grad_norm": 1.7399910688400269, "learning_rate": 7.598807369738411e-06, "loss": 0.3803, "step": 4082 }, { "epoch": 0.32664, "grad_norm": 1.6762953996658325, "learning_rate": 7.597732868607511e-06, "loss": 0.3633, "step": 4083 }, { "epoch": 0.32672, "grad_norm": 1.6950074434280396, "learning_rate": 7.5966582031267455e-06, "loss": 0.4613, "step": 4084 }, { "epoch": 0.3268, "grad_norm": 1.7901536226272583, "learning_rate": 7.595583373364104e-06, "loss": 0.3687, "step": 4085 }, { "epoch": 0.32688, "grad_norm": 1.0144829750061035, "learning_rate": 7.594508379387586e-06, "loss": 0.2249, "step": 4086 }, { "epoch": 0.32696, "grad_norm": 1.360308051109314, "learning_rate": 7.593433221265205e-06, "loss": 0.3309, "step": 4087 }, { "epoch": 0.32704, "grad_norm": 1.6082754135131836, "learning_rate": 7.59235789906498e-06, "loss": 0.3511, "step": 4088 }, { "epoch": 0.32712, "grad_norm": 1.4917949438095093, "learning_rate": 7.591282412854947e-06, "loss": 0.3855, "step": 4089 }, { "epoch": 0.3272, "grad_norm": 1.895923376083374, "learning_rate": 7.590206762703145e-06, "loss": 0.3869, "step": 4090 }, { "epoch": 0.32728, "grad_norm": 1.37894606590271, "learning_rate": 7.58913094867763e-06, "loss": 0.3502, "step": 4091 }, { "epoch": 0.32736, "grad_norm": 1.5004284381866455, "learning_rate": 7.588054970846461e-06, "loss": 0.3698, "step": 4092 }, { "epoch": 0.32744, "grad_norm": 1.6978641748428345, "learning_rate": 7.586978829277714e-06, "loss": 0.3473, "step": 4093 }, { "epoch": 0.32752, "grad_norm": 1.7193430662155151, "learning_rate": 7.5859025240394725e-06, "loss": 0.3368, "step": 4094 }, { "epoch": 0.3276, "grad_norm": 1.4797018766403198, "learning_rate": 7.584826055199831e-06, "loss": 0.2682, "step": 4095 }, { "epoch": 0.32768, "grad_norm": 1.4176084995269775, "learning_rate": 7.583749422826894e-06, "loss": 0.2922, "step": 4096 }, { "epoch": 0.32776, "grad_norm": 1.4341297149658203, "learning_rate": 7.582672626988776e-06, "loss": 0.3214, "step": 4097 }, { "epoch": 0.32784, "grad_norm": 1.48208749294281, "learning_rate": 7.581595667753603e-06, "loss": 0.3318, "step": 4098 }, { "epoch": 0.32792, "grad_norm": 1.4464733600616455, "learning_rate": 7.580518545189509e-06, "loss": 0.358, "step": 4099 }, { "epoch": 0.328, "grad_norm": 1.890705943107605, "learning_rate": 7.579441259364643e-06, "loss": 0.5347, "step": 4100 }, { "epoch": 0.32808, "grad_norm": 1.4273359775543213, "learning_rate": 7.578363810347158e-06, "loss": 0.3098, "step": 4101 }, { "epoch": 0.32816, "grad_norm": 1.5816519260406494, "learning_rate": 7.577286198205223e-06, "loss": 0.3446, "step": 4102 }, { "epoch": 0.32824, "grad_norm": 1.684738278388977, "learning_rate": 7.576208423007012e-06, "loss": 0.4043, "step": 4103 }, { "epoch": 0.32832, "grad_norm": 1.6417845487594604, "learning_rate": 7.575130484820715e-06, "loss": 0.3435, "step": 4104 }, { "epoch": 0.3284, "grad_norm": 1.5224345922470093, "learning_rate": 7.57405238371453e-06, "loss": 0.4164, "step": 4105 }, { "epoch": 0.32848, "grad_norm": 1.3996219635009766, "learning_rate": 7.5729741197566604e-06, "loss": 0.3818, "step": 4106 }, { "epoch": 0.32856, "grad_norm": 1.8475956916809082, "learning_rate": 7.571895693015329e-06, "loss": 0.5135, "step": 4107 }, { "epoch": 0.32864, "grad_norm": 2.0243968963623047, "learning_rate": 7.570817103558764e-06, "loss": 0.3888, "step": 4108 }, { "epoch": 0.32872, "grad_norm": 2.109097480773926, "learning_rate": 7.569738351455203e-06, "loss": 0.3905, "step": 4109 }, { "epoch": 0.3288, "grad_norm": 1.4075511693954468, "learning_rate": 7.568659436772892e-06, "loss": 0.2856, "step": 4110 }, { "epoch": 0.32888, "grad_norm": 1.384484052658081, "learning_rate": 7.567580359580095e-06, "loss": 0.3404, "step": 4111 }, { "epoch": 0.32896, "grad_norm": 1.7616000175476074, "learning_rate": 7.56650111994508e-06, "loss": 0.3693, "step": 4112 }, { "epoch": 0.32904, "grad_norm": 1.8043757677078247, "learning_rate": 7.565421717936127e-06, "loss": 0.3948, "step": 4113 }, { "epoch": 0.32912, "grad_norm": 1.4485535621643066, "learning_rate": 7.564342153621525e-06, "loss": 0.3289, "step": 4114 }, { "epoch": 0.3292, "grad_norm": 1.7356315851211548, "learning_rate": 7.5632624270695755e-06, "loss": 0.3701, "step": 4115 }, { "epoch": 0.32928, "grad_norm": 1.4794209003448486, "learning_rate": 7.562182538348589e-06, "loss": 0.3695, "step": 4116 }, { "epoch": 0.32936, "grad_norm": 1.806711196899414, "learning_rate": 7.5611024875268856e-06, "loss": 0.3913, "step": 4117 }, { "epoch": 0.32944, "grad_norm": 1.9311702251434326, "learning_rate": 7.560022274672798e-06, "loss": 0.3523, "step": 4118 }, { "epoch": 0.32952, "grad_norm": 1.8792706727981567, "learning_rate": 7.5589418998546675e-06, "loss": 0.477, "step": 4119 }, { "epoch": 0.3296, "grad_norm": 1.8583472967147827, "learning_rate": 7.5578613631408444e-06, "loss": 0.4641, "step": 4120 }, { "epoch": 0.32968, "grad_norm": 1.8453302383422852, "learning_rate": 7.556780664599692e-06, "loss": 0.365, "step": 4121 }, { "epoch": 0.32976, "grad_norm": 1.3581293821334839, "learning_rate": 7.555699804299581e-06, "loss": 0.3245, "step": 4122 }, { "epoch": 0.32984, "grad_norm": 1.490319848060608, "learning_rate": 7.554618782308897e-06, "loss": 0.2991, "step": 4123 }, { "epoch": 0.32992, "grad_norm": 1.4145665168762207, "learning_rate": 7.553537598696028e-06, "loss": 0.2676, "step": 4124 }, { "epoch": 0.33, "grad_norm": 2.0299646854400635, "learning_rate": 7.552456253529381e-06, "loss": 0.3604, "step": 4125 }, { "epoch": 0.33008, "grad_norm": 1.8027198314666748, "learning_rate": 7.551374746877367e-06, "loss": 0.5369, "step": 4126 }, { "epoch": 0.33016, "grad_norm": 1.3814693689346313, "learning_rate": 7.550293078808408e-06, "loss": 0.3013, "step": 4127 }, { "epoch": 0.33024, "grad_norm": 1.5172748565673828, "learning_rate": 7.549211249390942e-06, "loss": 0.3157, "step": 4128 }, { "epoch": 0.33032, "grad_norm": 1.5251926183700562, "learning_rate": 7.548129258693407e-06, "loss": 0.3171, "step": 4129 }, { "epoch": 0.3304, "grad_norm": 1.377307415008545, "learning_rate": 7.547047106784262e-06, "loss": 0.3191, "step": 4130 }, { "epoch": 0.33048, "grad_norm": 1.4914178848266602, "learning_rate": 7.545964793731968e-06, "loss": 0.3543, "step": 4131 }, { "epoch": 0.33056, "grad_norm": 1.3784854412078857, "learning_rate": 7.544882319605e-06, "loss": 0.2985, "step": 4132 }, { "epoch": 0.33064, "grad_norm": 1.4411064386367798, "learning_rate": 7.543799684471845e-06, "loss": 0.3399, "step": 4133 }, { "epoch": 0.33072, "grad_norm": 0.9017268419265747, "learning_rate": 7.542716888400994e-06, "loss": 0.2339, "step": 4134 }, { "epoch": 0.3308, "grad_norm": 1.7791081666946411, "learning_rate": 7.541633931460953e-06, "loss": 0.4546, "step": 4135 }, { "epoch": 0.33088, "grad_norm": 1.6857120990753174, "learning_rate": 7.540550813720238e-06, "loss": 0.3708, "step": 4136 }, { "epoch": 0.33096, "grad_norm": 1.3614606857299805, "learning_rate": 7.5394675352473735e-06, "loss": 0.2817, "step": 4137 }, { "epoch": 0.33104, "grad_norm": 1.4521223306655884, "learning_rate": 7.538384096110896e-06, "loss": 0.3394, "step": 4138 }, { "epoch": 0.33112, "grad_norm": 1.4563080072402954, "learning_rate": 7.5373004963793496e-06, "loss": 0.2831, "step": 4139 }, { "epoch": 0.3312, "grad_norm": 1.2740604877471924, "learning_rate": 7.53621673612129e-06, "loss": 0.3314, "step": 4140 }, { "epoch": 0.33128, "grad_norm": 1.6157991886138916, "learning_rate": 7.5351328154052835e-06, "loss": 0.334, "step": 4141 }, { "epoch": 0.33136, "grad_norm": 1.7817760705947876, "learning_rate": 7.534048734299908e-06, "loss": 0.3722, "step": 4142 }, { "epoch": 0.33144, "grad_norm": 1.3499414920806885, "learning_rate": 7.532964492873747e-06, "loss": 0.2966, "step": 4143 }, { "epoch": 0.33152, "grad_norm": 1.4766731262207031, "learning_rate": 7.531880091195398e-06, "loss": 0.3477, "step": 4144 }, { "epoch": 0.3316, "grad_norm": 1.642026424407959, "learning_rate": 7.530795529333468e-06, "loss": 0.3435, "step": 4145 }, { "epoch": 0.33168, "grad_norm": 1.4514563083648682, "learning_rate": 7.529710807356572e-06, "loss": 0.2747, "step": 4146 }, { "epoch": 0.33176, "grad_norm": 1.4751369953155518, "learning_rate": 7.528625925333337e-06, "loss": 0.3201, "step": 4147 }, { "epoch": 0.33184, "grad_norm": 1.3058736324310303, "learning_rate": 7.5275408833324025e-06, "loss": 0.2439, "step": 4148 }, { "epoch": 0.33192, "grad_norm": 1.7958042621612549, "learning_rate": 7.526455681422413e-06, "loss": 0.344, "step": 4149 }, { "epoch": 0.332, "grad_norm": 1.462531328201294, "learning_rate": 7.525370319672025e-06, "loss": 0.2951, "step": 4150 }, { "epoch": 0.33208, "grad_norm": 1.6958011388778687, "learning_rate": 7.5242847981499064e-06, "loss": 0.4044, "step": 4151 }, { "epoch": 0.33216, "grad_norm": 1.6132330894470215, "learning_rate": 7.5231991169247355e-06, "loss": 0.5282, "step": 4152 }, { "epoch": 0.33224, "grad_norm": 1.6550413370132446, "learning_rate": 7.522113276065199e-06, "loss": 0.3572, "step": 4153 }, { "epoch": 0.33232, "grad_norm": 1.4192287921905518, "learning_rate": 7.521027275639996e-06, "loss": 0.3088, "step": 4154 }, { "epoch": 0.3324, "grad_norm": 1.506937861442566, "learning_rate": 7.5199411157178316e-06, "loss": 0.2994, "step": 4155 }, { "epoch": 0.33248, "grad_norm": 1.411502480506897, "learning_rate": 7.518854796367424e-06, "loss": 0.2824, "step": 4156 }, { "epoch": 0.33256, "grad_norm": 1.7876211404800415, "learning_rate": 7.517768317657503e-06, "loss": 0.3639, "step": 4157 }, { "epoch": 0.33264, "grad_norm": 1.318710207939148, "learning_rate": 7.516681679656804e-06, "loss": 0.2662, "step": 4158 }, { "epoch": 0.33272, "grad_norm": 1.4276552200317383, "learning_rate": 7.515594882434076e-06, "loss": 0.3084, "step": 4159 }, { "epoch": 0.3328, "grad_norm": 1.7345210313796997, "learning_rate": 7.514507926058077e-06, "loss": 0.3099, "step": 4160 }, { "epoch": 0.33288, "grad_norm": 1.3311537504196167, "learning_rate": 7.513420810597576e-06, "loss": 0.2696, "step": 4161 }, { "epoch": 0.33296, "grad_norm": 1.3819355964660645, "learning_rate": 7.512333536121349e-06, "loss": 0.3543, "step": 4162 }, { "epoch": 0.33304, "grad_norm": 1.6310116052627563, "learning_rate": 7.5112461026981855e-06, "loss": 0.385, "step": 4163 }, { "epoch": 0.33312, "grad_norm": 1.8169513940811157, "learning_rate": 7.510158510396883e-06, "loss": 0.4667, "step": 4164 }, { "epoch": 0.3332, "grad_norm": 1.5222045183181763, "learning_rate": 7.509070759286252e-06, "loss": 0.3944, "step": 4165 }, { "epoch": 0.33328, "grad_norm": 1.5854097604751587, "learning_rate": 7.507982849435109e-06, "loss": 0.3542, "step": 4166 }, { "epoch": 0.33336, "grad_norm": 1.7982919216156006, "learning_rate": 7.5068947809122835e-06, "loss": 0.4572, "step": 4167 }, { "epoch": 0.33344, "grad_norm": 1.4023102521896362, "learning_rate": 7.505806553786613e-06, "loss": 0.2961, "step": 4168 }, { "epoch": 0.33352, "grad_norm": 1.5655568838119507, "learning_rate": 7.504718168126947e-06, "loss": 0.451, "step": 4169 }, { "epoch": 0.3336, "grad_norm": 1.6367048025131226, "learning_rate": 7.5036296240021425e-06, "loss": 0.3058, "step": 4170 }, { "epoch": 0.33368, "grad_norm": 1.4759256839752197, "learning_rate": 7.5025409214810676e-06, "loss": 0.3068, "step": 4171 }, { "epoch": 0.33376, "grad_norm": 2.158902406692505, "learning_rate": 7.501452060632603e-06, "loss": 0.4489, "step": 4172 }, { "epoch": 0.33384, "grad_norm": 1.8630950450897217, "learning_rate": 7.500363041525638e-06, "loss": 0.4119, "step": 4173 }, { "epoch": 0.33392, "grad_norm": 1.9762685298919678, "learning_rate": 7.499273864229069e-06, "loss": 0.491, "step": 4174 }, { "epoch": 0.334, "grad_norm": 1.4855585098266602, "learning_rate": 7.4981845288118026e-06, "loss": 0.2987, "step": 4175 }, { "epoch": 0.33408, "grad_norm": 1.9851583242416382, "learning_rate": 7.497095035342762e-06, "loss": 0.3656, "step": 4176 }, { "epoch": 0.33416, "grad_norm": 1.7855724096298218, "learning_rate": 7.496005383890874e-06, "loss": 0.344, "step": 4177 }, { "epoch": 0.33424, "grad_norm": 1.5612376928329468, "learning_rate": 7.4949155745250765e-06, "loss": 0.3669, "step": 4178 }, { "epoch": 0.33432, "grad_norm": 1.6685304641723633, "learning_rate": 7.493825607314319e-06, "loss": 0.4009, "step": 4179 }, { "epoch": 0.3344, "grad_norm": 1.3716298341751099, "learning_rate": 7.49273548232756e-06, "loss": 0.3536, "step": 4180 }, { "epoch": 0.33448, "grad_norm": 1.7887216806411743, "learning_rate": 7.491645199633768e-06, "loss": 0.4492, "step": 4181 }, { "epoch": 0.33456, "grad_norm": 1.4562711715698242, "learning_rate": 7.4905547593019205e-06, "loss": 0.3379, "step": 4182 }, { "epoch": 0.33464, "grad_norm": 1.4248229265213013, "learning_rate": 7.4894641614010065e-06, "loss": 0.3323, "step": 4183 }, { "epoch": 0.33472, "grad_norm": 1.4600697755813599, "learning_rate": 7.488373406000024e-06, "loss": 0.3882, "step": 4184 }, { "epoch": 0.3348, "grad_norm": 1.9589154720306396, "learning_rate": 7.487282493167985e-06, "loss": 0.4016, "step": 4185 }, { "epoch": 0.33488, "grad_norm": 1.8087719678878784, "learning_rate": 7.486191422973903e-06, "loss": 0.3524, "step": 4186 }, { "epoch": 0.33496, "grad_norm": 1.154066801071167, "learning_rate": 7.485100195486808e-06, "loss": 0.2329, "step": 4187 }, { "epoch": 0.33504, "grad_norm": 2.5276176929473877, "learning_rate": 7.48400881077574e-06, "loss": 0.5058, "step": 4188 }, { "epoch": 0.33512, "grad_norm": 2.2821571826934814, "learning_rate": 7.482917268909746e-06, "loss": 0.5157, "step": 4189 }, { "epoch": 0.3352, "grad_norm": 1.4835360050201416, "learning_rate": 7.481825569957886e-06, "loss": 0.3223, "step": 4190 }, { "epoch": 0.33528, "grad_norm": 1.7583729028701782, "learning_rate": 7.4807337139892235e-06, "loss": 0.3243, "step": 4191 }, { "epoch": 0.33536, "grad_norm": 1.958134412765503, "learning_rate": 7.479641701072841e-06, "loss": 0.448, "step": 4192 }, { "epoch": 0.33544, "grad_norm": 1.5461760759353638, "learning_rate": 7.478549531277824e-06, "loss": 0.2942, "step": 4193 }, { "epoch": 0.33552, "grad_norm": 1.5328489542007446, "learning_rate": 7.477457204673272e-06, "loss": 0.327, "step": 4194 }, { "epoch": 0.3356, "grad_norm": 1.610932469367981, "learning_rate": 7.476364721328292e-06, "loss": 0.3925, "step": 4195 }, { "epoch": 0.33568, "grad_norm": 1.6235414743423462, "learning_rate": 7.475272081312003e-06, "loss": 0.3026, "step": 4196 }, { "epoch": 0.33576, "grad_norm": 1.615761399269104, "learning_rate": 7.4741792846935304e-06, "loss": 0.3267, "step": 4197 }, { "epoch": 0.33584, "grad_norm": 1.9006683826446533, "learning_rate": 7.473086331542013e-06, "loss": 0.3666, "step": 4198 }, { "epoch": 0.33592, "grad_norm": 1.3763328790664673, "learning_rate": 7.471993221926599e-06, "loss": 0.2783, "step": 4199 }, { "epoch": 0.336, "grad_norm": 1.5629264116287231, "learning_rate": 7.4708999559164455e-06, "loss": 0.3361, "step": 4200 }, { "epoch": 0.33608, "grad_norm": 1.5894604921340942, "learning_rate": 7.469806533580719e-06, "loss": 0.3866, "step": 4201 }, { "epoch": 0.33616, "grad_norm": 1.707032322883606, "learning_rate": 7.468712954988597e-06, "loss": 0.337, "step": 4202 }, { "epoch": 0.33624, "grad_norm": 1.5144277811050415, "learning_rate": 7.467619220209268e-06, "loss": 0.4724, "step": 4203 }, { "epoch": 0.33632, "grad_norm": 1.9514338970184326, "learning_rate": 7.466525329311927e-06, "loss": 0.4198, "step": 4204 }, { "epoch": 0.3364, "grad_norm": 1.3086645603179932, "learning_rate": 7.4654312823657805e-06, "loss": 0.2661, "step": 4205 }, { "epoch": 0.33648, "grad_norm": 1.8274202346801758, "learning_rate": 7.464337079440046e-06, "loss": 0.3858, "step": 4206 }, { "epoch": 0.33656, "grad_norm": 1.772868037223816, "learning_rate": 7.4632427206039505e-06, "loss": 0.3487, "step": 4207 }, { "epoch": 0.33664, "grad_norm": 1.3562657833099365, "learning_rate": 7.4621482059267315e-06, "loss": 0.3073, "step": 4208 }, { "epoch": 0.33672, "grad_norm": 1.3614493608474731, "learning_rate": 7.461053535477632e-06, "loss": 0.3029, "step": 4209 }, { "epoch": 0.3368, "grad_norm": 1.4911876916885376, "learning_rate": 7.459958709325911e-06, "loss": 0.4744, "step": 4210 }, { "epoch": 0.33688, "grad_norm": 1.3329766988754272, "learning_rate": 7.458863727540832e-06, "loss": 0.2758, "step": 4211 }, { "epoch": 0.33696, "grad_norm": 1.798316478729248, "learning_rate": 7.457768590191674e-06, "loss": 0.3735, "step": 4212 }, { "epoch": 0.33704, "grad_norm": 1.8065177202224731, "learning_rate": 7.456673297347721e-06, "loss": 0.3828, "step": 4213 }, { "epoch": 0.33712, "grad_norm": 1.4247829914093018, "learning_rate": 7.455577849078269e-06, "loss": 0.3492, "step": 4214 }, { "epoch": 0.3372, "grad_norm": 1.6128746271133423, "learning_rate": 7.454482245452621e-06, "loss": 0.4182, "step": 4215 }, { "epoch": 0.33728, "grad_norm": 1.8242870569229126, "learning_rate": 7.453386486540095e-06, "loss": 0.4306, "step": 4216 }, { "epoch": 0.33736, "grad_norm": 1.6618531942367554, "learning_rate": 7.4522905724100155e-06, "loss": 0.3258, "step": 4217 }, { "epoch": 0.33744, "grad_norm": 2.2641119956970215, "learning_rate": 7.451194503131715e-06, "loss": 0.4345, "step": 4218 }, { "epoch": 0.33752, "grad_norm": 1.4390738010406494, "learning_rate": 7.450098278774542e-06, "loss": 0.2435, "step": 4219 }, { "epoch": 0.3376, "grad_norm": 1.6688294410705566, "learning_rate": 7.449001899407849e-06, "loss": 0.3547, "step": 4220 }, { "epoch": 0.33768, "grad_norm": 1.3295515775680542, "learning_rate": 7.447905365101e-06, "loss": 0.3372, "step": 4221 }, { "epoch": 0.33776, "grad_norm": 1.7421692609786987, "learning_rate": 7.446808675923369e-06, "loss": 0.417, "step": 4222 }, { "epoch": 0.33784, "grad_norm": 1.7241042852401733, "learning_rate": 7.44571183194434e-06, "loss": 0.436, "step": 4223 }, { "epoch": 0.33792, "grad_norm": 1.7911804914474487, "learning_rate": 7.4446148332333065e-06, "loss": 0.4371, "step": 4224 }, { "epoch": 0.338, "grad_norm": 1.1841015815734863, "learning_rate": 7.4435176798596735e-06, "loss": 0.3404, "step": 4225 }, { "epoch": 0.33808, "grad_norm": 2.037875175476074, "learning_rate": 7.442420371892852e-06, "loss": 0.373, "step": 4226 }, { "epoch": 0.33816, "grad_norm": 1.385729432106018, "learning_rate": 7.4413229094022655e-06, "loss": 0.297, "step": 4227 }, { "epoch": 0.33824, "grad_norm": 1.7231978178024292, "learning_rate": 7.440225292457348e-06, "loss": 0.4303, "step": 4228 }, { "epoch": 0.33832, "grad_norm": 1.3325543403625488, "learning_rate": 7.43912752112754e-06, "loss": 0.243, "step": 4229 }, { "epoch": 0.3384, "grad_norm": 1.3618645668029785, "learning_rate": 7.438029595482297e-06, "loss": 0.3506, "step": 4230 }, { "epoch": 0.33848, "grad_norm": 1.3418787717819214, "learning_rate": 7.436931515591077e-06, "loss": 0.2405, "step": 4231 }, { "epoch": 0.33856, "grad_norm": 1.4859660863876343, "learning_rate": 7.435833281523356e-06, "loss": 0.3964, "step": 4232 }, { "epoch": 0.33864, "grad_norm": 1.6270725727081299, "learning_rate": 7.434734893348612e-06, "loss": 0.3465, "step": 4233 }, { "epoch": 0.33872, "grad_norm": 1.3810713291168213, "learning_rate": 7.433636351136338e-06, "loss": 0.3105, "step": 4234 }, { "epoch": 0.3388, "grad_norm": 1.595778465270996, "learning_rate": 7.432537654956036e-06, "loss": 0.3078, "step": 4235 }, { "epoch": 0.33888, "grad_norm": 1.770795464515686, "learning_rate": 7.431438804877216e-06, "loss": 0.3117, "step": 4236 }, { "epoch": 0.33896, "grad_norm": 1.5661720037460327, "learning_rate": 7.4303398009694e-06, "loss": 0.3066, "step": 4237 }, { "epoch": 0.33904, "grad_norm": 1.6826071739196777, "learning_rate": 7.429240643302114e-06, "loss": 0.2934, "step": 4238 }, { "epoch": 0.33912, "grad_norm": 1.8159326314926147, "learning_rate": 7.428141331944901e-06, "loss": 0.3794, "step": 4239 }, { "epoch": 0.3392, "grad_norm": 1.6127220392227173, "learning_rate": 7.42704186696731e-06, "loss": 0.3804, "step": 4240 }, { "epoch": 0.33928, "grad_norm": 1.844524621963501, "learning_rate": 7.425942248438902e-06, "loss": 0.4147, "step": 4241 }, { "epoch": 0.33936, "grad_norm": 1.4185341596603394, "learning_rate": 7.424842476429246e-06, "loss": 0.247, "step": 4242 }, { "epoch": 0.33944, "grad_norm": 1.1865193843841553, "learning_rate": 7.423742551007919e-06, "loss": 0.2286, "step": 4243 }, { "epoch": 0.33952, "grad_norm": 1.284876823425293, "learning_rate": 7.422642472244511e-06, "loss": 0.2601, "step": 4244 }, { "epoch": 0.3396, "grad_norm": 1.4666966199874878, "learning_rate": 7.42154224020862e-06, "loss": 0.315, "step": 4245 }, { "epoch": 0.33968, "grad_norm": 1.2044739723205566, "learning_rate": 7.420441854969853e-06, "loss": 0.2234, "step": 4246 }, { "epoch": 0.33976, "grad_norm": 1.548977255821228, "learning_rate": 7.41934131659783e-06, "loss": 0.3675, "step": 4247 }, { "epoch": 0.33984, "grad_norm": 1.2386624813079834, "learning_rate": 7.418240625162178e-06, "loss": 0.2938, "step": 4248 }, { "epoch": 0.33992, "grad_norm": 1.4964457750320435, "learning_rate": 7.4171397807325314e-06, "loss": 0.3568, "step": 4249 }, { "epoch": 0.34, "grad_norm": 1.543899416923523, "learning_rate": 7.416038783378539e-06, "loss": 0.4038, "step": 4250 }, { "epoch": 0.34008, "grad_norm": 1.7202891111373901, "learning_rate": 7.414937633169857e-06, "loss": 0.4456, "step": 4251 }, { "epoch": 0.34016, "grad_norm": 1.3532027006149292, "learning_rate": 7.413836330176149e-06, "loss": 0.2768, "step": 4252 }, { "epoch": 0.34024, "grad_norm": 1.1683356761932373, "learning_rate": 7.412734874467096e-06, "loss": 0.3269, "step": 4253 }, { "epoch": 0.34032, "grad_norm": 1.4227070808410645, "learning_rate": 7.411633266112379e-06, "loss": 0.3285, "step": 4254 }, { "epoch": 0.3404, "grad_norm": 1.7242900133132935, "learning_rate": 7.410531505181697e-06, "loss": 0.4367, "step": 4255 }, { "epoch": 0.34048, "grad_norm": 1.3391542434692383, "learning_rate": 7.40942959174475e-06, "loss": 0.3332, "step": 4256 }, { "epoch": 0.34056, "grad_norm": 1.4209461212158203, "learning_rate": 7.408327525871255e-06, "loss": 0.3338, "step": 4257 }, { "epoch": 0.34064, "grad_norm": 2.0780012607574463, "learning_rate": 7.4072253076309375e-06, "loss": 0.51, "step": 4258 }, { "epoch": 0.34072, "grad_norm": 2.3870983123779297, "learning_rate": 7.4061229370935275e-06, "loss": 0.4668, "step": 4259 }, { "epoch": 0.3408, "grad_norm": 1.4779009819030762, "learning_rate": 7.405020414328771e-06, "loss": 0.2749, "step": 4260 }, { "epoch": 0.34088, "grad_norm": 1.7060835361480713, "learning_rate": 7.4039177394064196e-06, "loss": 0.3663, "step": 4261 }, { "epoch": 0.34096, "grad_norm": 1.5579965114593506, "learning_rate": 7.4028149123962365e-06, "loss": 0.3155, "step": 4262 }, { "epoch": 0.34104, "grad_norm": 1.6776325702667236, "learning_rate": 7.401711933367994e-06, "loss": 0.3946, "step": 4263 }, { "epoch": 0.34112, "grad_norm": 2.1484310626983643, "learning_rate": 7.4006088023914735e-06, "loss": 0.541, "step": 4264 }, { "epoch": 0.3412, "grad_norm": 1.8193962574005127, "learning_rate": 7.399505519536465e-06, "loss": 0.4295, "step": 4265 }, { "epoch": 0.34128, "grad_norm": 1.7744461297988892, "learning_rate": 7.398402084872773e-06, "loss": 0.3818, "step": 4266 }, { "epoch": 0.34136, "grad_norm": 1.5246503353118896, "learning_rate": 7.397298498470206e-06, "loss": 0.3992, "step": 4267 }, { "epoch": 0.34144, "grad_norm": 1.3743784427642822, "learning_rate": 7.396194760398584e-06, "loss": 0.3579, "step": 4268 }, { "epoch": 0.34152, "grad_norm": 1.4627759456634521, "learning_rate": 7.395090870727737e-06, "loss": 0.4135, "step": 4269 }, { "epoch": 0.3416, "grad_norm": 1.4635417461395264, "learning_rate": 7.393986829527506e-06, "loss": 0.3232, "step": 4270 }, { "epoch": 0.34168, "grad_norm": 1.804682731628418, "learning_rate": 7.392882636867738e-06, "loss": 0.3609, "step": 4271 }, { "epoch": 0.34176, "grad_norm": 1.6113015413284302, "learning_rate": 7.39177829281829e-06, "loss": 0.319, "step": 4272 }, { "epoch": 0.34184, "grad_norm": 1.5073604583740234, "learning_rate": 7.390673797449035e-06, "loss": 0.3688, "step": 4273 }, { "epoch": 0.34192, "grad_norm": 1.9971095323562622, "learning_rate": 7.389569150829847e-06, "loss": 0.4176, "step": 4274 }, { "epoch": 0.342, "grad_norm": 1.400302767753601, "learning_rate": 7.3884643530306146e-06, "loss": 0.281, "step": 4275 }, { "epoch": 0.34208, "grad_norm": 1.5128846168518066, "learning_rate": 7.387359404121234e-06, "loss": 0.3501, "step": 4276 }, { "epoch": 0.34216, "grad_norm": 1.645851969718933, "learning_rate": 7.386254304171612e-06, "loss": 0.3803, "step": 4277 }, { "epoch": 0.34224, "grad_norm": 1.6219562292099, "learning_rate": 7.385149053251664e-06, "loss": 0.3261, "step": 4278 }, { "epoch": 0.34232, "grad_norm": 2.3219873905181885, "learning_rate": 7.3840436514313185e-06, "loss": 0.5242, "step": 4279 }, { "epoch": 0.3424, "grad_norm": 1.3534795045852661, "learning_rate": 7.382938098780505e-06, "loss": 0.2412, "step": 4280 }, { "epoch": 0.34248, "grad_norm": 1.458430290222168, "learning_rate": 7.381832395369175e-06, "loss": 0.303, "step": 4281 }, { "epoch": 0.34256, "grad_norm": 1.8676531314849854, "learning_rate": 7.380726541267276e-06, "loss": 0.4078, "step": 4282 }, { "epoch": 0.34264, "grad_norm": 1.8521391153335571, "learning_rate": 7.379620536544776e-06, "loss": 0.5314, "step": 4283 }, { "epoch": 0.34272, "grad_norm": 1.4564507007598877, "learning_rate": 7.378514381271646e-06, "loss": 0.3131, "step": 4284 }, { "epoch": 0.3428, "grad_norm": 1.7293351888656616, "learning_rate": 7.377408075517871e-06, "loss": 0.3745, "step": 4285 }, { "epoch": 0.34288, "grad_norm": 1.8894083499908447, "learning_rate": 7.376301619353441e-06, "loss": 0.4519, "step": 4286 }, { "epoch": 0.34296, "grad_norm": 1.4705125093460083, "learning_rate": 7.375195012848359e-06, "loss": 0.3943, "step": 4287 }, { "epoch": 0.34304, "grad_norm": 1.75259530544281, "learning_rate": 7.374088256072635e-06, "loss": 0.3682, "step": 4288 }, { "epoch": 0.34312, "grad_norm": 1.3996787071228027, "learning_rate": 7.372981349096291e-06, "loss": 0.2633, "step": 4289 }, { "epoch": 0.3432, "grad_norm": 1.3005949258804321, "learning_rate": 7.371874291989358e-06, "loss": 0.2874, "step": 4290 }, { "epoch": 0.34328, "grad_norm": 1.8466405868530273, "learning_rate": 7.370767084821875e-06, "loss": 0.4616, "step": 4291 }, { "epoch": 0.34336, "grad_norm": 1.9775362014770508, "learning_rate": 7.369659727663889e-06, "loss": 0.4652, "step": 4292 }, { "epoch": 0.34344, "grad_norm": 1.816942811012268, "learning_rate": 7.368552220585464e-06, "loss": 0.5346, "step": 4293 }, { "epoch": 0.34352, "grad_norm": 1.3692759275436401, "learning_rate": 7.367444563656663e-06, "loss": 0.4167, "step": 4294 }, { "epoch": 0.3436, "grad_norm": 1.5143382549285889, "learning_rate": 7.3663367569475665e-06, "loss": 0.2954, "step": 4295 }, { "epoch": 0.34368, "grad_norm": 1.7048492431640625, "learning_rate": 7.365228800528263e-06, "loss": 0.3539, "step": 4296 }, { "epoch": 0.34376, "grad_norm": 1.409371256828308, "learning_rate": 7.364120694468845e-06, "loss": 0.3441, "step": 4297 }, { "epoch": 0.34384, "grad_norm": 1.3780807256698608, "learning_rate": 7.363012438839421e-06, "loss": 0.3102, "step": 4298 }, { "epoch": 0.34392, "grad_norm": 1.3144261837005615, "learning_rate": 7.361904033710108e-06, "loss": 0.2998, "step": 4299 }, { "epoch": 0.344, "grad_norm": 1.5273244380950928, "learning_rate": 7.360795479151029e-06, "loss": 0.3879, "step": 4300 }, { "epoch": 0.34408, "grad_norm": 1.3805782794952393, "learning_rate": 7.359686775232318e-06, "loss": 0.3264, "step": 4301 }, { "epoch": 0.34416, "grad_norm": 1.8906997442245483, "learning_rate": 7.358577922024123e-06, "loss": 0.496, "step": 4302 }, { "epoch": 0.34424, "grad_norm": 1.627760410308838, "learning_rate": 7.357468919596593e-06, "loss": 0.3229, "step": 4303 }, { "epoch": 0.34432, "grad_norm": 1.7278337478637695, "learning_rate": 7.356359768019894e-06, "loss": 0.4291, "step": 4304 }, { "epoch": 0.3444, "grad_norm": 1.5394865274429321, "learning_rate": 7.355250467364196e-06, "loss": 0.321, "step": 4305 }, { "epoch": 0.34448, "grad_norm": 1.5800862312316895, "learning_rate": 7.354141017699681e-06, "loss": 0.3424, "step": 4306 }, { "epoch": 0.34456, "grad_norm": 1.81803297996521, "learning_rate": 7.35303141909654e-06, "loss": 0.3574, "step": 4307 }, { "epoch": 0.34464, "grad_norm": 1.6725924015045166, "learning_rate": 7.351921671624977e-06, "loss": 0.3973, "step": 4308 }, { "epoch": 0.34472, "grad_norm": 1.7673554420471191, "learning_rate": 7.350811775355197e-06, "loss": 0.4279, "step": 4309 }, { "epoch": 0.3448, "grad_norm": 1.319742202758789, "learning_rate": 7.349701730357424e-06, "loss": 0.2885, "step": 4310 }, { "epoch": 0.34488, "grad_norm": 1.64664888381958, "learning_rate": 7.348591536701882e-06, "loss": 0.4351, "step": 4311 }, { "epoch": 0.34496, "grad_norm": 2.0317161083221436, "learning_rate": 7.347481194458813e-06, "loss": 0.4843, "step": 4312 }, { "epoch": 0.34504, "grad_norm": 1.2819757461547852, "learning_rate": 7.346370703698464e-06, "loss": 0.2645, "step": 4313 }, { "epoch": 0.34512, "grad_norm": 1.667986512184143, "learning_rate": 7.345260064491092e-06, "loss": 0.4948, "step": 4314 }, { "epoch": 0.3452, "grad_norm": 1.6327108144760132, "learning_rate": 7.344149276906962e-06, "loss": 0.3498, "step": 4315 }, { "epoch": 0.34528, "grad_norm": 1.8304697275161743, "learning_rate": 7.343038341016352e-06, "loss": 0.364, "step": 4316 }, { "epoch": 0.34536, "grad_norm": 1.6535539627075195, "learning_rate": 7.341927256889545e-06, "loss": 0.3881, "step": 4317 }, { "epoch": 0.34544, "grad_norm": 1.561083197593689, "learning_rate": 7.340816024596838e-06, "loss": 0.4237, "step": 4318 }, { "epoch": 0.34552, "grad_norm": 1.6609004735946655, "learning_rate": 7.339704644208531e-06, "loss": 0.3515, "step": 4319 }, { "epoch": 0.3456, "grad_norm": 1.5211528539657593, "learning_rate": 7.338593115794942e-06, "loss": 0.3339, "step": 4320 }, { "epoch": 0.34568, "grad_norm": 1.4612761735916138, "learning_rate": 7.33748143942639e-06, "loss": 0.3212, "step": 4321 }, { "epoch": 0.34576, "grad_norm": 1.503321647644043, "learning_rate": 7.336369615173209e-06, "loss": 0.4281, "step": 4322 }, { "epoch": 0.34584, "grad_norm": 1.4206557273864746, "learning_rate": 7.33525764310574e-06, "loss": 0.309, "step": 4323 }, { "epoch": 0.34592, "grad_norm": 1.1924537420272827, "learning_rate": 7.334145523294334e-06, "loss": 0.2613, "step": 4324 }, { "epoch": 0.346, "grad_norm": 1.3378185033798218, "learning_rate": 7.333033255809351e-06, "loss": 0.2799, "step": 4325 }, { "epoch": 0.34608, "grad_norm": 1.678286075592041, "learning_rate": 7.33192084072116e-06, "loss": 0.3864, "step": 4326 }, { "epoch": 0.34616, "grad_norm": 1.57291841506958, "learning_rate": 7.330808278100141e-06, "loss": 0.3265, "step": 4327 }, { "epoch": 0.34624, "grad_norm": 1.6815236806869507, "learning_rate": 7.329695568016679e-06, "loss": 0.3757, "step": 4328 }, { "epoch": 0.34632, "grad_norm": 1.3270570039749146, "learning_rate": 7.328582710541174e-06, "loss": 0.2978, "step": 4329 }, { "epoch": 0.3464, "grad_norm": 1.3988102674484253, "learning_rate": 7.327469705744034e-06, "loss": 0.3347, "step": 4330 }, { "epoch": 0.34648, "grad_norm": 1.7103873491287231, "learning_rate": 7.3263565536956725e-06, "loss": 0.3344, "step": 4331 }, { "epoch": 0.34656, "grad_norm": 1.768531322479248, "learning_rate": 7.325243254466516e-06, "loss": 0.3403, "step": 4332 }, { "epoch": 0.34664, "grad_norm": 1.3608916997909546, "learning_rate": 7.3241298081269995e-06, "loss": 0.2652, "step": 4333 }, { "epoch": 0.34672, "grad_norm": 2.1497552394866943, "learning_rate": 7.323016214747566e-06, "loss": 0.404, "step": 4334 }, { "epoch": 0.3468, "grad_norm": 1.8790044784545898, "learning_rate": 7.321902474398669e-06, "loss": 0.3277, "step": 4335 }, { "epoch": 0.34688, "grad_norm": 1.679563045501709, "learning_rate": 7.3207885871507715e-06, "loss": 0.3758, "step": 4336 }, { "epoch": 0.34696, "grad_norm": 1.669385552406311, "learning_rate": 7.319674553074347e-06, "loss": 0.3434, "step": 4337 }, { "epoch": 0.34704, "grad_norm": 1.5998573303222656, "learning_rate": 7.318560372239873e-06, "loss": 0.4371, "step": 4338 }, { "epoch": 0.34712, "grad_norm": 1.4684016704559326, "learning_rate": 7.317446044717845e-06, "loss": 0.2922, "step": 4339 }, { "epoch": 0.3472, "grad_norm": 1.7351469993591309, "learning_rate": 7.316331570578759e-06, "loss": 0.41, "step": 4340 }, { "epoch": 0.34728, "grad_norm": 1.467858910560608, "learning_rate": 7.315216949893124e-06, "loss": 0.353, "step": 4341 }, { "epoch": 0.34736, "grad_norm": 1.418001651763916, "learning_rate": 7.314102182731459e-06, "loss": 0.3429, "step": 4342 }, { "epoch": 0.34744, "grad_norm": 1.5263274908065796, "learning_rate": 7.3129872691642935e-06, "loss": 0.4126, "step": 4343 }, { "epoch": 0.34752, "grad_norm": 1.2809735536575317, "learning_rate": 7.3118722092621615e-06, "loss": 0.2515, "step": 4344 }, { "epoch": 0.3476, "grad_norm": 1.377831220626831, "learning_rate": 7.31075700309561e-06, "loss": 0.2804, "step": 4345 }, { "epoch": 0.34768, "grad_norm": 1.5023661851882935, "learning_rate": 7.309641650735195e-06, "loss": 0.3874, "step": 4346 }, { "epoch": 0.34776, "grad_norm": 1.7414848804473877, "learning_rate": 7.308526152251482e-06, "loss": 0.346, "step": 4347 }, { "epoch": 0.34784, "grad_norm": 1.4180262088775635, "learning_rate": 7.307410507715044e-06, "loss": 0.3632, "step": 4348 }, { "epoch": 0.34792, "grad_norm": 1.8054182529449463, "learning_rate": 7.3062947171964626e-06, "loss": 0.3541, "step": 4349 }, { "epoch": 0.348, "grad_norm": 1.9470112323760986, "learning_rate": 7.305178780766332e-06, "loss": 0.3118, "step": 4350 }, { "epoch": 0.34808, "grad_norm": 1.3836066722869873, "learning_rate": 7.304062698495253e-06, "loss": 0.2609, "step": 4351 }, { "epoch": 0.34816, "grad_norm": 1.3901145458221436, "learning_rate": 7.302946470453835e-06, "loss": 0.3282, "step": 4352 }, { "epoch": 0.34824, "grad_norm": 1.5342923402786255, "learning_rate": 7.301830096712701e-06, "loss": 0.284, "step": 4353 }, { "epoch": 0.34832, "grad_norm": 1.7975738048553467, "learning_rate": 7.300713577342477e-06, "loss": 0.415, "step": 4354 }, { "epoch": 0.3484, "grad_norm": 1.4693024158477783, "learning_rate": 7.2995969124138054e-06, "loss": 0.3352, "step": 4355 }, { "epoch": 0.34848, "grad_norm": 1.8647490739822388, "learning_rate": 7.298480101997331e-06, "loss": 0.399, "step": 4356 }, { "epoch": 0.34856, "grad_norm": 1.3949613571166992, "learning_rate": 7.297363146163711e-06, "loss": 0.4118, "step": 4357 }, { "epoch": 0.34864, "grad_norm": 1.5804004669189453, "learning_rate": 7.296246044983611e-06, "loss": 0.3258, "step": 4358 }, { "epoch": 0.34872, "grad_norm": 1.418825626373291, "learning_rate": 7.295128798527708e-06, "loss": 0.324, "step": 4359 }, { "epoch": 0.3488, "grad_norm": 1.6800353527069092, "learning_rate": 7.294011406866686e-06, "loss": 0.4247, "step": 4360 }, { "epoch": 0.34888, "grad_norm": 1.2685750722885132, "learning_rate": 7.292893870071238e-06, "loss": 0.2687, "step": 4361 }, { "epoch": 0.34896, "grad_norm": 1.8051155805587769, "learning_rate": 7.2917761882120655e-06, "loss": 0.4082, "step": 4362 }, { "epoch": 0.34904, "grad_norm": 1.8455771207809448, "learning_rate": 7.290658361359883e-06, "loss": 0.3075, "step": 4363 }, { "epoch": 0.34912, "grad_norm": 1.6396703720092773, "learning_rate": 7.28954038958541e-06, "loss": 0.4184, "step": 4364 }, { "epoch": 0.3492, "grad_norm": 1.6618815660476685, "learning_rate": 7.2884222729593765e-06, "loss": 0.3217, "step": 4365 }, { "epoch": 0.34928, "grad_norm": 1.307723045349121, "learning_rate": 7.287304011552524e-06, "loss": 0.3596, "step": 4366 }, { "epoch": 0.34936, "grad_norm": 1.5229804515838623, "learning_rate": 7.2861856054356e-06, "loss": 0.3199, "step": 4367 }, { "epoch": 0.34944, "grad_norm": 1.6636910438537598, "learning_rate": 7.285067054679362e-06, "loss": 0.2866, "step": 4368 }, { "epoch": 0.34952, "grad_norm": 2.0462241172790527, "learning_rate": 7.283948359354578e-06, "loss": 0.4956, "step": 4369 }, { "epoch": 0.3496, "grad_norm": 1.820526361465454, "learning_rate": 7.282829519532022e-06, "loss": 0.369, "step": 4370 }, { "epoch": 0.34968, "grad_norm": 1.3398878574371338, "learning_rate": 7.281710535282482e-06, "loss": 0.3695, "step": 4371 }, { "epoch": 0.34976, "grad_norm": 1.5608426332473755, "learning_rate": 7.280591406676751e-06, "loss": 0.34, "step": 4372 }, { "epoch": 0.34984, "grad_norm": 1.7027915716171265, "learning_rate": 7.279472133785633e-06, "loss": 0.3301, "step": 4373 }, { "epoch": 0.34992, "grad_norm": 1.7262037992477417, "learning_rate": 7.278352716679939e-06, "loss": 0.3066, "step": 4374 }, { "epoch": 0.35, "grad_norm": 1.5613858699798584, "learning_rate": 7.277233155430492e-06, "loss": 0.3567, "step": 4375 }, { "epoch": 0.35008, "grad_norm": 1.6144390106201172, "learning_rate": 7.2761134501081246e-06, "loss": 0.4078, "step": 4376 }, { "epoch": 0.35016, "grad_norm": 1.7558294534683228, "learning_rate": 7.274993600783673e-06, "loss": 0.4121, "step": 4377 }, { "epoch": 0.35024, "grad_norm": 1.5915411710739136, "learning_rate": 7.27387360752799e-06, "loss": 0.3196, "step": 4378 }, { "epoch": 0.35032, "grad_norm": 2.133772850036621, "learning_rate": 7.272753470411931e-06, "loss": 0.4273, "step": 4379 }, { "epoch": 0.3504, "grad_norm": 1.4743467569351196, "learning_rate": 7.271633189506366e-06, "loss": 0.2437, "step": 4380 }, { "epoch": 0.35048, "grad_norm": 1.521664023399353, "learning_rate": 7.270512764882168e-06, "loss": 0.3741, "step": 4381 }, { "epoch": 0.35056, "grad_norm": 1.5323858261108398, "learning_rate": 7.269392196610226e-06, "loss": 0.3979, "step": 4382 }, { "epoch": 0.35064, "grad_norm": 1.616807222366333, "learning_rate": 7.268271484761433e-06, "loss": 0.3299, "step": 4383 }, { "epoch": 0.35072, "grad_norm": 1.7666263580322266, "learning_rate": 7.267150629406694e-06, "loss": 0.4004, "step": 4384 }, { "epoch": 0.3508, "grad_norm": 1.719724178314209, "learning_rate": 7.266029630616918e-06, "loss": 0.4081, "step": 4385 }, { "epoch": 0.35088, "grad_norm": 1.9632675647735596, "learning_rate": 7.2649084884630305e-06, "loss": 0.3993, "step": 4386 }, { "epoch": 0.35096, "grad_norm": 1.2466901540756226, "learning_rate": 7.2637872030159616e-06, "loss": 0.2621, "step": 4387 }, { "epoch": 0.35104, "grad_norm": 1.5172184705734253, "learning_rate": 7.262665774346651e-06, "loss": 0.3656, "step": 4388 }, { "epoch": 0.35112, "grad_norm": 1.544595718383789, "learning_rate": 7.261544202526047e-06, "loss": 0.4009, "step": 4389 }, { "epoch": 0.3512, "grad_norm": 1.762475609779358, "learning_rate": 7.260422487625109e-06, "loss": 0.4339, "step": 4390 }, { "epoch": 0.35128, "grad_norm": 1.3777724504470825, "learning_rate": 7.259300629714805e-06, "loss": 0.3057, "step": 4391 }, { "epoch": 0.35136, "grad_norm": 1.9480410814285278, "learning_rate": 7.258178628866108e-06, "loss": 0.493, "step": 4392 }, { "epoch": 0.35144, "grad_norm": 1.656561255455017, "learning_rate": 7.257056485150004e-06, "loss": 0.5685, "step": 4393 }, { "epoch": 0.35152, "grad_norm": 1.3406410217285156, "learning_rate": 7.255934198637492e-06, "loss": 0.321, "step": 4394 }, { "epoch": 0.3516, "grad_norm": 1.745924472808838, "learning_rate": 7.254811769399569e-06, "loss": 0.4519, "step": 4395 }, { "epoch": 0.35168, "grad_norm": 1.6247973442077637, "learning_rate": 7.253689197507252e-06, "loss": 0.3926, "step": 4396 }, { "epoch": 0.35176, "grad_norm": 1.585526943206787, "learning_rate": 7.252566483031558e-06, "loss": 0.32, "step": 4397 }, { "epoch": 0.35184, "grad_norm": 1.8269659280776978, "learning_rate": 7.251443626043521e-06, "loss": 0.442, "step": 4398 }, { "epoch": 0.35192, "grad_norm": 2.0274364948272705, "learning_rate": 7.250320626614178e-06, "loss": 0.3676, "step": 4399 }, { "epoch": 0.352, "grad_norm": 1.7257592678070068, "learning_rate": 7.249197484814579e-06, "loss": 0.4199, "step": 4400 }, { "epoch": 0.35208, "grad_norm": 2.1232810020446777, "learning_rate": 7.2480742007157815e-06, "loss": 0.3919, "step": 4401 }, { "epoch": 0.35216, "grad_norm": 1.9855226278305054, "learning_rate": 7.246950774388851e-06, "loss": 0.5002, "step": 4402 }, { "epoch": 0.35224, "grad_norm": 1.150065541267395, "learning_rate": 7.245827205904864e-06, "loss": 0.2116, "step": 4403 }, { "epoch": 0.35232, "grad_norm": 1.5630650520324707, "learning_rate": 7.244703495334904e-06, "loss": 0.2761, "step": 4404 }, { "epoch": 0.3524, "grad_norm": 1.6066936254501343, "learning_rate": 7.243579642750064e-06, "loss": 0.3575, "step": 4405 }, { "epoch": 0.35248, "grad_norm": 1.361370325088501, "learning_rate": 7.242455648221447e-06, "loss": 0.2968, "step": 4406 }, { "epoch": 0.35256, "grad_norm": 1.8183329105377197, "learning_rate": 7.241331511820165e-06, "loss": 0.3925, "step": 4407 }, { "epoch": 0.35264, "grad_norm": 1.5168720483779907, "learning_rate": 7.240207233617338e-06, "loss": 0.3869, "step": 4408 }, { "epoch": 0.35272, "grad_norm": 1.3194420337677002, "learning_rate": 7.239082813684095e-06, "loss": 0.2716, "step": 4409 }, { "epoch": 0.3528, "grad_norm": 1.8594125509262085, "learning_rate": 7.237958252091573e-06, "loss": 0.4105, "step": 4410 }, { "epoch": 0.35288, "grad_norm": 1.588093638420105, "learning_rate": 7.236833548910922e-06, "loss": 0.3534, "step": 4411 }, { "epoch": 0.35296, "grad_norm": 1.6373640298843384, "learning_rate": 7.235708704213297e-06, "loss": 0.3045, "step": 4412 }, { "epoch": 0.35304, "grad_norm": 2.0101304054260254, "learning_rate": 7.234583718069862e-06, "loss": 0.4869, "step": 4413 }, { "epoch": 0.35312, "grad_norm": 1.357629656791687, "learning_rate": 7.233458590551793e-06, "loss": 0.2507, "step": 4414 }, { "epoch": 0.3532, "grad_norm": 1.851575255393982, "learning_rate": 7.232333321730271e-06, "loss": 0.3451, "step": 4415 }, { "epoch": 0.35328, "grad_norm": 1.6210441589355469, "learning_rate": 7.2312079116764895e-06, "loss": 0.3416, "step": 4416 }, { "epoch": 0.35336, "grad_norm": 1.2965114116668701, "learning_rate": 7.230082360461651e-06, "loss": 0.2877, "step": 4417 }, { "epoch": 0.35344, "grad_norm": 1.6795660257339478, "learning_rate": 7.228956668156961e-06, "loss": 0.4307, "step": 4418 }, { "epoch": 0.35352, "grad_norm": 1.4039353132247925, "learning_rate": 7.2278308348336425e-06, "loss": 0.3429, "step": 4419 }, { "epoch": 0.3536, "grad_norm": 1.2655251026153564, "learning_rate": 7.226704860562921e-06, "loss": 0.3212, "step": 4420 }, { "epoch": 0.35368, "grad_norm": 1.6746938228607178, "learning_rate": 7.225578745416033e-06, "loss": 0.3959, "step": 4421 }, { "epoch": 0.35376, "grad_norm": 2.1023738384246826, "learning_rate": 7.224452489464224e-06, "loss": 0.3883, "step": 4422 }, { "epoch": 0.35384, "grad_norm": 1.2368510961532593, "learning_rate": 7.2233260927787495e-06, "loss": 0.241, "step": 4423 }, { "epoch": 0.35392, "grad_norm": 1.6504780054092407, "learning_rate": 7.222199555430872e-06, "loss": 0.5443, "step": 4424 }, { "epoch": 0.354, "grad_norm": 1.4802947044372559, "learning_rate": 7.221072877491866e-06, "loss": 0.3644, "step": 4425 }, { "epoch": 0.35408, "grad_norm": 1.553727626800537, "learning_rate": 7.219946059033009e-06, "loss": 0.3166, "step": 4426 }, { "epoch": 0.35416, "grad_norm": 1.4172799587249756, "learning_rate": 7.2188191001255935e-06, "loss": 0.3096, "step": 4427 }, { "epoch": 0.35424, "grad_norm": 1.5616695880889893, "learning_rate": 7.2176920008409175e-06, "loss": 0.3423, "step": 4428 }, { "epoch": 0.35432, "grad_norm": 1.895857810974121, "learning_rate": 7.216564761250289e-06, "loss": 0.3599, "step": 4429 }, { "epoch": 0.3544, "grad_norm": 1.2682089805603027, "learning_rate": 7.2154373814250246e-06, "loss": 0.2516, "step": 4430 }, { "epoch": 0.35448, "grad_norm": 1.800615906715393, "learning_rate": 7.2143098614364504e-06, "loss": 0.5255, "step": 4431 }, { "epoch": 0.35456, "grad_norm": 1.2873491048812866, "learning_rate": 7.2131822013559e-06, "loss": 0.2427, "step": 4432 }, { "epoch": 0.35464, "grad_norm": 1.6039749383926392, "learning_rate": 7.212054401254718e-06, "loss": 0.3177, "step": 4433 }, { "epoch": 0.35472, "grad_norm": 1.0338603258132935, "learning_rate": 7.210926461204254e-06, "loss": 0.2171, "step": 4434 }, { "epoch": 0.3548, "grad_norm": 1.7069857120513916, "learning_rate": 7.209798381275871e-06, "loss": 0.3947, "step": 4435 }, { "epoch": 0.35488, "grad_norm": 1.2922322750091553, "learning_rate": 7.208670161540938e-06, "loss": 0.2809, "step": 4436 }, { "epoch": 0.35496, "grad_norm": 1.5950067043304443, "learning_rate": 7.207541802070836e-06, "loss": 0.3408, "step": 4437 }, { "epoch": 0.35504, "grad_norm": 2.009056806564331, "learning_rate": 7.206413302936948e-06, "loss": 0.423, "step": 4438 }, { "epoch": 0.35512, "grad_norm": 1.1245336532592773, "learning_rate": 7.2052846642106754e-06, "loss": 0.2455, "step": 4439 }, { "epoch": 0.3552, "grad_norm": 1.768202781677246, "learning_rate": 7.204155885963421e-06, "loss": 0.3644, "step": 4440 }, { "epoch": 0.35528, "grad_norm": 1.4672619104385376, "learning_rate": 7.203026968266598e-06, "loss": 0.4002, "step": 4441 }, { "epoch": 0.35536, "grad_norm": 1.898700475692749, "learning_rate": 7.201897911191629e-06, "loss": 0.6915, "step": 4442 }, { "epoch": 0.35544, "grad_norm": 1.4047620296478271, "learning_rate": 7.200768714809949e-06, "loss": 0.2622, "step": 4443 }, { "epoch": 0.35552, "grad_norm": 1.597065806388855, "learning_rate": 7.199639379192994e-06, "loss": 0.3354, "step": 4444 }, { "epoch": 0.3556, "grad_norm": 1.5272079706192017, "learning_rate": 7.198509904412216e-06, "loss": 0.3772, "step": 4445 }, { "epoch": 0.35568, "grad_norm": 1.6030325889587402, "learning_rate": 7.197380290539073e-06, "loss": 0.4348, "step": 4446 }, { "epoch": 0.35576, "grad_norm": 1.7476811408996582, "learning_rate": 7.1962505376450305e-06, "loss": 0.3492, "step": 4447 }, { "epoch": 0.35584, "grad_norm": 1.6694729328155518, "learning_rate": 7.195120645801567e-06, "loss": 0.2928, "step": 4448 }, { "epoch": 0.35592, "grad_norm": 1.4422017335891724, "learning_rate": 7.193990615080165e-06, "loss": 0.3645, "step": 4449 }, { "epoch": 0.356, "grad_norm": 1.421806812286377, "learning_rate": 7.192860445552317e-06, "loss": 0.3291, "step": 4450 }, { "epoch": 0.35608, "grad_norm": 1.5557352304458618, "learning_rate": 7.1917301372895265e-06, "loss": 0.321, "step": 4451 }, { "epoch": 0.35616, "grad_norm": 1.34674870967865, "learning_rate": 7.190599690363303e-06, "loss": 0.3187, "step": 4452 }, { "epoch": 0.35624, "grad_norm": 1.7821531295776367, "learning_rate": 7.189469104845167e-06, "loss": 0.3548, "step": 4453 }, { "epoch": 0.35632, "grad_norm": 1.5039072036743164, "learning_rate": 7.1883383808066474e-06, "loss": 0.3209, "step": 4454 }, { "epoch": 0.3564, "grad_norm": 1.5421476364135742, "learning_rate": 7.187207518319281e-06, "loss": 0.3517, "step": 4455 }, { "epoch": 0.35648, "grad_norm": 1.7966395616531372, "learning_rate": 7.186076517454612e-06, "loss": 0.5422, "step": 4456 }, { "epoch": 0.35656, "grad_norm": 1.0441017150878906, "learning_rate": 7.184945378284196e-06, "loss": 0.2174, "step": 4457 }, { "epoch": 0.35664, "grad_norm": 1.6243857145309448, "learning_rate": 7.1838141008795985e-06, "loss": 0.3314, "step": 4458 }, { "epoch": 0.35672, "grad_norm": 1.5501540899276733, "learning_rate": 7.182682685312389e-06, "loss": 0.3994, "step": 4459 }, { "epoch": 0.3568, "grad_norm": 1.3322809934616089, "learning_rate": 7.181551131654149e-06, "loss": 0.2615, "step": 4460 }, { "epoch": 0.35688, "grad_norm": 1.8182624578475952, "learning_rate": 7.1804194399764695e-06, "loss": 0.4124, "step": 4461 }, { "epoch": 0.35696, "grad_norm": 1.7917581796646118, "learning_rate": 7.179287610350947e-06, "loss": 0.3101, "step": 4462 }, { "epoch": 0.35704, "grad_norm": 1.4548834562301636, "learning_rate": 7.1781556428491895e-06, "loss": 0.3088, "step": 4463 }, { "epoch": 0.35712, "grad_norm": 1.8527675867080688, "learning_rate": 7.177023537542812e-06, "loss": 0.4651, "step": 4464 }, { "epoch": 0.3572, "grad_norm": 2.1842334270477295, "learning_rate": 7.17589129450344e-06, "loss": 0.4734, "step": 4465 }, { "epoch": 0.35728, "grad_norm": 1.522463321685791, "learning_rate": 7.174758913802707e-06, "loss": 0.3419, "step": 4466 }, { "epoch": 0.35736, "grad_norm": 1.5129618644714355, "learning_rate": 7.173626395512253e-06, "loss": 0.3018, "step": 4467 }, { "epoch": 0.35744, "grad_norm": 1.5700559616088867, "learning_rate": 7.172493739703731e-06, "loss": 0.3354, "step": 4468 }, { "epoch": 0.35752, "grad_norm": 1.571109414100647, "learning_rate": 7.171360946448799e-06, "loss": 0.4211, "step": 4469 }, { "epoch": 0.3576, "grad_norm": 1.8297759294509888, "learning_rate": 7.170228015819125e-06, "loss": 0.4264, "step": 4470 }, { "epoch": 0.35768, "grad_norm": 1.6647297143936157, "learning_rate": 7.169094947886386e-06, "loss": 0.3417, "step": 4471 }, { "epoch": 0.35776, "grad_norm": 1.950027585029602, "learning_rate": 7.167961742722268e-06, "loss": 0.3798, "step": 4472 }, { "epoch": 0.35784, "grad_norm": 1.2567858695983887, "learning_rate": 7.166828400398465e-06, "loss": 0.2779, "step": 4473 }, { "epoch": 0.35792, "grad_norm": 1.0137466192245483, "learning_rate": 7.165694920986679e-06, "loss": 0.2356, "step": 4474 }, { "epoch": 0.358, "grad_norm": 1.2423343658447266, "learning_rate": 7.16456130455862e-06, "loss": 0.2513, "step": 4475 }, { "epoch": 0.35808, "grad_norm": 1.8165005445480347, "learning_rate": 7.163427551186012e-06, "loss": 0.3987, "step": 4476 }, { "epoch": 0.35816, "grad_norm": 1.666602373123169, "learning_rate": 7.1622936609405804e-06, "loss": 0.3845, "step": 4477 }, { "epoch": 0.35824, "grad_norm": 1.850458025932312, "learning_rate": 7.161159633894065e-06, "loss": 0.3629, "step": 4478 }, { "epoch": 0.35832, "grad_norm": 1.5257556438446045, "learning_rate": 7.1600254701182106e-06, "loss": 0.4075, "step": 4479 }, { "epoch": 0.3584, "grad_norm": 2.071976661682129, "learning_rate": 7.158891169684772e-06, "loss": 0.394, "step": 4480 }, { "epoch": 0.35848, "grad_norm": 1.8309593200683594, "learning_rate": 7.157756732665512e-06, "loss": 0.3949, "step": 4481 }, { "epoch": 0.35856, "grad_norm": 1.468177080154419, "learning_rate": 7.156622159132204e-06, "loss": 0.3528, "step": 4482 }, { "epoch": 0.35864, "grad_norm": 1.4701149463653564, "learning_rate": 7.1554874491566274e-06, "loss": 0.334, "step": 4483 }, { "epoch": 0.35872, "grad_norm": 1.1661218404769897, "learning_rate": 7.1543526028105735e-06, "loss": 0.2672, "step": 4484 }, { "epoch": 0.3588, "grad_norm": 1.2732799053192139, "learning_rate": 7.153217620165838e-06, "loss": 0.2915, "step": 4485 }, { "epoch": 0.35888, "grad_norm": 1.7382800579071045, "learning_rate": 7.15208250129423e-06, "loss": 0.296, "step": 4486 }, { "epoch": 0.35896, "grad_norm": 1.537358283996582, "learning_rate": 7.150947246267561e-06, "loss": 0.3903, "step": 4487 }, { "epoch": 0.35904, "grad_norm": 1.4270464181900024, "learning_rate": 7.1498118551576574e-06, "loss": 0.3617, "step": 4488 }, { "epoch": 0.35912, "grad_norm": 1.366553544998169, "learning_rate": 7.148676328036352e-06, "loss": 0.3077, "step": 4489 }, { "epoch": 0.3592, "grad_norm": 1.4373116493225098, "learning_rate": 7.1475406649754845e-06, "loss": 0.3871, "step": 4490 }, { "epoch": 0.35928, "grad_norm": 1.6480387449264526, "learning_rate": 7.146404866046903e-06, "loss": 0.3344, "step": 4491 }, { "epoch": 0.35936, "grad_norm": 1.5638269186019897, "learning_rate": 7.145268931322469e-06, "loss": 0.301, "step": 4492 }, { "epoch": 0.35944, "grad_norm": 1.2544904947280884, "learning_rate": 7.144132860874047e-06, "loss": 0.2363, "step": 4493 }, { "epoch": 0.35952, "grad_norm": 1.566571593284607, "learning_rate": 7.142996654773514e-06, "loss": 0.3634, "step": 4494 }, { "epoch": 0.3596, "grad_norm": 2.4747540950775146, "learning_rate": 7.141860313092753e-06, "loss": 0.5304, "step": 4495 }, { "epoch": 0.35968, "grad_norm": 1.7263084650039673, "learning_rate": 7.1407238359036565e-06, "loss": 0.3384, "step": 4496 }, { "epoch": 0.35976, "grad_norm": 1.6196907758712769, "learning_rate": 7.139587223278127e-06, "loss": 0.3003, "step": 4497 }, { "epoch": 0.35984, "grad_norm": 0.9107118844985962, "learning_rate": 7.138450475288072e-06, "loss": 0.163, "step": 4498 }, { "epoch": 0.35992, "grad_norm": 1.3455191850662231, "learning_rate": 7.1373135920054104e-06, "loss": 0.308, "step": 4499 }, { "epoch": 0.36, "grad_norm": 1.6801081895828247, "learning_rate": 7.1361765735020695e-06, "loss": 0.3765, "step": 4500 }, { "epoch": 0.36008, "grad_norm": 1.4999953508377075, "learning_rate": 7.135039419849984e-06, "loss": 0.2996, "step": 4501 }, { "epoch": 0.36016, "grad_norm": 1.4781824350357056, "learning_rate": 7.1339021311211e-06, "loss": 0.3048, "step": 4502 }, { "epoch": 0.36024, "grad_norm": 1.7782272100448608, "learning_rate": 7.1327647073873665e-06, "loss": 0.4415, "step": 4503 }, { "epoch": 0.36032, "grad_norm": 1.6102408170700073, "learning_rate": 7.131627148720746e-06, "loss": 0.3111, "step": 4504 }, { "epoch": 0.3604, "grad_norm": 1.8156652450561523, "learning_rate": 7.130489455193208e-06, "loss": 0.2951, "step": 4505 }, { "epoch": 0.36048, "grad_norm": 1.3658769130706787, "learning_rate": 7.129351626876733e-06, "loss": 0.348, "step": 4506 }, { "epoch": 0.36056, "grad_norm": 1.5961403846740723, "learning_rate": 7.128213663843304e-06, "loss": 0.3197, "step": 4507 }, { "epoch": 0.36064, "grad_norm": 1.7644686698913574, "learning_rate": 7.127075566164919e-06, "loss": 0.3424, "step": 4508 }, { "epoch": 0.36072, "grad_norm": 1.4091618061065674, "learning_rate": 7.125937333913577e-06, "loss": 0.3399, "step": 4509 }, { "epoch": 0.3608, "grad_norm": 2.0317022800445557, "learning_rate": 7.124798967161296e-06, "loss": 0.4621, "step": 4510 }, { "epoch": 0.36088, "grad_norm": 2.1083295345306396, "learning_rate": 7.123660465980093e-06, "loss": 0.4287, "step": 4511 }, { "epoch": 0.36096, "grad_norm": 2.015735626220703, "learning_rate": 7.122521830441998e-06, "loss": 0.4459, "step": 4512 }, { "epoch": 0.36104, "grad_norm": 1.581146001815796, "learning_rate": 7.121383060619048e-06, "loss": 0.3427, "step": 4513 }, { "epoch": 0.36112, "grad_norm": 1.9534862041473389, "learning_rate": 7.120244156583291e-06, "loss": 0.4336, "step": 4514 }, { "epoch": 0.3612, "grad_norm": 1.0503820180892944, "learning_rate": 7.11910511840678e-06, "loss": 0.2304, "step": 4515 }, { "epoch": 0.36128, "grad_norm": 1.4276210069656372, "learning_rate": 7.11796594616158e-06, "loss": 0.3487, "step": 4516 }, { "epoch": 0.36136, "grad_norm": 1.9091681241989136, "learning_rate": 7.116826639919761e-06, "loss": 0.4407, "step": 4517 }, { "epoch": 0.36144, "grad_norm": 1.8984267711639404, "learning_rate": 7.115687199753403e-06, "loss": 0.4895, "step": 4518 }, { "epoch": 0.36152, "grad_norm": 1.8324373960494995, "learning_rate": 7.114547625734593e-06, "loss": 0.384, "step": 4519 }, { "epoch": 0.3616, "grad_norm": 1.296120524406433, "learning_rate": 7.113407917935433e-06, "loss": 0.2754, "step": 4520 }, { "epoch": 0.36168, "grad_norm": 1.7863622903823853, "learning_rate": 7.112268076428025e-06, "loss": 0.3302, "step": 4521 }, { "epoch": 0.36176, "grad_norm": 2.1770036220550537, "learning_rate": 7.1111281012844825e-06, "loss": 0.3736, "step": 4522 }, { "epoch": 0.36184, "grad_norm": 1.410852074623108, "learning_rate": 7.109987992576929e-06, "loss": 0.3158, "step": 4523 }, { "epoch": 0.36192, "grad_norm": 1.6189733743667603, "learning_rate": 7.108847750377494e-06, "loss": 0.4493, "step": 4524 }, { "epoch": 0.362, "grad_norm": 1.5762578248977661, "learning_rate": 7.107707374758321e-06, "loss": 0.3384, "step": 4525 }, { "epoch": 0.36208, "grad_norm": 1.3969612121582031, "learning_rate": 7.106566865791553e-06, "loss": 0.3198, "step": 4526 }, { "epoch": 0.36216, "grad_norm": 1.4169987440109253, "learning_rate": 7.105426223549349e-06, "loss": 0.3192, "step": 4527 }, { "epoch": 0.36224, "grad_norm": 1.601337194442749, "learning_rate": 7.104285448103871e-06, "loss": 0.4275, "step": 4528 }, { "epoch": 0.36232, "grad_norm": 1.4484046697616577, "learning_rate": 7.103144539527295e-06, "loss": 0.2617, "step": 4529 }, { "epoch": 0.3624, "grad_norm": 1.1574383974075317, "learning_rate": 7.1020034978918006e-06, "loss": 0.248, "step": 4530 }, { "epoch": 0.36248, "grad_norm": 1.2772170305252075, "learning_rate": 7.100862323269579e-06, "loss": 0.2806, "step": 4531 }, { "epoch": 0.36256, "grad_norm": 1.5458264350891113, "learning_rate": 7.099721015732828e-06, "loss": 0.4107, "step": 4532 }, { "epoch": 0.36264, "grad_norm": 1.3658666610717773, "learning_rate": 7.0985795753537525e-06, "loss": 0.3861, "step": 4533 }, { "epoch": 0.36272, "grad_norm": 1.4563086032867432, "learning_rate": 7.097438002204568e-06, "loss": 0.3225, "step": 4534 }, { "epoch": 0.3628, "grad_norm": 1.7949572801589966, "learning_rate": 7.096296296357502e-06, "loss": 0.5492, "step": 4535 }, { "epoch": 0.36288, "grad_norm": 1.9033316373825073, "learning_rate": 7.095154457884782e-06, "loss": 0.3523, "step": 4536 }, { "epoch": 0.36296, "grad_norm": 1.7623441219329834, "learning_rate": 7.094012486858652e-06, "loss": 0.4402, "step": 4537 }, { "epoch": 0.36304, "grad_norm": 1.6129101514816284, "learning_rate": 7.092870383351355e-06, "loss": 0.4225, "step": 4538 }, { "epoch": 0.36312, "grad_norm": 1.721638798713684, "learning_rate": 7.091728147435154e-06, "loss": 0.3564, "step": 4539 }, { "epoch": 0.3632, "grad_norm": 1.588778018951416, "learning_rate": 7.090585779182311e-06, "loss": 0.3925, "step": 4540 }, { "epoch": 0.36328, "grad_norm": 1.32707941532135, "learning_rate": 7.089443278665102e-06, "loss": 0.324, "step": 4541 }, { "epoch": 0.36336, "grad_norm": 2.0091614723205566, "learning_rate": 7.088300645955808e-06, "loss": 0.405, "step": 4542 }, { "epoch": 0.36344, "grad_norm": 1.9140995740890503, "learning_rate": 7.087157881126719e-06, "loss": 0.4189, "step": 4543 }, { "epoch": 0.36352, "grad_norm": 1.6873313188552856, "learning_rate": 7.0860149842501345e-06, "loss": 0.2981, "step": 4544 }, { "epoch": 0.3636, "grad_norm": 1.7330468893051147, "learning_rate": 7.084871955398361e-06, "loss": 0.4079, "step": 4545 }, { "epoch": 0.36368, "grad_norm": 1.319456934928894, "learning_rate": 7.083728794643716e-06, "loss": 0.2838, "step": 4546 }, { "epoch": 0.36376, "grad_norm": 1.4083654880523682, "learning_rate": 7.082585502058522e-06, "loss": 0.2926, "step": 4547 }, { "epoch": 0.36384, "grad_norm": 1.7500334978103638, "learning_rate": 7.0814420777151115e-06, "loss": 0.4215, "step": 4548 }, { "epoch": 0.36392, "grad_norm": 2.052196979522705, "learning_rate": 7.080298521685826e-06, "loss": 0.4126, "step": 4549 }, { "epoch": 0.364, "grad_norm": 1.5984829664230347, "learning_rate": 7.0791548340430125e-06, "loss": 0.3908, "step": 4550 }, { "epoch": 0.36408, "grad_norm": 1.5750199556350708, "learning_rate": 7.0780110148590305e-06, "loss": 0.4188, "step": 4551 }, { "epoch": 0.36416, "grad_norm": 1.4217820167541504, "learning_rate": 7.076867064206244e-06, "loss": 0.2855, "step": 4552 }, { "epoch": 0.36424, "grad_norm": 1.2456365823745728, "learning_rate": 7.0757229821570285e-06, "loss": 0.3137, "step": 4553 }, { "epoch": 0.36432, "grad_norm": 1.511091947555542, "learning_rate": 7.074578768783764e-06, "loss": 0.3978, "step": 4554 }, { "epoch": 0.3644, "grad_norm": 1.2580100297927856, "learning_rate": 7.073434424158845e-06, "loss": 0.2876, "step": 4555 }, { "epoch": 0.36448, "grad_norm": 1.751238465309143, "learning_rate": 7.072289948354665e-06, "loss": 0.3838, "step": 4556 }, { "epoch": 0.36456, "grad_norm": 1.7836511135101318, "learning_rate": 7.071145341443635e-06, "loss": 0.418, "step": 4557 }, { "epoch": 0.36464, "grad_norm": 1.676069974899292, "learning_rate": 7.070000603498169e-06, "loss": 0.4017, "step": 4558 }, { "epoch": 0.36472, "grad_norm": 1.334571123123169, "learning_rate": 7.06885573459069e-06, "loss": 0.2776, "step": 4559 }, { "epoch": 0.3648, "grad_norm": 1.4708949327468872, "learning_rate": 7.067710734793631e-06, "loss": 0.2896, "step": 4560 }, { "epoch": 0.36488, "grad_norm": 1.422624945640564, "learning_rate": 7.066565604179435e-06, "loss": 0.3168, "step": 4561 }, { "epoch": 0.36496, "grad_norm": 1.2501122951507568, "learning_rate": 7.065420342820546e-06, "loss": 0.2806, "step": 4562 }, { "epoch": 0.36504, "grad_norm": 1.5056577920913696, "learning_rate": 7.064274950789424e-06, "loss": 0.2764, "step": 4563 }, { "epoch": 0.36512, "grad_norm": 1.5189259052276611, "learning_rate": 7.063129428158533e-06, "loss": 0.3423, "step": 4564 }, { "epoch": 0.3652, "grad_norm": 1.5131878852844238, "learning_rate": 7.061983775000345e-06, "loss": 0.3619, "step": 4565 }, { "epoch": 0.36528, "grad_norm": 1.3569074869155884, "learning_rate": 7.0608379913873444e-06, "loss": 0.2639, "step": 4566 }, { "epoch": 0.36536, "grad_norm": 1.1969329118728638, "learning_rate": 7.059692077392018e-06, "loss": 0.2341, "step": 4567 }, { "epoch": 0.36544, "grad_norm": 1.7154649496078491, "learning_rate": 7.058546033086867e-06, "loss": 0.4684, "step": 4568 }, { "epoch": 0.36552, "grad_norm": 1.4121015071868896, "learning_rate": 7.057399858544396e-06, "loss": 0.3091, "step": 4569 }, { "epoch": 0.3656, "grad_norm": 1.7438020706176758, "learning_rate": 7.056253553837119e-06, "loss": 0.4294, "step": 4570 }, { "epoch": 0.36568, "grad_norm": 1.392918586730957, "learning_rate": 7.0551071190375605e-06, "loss": 0.3318, "step": 4571 }, { "epoch": 0.36576, "grad_norm": 1.6059353351593018, "learning_rate": 7.053960554218252e-06, "loss": 0.3442, "step": 4572 }, { "epoch": 0.36584, "grad_norm": 1.400192379951477, "learning_rate": 7.052813859451731e-06, "loss": 0.3341, "step": 4573 }, { "epoch": 0.36592, "grad_norm": 1.4998594522476196, "learning_rate": 7.0516670348105455e-06, "loss": 0.365, "step": 4574 }, { "epoch": 0.366, "grad_norm": 1.7953191995620728, "learning_rate": 7.050520080367254e-06, "loss": 0.3027, "step": 4575 }, { "epoch": 0.36608, "grad_norm": 2.084156036376953, "learning_rate": 7.049372996194415e-06, "loss": 0.43, "step": 4576 }, { "epoch": 0.36616, "grad_norm": 1.0851385593414307, "learning_rate": 7.048225782364605e-06, "loss": 0.244, "step": 4577 }, { "epoch": 0.36624, "grad_norm": 1.446227788925171, "learning_rate": 7.047078438950403e-06, "loss": 0.3054, "step": 4578 }, { "epoch": 0.36632, "grad_norm": 1.2726980447769165, "learning_rate": 7.0459309660243976e-06, "loss": 0.292, "step": 4579 }, { "epoch": 0.3664, "grad_norm": 1.6056383848190308, "learning_rate": 7.044783363659185e-06, "loss": 0.3092, "step": 4580 }, { "epoch": 0.36648, "grad_norm": 1.5209200382232666, "learning_rate": 7.043635631927372e-06, "loss": 0.3111, "step": 4581 }, { "epoch": 0.36656, "grad_norm": 1.5532723665237427, "learning_rate": 7.04248777090157e-06, "loss": 0.339, "step": 4582 }, { "epoch": 0.36664, "grad_norm": 2.0113229751586914, "learning_rate": 7.041339780654401e-06, "loss": 0.3921, "step": 4583 }, { "epoch": 0.36672, "grad_norm": 1.3075275421142578, "learning_rate": 7.040191661258495e-06, "loss": 0.346, "step": 4584 }, { "epoch": 0.3668, "grad_norm": 1.2967735528945923, "learning_rate": 7.039043412786489e-06, "loss": 0.3008, "step": 4585 }, { "epoch": 0.36688, "grad_norm": 1.4599136114120483, "learning_rate": 7.037895035311029e-06, "loss": 0.337, "step": 4586 }, { "epoch": 0.36696, "grad_norm": 1.6987855434417725, "learning_rate": 7.0367465289047685e-06, "loss": 0.3539, "step": 4587 }, { "epoch": 0.36704, "grad_norm": 1.5327343940734863, "learning_rate": 7.03559789364037e-06, "loss": 0.4788, "step": 4588 }, { "epoch": 0.36712, "grad_norm": 1.7550090551376343, "learning_rate": 7.034449129590504e-06, "loss": 0.3794, "step": 4589 }, { "epoch": 0.3672, "grad_norm": 1.623400092124939, "learning_rate": 7.033300236827849e-06, "loss": 0.323, "step": 4590 }, { "epoch": 0.36728, "grad_norm": 1.9743831157684326, "learning_rate": 7.032151215425092e-06, "loss": 0.3837, "step": 4591 }, { "epoch": 0.36736, "grad_norm": 1.4057821035385132, "learning_rate": 7.0310020654549264e-06, "loss": 0.289, "step": 4592 }, { "epoch": 0.36744, "grad_norm": 1.7358758449554443, "learning_rate": 7.029852786990056e-06, "loss": 0.3107, "step": 4593 }, { "epoch": 0.36752, "grad_norm": 1.3099263906478882, "learning_rate": 7.028703380103192e-06, "loss": 0.2482, "step": 4594 }, { "epoch": 0.3676, "grad_norm": 1.3868745565414429, "learning_rate": 7.027553844867052e-06, "loss": 0.3824, "step": 4595 }, { "epoch": 0.36768, "grad_norm": 1.7561724185943604, "learning_rate": 7.0264041813543655e-06, "loss": 0.3757, "step": 4596 }, { "epoch": 0.36776, "grad_norm": 1.4879529476165771, "learning_rate": 7.0252543896378666e-06, "loss": 0.3778, "step": 4597 }, { "epoch": 0.36784, "grad_norm": 1.437186360359192, "learning_rate": 7.024104469790301e-06, "loss": 0.3313, "step": 4598 }, { "epoch": 0.36792, "grad_norm": 1.691285252571106, "learning_rate": 7.022954421884415e-06, "loss": 0.4338, "step": 4599 }, { "epoch": 0.368, "grad_norm": 1.7200967073440552, "learning_rate": 7.021804245992973e-06, "loss": 0.3275, "step": 4600 }, { "epoch": 0.36808, "grad_norm": 1.6157788038253784, "learning_rate": 7.020653942188741e-06, "loss": 0.4735, "step": 4601 }, { "epoch": 0.36816, "grad_norm": 1.5696213245391846, "learning_rate": 7.019503510544496e-06, "loss": 0.3177, "step": 4602 }, { "epoch": 0.36824, "grad_norm": 1.3805010318756104, "learning_rate": 7.01835295113302e-06, "loss": 0.2877, "step": 4603 }, { "epoch": 0.36832, "grad_norm": 2.140430212020874, "learning_rate": 7.017202264027108e-06, "loss": 0.5055, "step": 4604 }, { "epoch": 0.3684, "grad_norm": 1.4156843423843384, "learning_rate": 7.016051449299556e-06, "loss": 0.347, "step": 4605 }, { "epoch": 0.36848, "grad_norm": 1.9364955425262451, "learning_rate": 7.014900507023175e-06, "loss": 0.3778, "step": 4606 }, { "epoch": 0.36856, "grad_norm": 1.7083888053894043, "learning_rate": 7.013749437270781e-06, "loss": 0.3028, "step": 4607 }, { "epoch": 0.36864, "grad_norm": 1.3670393228530884, "learning_rate": 7.012598240115201e-06, "loss": 0.2461, "step": 4608 }, { "epoch": 0.36872, "grad_norm": 1.3490629196166992, "learning_rate": 7.011446915629261e-06, "loss": 0.2549, "step": 4609 }, { "epoch": 0.3688, "grad_norm": 1.7474075555801392, "learning_rate": 7.0102954638858065e-06, "loss": 0.4072, "step": 4610 }, { "epoch": 0.36888, "grad_norm": 1.9252272844314575, "learning_rate": 7.009143884957684e-06, "loss": 0.3497, "step": 4611 }, { "epoch": 0.36896, "grad_norm": 1.2210733890533447, "learning_rate": 7.007992178917751e-06, "loss": 0.2395, "step": 4612 }, { "epoch": 0.36904, "grad_norm": 1.4923193454742432, "learning_rate": 7.0068403458388715e-06, "loss": 0.3063, "step": 4613 }, { "epoch": 0.36912, "grad_norm": 1.8423126935958862, "learning_rate": 7.0056883857939174e-06, "loss": 0.5195, "step": 4614 }, { "epoch": 0.3692, "grad_norm": 1.8047568798065186, "learning_rate": 7.004536298855771e-06, "loss": 0.4048, "step": 4615 }, { "epoch": 0.36928, "grad_norm": 1.594499111175537, "learning_rate": 7.003384085097319e-06, "loss": 0.3086, "step": 4616 }, { "epoch": 0.36936, "grad_norm": 1.4673727750778198, "learning_rate": 7.00223174459146e-06, "loss": 0.2774, "step": 4617 }, { "epoch": 0.36944, "grad_norm": 1.760596752166748, "learning_rate": 7.001079277411098e-06, "loss": 0.3534, "step": 4618 }, { "epoch": 0.36952, "grad_norm": 1.7721569538116455, "learning_rate": 6.9999266836291464e-06, "loss": 0.3507, "step": 4619 }, { "epoch": 0.3696, "grad_norm": 1.4745303392410278, "learning_rate": 6.9987739633185245e-06, "loss": 0.35, "step": 4620 }, { "epoch": 0.36968, "grad_norm": 1.7846648693084717, "learning_rate": 6.9976211165521635e-06, "loss": 0.4137, "step": 4621 }, { "epoch": 0.36976, "grad_norm": 1.6463090181350708, "learning_rate": 6.996468143402997e-06, "loss": 0.3458, "step": 4622 }, { "epoch": 0.36984, "grad_norm": 1.7248306274414062, "learning_rate": 6.995315043943971e-06, "loss": 0.3225, "step": 4623 }, { "epoch": 0.36992, "grad_norm": 2.039926528930664, "learning_rate": 6.9941618182480384e-06, "loss": 0.4484, "step": 4624 }, { "epoch": 0.37, "grad_norm": 1.9294219017028809, "learning_rate": 6.993008466388161e-06, "loss": 0.407, "step": 4625 }, { "epoch": 0.37008, "grad_norm": 1.5425759553909302, "learning_rate": 6.991854988437307e-06, "loss": 0.3614, "step": 4626 }, { "epoch": 0.37016, "grad_norm": 1.3891255855560303, "learning_rate": 6.990701384468451e-06, "loss": 0.2851, "step": 4627 }, { "epoch": 0.37024, "grad_norm": 2.027620792388916, "learning_rate": 6.9895476545545804e-06, "loss": 0.3724, "step": 4628 }, { "epoch": 0.37032, "grad_norm": 1.5156015157699585, "learning_rate": 6.988393798768685e-06, "loss": 0.3557, "step": 4629 }, { "epoch": 0.3704, "grad_norm": 1.4785630702972412, "learning_rate": 6.987239817183769e-06, "loss": 0.3771, "step": 4630 }, { "epoch": 0.37048, "grad_norm": 1.4804576635360718, "learning_rate": 6.98608570987284e-06, "loss": 0.4071, "step": 4631 }, { "epoch": 0.37056, "grad_norm": 1.5395426750183105, "learning_rate": 6.984931476908911e-06, "loss": 0.337, "step": 4632 }, { "epoch": 0.37064, "grad_norm": 1.7918102741241455, "learning_rate": 6.983777118365011e-06, "loss": 0.4346, "step": 4633 }, { "epoch": 0.37072, "grad_norm": 1.6034590005874634, "learning_rate": 6.982622634314171e-06, "loss": 0.3478, "step": 4634 }, { "epoch": 0.3708, "grad_norm": 1.6067657470703125, "learning_rate": 6.981468024829428e-06, "loss": 0.4161, "step": 4635 }, { "epoch": 0.37088, "grad_norm": 1.1507148742675781, "learning_rate": 6.980313289983836e-06, "loss": 0.2877, "step": 4636 }, { "epoch": 0.37096, "grad_norm": 1.345964789390564, "learning_rate": 6.979158429850448e-06, "loss": 0.2516, "step": 4637 }, { "epoch": 0.37104, "grad_norm": 1.6395801305770874, "learning_rate": 6.978003444502326e-06, "loss": 0.4643, "step": 4638 }, { "epoch": 0.37112, "grad_norm": 1.3134948015213013, "learning_rate": 6.976848334012546e-06, "loss": 0.3187, "step": 4639 }, { "epoch": 0.3712, "grad_norm": 1.9457036256790161, "learning_rate": 6.975693098454186e-06, "loss": 0.4266, "step": 4640 }, { "epoch": 0.37128, "grad_norm": 1.6113479137420654, "learning_rate": 6.974537737900336e-06, "loss": 0.3117, "step": 4641 }, { "epoch": 0.37136, "grad_norm": 1.1665359735488892, "learning_rate": 6.973382252424088e-06, "loss": 0.2507, "step": 4642 }, { "epoch": 0.37144, "grad_norm": 1.7264834642410278, "learning_rate": 6.9722266420985495e-06, "loss": 0.4146, "step": 4643 }, { "epoch": 0.37152, "grad_norm": 2.008244276046753, "learning_rate": 6.97107090699683e-06, "loss": 0.3615, "step": 4644 }, { "epoch": 0.3716, "grad_norm": 1.2290860414505005, "learning_rate": 6.969915047192049e-06, "loss": 0.2695, "step": 4645 }, { "epoch": 0.37168, "grad_norm": 1.5274767875671387, "learning_rate": 6.968759062757334e-06, "loss": 0.281, "step": 4646 }, { "epoch": 0.37176, "grad_norm": 1.9756278991699219, "learning_rate": 6.967602953765821e-06, "loss": 0.3648, "step": 4647 }, { "epoch": 0.37184, "grad_norm": 1.4363765716552734, "learning_rate": 6.966446720290652e-06, "loss": 0.3549, "step": 4648 }, { "epoch": 0.37192, "grad_norm": 1.5073497295379639, "learning_rate": 6.9652903624049804e-06, "loss": 0.3297, "step": 4649 }, { "epoch": 0.372, "grad_norm": 1.4804567098617554, "learning_rate": 6.964133880181962e-06, "loss": 0.3181, "step": 4650 }, { "epoch": 0.37208, "grad_norm": 1.5861263275146484, "learning_rate": 6.962977273694765e-06, "loss": 0.3307, "step": 4651 }, { "epoch": 0.37216, "grad_norm": 2.007474422454834, "learning_rate": 6.961820543016565e-06, "loss": 0.3815, "step": 4652 }, { "epoch": 0.37224, "grad_norm": 1.6723278760910034, "learning_rate": 6.960663688220543e-06, "loss": 0.3828, "step": 4653 }, { "epoch": 0.37232, "grad_norm": 1.481465458869934, "learning_rate": 6.959506709379891e-06, "loss": 0.2932, "step": 4654 }, { "epoch": 0.3724, "grad_norm": 1.6673827171325684, "learning_rate": 6.958349606567806e-06, "loss": 0.3636, "step": 4655 }, { "epoch": 0.37248, "grad_norm": 2.048527956008911, "learning_rate": 6.957192379857493e-06, "loss": 0.45, "step": 4656 }, { "epoch": 0.37256, "grad_norm": 1.8853121995925903, "learning_rate": 6.9560350293221655e-06, "loss": 0.3628, "step": 4657 }, { "epoch": 0.37264, "grad_norm": 1.4432258605957031, "learning_rate": 6.954877555035049e-06, "loss": 0.2615, "step": 4658 }, { "epoch": 0.37272, "grad_norm": 1.38565993309021, "learning_rate": 6.953719957069369e-06, "loss": 0.2794, "step": 4659 }, { "epoch": 0.3728, "grad_norm": 2.1929290294647217, "learning_rate": 6.952562235498366e-06, "loss": 0.5944, "step": 4660 }, { "epoch": 0.37288, "grad_norm": 1.4748060703277588, "learning_rate": 6.951404390395282e-06, "loss": 0.2881, "step": 4661 }, { "epoch": 0.37296, "grad_norm": 1.4496852159500122, "learning_rate": 6.950246421833373e-06, "loss": 0.2684, "step": 4662 }, { "epoch": 0.37304, "grad_norm": 1.4460296630859375, "learning_rate": 6.949088329885898e-06, "loss": 0.2797, "step": 4663 }, { "epoch": 0.37312, "grad_norm": 1.7262259721755981, "learning_rate": 6.947930114626125e-06, "loss": 0.3871, "step": 4664 }, { "epoch": 0.3732, "grad_norm": 1.6757460832595825, "learning_rate": 6.946771776127334e-06, "loss": 0.2729, "step": 4665 }, { "epoch": 0.37328, "grad_norm": 1.40213143825531, "learning_rate": 6.945613314462804e-06, "loss": 0.3466, "step": 4666 }, { "epoch": 0.37336, "grad_norm": 2.0440454483032227, "learning_rate": 6.9444547297058315e-06, "loss": 0.4531, "step": 4667 }, { "epoch": 0.37344, "grad_norm": 1.950875163078308, "learning_rate": 6.943296021929713e-06, "loss": 0.4574, "step": 4668 }, { "epoch": 0.37352, "grad_norm": 1.5116314888000488, "learning_rate": 6.9421371912077585e-06, "loss": 0.4164, "step": 4669 }, { "epoch": 0.3736, "grad_norm": 2.183765172958374, "learning_rate": 6.9409782376132805e-06, "loss": 0.5813, "step": 4670 }, { "epoch": 0.37368, "grad_norm": 1.54366934299469, "learning_rate": 6.9398191612196055e-06, "loss": 0.3999, "step": 4671 }, { "epoch": 0.37376, "grad_norm": 1.3263498544692993, "learning_rate": 6.938659962100064e-06, "loss": 0.2936, "step": 4672 }, { "epoch": 0.37384, "grad_norm": 1.3171329498291016, "learning_rate": 6.937500640327992e-06, "loss": 0.3129, "step": 4673 }, { "epoch": 0.37392, "grad_norm": 1.4824219942092896, "learning_rate": 6.936341195976737e-06, "loss": 0.2798, "step": 4674 }, { "epoch": 0.374, "grad_norm": 1.6193792819976807, "learning_rate": 6.935181629119654e-06, "loss": 0.3232, "step": 4675 }, { "epoch": 0.37408, "grad_norm": 2.0140795707702637, "learning_rate": 6.934021939830104e-06, "loss": 0.4507, "step": 4676 }, { "epoch": 0.37416, "grad_norm": 1.3184332847595215, "learning_rate": 6.932862128181459e-06, "loss": 0.3604, "step": 4677 }, { "epoch": 0.37424, "grad_norm": 1.5860306024551392, "learning_rate": 6.931702194247094e-06, "loss": 0.3256, "step": 4678 }, { "epoch": 0.37432, "grad_norm": 1.9520553350448608, "learning_rate": 6.930542138100393e-06, "loss": 0.416, "step": 4679 }, { "epoch": 0.3744, "grad_norm": 1.1498091220855713, "learning_rate": 6.929381959814751e-06, "loss": 0.3029, "step": 4680 }, { "epoch": 0.37448, "grad_norm": 1.5657634735107422, "learning_rate": 6.928221659463568e-06, "loss": 0.3745, "step": 4681 }, { "epoch": 0.37456, "grad_norm": 1.403543472290039, "learning_rate": 6.9270612371202524e-06, "loss": 0.3247, "step": 4682 }, { "epoch": 0.37464, "grad_norm": 1.869012475013733, "learning_rate": 6.925900692858222e-06, "loss": 0.3587, "step": 4683 }, { "epoch": 0.37472, "grad_norm": 1.9943881034851074, "learning_rate": 6.924740026750898e-06, "loss": 0.4173, "step": 4684 }, { "epoch": 0.3748, "grad_norm": 1.7604013681411743, "learning_rate": 6.923579238871712e-06, "loss": 0.3363, "step": 4685 }, { "epoch": 0.37488, "grad_norm": 1.7223265171051025, "learning_rate": 6.922418329294104e-06, "loss": 0.3656, "step": 4686 }, { "epoch": 0.37496, "grad_norm": 1.5790249109268188, "learning_rate": 6.921257298091522e-06, "loss": 0.3046, "step": 4687 }, { "epoch": 0.37504, "grad_norm": 1.5063668489456177, "learning_rate": 6.920096145337418e-06, "loss": 0.2886, "step": 4688 }, { "epoch": 0.37512, "grad_norm": 1.7410809993743896, "learning_rate": 6.9189348711052565e-06, "loss": 0.3991, "step": 4689 }, { "epoch": 0.3752, "grad_norm": 1.309981346130371, "learning_rate": 6.9177734754685055e-06, "loss": 0.2905, "step": 4690 }, { "epoch": 0.37528, "grad_norm": 1.7682570219039917, "learning_rate": 6.916611958500644e-06, "loss": 0.3659, "step": 4691 }, { "epoch": 0.37536, "grad_norm": 1.6350557804107666, "learning_rate": 6.9154503202751564e-06, "loss": 0.3403, "step": 4692 }, { "epoch": 0.37544, "grad_norm": 1.6600843667984009, "learning_rate": 6.914288560865536e-06, "loss": 0.3286, "step": 4693 }, { "epoch": 0.37552, "grad_norm": 1.66802179813385, "learning_rate": 6.913126680345285e-06, "loss": 0.4507, "step": 4694 }, { "epoch": 0.3756, "grad_norm": 2.0192372798919678, "learning_rate": 6.911964678787908e-06, "loss": 0.459, "step": 4695 }, { "epoch": 0.37568, "grad_norm": 1.459499478340149, "learning_rate": 6.910802556266927e-06, "loss": 0.279, "step": 4696 }, { "epoch": 0.37576, "grad_norm": 2.2443490028381348, "learning_rate": 6.909640312855859e-06, "loss": 0.3926, "step": 4697 }, { "epoch": 0.37584, "grad_norm": 1.5917470455169678, "learning_rate": 6.9084779486282385e-06, "loss": 0.3271, "step": 4698 }, { "epoch": 0.37592, "grad_norm": 1.4331306219100952, "learning_rate": 6.907315463657603e-06, "loss": 0.2767, "step": 4699 }, { "epoch": 0.376, "grad_norm": 1.7702385187149048, "learning_rate": 6.906152858017502e-06, "loss": 0.3622, "step": 4700 }, { "epoch": 0.37608, "grad_norm": 1.6627930402755737, "learning_rate": 6.904990131781486e-06, "loss": 0.4196, "step": 4701 }, { "epoch": 0.37616, "grad_norm": 1.6581454277038574, "learning_rate": 6.90382728502312e-06, "loss": 0.3285, "step": 4702 }, { "epoch": 0.37624, "grad_norm": 1.6215664148330688, "learning_rate": 6.9026643178159714e-06, "loss": 0.3781, "step": 4703 }, { "epoch": 0.37632, "grad_norm": 1.4504485130310059, "learning_rate": 6.901501230233617e-06, "loss": 0.3214, "step": 4704 }, { "epoch": 0.3764, "grad_norm": 1.4753299951553345, "learning_rate": 6.900338022349643e-06, "loss": 0.3267, "step": 4705 }, { "epoch": 0.37648, "grad_norm": 1.9508014917373657, "learning_rate": 6.89917469423764e-06, "loss": 0.4347, "step": 4706 }, { "epoch": 0.37656, "grad_norm": 1.515520453453064, "learning_rate": 6.89801124597121e-06, "loss": 0.3053, "step": 4707 }, { "epoch": 0.37664, "grad_norm": 1.8075672388076782, "learning_rate": 6.896847677623959e-06, "loss": 0.3736, "step": 4708 }, { "epoch": 0.37672, "grad_norm": 1.3629133701324463, "learning_rate": 6.8956839892695015e-06, "loss": 0.2908, "step": 4709 }, { "epoch": 0.3768, "grad_norm": 1.7496662139892578, "learning_rate": 6.894520180981461e-06, "loss": 0.3834, "step": 4710 }, { "epoch": 0.37688, "grad_norm": 2.0693397521972656, "learning_rate": 6.893356252833469e-06, "loss": 0.3402, "step": 4711 }, { "epoch": 0.37696, "grad_norm": 1.662286400794983, "learning_rate": 6.892192204899161e-06, "loss": 0.3915, "step": 4712 }, { "epoch": 0.37704, "grad_norm": 1.633707880973816, "learning_rate": 6.8910280372521834e-06, "loss": 0.2893, "step": 4713 }, { "epoch": 0.37712, "grad_norm": 1.2424566745758057, "learning_rate": 6.8898637499661906e-06, "loss": 0.2871, "step": 4714 }, { "epoch": 0.3772, "grad_norm": 1.8725136518478394, "learning_rate": 6.8886993431148395e-06, "loss": 0.406, "step": 4715 }, { "epoch": 0.37728, "grad_norm": 1.4558591842651367, "learning_rate": 6.887534816771802e-06, "loss": 0.2686, "step": 4716 }, { "epoch": 0.37736, "grad_norm": 1.2151341438293457, "learning_rate": 6.886370171010752e-06, "loss": 0.2585, "step": 4717 }, { "epoch": 0.37744, "grad_norm": 1.3010770082473755, "learning_rate": 6.885205405905373e-06, "loss": 0.2596, "step": 4718 }, { "epoch": 0.37752, "grad_norm": 1.303667664527893, "learning_rate": 6.884040521529356e-06, "loss": 0.2937, "step": 4719 }, { "epoch": 0.3776, "grad_norm": 1.5519078969955444, "learning_rate": 6.8828755179564e-06, "loss": 0.3064, "step": 4720 }, { "epoch": 0.37768, "grad_norm": 2.096662759780884, "learning_rate": 6.88171039526021e-06, "loss": 0.469, "step": 4721 }, { "epoch": 0.37776, "grad_norm": 1.5880454778671265, "learning_rate": 6.880545153514498e-06, "loss": 0.3606, "step": 4722 }, { "epoch": 0.37784, "grad_norm": 1.351523756980896, "learning_rate": 6.879379792792988e-06, "loss": 0.3292, "step": 4723 }, { "epoch": 0.37792, "grad_norm": 1.5579664707183838, "learning_rate": 6.878214313169407e-06, "loss": 0.3451, "step": 4724 }, { "epoch": 0.378, "grad_norm": 1.906211018562317, "learning_rate": 6.87704871471749e-06, "loss": 0.3471, "step": 4725 }, { "epoch": 0.37808, "grad_norm": 1.8630839586257935, "learning_rate": 6.875882997510982e-06, "loss": 0.4373, "step": 4726 }, { "epoch": 0.37816, "grad_norm": 1.5359853506088257, "learning_rate": 6.874717161623633e-06, "loss": 0.323, "step": 4727 }, { "epoch": 0.37824, "grad_norm": 1.6257649660110474, "learning_rate": 6.8735512071292024e-06, "loss": 0.4539, "step": 4728 }, { "epoch": 0.37832, "grad_norm": 1.5281004905700684, "learning_rate": 6.872385134101454e-06, "loss": 0.3276, "step": 4729 }, { "epoch": 0.3784, "grad_norm": 1.4861382246017456, "learning_rate": 6.871218942614165e-06, "loss": 0.398, "step": 4730 }, { "epoch": 0.37848, "grad_norm": 1.907596230506897, "learning_rate": 6.8700526327411155e-06, "loss": 0.3705, "step": 4731 }, { "epoch": 0.37856, "grad_norm": 1.6404950618743896, "learning_rate": 6.868886204556092e-06, "loss": 0.4075, "step": 4732 }, { "epoch": 0.37864, "grad_norm": 1.4109077453613281, "learning_rate": 6.867719658132892e-06, "loss": 0.3178, "step": 4733 }, { "epoch": 0.37872, "grad_norm": 1.863938808441162, "learning_rate": 6.866552993545319e-06, "loss": 0.3065, "step": 4734 }, { "epoch": 0.3788, "grad_norm": 2.2239177227020264, "learning_rate": 6.865386210867182e-06, "loss": 0.4868, "step": 4735 }, { "epoch": 0.37888, "grad_norm": 1.7756961584091187, "learning_rate": 6.864219310172302e-06, "loss": 0.3413, "step": 4736 }, { "epoch": 0.37896, "grad_norm": 1.6238456964492798, "learning_rate": 6.863052291534505e-06, "loss": 0.3501, "step": 4737 }, { "epoch": 0.37904, "grad_norm": 1.5405563116073608, "learning_rate": 6.8618851550276225e-06, "loss": 0.3405, "step": 4738 }, { "epoch": 0.37912, "grad_norm": 1.663956642150879, "learning_rate": 6.860717900725495e-06, "loss": 0.378, "step": 4739 }, { "epoch": 0.3792, "grad_norm": 1.5309809446334839, "learning_rate": 6.859550528701972e-06, "loss": 0.3202, "step": 4740 }, { "epoch": 0.37928, "grad_norm": 1.7055078744888306, "learning_rate": 6.858383039030911e-06, "loss": 0.3591, "step": 4741 }, { "epoch": 0.37936, "grad_norm": 1.452616572380066, "learning_rate": 6.857215431786172e-06, "loss": 0.3181, "step": 4742 }, { "epoch": 0.37944, "grad_norm": 1.454076886177063, "learning_rate": 6.856047707041628e-06, "loss": 0.319, "step": 4743 }, { "epoch": 0.37952, "grad_norm": 1.9659279584884644, "learning_rate": 6.854879864871155e-06, "loss": 0.4883, "step": 4744 }, { "epoch": 0.3796, "grad_norm": 1.0691478252410889, "learning_rate": 6.85371190534864e-06, "loss": 0.2573, "step": 4745 }, { "epoch": 0.37968, "grad_norm": 1.682841420173645, "learning_rate": 6.8525438285479755e-06, "loss": 0.2741, "step": 4746 }, { "epoch": 0.37976, "grad_norm": 1.7029143571853638, "learning_rate": 6.85137563454306e-06, "loss": 0.3921, "step": 4747 }, { "epoch": 0.37984, "grad_norm": 1.5727744102478027, "learning_rate": 6.850207323407803e-06, "loss": 0.3508, "step": 4748 }, { "epoch": 0.37992, "grad_norm": 1.4125081300735474, "learning_rate": 6.8490388952161215e-06, "loss": 0.3447, "step": 4749 }, { "epoch": 0.38, "grad_norm": 1.9592466354370117, "learning_rate": 6.847870350041934e-06, "loss": 0.3249, "step": 4750 }, { "epoch": 0.38008, "grad_norm": 1.4628976583480835, "learning_rate": 6.846701687959173e-06, "loss": 0.2875, "step": 4751 }, { "epoch": 0.38016, "grad_norm": 1.5076208114624023, "learning_rate": 6.845532909041775e-06, "loss": 0.3145, "step": 4752 }, { "epoch": 0.38024, "grad_norm": 1.3270679712295532, "learning_rate": 6.8443640133636845e-06, "loss": 0.3381, "step": 4753 }, { "epoch": 0.38032, "grad_norm": 2.057631731033325, "learning_rate": 6.8431950009988565e-06, "loss": 0.3674, "step": 4754 }, { "epoch": 0.3804, "grad_norm": 1.4664708375930786, "learning_rate": 6.842025872021246e-06, "loss": 0.3211, "step": 4755 }, { "epoch": 0.38048, "grad_norm": 1.819962739944458, "learning_rate": 6.840856626504822e-06, "loss": 0.3592, "step": 4756 }, { "epoch": 0.38056, "grad_norm": 1.3360856771469116, "learning_rate": 6.8396872645235615e-06, "loss": 0.2608, "step": 4757 }, { "epoch": 0.38064, "grad_norm": 1.8079757690429688, "learning_rate": 6.838517786151441e-06, "loss": 0.4004, "step": 4758 }, { "epoch": 0.38072, "grad_norm": 1.4537521600723267, "learning_rate": 6.837348191462452e-06, "loss": 0.288, "step": 4759 }, { "epoch": 0.3808, "grad_norm": 1.3475890159606934, "learning_rate": 6.8361784805305905e-06, "loss": 0.2607, "step": 4760 }, { "epoch": 0.38088, "grad_norm": 1.749821424484253, "learning_rate": 6.835008653429862e-06, "loss": 0.3755, "step": 4761 }, { "epoch": 0.38096, "grad_norm": 1.7868049144744873, "learning_rate": 6.833838710234274e-06, "loss": 0.5124, "step": 4762 }, { "epoch": 0.38104, "grad_norm": 1.7680143117904663, "learning_rate": 6.8326686510178475e-06, "loss": 0.3669, "step": 4763 }, { "epoch": 0.38112, "grad_norm": 1.4955719709396362, "learning_rate": 6.831498475854607e-06, "loss": 0.3469, "step": 4764 }, { "epoch": 0.3812, "grad_norm": 1.3218600749969482, "learning_rate": 6.8303281848185864e-06, "loss": 0.3065, "step": 4765 }, { "epoch": 0.38128, "grad_norm": 1.1174854040145874, "learning_rate": 6.829157777983828e-06, "loss": 0.2463, "step": 4766 }, { "epoch": 0.38136, "grad_norm": 1.5425974130630493, "learning_rate": 6.827987255424375e-06, "loss": 0.3862, "step": 4767 }, { "epoch": 0.38144, "grad_norm": 1.8264150619506836, "learning_rate": 6.826816617214287e-06, "loss": 0.4402, "step": 4768 }, { "epoch": 0.38152, "grad_norm": 1.5018960237503052, "learning_rate": 6.8256458634276216e-06, "loss": 0.3495, "step": 4769 }, { "epoch": 0.3816, "grad_norm": 1.9828064441680908, "learning_rate": 6.824474994138453e-06, "loss": 0.4644, "step": 4770 }, { "epoch": 0.38168, "grad_norm": 1.6333398818969727, "learning_rate": 6.823304009420855e-06, "loss": 0.3376, "step": 4771 }, { "epoch": 0.38176, "grad_norm": 1.5310996770858765, "learning_rate": 6.822132909348913e-06, "loss": 0.3445, "step": 4772 }, { "epoch": 0.38184, "grad_norm": 1.3783681392669678, "learning_rate": 6.820961693996719e-06, "loss": 0.2656, "step": 4773 }, { "epoch": 0.38192, "grad_norm": 1.7104060649871826, "learning_rate": 6.819790363438372e-06, "loss": 0.3171, "step": 4774 }, { "epoch": 0.382, "grad_norm": 1.5256901979446411, "learning_rate": 6.818618917747977e-06, "loss": 0.3218, "step": 4775 }, { "epoch": 0.38208, "grad_norm": 1.4606126546859741, "learning_rate": 6.817447356999649e-06, "loss": 0.2958, "step": 4776 }, { "epoch": 0.38216, "grad_norm": 1.5362343788146973, "learning_rate": 6.816275681267507e-06, "loss": 0.3253, "step": 4777 }, { "epoch": 0.38224, "grad_norm": 1.2135709524154663, "learning_rate": 6.815103890625682e-06, "loss": 0.2384, "step": 4778 }, { "epoch": 0.38232, "grad_norm": 1.9567755460739136, "learning_rate": 6.813931985148306e-06, "loss": 0.4436, "step": 4779 }, { "epoch": 0.3824, "grad_norm": 1.4235215187072754, "learning_rate": 6.812759964909522e-06, "loss": 0.2772, "step": 4780 }, { "epoch": 0.38248, "grad_norm": 1.557592749595642, "learning_rate": 6.811587829983481e-06, "loss": 0.3733, "step": 4781 }, { "epoch": 0.38256, "grad_norm": 1.7264912128448486, "learning_rate": 6.810415580444339e-06, "loss": 0.3979, "step": 4782 }, { "epoch": 0.38264, "grad_norm": 1.3333731889724731, "learning_rate": 6.809243216366261e-06, "loss": 0.3936, "step": 4783 }, { "epoch": 0.38272, "grad_norm": 1.5328093767166138, "learning_rate": 6.80807073782342e-06, "loss": 0.3305, "step": 4784 }, { "epoch": 0.3828, "grad_norm": 1.6207122802734375, "learning_rate": 6.806898144889992e-06, "loss": 0.3319, "step": 4785 }, { "epoch": 0.38288, "grad_norm": 1.7416425943374634, "learning_rate": 6.8057254376401635e-06, "loss": 0.4054, "step": 4786 }, { "epoch": 0.38296, "grad_norm": 1.8004319667816162, "learning_rate": 6.80455261614813e-06, "loss": 0.4405, "step": 4787 }, { "epoch": 0.38304, "grad_norm": 1.8042232990264893, "learning_rate": 6.803379680488089e-06, "loss": 0.3759, "step": 4788 }, { "epoch": 0.38312, "grad_norm": 1.7607706785202026, "learning_rate": 6.802206630734252e-06, "loss": 0.4345, "step": 4789 }, { "epoch": 0.3832, "grad_norm": 1.388512372970581, "learning_rate": 6.80103346696083e-06, "loss": 0.2903, "step": 4790 }, { "epoch": 0.38328, "grad_norm": 1.2507061958312988, "learning_rate": 6.799860189242049e-06, "loss": 0.2915, "step": 4791 }, { "epoch": 0.38336, "grad_norm": 1.4583854675292969, "learning_rate": 6.798686797652134e-06, "loss": 0.3479, "step": 4792 }, { "epoch": 0.38344, "grad_norm": 1.3498902320861816, "learning_rate": 6.797513292265323e-06, "loss": 0.3524, "step": 4793 }, { "epoch": 0.38352, "grad_norm": 1.3097642660140991, "learning_rate": 6.7963396731558615e-06, "loss": 0.2726, "step": 4794 }, { "epoch": 0.3836, "grad_norm": 1.6635525226593018, "learning_rate": 6.795165940398e-06, "loss": 0.3982, "step": 4795 }, { "epoch": 0.38368, "grad_norm": 1.2881489992141724, "learning_rate": 6.7939920940659966e-06, "loss": 0.2503, "step": 4796 }, { "epoch": 0.38376, "grad_norm": 1.7574576139450073, "learning_rate": 6.792818134234115e-06, "loss": 0.4502, "step": 4797 }, { "epoch": 0.38384, "grad_norm": 1.5125421285629272, "learning_rate": 6.791644060976629e-06, "loss": 0.401, "step": 4798 }, { "epoch": 0.38392, "grad_norm": 1.5928010940551758, "learning_rate": 6.7904698743678175e-06, "loss": 0.3318, "step": 4799 }, { "epoch": 0.384, "grad_norm": 1.3006750345230103, "learning_rate": 6.789295574481969e-06, "loss": 0.2972, "step": 4800 }, { "epoch": 0.38408, "grad_norm": 1.9647408723831177, "learning_rate": 6.788121161393376e-06, "loss": 0.4477, "step": 4801 }, { "epoch": 0.38416, "grad_norm": 1.5676745176315308, "learning_rate": 6.7869466351763405e-06, "loss": 0.3331, "step": 4802 }, { "epoch": 0.38424, "grad_norm": 1.4142674207687378, "learning_rate": 6.78577199590517e-06, "loss": 0.2803, "step": 4803 }, { "epoch": 0.38432, "grad_norm": 1.6045714616775513, "learning_rate": 6.784597243654182e-06, "loss": 0.3822, "step": 4804 }, { "epoch": 0.3844, "grad_norm": 1.3804314136505127, "learning_rate": 6.783422378497696e-06, "loss": 0.362, "step": 4805 }, { "epoch": 0.38448, "grad_norm": 1.6979622840881348, "learning_rate": 6.7822474005100435e-06, "loss": 0.413, "step": 4806 }, { "epoch": 0.38456, "grad_norm": 1.5267022848129272, "learning_rate": 6.781072309765563e-06, "loss": 0.3208, "step": 4807 }, { "epoch": 0.38464, "grad_norm": 1.3634998798370361, "learning_rate": 6.779897106338595e-06, "loss": 0.2869, "step": 4808 }, { "epoch": 0.38472, "grad_norm": 1.6127568483352661, "learning_rate": 6.778721790303494e-06, "loss": 0.3086, "step": 4809 }, { "epoch": 0.3848, "grad_norm": 1.515779733657837, "learning_rate": 6.777546361734616e-06, "loss": 0.3614, "step": 4810 }, { "epoch": 0.38488, "grad_norm": 1.6880332231521606, "learning_rate": 6.7763708207063286e-06, "loss": 0.3785, "step": 4811 }, { "epoch": 0.38496, "grad_norm": 1.5529166460037231, "learning_rate": 6.775195167293003e-06, "loss": 0.3199, "step": 4812 }, { "epoch": 0.38504, "grad_norm": 1.7941169738769531, "learning_rate": 6.7740194015690175e-06, "loss": 0.4966, "step": 4813 }, { "epoch": 0.38512, "grad_norm": 1.9801031351089478, "learning_rate": 6.772843523608762e-06, "loss": 0.3773, "step": 4814 }, { "epoch": 0.3852, "grad_norm": 1.4930901527404785, "learning_rate": 6.771667533486628e-06, "loss": 0.3426, "step": 4815 }, { "epoch": 0.38528, "grad_norm": 1.4776076078414917, "learning_rate": 6.770491431277017e-06, "loss": 0.3437, "step": 4816 }, { "epoch": 0.38536, "grad_norm": 1.2407801151275635, "learning_rate": 6.7693152170543365e-06, "loss": 0.2532, "step": 4817 }, { "epoch": 0.38544, "grad_norm": 1.7486261129379272, "learning_rate": 6.768138890893004e-06, "loss": 0.353, "step": 4818 }, { "epoch": 0.38552, "grad_norm": 2.0305092334747314, "learning_rate": 6.766962452867439e-06, "loss": 0.4327, "step": 4819 }, { "epoch": 0.3856, "grad_norm": 1.3625537157058716, "learning_rate": 6.765785903052072e-06, "loss": 0.2888, "step": 4820 }, { "epoch": 0.38568, "grad_norm": 1.9757248163223267, "learning_rate": 6.764609241521339e-06, "loss": 0.4204, "step": 4821 }, { "epoch": 0.38576, "grad_norm": 1.4327483177185059, "learning_rate": 6.763432468349684e-06, "loss": 0.297, "step": 4822 }, { "epoch": 0.38584, "grad_norm": 1.9168870449066162, "learning_rate": 6.762255583611557e-06, "loss": 0.435, "step": 4823 }, { "epoch": 0.38592, "grad_norm": 1.9956098794937134, "learning_rate": 6.761078587381416e-06, "loss": 0.3668, "step": 4824 }, { "epoch": 0.386, "grad_norm": 1.5235965251922607, "learning_rate": 6.759901479733727e-06, "loss": 0.3124, "step": 4825 }, { "epoch": 0.38608, "grad_norm": 1.2701911926269531, "learning_rate": 6.7587242607429585e-06, "loss": 0.2314, "step": 4826 }, { "epoch": 0.38616, "grad_norm": 2.225844144821167, "learning_rate": 6.7575469304835905e-06, "loss": 0.467, "step": 4827 }, { "epoch": 0.38624, "grad_norm": 1.8088279962539673, "learning_rate": 6.756369489030109e-06, "loss": 0.4096, "step": 4828 }, { "epoch": 0.38632, "grad_norm": 1.5828858613967896, "learning_rate": 6.755191936457006e-06, "loss": 0.3111, "step": 4829 }, { "epoch": 0.3864, "grad_norm": 2.1753506660461426, "learning_rate": 6.754014272838782e-06, "loss": 0.3957, "step": 4830 }, { "epoch": 0.38648, "grad_norm": 1.830126166343689, "learning_rate": 6.752836498249946e-06, "loss": 0.3539, "step": 4831 }, { "epoch": 0.38656, "grad_norm": 1.4874694347381592, "learning_rate": 6.751658612765008e-06, "loss": 0.3003, "step": 4832 }, { "epoch": 0.38664, "grad_norm": 1.2147022485733032, "learning_rate": 6.750480616458492e-06, "loss": 0.2779, "step": 4833 }, { "epoch": 0.38672, "grad_norm": 1.595517873764038, "learning_rate": 6.749302509404924e-06, "loss": 0.3439, "step": 4834 }, { "epoch": 0.3868, "grad_norm": 1.3618996143341064, "learning_rate": 6.748124291678839e-06, "loss": 0.3388, "step": 4835 }, { "epoch": 0.38688, "grad_norm": 1.43161940574646, "learning_rate": 6.74694596335478e-06, "loss": 0.2954, "step": 4836 }, { "epoch": 0.38696, "grad_norm": 1.5240548849105835, "learning_rate": 6.745767524507296e-06, "loss": 0.2675, "step": 4837 }, { "epoch": 0.38704, "grad_norm": 1.5682686567306519, "learning_rate": 6.744588975210939e-06, "loss": 0.3627, "step": 4838 }, { "epoch": 0.38712, "grad_norm": 1.8606730699539185, "learning_rate": 6.743410315540277e-06, "loss": 0.3254, "step": 4839 }, { "epoch": 0.3872, "grad_norm": 1.5660977363586426, "learning_rate": 6.742231545569879e-06, "loss": 0.4426, "step": 4840 }, { "epoch": 0.38728, "grad_norm": 1.6271204948425293, "learning_rate": 6.741052665374318e-06, "loss": 0.3316, "step": 4841 }, { "epoch": 0.38736, "grad_norm": 1.4664791822433472, "learning_rate": 6.739873675028182e-06, "loss": 0.3391, "step": 4842 }, { "epoch": 0.38744, "grad_norm": 1.467913269996643, "learning_rate": 6.738694574606059e-06, "loss": 0.2995, "step": 4843 }, { "epoch": 0.38752, "grad_norm": 1.3580753803253174, "learning_rate": 6.73751536418255e-06, "loss": 0.282, "step": 4844 }, { "epoch": 0.3876, "grad_norm": 1.404686689376831, "learning_rate": 6.736336043832255e-06, "loss": 0.3058, "step": 4845 }, { "epoch": 0.38768, "grad_norm": 1.6138893365859985, "learning_rate": 6.73515661362979e-06, "loss": 0.3093, "step": 4846 }, { "epoch": 0.38776, "grad_norm": 1.5155889987945557, "learning_rate": 6.733977073649774e-06, "loss": 0.2914, "step": 4847 }, { "epoch": 0.38784, "grad_norm": 1.9643943309783936, "learning_rate": 6.732797423966828e-06, "loss": 0.3926, "step": 4848 }, { "epoch": 0.38792, "grad_norm": 1.6930899620056152, "learning_rate": 6.731617664655586e-06, "loss": 0.4257, "step": 4849 }, { "epoch": 0.388, "grad_norm": 1.435917854309082, "learning_rate": 6.73043779579069e-06, "loss": 0.3335, "step": 4850 }, { "epoch": 0.38808, "grad_norm": 1.7660568952560425, "learning_rate": 6.7292578174467835e-06, "loss": 0.3879, "step": 4851 }, { "epoch": 0.38816, "grad_norm": 1.6072431802749634, "learning_rate": 6.728077729698521e-06, "loss": 0.3515, "step": 4852 }, { "epoch": 0.38824, "grad_norm": 1.5983455181121826, "learning_rate": 6.726897532620564e-06, "loss": 0.3655, "step": 4853 }, { "epoch": 0.38832, "grad_norm": 1.5852428674697876, "learning_rate": 6.725717226287578e-06, "loss": 0.3414, "step": 4854 }, { "epoch": 0.3884, "grad_norm": 1.3663932085037231, "learning_rate": 6.724536810774237e-06, "loss": 0.2862, "step": 4855 }, { "epoch": 0.38848, "grad_norm": 1.508460521697998, "learning_rate": 6.723356286155223e-06, "loss": 0.3473, "step": 4856 }, { "epoch": 0.38856, "grad_norm": 2.041231870651245, "learning_rate": 6.722175652505222e-06, "loss": 0.3408, "step": 4857 }, { "epoch": 0.38864, "grad_norm": 1.6656372547149658, "learning_rate": 6.72099490989893e-06, "loss": 0.3285, "step": 4858 }, { "epoch": 0.38872, "grad_norm": 1.4684637784957886, "learning_rate": 6.719814058411049e-06, "loss": 0.3015, "step": 4859 }, { "epoch": 0.3888, "grad_norm": 1.4412018060684204, "learning_rate": 6.718633098116288e-06, "loss": 0.3504, "step": 4860 }, { "epoch": 0.38888, "grad_norm": 1.2994134426116943, "learning_rate": 6.7174520290893594e-06, "loss": 0.3174, "step": 4861 }, { "epoch": 0.38896, "grad_norm": 2.318960428237915, "learning_rate": 6.716270851404989e-06, "loss": 0.5163, "step": 4862 }, { "epoch": 0.38904, "grad_norm": 1.9144383668899536, "learning_rate": 6.715089565137904e-06, "loss": 0.3362, "step": 4863 }, { "epoch": 0.38912, "grad_norm": 1.4348660707473755, "learning_rate": 6.713908170362841e-06, "loss": 0.383, "step": 4864 }, { "epoch": 0.3892, "grad_norm": 1.6656911373138428, "learning_rate": 6.712726667154542e-06, "loss": 0.3147, "step": 4865 }, { "epoch": 0.38928, "grad_norm": 1.9064844846725464, "learning_rate": 6.711545055587759e-06, "loss": 0.5062, "step": 4866 }, { "epoch": 0.38936, "grad_norm": 1.5904337167739868, "learning_rate": 6.710363335737246e-06, "loss": 0.3469, "step": 4867 }, { "epoch": 0.38944, "grad_norm": 1.8707817792892456, "learning_rate": 6.709181507677769e-06, "loss": 0.3365, "step": 4868 }, { "epoch": 0.38952, "grad_norm": 1.9314450025558472, "learning_rate": 6.7079995714840965e-06, "loss": 0.3962, "step": 4869 }, { "epoch": 0.3896, "grad_norm": 1.5333346128463745, "learning_rate": 6.706817527231006e-06, "loss": 0.304, "step": 4870 }, { "epoch": 0.38968, "grad_norm": 1.4997177124023438, "learning_rate": 6.70563537499328e-06, "loss": 0.3568, "step": 4871 }, { "epoch": 0.38976, "grad_norm": 2.006237745285034, "learning_rate": 6.704453114845712e-06, "loss": 0.4563, "step": 4872 }, { "epoch": 0.38984, "grad_norm": 2.0449886322021484, "learning_rate": 6.7032707468631e-06, "loss": 0.4209, "step": 4873 }, { "epoch": 0.38992, "grad_norm": 1.588484525680542, "learning_rate": 6.702088271120245e-06, "loss": 0.3102, "step": 4874 }, { "epoch": 0.39, "grad_norm": 1.401896595954895, "learning_rate": 6.700905687691961e-06, "loss": 0.3116, "step": 4875 }, { "epoch": 0.39008, "grad_norm": 1.8815094232559204, "learning_rate": 6.699722996653065e-06, "loss": 0.481, "step": 4876 }, { "epoch": 0.39016, "grad_norm": 1.3775348663330078, "learning_rate": 6.6985401980783826e-06, "loss": 0.2989, "step": 4877 }, { "epoch": 0.39024, "grad_norm": 1.5223547220230103, "learning_rate": 6.697357292042746e-06, "loss": 0.3186, "step": 4878 }, { "epoch": 0.39032, "grad_norm": 1.3824423551559448, "learning_rate": 6.6961742786209925e-06, "loss": 0.2951, "step": 4879 }, { "epoch": 0.3904, "grad_norm": 1.2650786638259888, "learning_rate": 6.694991157887967e-06, "loss": 0.2914, "step": 4880 }, { "epoch": 0.39048, "grad_norm": 1.40303373336792, "learning_rate": 6.693807929918523e-06, "loss": 0.3228, "step": 4881 }, { "epoch": 0.39056, "grad_norm": 1.308904767036438, "learning_rate": 6.692624594787519e-06, "loss": 0.248, "step": 4882 }, { "epoch": 0.39064, "grad_norm": 1.8516181707382202, "learning_rate": 6.69144115256982e-06, "loss": 0.4687, "step": 4883 }, { "epoch": 0.39072, "grad_norm": 1.5992012023925781, "learning_rate": 6.6902576033403e-06, "loss": 0.363, "step": 4884 }, { "epoch": 0.3908, "grad_norm": 2.104926109313965, "learning_rate": 6.689073947173835e-06, "loss": 0.3872, "step": 4885 }, { "epoch": 0.39088, "grad_norm": 1.384028434753418, "learning_rate": 6.6878901841453135e-06, "loss": 0.3222, "step": 4886 }, { "epoch": 0.39096, "grad_norm": 1.4685722589492798, "learning_rate": 6.6867063143296285e-06, "loss": 0.3079, "step": 4887 }, { "epoch": 0.39104, "grad_norm": 1.832414984703064, "learning_rate": 6.685522337801678e-06, "loss": 0.3503, "step": 4888 }, { "epoch": 0.39112, "grad_norm": 1.4985992908477783, "learning_rate": 6.68433825463637e-06, "loss": 0.343, "step": 4889 }, { "epoch": 0.3912, "grad_norm": 1.4093996286392212, "learning_rate": 6.6831540649086165e-06, "loss": 0.3069, "step": 4890 }, { "epoch": 0.39128, "grad_norm": 1.5814567804336548, "learning_rate": 6.681969768693336e-06, "loss": 0.3293, "step": 4891 }, { "epoch": 0.39136, "grad_norm": 1.4069743156433105, "learning_rate": 6.680785366065458e-06, "loss": 0.3107, "step": 4892 }, { "epoch": 0.39144, "grad_norm": 1.08096182346344, "learning_rate": 6.679600857099913e-06, "loss": 0.2361, "step": 4893 }, { "epoch": 0.39152, "grad_norm": 1.1872847080230713, "learning_rate": 6.67841624187164e-06, "loss": 0.3018, "step": 4894 }, { "epoch": 0.3916, "grad_norm": 1.4051975011825562, "learning_rate": 6.6772315204555896e-06, "loss": 0.3002, "step": 4895 }, { "epoch": 0.39168, "grad_norm": 1.3737307786941528, "learning_rate": 6.676046692926712e-06, "loss": 0.2727, "step": 4896 }, { "epoch": 0.39176, "grad_norm": 1.3034765720367432, "learning_rate": 6.674861759359967e-06, "loss": 0.3119, "step": 4897 }, { "epoch": 0.39184, "grad_norm": 1.5516210794448853, "learning_rate": 6.6736767198303245e-06, "loss": 0.2893, "step": 4898 }, { "epoch": 0.39192, "grad_norm": 1.2979193925857544, "learning_rate": 6.672491574412755e-06, "loss": 0.2884, "step": 4899 }, { "epoch": 0.392, "grad_norm": 1.7181899547576904, "learning_rate": 6.671306323182239e-06, "loss": 0.3425, "step": 4900 }, { "epoch": 0.39208, "grad_norm": 1.7885940074920654, "learning_rate": 6.6701209662137665e-06, "loss": 0.3883, "step": 4901 }, { "epoch": 0.39216, "grad_norm": 2.0114247798919678, "learning_rate": 6.668935503582328e-06, "loss": 0.347, "step": 4902 }, { "epoch": 0.39224, "grad_norm": 1.6095882654190063, "learning_rate": 6.667749935362922e-06, "loss": 0.3343, "step": 4903 }, { "epoch": 0.39232, "grad_norm": 1.5267553329467773, "learning_rate": 6.6665642616305615e-06, "loss": 0.3258, "step": 4904 }, { "epoch": 0.3924, "grad_norm": 1.5813835859298706, "learning_rate": 6.665378482460254e-06, "loss": 0.3566, "step": 4905 }, { "epoch": 0.39248, "grad_norm": 1.3659838438034058, "learning_rate": 6.664192597927024e-06, "loss": 0.3428, "step": 4906 }, { "epoch": 0.39256, "grad_norm": 1.4735257625579834, "learning_rate": 6.663006608105897e-06, "loss": 0.3298, "step": 4907 }, { "epoch": 0.39264, "grad_norm": 1.72593092918396, "learning_rate": 6.6618205130719034e-06, "loss": 0.3582, "step": 4908 }, { "epoch": 0.39272, "grad_norm": 1.6992379426956177, "learning_rate": 6.660634312900089e-06, "loss": 0.3377, "step": 4909 }, { "epoch": 0.3928, "grad_norm": 1.6187288761138916, "learning_rate": 6.659448007665496e-06, "loss": 0.2841, "step": 4910 }, { "epoch": 0.39288, "grad_norm": 1.6214263439178467, "learning_rate": 6.658261597443182e-06, "loss": 0.4277, "step": 4911 }, { "epoch": 0.39296, "grad_norm": 1.5356061458587646, "learning_rate": 6.657075082308204e-06, "loss": 0.3734, "step": 4912 }, { "epoch": 0.39304, "grad_norm": 1.5965813398361206, "learning_rate": 6.655888462335631e-06, "loss": 0.3108, "step": 4913 }, { "epoch": 0.39312, "grad_norm": 1.5381505489349365, "learning_rate": 6.654701737600536e-06, "loss": 0.3088, "step": 4914 }, { "epoch": 0.3932, "grad_norm": 1.7315890789031982, "learning_rate": 6.653514908177999e-06, "loss": 0.3554, "step": 4915 }, { "epoch": 0.39328, "grad_norm": 1.5875256061553955, "learning_rate": 6.652327974143106e-06, "loss": 0.355, "step": 4916 }, { "epoch": 0.39336, "grad_norm": 1.443566083908081, "learning_rate": 6.65114093557095e-06, "loss": 0.2864, "step": 4917 }, { "epoch": 0.39344, "grad_norm": 1.1734968423843384, "learning_rate": 6.649953792536632e-06, "loss": 0.2207, "step": 4918 }, { "epoch": 0.39352, "grad_norm": 1.580723524093628, "learning_rate": 6.648766545115258e-06, "loss": 0.3254, "step": 4919 }, { "epoch": 0.3936, "grad_norm": 1.456050157546997, "learning_rate": 6.647579193381942e-06, "loss": 0.3356, "step": 4920 }, { "epoch": 0.39368, "grad_norm": 0.9805746078491211, "learning_rate": 6.646391737411803e-06, "loss": 0.1802, "step": 4921 }, { "epoch": 0.39376, "grad_norm": 1.4817471504211426, "learning_rate": 6.645204177279968e-06, "loss": 0.4042, "step": 4922 }, { "epoch": 0.39384, "grad_norm": 1.3607851266860962, "learning_rate": 6.644016513061569e-06, "loss": 0.3145, "step": 4923 }, { "epoch": 0.39392, "grad_norm": 1.8702746629714966, "learning_rate": 6.642828744831746e-06, "loss": 0.3977, "step": 4924 }, { "epoch": 0.394, "grad_norm": 1.4964871406555176, "learning_rate": 6.641640872665647e-06, "loss": 0.3943, "step": 4925 }, { "epoch": 0.39408, "grad_norm": 1.3008499145507812, "learning_rate": 6.640452896638421e-06, "loss": 0.3038, "step": 4926 }, { "epoch": 0.39416, "grad_norm": 1.3478739261627197, "learning_rate": 6.639264816825231e-06, "loss": 0.3404, "step": 4927 }, { "epoch": 0.39424, "grad_norm": 1.33133864402771, "learning_rate": 6.638076633301239e-06, "loss": 0.3305, "step": 4928 }, { "epoch": 0.39432, "grad_norm": 1.773242712020874, "learning_rate": 6.63688834614162e-06, "loss": 0.3097, "step": 4929 }, { "epoch": 0.3944, "grad_norm": 1.4814815521240234, "learning_rate": 6.635699955421553e-06, "loss": 0.3365, "step": 4930 }, { "epoch": 0.39448, "grad_norm": 1.6367100477218628, "learning_rate": 6.6345114612162235e-06, "loss": 0.319, "step": 4931 }, { "epoch": 0.39456, "grad_norm": 1.5399671792984009, "learning_rate": 6.633322863600822e-06, "loss": 0.2835, "step": 4932 }, { "epoch": 0.39464, "grad_norm": 1.9397109746932983, "learning_rate": 6.632134162650547e-06, "loss": 0.3802, "step": 4933 }, { "epoch": 0.39472, "grad_norm": 1.1391005516052246, "learning_rate": 6.630945358440606e-06, "loss": 0.2647, "step": 4934 }, { "epoch": 0.3948, "grad_norm": 1.9419512748718262, "learning_rate": 6.6297564510462075e-06, "loss": 0.3465, "step": 4935 }, { "epoch": 0.39488, "grad_norm": 1.6960084438323975, "learning_rate": 6.6285674405425746e-06, "loss": 0.3128, "step": 4936 }, { "epoch": 0.39496, "grad_norm": 1.3205006122589111, "learning_rate": 6.627378327004927e-06, "loss": 0.2925, "step": 4937 }, { "epoch": 0.39504, "grad_norm": 1.6510692834854126, "learning_rate": 6.626189110508498e-06, "loss": 0.3364, "step": 4938 }, { "epoch": 0.39512, "grad_norm": 1.390600323677063, "learning_rate": 6.624999791128527e-06, "loss": 0.2794, "step": 4939 }, { "epoch": 0.3952, "grad_norm": 1.7261474132537842, "learning_rate": 6.623810368940254e-06, "loss": 0.3829, "step": 4940 }, { "epoch": 0.39528, "grad_norm": 1.63364839553833, "learning_rate": 6.6226208440189334e-06, "loss": 0.3381, "step": 4941 }, { "epoch": 0.39536, "grad_norm": 2.4538915157318115, "learning_rate": 6.621431216439822e-06, "loss": 0.4066, "step": 4942 }, { "epoch": 0.39544, "grad_norm": 1.6599690914154053, "learning_rate": 6.620241486278181e-06, "loss": 0.3646, "step": 4943 }, { "epoch": 0.39552, "grad_norm": 1.3066409826278687, "learning_rate": 6.619051653609284e-06, "loss": 0.2685, "step": 4944 }, { "epoch": 0.3956, "grad_norm": 1.3815221786499023, "learning_rate": 6.6178617185084045e-06, "loss": 0.2759, "step": 4945 }, { "epoch": 0.39568, "grad_norm": 2.038173198699951, "learning_rate": 6.616671681050829e-06, "loss": 0.3673, "step": 4946 }, { "epoch": 0.39576, "grad_norm": 1.204803466796875, "learning_rate": 6.615481541311846e-06, "loss": 0.2506, "step": 4947 }, { "epoch": 0.39584, "grad_norm": 1.624482274055481, "learning_rate": 6.614291299366751e-06, "loss": 0.3633, "step": 4948 }, { "epoch": 0.39592, "grad_norm": 1.5271668434143066, "learning_rate": 6.613100955290845e-06, "loss": 0.4242, "step": 4949 }, { "epoch": 0.396, "grad_norm": 1.756809949874878, "learning_rate": 6.611910509159443e-06, "loss": 0.5438, "step": 4950 }, { "epoch": 0.39608, "grad_norm": 1.434124231338501, "learning_rate": 6.610719961047853e-06, "loss": 0.2511, "step": 4951 }, { "epoch": 0.39616, "grad_norm": 1.770772933959961, "learning_rate": 6.609529311031402e-06, "loss": 0.5268, "step": 4952 }, { "epoch": 0.39624, "grad_norm": 1.617964267730713, "learning_rate": 6.608338559185417e-06, "loss": 0.3534, "step": 4953 }, { "epoch": 0.39632, "grad_norm": 1.4315916299819946, "learning_rate": 6.607147705585233e-06, "loss": 0.2874, "step": 4954 }, { "epoch": 0.3964, "grad_norm": 1.1115981340408325, "learning_rate": 6.6059567503061905e-06, "loss": 0.2664, "step": 4955 }, { "epoch": 0.39648, "grad_norm": 1.2988982200622559, "learning_rate": 6.604765693423637e-06, "loss": 0.2593, "step": 4956 }, { "epoch": 0.39656, "grad_norm": 1.7615128755569458, "learning_rate": 6.6035745350129285e-06, "loss": 0.4727, "step": 4957 }, { "epoch": 0.39664, "grad_norm": 1.4541040658950806, "learning_rate": 6.602383275149425e-06, "loss": 0.3868, "step": 4958 }, { "epoch": 0.39672, "grad_norm": 1.7334989309310913, "learning_rate": 6.601191913908495e-06, "loss": 0.3599, "step": 4959 }, { "epoch": 0.3968, "grad_norm": 2.142695188522339, "learning_rate": 6.600000451365508e-06, "loss": 0.4301, "step": 4960 }, { "epoch": 0.39688, "grad_norm": 1.5532326698303223, "learning_rate": 6.598808887595847e-06, "loss": 0.3237, "step": 4961 }, { "epoch": 0.39696, "grad_norm": 1.5868544578552246, "learning_rate": 6.5976172226748965e-06, "loss": 0.3802, "step": 4962 }, { "epoch": 0.39704, "grad_norm": 1.2728503942489624, "learning_rate": 6.596425456678051e-06, "loss": 0.3255, "step": 4963 }, { "epoch": 0.39712, "grad_norm": 1.2911931276321411, "learning_rate": 6.595233589680708e-06, "loss": 0.2739, "step": 4964 }, { "epoch": 0.3972, "grad_norm": 1.98240065574646, "learning_rate": 6.594041621758272e-06, "loss": 0.4007, "step": 4965 }, { "epoch": 0.39728, "grad_norm": 1.2459379434585571, "learning_rate": 6.592849552986159e-06, "loss": 0.2915, "step": 4966 }, { "epoch": 0.39736, "grad_norm": 1.5010486841201782, "learning_rate": 6.591657383439782e-06, "loss": 0.3646, "step": 4967 }, { "epoch": 0.39744, "grad_norm": 1.6176382303237915, "learning_rate": 6.590465113194569e-06, "loss": 0.2956, "step": 4968 }, { "epoch": 0.39752, "grad_norm": 1.6453156471252441, "learning_rate": 6.58927274232595e-06, "loss": 0.3022, "step": 4969 }, { "epoch": 0.3976, "grad_norm": 1.58073091506958, "learning_rate": 6.588080270909363e-06, "loss": 0.3601, "step": 4970 }, { "epoch": 0.39768, "grad_norm": 1.4092754125595093, "learning_rate": 6.586887699020252e-06, "loss": 0.3454, "step": 4971 }, { "epoch": 0.39776, "grad_norm": 1.4815930128097534, "learning_rate": 6.585695026734065e-06, "loss": 0.3053, "step": 4972 }, { "epoch": 0.39784, "grad_norm": 1.828636884689331, "learning_rate": 6.584502254126258e-06, "loss": 0.3526, "step": 4973 }, { "epoch": 0.39792, "grad_norm": 1.1700973510742188, "learning_rate": 6.583309381272296e-06, "loss": 0.2726, "step": 4974 }, { "epoch": 0.398, "grad_norm": 1.965246319770813, "learning_rate": 6.582116408247647e-06, "loss": 0.3759, "step": 4975 }, { "epoch": 0.39808, "grad_norm": 1.8582028150558472, "learning_rate": 6.580923335127787e-06, "loss": 0.3591, "step": 4976 }, { "epoch": 0.39816, "grad_norm": 1.556747317314148, "learning_rate": 6.579730161988197e-06, "loss": 0.401, "step": 4977 }, { "epoch": 0.39824, "grad_norm": 1.0947948694229126, "learning_rate": 6.578536888904367e-06, "loss": 0.201, "step": 4978 }, { "epoch": 0.39832, "grad_norm": 1.6996358633041382, "learning_rate": 6.577343515951787e-06, "loss": 0.3871, "step": 4979 }, { "epoch": 0.3984, "grad_norm": 1.6404669284820557, "learning_rate": 6.576150043205962e-06, "loss": 0.3518, "step": 4980 }, { "epoch": 0.39848, "grad_norm": 1.4954818487167358, "learning_rate": 6.574956470742398e-06, "loss": 0.2871, "step": 4981 }, { "epoch": 0.39856, "grad_norm": 1.6011344194412231, "learning_rate": 6.573762798636608e-06, "loss": 0.3557, "step": 4982 }, { "epoch": 0.39864, "grad_norm": 1.716171383857727, "learning_rate": 6.57256902696411e-06, "loss": 0.3298, "step": 4983 }, { "epoch": 0.39872, "grad_norm": 1.5395945310592651, "learning_rate": 6.571375155800434e-06, "loss": 0.4424, "step": 4984 }, { "epoch": 0.3988, "grad_norm": 1.4372494220733643, "learning_rate": 6.570181185221109e-06, "loss": 0.3702, "step": 4985 }, { "epoch": 0.39888, "grad_norm": 1.6280698776245117, "learning_rate": 6.5689871153016726e-06, "loss": 0.3452, "step": 4986 }, { "epoch": 0.39896, "grad_norm": 1.3518006801605225, "learning_rate": 6.567792946117673e-06, "loss": 0.2486, "step": 4987 }, { "epoch": 0.39904, "grad_norm": 1.4701176881790161, "learning_rate": 6.566598677744658e-06, "loss": 0.3986, "step": 4988 }, { "epoch": 0.39912, "grad_norm": 1.6428401470184326, "learning_rate": 6.565404310258189e-06, "loss": 0.3209, "step": 4989 }, { "epoch": 0.3992, "grad_norm": 1.4474351406097412, "learning_rate": 6.5642098437338245e-06, "loss": 0.2907, "step": 4990 }, { "epoch": 0.39928, "grad_norm": 1.8276543617248535, "learning_rate": 6.563015278247138e-06, "loss": 0.4284, "step": 4991 }, { "epoch": 0.39936, "grad_norm": 1.533747673034668, "learning_rate": 6.561820613873704e-06, "loss": 0.3259, "step": 4992 }, { "epoch": 0.39944, "grad_norm": 1.8883445262908936, "learning_rate": 6.560625850689106e-06, "loss": 0.3844, "step": 4993 }, { "epoch": 0.39952, "grad_norm": 1.6395256519317627, "learning_rate": 6.559430988768934e-06, "loss": 0.348, "step": 4994 }, { "epoch": 0.3996, "grad_norm": 1.8606520891189575, "learning_rate": 6.55823602818878e-06, "loss": 0.353, "step": 4995 }, { "epoch": 0.39968, "grad_norm": 1.542927861213684, "learning_rate": 6.557040969024246e-06, "loss": 0.3696, "step": 4996 }, { "epoch": 0.39976, "grad_norm": 1.8894641399383545, "learning_rate": 6.55584581135094e-06, "loss": 0.3741, "step": 4997 }, { "epoch": 0.39984, "grad_norm": 1.6438285112380981, "learning_rate": 6.554650555244476e-06, "loss": 0.3712, "step": 4998 }, { "epoch": 0.39992, "grad_norm": 1.3953157663345337, "learning_rate": 6.5534552007804715e-06, "loss": 0.3146, "step": 4999 }, { "epoch": 0.4, "grad_norm": 2.405379295349121, "learning_rate": 6.5522597480345564e-06, "loss": 0.6259, "step": 5000 }, { "epoch": 0.40008, "grad_norm": 1.4833146333694458, "learning_rate": 6.551064197082361e-06, "loss": 0.3241, "step": 5001 }, { "epoch": 0.40016, "grad_norm": 1.4547537565231323, "learning_rate": 6.549868547999523e-06, "loss": 0.3425, "step": 5002 }, { "epoch": 0.40024, "grad_norm": 1.2077940702438354, "learning_rate": 6.548672800861686e-06, "loss": 0.2656, "step": 5003 }, { "epoch": 0.40032, "grad_norm": 1.5668079853057861, "learning_rate": 6.547476955744505e-06, "loss": 0.3011, "step": 5004 }, { "epoch": 0.4004, "grad_norm": 1.4270055294036865, "learning_rate": 6.546281012723634e-06, "loss": 0.2762, "step": 5005 }, { "epoch": 0.40048, "grad_norm": 1.215239405632019, "learning_rate": 6.545084971874738e-06, "loss": 0.2474, "step": 5006 }, { "epoch": 0.40056, "grad_norm": 1.7257269620895386, "learning_rate": 6.543888833273486e-06, "loss": 0.3579, "step": 5007 }, { "epoch": 0.40064, "grad_norm": 1.6582391262054443, "learning_rate": 6.5426925969955524e-06, "loss": 0.3379, "step": 5008 }, { "epoch": 0.40072, "grad_norm": 1.4435465335845947, "learning_rate": 6.54149626311662e-06, "loss": 0.3572, "step": 5009 }, { "epoch": 0.4008, "grad_norm": 1.3516596555709839, "learning_rate": 6.5402998317123765e-06, "loss": 0.3044, "step": 5010 }, { "epoch": 0.40088, "grad_norm": 1.3053381443023682, "learning_rate": 6.539103302858517e-06, "loss": 0.2901, "step": 5011 }, { "epoch": 0.40096, "grad_norm": 1.8313065767288208, "learning_rate": 6.537906676630741e-06, "loss": 0.369, "step": 5012 }, { "epoch": 0.40104, "grad_norm": 1.388274908065796, "learning_rate": 6.536709953104756e-06, "loss": 0.2789, "step": 5013 }, { "epoch": 0.40112, "grad_norm": 2.075113296508789, "learning_rate": 6.535513132356275e-06, "loss": 0.5173, "step": 5014 }, { "epoch": 0.4012, "grad_norm": 1.500080943107605, "learning_rate": 6.534316214461014e-06, "loss": 0.4011, "step": 5015 }, { "epoch": 0.40128, "grad_norm": 1.7447412014007568, "learning_rate": 6.533119199494702e-06, "loss": 0.3899, "step": 5016 }, { "epoch": 0.40136, "grad_norm": 1.5773173570632935, "learning_rate": 6.531922087533067e-06, "loss": 0.2934, "step": 5017 }, { "epoch": 0.40144, "grad_norm": 1.36077880859375, "learning_rate": 6.530724878651846e-06, "loss": 0.2573, "step": 5018 }, { "epoch": 0.40152, "grad_norm": 1.796042799949646, "learning_rate": 6.5295275729267874e-06, "loss": 0.3084, "step": 5019 }, { "epoch": 0.4016, "grad_norm": 1.5335108041763306, "learning_rate": 6.528330170433634e-06, "loss": 0.3073, "step": 5020 }, { "epoch": 0.40168, "grad_norm": 1.5497459173202515, "learning_rate": 6.527132671248145e-06, "loss": 0.3406, "step": 5021 }, { "epoch": 0.40176, "grad_norm": 1.802089810371399, "learning_rate": 6.525935075446081e-06, "loss": 0.5411, "step": 5022 }, { "epoch": 0.40184, "grad_norm": 1.59895658493042, "learning_rate": 6.524737383103212e-06, "loss": 0.3232, "step": 5023 }, { "epoch": 0.40192, "grad_norm": 1.7338372468948364, "learning_rate": 6.523539594295309e-06, "loss": 0.3667, "step": 5024 }, { "epoch": 0.402, "grad_norm": 1.4909459352493286, "learning_rate": 6.5223417090981565e-06, "loss": 0.3944, "step": 5025 }, { "epoch": 0.40208, "grad_norm": 1.567449927330017, "learning_rate": 6.521143727587536e-06, "loss": 0.3354, "step": 5026 }, { "epoch": 0.40216, "grad_norm": 1.2927497625350952, "learning_rate": 6.519945649839241e-06, "loss": 0.2782, "step": 5027 }, { "epoch": 0.40224, "grad_norm": 1.6907589435577393, "learning_rate": 6.518747475929073e-06, "loss": 0.4177, "step": 5028 }, { "epoch": 0.40232, "grad_norm": 1.5473442077636719, "learning_rate": 6.517549205932832e-06, "loss": 0.4238, "step": 5029 }, { "epoch": 0.4024, "grad_norm": 1.1968157291412354, "learning_rate": 6.51635083992633e-06, "loss": 0.2689, "step": 5030 }, { "epoch": 0.40248, "grad_norm": 1.7409216165542603, "learning_rate": 6.515152377985385e-06, "loss": 0.5057, "step": 5031 }, { "epoch": 0.40256, "grad_norm": 1.6909297704696655, "learning_rate": 6.513953820185819e-06, "loss": 0.3212, "step": 5032 }, { "epoch": 0.40264, "grad_norm": 1.6705437898635864, "learning_rate": 6.512755166603459e-06, "loss": 0.4307, "step": 5033 }, { "epoch": 0.40272, "grad_norm": 1.2356233596801758, "learning_rate": 6.511556417314142e-06, "loss": 0.2553, "step": 5034 }, { "epoch": 0.4028, "grad_norm": 1.6176491975784302, "learning_rate": 6.510357572393709e-06, "loss": 0.3523, "step": 5035 }, { "epoch": 0.40288, "grad_norm": 1.7396360635757446, "learning_rate": 6.509158631918006e-06, "loss": 0.4734, "step": 5036 }, { "epoch": 0.40296, "grad_norm": 1.2196110486984253, "learning_rate": 6.507959595962885e-06, "loss": 0.3238, "step": 5037 }, { "epoch": 0.40304, "grad_norm": 1.266950249671936, "learning_rate": 6.506760464604206e-06, "loss": 0.2835, "step": 5038 }, { "epoch": 0.40312, "grad_norm": 1.3458009958267212, "learning_rate": 6.5055612379178355e-06, "loss": 0.278, "step": 5039 }, { "epoch": 0.4032, "grad_norm": 1.686802864074707, "learning_rate": 6.504361915979643e-06, "loss": 0.391, "step": 5040 }, { "epoch": 0.40328, "grad_norm": 1.483590841293335, "learning_rate": 6.503162498865504e-06, "loss": 0.3368, "step": 5041 }, { "epoch": 0.40336, "grad_norm": 1.6863470077514648, "learning_rate": 6.501962986651305e-06, "loss": 0.3795, "step": 5042 }, { "epoch": 0.40344, "grad_norm": 1.6084065437316895, "learning_rate": 6.500763379412932e-06, "loss": 0.4195, "step": 5043 }, { "epoch": 0.40352, "grad_norm": 1.6228187084197998, "learning_rate": 6.499563677226281e-06, "loss": 0.4213, "step": 5044 }, { "epoch": 0.4036, "grad_norm": 1.7429604530334473, "learning_rate": 6.498363880167256e-06, "loss": 0.3857, "step": 5045 }, { "epoch": 0.40368, "grad_norm": 0.9011700749397278, "learning_rate": 6.49716398831176e-06, "loss": 0.2047, "step": 5046 }, { "epoch": 0.40376, "grad_norm": 1.3937594890594482, "learning_rate": 6.4959640017357086e-06, "loss": 0.331, "step": 5047 }, { "epoch": 0.40384, "grad_norm": 1.7457072734832764, "learning_rate": 6.494763920515021e-06, "loss": 0.3449, "step": 5048 }, { "epoch": 0.40392, "grad_norm": 1.6541334390640259, "learning_rate": 6.493563744725621e-06, "loss": 0.3061, "step": 5049 }, { "epoch": 0.404, "grad_norm": 1.375489592552185, "learning_rate": 6.49236347444344e-06, "loss": 0.3041, "step": 5050 }, { "epoch": 0.40408, "grad_norm": 1.7582788467407227, "learning_rate": 6.491163109744416e-06, "loss": 0.4728, "step": 5051 }, { "epoch": 0.40416, "grad_norm": 1.3825554847717285, "learning_rate": 6.489962650704491e-06, "loss": 0.3534, "step": 5052 }, { "epoch": 0.40424, "grad_norm": 1.3173959255218506, "learning_rate": 6.4887620973996145e-06, "loss": 0.2669, "step": 5053 }, { "epoch": 0.40432, "grad_norm": 1.8339534997940063, "learning_rate": 6.487561449905744e-06, "loss": 0.3635, "step": 5054 }, { "epoch": 0.4044, "grad_norm": 1.564780831336975, "learning_rate": 6.4863607082988345e-06, "loss": 0.3529, "step": 5055 }, { "epoch": 0.40448, "grad_norm": 1.4042036533355713, "learning_rate": 6.485159872654858e-06, "loss": 0.2709, "step": 5056 }, { "epoch": 0.40456, "grad_norm": 1.776026725769043, "learning_rate": 6.483958943049785e-06, "loss": 0.4495, "step": 5057 }, { "epoch": 0.40464, "grad_norm": 1.5129555463790894, "learning_rate": 6.482757919559594e-06, "loss": 0.3174, "step": 5058 }, { "epoch": 0.40472, "grad_norm": 1.493586540222168, "learning_rate": 6.481556802260273e-06, "loss": 0.3843, "step": 5059 }, { "epoch": 0.4048, "grad_norm": 1.5436426401138306, "learning_rate": 6.4803555912278106e-06, "loss": 0.3938, "step": 5060 }, { "epoch": 0.40488, "grad_norm": 1.4713767766952515, "learning_rate": 6.4791542865382015e-06, "loss": 0.3512, "step": 5061 }, { "epoch": 0.40496, "grad_norm": 1.3419086933135986, "learning_rate": 6.477952888267451e-06, "loss": 0.3038, "step": 5062 }, { "epoch": 0.40504, "grad_norm": 1.6907343864440918, "learning_rate": 6.476751396491566e-06, "loss": 0.3195, "step": 5063 }, { "epoch": 0.40512, "grad_norm": 1.6713168621063232, "learning_rate": 6.475549811286561e-06, "loss": 0.3558, "step": 5064 }, { "epoch": 0.4052, "grad_norm": 1.596295714378357, "learning_rate": 6.474348132728457e-06, "loss": 0.3346, "step": 5065 }, { "epoch": 0.40528, "grad_norm": 1.508131980895996, "learning_rate": 6.473146360893281e-06, "loss": 0.341, "step": 5066 }, { "epoch": 0.40536, "grad_norm": 1.456239104270935, "learning_rate": 6.4719444958570635e-06, "loss": 0.3222, "step": 5067 }, { "epoch": 0.40544, "grad_norm": 1.7773851156234741, "learning_rate": 6.470742537695842e-06, "loss": 0.4088, "step": 5068 }, { "epoch": 0.40552, "grad_norm": 1.5959703922271729, "learning_rate": 6.469540486485662e-06, "loss": 0.3296, "step": 5069 }, { "epoch": 0.4056, "grad_norm": 1.6706092357635498, "learning_rate": 6.468338342302571e-06, "loss": 0.3701, "step": 5070 }, { "epoch": 0.40568, "grad_norm": 1.721103549003601, "learning_rate": 6.467136105222627e-06, "loss": 0.3396, "step": 5071 }, { "epoch": 0.40576, "grad_norm": 1.8070697784423828, "learning_rate": 6.465933775321891e-06, "loss": 0.3382, "step": 5072 }, { "epoch": 0.40584, "grad_norm": 1.7369276285171509, "learning_rate": 6.46473135267643e-06, "loss": 0.3338, "step": 5073 }, { "epoch": 0.40592, "grad_norm": 1.6247727870941162, "learning_rate": 6.463528837362319e-06, "loss": 0.3429, "step": 5074 }, { "epoch": 0.406, "grad_norm": 1.6333701610565186, "learning_rate": 6.4623262294556335e-06, "loss": 0.4485, "step": 5075 }, { "epoch": 0.40608, "grad_norm": 1.7924059629440308, "learning_rate": 6.46112352903246e-06, "loss": 0.2964, "step": 5076 }, { "epoch": 0.40616, "grad_norm": 1.6652660369873047, "learning_rate": 6.4599207361688895e-06, "loss": 0.389, "step": 5077 }, { "epoch": 0.40624, "grad_norm": 1.426188349723816, "learning_rate": 6.458717850941021e-06, "loss": 0.286, "step": 5078 }, { "epoch": 0.40632, "grad_norm": 1.797730565071106, "learning_rate": 6.4575148734249535e-06, "loss": 0.4004, "step": 5079 }, { "epoch": 0.4064, "grad_norm": 1.5713727474212646, "learning_rate": 6.456311803696797e-06, "loss": 0.3227, "step": 5080 }, { "epoch": 0.40648, "grad_norm": 1.5011001825332642, "learning_rate": 6.455108641832666e-06, "loss": 0.3584, "step": 5081 }, { "epoch": 0.40656, "grad_norm": 1.5323615074157715, "learning_rate": 6.453905387908679e-06, "loss": 0.3302, "step": 5082 }, { "epoch": 0.40664, "grad_norm": 1.2687554359436035, "learning_rate": 6.452702042000964e-06, "loss": 0.2704, "step": 5083 }, { "epoch": 0.40672, "grad_norm": 1.6853008270263672, "learning_rate": 6.4514986041856506e-06, "loss": 0.3229, "step": 5084 }, { "epoch": 0.4068, "grad_norm": 1.6804182529449463, "learning_rate": 6.450295074538879e-06, "loss": 0.3343, "step": 5085 }, { "epoch": 0.40688, "grad_norm": 1.2748759984970093, "learning_rate": 6.449091453136789e-06, "loss": 0.2909, "step": 5086 }, { "epoch": 0.40696, "grad_norm": 1.752908706665039, "learning_rate": 6.447887740055532e-06, "loss": 0.3041, "step": 5087 }, { "epoch": 0.40704, "grad_norm": 1.6320358514785767, "learning_rate": 6.446683935371262e-06, "loss": 0.3439, "step": 5088 }, { "epoch": 0.40712, "grad_norm": 1.5598769187927246, "learning_rate": 6.445480039160141e-06, "loss": 0.3354, "step": 5089 }, { "epoch": 0.4072, "grad_norm": 1.6841180324554443, "learning_rate": 6.444276051498334e-06, "loss": 0.3556, "step": 5090 }, { "epoch": 0.40728, "grad_norm": 1.4227954149246216, "learning_rate": 6.443071972462013e-06, "loss": 0.331, "step": 5091 }, { "epoch": 0.40736, "grad_norm": 1.5023025274276733, "learning_rate": 6.441867802127357e-06, "loss": 0.3008, "step": 5092 }, { "epoch": 0.40744, "grad_norm": 1.3716212511062622, "learning_rate": 6.44066354057055e-06, "loss": 0.313, "step": 5093 }, { "epoch": 0.40752, "grad_norm": 1.4322949647903442, "learning_rate": 6.43945918786778e-06, "loss": 0.3185, "step": 5094 }, { "epoch": 0.4076, "grad_norm": 1.3301920890808105, "learning_rate": 6.438254744095247e-06, "loss": 0.2555, "step": 5095 }, { "epoch": 0.40768, "grad_norm": 1.4562873840332031, "learning_rate": 6.437050209329147e-06, "loss": 0.2832, "step": 5096 }, { "epoch": 0.40776, "grad_norm": 2.004117965698242, "learning_rate": 6.435845583645688e-06, "loss": 0.3892, "step": 5097 }, { "epoch": 0.40784, "grad_norm": 1.5688518285751343, "learning_rate": 6.434640867121084e-06, "loss": 0.372, "step": 5098 }, { "epoch": 0.40792, "grad_norm": 1.229386806488037, "learning_rate": 6.433436059831552e-06, "loss": 0.2306, "step": 5099 }, { "epoch": 0.408, "grad_norm": 1.6038435697555542, "learning_rate": 6.432231161853317e-06, "loss": 0.3113, "step": 5100 }, { "epoch": 0.40808, "grad_norm": 1.4195326566696167, "learning_rate": 6.431026173262609e-06, "loss": 0.3563, "step": 5101 }, { "epoch": 0.40816, "grad_norm": 1.3468478918075562, "learning_rate": 6.429821094135663e-06, "loss": 0.3255, "step": 5102 }, { "epoch": 0.40824, "grad_norm": 1.5173962116241455, "learning_rate": 6.42861592454872e-06, "loss": 0.3577, "step": 5103 }, { "epoch": 0.40832, "grad_norm": 1.4830057621002197, "learning_rate": 6.427410664578029e-06, "loss": 0.3339, "step": 5104 }, { "epoch": 0.4084, "grad_norm": 1.4850959777832031, "learning_rate": 6.42620531429984e-06, "loss": 0.3405, "step": 5105 }, { "epoch": 0.40848, "grad_norm": 1.410702109336853, "learning_rate": 6.424999873790414e-06, "loss": 0.3099, "step": 5106 }, { "epoch": 0.40856, "grad_norm": 1.793792486190796, "learning_rate": 6.423794343126013e-06, "loss": 0.3515, "step": 5107 }, { "epoch": 0.40864, "grad_norm": 1.6600618362426758, "learning_rate": 6.422588722382909e-06, "loss": 0.2998, "step": 5108 }, { "epoch": 0.40872, "grad_norm": 1.887887954711914, "learning_rate": 6.4213830116373766e-06, "loss": 0.3906, "step": 5109 }, { "epoch": 0.4088, "grad_norm": 1.5626739263534546, "learning_rate": 6.4201772109656956e-06, "loss": 0.3316, "step": 5110 }, { "epoch": 0.40888, "grad_norm": 1.23286771774292, "learning_rate": 6.418971320444155e-06, "loss": 0.3091, "step": 5111 }, { "epoch": 0.40896, "grad_norm": 1.5620896816253662, "learning_rate": 6.417765340149047e-06, "loss": 0.3231, "step": 5112 }, { "epoch": 0.40904, "grad_norm": 1.6344822645187378, "learning_rate": 6.41655927015667e-06, "loss": 0.4167, "step": 5113 }, { "epoch": 0.40912, "grad_norm": 2.126511335372925, "learning_rate": 6.4153531105433276e-06, "loss": 0.4323, "step": 5114 }, { "epoch": 0.4092, "grad_norm": 1.6435238122940063, "learning_rate": 6.4141468613853295e-06, "loss": 0.3263, "step": 5115 }, { "epoch": 0.40928, "grad_norm": 1.6907066106796265, "learning_rate": 6.412940522758992e-06, "loss": 0.3561, "step": 5116 }, { "epoch": 0.40936, "grad_norm": 1.7472286224365234, "learning_rate": 6.411734094740634e-06, "loss": 0.375, "step": 5117 }, { "epoch": 0.40944, "grad_norm": 1.2598026990890503, "learning_rate": 6.4105275774065846e-06, "loss": 0.2562, "step": 5118 }, { "epoch": 0.40952, "grad_norm": 1.2201982736587524, "learning_rate": 6.409320970833176e-06, "loss": 0.271, "step": 5119 }, { "epoch": 0.4096, "grad_norm": 2.0412731170654297, "learning_rate": 6.408114275096743e-06, "loss": 0.3964, "step": 5120 }, { "epoch": 0.40968, "grad_norm": 1.8842438459396362, "learning_rate": 6.406907490273633e-06, "loss": 0.427, "step": 5121 }, { "epoch": 0.40976, "grad_norm": 1.1666141748428345, "learning_rate": 6.405700616440191e-06, "loss": 0.2608, "step": 5122 }, { "epoch": 0.40984, "grad_norm": 1.613979697227478, "learning_rate": 6.404493653672776e-06, "loss": 0.34, "step": 5123 }, { "epoch": 0.40992, "grad_norm": 1.5906524658203125, "learning_rate": 6.403286602047748e-06, "loss": 0.4057, "step": 5124 }, { "epoch": 0.41, "grad_norm": 1.3950166702270508, "learning_rate": 6.4020794616414725e-06, "loss": 0.3133, "step": 5125 }, { "epoch": 0.41008, "grad_norm": 1.232621431350708, "learning_rate": 6.40087223253032e-06, "loss": 0.2786, "step": 5126 }, { "epoch": 0.41016, "grad_norm": 1.6018669605255127, "learning_rate": 6.3996649147906675e-06, "loss": 0.2908, "step": 5127 }, { "epoch": 0.41024, "grad_norm": 1.5088248252868652, "learning_rate": 6.3984575084989e-06, "loss": 0.2802, "step": 5128 }, { "epoch": 0.41032, "grad_norm": 1.2472470998764038, "learning_rate": 6.3972500137314066e-06, "loss": 0.26, "step": 5129 }, { "epoch": 0.4104, "grad_norm": 1.7943204641342163, "learning_rate": 6.396042430564577e-06, "loss": 0.3852, "step": 5130 }, { "epoch": 0.41048, "grad_norm": 1.9341444969177246, "learning_rate": 6.394834759074817e-06, "loss": 0.392, "step": 5131 }, { "epoch": 0.41056, "grad_norm": 1.7144229412078857, "learning_rate": 6.393626999338527e-06, "loss": 0.3879, "step": 5132 }, { "epoch": 0.41064, "grad_norm": 1.494651198387146, "learning_rate": 6.392419151432121e-06, "loss": 0.3137, "step": 5133 }, { "epoch": 0.41072, "grad_norm": 1.3737579584121704, "learning_rate": 6.3912112154320135e-06, "loss": 0.3458, "step": 5134 }, { "epoch": 0.4108, "grad_norm": 1.907779574394226, "learning_rate": 6.390003191414627e-06, "loss": 0.368, "step": 5135 }, { "epoch": 0.41088, "grad_norm": 1.4840914011001587, "learning_rate": 6.388795079456392e-06, "loss": 0.3191, "step": 5136 }, { "epoch": 0.41096, "grad_norm": 1.8090542554855347, "learning_rate": 6.387586879633736e-06, "loss": 0.366, "step": 5137 }, { "epoch": 0.41104, "grad_norm": 1.704310417175293, "learning_rate": 6.386378592023103e-06, "loss": 0.3767, "step": 5138 }, { "epoch": 0.41112, "grad_norm": 1.956494927406311, "learning_rate": 6.385170216700934e-06, "loss": 0.3635, "step": 5139 }, { "epoch": 0.4112, "grad_norm": 1.283545732498169, "learning_rate": 6.383961753743681e-06, "loss": 0.2738, "step": 5140 }, { "epoch": 0.41128, "grad_norm": 1.40552818775177, "learning_rate": 6.382753203227799e-06, "loss": 0.3511, "step": 5141 }, { "epoch": 0.41136, "grad_norm": 1.738274335861206, "learning_rate": 6.381544565229749e-06, "loss": 0.4043, "step": 5142 }, { "epoch": 0.41144, "grad_norm": 1.6897915601730347, "learning_rate": 6.380335839825996e-06, "loss": 0.3846, "step": 5143 }, { "epoch": 0.41152, "grad_norm": 1.5609641075134277, "learning_rate": 6.379127027093013e-06, "loss": 0.3149, "step": 5144 }, { "epoch": 0.4116, "grad_norm": 1.7952966690063477, "learning_rate": 6.377918127107277e-06, "loss": 0.3799, "step": 5145 }, { "epoch": 0.41168, "grad_norm": 1.743720293045044, "learning_rate": 6.376709139945273e-06, "loss": 0.3468, "step": 5146 }, { "epoch": 0.41176, "grad_norm": 1.60451078414917, "learning_rate": 6.375500065683486e-06, "loss": 0.3882, "step": 5147 }, { "epoch": 0.41184, "grad_norm": 1.7727147340774536, "learning_rate": 6.374290904398416e-06, "loss": 0.367, "step": 5148 }, { "epoch": 0.41192, "grad_norm": 1.4645249843597412, "learning_rate": 6.373081656166557e-06, "loss": 0.3483, "step": 5149 }, { "epoch": 0.412, "grad_norm": 1.3234875202178955, "learning_rate": 6.371872321064414e-06, "loss": 0.2924, "step": 5150 }, { "epoch": 0.41208, "grad_norm": 1.859861969947815, "learning_rate": 6.370662899168501e-06, "loss": 0.3757, "step": 5151 }, { "epoch": 0.41216, "grad_norm": 1.227710485458374, "learning_rate": 6.3694533905553346e-06, "loss": 0.3016, "step": 5152 }, { "epoch": 0.41224, "grad_norm": 1.643816351890564, "learning_rate": 6.368243795301432e-06, "loss": 0.3616, "step": 5153 }, { "epoch": 0.41232, "grad_norm": 1.3771861791610718, "learning_rate": 6.3670341134833235e-06, "loss": 0.3275, "step": 5154 }, { "epoch": 0.4124, "grad_norm": 1.796872854232788, "learning_rate": 6.3658243451775404e-06, "loss": 0.3235, "step": 5155 }, { "epoch": 0.41248, "grad_norm": 1.5238722562789917, "learning_rate": 6.3646144904606214e-06, "loss": 0.3444, "step": 5156 }, { "epoch": 0.41256, "grad_norm": 1.4715383052825928, "learning_rate": 6.363404549409109e-06, "loss": 0.3818, "step": 5157 }, { "epoch": 0.41264, "grad_norm": 1.8105343580245972, "learning_rate": 6.362194522099553e-06, "loss": 0.4152, "step": 5158 }, { "epoch": 0.41272, "grad_norm": 1.5102286338806152, "learning_rate": 6.360984408608507e-06, "loss": 0.3434, "step": 5159 }, { "epoch": 0.4128, "grad_norm": 1.4143092632293701, "learning_rate": 6.359774209012532e-06, "loss": 0.3415, "step": 5160 }, { "epoch": 0.41288, "grad_norm": 1.791418433189392, "learning_rate": 6.358563923388194e-06, "loss": 0.3569, "step": 5161 }, { "epoch": 0.41296, "grad_norm": 1.825252890586853, "learning_rate": 6.35735355181206e-06, "loss": 0.3656, "step": 5162 }, { "epoch": 0.41304, "grad_norm": 1.6053016185760498, "learning_rate": 6.3561430943607105e-06, "loss": 0.3853, "step": 5163 }, { "epoch": 0.41312, "grad_norm": 1.2607011795043945, "learning_rate": 6.354932551110724e-06, "loss": 0.259, "step": 5164 }, { "epoch": 0.4132, "grad_norm": 1.842266321182251, "learning_rate": 6.3537219221386885e-06, "loss": 0.4299, "step": 5165 }, { "epoch": 0.41328, "grad_norm": 1.4322572946548462, "learning_rate": 6.352511207521197e-06, "loss": 0.3413, "step": 5166 }, { "epoch": 0.41336, "grad_norm": 1.5515720844268799, "learning_rate": 6.3513004073348465e-06, "loss": 0.2727, "step": 5167 }, { "epoch": 0.41344, "grad_norm": 1.2557002305984497, "learning_rate": 6.35008952165624e-06, "loss": 0.3055, "step": 5168 }, { "epoch": 0.41352, "grad_norm": 1.6650270223617554, "learning_rate": 6.3488785505619875e-06, "loss": 0.3785, "step": 5169 }, { "epoch": 0.4136, "grad_norm": 1.5979880094528198, "learning_rate": 6.347667494128702e-06, "loss": 0.3962, "step": 5170 }, { "epoch": 0.41368, "grad_norm": 2.0301644802093506, "learning_rate": 6.3464563524330034e-06, "loss": 0.3584, "step": 5171 }, { "epoch": 0.41376, "grad_norm": 1.6554670333862305, "learning_rate": 6.345245125551518e-06, "loss": 0.3181, "step": 5172 }, { "epoch": 0.41384, "grad_norm": 1.6080145835876465, "learning_rate": 6.344033813560875e-06, "loss": 0.5606, "step": 5173 }, { "epoch": 0.41392, "grad_norm": 1.3876444101333618, "learning_rate": 6.342822416537708e-06, "loss": 0.4102, "step": 5174 }, { "epoch": 0.414, "grad_norm": 1.4370923042297363, "learning_rate": 6.341610934558662e-06, "loss": 0.3051, "step": 5175 }, { "epoch": 0.41408, "grad_norm": 1.7648351192474365, "learning_rate": 6.340399367700379e-06, "loss": 0.4381, "step": 5176 }, { "epoch": 0.41416, "grad_norm": 1.2672674655914307, "learning_rate": 6.339187716039514e-06, "loss": 0.2934, "step": 5177 }, { "epoch": 0.41424, "grad_norm": 1.697166085243225, "learning_rate": 6.337975979652723e-06, "loss": 0.3332, "step": 5178 }, { "epoch": 0.41432, "grad_norm": 1.230234980583191, "learning_rate": 6.336764158616669e-06, "loss": 0.302, "step": 5179 }, { "epoch": 0.4144, "grad_norm": 1.0796769857406616, "learning_rate": 6.335552253008018e-06, "loss": 0.2222, "step": 5180 }, { "epoch": 0.41448, "grad_norm": 1.8272526264190674, "learning_rate": 6.3343402629034456e-06, "loss": 0.4759, "step": 5181 }, { "epoch": 0.41456, "grad_norm": 2.0101819038391113, "learning_rate": 6.333128188379629e-06, "loss": 0.4156, "step": 5182 }, { "epoch": 0.41464, "grad_norm": 1.8950108289718628, "learning_rate": 6.3319160295132544e-06, "loss": 0.3522, "step": 5183 }, { "epoch": 0.41472, "grad_norm": 1.547398328781128, "learning_rate": 6.3307037863810075e-06, "loss": 0.274, "step": 5184 }, { "epoch": 0.4148, "grad_norm": 1.70745849609375, "learning_rate": 6.329491459059584e-06, "loss": 0.3314, "step": 5185 }, { "epoch": 0.41488, "grad_norm": 1.835158348083496, "learning_rate": 6.328279047625687e-06, "loss": 0.4241, "step": 5186 }, { "epoch": 0.41496, "grad_norm": 1.7435204982757568, "learning_rate": 6.327066552156018e-06, "loss": 0.4203, "step": 5187 }, { "epoch": 0.41504, "grad_norm": 1.393307089805603, "learning_rate": 6.325853972727288e-06, "loss": 0.2993, "step": 5188 }, { "epoch": 0.41512, "grad_norm": 1.5650931596755981, "learning_rate": 6.324641309416215e-06, "loss": 0.3302, "step": 5189 }, { "epoch": 0.4152, "grad_norm": 1.2271121740341187, "learning_rate": 6.323428562299516e-06, "loss": 0.2451, "step": 5190 }, { "epoch": 0.41528, "grad_norm": 1.5217052698135376, "learning_rate": 6.322215731453922e-06, "loss": 0.3315, "step": 5191 }, { "epoch": 0.41536, "grad_norm": 1.3272700309753418, "learning_rate": 6.321002816956162e-06, "loss": 0.273, "step": 5192 }, { "epoch": 0.41544, "grad_norm": 1.313028335571289, "learning_rate": 6.3197898188829736e-06, "loss": 0.2515, "step": 5193 }, { "epoch": 0.41552, "grad_norm": 1.6728700399398804, "learning_rate": 6.3185767373111e-06, "loss": 0.4031, "step": 5194 }, { "epoch": 0.4156, "grad_norm": 1.6574065685272217, "learning_rate": 6.317363572317289e-06, "loss": 0.3371, "step": 5195 }, { "epoch": 0.41568, "grad_norm": 1.8870322704315186, "learning_rate": 6.316150323978291e-06, "loss": 0.3752, "step": 5196 }, { "epoch": 0.41576, "grad_norm": 1.766973853111267, "learning_rate": 6.3149369923708656e-06, "loss": 0.3786, "step": 5197 }, { "epoch": 0.41584, "grad_norm": 1.1528550386428833, "learning_rate": 6.313723577571779e-06, "loss": 0.2809, "step": 5198 }, { "epoch": 0.41592, "grad_norm": 1.3488599061965942, "learning_rate": 6.3125100796577956e-06, "loss": 0.3158, "step": 5199 }, { "epoch": 0.416, "grad_norm": 1.634043574333191, "learning_rate": 6.311296498705691e-06, "loss": 0.3833, "step": 5200 }, { "epoch": 0.41608, "grad_norm": 1.544848084449768, "learning_rate": 6.310082834792246e-06, "loss": 0.3798, "step": 5201 }, { "epoch": 0.41616, "grad_norm": 1.647979736328125, "learning_rate": 6.308869087994243e-06, "loss": 0.3992, "step": 5202 }, { "epoch": 0.41624, "grad_norm": 1.9953075647354126, "learning_rate": 6.307655258388471e-06, "loss": 0.4337, "step": 5203 }, { "epoch": 0.41632, "grad_norm": 1.6946043968200684, "learning_rate": 6.306441346051727e-06, "loss": 0.4, "step": 5204 }, { "epoch": 0.4164, "grad_norm": 1.4132634401321411, "learning_rate": 6.305227351060809e-06, "loss": 0.2904, "step": 5205 }, { "epoch": 0.41648, "grad_norm": 1.3155604600906372, "learning_rate": 6.304013273492526e-06, "loss": 0.2809, "step": 5206 }, { "epoch": 0.41656, "grad_norm": 1.562078595161438, "learning_rate": 6.302799113423686e-06, "loss": 0.323, "step": 5207 }, { "epoch": 0.41664, "grad_norm": 1.5895940065383911, "learning_rate": 6.3015848709311055e-06, "loss": 0.332, "step": 5208 }, { "epoch": 0.41672, "grad_norm": 1.8904573917388916, "learning_rate": 6.300370546091605e-06, "loss": 0.4672, "step": 5209 }, { "epoch": 0.4168, "grad_norm": 1.3617215156555176, "learning_rate": 6.299156138982011e-06, "loss": 0.2794, "step": 5210 }, { "epoch": 0.41688, "grad_norm": 1.2635133266448975, "learning_rate": 6.2979416496791545e-06, "loss": 0.2584, "step": 5211 }, { "epoch": 0.41696, "grad_norm": 1.8867225646972656, "learning_rate": 6.2967270782598735e-06, "loss": 0.4578, "step": 5212 }, { "epoch": 0.41704, "grad_norm": 1.5454792976379395, "learning_rate": 6.295512424801009e-06, "loss": 0.3402, "step": 5213 }, { "epoch": 0.41712, "grad_norm": 1.5802242755889893, "learning_rate": 6.2942976893794085e-06, "loss": 0.2926, "step": 5214 }, { "epoch": 0.4172, "grad_norm": 1.3554202318191528, "learning_rate": 6.293082872071923e-06, "loss": 0.3325, "step": 5215 }, { "epoch": 0.41728, "grad_norm": 1.8882520198822021, "learning_rate": 6.291867972955411e-06, "loss": 0.463, "step": 5216 }, { "epoch": 0.41736, "grad_norm": 1.6655519008636475, "learning_rate": 6.290652992106735e-06, "loss": 0.3663, "step": 5217 }, { "epoch": 0.41744, "grad_norm": 1.787269115447998, "learning_rate": 6.289437929602763e-06, "loss": 0.358, "step": 5218 }, { "epoch": 0.41752, "grad_norm": 1.5246529579162598, "learning_rate": 6.288222785520368e-06, "loss": 0.3243, "step": 5219 }, { "epoch": 0.4176, "grad_norm": 1.75812828540802, "learning_rate": 6.287007559936426e-06, "loss": 0.3917, "step": 5220 }, { "epoch": 0.41768, "grad_norm": 1.2533934116363525, "learning_rate": 6.285792252927826e-06, "loss": 0.3793, "step": 5221 }, { "epoch": 0.41776, "grad_norm": 1.364107608795166, "learning_rate": 6.284576864571449e-06, "loss": 0.3192, "step": 5222 }, { "epoch": 0.41784, "grad_norm": 1.3123117685317993, "learning_rate": 6.283361394944193e-06, "loss": 0.3421, "step": 5223 }, { "epoch": 0.41792, "grad_norm": 1.1852989196777344, "learning_rate": 6.282145844122956e-06, "loss": 0.2568, "step": 5224 }, { "epoch": 0.418, "grad_norm": 1.9244329929351807, "learning_rate": 6.2809302121846415e-06, "loss": 0.5014, "step": 5225 }, { "epoch": 0.41808, "grad_norm": 1.4049652814865112, "learning_rate": 6.279714499206157e-06, "loss": 0.3108, "step": 5226 }, { "epoch": 0.41816, "grad_norm": 1.9336098432540894, "learning_rate": 6.27849870526442e-06, "loss": 0.3474, "step": 5227 }, { "epoch": 0.41824, "grad_norm": 1.9458836317062378, "learning_rate": 6.277282830436346e-06, "loss": 0.3939, "step": 5228 }, { "epoch": 0.41832, "grad_norm": 2.153730869293213, "learning_rate": 6.276066874798862e-06, "loss": 0.4109, "step": 5229 }, { "epoch": 0.4184, "grad_norm": 1.5371249914169312, "learning_rate": 6.274850838428896e-06, "loss": 0.3472, "step": 5230 }, { "epoch": 0.41848, "grad_norm": 1.760744571685791, "learning_rate": 6.273634721403385e-06, "loss": 0.3729, "step": 5231 }, { "epoch": 0.41856, "grad_norm": 1.7278648614883423, "learning_rate": 6.272418523799266e-06, "loss": 0.3745, "step": 5232 }, { "epoch": 0.41864, "grad_norm": 1.7429898977279663, "learning_rate": 6.271202245693484e-06, "loss": 0.3522, "step": 5233 }, { "epoch": 0.41872, "grad_norm": 1.5041425228118896, "learning_rate": 6.269985887162988e-06, "loss": 0.3114, "step": 5234 }, { "epoch": 0.4188, "grad_norm": 1.4007189273834229, "learning_rate": 6.268769448284736e-06, "loss": 0.2983, "step": 5235 }, { "epoch": 0.41888, "grad_norm": 1.4600435495376587, "learning_rate": 6.267552929135688e-06, "loss": 0.3146, "step": 5236 }, { "epoch": 0.41896, "grad_norm": 1.7335883378982544, "learning_rate": 6.266336329792804e-06, "loss": 0.5255, "step": 5237 }, { "epoch": 0.41904, "grad_norm": 1.3453007936477661, "learning_rate": 6.265119650333059e-06, "loss": 0.3394, "step": 5238 }, { "epoch": 0.41912, "grad_norm": 1.1924803256988525, "learning_rate": 6.263902890833427e-06, "loss": 0.2937, "step": 5239 }, { "epoch": 0.4192, "grad_norm": 1.4555060863494873, "learning_rate": 6.2626860513708875e-06, "loss": 0.2826, "step": 5240 }, { "epoch": 0.41928, "grad_norm": 1.6543127298355103, "learning_rate": 6.261469132022426e-06, "loss": 0.3925, "step": 5241 }, { "epoch": 0.41936, "grad_norm": 1.2936607599258423, "learning_rate": 6.260252132865035e-06, "loss": 0.2698, "step": 5242 }, { "epoch": 0.41944, "grad_norm": 1.5900158882141113, "learning_rate": 6.259035053975708e-06, "loss": 0.35, "step": 5243 }, { "epoch": 0.41952, "grad_norm": 1.5317350625991821, "learning_rate": 6.257817895431446e-06, "loss": 0.3286, "step": 5244 }, { "epoch": 0.4196, "grad_norm": 1.4321229457855225, "learning_rate": 6.256600657309254e-06, "loss": 0.2899, "step": 5245 }, { "epoch": 0.41968, "grad_norm": 1.6110750436782837, "learning_rate": 6.255383339686143e-06, "loss": 0.3403, "step": 5246 }, { "epoch": 0.41976, "grad_norm": 1.6724129915237427, "learning_rate": 6.254165942639128e-06, "loss": 0.3894, "step": 5247 }, { "epoch": 0.41984, "grad_norm": 1.6472164392471313, "learning_rate": 6.252948466245232e-06, "loss": 0.4126, "step": 5248 }, { "epoch": 0.41992, "grad_norm": 1.4509929418563843, "learning_rate": 6.251730910581478e-06, "loss": 0.3197, "step": 5249 }, { "epoch": 0.42, "grad_norm": 1.9968012571334839, "learning_rate": 6.250513275724896e-06, "loss": 0.3536, "step": 5250 }, { "epoch": 0.42008, "grad_norm": 1.2318665981292725, "learning_rate": 6.249295561752525e-06, "loss": 0.322, "step": 5251 }, { "epoch": 0.42016, "grad_norm": 1.3581112623214722, "learning_rate": 6.248077768741404e-06, "loss": 0.2863, "step": 5252 }, { "epoch": 0.42024, "grad_norm": 1.4493217468261719, "learning_rate": 6.246859896768579e-06, "loss": 0.2835, "step": 5253 }, { "epoch": 0.42032, "grad_norm": 1.253295660018921, "learning_rate": 6.245641945911099e-06, "loss": 0.2689, "step": 5254 }, { "epoch": 0.4204, "grad_norm": 1.5289180278778076, "learning_rate": 6.244423916246023e-06, "loss": 0.3176, "step": 5255 }, { "epoch": 0.42048, "grad_norm": 1.4782484769821167, "learning_rate": 6.243205807850408e-06, "loss": 0.3583, "step": 5256 }, { "epoch": 0.42056, "grad_norm": 1.5398263931274414, "learning_rate": 6.241987620801322e-06, "loss": 0.3898, "step": 5257 }, { "epoch": 0.42064, "grad_norm": 1.4873570203781128, "learning_rate": 6.240769355175834e-06, "loss": 0.3556, "step": 5258 }, { "epoch": 0.42072, "grad_norm": 1.5727860927581787, "learning_rate": 6.239551011051021e-06, "loss": 0.2808, "step": 5259 }, { "epoch": 0.4208, "grad_norm": 1.371330738067627, "learning_rate": 6.2383325885039635e-06, "loss": 0.2745, "step": 5260 }, { "epoch": 0.42088, "grad_norm": 1.5367326736450195, "learning_rate": 6.237114087611747e-06, "loss": 0.4118, "step": 5261 }, { "epoch": 0.42096, "grad_norm": 1.6745377779006958, "learning_rate": 6.23589550845146e-06, "loss": 0.3761, "step": 5262 }, { "epoch": 0.42104, "grad_norm": 1.4240877628326416, "learning_rate": 6.234676851100201e-06, "loss": 0.3344, "step": 5263 }, { "epoch": 0.42112, "grad_norm": 1.6593492031097412, "learning_rate": 6.233458115635067e-06, "loss": 0.4413, "step": 5264 }, { "epoch": 0.4212, "grad_norm": 1.6215040683746338, "learning_rate": 6.232239302133167e-06, "loss": 0.4039, "step": 5265 }, { "epoch": 0.42128, "grad_norm": 1.453888177871704, "learning_rate": 6.23102041067161e-06, "loss": 0.253, "step": 5266 }, { "epoch": 0.42136, "grad_norm": 1.473840355873108, "learning_rate": 6.22980144132751e-06, "loss": 0.2827, "step": 5267 }, { "epoch": 0.42144, "grad_norm": 2.1219794750213623, "learning_rate": 6.2285823941779864e-06, "loss": 0.374, "step": 5268 }, { "epoch": 0.42152, "grad_norm": 1.2442418336868286, "learning_rate": 6.227363269300166e-06, "loss": 0.2711, "step": 5269 }, { "epoch": 0.4216, "grad_norm": 1.3598564863204956, "learning_rate": 6.226144066771179e-06, "loss": 0.2622, "step": 5270 }, { "epoch": 0.42168, "grad_norm": 2.1796743869781494, "learning_rate": 6.224924786668161e-06, "loss": 0.4318, "step": 5271 }, { "epoch": 0.42176, "grad_norm": 1.6060525178909302, "learning_rate": 6.2237054290682475e-06, "loss": 0.2812, "step": 5272 }, { "epoch": 0.42184, "grad_norm": 1.5547593832015991, "learning_rate": 6.2224859940485874e-06, "loss": 0.3616, "step": 5273 }, { "epoch": 0.42192, "grad_norm": 1.3605103492736816, "learning_rate": 6.221266481686328e-06, "loss": 0.2399, "step": 5274 }, { "epoch": 0.422, "grad_norm": 1.3548734188079834, "learning_rate": 6.220046892058626e-06, "loss": 0.2625, "step": 5275 }, { "epoch": 0.42208, "grad_norm": 1.6121068000793457, "learning_rate": 6.218827225242638e-06, "loss": 0.3836, "step": 5276 }, { "epoch": 0.42216, "grad_norm": 1.4999924898147583, "learning_rate": 6.217607481315531e-06, "loss": 0.3994, "step": 5277 }, { "epoch": 0.42224, "grad_norm": 1.484944462776184, "learning_rate": 6.216387660354472e-06, "loss": 0.3187, "step": 5278 }, { "epoch": 0.42232, "grad_norm": 1.4199094772338867, "learning_rate": 6.215167762436637e-06, "loss": 0.3337, "step": 5279 }, { "epoch": 0.4224, "grad_norm": 1.7457900047302246, "learning_rate": 6.213947787639203e-06, "loss": 0.3496, "step": 5280 }, { "epoch": 0.42248, "grad_norm": 1.5127125978469849, "learning_rate": 6.212727736039354e-06, "loss": 0.3331, "step": 5281 }, { "epoch": 0.42256, "grad_norm": 1.618870735168457, "learning_rate": 6.211507607714277e-06, "loss": 0.2917, "step": 5282 }, { "epoch": 0.42264, "grad_norm": 1.5503782033920288, "learning_rate": 6.210287402741171e-06, "loss": 0.3699, "step": 5283 }, { "epoch": 0.42272, "grad_norm": 1.6964318752288818, "learning_rate": 6.209067121197228e-06, "loss": 0.3575, "step": 5284 }, { "epoch": 0.4228, "grad_norm": 1.616135835647583, "learning_rate": 6.207846763159655e-06, "loss": 0.3734, "step": 5285 }, { "epoch": 0.42288, "grad_norm": 1.5970280170440674, "learning_rate": 6.206626328705659e-06, "loss": 0.3787, "step": 5286 }, { "epoch": 0.42296, "grad_norm": 1.7362499237060547, "learning_rate": 6.205405817912452e-06, "loss": 0.3442, "step": 5287 }, { "epoch": 0.42304, "grad_norm": 1.6331170797348022, "learning_rate": 6.204185230857252e-06, "loss": 0.3461, "step": 5288 }, { "epoch": 0.42312, "grad_norm": 2.0485804080963135, "learning_rate": 6.202964567617283e-06, "loss": 0.3591, "step": 5289 }, { "epoch": 0.4232, "grad_norm": 1.9766350984573364, "learning_rate": 6.20174382826977e-06, "loss": 0.4748, "step": 5290 }, { "epoch": 0.42328, "grad_norm": 1.2820550203323364, "learning_rate": 6.200523012891945e-06, "loss": 0.2704, "step": 5291 }, { "epoch": 0.42336, "grad_norm": 1.3562285900115967, "learning_rate": 6.199302121561048e-06, "loss": 0.329, "step": 5292 }, { "epoch": 0.42344, "grad_norm": 1.444642186164856, "learning_rate": 6.198081154354317e-06, "loss": 0.4026, "step": 5293 }, { "epoch": 0.42352, "grad_norm": 1.574292778968811, "learning_rate": 6.196860111349001e-06, "loss": 0.299, "step": 5294 }, { "epoch": 0.4236, "grad_norm": 1.5647331476211548, "learning_rate": 6.19563899262235e-06, "loss": 0.2892, "step": 5295 }, { "epoch": 0.42368, "grad_norm": 1.810318112373352, "learning_rate": 6.194417798251622e-06, "loss": 0.3837, "step": 5296 }, { "epoch": 0.42376, "grad_norm": 1.7629601955413818, "learning_rate": 6.193196528314073e-06, "loss": 0.3893, "step": 5297 }, { "epoch": 0.42384, "grad_norm": 1.5911134481430054, "learning_rate": 6.191975182886976e-06, "loss": 0.3948, "step": 5298 }, { "epoch": 0.42392, "grad_norm": 1.661783218383789, "learning_rate": 6.1907537620475955e-06, "loss": 0.3691, "step": 5299 }, { "epoch": 0.424, "grad_norm": 1.1115741729736328, "learning_rate": 6.189532265873209e-06, "loss": 0.2339, "step": 5300 }, { "epoch": 0.42408, "grad_norm": 1.4616998434066772, "learning_rate": 6.188310694441097e-06, "loss": 0.2663, "step": 5301 }, { "epoch": 0.42416, "grad_norm": 1.6125479936599731, "learning_rate": 6.187089047828542e-06, "loss": 0.3089, "step": 5302 }, { "epoch": 0.42424, "grad_norm": 1.8839313983917236, "learning_rate": 6.1858673261128364e-06, "loss": 0.4514, "step": 5303 }, { "epoch": 0.42432, "grad_norm": 1.655595302581787, "learning_rate": 6.184645529371272e-06, "loss": 0.3448, "step": 5304 }, { "epoch": 0.4244, "grad_norm": 1.311193585395813, "learning_rate": 6.183423657681149e-06, "loss": 0.2564, "step": 5305 }, { "epoch": 0.42448, "grad_norm": 2.025151491165161, "learning_rate": 6.182201711119771e-06, "loss": 0.5696, "step": 5306 }, { "epoch": 0.42456, "grad_norm": 1.321874737739563, "learning_rate": 6.180979689764447e-06, "loss": 0.3092, "step": 5307 }, { "epoch": 0.42464, "grad_norm": 2.123602867126465, "learning_rate": 6.179757593692488e-06, "loss": 0.4035, "step": 5308 }, { "epoch": 0.42472, "grad_norm": 1.5167906284332275, "learning_rate": 6.178535422981216e-06, "loss": 0.3193, "step": 5309 }, { "epoch": 0.4248, "grad_norm": 1.7157925367355347, "learning_rate": 6.17731317770795e-06, "loss": 0.3725, "step": 5310 }, { "epoch": 0.42488, "grad_norm": 1.4427510499954224, "learning_rate": 6.176090857950018e-06, "loss": 0.2961, "step": 5311 }, { "epoch": 0.42496, "grad_norm": 1.3995834589004517, "learning_rate": 6.174868463784752e-06, "loss": 0.3033, "step": 5312 }, { "epoch": 0.42504, "grad_norm": 1.511171817779541, "learning_rate": 6.173645995289491e-06, "loss": 0.3218, "step": 5313 }, { "epoch": 0.42512, "grad_norm": 1.7538864612579346, "learning_rate": 6.172423452541574e-06, "loss": 0.4702, "step": 5314 }, { "epoch": 0.4252, "grad_norm": 1.4485112428665161, "learning_rate": 6.1712008356183485e-06, "loss": 0.2855, "step": 5315 }, { "epoch": 0.42528, "grad_norm": 1.475102186203003, "learning_rate": 6.169978144597164e-06, "loss": 0.3669, "step": 5316 }, { "epoch": 0.42536, "grad_norm": 1.569522500038147, "learning_rate": 6.168755379555378e-06, "loss": 0.3393, "step": 5317 }, { "epoch": 0.42544, "grad_norm": 1.7826415300369263, "learning_rate": 6.167532540570351e-06, "loss": 0.3331, "step": 5318 }, { "epoch": 0.42552, "grad_norm": 1.3612468242645264, "learning_rate": 6.166309627719444e-06, "loss": 0.3009, "step": 5319 }, { "epoch": 0.4256, "grad_norm": 1.4943021535873413, "learning_rate": 6.165086641080032e-06, "loss": 0.3387, "step": 5320 }, { "epoch": 0.42568, "grad_norm": 1.629370927810669, "learning_rate": 6.163863580729484e-06, "loss": 0.3885, "step": 5321 }, { "epoch": 0.42576, "grad_norm": 2.004427433013916, "learning_rate": 6.162640446745184e-06, "loss": 0.4166, "step": 5322 }, { "epoch": 0.42584, "grad_norm": 1.5527446269989014, "learning_rate": 6.161417239204512e-06, "loss": 0.3396, "step": 5323 }, { "epoch": 0.42592, "grad_norm": 1.4790587425231934, "learning_rate": 6.160193958184858e-06, "loss": 0.3218, "step": 5324 }, { "epoch": 0.426, "grad_norm": 1.5748834609985352, "learning_rate": 6.158970603763615e-06, "loss": 0.3968, "step": 5325 }, { "epoch": 0.42608, "grad_norm": 1.7657493352890015, "learning_rate": 6.157747176018177e-06, "loss": 0.3378, "step": 5326 }, { "epoch": 0.42616, "grad_norm": 2.0443902015686035, "learning_rate": 6.15652367502595e-06, "loss": 0.4723, "step": 5327 }, { "epoch": 0.42624, "grad_norm": 1.6636515855789185, "learning_rate": 6.155300100864341e-06, "loss": 0.3116, "step": 5328 }, { "epoch": 0.42632, "grad_norm": 1.5545495748519897, "learning_rate": 6.154076453610759e-06, "loss": 0.3614, "step": 5329 }, { "epoch": 0.4264, "grad_norm": 1.6116411685943604, "learning_rate": 6.152852733342623e-06, "loss": 0.3714, "step": 5330 }, { "epoch": 0.42648, "grad_norm": 1.3033878803253174, "learning_rate": 6.151628940137351e-06, "loss": 0.3168, "step": 5331 }, { "epoch": 0.42656, "grad_norm": 1.3937110900878906, "learning_rate": 6.150405074072369e-06, "loss": 0.274, "step": 5332 }, { "epoch": 0.42664, "grad_norm": 1.9616461992263794, "learning_rate": 6.1491811352251085e-06, "loss": 0.4114, "step": 5333 }, { "epoch": 0.42672, "grad_norm": 1.5841161012649536, "learning_rate": 6.1479571236730005e-06, "loss": 0.3684, "step": 5334 }, { "epoch": 0.4268, "grad_norm": 1.403443455696106, "learning_rate": 6.146733039493487e-06, "loss": 0.2633, "step": 5335 }, { "epoch": 0.42688, "grad_norm": 1.5101227760314941, "learning_rate": 6.145508882764013e-06, "loss": 0.2751, "step": 5336 }, { "epoch": 0.42696, "grad_norm": 1.6489311456680298, "learning_rate": 6.144284653562024e-06, "loss": 0.3991, "step": 5337 }, { "epoch": 0.42704, "grad_norm": 1.270580530166626, "learning_rate": 6.143060351964973e-06, "loss": 0.2707, "step": 5338 }, { "epoch": 0.42712, "grad_norm": 1.7063907384872437, "learning_rate": 6.141835978050318e-06, "loss": 0.5227, "step": 5339 }, { "epoch": 0.4272, "grad_norm": 1.2827900648117065, "learning_rate": 6.140611531895522e-06, "loss": 0.3149, "step": 5340 }, { "epoch": 0.42728, "grad_norm": 1.45085608959198, "learning_rate": 6.139387013578051e-06, "loss": 0.3797, "step": 5341 }, { "epoch": 0.42736, "grad_norm": 1.335211992263794, "learning_rate": 6.138162423175375e-06, "loss": 0.3144, "step": 5342 }, { "epoch": 0.42744, "grad_norm": 1.3709321022033691, "learning_rate": 6.136937760764972e-06, "loss": 0.2786, "step": 5343 }, { "epoch": 0.42752, "grad_norm": 1.9491933584213257, "learning_rate": 6.13571302642432e-06, "loss": 0.3513, "step": 5344 }, { "epoch": 0.4276, "grad_norm": 1.6201810836791992, "learning_rate": 6.1344882202309075e-06, "loss": 0.3616, "step": 5345 }, { "epoch": 0.42768, "grad_norm": 1.6798819303512573, "learning_rate": 6.133263342262219e-06, "loss": 0.3304, "step": 5346 }, { "epoch": 0.42776, "grad_norm": 1.485134243965149, "learning_rate": 6.132038392595751e-06, "loss": 0.2942, "step": 5347 }, { "epoch": 0.42784, "grad_norm": 1.5289043188095093, "learning_rate": 6.130813371309002e-06, "loss": 0.311, "step": 5348 }, { "epoch": 0.42792, "grad_norm": 1.708020567893982, "learning_rate": 6.129588278479475e-06, "loss": 0.3565, "step": 5349 }, { "epoch": 0.428, "grad_norm": 1.249941349029541, "learning_rate": 6.1283631141846755e-06, "loss": 0.239, "step": 5350 }, { "epoch": 0.42808, "grad_norm": 1.3397772312164307, "learning_rate": 6.127137878502118e-06, "loss": 0.3618, "step": 5351 }, { "epoch": 0.42816, "grad_norm": 1.3979480266571045, "learning_rate": 6.125912571509319e-06, "loss": 0.2997, "step": 5352 }, { "epoch": 0.42824, "grad_norm": 1.456049919128418, "learning_rate": 6.124687193283799e-06, "loss": 0.315, "step": 5353 }, { "epoch": 0.42832, "grad_norm": 1.965668797492981, "learning_rate": 6.123461743903084e-06, "loss": 0.4174, "step": 5354 }, { "epoch": 0.4284, "grad_norm": 1.4012924432754517, "learning_rate": 6.122236223444703e-06, "loss": 0.325, "step": 5355 }, { "epoch": 0.42848, "grad_norm": 1.5004931688308716, "learning_rate": 6.121010631986192e-06, "loss": 0.3237, "step": 5356 }, { "epoch": 0.42856, "grad_norm": 1.6268855333328247, "learning_rate": 6.119784969605088e-06, "loss": 0.4094, "step": 5357 }, { "epoch": 0.42864, "grad_norm": 1.6038322448730469, "learning_rate": 6.1185592363789355e-06, "loss": 0.2773, "step": 5358 }, { "epoch": 0.42872, "grad_norm": 1.811691403388977, "learning_rate": 6.117333432385283e-06, "loss": 0.3442, "step": 5359 }, { "epoch": 0.4288, "grad_norm": 1.5288585424423218, "learning_rate": 6.116107557701685e-06, "loss": 0.3593, "step": 5360 }, { "epoch": 0.42888, "grad_norm": 1.7061458826065063, "learning_rate": 6.114881612405694e-06, "loss": 0.4021, "step": 5361 }, { "epoch": 0.42896, "grad_norm": 1.6404433250427246, "learning_rate": 6.1136555965748735e-06, "loss": 0.3261, "step": 5362 }, { "epoch": 0.42904, "grad_norm": 1.6574904918670654, "learning_rate": 6.11242951028679e-06, "loss": 0.4901, "step": 5363 }, { "epoch": 0.42912, "grad_norm": 1.2963271141052246, "learning_rate": 6.111203353619014e-06, "loss": 0.2512, "step": 5364 }, { "epoch": 0.4292, "grad_norm": 1.7801556587219238, "learning_rate": 6.109977126649121e-06, "loss": 0.3984, "step": 5365 }, { "epoch": 0.42928, "grad_norm": 1.7426446676254272, "learning_rate": 6.108750829454688e-06, "loss": 0.3063, "step": 5366 }, { "epoch": 0.42936, "grad_norm": 1.6115596294403076, "learning_rate": 6.1075244621133e-06, "loss": 0.3511, "step": 5367 }, { "epoch": 0.42944, "grad_norm": 1.2986208200454712, "learning_rate": 6.106298024702546e-06, "loss": 0.3206, "step": 5368 }, { "epoch": 0.42952, "grad_norm": 1.9597961902618408, "learning_rate": 6.105071517300017e-06, "loss": 0.3735, "step": 5369 }, { "epoch": 0.4296, "grad_norm": 1.4968289136886597, "learning_rate": 6.10384493998331e-06, "loss": 0.2758, "step": 5370 }, { "epoch": 0.42968, "grad_norm": 1.4992220401763916, "learning_rate": 6.102618292830029e-06, "loss": 0.3457, "step": 5371 }, { "epoch": 0.42976, "grad_norm": 1.5327579975128174, "learning_rate": 6.1013915759177765e-06, "loss": 0.3693, "step": 5372 }, { "epoch": 0.42984, "grad_norm": 1.6249293088912964, "learning_rate": 6.1001647893241634e-06, "loss": 0.3094, "step": 5373 }, { "epoch": 0.42992, "grad_norm": 1.7367304563522339, "learning_rate": 6.098937933126806e-06, "loss": 0.341, "step": 5374 }, { "epoch": 0.43, "grad_norm": 1.3340009450912476, "learning_rate": 6.097711007403323e-06, "loss": 0.279, "step": 5375 }, { "epoch": 0.43008, "grad_norm": 1.6486116647720337, "learning_rate": 6.096484012231337e-06, "loss": 0.3304, "step": 5376 }, { "epoch": 0.43016, "grad_norm": 1.1886416673660278, "learning_rate": 6.095256947688478e-06, "loss": 0.2639, "step": 5377 }, { "epoch": 0.43024, "grad_norm": 1.6678515672683716, "learning_rate": 6.094029813852376e-06, "loss": 0.3143, "step": 5378 }, { "epoch": 0.43032, "grad_norm": 1.7223871946334839, "learning_rate": 6.0928026108006675e-06, "loss": 0.4146, "step": 5379 }, { "epoch": 0.4304, "grad_norm": 1.9803104400634766, "learning_rate": 6.091575338610994e-06, "loss": 0.4194, "step": 5380 }, { "epoch": 0.43048, "grad_norm": 1.469686508178711, "learning_rate": 6.090347997361002e-06, "loss": 0.3625, "step": 5381 }, { "epoch": 0.43056, "grad_norm": 1.2683942317962646, "learning_rate": 6.089120587128341e-06, "loss": 0.258, "step": 5382 }, { "epoch": 0.43064, "grad_norm": 1.4345426559448242, "learning_rate": 6.087893107990665e-06, "loss": 0.3373, "step": 5383 }, { "epoch": 0.43072, "grad_norm": 1.7910370826721191, "learning_rate": 6.0866655600256305e-06, "loss": 0.4067, "step": 5384 }, { "epoch": 0.4308, "grad_norm": 1.5200092792510986, "learning_rate": 6.085437943310902e-06, "loss": 0.2964, "step": 5385 }, { "epoch": 0.43088, "grad_norm": 1.9939285516738892, "learning_rate": 6.084210257924148e-06, "loss": 0.3602, "step": 5386 }, { "epoch": 0.43096, "grad_norm": 1.6643867492675781, "learning_rate": 6.082982503943038e-06, "loss": 0.3337, "step": 5387 }, { "epoch": 0.43104, "grad_norm": 1.9779936075210571, "learning_rate": 6.081754681445249e-06, "loss": 0.3836, "step": 5388 }, { "epoch": 0.43112, "grad_norm": 1.9725874662399292, "learning_rate": 6.080526790508461e-06, "loss": 0.3404, "step": 5389 }, { "epoch": 0.4312, "grad_norm": 1.6242984533309937, "learning_rate": 6.079298831210357e-06, "loss": 0.3975, "step": 5390 }, { "epoch": 0.43128, "grad_norm": 1.8685696125030518, "learning_rate": 6.078070803628629e-06, "loss": 0.3465, "step": 5391 }, { "epoch": 0.43136, "grad_norm": 1.4240777492523193, "learning_rate": 6.076842707840969e-06, "loss": 0.2717, "step": 5392 }, { "epoch": 0.43144, "grad_norm": 1.6460999250411987, "learning_rate": 6.0756145439250725e-06, "loss": 0.4144, "step": 5393 }, { "epoch": 0.43152, "grad_norm": 1.7656724452972412, "learning_rate": 6.074386311958643e-06, "loss": 0.383, "step": 5394 }, { "epoch": 0.4316, "grad_norm": 1.7070986032485962, "learning_rate": 6.073158012019388e-06, "loss": 0.3203, "step": 5395 }, { "epoch": 0.43168, "grad_norm": 2.166088581085205, "learning_rate": 6.071929644185014e-06, "loss": 0.454, "step": 5396 }, { "epoch": 0.43176, "grad_norm": 1.7778912782669067, "learning_rate": 6.07070120853324e-06, "loss": 0.3748, "step": 5397 }, { "epoch": 0.43184, "grad_norm": 1.4228088855743408, "learning_rate": 6.069472705141781e-06, "loss": 0.3252, "step": 5398 }, { "epoch": 0.43192, "grad_norm": 1.510135293006897, "learning_rate": 6.068244134088363e-06, "loss": 0.4119, "step": 5399 }, { "epoch": 0.432, "grad_norm": 1.453439712524414, "learning_rate": 6.067015495450715e-06, "loss": 0.2788, "step": 5400 }, { "epoch": 0.43208, "grad_norm": 1.4725075960159302, "learning_rate": 6.065786789306566e-06, "loss": 0.3727, "step": 5401 }, { "epoch": 0.43216, "grad_norm": 1.9456934928894043, "learning_rate": 6.064558015733653e-06, "loss": 0.3712, "step": 5402 }, { "epoch": 0.43224, "grad_norm": 1.5379313230514526, "learning_rate": 6.063329174809715e-06, "loss": 0.3203, "step": 5403 }, { "epoch": 0.43232, "grad_norm": 1.5046453475952148, "learning_rate": 6.0621002666124995e-06, "loss": 0.3101, "step": 5404 }, { "epoch": 0.4324, "grad_norm": 1.9868191480636597, "learning_rate": 6.060871291219753e-06, "loss": 0.3739, "step": 5405 }, { "epoch": 0.43248, "grad_norm": 1.5251084566116333, "learning_rate": 6.0596422487092295e-06, "loss": 0.322, "step": 5406 }, { "epoch": 0.43256, "grad_norm": 1.3554657697677612, "learning_rate": 6.058413139158687e-06, "loss": 0.3143, "step": 5407 }, { "epoch": 0.43264, "grad_norm": 1.592686414718628, "learning_rate": 6.0571839626458875e-06, "loss": 0.3187, "step": 5408 }, { "epoch": 0.43272, "grad_norm": 1.3215630054473877, "learning_rate": 6.055954719248595e-06, "loss": 0.308, "step": 5409 }, { "epoch": 0.4328, "grad_norm": 1.2852091789245605, "learning_rate": 6.054725409044579e-06, "loss": 0.2711, "step": 5410 }, { "epoch": 0.43288, "grad_norm": 1.8097467422485352, "learning_rate": 6.0534960321116175e-06, "loss": 0.3427, "step": 5411 }, { "epoch": 0.43296, "grad_norm": 1.3292269706726074, "learning_rate": 6.052266588527488e-06, "loss": 0.3537, "step": 5412 }, { "epoch": 0.43304, "grad_norm": 1.6236307621002197, "learning_rate": 6.051037078369972e-06, "loss": 0.4301, "step": 5413 }, { "epoch": 0.43312, "grad_norm": 1.4823275804519653, "learning_rate": 6.049807501716856e-06, "loss": 0.3199, "step": 5414 }, { "epoch": 0.4332, "grad_norm": 1.5742340087890625, "learning_rate": 6.048577858645932e-06, "loss": 0.3312, "step": 5415 }, { "epoch": 0.43328, "grad_norm": 1.4714792966842651, "learning_rate": 6.047348149234995e-06, "loss": 0.3228, "step": 5416 }, { "epoch": 0.43336, "grad_norm": 1.3467466831207275, "learning_rate": 6.046118373561845e-06, "loss": 0.3045, "step": 5417 }, { "epoch": 0.43344, "grad_norm": 1.4545137882232666, "learning_rate": 6.044888531704287e-06, "loss": 0.3308, "step": 5418 }, { "epoch": 0.43352, "grad_norm": 1.3145484924316406, "learning_rate": 6.043658623740127e-06, "loss": 0.2933, "step": 5419 }, { "epoch": 0.4336, "grad_norm": 1.6407479047775269, "learning_rate": 6.042428649747177e-06, "loss": 0.3418, "step": 5420 }, { "epoch": 0.43368, "grad_norm": 1.6189351081848145, "learning_rate": 6.041198609803256e-06, "loss": 0.3938, "step": 5421 }, { "epoch": 0.43376, "grad_norm": 1.1933389902114868, "learning_rate": 6.039968503986182e-06, "loss": 0.2499, "step": 5422 }, { "epoch": 0.43384, "grad_norm": 1.6682744026184082, "learning_rate": 6.038738332373781e-06, "loss": 0.3783, "step": 5423 }, { "epoch": 0.43392, "grad_norm": 1.450366497039795, "learning_rate": 6.037508095043881e-06, "loss": 0.3095, "step": 5424 }, { "epoch": 0.434, "grad_norm": 1.6315852403640747, "learning_rate": 6.036277792074316e-06, "loss": 0.3228, "step": 5425 }, { "epoch": 0.43408, "grad_norm": 1.3595662117004395, "learning_rate": 6.035047423542922e-06, "loss": 0.2532, "step": 5426 }, { "epoch": 0.43416, "grad_norm": 1.3640127182006836, "learning_rate": 6.033816989527541e-06, "loss": 0.3054, "step": 5427 }, { "epoch": 0.43424, "grad_norm": 1.3128430843353271, "learning_rate": 6.032586490106018e-06, "loss": 0.2737, "step": 5428 }, { "epoch": 0.43432, "grad_norm": 1.4516959190368652, "learning_rate": 6.0313559253562016e-06, "loss": 0.3047, "step": 5429 }, { "epoch": 0.4344, "grad_norm": 1.5541858673095703, "learning_rate": 6.030125295355949e-06, "loss": 0.2977, "step": 5430 }, { "epoch": 0.43448, "grad_norm": 1.3899632692337036, "learning_rate": 6.028894600183114e-06, "loss": 0.2981, "step": 5431 }, { "epoch": 0.43456, "grad_norm": 1.6935724020004272, "learning_rate": 6.027663839915561e-06, "loss": 0.4806, "step": 5432 }, { "epoch": 0.43464, "grad_norm": 1.4013090133666992, "learning_rate": 6.026433014631155e-06, "loss": 0.3428, "step": 5433 }, { "epoch": 0.43472, "grad_norm": 1.6702803373336792, "learning_rate": 6.025202124407766e-06, "loss": 0.3447, "step": 5434 }, { "epoch": 0.4348, "grad_norm": 1.4864176511764526, "learning_rate": 6.023971169323272e-06, "loss": 0.3252, "step": 5435 }, { "epoch": 0.43488, "grad_norm": 1.4419050216674805, "learning_rate": 6.022740149455547e-06, "loss": 0.314, "step": 5436 }, { "epoch": 0.43496, "grad_norm": 1.489224910736084, "learning_rate": 6.021509064882473e-06, "loss": 0.353, "step": 5437 }, { "epoch": 0.43504, "grad_norm": 1.4233548641204834, "learning_rate": 6.0202779156819405e-06, "loss": 0.3279, "step": 5438 }, { "epoch": 0.43512, "grad_norm": 1.631506323814392, "learning_rate": 6.019046701931836e-06, "loss": 0.4391, "step": 5439 }, { "epoch": 0.4352, "grad_norm": 1.7712832689285278, "learning_rate": 6.0178154237100575e-06, "loss": 0.3618, "step": 5440 }, { "epoch": 0.43528, "grad_norm": 1.745080828666687, "learning_rate": 6.016584081094503e-06, "loss": 0.5306, "step": 5441 }, { "epoch": 0.43536, "grad_norm": 1.2281311750411987, "learning_rate": 6.015352674163075e-06, "loss": 0.2491, "step": 5442 }, { "epoch": 0.43544, "grad_norm": 1.725820779800415, "learning_rate": 6.014121202993682e-06, "loss": 0.3775, "step": 5443 }, { "epoch": 0.43552, "grad_norm": 1.3258579969406128, "learning_rate": 6.012889667664231e-06, "loss": 0.2745, "step": 5444 }, { "epoch": 0.4356, "grad_norm": 1.2049994468688965, "learning_rate": 6.0116580682526415e-06, "loss": 0.2574, "step": 5445 }, { "epoch": 0.43568, "grad_norm": 1.1984772682189941, "learning_rate": 6.010426404836831e-06, "loss": 0.283, "step": 5446 }, { "epoch": 0.43576, "grad_norm": 1.4736144542694092, "learning_rate": 6.009194677494723e-06, "loss": 0.3, "step": 5447 }, { "epoch": 0.43584, "grad_norm": 1.5378073453903198, "learning_rate": 6.007962886304245e-06, "loss": 0.3541, "step": 5448 }, { "epoch": 0.43592, "grad_norm": 1.6598979234695435, "learning_rate": 6.006731031343327e-06, "loss": 0.3801, "step": 5449 }, { "epoch": 0.436, "grad_norm": 1.6594270467758179, "learning_rate": 6.0054991126899055e-06, "loss": 0.4225, "step": 5450 }, { "epoch": 0.43608, "grad_norm": 1.6767255067825317, "learning_rate": 6.004267130421918e-06, "loss": 0.4542, "step": 5451 }, { "epoch": 0.43616, "grad_norm": 1.594008207321167, "learning_rate": 6.003035084617311e-06, "loss": 0.2978, "step": 5452 }, { "epoch": 0.43624, "grad_norm": 1.3313404321670532, "learning_rate": 6.0018029753540295e-06, "loss": 0.2959, "step": 5453 }, { "epoch": 0.43632, "grad_norm": 1.4482040405273438, "learning_rate": 6.0005708027100274e-06, "loss": 0.4336, "step": 5454 }, { "epoch": 0.4364, "grad_norm": 1.7941350936889648, "learning_rate": 5.999338566763258e-06, "loss": 0.3423, "step": 5455 }, { "epoch": 0.43648, "grad_norm": 1.9843556880950928, "learning_rate": 5.998106267591679e-06, "loss": 0.4683, "step": 5456 }, { "epoch": 0.43656, "grad_norm": 1.8331342935562134, "learning_rate": 5.996873905273259e-06, "loss": 0.4164, "step": 5457 }, { "epoch": 0.43664, "grad_norm": 1.5630244016647339, "learning_rate": 5.995641479885962e-06, "loss": 0.3497, "step": 5458 }, { "epoch": 0.43672, "grad_norm": 1.7044670581817627, "learning_rate": 5.99440899150776e-06, "loss": 0.4218, "step": 5459 }, { "epoch": 0.4368, "grad_norm": 2.0335018634796143, "learning_rate": 5.993176440216627e-06, "loss": 0.3786, "step": 5460 }, { "epoch": 0.43688, "grad_norm": 1.4986952543258667, "learning_rate": 5.991943826090545e-06, "loss": 0.3274, "step": 5461 }, { "epoch": 0.43696, "grad_norm": 1.511153221130371, "learning_rate": 5.990711149207496e-06, "loss": 0.3809, "step": 5462 }, { "epoch": 0.43704, "grad_norm": 1.593832015991211, "learning_rate": 5.989478409645466e-06, "loss": 0.346, "step": 5463 }, { "epoch": 0.43712, "grad_norm": 1.7036159038543701, "learning_rate": 5.988245607482449e-06, "loss": 0.4385, "step": 5464 }, { "epoch": 0.4372, "grad_norm": 1.4725526571273804, "learning_rate": 5.987012742796441e-06, "loss": 0.3309, "step": 5465 }, { "epoch": 0.43728, "grad_norm": 1.5867393016815186, "learning_rate": 5.985779815665436e-06, "loss": 0.3505, "step": 5466 }, { "epoch": 0.43736, "grad_norm": 2.0825116634368896, "learning_rate": 5.9845468261674435e-06, "loss": 0.4112, "step": 5467 }, { "epoch": 0.43744, "grad_norm": 1.8679453134536743, "learning_rate": 5.9833137743804645e-06, "loss": 0.4366, "step": 5468 }, { "epoch": 0.43752, "grad_norm": 1.5901966094970703, "learning_rate": 5.982080660382516e-06, "loss": 0.3359, "step": 5469 }, { "epoch": 0.4376, "grad_norm": 1.1409281492233276, "learning_rate": 5.98084748425161e-06, "loss": 0.2324, "step": 5470 }, { "epoch": 0.43768, "grad_norm": 1.6136082410812378, "learning_rate": 5.979614246065765e-06, "loss": 0.3601, "step": 5471 }, { "epoch": 0.43776, "grad_norm": 1.8404332399368286, "learning_rate": 5.978380945903004e-06, "loss": 0.4764, "step": 5472 }, { "epoch": 0.43784, "grad_norm": 1.9501445293426514, "learning_rate": 5.977147583841354e-06, "loss": 0.3829, "step": 5473 }, { "epoch": 0.43792, "grad_norm": 1.4439702033996582, "learning_rate": 5.975914159958846e-06, "loss": 0.317, "step": 5474 }, { "epoch": 0.438, "grad_norm": 1.3018183708190918, "learning_rate": 5.974680674333514e-06, "loss": 0.2696, "step": 5475 }, { "epoch": 0.43808, "grad_norm": 1.4886512756347656, "learning_rate": 5.973447127043398e-06, "loss": 0.3454, "step": 5476 }, { "epoch": 0.43816, "grad_norm": 1.5890697240829468, "learning_rate": 5.97221351816654e-06, "loss": 0.3576, "step": 5477 }, { "epoch": 0.43824, "grad_norm": 1.8298115730285645, "learning_rate": 5.970979847780984e-06, "loss": 0.3515, "step": 5478 }, { "epoch": 0.43832, "grad_norm": 1.5328552722930908, "learning_rate": 5.969746115964783e-06, "loss": 0.3744, "step": 5479 }, { "epoch": 0.4384, "grad_norm": 1.7203729152679443, "learning_rate": 5.968512322795991e-06, "loss": 0.3269, "step": 5480 }, { "epoch": 0.43848, "grad_norm": 1.672613501548767, "learning_rate": 5.967278468352663e-06, "loss": 0.3997, "step": 5481 }, { "epoch": 0.43856, "grad_norm": 1.2868820428848267, "learning_rate": 5.966044552712864e-06, "loss": 0.3223, "step": 5482 }, { "epoch": 0.43864, "grad_norm": 1.2581580877304077, "learning_rate": 5.9648105759546595e-06, "loss": 0.2698, "step": 5483 }, { "epoch": 0.43872, "grad_norm": 1.465779185295105, "learning_rate": 5.963576538156116e-06, "loss": 0.3777, "step": 5484 }, { "epoch": 0.4388, "grad_norm": 1.3545647859573364, "learning_rate": 5.96234243939531e-06, "loss": 0.3381, "step": 5485 }, { "epoch": 0.43888, "grad_norm": 1.3060506582260132, "learning_rate": 5.9611082797503175e-06, "loss": 0.3209, "step": 5486 }, { "epoch": 0.43896, "grad_norm": 1.4939861297607422, "learning_rate": 5.95987405929922e-06, "loss": 0.3916, "step": 5487 }, { "epoch": 0.43904, "grad_norm": 1.9790840148925781, "learning_rate": 5.9586397781201034e-06, "loss": 0.516, "step": 5488 }, { "epoch": 0.43912, "grad_norm": 1.3512998819351196, "learning_rate": 5.957405436291055e-06, "loss": 0.2739, "step": 5489 }, { "epoch": 0.4392, "grad_norm": 1.5846513509750366, "learning_rate": 5.956171033890168e-06, "loss": 0.4435, "step": 5490 }, { "epoch": 0.43928, "grad_norm": 1.4433051347732544, "learning_rate": 5.95493657099554e-06, "loss": 0.3871, "step": 5491 }, { "epoch": 0.43936, "grad_norm": 1.4104619026184082, "learning_rate": 5.953702047685271e-06, "loss": 0.2823, "step": 5492 }, { "epoch": 0.43944, "grad_norm": 1.4222887754440308, "learning_rate": 5.952467464037462e-06, "loss": 0.3227, "step": 5493 }, { "epoch": 0.43952, "grad_norm": 1.3719890117645264, "learning_rate": 5.951232820130224e-06, "loss": 0.295, "step": 5494 }, { "epoch": 0.4396, "grad_norm": 1.7587943077087402, "learning_rate": 5.949998116041671e-06, "loss": 0.4271, "step": 5495 }, { "epoch": 0.43968, "grad_norm": 1.418924331665039, "learning_rate": 5.948763351849913e-06, "loss": 0.2773, "step": 5496 }, { "epoch": 0.43976, "grad_norm": 1.8153401613235474, "learning_rate": 5.947528527633073e-06, "loss": 0.3063, "step": 5497 }, { "epoch": 0.43984, "grad_norm": 1.4398388862609863, "learning_rate": 5.946293643469274e-06, "loss": 0.3319, "step": 5498 }, { "epoch": 0.43992, "grad_norm": 1.4004582166671753, "learning_rate": 5.945058699436641e-06, "loss": 0.327, "step": 5499 }, { "epoch": 0.44, "grad_norm": 1.4153037071228027, "learning_rate": 5.943823695613308e-06, "loss": 0.2834, "step": 5500 }, { "epoch": 0.44008, "grad_norm": 1.4712837934494019, "learning_rate": 5.9425886320774086e-06, "loss": 0.2809, "step": 5501 }, { "epoch": 0.44016, "grad_norm": 1.3245508670806885, "learning_rate": 5.941353508907078e-06, "loss": 0.224, "step": 5502 }, { "epoch": 0.44024, "grad_norm": 1.4912713766098022, "learning_rate": 5.940118326180463e-06, "loss": 0.4387, "step": 5503 }, { "epoch": 0.44032, "grad_norm": 1.4831585884094238, "learning_rate": 5.938883083975706e-06, "loss": 0.303, "step": 5504 }, { "epoch": 0.4404, "grad_norm": 0.9887499809265137, "learning_rate": 5.937647782370957e-06, "loss": 0.2525, "step": 5505 }, { "epoch": 0.44048, "grad_norm": 1.6899783611297607, "learning_rate": 5.936412421444372e-06, "loss": 0.3949, "step": 5506 }, { "epoch": 0.44056, "grad_norm": 1.812455415725708, "learning_rate": 5.935177001274105e-06, "loss": 0.4223, "step": 5507 }, { "epoch": 0.44064, "grad_norm": 1.5728967189788818, "learning_rate": 5.933941521938318e-06, "loss": 0.3647, "step": 5508 }, { "epoch": 0.44072, "grad_norm": 1.8846977949142456, "learning_rate": 5.932705983515176e-06, "loss": 0.4741, "step": 5509 }, { "epoch": 0.4408, "grad_norm": 1.6265419721603394, "learning_rate": 5.931470386082847e-06, "loss": 0.3013, "step": 5510 }, { "epoch": 0.44088, "grad_norm": 1.5003933906555176, "learning_rate": 5.930234729719504e-06, "loss": 0.2989, "step": 5511 }, { "epoch": 0.44096, "grad_norm": 1.764898657798767, "learning_rate": 5.9289990145033226e-06, "loss": 0.3484, "step": 5512 }, { "epoch": 0.44104, "grad_norm": 1.7658780813217163, "learning_rate": 5.927763240512482e-06, "loss": 0.4016, "step": 5513 }, { "epoch": 0.44112, "grad_norm": 1.515289545059204, "learning_rate": 5.926527407825164e-06, "loss": 0.3122, "step": 5514 }, { "epoch": 0.4412, "grad_norm": 1.3999195098876953, "learning_rate": 5.92529151651956e-06, "loss": 0.4573, "step": 5515 }, { "epoch": 0.44128, "grad_norm": 2.083272695541382, "learning_rate": 5.924055566673855e-06, "loss": 0.3821, "step": 5516 }, { "epoch": 0.44136, "grad_norm": 1.596103310585022, "learning_rate": 5.922819558366247e-06, "loss": 0.4768, "step": 5517 }, { "epoch": 0.44144, "grad_norm": 1.2892342805862427, "learning_rate": 5.921583491674935e-06, "loss": 0.2956, "step": 5518 }, { "epoch": 0.44152, "grad_norm": 1.8127224445343018, "learning_rate": 5.920347366678117e-06, "loss": 0.3626, "step": 5519 }, { "epoch": 0.4416, "grad_norm": 1.7415847778320312, "learning_rate": 5.9191111834540006e-06, "loss": 0.3858, "step": 5520 }, { "epoch": 0.44168, "grad_norm": 1.5482152700424194, "learning_rate": 5.917874942080796e-06, "loss": 0.3114, "step": 5521 }, { "epoch": 0.44176, "grad_norm": 1.4196959733963013, "learning_rate": 5.916638642636714e-06, "loss": 0.368, "step": 5522 }, { "epoch": 0.44184, "grad_norm": 1.5246591567993164, "learning_rate": 5.9154022851999725e-06, "loss": 0.3554, "step": 5523 }, { "epoch": 0.44192, "grad_norm": 1.4558786153793335, "learning_rate": 5.914165869848793e-06, "loss": 0.3025, "step": 5524 }, { "epoch": 0.442, "grad_norm": 1.1743532419204712, "learning_rate": 5.912929396661396e-06, "loss": 0.2267, "step": 5525 }, { "epoch": 0.44208, "grad_norm": 1.816625952720642, "learning_rate": 5.911692865716011e-06, "loss": 0.4033, "step": 5526 }, { "epoch": 0.44216, "grad_norm": 1.3450242280960083, "learning_rate": 5.910456277090869e-06, "loss": 0.3432, "step": 5527 }, { "epoch": 0.44224, "grad_norm": 1.6333425045013428, "learning_rate": 5.909219630864204e-06, "loss": 0.3124, "step": 5528 }, { "epoch": 0.44232, "grad_norm": 1.9529705047607422, "learning_rate": 5.907982927114257e-06, "loss": 0.4574, "step": 5529 }, { "epoch": 0.4424, "grad_norm": 1.9013431072235107, "learning_rate": 5.906746165919267e-06, "loss": 0.4198, "step": 5530 }, { "epoch": 0.44248, "grad_norm": 1.8581068515777588, "learning_rate": 5.905509347357481e-06, "loss": 0.4414, "step": 5531 }, { "epoch": 0.44256, "grad_norm": 1.7523456811904907, "learning_rate": 5.904272471507148e-06, "loss": 0.3963, "step": 5532 }, { "epoch": 0.44264, "grad_norm": 1.8803638219833374, "learning_rate": 5.903035538446524e-06, "loss": 0.3608, "step": 5533 }, { "epoch": 0.44272, "grad_norm": 1.630553126335144, "learning_rate": 5.901798548253859e-06, "loss": 0.3344, "step": 5534 }, { "epoch": 0.4428, "grad_norm": 1.7994118928909302, "learning_rate": 5.90056150100742e-06, "loss": 0.4549, "step": 5535 }, { "epoch": 0.44288, "grad_norm": 1.4933981895446777, "learning_rate": 5.8993243967854685e-06, "loss": 0.3576, "step": 5536 }, { "epoch": 0.44296, "grad_norm": 1.6949869394302368, "learning_rate": 5.898087235666271e-06, "loss": 0.3546, "step": 5537 }, { "epoch": 0.44304, "grad_norm": 1.6014376878738403, "learning_rate": 5.8968500177281e-06, "loss": 0.3171, "step": 5538 }, { "epoch": 0.44312, "grad_norm": 1.835667371749878, "learning_rate": 5.895612743049227e-06, "loss": 0.509, "step": 5539 }, { "epoch": 0.4432, "grad_norm": 1.977283000946045, "learning_rate": 5.894375411707933e-06, "loss": 0.4484, "step": 5540 }, { "epoch": 0.44328, "grad_norm": 1.4197109937667847, "learning_rate": 5.8931380237825e-06, "loss": 0.2965, "step": 5541 }, { "epoch": 0.44336, "grad_norm": 1.751121997833252, "learning_rate": 5.891900579351213e-06, "loss": 0.367, "step": 5542 }, { "epoch": 0.44344, "grad_norm": 1.6477973461151123, "learning_rate": 5.89066307849236e-06, "loss": 0.3525, "step": 5543 }, { "epoch": 0.44352, "grad_norm": 1.5611004829406738, "learning_rate": 5.889425521284234e-06, "loss": 0.3347, "step": 5544 }, { "epoch": 0.4436, "grad_norm": 1.138242244720459, "learning_rate": 5.888187907805132e-06, "loss": 0.252, "step": 5545 }, { "epoch": 0.44368, "grad_norm": 1.4641177654266357, "learning_rate": 5.8869502381333525e-06, "loss": 0.3063, "step": 5546 }, { "epoch": 0.44376, "grad_norm": 1.6272233724594116, "learning_rate": 5.8857125123472e-06, "loss": 0.2902, "step": 5547 }, { "epoch": 0.44384, "grad_norm": 1.4607881307601929, "learning_rate": 5.88447473052498e-06, "loss": 0.2488, "step": 5548 }, { "epoch": 0.44392, "grad_norm": 1.5880279541015625, "learning_rate": 5.883236892745003e-06, "loss": 0.426, "step": 5549 }, { "epoch": 0.444, "grad_norm": 1.4784557819366455, "learning_rate": 5.881998999085583e-06, "loss": 0.2709, "step": 5550 }, { "epoch": 0.44408, "grad_norm": 1.4526431560516357, "learning_rate": 5.880761049625038e-06, "loss": 0.3239, "step": 5551 }, { "epoch": 0.44416, "grad_norm": 1.838352918624878, "learning_rate": 5.879523044441687e-06, "loss": 0.3932, "step": 5552 }, { "epoch": 0.44424, "grad_norm": 1.5688480138778687, "learning_rate": 5.878284983613858e-06, "loss": 0.2671, "step": 5553 }, { "epoch": 0.44432, "grad_norm": 1.8419432640075684, "learning_rate": 5.877046867219876e-06, "loss": 0.3733, "step": 5554 }, { "epoch": 0.4444, "grad_norm": 2.1512904167175293, "learning_rate": 5.8758086953380725e-06, "loss": 0.419, "step": 5555 }, { "epoch": 0.44448, "grad_norm": 1.3534338474273682, "learning_rate": 5.874570468046784e-06, "loss": 0.3388, "step": 5556 }, { "epoch": 0.44456, "grad_norm": 2.1905033588409424, "learning_rate": 5.873332185424348e-06, "loss": 0.4197, "step": 5557 }, { "epoch": 0.44464, "grad_norm": 1.410273790359497, "learning_rate": 5.872093847549106e-06, "loss": 0.3875, "step": 5558 }, { "epoch": 0.44472, "grad_norm": 1.7642848491668701, "learning_rate": 5.870855454499407e-06, "loss": 0.4297, "step": 5559 }, { "epoch": 0.4448, "grad_norm": 1.5598673820495605, "learning_rate": 5.869617006353596e-06, "loss": 0.3774, "step": 5560 }, { "epoch": 0.44488, "grad_norm": 1.347758412361145, "learning_rate": 5.868378503190027e-06, "loss": 0.2839, "step": 5561 }, { "epoch": 0.44496, "grad_norm": 1.6148842573165894, "learning_rate": 5.8671399450870535e-06, "loss": 0.3108, "step": 5562 }, { "epoch": 0.44504, "grad_norm": 1.48881196975708, "learning_rate": 5.8659013321230385e-06, "loss": 0.3588, "step": 5563 }, { "epoch": 0.44512, "grad_norm": 1.3120616674423218, "learning_rate": 5.8646626643763435e-06, "loss": 0.2724, "step": 5564 }, { "epoch": 0.4452, "grad_norm": 1.1921863555908203, "learning_rate": 5.863423941925337e-06, "loss": 0.2622, "step": 5565 }, { "epoch": 0.44528, "grad_norm": 1.8236253261566162, "learning_rate": 5.862185164848384e-06, "loss": 0.3556, "step": 5566 }, { "epoch": 0.44536, "grad_norm": 1.6157597303390503, "learning_rate": 5.860946333223862e-06, "loss": 0.369, "step": 5567 }, { "epoch": 0.44544, "grad_norm": 1.5578926801681519, "learning_rate": 5.859707447130144e-06, "loss": 0.3256, "step": 5568 }, { "epoch": 0.44552, "grad_norm": 1.4830211400985718, "learning_rate": 5.858468506645613e-06, "loss": 0.3329, "step": 5569 }, { "epoch": 0.4456, "grad_norm": 1.290466547012329, "learning_rate": 5.857229511848655e-06, "loss": 0.2373, "step": 5570 }, { "epoch": 0.44568, "grad_norm": 1.6431125402450562, "learning_rate": 5.855990462817651e-06, "loss": 0.3732, "step": 5571 }, { "epoch": 0.44576, "grad_norm": 1.8566932678222656, "learning_rate": 5.854751359630997e-06, "loss": 0.4304, "step": 5572 }, { "epoch": 0.44584, "grad_norm": 1.466186285018921, "learning_rate": 5.853512202367083e-06, "loss": 0.328, "step": 5573 }, { "epoch": 0.44592, "grad_norm": 1.5933393239974976, "learning_rate": 5.852272991104308e-06, "loss": 0.3883, "step": 5574 }, { "epoch": 0.446, "grad_norm": 1.850595474243164, "learning_rate": 5.851033725921073e-06, "loss": 0.486, "step": 5575 }, { "epoch": 0.44608, "grad_norm": 1.4042195081710815, "learning_rate": 5.84979440689578e-06, "loss": 0.3212, "step": 5576 }, { "epoch": 0.44616, "grad_norm": 1.733971118927002, "learning_rate": 5.848555034106841e-06, "loss": 0.3999, "step": 5577 }, { "epoch": 0.44624, "grad_norm": 1.5726109743118286, "learning_rate": 5.847315607632662e-06, "loss": 0.3346, "step": 5578 }, { "epoch": 0.44632, "grad_norm": 1.2969645261764526, "learning_rate": 5.846076127551661e-06, "loss": 0.2933, "step": 5579 }, { "epoch": 0.4464, "grad_norm": 1.2956920862197876, "learning_rate": 5.8448365939422534e-06, "loss": 0.275, "step": 5580 }, { "epoch": 0.44648, "grad_norm": 1.5254887342453003, "learning_rate": 5.8435970068828605e-06, "loss": 0.3264, "step": 5581 }, { "epoch": 0.44656, "grad_norm": 1.3373383283615112, "learning_rate": 5.842357366451911e-06, "loss": 0.2972, "step": 5582 }, { "epoch": 0.44664, "grad_norm": 1.5913358926773071, "learning_rate": 5.841117672727827e-06, "loss": 0.5288, "step": 5583 }, { "epoch": 0.44672, "grad_norm": 1.6409987211227417, "learning_rate": 5.839877925789043e-06, "loss": 0.3628, "step": 5584 }, { "epoch": 0.4468, "grad_norm": 1.7899576425552368, "learning_rate": 5.8386381257139925e-06, "loss": 0.3251, "step": 5585 }, { "epoch": 0.44688, "grad_norm": 1.5527466535568237, "learning_rate": 5.837398272581114e-06, "loss": 0.3213, "step": 5586 }, { "epoch": 0.44696, "grad_norm": 1.662186622619629, "learning_rate": 5.836158366468848e-06, "loss": 0.3146, "step": 5587 }, { "epoch": 0.44704, "grad_norm": 1.686964750289917, "learning_rate": 5.8349184074556396e-06, "loss": 0.3395, "step": 5588 }, { "epoch": 0.44712, "grad_norm": 1.8355437517166138, "learning_rate": 5.833678395619939e-06, "loss": 0.449, "step": 5589 }, { "epoch": 0.4472, "grad_norm": 1.7132534980773926, "learning_rate": 5.832438331040196e-06, "loss": 0.3185, "step": 5590 }, { "epoch": 0.44728, "grad_norm": 1.7874171733856201, "learning_rate": 5.831198213794863e-06, "loss": 0.4094, "step": 5591 }, { "epoch": 0.44736, "grad_norm": 1.4985543489456177, "learning_rate": 5.829958043962402e-06, "loss": 0.4014, "step": 5592 }, { "epoch": 0.44744, "grad_norm": 1.901485562324524, "learning_rate": 5.828717821621272e-06, "loss": 0.3675, "step": 5593 }, { "epoch": 0.44752, "grad_norm": 1.6370835304260254, "learning_rate": 5.827477546849938e-06, "loss": 0.3236, "step": 5594 }, { "epoch": 0.4476, "grad_norm": 1.7742931842803955, "learning_rate": 5.826237219726869e-06, "loss": 0.4238, "step": 5595 }, { "epoch": 0.44768, "grad_norm": 1.4299365282058716, "learning_rate": 5.824996840330536e-06, "loss": 0.3525, "step": 5596 }, { "epoch": 0.44776, "grad_norm": 1.4459290504455566, "learning_rate": 5.823756408739412e-06, "loss": 0.2749, "step": 5597 }, { "epoch": 0.44784, "grad_norm": 1.5706748962402344, "learning_rate": 5.822515925031977e-06, "loss": 0.3489, "step": 5598 }, { "epoch": 0.44792, "grad_norm": 0.996229350566864, "learning_rate": 5.821275389286711e-06, "loss": 0.2767, "step": 5599 }, { "epoch": 0.448, "grad_norm": 1.8221417665481567, "learning_rate": 5.820034801582101e-06, "loss": 0.3905, "step": 5600 }, { "epoch": 0.44808, "grad_norm": 1.9977222681045532, "learning_rate": 5.818794161996631e-06, "loss": 0.3864, "step": 5601 }, { "epoch": 0.44816, "grad_norm": 2.152876615524292, "learning_rate": 5.817553470608795e-06, "loss": 0.5186, "step": 5602 }, { "epoch": 0.44824, "grad_norm": 1.5512678623199463, "learning_rate": 5.816312727497085e-06, "loss": 0.2871, "step": 5603 }, { "epoch": 0.44832, "grad_norm": 1.9361984729766846, "learning_rate": 5.815071932740002e-06, "loss": 0.3828, "step": 5604 }, { "epoch": 0.4484, "grad_norm": 1.641486644744873, "learning_rate": 5.813831086416044e-06, "loss": 0.4174, "step": 5605 }, { "epoch": 0.44848, "grad_norm": 1.4502854347229004, "learning_rate": 5.812590188603718e-06, "loss": 0.2888, "step": 5606 }, { "epoch": 0.44856, "grad_norm": 1.4937154054641724, "learning_rate": 5.811349239381528e-06, "loss": 0.3603, "step": 5607 }, { "epoch": 0.44864, "grad_norm": 1.1726678609848022, "learning_rate": 5.810108238827986e-06, "loss": 0.2616, "step": 5608 }, { "epoch": 0.44872, "grad_norm": 1.500806212425232, "learning_rate": 5.808867187021607e-06, "loss": 0.2688, "step": 5609 }, { "epoch": 0.4488, "grad_norm": 1.740702509880066, "learning_rate": 5.8076260840409086e-06, "loss": 0.4273, "step": 5610 }, { "epoch": 0.44888, "grad_norm": 1.5013270378112793, "learning_rate": 5.806384929964408e-06, "loss": 0.3114, "step": 5611 }, { "epoch": 0.44896, "grad_norm": 1.7903553247451782, "learning_rate": 5.805143724870633e-06, "loss": 0.3147, "step": 5612 }, { "epoch": 0.44904, "grad_norm": 1.476843237876892, "learning_rate": 5.8039024688381074e-06, "loss": 0.3131, "step": 5613 }, { "epoch": 0.44912, "grad_norm": 1.402813196182251, "learning_rate": 5.802661161945363e-06, "loss": 0.303, "step": 5614 }, { "epoch": 0.4492, "grad_norm": 1.5428776741027832, "learning_rate": 5.801419804270932e-06, "loss": 0.3801, "step": 5615 }, { "epoch": 0.44928, "grad_norm": 1.5787391662597656, "learning_rate": 5.800178395893353e-06, "loss": 0.3183, "step": 5616 }, { "epoch": 0.44936, "grad_norm": 1.7391003370285034, "learning_rate": 5.798936936891163e-06, "loss": 0.3259, "step": 5617 }, { "epoch": 0.44944, "grad_norm": 1.6685212850570679, "learning_rate": 5.797695427342908e-06, "loss": 0.3274, "step": 5618 }, { "epoch": 0.44952, "grad_norm": 1.4090831279754639, "learning_rate": 5.79645386732713e-06, "loss": 0.3116, "step": 5619 }, { "epoch": 0.4496, "grad_norm": 1.5553460121154785, "learning_rate": 5.795212256922382e-06, "loss": 0.3605, "step": 5620 }, { "epoch": 0.44968, "grad_norm": 1.7685738801956177, "learning_rate": 5.793970596207214e-06, "loss": 0.3015, "step": 5621 }, { "epoch": 0.44976, "grad_norm": 1.7890232801437378, "learning_rate": 5.792728885260184e-06, "loss": 0.3894, "step": 5622 }, { "epoch": 0.44984, "grad_norm": 2.025926113128662, "learning_rate": 5.79148712415985e-06, "loss": 0.3836, "step": 5623 }, { "epoch": 0.44992, "grad_norm": 1.6190468072891235, "learning_rate": 5.790245312984775e-06, "loss": 0.3422, "step": 5624 }, { "epoch": 0.45, "grad_norm": 1.7557851076126099, "learning_rate": 5.789003451813522e-06, "loss": 0.3468, "step": 5625 }, { "epoch": 0.45008, "grad_norm": 1.489891529083252, "learning_rate": 5.78776154072466e-06, "loss": 0.3636, "step": 5626 }, { "epoch": 0.45016, "grad_norm": 1.5357917547225952, "learning_rate": 5.786519579796764e-06, "loss": 0.3056, "step": 5627 }, { "epoch": 0.45024, "grad_norm": 1.792183756828308, "learning_rate": 5.785277569108403e-06, "loss": 0.3856, "step": 5628 }, { "epoch": 0.45032, "grad_norm": 1.6786377429962158, "learning_rate": 5.7840355087381575e-06, "loss": 0.4319, "step": 5629 }, { "epoch": 0.4504, "grad_norm": 1.8204642534255981, "learning_rate": 5.7827933987646115e-06, "loss": 0.3844, "step": 5630 }, { "epoch": 0.45048, "grad_norm": 1.8311585187911987, "learning_rate": 5.781551239266344e-06, "loss": 0.5054, "step": 5631 }, { "epoch": 0.45056, "grad_norm": 1.9697678089141846, "learning_rate": 5.780309030321945e-06, "loss": 0.359, "step": 5632 }, { "epoch": 0.45064, "grad_norm": 1.9044475555419922, "learning_rate": 5.779066772010005e-06, "loss": 0.4049, "step": 5633 }, { "epoch": 0.45072, "grad_norm": 1.8143421411514282, "learning_rate": 5.777824464409117e-06, "loss": 0.4335, "step": 5634 }, { "epoch": 0.4508, "grad_norm": 1.4178447723388672, "learning_rate": 5.776582107597877e-06, "loss": 0.3715, "step": 5635 }, { "epoch": 0.45088, "grad_norm": 1.598649024963379, "learning_rate": 5.775339701654887e-06, "loss": 0.3177, "step": 5636 }, { "epoch": 0.45096, "grad_norm": 1.538147211074829, "learning_rate": 5.7740972466587476e-06, "loss": 0.3332, "step": 5637 }, { "epoch": 0.45104, "grad_norm": 1.4716747999191284, "learning_rate": 5.772854742688066e-06, "loss": 0.4715, "step": 5638 }, { "epoch": 0.45112, "grad_norm": 1.5471874475479126, "learning_rate": 5.771612189821451e-06, "loss": 0.3747, "step": 5639 }, { "epoch": 0.4512, "grad_norm": 1.5069724321365356, "learning_rate": 5.770369588137513e-06, "loss": 0.2939, "step": 5640 }, { "epoch": 0.45128, "grad_norm": 1.0864536762237549, "learning_rate": 5.76912693771487e-06, "loss": 0.215, "step": 5641 }, { "epoch": 0.45136, "grad_norm": 1.2346502542495728, "learning_rate": 5.76788423863214e-06, "loss": 0.2422, "step": 5642 }, { "epoch": 0.45144, "grad_norm": 1.443285346031189, "learning_rate": 5.766641490967942e-06, "loss": 0.26, "step": 5643 }, { "epoch": 0.45152, "grad_norm": 1.8851925134658813, "learning_rate": 5.765398694800902e-06, "loss": 0.4111, "step": 5644 }, { "epoch": 0.4516, "grad_norm": 1.4499890804290771, "learning_rate": 5.764155850209649e-06, "loss": 0.2916, "step": 5645 }, { "epoch": 0.45168, "grad_norm": 1.421256422996521, "learning_rate": 5.7629129572728105e-06, "loss": 0.3533, "step": 5646 }, { "epoch": 0.45176, "grad_norm": 1.4521507024765015, "learning_rate": 5.761670016069025e-06, "loss": 0.3407, "step": 5647 }, { "epoch": 0.45184, "grad_norm": 1.6597949266433716, "learning_rate": 5.760427026676923e-06, "loss": 0.3523, "step": 5648 }, { "epoch": 0.45192, "grad_norm": 1.5322909355163574, "learning_rate": 5.759183989175148e-06, "loss": 0.2657, "step": 5649 }, { "epoch": 0.452, "grad_norm": 1.5093351602554321, "learning_rate": 5.7579409036423426e-06, "loss": 0.3397, "step": 5650 }, { "epoch": 0.45208, "grad_norm": 1.5804753303527832, "learning_rate": 5.756697770157152e-06, "loss": 0.3578, "step": 5651 }, { "epoch": 0.45216, "grad_norm": 1.694562315940857, "learning_rate": 5.755454588798226e-06, "loss": 0.3719, "step": 5652 }, { "epoch": 0.45224, "grad_norm": 1.9576027393341064, "learning_rate": 5.754211359644217e-06, "loss": 0.3491, "step": 5653 }, { "epoch": 0.45232, "grad_norm": 1.5842691659927368, "learning_rate": 5.752968082773778e-06, "loss": 0.3311, "step": 5654 }, { "epoch": 0.4524, "grad_norm": 1.2589999437332153, "learning_rate": 5.751724758265567e-06, "loss": 0.2414, "step": 5655 }, { "epoch": 0.45248, "grad_norm": 2.144315719604492, "learning_rate": 5.750481386198246e-06, "loss": 0.4948, "step": 5656 }, { "epoch": 0.45256, "grad_norm": 1.865082859992981, "learning_rate": 5.749237966650478e-06, "loss": 0.4131, "step": 5657 }, { "epoch": 0.45264, "grad_norm": 1.8475942611694336, "learning_rate": 5.747994499700932e-06, "loss": 0.3729, "step": 5658 }, { "epoch": 0.45272, "grad_norm": 1.8844106197357178, "learning_rate": 5.746750985428278e-06, "loss": 0.5364, "step": 5659 }, { "epoch": 0.4528, "grad_norm": 1.6912811994552612, "learning_rate": 5.745507423911185e-06, "loss": 0.3392, "step": 5660 }, { "epoch": 0.45288, "grad_norm": 1.4193228483200073, "learning_rate": 5.744263815228334e-06, "loss": 0.3969, "step": 5661 }, { "epoch": 0.45296, "grad_norm": 1.7651104927062988, "learning_rate": 5.743020159458401e-06, "loss": 0.4356, "step": 5662 }, { "epoch": 0.45304, "grad_norm": 1.2144150733947754, "learning_rate": 5.741776456680068e-06, "loss": 0.2408, "step": 5663 }, { "epoch": 0.45312, "grad_norm": 1.4597735404968262, "learning_rate": 5.740532706972022e-06, "loss": 0.3017, "step": 5664 }, { "epoch": 0.4532, "grad_norm": 1.3759361505508423, "learning_rate": 5.739288910412949e-06, "loss": 0.2679, "step": 5665 }, { "epoch": 0.45328, "grad_norm": 1.6209099292755127, "learning_rate": 5.738045067081539e-06, "loss": 0.2948, "step": 5666 }, { "epoch": 0.45336, "grad_norm": 1.576300859451294, "learning_rate": 5.736801177056488e-06, "loss": 0.3133, "step": 5667 }, { "epoch": 0.45344, "grad_norm": 1.6057082414627075, "learning_rate": 5.735557240416492e-06, "loss": 0.3194, "step": 5668 }, { "epoch": 0.45352, "grad_norm": 1.371180772781372, "learning_rate": 5.73431325724025e-06, "loss": 0.2877, "step": 5669 }, { "epoch": 0.4536, "grad_norm": 1.5031378269195557, "learning_rate": 5.733069227606466e-06, "loss": 0.3562, "step": 5670 }, { "epoch": 0.45368, "grad_norm": 2.041217803955078, "learning_rate": 5.731825151593845e-06, "loss": 0.4971, "step": 5671 }, { "epoch": 0.45376, "grad_norm": 1.57797372341156, "learning_rate": 5.730581029281095e-06, "loss": 0.3594, "step": 5672 }, { "epoch": 0.45384, "grad_norm": 1.516638159751892, "learning_rate": 5.729336860746928e-06, "loss": 0.3172, "step": 5673 }, { "epoch": 0.45392, "grad_norm": 1.504821538925171, "learning_rate": 5.728092646070058e-06, "loss": 0.3502, "step": 5674 }, { "epoch": 0.454, "grad_norm": 1.4294888973236084, "learning_rate": 5.726848385329202e-06, "loss": 0.3397, "step": 5675 }, { "epoch": 0.45408, "grad_norm": 1.1908982992172241, "learning_rate": 5.725604078603081e-06, "loss": 0.2893, "step": 5676 }, { "epoch": 0.45416, "grad_norm": 1.6021041870117188, "learning_rate": 5.724359725970419e-06, "loss": 0.3478, "step": 5677 }, { "epoch": 0.45424, "grad_norm": 1.542633056640625, "learning_rate": 5.72311532750994e-06, "loss": 0.3476, "step": 5678 }, { "epoch": 0.45432, "grad_norm": 1.6992759704589844, "learning_rate": 5.721870883300374e-06, "loss": 0.3492, "step": 5679 }, { "epoch": 0.4544, "grad_norm": 1.4652698040008545, "learning_rate": 5.720626393420451e-06, "loss": 0.3238, "step": 5680 }, { "epoch": 0.45448, "grad_norm": 1.3540560007095337, "learning_rate": 5.719381857948908e-06, "loss": 0.3257, "step": 5681 }, { "epoch": 0.45456, "grad_norm": 1.3179233074188232, "learning_rate": 5.718137276964481e-06, "loss": 0.3374, "step": 5682 }, { "epoch": 0.45464, "grad_norm": 1.2918938398361206, "learning_rate": 5.716892650545914e-06, "loss": 0.2643, "step": 5683 }, { "epoch": 0.45472, "grad_norm": 1.5984219312667847, "learning_rate": 5.715647978771946e-06, "loss": 0.3463, "step": 5684 }, { "epoch": 0.4548, "grad_norm": 1.7894304990768433, "learning_rate": 5.714403261721327e-06, "loss": 0.4156, "step": 5685 }, { "epoch": 0.45488, "grad_norm": 1.436471939086914, "learning_rate": 5.713158499472802e-06, "loss": 0.3837, "step": 5686 }, { "epoch": 0.45496, "grad_norm": 1.6427680253982544, "learning_rate": 5.711913692105126e-06, "loss": 0.3569, "step": 5687 }, { "epoch": 0.45504, "grad_norm": 1.8029788732528687, "learning_rate": 5.710668839697051e-06, "loss": 0.3681, "step": 5688 }, { "epoch": 0.45512, "grad_norm": 1.501164197921753, "learning_rate": 5.709423942327339e-06, "loss": 0.3378, "step": 5689 }, { "epoch": 0.4552, "grad_norm": 1.6958562135696411, "learning_rate": 5.708179000074746e-06, "loss": 0.3265, "step": 5690 }, { "epoch": 0.45528, "grad_norm": 1.5554603338241577, "learning_rate": 5.7069340130180375e-06, "loss": 0.3386, "step": 5691 }, { "epoch": 0.45536, "grad_norm": 1.462551474571228, "learning_rate": 5.705688981235979e-06, "loss": 0.2857, "step": 5692 }, { "epoch": 0.45544, "grad_norm": 1.7861984968185425, "learning_rate": 5.704443904807341e-06, "loss": 0.426, "step": 5693 }, { "epoch": 0.45552, "grad_norm": 2.0433483123779297, "learning_rate": 5.7031987838108945e-06, "loss": 0.3355, "step": 5694 }, { "epoch": 0.4556, "grad_norm": 1.1957581043243408, "learning_rate": 5.701953618325413e-06, "loss": 0.2442, "step": 5695 }, { "epoch": 0.45568, "grad_norm": 1.7323002815246582, "learning_rate": 5.700708408429676e-06, "loss": 0.3109, "step": 5696 }, { "epoch": 0.45576, "grad_norm": 1.2197766304016113, "learning_rate": 5.699463154202461e-06, "loss": 0.3155, "step": 5697 }, { "epoch": 0.45584, "grad_norm": 1.9804476499557495, "learning_rate": 5.698217855722553e-06, "loss": 0.5005, "step": 5698 }, { "epoch": 0.45592, "grad_norm": 1.695799708366394, "learning_rate": 5.696972513068738e-06, "loss": 0.3383, "step": 5699 }, { "epoch": 0.456, "grad_norm": 1.70815110206604, "learning_rate": 5.695727126319805e-06, "loss": 0.4038, "step": 5700 }, { "epoch": 0.45608, "grad_norm": 1.5041000843048096, "learning_rate": 5.694481695554542e-06, "loss": 0.3908, "step": 5701 }, { "epoch": 0.45616, "grad_norm": 1.1593267917633057, "learning_rate": 5.693236220851748e-06, "loss": 0.2464, "step": 5702 }, { "epoch": 0.45624, "grad_norm": 1.5389819145202637, "learning_rate": 5.691990702290217e-06, "loss": 0.3828, "step": 5703 }, { "epoch": 0.45632, "grad_norm": 1.3315058946609497, "learning_rate": 5.69074513994875e-06, "loss": 0.3166, "step": 5704 }, { "epoch": 0.4564, "grad_norm": 1.4812344312667847, "learning_rate": 5.6894995339061484e-06, "loss": 0.294, "step": 5705 }, { "epoch": 0.45648, "grad_norm": 2.301971197128296, "learning_rate": 5.688253884241221e-06, "loss": 0.4218, "step": 5706 }, { "epoch": 0.45656, "grad_norm": 1.6880017518997192, "learning_rate": 5.687008191032771e-06, "loss": 0.3879, "step": 5707 }, { "epoch": 0.45664, "grad_norm": 2.1264901161193848, "learning_rate": 5.685762454359612e-06, "loss": 0.4719, "step": 5708 }, { "epoch": 0.45672, "grad_norm": 1.2993032932281494, "learning_rate": 5.684516674300557e-06, "loss": 0.2644, "step": 5709 }, { "epoch": 0.4568, "grad_norm": 1.6621084213256836, "learning_rate": 5.6832708509344215e-06, "loss": 0.3838, "step": 5710 }, { "epoch": 0.45688, "grad_norm": 1.6229028701782227, "learning_rate": 5.682024984340027e-06, "loss": 0.2956, "step": 5711 }, { "epoch": 0.45696, "grad_norm": 1.6341001987457275, "learning_rate": 5.6807790745961935e-06, "loss": 0.4009, "step": 5712 }, { "epoch": 0.45704, "grad_norm": 1.3154834508895874, "learning_rate": 5.679533121781745e-06, "loss": 0.2954, "step": 5713 }, { "epoch": 0.45712, "grad_norm": 1.2968782186508179, "learning_rate": 5.67828712597551e-06, "loss": 0.2647, "step": 5714 }, { "epoch": 0.4572, "grad_norm": 1.9382835626602173, "learning_rate": 5.677041087256319e-06, "loss": 0.4006, "step": 5715 }, { "epoch": 0.45728, "grad_norm": 1.3526051044464111, "learning_rate": 5.675795005703002e-06, "loss": 0.3288, "step": 5716 }, { "epoch": 0.45736, "grad_norm": 1.6821001768112183, "learning_rate": 5.674548881394398e-06, "loss": 0.3787, "step": 5717 }, { "epoch": 0.45744, "grad_norm": 1.6728947162628174, "learning_rate": 5.673302714409342e-06, "loss": 0.3524, "step": 5718 }, { "epoch": 0.45752, "grad_norm": 1.9203122854232788, "learning_rate": 5.672056504826677e-06, "loss": 0.4147, "step": 5719 }, { "epoch": 0.4576, "grad_norm": 1.767820954322815, "learning_rate": 5.670810252725246e-06, "loss": 0.5169, "step": 5720 }, { "epoch": 0.45768, "grad_norm": 1.085750937461853, "learning_rate": 5.669563958183893e-06, "loss": 0.2131, "step": 5721 }, { "epoch": 0.45776, "grad_norm": 2.1359403133392334, "learning_rate": 5.668317621281471e-06, "loss": 0.4206, "step": 5722 }, { "epoch": 0.45784, "grad_norm": 1.5051878690719604, "learning_rate": 5.667071242096828e-06, "loss": 0.2954, "step": 5723 }, { "epoch": 0.45792, "grad_norm": 1.2976740598678589, "learning_rate": 5.66582482070882e-06, "loss": 0.284, "step": 5724 }, { "epoch": 0.458, "grad_norm": 1.684295415878296, "learning_rate": 5.664578357196303e-06, "loss": 0.3567, "step": 5725 }, { "epoch": 0.45808, "grad_norm": 1.7063944339752197, "learning_rate": 5.663331851638136e-06, "loss": 0.4559, "step": 5726 }, { "epoch": 0.45816, "grad_norm": 1.91475510597229, "learning_rate": 5.662085304113184e-06, "loss": 0.4579, "step": 5727 }, { "epoch": 0.45824, "grad_norm": 1.6786874532699585, "learning_rate": 5.66083871470031e-06, "loss": 0.3606, "step": 5728 }, { "epoch": 0.45832, "grad_norm": 2.066380023956299, "learning_rate": 5.6595920834783815e-06, "loss": 0.3455, "step": 5729 }, { "epoch": 0.4584, "grad_norm": 1.214521884918213, "learning_rate": 5.658345410526269e-06, "loss": 0.2417, "step": 5730 }, { "epoch": 0.45848, "grad_norm": 1.282861351966858, "learning_rate": 5.657098695922845e-06, "loss": 0.3091, "step": 5731 }, { "epoch": 0.45856, "grad_norm": 1.3990386724472046, "learning_rate": 5.655851939746985e-06, "loss": 0.2745, "step": 5732 }, { "epoch": 0.45864, "grad_norm": 2.0859076976776123, "learning_rate": 5.654605142077567e-06, "loss": 0.3879, "step": 5733 }, { "epoch": 0.45872, "grad_norm": 1.3720123767852783, "learning_rate": 5.653358302993473e-06, "loss": 0.2961, "step": 5734 }, { "epoch": 0.4588, "grad_norm": 1.6038662195205688, "learning_rate": 5.652111422573584e-06, "loss": 0.3159, "step": 5735 }, { "epoch": 0.45888, "grad_norm": 1.583141565322876, "learning_rate": 5.6508645008967885e-06, "loss": 0.3831, "step": 5736 }, { "epoch": 0.45896, "grad_norm": 1.398667573928833, "learning_rate": 5.649617538041973e-06, "loss": 0.3121, "step": 5737 }, { "epoch": 0.45904, "grad_norm": 1.727538824081421, "learning_rate": 5.6483705340880305e-06, "loss": 0.3834, "step": 5738 }, { "epoch": 0.45912, "grad_norm": 1.9524167776107788, "learning_rate": 5.647123489113852e-06, "loss": 0.4467, "step": 5739 }, { "epoch": 0.4592, "grad_norm": 1.4175621271133423, "learning_rate": 5.645876403198337e-06, "loss": 0.3502, "step": 5740 }, { "epoch": 0.45928, "grad_norm": 1.418363094329834, "learning_rate": 5.6446292764203825e-06, "loss": 0.3052, "step": 5741 }, { "epoch": 0.45936, "grad_norm": 1.6092485189437866, "learning_rate": 5.643382108858891e-06, "loss": 0.3851, "step": 5742 }, { "epoch": 0.45944, "grad_norm": 1.4313188791275024, "learning_rate": 5.642134900592766e-06, "loss": 0.3024, "step": 5743 }, { "epoch": 0.45952, "grad_norm": 1.4228692054748535, "learning_rate": 5.6408876517009145e-06, "loss": 0.2845, "step": 5744 }, { "epoch": 0.4596, "grad_norm": 1.565037488937378, "learning_rate": 5.6396403622622455e-06, "loss": 0.3583, "step": 5745 }, { "epoch": 0.45968, "grad_norm": 1.750694751739502, "learning_rate": 5.638393032355671e-06, "loss": 0.3198, "step": 5746 }, { "epoch": 0.45976, "grad_norm": 1.5013595819473267, "learning_rate": 5.637145662060106e-06, "loss": 0.3254, "step": 5747 }, { "epoch": 0.45984, "grad_norm": 1.337208867073059, "learning_rate": 5.635898251454467e-06, "loss": 0.2618, "step": 5748 }, { "epoch": 0.45992, "grad_norm": 1.4910343885421753, "learning_rate": 5.634650800617672e-06, "loss": 0.2893, "step": 5749 }, { "epoch": 0.46, "grad_norm": 1.4709289073944092, "learning_rate": 5.633403309628645e-06, "loss": 0.3244, "step": 5750 }, { "epoch": 0.46008, "grad_norm": 1.6060997247695923, "learning_rate": 5.6321557785663105e-06, "loss": 0.3631, "step": 5751 }, { "epoch": 0.46016, "grad_norm": 1.4144808053970337, "learning_rate": 5.630908207509596e-06, "loss": 0.3068, "step": 5752 }, { "epoch": 0.46024, "grad_norm": 1.6381995677947998, "learning_rate": 5.62966059653743e-06, "loss": 0.3946, "step": 5753 }, { "epoch": 0.46032, "grad_norm": 1.3315595388412476, "learning_rate": 5.628412945728743e-06, "loss": 0.2966, "step": 5754 }, { "epoch": 0.4604, "grad_norm": 1.3688490390777588, "learning_rate": 5.627165255162472e-06, "loss": 0.2812, "step": 5755 }, { "epoch": 0.46048, "grad_norm": 1.2457916736602783, "learning_rate": 5.625917524917555e-06, "loss": 0.316, "step": 5756 }, { "epoch": 0.46056, "grad_norm": 1.8323296308517456, "learning_rate": 5.624669755072929e-06, "loss": 0.426, "step": 5757 }, { "epoch": 0.46064, "grad_norm": 1.5934995412826538, "learning_rate": 5.623421945707538e-06, "loss": 0.3284, "step": 5758 }, { "epoch": 0.46072, "grad_norm": 1.4549036026000977, "learning_rate": 5.622174096900328e-06, "loss": 0.3153, "step": 5759 }, { "epoch": 0.4608, "grad_norm": 1.9752590656280518, "learning_rate": 5.620926208730244e-06, "loss": 0.3554, "step": 5760 }, { "epoch": 0.46088, "grad_norm": 1.4583925008773804, "learning_rate": 5.619678281276235e-06, "loss": 0.2659, "step": 5761 }, { "epoch": 0.46096, "grad_norm": 1.7075728178024292, "learning_rate": 5.618430314617256e-06, "loss": 0.3486, "step": 5762 }, { "epoch": 0.46104, "grad_norm": 1.5881019830703735, "learning_rate": 5.617182308832261e-06, "loss": 0.3726, "step": 5763 }, { "epoch": 0.46112, "grad_norm": 1.6844027042388916, "learning_rate": 5.615934264000205e-06, "loss": 0.2897, "step": 5764 }, { "epoch": 0.4612, "grad_norm": 1.464428186416626, "learning_rate": 5.614686180200051e-06, "loss": 0.2831, "step": 5765 }, { "epoch": 0.46128, "grad_norm": 1.572912573814392, "learning_rate": 5.613438057510757e-06, "loss": 0.3686, "step": 5766 }, { "epoch": 0.46136, "grad_norm": 1.6168537139892578, "learning_rate": 5.61218989601129e-06, "loss": 0.4234, "step": 5767 }, { "epoch": 0.46144, "grad_norm": 1.732604742050171, "learning_rate": 5.610941695780616e-06, "loss": 0.3026, "step": 5768 }, { "epoch": 0.46152, "grad_norm": 1.795530915260315, "learning_rate": 5.6096934568977065e-06, "loss": 0.4124, "step": 5769 }, { "epoch": 0.4616, "grad_norm": 1.4683985710144043, "learning_rate": 5.60844517944153e-06, "loss": 0.3594, "step": 5770 }, { "epoch": 0.46168, "grad_norm": 1.6536790132522583, "learning_rate": 5.607196863491067e-06, "loss": 0.3532, "step": 5771 }, { "epoch": 0.46176, "grad_norm": 1.6116361618041992, "learning_rate": 5.605948509125288e-06, "loss": 0.3776, "step": 5772 }, { "epoch": 0.46184, "grad_norm": 1.5489414930343628, "learning_rate": 5.604700116423173e-06, "loss": 0.3541, "step": 5773 }, { "epoch": 0.46192, "grad_norm": 1.51923668384552, "learning_rate": 5.603451685463706e-06, "loss": 0.3029, "step": 5774 }, { "epoch": 0.462, "grad_norm": 1.9836581945419312, "learning_rate": 5.60220321632587e-06, "loss": 0.3891, "step": 5775 }, { "epoch": 0.46208, "grad_norm": 1.7087833881378174, "learning_rate": 5.600954709088651e-06, "loss": 0.4867, "step": 5776 }, { "epoch": 0.46216, "grad_norm": 1.7288364171981812, "learning_rate": 5.5997061638310405e-06, "loss": 0.321, "step": 5777 }, { "epoch": 0.46224, "grad_norm": 1.787791132926941, "learning_rate": 5.598457580632025e-06, "loss": 0.4304, "step": 5778 }, { "epoch": 0.46232, "grad_norm": 1.1242058277130127, "learning_rate": 5.597208959570602e-06, "loss": 0.3531, "step": 5779 }, { "epoch": 0.4624, "grad_norm": 1.8064368963241577, "learning_rate": 5.595960300725765e-06, "loss": 0.3596, "step": 5780 }, { "epoch": 0.46248, "grad_norm": 2.188750982284546, "learning_rate": 5.594711604176515e-06, "loss": 0.3838, "step": 5781 }, { "epoch": 0.46256, "grad_norm": 1.5916962623596191, "learning_rate": 5.593462870001851e-06, "loss": 0.3726, "step": 5782 }, { "epoch": 0.46264, "grad_norm": 1.3215919733047485, "learning_rate": 5.592214098280778e-06, "loss": 0.3084, "step": 5783 }, { "epoch": 0.46272, "grad_norm": 1.3940409421920776, "learning_rate": 5.5909652890923004e-06, "loss": 0.3357, "step": 5784 }, { "epoch": 0.4628, "grad_norm": 1.4418216943740845, "learning_rate": 5.589716442515426e-06, "loss": 0.2619, "step": 5785 }, { "epoch": 0.46288, "grad_norm": 1.2304273843765259, "learning_rate": 5.588467558629167e-06, "loss": 0.2796, "step": 5786 }, { "epoch": 0.46296, "grad_norm": 1.5511798858642578, "learning_rate": 5.587218637512532e-06, "loss": 0.2769, "step": 5787 }, { "epoch": 0.46304, "grad_norm": 1.0966295003890991, "learning_rate": 5.58596967924454e-06, "loss": 0.2145, "step": 5788 }, { "epoch": 0.46312, "grad_norm": 1.4968533515930176, "learning_rate": 5.5847206839042075e-06, "loss": 0.3548, "step": 5789 }, { "epoch": 0.4632, "grad_norm": 1.6240606307983398, "learning_rate": 5.5834716515705535e-06, "loss": 0.3444, "step": 5790 }, { "epoch": 0.46328, "grad_norm": 1.2611035108566284, "learning_rate": 5.5822225823226e-06, "loss": 0.2815, "step": 5791 }, { "epoch": 0.46336, "grad_norm": 1.897449016571045, "learning_rate": 5.580973476239371e-06, "loss": 0.3436, "step": 5792 }, { "epoch": 0.46344, "grad_norm": 1.5465292930603027, "learning_rate": 5.5797243333998955e-06, "loss": 0.293, "step": 5793 }, { "epoch": 0.46352, "grad_norm": 1.9730859994888306, "learning_rate": 5.578475153883201e-06, "loss": 0.3797, "step": 5794 }, { "epoch": 0.4636, "grad_norm": 1.8337812423706055, "learning_rate": 5.577225937768319e-06, "loss": 0.331, "step": 5795 }, { "epoch": 0.46368, "grad_norm": 1.6676514148712158, "learning_rate": 5.575976685134282e-06, "loss": 0.3331, "step": 5796 }, { "epoch": 0.46376, "grad_norm": 1.2623851299285889, "learning_rate": 5.57472739606013e-06, "loss": 0.2177, "step": 5797 }, { "epoch": 0.46384, "grad_norm": 1.3866863250732422, "learning_rate": 5.573478070624897e-06, "loss": 0.3358, "step": 5798 }, { "epoch": 0.46392, "grad_norm": 1.5590800046920776, "learning_rate": 5.572228708907626e-06, "loss": 0.3113, "step": 5799 }, { "epoch": 0.464, "grad_norm": 1.5001630783081055, "learning_rate": 5.570979310987359e-06, "loss": 0.3252, "step": 5800 }, { "epoch": 0.46408, "grad_norm": 1.6427888870239258, "learning_rate": 5.5697298769431404e-06, "loss": 0.4096, "step": 5801 }, { "epoch": 0.46416, "grad_norm": 1.886500358581543, "learning_rate": 5.56848040685402e-06, "loss": 0.3298, "step": 5802 }, { "epoch": 0.46424, "grad_norm": 1.4350028038024902, "learning_rate": 5.567230900799046e-06, "loss": 0.3214, "step": 5803 }, { "epoch": 0.46432, "grad_norm": 1.796977162361145, "learning_rate": 5.565981358857271e-06, "loss": 0.3826, "step": 5804 }, { "epoch": 0.4644, "grad_norm": 1.937929391860962, "learning_rate": 5.564731781107749e-06, "loss": 0.5146, "step": 5805 }, { "epoch": 0.46448, "grad_norm": 1.6140251159667969, "learning_rate": 5.563482167629537e-06, "loss": 0.3653, "step": 5806 }, { "epoch": 0.46456, "grad_norm": 1.971123218536377, "learning_rate": 5.562232518501694e-06, "loss": 0.481, "step": 5807 }, { "epoch": 0.46464, "grad_norm": 1.7507970333099365, "learning_rate": 5.56098283380328e-06, "loss": 0.3149, "step": 5808 }, { "epoch": 0.46472, "grad_norm": 1.2038670778274536, "learning_rate": 5.55973311361336e-06, "loss": 0.2867, "step": 5809 }, { "epoch": 0.4648, "grad_norm": 1.8903367519378662, "learning_rate": 5.558483358010999e-06, "loss": 0.3769, "step": 5810 }, { "epoch": 0.46488, "grad_norm": 1.3014912605285645, "learning_rate": 5.557233567075263e-06, "loss": 0.3411, "step": 5811 }, { "epoch": 0.46496, "grad_norm": 1.7084940671920776, "learning_rate": 5.555983740885225e-06, "loss": 0.3719, "step": 5812 }, { "epoch": 0.46504, "grad_norm": 1.7625243663787842, "learning_rate": 5.554733879519956e-06, "loss": 0.4562, "step": 5813 }, { "epoch": 0.46512, "grad_norm": 1.159744381904602, "learning_rate": 5.553483983058531e-06, "loss": 0.2261, "step": 5814 }, { "epoch": 0.4652, "grad_norm": 1.9187002182006836, "learning_rate": 5.552234051580024e-06, "loss": 0.3823, "step": 5815 }, { "epoch": 0.46528, "grad_norm": 1.6461594104766846, "learning_rate": 5.5509840851635185e-06, "loss": 0.3652, "step": 5816 }, { "epoch": 0.46536, "grad_norm": 1.612166166305542, "learning_rate": 5.549734083888093e-06, "loss": 0.3041, "step": 5817 }, { "epoch": 0.46544, "grad_norm": 1.890860915184021, "learning_rate": 5.548484047832833e-06, "loss": 0.5575, "step": 5818 }, { "epoch": 0.46552, "grad_norm": 1.3313186168670654, "learning_rate": 5.54723397707682e-06, "loss": 0.2978, "step": 5819 }, { "epoch": 0.4656, "grad_norm": 1.85710608959198, "learning_rate": 5.5459838716991465e-06, "loss": 0.3179, "step": 5820 }, { "epoch": 0.46568, "grad_norm": 1.5691063404083252, "learning_rate": 5.5447337317788986e-06, "loss": 0.3356, "step": 5821 }, { "epoch": 0.46576, "grad_norm": 1.6753313541412354, "learning_rate": 5.5434835573951704e-06, "loss": 0.2848, "step": 5822 }, { "epoch": 0.46584, "grad_norm": 1.4183037281036377, "learning_rate": 5.542233348627056e-06, "loss": 0.3081, "step": 5823 }, { "epoch": 0.46592, "grad_norm": 1.2803384065628052, "learning_rate": 5.540983105553654e-06, "loss": 0.3479, "step": 5824 }, { "epoch": 0.466, "grad_norm": 1.3356515169143677, "learning_rate": 5.539732828254059e-06, "loss": 0.2828, "step": 5825 }, { "epoch": 0.46608, "grad_norm": 1.4406170845031738, "learning_rate": 5.538482516807374e-06, "loss": 0.3388, "step": 5826 }, { "epoch": 0.46616, "grad_norm": 1.526328682899475, "learning_rate": 5.537232171292702e-06, "loss": 0.312, "step": 5827 }, { "epoch": 0.46624, "grad_norm": 1.2586652040481567, "learning_rate": 5.535981791789148e-06, "loss": 0.2932, "step": 5828 }, { "epoch": 0.46632, "grad_norm": 1.7249115705490112, "learning_rate": 5.534731378375819e-06, "loss": 0.489, "step": 5829 }, { "epoch": 0.4664, "grad_norm": 1.484268069267273, "learning_rate": 5.533480931131827e-06, "loss": 0.2965, "step": 5830 }, { "epoch": 0.46648, "grad_norm": 1.7964249849319458, "learning_rate": 5.53223045013628e-06, "loss": 0.4045, "step": 5831 }, { "epoch": 0.46656, "grad_norm": 1.7476415634155273, "learning_rate": 5.530979935468294e-06, "loss": 0.3451, "step": 5832 }, { "epoch": 0.46664, "grad_norm": 1.3235749006271362, "learning_rate": 5.529729387206983e-06, "loss": 0.2304, "step": 5833 }, { "epoch": 0.46672, "grad_norm": 1.5181303024291992, "learning_rate": 5.5284788054314665e-06, "loss": 0.314, "step": 5834 }, { "epoch": 0.4668, "grad_norm": 1.5556458234786987, "learning_rate": 5.527228190220866e-06, "loss": 0.3762, "step": 5835 }, { "epoch": 0.46688, "grad_norm": 1.3270909786224365, "learning_rate": 5.525977541654299e-06, "loss": 0.2586, "step": 5836 }, { "epoch": 0.46696, "grad_norm": 1.6928744316101074, "learning_rate": 5.524726859810895e-06, "loss": 0.3365, "step": 5837 }, { "epoch": 0.46704, "grad_norm": 1.3896623849868774, "learning_rate": 5.523476144769777e-06, "loss": 0.3052, "step": 5838 }, { "epoch": 0.46712, "grad_norm": 1.6852587461471558, "learning_rate": 5.522225396610076e-06, "loss": 0.3974, "step": 5839 }, { "epoch": 0.4672, "grad_norm": 1.4615007638931274, "learning_rate": 5.520974615410921e-06, "loss": 0.3015, "step": 5840 }, { "epoch": 0.46728, "grad_norm": 1.2389006614685059, "learning_rate": 5.519723801251445e-06, "loss": 0.2909, "step": 5841 }, { "epoch": 0.46736, "grad_norm": 1.441849946975708, "learning_rate": 5.518472954210784e-06, "loss": 0.3162, "step": 5842 }, { "epoch": 0.46744, "grad_norm": 1.5538463592529297, "learning_rate": 5.5172220743680745e-06, "loss": 0.3241, "step": 5843 }, { "epoch": 0.46752, "grad_norm": 1.311487078666687, "learning_rate": 5.515971161802454e-06, "loss": 0.2759, "step": 5844 }, { "epoch": 0.4676, "grad_norm": 1.836043119430542, "learning_rate": 5.514720216593063e-06, "loss": 0.4221, "step": 5845 }, { "epoch": 0.46768, "grad_norm": 1.3168381452560425, "learning_rate": 5.513469238819048e-06, "loss": 0.3044, "step": 5846 }, { "epoch": 0.46776, "grad_norm": 1.2070047855377197, "learning_rate": 5.5122182285595525e-06, "loss": 0.229, "step": 5847 }, { "epoch": 0.46784, "grad_norm": 1.5772783756256104, "learning_rate": 5.510967185893723e-06, "loss": 0.3556, "step": 5848 }, { "epoch": 0.46792, "grad_norm": 1.5532734394073486, "learning_rate": 5.509716110900709e-06, "loss": 0.3562, "step": 5849 }, { "epoch": 0.468, "grad_norm": 1.7133153676986694, "learning_rate": 5.508465003659663e-06, "loss": 0.3732, "step": 5850 }, { "epoch": 0.46808, "grad_norm": 1.6906065940856934, "learning_rate": 5.5072138642497365e-06, "loss": 0.398, "step": 5851 }, { "epoch": 0.46816, "grad_norm": 1.8008275032043457, "learning_rate": 5.505962692750087e-06, "loss": 0.3578, "step": 5852 }, { "epoch": 0.46824, "grad_norm": 2.004988193511963, "learning_rate": 5.504711489239871e-06, "loss": 0.4561, "step": 5853 }, { "epoch": 0.46832, "grad_norm": 1.8562557697296143, "learning_rate": 5.5034602537982485e-06, "loss": 0.5832, "step": 5854 }, { "epoch": 0.4684, "grad_norm": 1.4720324277877808, "learning_rate": 5.502208986504378e-06, "loss": 0.2837, "step": 5855 }, { "epoch": 0.46848, "grad_norm": 1.579901099205017, "learning_rate": 5.500957687437427e-06, "loss": 0.2924, "step": 5856 }, { "epoch": 0.46856, "grad_norm": 1.6447190046310425, "learning_rate": 5.499706356676559e-06, "loss": 0.325, "step": 5857 }, { "epoch": 0.46864, "grad_norm": 1.4578676223754883, "learning_rate": 5.498454994300941e-06, "loss": 0.341, "step": 5858 }, { "epoch": 0.46872, "grad_norm": 1.5523934364318848, "learning_rate": 5.497203600389744e-06, "loss": 0.4472, "step": 5859 }, { "epoch": 0.4688, "grad_norm": 1.6504297256469727, "learning_rate": 5.495952175022139e-06, "loss": 0.3244, "step": 5860 }, { "epoch": 0.46888, "grad_norm": 1.4491195678710938, "learning_rate": 5.4947007182772995e-06, "loss": 0.358, "step": 5861 }, { "epoch": 0.46896, "grad_norm": 1.6385431289672852, "learning_rate": 5.4934492302344e-06, "loss": 0.3625, "step": 5862 }, { "epoch": 0.46904, "grad_norm": 1.3259345293045044, "learning_rate": 5.492197710972618e-06, "loss": 0.3199, "step": 5863 }, { "epoch": 0.46912, "grad_norm": 1.4403702020645142, "learning_rate": 5.4909461605711365e-06, "loss": 0.2822, "step": 5864 }, { "epoch": 0.4692, "grad_norm": 1.74454665184021, "learning_rate": 5.489694579109133e-06, "loss": 0.3237, "step": 5865 }, { "epoch": 0.46928, "grad_norm": 1.565183162689209, "learning_rate": 5.4884429666657925e-06, "loss": 0.3899, "step": 5866 }, { "epoch": 0.46936, "grad_norm": 1.3134307861328125, "learning_rate": 5.487191323320298e-06, "loss": 0.421, "step": 5867 }, { "epoch": 0.46944, "grad_norm": 1.5535221099853516, "learning_rate": 5.48593964915184e-06, "loss": 0.403, "step": 5868 }, { "epoch": 0.46952, "grad_norm": 1.4464749097824097, "learning_rate": 5.484687944239605e-06, "loss": 0.3067, "step": 5869 }, { "epoch": 0.4696, "grad_norm": 1.3258053064346313, "learning_rate": 5.483436208662787e-06, "loss": 0.3042, "step": 5870 }, { "epoch": 0.46968, "grad_norm": 1.434024453163147, "learning_rate": 5.482184442500578e-06, "loss": 0.3709, "step": 5871 }, { "epoch": 0.46976, "grad_norm": 1.493878960609436, "learning_rate": 5.480932645832171e-06, "loss": 0.2702, "step": 5872 }, { "epoch": 0.46984, "grad_norm": 1.5920265913009644, "learning_rate": 5.479680818736765e-06, "loss": 0.3356, "step": 5873 }, { "epoch": 0.46992, "grad_norm": 1.51193106174469, "learning_rate": 5.478428961293559e-06, "loss": 0.3876, "step": 5874 }, { "epoch": 0.47, "grad_norm": 1.8197968006134033, "learning_rate": 5.477177073581754e-06, "loss": 0.4746, "step": 5875 }, { "epoch": 0.47008, "grad_norm": 1.4704205989837646, "learning_rate": 5.475925155680552e-06, "loss": 0.2828, "step": 5876 }, { "epoch": 0.47016, "grad_norm": 1.455899953842163, "learning_rate": 5.474673207669159e-06, "loss": 0.2777, "step": 5877 }, { "epoch": 0.47024, "grad_norm": 1.9199295043945312, "learning_rate": 5.473421229626779e-06, "loss": 0.4936, "step": 5878 }, { "epoch": 0.47032, "grad_norm": 1.642419457435608, "learning_rate": 5.472169221632622e-06, "loss": 0.4494, "step": 5879 }, { "epoch": 0.4704, "grad_norm": 1.428835391998291, "learning_rate": 5.470917183765898e-06, "loss": 0.3226, "step": 5880 }, { "epoch": 0.47048, "grad_norm": 1.3991670608520508, "learning_rate": 5.4696651161058186e-06, "loss": 0.2963, "step": 5881 }, { "epoch": 0.47056, "grad_norm": 1.5357342958450317, "learning_rate": 5.468413018731601e-06, "loss": 0.3023, "step": 5882 }, { "epoch": 0.47064, "grad_norm": 1.5561589002609253, "learning_rate": 5.467160891722459e-06, "loss": 0.3432, "step": 5883 }, { "epoch": 0.47072, "grad_norm": 1.3665988445281982, "learning_rate": 5.465908735157608e-06, "loss": 0.333, "step": 5884 }, { "epoch": 0.4708, "grad_norm": 1.4518331289291382, "learning_rate": 5.4646565491162716e-06, "loss": 0.3156, "step": 5885 }, { "epoch": 0.47088, "grad_norm": 1.7487683296203613, "learning_rate": 5.46340433367767e-06, "loss": 0.3997, "step": 5886 }, { "epoch": 0.47096, "grad_norm": 1.7349579334259033, "learning_rate": 5.462152088921028e-06, "loss": 0.3941, "step": 5887 }, { "epoch": 0.47104, "grad_norm": 1.3524267673492432, "learning_rate": 5.460899814925567e-06, "loss": 0.3129, "step": 5888 }, { "epoch": 0.47112, "grad_norm": 1.5749634504318237, "learning_rate": 5.459647511770521e-06, "loss": 0.4254, "step": 5889 }, { "epoch": 0.4712, "grad_norm": 1.641700267791748, "learning_rate": 5.458395179535112e-06, "loss": 0.3265, "step": 5890 }, { "epoch": 0.47128, "grad_norm": 1.3452178239822388, "learning_rate": 5.457142818298573e-06, "loss": 0.3459, "step": 5891 }, { "epoch": 0.47136, "grad_norm": 1.5965007543563843, "learning_rate": 5.455890428140139e-06, "loss": 0.3374, "step": 5892 }, { "epoch": 0.47144, "grad_norm": 1.6195091009140015, "learning_rate": 5.4546380091390425e-06, "loss": 0.4122, "step": 5893 }, { "epoch": 0.47152, "grad_norm": 2.059089422225952, "learning_rate": 5.453385561374521e-06, "loss": 0.5191, "step": 5894 }, { "epoch": 0.4716, "grad_norm": 1.6009624004364014, "learning_rate": 5.452133084925812e-06, "loss": 0.3699, "step": 5895 }, { "epoch": 0.47168, "grad_norm": 1.5404019355773926, "learning_rate": 5.450880579872156e-06, "loss": 0.347, "step": 5896 }, { "epoch": 0.47176, "grad_norm": 2.4010934829711914, "learning_rate": 5.449628046292792e-06, "loss": 0.4262, "step": 5897 }, { "epoch": 0.47184, "grad_norm": 1.5734963417053223, "learning_rate": 5.448375484266968e-06, "loss": 0.4524, "step": 5898 }, { "epoch": 0.47192, "grad_norm": 1.7515249252319336, "learning_rate": 5.447122893873927e-06, "loss": 0.4706, "step": 5899 }, { "epoch": 0.472, "grad_norm": 1.6091927289962769, "learning_rate": 5.445870275192918e-06, "loss": 0.3039, "step": 5900 }, { "epoch": 0.47208, "grad_norm": 1.3763673305511475, "learning_rate": 5.444617628303187e-06, "loss": 0.2805, "step": 5901 }, { "epoch": 0.47216, "grad_norm": 1.7855626344680786, "learning_rate": 5.443364953283986e-06, "loss": 0.3412, "step": 5902 }, { "epoch": 0.47224, "grad_norm": 1.6489816904067993, "learning_rate": 5.442112250214569e-06, "loss": 0.3524, "step": 5903 }, { "epoch": 0.47232, "grad_norm": 1.6601721048355103, "learning_rate": 5.440859519174187e-06, "loss": 0.3718, "step": 5904 }, { "epoch": 0.4724, "grad_norm": 1.6896872520446777, "learning_rate": 5.4396067602421e-06, "loss": 0.3397, "step": 5905 }, { "epoch": 0.47248, "grad_norm": 1.254325270652771, "learning_rate": 5.438353973497565e-06, "loss": 0.2802, "step": 5906 }, { "epoch": 0.47256, "grad_norm": 2.1470906734466553, "learning_rate": 5.43710115901984e-06, "loss": 0.5072, "step": 5907 }, { "epoch": 0.47264, "grad_norm": 1.403336763381958, "learning_rate": 5.435848316888187e-06, "loss": 0.2941, "step": 5908 }, { "epoch": 0.47272, "grad_norm": 1.7086148262023926, "learning_rate": 5.4345954471818695e-06, "loss": 0.3538, "step": 5909 }, { "epoch": 0.4728, "grad_norm": 1.3657201528549194, "learning_rate": 5.433342549980153e-06, "loss": 0.2622, "step": 5910 }, { "epoch": 0.47288, "grad_norm": 1.6097228527069092, "learning_rate": 5.432089625362302e-06, "loss": 0.4418, "step": 5911 }, { "epoch": 0.47296, "grad_norm": 1.664275050163269, "learning_rate": 5.430836673407588e-06, "loss": 0.3672, "step": 5912 }, { "epoch": 0.47304, "grad_norm": 1.4426720142364502, "learning_rate": 5.4295836941952775e-06, "loss": 0.33, "step": 5913 }, { "epoch": 0.47312, "grad_norm": 1.6172068119049072, "learning_rate": 5.428330687804643e-06, "loss": 0.4129, "step": 5914 }, { "epoch": 0.4732, "grad_norm": 1.322485327720642, "learning_rate": 5.427077654314961e-06, "loss": 0.3158, "step": 5915 }, { "epoch": 0.47328, "grad_norm": 1.6107004880905151, "learning_rate": 5.425824593805505e-06, "loss": 0.3416, "step": 5916 }, { "epoch": 0.47336, "grad_norm": 2.201343297958374, "learning_rate": 5.424571506355552e-06, "loss": 0.3547, "step": 5917 }, { "epoch": 0.47344, "grad_norm": 1.490373969078064, "learning_rate": 5.42331839204438e-06, "loss": 0.3098, "step": 5918 }, { "epoch": 0.47352, "grad_norm": 1.3307541608810425, "learning_rate": 5.422065250951268e-06, "loss": 0.3176, "step": 5919 }, { "epoch": 0.4736, "grad_norm": 1.459407925605774, "learning_rate": 5.420812083155502e-06, "loss": 0.3212, "step": 5920 }, { "epoch": 0.47368, "grad_norm": 1.4235657453536987, "learning_rate": 5.4195588887363635e-06, "loss": 0.3285, "step": 5921 }, { "epoch": 0.47376, "grad_norm": 1.2670882940292358, "learning_rate": 5.4183056677731376e-06, "loss": 0.3147, "step": 5922 }, { "epoch": 0.47384, "grad_norm": 1.553990125656128, "learning_rate": 5.41705242034511e-06, "loss": 0.4067, "step": 5923 }, { "epoch": 0.47392, "grad_norm": 1.3905086517333984, "learning_rate": 5.415799146531574e-06, "loss": 0.3246, "step": 5924 }, { "epoch": 0.474, "grad_norm": 1.6242258548736572, "learning_rate": 5.414545846411815e-06, "loss": 0.3597, "step": 5925 }, { "epoch": 0.47408, "grad_norm": 2.0094828605651855, "learning_rate": 5.413292520065129e-06, "loss": 0.3506, "step": 5926 }, { "epoch": 0.47416, "grad_norm": 1.0113637447357178, "learning_rate": 5.4120391675708065e-06, "loss": 0.2126, "step": 5927 }, { "epoch": 0.47424, "grad_norm": 1.8587594032287598, "learning_rate": 5.410785789008145e-06, "loss": 0.4143, "step": 5928 }, { "epoch": 0.47432, "grad_norm": 1.5328631401062012, "learning_rate": 5.4095323844564425e-06, "loss": 0.3364, "step": 5929 }, { "epoch": 0.4744, "grad_norm": 1.8271329402923584, "learning_rate": 5.408278953994996e-06, "loss": 0.3848, "step": 5930 }, { "epoch": 0.47448, "grad_norm": 1.4636245965957642, "learning_rate": 5.407025497703105e-06, "loss": 0.3291, "step": 5931 }, { "epoch": 0.47456, "grad_norm": 1.4129557609558105, "learning_rate": 5.405772015660072e-06, "loss": 0.3842, "step": 5932 }, { "epoch": 0.47464, "grad_norm": 1.7334128618240356, "learning_rate": 5.404518507945205e-06, "loss": 0.4864, "step": 5933 }, { "epoch": 0.47472, "grad_norm": 1.5284255743026733, "learning_rate": 5.403264974637802e-06, "loss": 0.3454, "step": 5934 }, { "epoch": 0.4748, "grad_norm": 1.4403399229049683, "learning_rate": 5.402011415817176e-06, "loss": 0.29, "step": 5935 }, { "epoch": 0.47488, "grad_norm": 1.4352822303771973, "learning_rate": 5.400757831562631e-06, "loss": 0.2549, "step": 5936 }, { "epoch": 0.47496, "grad_norm": 1.5623631477355957, "learning_rate": 5.399504221953478e-06, "loss": 0.2864, "step": 5937 }, { "epoch": 0.47504, "grad_norm": 1.2767705917358398, "learning_rate": 5.3982505870690316e-06, "loss": 0.289, "step": 5938 }, { "epoch": 0.47512, "grad_norm": 1.3673750162124634, "learning_rate": 5.396996926988601e-06, "loss": 0.2931, "step": 5939 }, { "epoch": 0.4752, "grad_norm": 1.759554386138916, "learning_rate": 5.395743241791504e-06, "loss": 0.3399, "step": 5940 }, { "epoch": 0.47528, "grad_norm": 1.6224939823150635, "learning_rate": 5.394489531557059e-06, "loss": 0.3372, "step": 5941 }, { "epoch": 0.47536, "grad_norm": 1.5486100912094116, "learning_rate": 5.393235796364578e-06, "loss": 0.2582, "step": 5942 }, { "epoch": 0.47544, "grad_norm": 0.9767140746116638, "learning_rate": 5.391982036293385e-06, "loss": 0.1933, "step": 5943 }, { "epoch": 0.47552, "grad_norm": 1.7568838596343994, "learning_rate": 5.390728251422801e-06, "loss": 0.3333, "step": 5944 }, { "epoch": 0.4756, "grad_norm": 1.5353167057037354, "learning_rate": 5.389474441832148e-06, "loss": 0.3076, "step": 5945 }, { "epoch": 0.47568, "grad_norm": 1.9737414121627808, "learning_rate": 5.388220607600748e-06, "loss": 0.434, "step": 5946 }, { "epoch": 0.47576, "grad_norm": 1.3293046951293945, "learning_rate": 5.386966748807932e-06, "loss": 0.2834, "step": 5947 }, { "epoch": 0.47584, "grad_norm": 1.4846619367599487, "learning_rate": 5.385712865533023e-06, "loss": 0.3274, "step": 5948 }, { "epoch": 0.47592, "grad_norm": 1.7403323650360107, "learning_rate": 5.384458957855351e-06, "loss": 0.3113, "step": 5949 }, { "epoch": 0.476, "grad_norm": 1.7294260263442993, "learning_rate": 5.383205025854248e-06, "loss": 0.3651, "step": 5950 }, { "epoch": 0.47608, "grad_norm": 1.7438633441925049, "learning_rate": 5.381951069609045e-06, "loss": 0.3918, "step": 5951 }, { "epoch": 0.47616, "grad_norm": 1.3631834983825684, "learning_rate": 5.380697089199075e-06, "loss": 0.2842, "step": 5952 }, { "epoch": 0.47624, "grad_norm": 1.3905704021453857, "learning_rate": 5.379443084703676e-06, "loss": 0.2753, "step": 5953 }, { "epoch": 0.47632, "grad_norm": 1.5892817974090576, "learning_rate": 5.378189056202181e-06, "loss": 0.3348, "step": 5954 }, { "epoch": 0.4764, "grad_norm": 1.856491208076477, "learning_rate": 5.376935003773931e-06, "loss": 0.3874, "step": 5955 }, { "epoch": 0.47648, "grad_norm": 1.2700897455215454, "learning_rate": 5.375680927498265e-06, "loss": 0.309, "step": 5956 }, { "epoch": 0.47656, "grad_norm": 1.3322601318359375, "learning_rate": 5.374426827454522e-06, "loss": 0.2654, "step": 5957 }, { "epoch": 0.47664, "grad_norm": 2.0289957523345947, "learning_rate": 5.373172703722046e-06, "loss": 0.5895, "step": 5958 }, { "epoch": 0.47672, "grad_norm": 1.688299298286438, "learning_rate": 5.371918556380185e-06, "loss": 0.4384, "step": 5959 }, { "epoch": 0.4768, "grad_norm": 1.5574098825454712, "learning_rate": 5.370664385508278e-06, "loss": 0.3618, "step": 5960 }, { "epoch": 0.47688, "grad_norm": 1.470866322517395, "learning_rate": 5.369410191185676e-06, "loss": 0.3197, "step": 5961 }, { "epoch": 0.47696, "grad_norm": 1.5503671169281006, "learning_rate": 5.368155973491729e-06, "loss": 0.4128, "step": 5962 }, { "epoch": 0.47704, "grad_norm": 1.393202304840088, "learning_rate": 5.366901732505784e-06, "loss": 0.2566, "step": 5963 }, { "epoch": 0.47712, "grad_norm": 1.478087306022644, "learning_rate": 5.365647468307193e-06, "loss": 0.3087, "step": 5964 }, { "epoch": 0.4772, "grad_norm": 1.4363526105880737, "learning_rate": 5.364393180975314e-06, "loss": 0.2757, "step": 5965 }, { "epoch": 0.47728, "grad_norm": 1.8411908149719238, "learning_rate": 5.363138870589495e-06, "loss": 0.4493, "step": 5966 }, { "epoch": 0.47736, "grad_norm": 1.8688772916793823, "learning_rate": 5.361884537229095e-06, "loss": 0.3322, "step": 5967 }, { "epoch": 0.47744, "grad_norm": 1.6204429864883423, "learning_rate": 5.360630180973472e-06, "loss": 0.321, "step": 5968 }, { "epoch": 0.47752, "grad_norm": 1.8783400058746338, "learning_rate": 5.359375801901982e-06, "loss": 0.3558, "step": 5969 }, { "epoch": 0.4776, "grad_norm": 1.60300874710083, "learning_rate": 5.358121400093989e-06, "loss": 0.3528, "step": 5970 }, { "epoch": 0.47768, "grad_norm": 1.5977694988250732, "learning_rate": 5.356866975628854e-06, "loss": 0.3363, "step": 5971 }, { "epoch": 0.47776, "grad_norm": 1.698292851448059, "learning_rate": 5.355612528585938e-06, "loss": 0.391, "step": 5972 }, { "epoch": 0.47784, "grad_norm": 1.7773863077163696, "learning_rate": 5.354358059044608e-06, "loss": 0.3306, "step": 5973 }, { "epoch": 0.47792, "grad_norm": 1.5922447443008423, "learning_rate": 5.353103567084229e-06, "loss": 0.3131, "step": 5974 }, { "epoch": 0.478, "grad_norm": 1.8159767389297485, "learning_rate": 5.3518490527841685e-06, "loss": 0.3942, "step": 5975 }, { "epoch": 0.47808, "grad_norm": 1.657160997390747, "learning_rate": 5.350594516223797e-06, "loss": 0.4472, "step": 5976 }, { "epoch": 0.47816, "grad_norm": 2.0365302562713623, "learning_rate": 5.3493399574824824e-06, "loss": 0.3914, "step": 5977 }, { "epoch": 0.47824, "grad_norm": 1.715842843055725, "learning_rate": 5.348085376639598e-06, "loss": 0.3958, "step": 5978 }, { "epoch": 0.47832, "grad_norm": 1.8368576765060425, "learning_rate": 5.3468307737745175e-06, "loss": 0.3646, "step": 5979 }, { "epoch": 0.4784, "grad_norm": 1.8690496683120728, "learning_rate": 5.345576148966612e-06, "loss": 0.5069, "step": 5980 }, { "epoch": 0.47848, "grad_norm": 1.9646300077438354, "learning_rate": 5.344321502295262e-06, "loss": 0.4976, "step": 5981 }, { "epoch": 0.47856, "grad_norm": 1.7964569330215454, "learning_rate": 5.3430668338398425e-06, "loss": 0.3761, "step": 5982 }, { "epoch": 0.47864, "grad_norm": 1.9966002702713013, "learning_rate": 5.341812143679732e-06, "loss": 0.4334, "step": 5983 }, { "epoch": 0.47872, "grad_norm": 2.4749326705932617, "learning_rate": 5.34055743189431e-06, "loss": 0.5564, "step": 5984 }, { "epoch": 0.4788, "grad_norm": 1.9291296005249023, "learning_rate": 5.339302698562959e-06, "loss": 0.3668, "step": 5985 }, { "epoch": 0.47888, "grad_norm": 1.5976835489273071, "learning_rate": 5.3380479437650625e-06, "loss": 0.3629, "step": 5986 }, { "epoch": 0.47896, "grad_norm": 1.5634150505065918, "learning_rate": 5.336793167580002e-06, "loss": 0.3366, "step": 5987 }, { "epoch": 0.47904, "grad_norm": 1.9057620763778687, "learning_rate": 5.3355383700871665e-06, "loss": 0.3585, "step": 5988 }, { "epoch": 0.47912, "grad_norm": 1.6130526065826416, "learning_rate": 5.334283551365941e-06, "loss": 0.4087, "step": 5989 }, { "epoch": 0.4792, "grad_norm": 2.0664384365081787, "learning_rate": 5.333028711495713e-06, "loss": 0.5229, "step": 5990 }, { "epoch": 0.47928, "grad_norm": 1.7554843425750732, "learning_rate": 5.331773850555874e-06, "loss": 0.3727, "step": 5991 }, { "epoch": 0.47936, "grad_norm": 1.2213190793991089, "learning_rate": 5.330518968625812e-06, "loss": 0.2522, "step": 5992 }, { "epoch": 0.47944, "grad_norm": 1.3879023790359497, "learning_rate": 5.32926406578492e-06, "loss": 0.3652, "step": 5993 }, { "epoch": 0.47952, "grad_norm": 1.7158325910568237, "learning_rate": 5.328009142112594e-06, "loss": 0.3132, "step": 5994 }, { "epoch": 0.4796, "grad_norm": 1.8352590799331665, "learning_rate": 5.326754197688227e-06, "loss": 0.3482, "step": 5995 }, { "epoch": 0.47968, "grad_norm": 1.52081298828125, "learning_rate": 5.3254992325912144e-06, "loss": 0.309, "step": 5996 }, { "epoch": 0.47976, "grad_norm": 1.6611655950546265, "learning_rate": 5.324244246900955e-06, "loss": 0.4123, "step": 5997 }, { "epoch": 0.47984, "grad_norm": 1.4265227317810059, "learning_rate": 5.322989240696846e-06, "loss": 0.2749, "step": 5998 }, { "epoch": 0.47992, "grad_norm": 1.5053348541259766, "learning_rate": 5.3217342140582895e-06, "loss": 0.268, "step": 5999 }, { "epoch": 0.48, "grad_norm": 1.3345094919204712, "learning_rate": 5.3204791670646875e-06, "loss": 0.2847, "step": 6000 }, { "epoch": 0.48008, "grad_norm": 1.3959380388259888, "learning_rate": 5.319224099795438e-06, "loss": 0.321, "step": 6001 }, { "epoch": 0.48016, "grad_norm": 1.5935245752334595, "learning_rate": 5.31796901232995e-06, "loss": 0.3813, "step": 6002 }, { "epoch": 0.48024, "grad_norm": 1.4641132354736328, "learning_rate": 5.316713904747626e-06, "loss": 0.3184, "step": 6003 }, { "epoch": 0.48032, "grad_norm": 1.3873530626296997, "learning_rate": 5.315458777127872e-06, "loss": 0.2754, "step": 6004 }, { "epoch": 0.4804, "grad_norm": 1.330491065979004, "learning_rate": 5.3142036295500965e-06, "loss": 0.2641, "step": 6005 }, { "epoch": 0.48048, "grad_norm": 1.4904730319976807, "learning_rate": 5.31294846209371e-06, "loss": 0.3074, "step": 6006 }, { "epoch": 0.48056, "grad_norm": 1.3027527332305908, "learning_rate": 5.311693274838121e-06, "loss": 0.3014, "step": 6007 }, { "epoch": 0.48064, "grad_norm": 1.5734492540359497, "learning_rate": 5.310438067862741e-06, "loss": 0.2831, "step": 6008 }, { "epoch": 0.48072, "grad_norm": 2.1448333263397217, "learning_rate": 5.309182841246984e-06, "loss": 0.4077, "step": 6009 }, { "epoch": 0.4808, "grad_norm": 1.4222567081451416, "learning_rate": 5.307927595070261e-06, "loss": 0.3582, "step": 6010 }, { "epoch": 0.48088, "grad_norm": 1.7736330032348633, "learning_rate": 5.306672329411993e-06, "loss": 0.4902, "step": 6011 }, { "epoch": 0.48096, "grad_norm": 1.4863662719726562, "learning_rate": 5.3054170443515895e-06, "loss": 0.3672, "step": 6012 }, { "epoch": 0.48104, "grad_norm": 1.2911968231201172, "learning_rate": 5.304161739968474e-06, "loss": 0.3233, "step": 6013 }, { "epoch": 0.48112, "grad_norm": 1.451702356338501, "learning_rate": 5.302906416342062e-06, "loss": 0.2513, "step": 6014 }, { "epoch": 0.4812, "grad_norm": 1.4997286796569824, "learning_rate": 5.301651073551774e-06, "loss": 0.2994, "step": 6015 }, { "epoch": 0.48128, "grad_norm": 1.796286702156067, "learning_rate": 5.300395711677032e-06, "loss": 0.3123, "step": 6016 }, { "epoch": 0.48136, "grad_norm": 1.705010175704956, "learning_rate": 5.299140330797258e-06, "loss": 0.4163, "step": 6017 }, { "epoch": 0.48144, "grad_norm": 1.3299634456634521, "learning_rate": 5.297884930991878e-06, "loss": 0.2821, "step": 6018 }, { "epoch": 0.48152, "grad_norm": 1.7674593925476074, "learning_rate": 5.296629512340313e-06, "loss": 0.3192, "step": 6019 }, { "epoch": 0.4816, "grad_norm": 1.500632405281067, "learning_rate": 5.295374074921993e-06, "loss": 0.4209, "step": 6020 }, { "epoch": 0.48168, "grad_norm": 1.5047935247421265, "learning_rate": 5.294118618816342e-06, "loss": 0.344, "step": 6021 }, { "epoch": 0.48176, "grad_norm": 1.670544147491455, "learning_rate": 5.292863144102791e-06, "loss": 0.3467, "step": 6022 }, { "epoch": 0.48184, "grad_norm": 1.3187230825424194, "learning_rate": 5.291607650860769e-06, "loss": 0.3187, "step": 6023 }, { "epoch": 0.48192, "grad_norm": 1.355542778968811, "learning_rate": 5.290352139169708e-06, "loss": 0.2727, "step": 6024 }, { "epoch": 0.482, "grad_norm": 1.7600772380828857, "learning_rate": 5.289096609109037e-06, "loss": 0.5256, "step": 6025 }, { "epoch": 0.48208, "grad_norm": 1.4076069593429565, "learning_rate": 5.287841060758191e-06, "loss": 0.2691, "step": 6026 }, { "epoch": 0.48216, "grad_norm": 1.5772290229797363, "learning_rate": 5.2865854941966036e-06, "loss": 0.3356, "step": 6027 }, { "epoch": 0.48224, "grad_norm": 1.2514276504516602, "learning_rate": 5.285329909503711e-06, "loss": 0.2623, "step": 6028 }, { "epoch": 0.48232, "grad_norm": 1.582513689994812, "learning_rate": 5.284074306758951e-06, "loss": 0.3788, "step": 6029 }, { "epoch": 0.4824, "grad_norm": 1.5977832078933716, "learning_rate": 5.2828186860417594e-06, "loss": 0.3679, "step": 6030 }, { "epoch": 0.48248, "grad_norm": 2.7796149253845215, "learning_rate": 5.281563047431576e-06, "loss": 3.9853, "step": 6031 }, { "epoch": 0.48256, "grad_norm": 1.570940613746643, "learning_rate": 5.2803073910078405e-06, "loss": 0.3789, "step": 6032 }, { "epoch": 0.48264, "grad_norm": 1.6044830083847046, "learning_rate": 5.279051716849993e-06, "loss": 0.3946, "step": 6033 }, { "epoch": 0.48272, "grad_norm": 1.9162310361862183, "learning_rate": 5.27779602503748e-06, "loss": 0.3879, "step": 6034 }, { "epoch": 0.4828, "grad_norm": 1.536901831626892, "learning_rate": 5.27654031564974e-06, "loss": 0.3266, "step": 6035 }, { "epoch": 0.48288, "grad_norm": 1.6972392797470093, "learning_rate": 5.275284588766221e-06, "loss": 0.3758, "step": 6036 }, { "epoch": 0.48296, "grad_norm": 1.5778000354766846, "learning_rate": 5.274028844466366e-06, "loss": 0.3237, "step": 6037 }, { "epoch": 0.48304, "grad_norm": 1.4701905250549316, "learning_rate": 5.272773082829623e-06, "loss": 0.3285, "step": 6038 }, { "epoch": 0.48312, "grad_norm": 1.5418689250946045, "learning_rate": 5.2715173039354395e-06, "loss": 0.3325, "step": 6039 }, { "epoch": 0.4832, "grad_norm": 1.3240424394607544, "learning_rate": 5.270261507863265e-06, "loss": 0.2852, "step": 6040 }, { "epoch": 0.48328, "grad_norm": 1.316301941871643, "learning_rate": 5.26900569469255e-06, "loss": 0.2481, "step": 6041 }, { "epoch": 0.48336, "grad_norm": 1.2276591062545776, "learning_rate": 5.267749864502744e-06, "loss": 0.3035, "step": 6042 }, { "epoch": 0.48344, "grad_norm": 1.5969600677490234, "learning_rate": 5.266494017373299e-06, "loss": 0.3646, "step": 6043 }, { "epoch": 0.48352, "grad_norm": 1.2858132123947144, "learning_rate": 5.265238153383669e-06, "loss": 0.2527, "step": 6044 }, { "epoch": 0.4836, "grad_norm": 1.9083600044250488, "learning_rate": 5.26398227261331e-06, "loss": 0.3858, "step": 6045 }, { "epoch": 0.48368, "grad_norm": 2.1194543838500977, "learning_rate": 5.2627263751416765e-06, "loss": 0.4669, "step": 6046 }, { "epoch": 0.48376, "grad_norm": 1.6560252904891968, "learning_rate": 5.261470461048225e-06, "loss": 0.3027, "step": 6047 }, { "epoch": 0.48384, "grad_norm": 1.7517454624176025, "learning_rate": 5.260214530412409e-06, "loss": 0.4387, "step": 6048 }, { "epoch": 0.48392, "grad_norm": 1.775425910949707, "learning_rate": 5.258958583313692e-06, "loss": 0.2734, "step": 6049 }, { "epoch": 0.484, "grad_norm": 1.6235383749008179, "learning_rate": 5.257702619831531e-06, "loss": 0.2836, "step": 6050 }, { "epoch": 0.48408, "grad_norm": 1.6095234155654907, "learning_rate": 5.25644664004539e-06, "loss": 0.3566, "step": 6051 }, { "epoch": 0.48416, "grad_norm": 2.052558183670044, "learning_rate": 5.2551906440347254e-06, "loss": 0.5749, "step": 6052 }, { "epoch": 0.48424, "grad_norm": 1.8320833444595337, "learning_rate": 5.253934631879005e-06, "loss": 0.3727, "step": 6053 }, { "epoch": 0.48432, "grad_norm": 1.698598027229309, "learning_rate": 5.252678603657689e-06, "loss": 0.3987, "step": 6054 }, { "epoch": 0.4844, "grad_norm": 2.1110999584198, "learning_rate": 5.251422559450243e-06, "loss": 0.4145, "step": 6055 }, { "epoch": 0.48448, "grad_norm": 1.7855218648910522, "learning_rate": 5.250166499336132e-06, "loss": 0.3626, "step": 6056 }, { "epoch": 0.48456, "grad_norm": 1.150992512702942, "learning_rate": 5.248910423394827e-06, "loss": 0.2392, "step": 6057 }, { "epoch": 0.48464, "grad_norm": 1.548820972442627, "learning_rate": 5.2476543317057896e-06, "loss": 0.2712, "step": 6058 }, { "epoch": 0.48472, "grad_norm": 1.7660967111587524, "learning_rate": 5.246398224348492e-06, "loss": 0.4438, "step": 6059 }, { "epoch": 0.4848, "grad_norm": 1.4774236679077148, "learning_rate": 5.245142101402403e-06, "loss": 0.3322, "step": 6060 }, { "epoch": 0.48488, "grad_norm": 1.747956395149231, "learning_rate": 5.2438859629469926e-06, "loss": 0.3714, "step": 6061 }, { "epoch": 0.48496, "grad_norm": 1.422593116760254, "learning_rate": 5.242629809061735e-06, "loss": 0.3844, "step": 6062 }, { "epoch": 0.48504, "grad_norm": 1.4882540702819824, "learning_rate": 5.2413736398260995e-06, "loss": 0.3285, "step": 6063 }, { "epoch": 0.48512, "grad_norm": 1.318860650062561, "learning_rate": 5.240117455319562e-06, "loss": 0.2693, "step": 6064 }, { "epoch": 0.4852, "grad_norm": 1.7626543045043945, "learning_rate": 5.238861255621598e-06, "loss": 0.3453, "step": 6065 }, { "epoch": 0.48528, "grad_norm": 1.5399316549301147, "learning_rate": 5.2376050408116805e-06, "loss": 0.2871, "step": 6066 }, { "epoch": 0.48536, "grad_norm": 1.6021755933761597, "learning_rate": 5.236348810969287e-06, "loss": 0.2933, "step": 6067 }, { "epoch": 0.48544, "grad_norm": 1.620920181274414, "learning_rate": 5.235092566173896e-06, "loss": 0.3498, "step": 6068 }, { "epoch": 0.48552, "grad_norm": 1.4818662405014038, "learning_rate": 5.233836306504983e-06, "loss": 0.2698, "step": 6069 }, { "epoch": 0.4856, "grad_norm": 1.618439793586731, "learning_rate": 5.232580032042032e-06, "loss": 0.4304, "step": 6070 }, { "epoch": 0.48568, "grad_norm": 1.531455636024475, "learning_rate": 5.231323742864519e-06, "loss": 0.3385, "step": 6071 }, { "epoch": 0.48576, "grad_norm": 1.6968480348587036, "learning_rate": 5.230067439051927e-06, "loss": 0.33, "step": 6072 }, { "epoch": 0.48584, "grad_norm": 1.7031455039978027, "learning_rate": 5.228811120683738e-06, "loss": 0.3422, "step": 6073 }, { "epoch": 0.48592, "grad_norm": 1.5051372051239014, "learning_rate": 5.2275547878394335e-06, "loss": 0.2661, "step": 6074 }, { "epoch": 0.486, "grad_norm": 1.9860918521881104, "learning_rate": 5.2262984405985005e-06, "loss": 0.3608, "step": 6075 }, { "epoch": 0.48608, "grad_norm": 1.6986693143844604, "learning_rate": 5.225042079040424e-06, "loss": 0.335, "step": 6076 }, { "epoch": 0.48616, "grad_norm": 1.692690372467041, "learning_rate": 5.223785703244685e-06, "loss": 0.4128, "step": 6077 }, { "epoch": 0.48624, "grad_norm": 1.331215739250183, "learning_rate": 5.222529313290774e-06, "loss": 0.3168, "step": 6078 }, { "epoch": 0.48632, "grad_norm": 1.5098655223846436, "learning_rate": 5.221272909258178e-06, "loss": 0.2776, "step": 6079 }, { "epoch": 0.4864, "grad_norm": 1.5736565589904785, "learning_rate": 5.220016491226387e-06, "loss": 0.4146, "step": 6080 }, { "epoch": 0.48648, "grad_norm": 1.3182066679000854, "learning_rate": 5.2187600592748876e-06, "loss": 0.2653, "step": 6081 }, { "epoch": 0.48656, "grad_norm": 1.3879201412200928, "learning_rate": 5.217503613483172e-06, "loss": 0.3127, "step": 6082 }, { "epoch": 0.48664, "grad_norm": 1.3078670501708984, "learning_rate": 5.21624715393073e-06, "loss": 0.2701, "step": 6083 }, { "epoch": 0.48672, "grad_norm": 1.890555739402771, "learning_rate": 5.214990680697054e-06, "loss": 0.5105, "step": 6084 }, { "epoch": 0.4868, "grad_norm": 1.693420171737671, "learning_rate": 5.213734193861637e-06, "loss": 0.3455, "step": 6085 }, { "epoch": 0.48688, "grad_norm": 1.6514363288879395, "learning_rate": 5.212477693503973e-06, "loss": 0.4083, "step": 6086 }, { "epoch": 0.48696, "grad_norm": 1.5142364501953125, "learning_rate": 5.211221179703555e-06, "loss": 0.3556, "step": 6087 }, { "epoch": 0.48704, "grad_norm": 1.2250992059707642, "learning_rate": 5.209964652539882e-06, "loss": 0.2917, "step": 6088 }, { "epoch": 0.48712, "grad_norm": 1.4357571601867676, "learning_rate": 5.2087081120924464e-06, "loss": 0.2784, "step": 6089 }, { "epoch": 0.4872, "grad_norm": 1.5746808052062988, "learning_rate": 5.207451558440747e-06, "loss": 0.3645, "step": 6090 }, { "epoch": 0.48728, "grad_norm": 1.1087597608566284, "learning_rate": 5.206194991664283e-06, "loss": 0.2236, "step": 6091 }, { "epoch": 0.48736, "grad_norm": 1.6777054071426392, "learning_rate": 5.204938411842551e-06, "loss": 0.4101, "step": 6092 }, { "epoch": 0.48744, "grad_norm": 2.015167474746704, "learning_rate": 5.2036818190550496e-06, "loss": 0.4252, "step": 6093 }, { "epoch": 0.48752, "grad_norm": 1.6957621574401855, "learning_rate": 5.202425213381284e-06, "loss": 0.4532, "step": 6094 }, { "epoch": 0.4876, "grad_norm": 1.689834475517273, "learning_rate": 5.2011685949007506e-06, "loss": 0.322, "step": 6095 }, { "epoch": 0.48768, "grad_norm": 1.2407318353652954, "learning_rate": 5.199911963692953e-06, "loss": 0.2784, "step": 6096 }, { "epoch": 0.48776, "grad_norm": 0.9591389298439026, "learning_rate": 5.198655319837395e-06, "loss": 0.232, "step": 6097 }, { "epoch": 0.48784, "grad_norm": 1.498170018196106, "learning_rate": 5.197398663413579e-06, "loss": 0.2981, "step": 6098 }, { "epoch": 0.48792, "grad_norm": 1.4004147052764893, "learning_rate": 5.1961419945010104e-06, "loss": 0.3348, "step": 6099 }, { "epoch": 0.488, "grad_norm": 1.3157734870910645, "learning_rate": 5.194885313179195e-06, "loss": 0.3471, "step": 6100 }, { "epoch": 0.48808, "grad_norm": 1.5406330823898315, "learning_rate": 5.1936286195276374e-06, "loss": 0.334, "step": 6101 }, { "epoch": 0.48816, "grad_norm": 1.4737725257873535, "learning_rate": 5.192371913625845e-06, "loss": 0.3255, "step": 6102 }, { "epoch": 0.48824, "grad_norm": 1.704572319984436, "learning_rate": 5.1911151955533254e-06, "loss": 0.3709, "step": 6103 }, { "epoch": 0.48832, "grad_norm": 1.5798331499099731, "learning_rate": 5.1898584653895865e-06, "loss": 0.2797, "step": 6104 }, { "epoch": 0.4884, "grad_norm": 1.6223543882369995, "learning_rate": 5.188601723214139e-06, "loss": 0.3452, "step": 6105 }, { "epoch": 0.48848, "grad_norm": 1.5775635242462158, "learning_rate": 5.18734496910649e-06, "loss": 0.3373, "step": 6106 }, { "epoch": 0.48856, "grad_norm": 1.9005051851272583, "learning_rate": 5.186088203146152e-06, "loss": 0.4212, "step": 6107 }, { "epoch": 0.48864, "grad_norm": 1.3695884943008423, "learning_rate": 5.184831425412636e-06, "loss": 0.3052, "step": 6108 }, { "epoch": 0.48872, "grad_norm": 1.3192788362503052, "learning_rate": 5.1835746359854544e-06, "loss": 0.2665, "step": 6109 }, { "epoch": 0.4888, "grad_norm": 1.645479679107666, "learning_rate": 5.1823178349441195e-06, "loss": 0.3848, "step": 6110 }, { "epoch": 0.48888, "grad_norm": 1.4701766967773438, "learning_rate": 5.181061022368145e-06, "loss": 0.2934, "step": 6111 }, { "epoch": 0.48896, "grad_norm": 1.2284507751464844, "learning_rate": 5.179804198337046e-06, "loss": 0.2826, "step": 6112 }, { "epoch": 0.48904, "grad_norm": 1.8432642221450806, "learning_rate": 5.178547362930337e-06, "loss": 0.354, "step": 6113 }, { "epoch": 0.48912, "grad_norm": 1.703628420829773, "learning_rate": 5.1772905162275345e-06, "loss": 0.3431, "step": 6114 }, { "epoch": 0.4892, "grad_norm": 1.5386126041412354, "learning_rate": 5.176033658308154e-06, "loss": 0.3512, "step": 6115 }, { "epoch": 0.48928, "grad_norm": 1.7287826538085938, "learning_rate": 5.174776789251712e-06, "loss": 0.3485, "step": 6116 }, { "epoch": 0.48936, "grad_norm": 1.4497462511062622, "learning_rate": 5.173519909137728e-06, "loss": 0.3208, "step": 6117 }, { "epoch": 0.48944, "grad_norm": 1.2991448640823364, "learning_rate": 5.17226301804572e-06, "loss": 0.2982, "step": 6118 }, { "epoch": 0.48952, "grad_norm": 1.4704984426498413, "learning_rate": 5.171006116055206e-06, "loss": 0.3264, "step": 6119 }, { "epoch": 0.4896, "grad_norm": 1.2503539323806763, "learning_rate": 5.169749203245709e-06, "loss": 0.2779, "step": 6120 }, { "epoch": 0.48968, "grad_norm": 1.3474243879318237, "learning_rate": 5.168492279696747e-06, "loss": 0.3001, "step": 6121 }, { "epoch": 0.48976, "grad_norm": 1.846341609954834, "learning_rate": 5.167235345487841e-06, "loss": 0.3778, "step": 6122 }, { "epoch": 0.48984, "grad_norm": 1.60556960105896, "learning_rate": 5.165978400698516e-06, "loss": 0.3083, "step": 6123 }, { "epoch": 0.48992, "grad_norm": 1.5098539590835571, "learning_rate": 5.164721445408292e-06, "loss": 0.2916, "step": 6124 }, { "epoch": 0.49, "grad_norm": 1.64169180393219, "learning_rate": 5.163464479696694e-06, "loss": 0.3603, "step": 6125 }, { "epoch": 0.49008, "grad_norm": 1.6956416368484497, "learning_rate": 5.162207503643246e-06, "loss": 0.3837, "step": 6126 }, { "epoch": 0.49016, "grad_norm": 1.6618326902389526, "learning_rate": 5.160950517327471e-06, "loss": 0.3823, "step": 6127 }, { "epoch": 0.49024, "grad_norm": 1.7037644386291504, "learning_rate": 5.159693520828895e-06, "loss": 0.3085, "step": 6128 }, { "epoch": 0.49032, "grad_norm": 1.144739031791687, "learning_rate": 5.158436514227045e-06, "loss": 0.2081, "step": 6129 }, { "epoch": 0.4904, "grad_norm": 1.2798271179199219, "learning_rate": 5.157179497601447e-06, "loss": 0.3009, "step": 6130 }, { "epoch": 0.49048, "grad_norm": 1.7405157089233398, "learning_rate": 5.155922471031627e-06, "loss": 0.3543, "step": 6131 }, { "epoch": 0.49056, "grad_norm": 1.0767748355865479, "learning_rate": 5.154665434597115e-06, "loss": 0.2166, "step": 6132 }, { "epoch": 0.49064, "grad_norm": 1.536720633506775, "learning_rate": 5.153408388377438e-06, "loss": 0.2829, "step": 6133 }, { "epoch": 0.49072, "grad_norm": 1.578690767288208, "learning_rate": 5.152151332452125e-06, "loss": 0.3479, "step": 6134 }, { "epoch": 0.4908, "grad_norm": 1.888026475906372, "learning_rate": 5.150894266900708e-06, "loss": 0.3414, "step": 6135 }, { "epoch": 0.49088, "grad_norm": 1.6138415336608887, "learning_rate": 5.149637191802714e-06, "loss": 0.2913, "step": 6136 }, { "epoch": 0.49096, "grad_norm": 1.5494178533554077, "learning_rate": 5.148380107237677e-06, "loss": 0.3035, "step": 6137 }, { "epoch": 0.49104, "grad_norm": 1.4410408735275269, "learning_rate": 5.1471230132851254e-06, "loss": 0.3202, "step": 6138 }, { "epoch": 0.49112, "grad_norm": 1.510621428489685, "learning_rate": 5.145865910024595e-06, "loss": 0.2863, "step": 6139 }, { "epoch": 0.4912, "grad_norm": 1.8136560916900635, "learning_rate": 5.144608797535614e-06, "loss": 0.3635, "step": 6140 }, { "epoch": 0.49128, "grad_norm": 1.6779876947402954, "learning_rate": 5.143351675897721e-06, "loss": 0.4096, "step": 6141 }, { "epoch": 0.49136, "grad_norm": 1.5982199907302856, "learning_rate": 5.1420945451904455e-06, "loss": 0.2868, "step": 6142 }, { "epoch": 0.49144, "grad_norm": 1.6804651021957397, "learning_rate": 5.140837405493324e-06, "loss": 0.3119, "step": 6143 }, { "epoch": 0.49152, "grad_norm": 1.7489296197891235, "learning_rate": 5.13958025688589e-06, "loss": 0.302, "step": 6144 }, { "epoch": 0.4916, "grad_norm": 1.693847417831421, "learning_rate": 5.138323099447681e-06, "loss": 0.3248, "step": 6145 }, { "epoch": 0.49168, "grad_norm": 1.360862374305725, "learning_rate": 5.137065933258233e-06, "loss": 0.2943, "step": 6146 }, { "epoch": 0.49176, "grad_norm": 2.244718313217163, "learning_rate": 5.135808758397082e-06, "loss": 0.4029, "step": 6147 }, { "epoch": 0.49184, "grad_norm": 1.749489426612854, "learning_rate": 5.134551574943765e-06, "loss": 0.3362, "step": 6148 }, { "epoch": 0.49192, "grad_norm": 1.4736480712890625, "learning_rate": 5.1332943829778205e-06, "loss": 0.2781, "step": 6149 }, { "epoch": 0.492, "grad_norm": 1.5586931705474854, "learning_rate": 5.132037182578785e-06, "loss": 0.2709, "step": 6150 }, { "epoch": 0.49208, "grad_norm": 1.2036563158035278, "learning_rate": 5.130779973826199e-06, "loss": 0.2957, "step": 6151 }, { "epoch": 0.49216, "grad_norm": 2.098465919494629, "learning_rate": 5.129522756799602e-06, "loss": 0.4698, "step": 6152 }, { "epoch": 0.49224, "grad_norm": 1.3323612213134766, "learning_rate": 5.128265531578535e-06, "loss": 0.3015, "step": 6153 }, { "epoch": 0.49232, "grad_norm": 1.4440571069717407, "learning_rate": 5.127008298242535e-06, "loss": 0.3037, "step": 6154 }, { "epoch": 0.4924, "grad_norm": 1.412387728691101, "learning_rate": 5.125751056871146e-06, "loss": 0.3701, "step": 6155 }, { "epoch": 0.49248, "grad_norm": 1.841917634010315, "learning_rate": 5.124493807543908e-06, "loss": 0.3041, "step": 6156 }, { "epoch": 0.49256, "grad_norm": 1.612427830696106, "learning_rate": 5.1232365503403626e-06, "loss": 0.3198, "step": 6157 }, { "epoch": 0.49264, "grad_norm": 1.3332172632217407, "learning_rate": 5.1219792853400545e-06, "loss": 0.3406, "step": 6158 }, { "epoch": 0.49272, "grad_norm": 1.876379370689392, "learning_rate": 5.120722012622524e-06, "loss": 0.4073, "step": 6159 }, { "epoch": 0.4928, "grad_norm": 1.5823307037353516, "learning_rate": 5.119464732267317e-06, "loss": 0.3825, "step": 6160 }, { "epoch": 0.49288, "grad_norm": 1.216632604598999, "learning_rate": 5.118207444353975e-06, "loss": 0.3464, "step": 6161 }, { "epoch": 0.49296, "grad_norm": 2.3204121589660645, "learning_rate": 5.116950148962043e-06, "loss": 0.3724, "step": 6162 }, { "epoch": 0.49304, "grad_norm": 1.6256400346755981, "learning_rate": 5.115692846171067e-06, "loss": 0.3355, "step": 6163 }, { "epoch": 0.49312, "grad_norm": 1.4591851234436035, "learning_rate": 5.114435536060593e-06, "loss": 0.309, "step": 6164 }, { "epoch": 0.4932, "grad_norm": 2.0748255252838135, "learning_rate": 5.113178218710164e-06, "loss": 0.3612, "step": 6165 }, { "epoch": 0.49328, "grad_norm": 1.8025262355804443, "learning_rate": 5.11192089419933e-06, "loss": 0.4716, "step": 6166 }, { "epoch": 0.49336, "grad_norm": 1.4925312995910645, "learning_rate": 5.110663562607632e-06, "loss": 0.3646, "step": 6167 }, { "epoch": 0.49344, "grad_norm": 1.710847020149231, "learning_rate": 5.109406224014623e-06, "loss": 0.353, "step": 6168 }, { "epoch": 0.49352, "grad_norm": 1.3324249982833862, "learning_rate": 5.108148878499847e-06, "loss": 0.2612, "step": 6169 }, { "epoch": 0.4936, "grad_norm": 1.7299015522003174, "learning_rate": 5.106891526142854e-06, "loss": 0.3321, "step": 6170 }, { "epoch": 0.49368, "grad_norm": 1.7776432037353516, "learning_rate": 5.105634167023193e-06, "loss": 0.3346, "step": 6171 }, { "epoch": 0.49376, "grad_norm": 1.4523450136184692, "learning_rate": 5.104376801220411e-06, "loss": 0.2788, "step": 6172 }, { "epoch": 0.49384, "grad_norm": 1.5704206228256226, "learning_rate": 5.103119428814057e-06, "loss": 0.3746, "step": 6173 }, { "epoch": 0.49392, "grad_norm": 1.2726726531982422, "learning_rate": 5.1018620498836825e-06, "loss": 0.2869, "step": 6174 }, { "epoch": 0.494, "grad_norm": 1.7216542959213257, "learning_rate": 5.1006046645088355e-06, "loss": 0.4106, "step": 6175 }, { "epoch": 0.49408, "grad_norm": 1.4835233688354492, "learning_rate": 5.09934727276907e-06, "loss": 0.3307, "step": 6176 }, { "epoch": 0.49416, "grad_norm": 1.483788251876831, "learning_rate": 5.0980898747439345e-06, "loss": 0.3311, "step": 6177 }, { "epoch": 0.49424, "grad_norm": 1.5280448198318481, "learning_rate": 5.096832470512981e-06, "loss": 0.3275, "step": 6178 }, { "epoch": 0.49432, "grad_norm": 1.6087018251419067, "learning_rate": 5.095575060155761e-06, "loss": 0.3893, "step": 6179 }, { "epoch": 0.4944, "grad_norm": 1.2583895921707153, "learning_rate": 5.094317643751825e-06, "loss": 0.269, "step": 6180 }, { "epoch": 0.49448, "grad_norm": 2.5767970085144043, "learning_rate": 5.09306022138073e-06, "loss": 0.3843, "step": 6181 }, { "epoch": 0.49456, "grad_norm": 1.3988746404647827, "learning_rate": 5.091802793122025e-06, "loss": 0.2755, "step": 6182 }, { "epoch": 0.49464, "grad_norm": 1.3515316247940063, "learning_rate": 5.090545359055264e-06, "loss": 0.2335, "step": 6183 }, { "epoch": 0.49472, "grad_norm": 1.4173052310943604, "learning_rate": 5.089287919260002e-06, "loss": 0.3242, "step": 6184 }, { "epoch": 0.4948, "grad_norm": 1.7955999374389648, "learning_rate": 5.088030473815791e-06, "loss": 0.4096, "step": 6185 }, { "epoch": 0.49488, "grad_norm": 1.374130129814148, "learning_rate": 5.0867730228021875e-06, "loss": 0.2919, "step": 6186 }, { "epoch": 0.49496, "grad_norm": 1.3074638843536377, "learning_rate": 5.085515566298744e-06, "loss": 0.2317, "step": 6187 }, { "epoch": 0.49504, "grad_norm": 1.6454929113388062, "learning_rate": 5.084258104385018e-06, "loss": 0.286, "step": 6188 }, { "epoch": 0.49512, "grad_norm": 1.3493096828460693, "learning_rate": 5.0830006371405625e-06, "loss": 0.2989, "step": 6189 }, { "epoch": 0.4952, "grad_norm": 1.6204330921173096, "learning_rate": 5.081743164644935e-06, "loss": 0.2795, "step": 6190 }, { "epoch": 0.49528, "grad_norm": 1.340806245803833, "learning_rate": 5.080485686977691e-06, "loss": 0.2935, "step": 6191 }, { "epoch": 0.49536, "grad_norm": 1.6534948348999023, "learning_rate": 5.079228204218387e-06, "loss": 0.3131, "step": 6192 }, { "epoch": 0.49544, "grad_norm": 1.6619195938110352, "learning_rate": 5.07797071644658e-06, "loss": 0.4199, "step": 6193 }, { "epoch": 0.49552, "grad_norm": 1.7682374715805054, "learning_rate": 5.0767132237418275e-06, "loss": 0.3768, "step": 6194 }, { "epoch": 0.4956, "grad_norm": 1.9795269966125488, "learning_rate": 5.075455726183685e-06, "loss": 0.5178, "step": 6195 }, { "epoch": 0.49568, "grad_norm": 1.5913708209991455, "learning_rate": 5.07419822385171e-06, "loss": 0.2969, "step": 6196 }, { "epoch": 0.49576, "grad_norm": 1.3208245038986206, "learning_rate": 5.072940716825462e-06, "loss": 0.2773, "step": 6197 }, { "epoch": 0.49584, "grad_norm": 1.503617286682129, "learning_rate": 5.071683205184499e-06, "loss": 0.2821, "step": 6198 }, { "epoch": 0.49592, "grad_norm": 1.469290852546692, "learning_rate": 5.070425689008381e-06, "loss": 0.289, "step": 6199 }, { "epoch": 0.496, "grad_norm": 1.5494263172149658, "learning_rate": 5.069168168376664e-06, "loss": 0.3019, "step": 6200 }, { "epoch": 0.49608, "grad_norm": 1.5420727729797363, "learning_rate": 5.067910643368908e-06, "loss": 0.2645, "step": 6201 }, { "epoch": 0.49616, "grad_norm": 1.399019718170166, "learning_rate": 5.066653114064674e-06, "loss": 0.3092, "step": 6202 }, { "epoch": 0.49624, "grad_norm": 1.9305436611175537, "learning_rate": 5.065395580543519e-06, "loss": 0.3418, "step": 6203 }, { "epoch": 0.49632, "grad_norm": 1.197814702987671, "learning_rate": 5.064138042885006e-06, "loss": 0.2597, "step": 6204 }, { "epoch": 0.4964, "grad_norm": 1.5163662433624268, "learning_rate": 5.062880501168693e-06, "loss": 0.3871, "step": 6205 }, { "epoch": 0.49648, "grad_norm": 1.3596129417419434, "learning_rate": 5.061622955474142e-06, "loss": 0.3607, "step": 6206 }, { "epoch": 0.49656, "grad_norm": 2.0122718811035156, "learning_rate": 5.060365405880911e-06, "loss": 0.3757, "step": 6207 }, { "epoch": 0.49664, "grad_norm": 1.1948397159576416, "learning_rate": 5.059107852468565e-06, "loss": 0.256, "step": 6208 }, { "epoch": 0.49672, "grad_norm": 1.5326647758483887, "learning_rate": 5.057850295316661e-06, "loss": 0.4097, "step": 6209 }, { "epoch": 0.4968, "grad_norm": 1.3815491199493408, "learning_rate": 5.0565927345047614e-06, "loss": 0.3258, "step": 6210 }, { "epoch": 0.49688, "grad_norm": 1.5262930393218994, "learning_rate": 5.055335170112432e-06, "loss": 0.3474, "step": 6211 }, { "epoch": 0.49696, "grad_norm": 1.2492114305496216, "learning_rate": 5.054077602219229e-06, "loss": 0.2956, "step": 6212 }, { "epoch": 0.49704, "grad_norm": 1.3622100353240967, "learning_rate": 5.052820030904719e-06, "loss": 0.3019, "step": 6213 }, { "epoch": 0.49712, "grad_norm": 1.563354730606079, "learning_rate": 5.051562456248461e-06, "loss": 0.4146, "step": 6214 }, { "epoch": 0.4972, "grad_norm": 1.7303005456924438, "learning_rate": 5.05030487833002e-06, "loss": 0.3324, "step": 6215 }, { "epoch": 0.49728, "grad_norm": 1.239866852760315, "learning_rate": 5.049047297228956e-06, "loss": 0.2878, "step": 6216 }, { "epoch": 0.49736, "grad_norm": 1.492857813835144, "learning_rate": 5.047789713024836e-06, "loss": 0.3036, "step": 6217 }, { "epoch": 0.49744, "grad_norm": 1.4982048273086548, "learning_rate": 5.046532125797219e-06, "loss": 0.3185, "step": 6218 }, { "epoch": 0.49752, "grad_norm": 1.7222771644592285, "learning_rate": 5.0452745356256705e-06, "loss": 0.4005, "step": 6219 }, { "epoch": 0.4976, "grad_norm": 1.1927220821380615, "learning_rate": 5.044016942589754e-06, "loss": 0.2655, "step": 6220 }, { "epoch": 0.49768, "grad_norm": 1.6021721363067627, "learning_rate": 5.042759346769031e-06, "loss": 0.2886, "step": 6221 }, { "epoch": 0.49776, "grad_norm": 2.103205442428589, "learning_rate": 5.041501748243069e-06, "loss": 0.5297, "step": 6222 }, { "epoch": 0.49784, "grad_norm": 1.3879965543746948, "learning_rate": 5.040244147091431e-06, "loss": 0.3109, "step": 6223 }, { "epoch": 0.49792, "grad_norm": 2.045581817626953, "learning_rate": 5.0389865433936776e-06, "loss": 0.3605, "step": 6224 }, { "epoch": 0.498, "grad_norm": 1.3637545108795166, "learning_rate": 5.037728937229378e-06, "loss": 0.2776, "step": 6225 }, { "epoch": 0.49808, "grad_norm": 1.311774492263794, "learning_rate": 5.036471328678095e-06, "loss": 0.2672, "step": 6226 }, { "epoch": 0.49816, "grad_norm": 1.8853634595870972, "learning_rate": 5.035213717819393e-06, "loss": 0.3784, "step": 6227 }, { "epoch": 0.49824, "grad_norm": 1.301746129989624, "learning_rate": 5.033956104732836e-06, "loss": 0.2905, "step": 6228 }, { "epoch": 0.49832, "grad_norm": 1.6232069730758667, "learning_rate": 5.032698489497991e-06, "loss": 0.3362, "step": 6229 }, { "epoch": 0.4984, "grad_norm": 1.5022000074386597, "learning_rate": 5.031440872194422e-06, "loss": 0.2796, "step": 6230 }, { "epoch": 0.49848, "grad_norm": 1.7945005893707275, "learning_rate": 5.030183252901694e-06, "loss": 0.3596, "step": 6231 }, { "epoch": 0.49856, "grad_norm": 1.5525250434875488, "learning_rate": 5.02892563169937e-06, "loss": 0.343, "step": 6232 }, { "epoch": 0.49864, "grad_norm": 1.631269931793213, "learning_rate": 5.027668008667022e-06, "loss": 0.3799, "step": 6233 }, { "epoch": 0.49872, "grad_norm": 1.4517936706542969, "learning_rate": 5.026410383884209e-06, "loss": 0.338, "step": 6234 }, { "epoch": 0.4988, "grad_norm": 1.387960433959961, "learning_rate": 5.025152757430501e-06, "loss": 0.3515, "step": 6235 }, { "epoch": 0.49888, "grad_norm": 1.4274933338165283, "learning_rate": 5.023895129385461e-06, "loss": 0.3152, "step": 6236 }, { "epoch": 0.49896, "grad_norm": 1.2925071716308594, "learning_rate": 5.022637499828656e-06, "loss": 0.3087, "step": 6237 }, { "epoch": 0.49904, "grad_norm": 1.6201573610305786, "learning_rate": 5.021379868839655e-06, "loss": 0.3988, "step": 6238 }, { "epoch": 0.49912, "grad_norm": 1.1447267532348633, "learning_rate": 5.020122236498018e-06, "loss": 0.281, "step": 6239 }, { "epoch": 0.4992, "grad_norm": 1.618818998336792, "learning_rate": 5.018864602883315e-06, "loss": 0.34, "step": 6240 }, { "epoch": 0.49928, "grad_norm": 1.4714045524597168, "learning_rate": 5.017606968075113e-06, "loss": 0.3352, "step": 6241 }, { "epoch": 0.49936, "grad_norm": 1.8967351913452148, "learning_rate": 5.016349332152975e-06, "loss": 0.4835, "step": 6242 }, { "epoch": 0.49944, "grad_norm": 1.4806861877441406, "learning_rate": 5.0150916951964715e-06, "loss": 0.3297, "step": 6243 }, { "epoch": 0.49952, "grad_norm": 1.4988765716552734, "learning_rate": 5.013834057285165e-06, "loss": 0.3896, "step": 6244 }, { "epoch": 0.4996, "grad_norm": 1.3121827840805054, "learning_rate": 5.012576418498626e-06, "loss": 0.3397, "step": 6245 }, { "epoch": 0.49968, "grad_norm": 1.7312182188034058, "learning_rate": 5.0113187789164176e-06, "loss": 0.3462, "step": 6246 }, { "epoch": 0.49976, "grad_norm": 1.460113763809204, "learning_rate": 5.010061138618109e-06, "loss": 0.2435, "step": 6247 }, { "epoch": 0.49984, "grad_norm": 1.681500792503357, "learning_rate": 5.008803497683266e-06, "loss": 0.452, "step": 6248 }, { "epoch": 0.49992, "grad_norm": 1.528382658958435, "learning_rate": 5.007545856191453e-06, "loss": 0.3297, "step": 6249 }, { "epoch": 0.5, "grad_norm": 1.7061470746994019, "learning_rate": 5.006288214222242e-06, "loss": 0.4542, "step": 6250 } ], "logging_steps": 1, "max_steps": 12500, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1250, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.2449718975467684e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }