{ "best_metric": 1.3743650913238525, "best_model_checkpoint": "miner_id_24/checkpoint-200", "epoch": 0.08122652045892984, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0004061326022946492, "grad_norm": 1.3459707498550415, "learning_rate": 1e-05, "loss": 1.5815, "step": 1 }, { "epoch": 0.0004061326022946492, "eval_loss": 2.1705708503723145, "eval_runtime": 307.0362, "eval_samples_per_second": 13.507, "eval_steps_per_second": 3.377, "step": 1 }, { "epoch": 0.0008122652045892984, "grad_norm": 1.445604681968689, "learning_rate": 2e-05, "loss": 1.7408, "step": 2 }, { "epoch": 0.0012183978068839476, "grad_norm": 1.4932692050933838, "learning_rate": 3e-05, "loss": 1.828, "step": 3 }, { "epoch": 0.0016245304091785967, "grad_norm": 1.3266582489013672, "learning_rate": 4e-05, "loss": 1.8319, "step": 4 }, { "epoch": 0.0020306630114732462, "grad_norm": 1.1805715560913086, "learning_rate": 5e-05, "loss": 1.7692, "step": 5 }, { "epoch": 0.0024367956137678953, "grad_norm": 1.1116411685943604, "learning_rate": 6e-05, "loss": 1.6888, "step": 6 }, { "epoch": 0.0028429282160625444, "grad_norm": 1.1109813451766968, "learning_rate": 7e-05, "loss": 1.6346, "step": 7 }, { "epoch": 0.0032490608183571934, "grad_norm": 1.1213299036026, "learning_rate": 8e-05, "loss": 1.5795, "step": 8 }, { "epoch": 0.003655193420651843, "grad_norm": 1.517636775970459, "learning_rate": 9e-05, "loss": 1.6551, "step": 9 }, { "epoch": 0.0040613260229464924, "grad_norm": 0.8936739563941956, "learning_rate": 0.0001, "loss": 1.5318, "step": 10 }, { "epoch": 0.0044674586252411415, "grad_norm": 0.8239598274230957, "learning_rate": 9.999316524962345e-05, "loss": 1.5003, "step": 11 }, { "epoch": 0.004873591227535791, "grad_norm": 0.8810031414031982, "learning_rate": 9.997266286704631e-05, "loss": 1.5238, "step": 12 }, { "epoch": 0.00527972382983044, "grad_norm": 0.762035071849823, "learning_rate": 9.993849845741524e-05, "loss": 1.4975, "step": 13 }, { "epoch": 0.005685856432125089, "grad_norm": 0.6892722249031067, "learning_rate": 9.989068136093873e-05, "loss": 1.4336, "step": 14 }, { "epoch": 0.006091989034419738, "grad_norm": 0.7215124368667603, "learning_rate": 9.98292246503335e-05, "loss": 1.4898, "step": 15 }, { "epoch": 0.006498121636714387, "grad_norm": 0.6996978521347046, "learning_rate": 9.975414512725057e-05, "loss": 1.5738, "step": 16 }, { "epoch": 0.006904254239009037, "grad_norm": 0.7445999383926392, "learning_rate": 9.966546331768191e-05, "loss": 1.5072, "step": 17 }, { "epoch": 0.007310386841303686, "grad_norm": 0.6681432127952576, "learning_rate": 9.956320346634876e-05, "loss": 1.4469, "step": 18 }, { "epoch": 0.007716519443598335, "grad_norm": 0.7040372490882874, "learning_rate": 9.944739353007344e-05, "loss": 1.4469, "step": 19 }, { "epoch": 0.008122652045892985, "grad_norm": 0.7005623579025269, "learning_rate": 9.931806517013612e-05, "loss": 1.4524, "step": 20 }, { "epoch": 0.008528784648187633, "grad_norm": 0.7213242053985596, "learning_rate": 9.917525374361912e-05, "loss": 1.4383, "step": 21 }, { "epoch": 0.008934917250482283, "grad_norm": 0.7225236296653748, "learning_rate": 9.901899829374047e-05, "loss": 1.4274, "step": 22 }, { "epoch": 0.009341049852776931, "grad_norm": 0.6702327132225037, "learning_rate": 9.884934153917997e-05, "loss": 1.4264, "step": 23 }, { "epoch": 0.009747182455071581, "grad_norm": 0.809352695941925, "learning_rate": 9.86663298624003e-05, "loss": 1.386, "step": 24 }, { "epoch": 0.01015331505736623, "grad_norm": 0.7973009943962097, "learning_rate": 9.847001329696653e-05, "loss": 1.4209, "step": 25 }, { "epoch": 0.01055944765966088, "grad_norm": 0.8504800200462341, "learning_rate": 9.826044551386744e-05, "loss": 1.4312, "step": 26 }, { "epoch": 0.01096558026195553, "grad_norm": 0.8228721022605896, "learning_rate": 9.803768380684242e-05, "loss": 1.4466, "step": 27 }, { "epoch": 0.011371712864250177, "grad_norm": 0.8575668334960938, "learning_rate": 9.780178907671789e-05, "loss": 1.3192, "step": 28 }, { "epoch": 0.011777845466544827, "grad_norm": 0.8672021627426147, "learning_rate": 9.755282581475769e-05, "loss": 1.557, "step": 29 }, { "epoch": 0.012183978068839476, "grad_norm": 0.8064315319061279, "learning_rate": 9.729086208503174e-05, "loss": 1.3426, "step": 30 }, { "epoch": 0.012590110671134126, "grad_norm": 0.9522659778594971, "learning_rate": 9.701596950580806e-05, "loss": 1.488, "step": 31 }, { "epoch": 0.012996243273428774, "grad_norm": 0.8255912065505981, "learning_rate": 9.672822322997305e-05, "loss": 1.3874, "step": 32 }, { "epoch": 0.013402375875723424, "grad_norm": 0.8663228750228882, "learning_rate": 9.642770192448536e-05, "loss": 1.3202, "step": 33 }, { "epoch": 0.013808508478018074, "grad_norm": 0.9034584760665894, "learning_rate": 9.611448774886924e-05, "loss": 1.3119, "step": 34 }, { "epoch": 0.014214641080312722, "grad_norm": 0.8480425477027893, "learning_rate": 9.578866633275288e-05, "loss": 1.1869, "step": 35 }, { "epoch": 0.014620773682607372, "grad_norm": 0.9139435887336731, "learning_rate": 9.545032675245813e-05, "loss": 1.2952, "step": 36 }, { "epoch": 0.01502690628490202, "grad_norm": 0.9090644121170044, "learning_rate": 9.509956150664796e-05, "loss": 1.3284, "step": 37 }, { "epoch": 0.01543303888719667, "grad_norm": 0.9656711220741272, "learning_rate": 9.473646649103818e-05, "loss": 1.366, "step": 38 }, { "epoch": 0.015839171489491318, "grad_norm": 0.943183958530426, "learning_rate": 9.43611409721806e-05, "loss": 1.2349, "step": 39 }, { "epoch": 0.01624530409178597, "grad_norm": 1.0320135354995728, "learning_rate": 9.397368756032445e-05, "loss": 1.2602, "step": 40 }, { "epoch": 0.016651436694080618, "grad_norm": 1.0319671630859375, "learning_rate": 9.357421218136386e-05, "loss": 1.3153, "step": 41 }, { "epoch": 0.017057569296375266, "grad_norm": 0.944151759147644, "learning_rate": 9.316282404787871e-05, "loss": 1.3717, "step": 42 }, { "epoch": 0.017463701898669914, "grad_norm": 0.9808139801025391, "learning_rate": 9.273963562927695e-05, "loss": 1.2681, "step": 43 }, { "epoch": 0.017869834500964566, "grad_norm": 1.066519021987915, "learning_rate": 9.230476262104677e-05, "loss": 1.2371, "step": 44 }, { "epoch": 0.018275967103259214, "grad_norm": 1.1237547397613525, "learning_rate": 9.185832391312644e-05, "loss": 1.3255, "step": 45 }, { "epoch": 0.018682099705553862, "grad_norm": 0.9697301387786865, "learning_rate": 9.140044155740101e-05, "loss": 1.1256, "step": 46 }, { "epoch": 0.019088232307848514, "grad_norm": 1.1894623041152954, "learning_rate": 9.093124073433463e-05, "loss": 1.5102, "step": 47 }, { "epoch": 0.019494364910143162, "grad_norm": 1.3104721307754517, "learning_rate": 9.045084971874738e-05, "loss": 1.3807, "step": 48 }, { "epoch": 0.01990049751243781, "grad_norm": 2.2278523445129395, "learning_rate": 8.995939984474624e-05, "loss": 1.5591, "step": 49 }, { "epoch": 0.02030663011473246, "grad_norm": 3.040036916732788, "learning_rate": 8.945702546981969e-05, "loss": 1.7211, "step": 50 }, { "epoch": 0.02030663011473246, "eval_loss": 1.9163098335266113, "eval_runtime": 309.92, "eval_samples_per_second": 13.381, "eval_steps_per_second": 3.346, "step": 50 }, { "epoch": 0.02071276271702711, "grad_norm": 2.5100488662719727, "learning_rate": 8.894386393810563e-05, "loss": 1.9305, "step": 51 }, { "epoch": 0.02111889531932176, "grad_norm": 2.1271471977233887, "learning_rate": 8.842005554284296e-05, "loss": 1.8833, "step": 52 }, { "epoch": 0.021525027921616407, "grad_norm": 1.6900986433029175, "learning_rate": 8.788574348801675e-05, "loss": 1.8019, "step": 53 }, { "epoch": 0.02193116052391106, "grad_norm": 1.3168630599975586, "learning_rate": 8.73410738492077e-05, "loss": 1.6667, "step": 54 }, { "epoch": 0.022337293126205707, "grad_norm": 0.7889320254325867, "learning_rate": 8.678619553365659e-05, "loss": 1.5371, "step": 55 }, { "epoch": 0.022743425728500355, "grad_norm": 0.8151615262031555, "learning_rate": 8.622126023955446e-05, "loss": 1.5597, "step": 56 }, { "epoch": 0.023149558330795003, "grad_norm": 0.9233412146568298, "learning_rate": 8.564642241456986e-05, "loss": 1.5154, "step": 57 }, { "epoch": 0.023555690933089655, "grad_norm": 0.8535129427909851, "learning_rate": 8.506183921362443e-05, "loss": 1.4912, "step": 58 }, { "epoch": 0.023961823535384303, "grad_norm": 0.736276388168335, "learning_rate": 8.44676704559283e-05, "loss": 1.542, "step": 59 }, { "epoch": 0.02436795613767895, "grad_norm": 0.733509361743927, "learning_rate": 8.386407858128706e-05, "loss": 1.5942, "step": 60 }, { "epoch": 0.024774088739973603, "grad_norm": 0.726263165473938, "learning_rate": 8.32512286056924e-05, "loss": 1.4467, "step": 61 }, { "epoch": 0.02518022134226825, "grad_norm": 0.6740058660507202, "learning_rate": 8.262928807620843e-05, "loss": 1.5055, "step": 62 }, { "epoch": 0.0255863539445629, "grad_norm": 0.6746672987937927, "learning_rate": 8.199842702516583e-05, "loss": 1.4322, "step": 63 }, { "epoch": 0.025992486546857548, "grad_norm": 0.6926741003990173, "learning_rate": 8.135881792367686e-05, "loss": 1.5049, "step": 64 }, { "epoch": 0.0263986191491522, "grad_norm": 0.6756122708320618, "learning_rate": 8.07106356344834e-05, "loss": 1.499, "step": 65 }, { "epoch": 0.026804751751446847, "grad_norm": 0.6307368874549866, "learning_rate": 8.005405736415126e-05, "loss": 1.3963, "step": 66 }, { "epoch": 0.027210884353741496, "grad_norm": 0.6332759261131287, "learning_rate": 7.938926261462366e-05, "loss": 1.3711, "step": 67 }, { "epoch": 0.027617016956036147, "grad_norm": 0.7248176336288452, "learning_rate": 7.871643313414718e-05, "loss": 1.438, "step": 68 }, { "epoch": 0.028023149558330795, "grad_norm": 0.764191210269928, "learning_rate": 7.803575286758364e-05, "loss": 1.4231, "step": 69 }, { "epoch": 0.028429282160625444, "grad_norm": 0.7459575533866882, "learning_rate": 7.734740790612136e-05, "loss": 1.4424, "step": 70 }, { "epoch": 0.028835414762920092, "grad_norm": 0.7871569395065308, "learning_rate": 7.66515864363997e-05, "loss": 1.4006, "step": 71 }, { "epoch": 0.029241547365214744, "grad_norm": 0.7043696641921997, "learning_rate": 7.594847868906076e-05, "loss": 1.3879, "step": 72 }, { "epoch": 0.029647679967509392, "grad_norm": 0.7912059426307678, "learning_rate": 7.52382768867422e-05, "loss": 1.4553, "step": 73 }, { "epoch": 0.03005381256980404, "grad_norm": 0.7687498927116394, "learning_rate": 7.452117519152542e-05, "loss": 1.4772, "step": 74 }, { "epoch": 0.03045994517209869, "grad_norm": 0.7192708253860474, "learning_rate": 7.379736965185368e-05, "loss": 1.3879, "step": 75 }, { "epoch": 0.03086607777439334, "grad_norm": 0.6956753730773926, "learning_rate": 7.30670581489344e-05, "loss": 1.4224, "step": 76 }, { "epoch": 0.03127221037668799, "grad_norm": 0.803547739982605, "learning_rate": 7.233044034264034e-05, "loss": 1.3905, "step": 77 }, { "epoch": 0.031678342978982636, "grad_norm": 0.7080717086791992, "learning_rate": 7.158771761692464e-05, "loss": 1.2997, "step": 78 }, { "epoch": 0.032084475581277284, "grad_norm": 0.7270420789718628, "learning_rate": 7.083909302476453e-05, "loss": 1.3861, "step": 79 }, { "epoch": 0.03249060818357194, "grad_norm": 0.7509323358535767, "learning_rate": 7.008477123264848e-05, "loss": 1.3482, "step": 80 }, { "epoch": 0.03289674078586659, "grad_norm": 0.8757091164588928, "learning_rate": 6.932495846462261e-05, "loss": 1.3756, "step": 81 }, { "epoch": 0.033302873388161236, "grad_norm": 0.8361058831214905, "learning_rate": 6.855986244591104e-05, "loss": 1.3912, "step": 82 }, { "epoch": 0.033709005990455884, "grad_norm": 0.8529960513114929, "learning_rate": 6.778969234612584e-05, "loss": 1.2062, "step": 83 }, { "epoch": 0.03411513859275053, "grad_norm": 0.9276316165924072, "learning_rate": 6.701465872208216e-05, "loss": 1.3142, "step": 84 }, { "epoch": 0.03452127119504518, "grad_norm": 0.851285994052887, "learning_rate": 6.623497346023418e-05, "loss": 1.3927, "step": 85 }, { "epoch": 0.03492740379733983, "grad_norm": 0.9086071252822876, "learning_rate": 6.545084971874738e-05, "loss": 1.2213, "step": 86 }, { "epoch": 0.035333536399634484, "grad_norm": 0.9446242451667786, "learning_rate": 6.466250186922325e-05, "loss": 1.3843, "step": 87 }, { "epoch": 0.03573966900192913, "grad_norm": 0.8604442477226257, "learning_rate": 6.387014543809223e-05, "loss": 1.1815, "step": 88 }, { "epoch": 0.03614580160422378, "grad_norm": 0.8646284937858582, "learning_rate": 6.307399704769099e-05, "loss": 1.3559, "step": 89 }, { "epoch": 0.03655193420651843, "grad_norm": 0.9388222098350525, "learning_rate": 6.227427435703997e-05, "loss": 1.3209, "step": 90 }, { "epoch": 0.03695806680881308, "grad_norm": 0.8788503408432007, "learning_rate": 6.147119600233758e-05, "loss": 1.3472, "step": 91 }, { "epoch": 0.037364199411107725, "grad_norm": 0.9233901500701904, "learning_rate": 6.066498153718735e-05, "loss": 1.263, "step": 92 }, { "epoch": 0.03777033201340237, "grad_norm": 0.9628087282180786, "learning_rate": 5.985585137257401e-05, "loss": 1.203, "step": 93 }, { "epoch": 0.03817646461569703, "grad_norm": 0.9851207733154297, "learning_rate": 5.90440267166055e-05, "loss": 1.2775, "step": 94 }, { "epoch": 0.038582597217991677, "grad_norm": 0.9901912808418274, "learning_rate": 5.8229729514036705e-05, "loss": 1.2643, "step": 95 }, { "epoch": 0.038988729820286325, "grad_norm": 1.051145315170288, "learning_rate": 5.74131823855921e-05, "loss": 1.282, "step": 96 }, { "epoch": 0.03939486242258097, "grad_norm": 0.9764732718467712, "learning_rate": 5.6594608567103456e-05, "loss": 1.3819, "step": 97 }, { "epoch": 0.03980099502487562, "grad_norm": 1.2044464349746704, "learning_rate": 5.577423184847932e-05, "loss": 1.5645, "step": 98 }, { "epoch": 0.04020712762717027, "grad_norm": 1.6172761917114258, "learning_rate": 5.495227651252315e-05, "loss": 1.5574, "step": 99 }, { "epoch": 0.04061326022946492, "grad_norm": 3.0102949142456055, "learning_rate": 5.4128967273616625e-05, "loss": 1.3643, "step": 100 }, { "epoch": 0.04061326022946492, "eval_loss": 1.7166489362716675, "eval_runtime": 309.9898, "eval_samples_per_second": 13.378, "eval_steps_per_second": 3.345, "step": 100 }, { "epoch": 0.04101939283175957, "grad_norm": 1.9809935092926025, "learning_rate": 5.330452921628497e-05, "loss": 1.7555, "step": 101 }, { "epoch": 0.04142552543405422, "grad_norm": 1.9369913339614868, "learning_rate": 5.247918773366112e-05, "loss": 1.7646, "step": 102 }, { "epoch": 0.04183165803634887, "grad_norm": 1.674850344657898, "learning_rate": 5.165316846586541e-05, "loss": 1.7719, "step": 103 }, { "epoch": 0.04223779063864352, "grad_norm": 1.4067672491073608, "learning_rate": 5.0826697238317935e-05, "loss": 1.705, "step": 104 }, { "epoch": 0.042643923240938165, "grad_norm": 1.117656946182251, "learning_rate": 5e-05, "loss": 1.5836, "step": 105 }, { "epoch": 0.043050055843232814, "grad_norm": 0.7025765776634216, "learning_rate": 4.917330276168208e-05, "loss": 1.5923, "step": 106 }, { "epoch": 0.04345618844552746, "grad_norm": 0.8131675720214844, "learning_rate": 4.834683153413459e-05, "loss": 1.5141, "step": 107 }, { "epoch": 0.04386232104782212, "grad_norm": 0.9580807685852051, "learning_rate": 4.7520812266338885e-05, "loss": 1.598, "step": 108 }, { "epoch": 0.044268453650116765, "grad_norm": 0.8206263780593872, "learning_rate": 4.669547078371504e-05, "loss": 1.5433, "step": 109 }, { "epoch": 0.04467458625241141, "grad_norm": 0.697608470916748, "learning_rate": 4.5871032726383386e-05, "loss": 1.4524, "step": 110 }, { "epoch": 0.04508071885470606, "grad_norm": 0.6470988988876343, "learning_rate": 4.504772348747687e-05, "loss": 1.4266, "step": 111 }, { "epoch": 0.04548685145700071, "grad_norm": 0.6238524913787842, "learning_rate": 4.4225768151520694e-05, "loss": 1.4186, "step": 112 }, { "epoch": 0.04589298405929536, "grad_norm": 0.5966043472290039, "learning_rate": 4.3405391432896555e-05, "loss": 1.3564, "step": 113 }, { "epoch": 0.046299116661590006, "grad_norm": 0.5739925503730774, "learning_rate": 4.2586817614407895e-05, "loss": 1.4389, "step": 114 }, { "epoch": 0.04670524926388466, "grad_norm": 0.5564324259757996, "learning_rate": 4.17702704859633e-05, "loss": 1.4113, "step": 115 }, { "epoch": 0.04711138186617931, "grad_norm": 0.5870735049247742, "learning_rate": 4.095597328339452e-05, "loss": 1.3268, "step": 116 }, { "epoch": 0.04751751446847396, "grad_norm": 0.5620031952857971, "learning_rate": 4.0144148627425993e-05, "loss": 1.3327, "step": 117 }, { "epoch": 0.047923647070768606, "grad_norm": 0.5979235768318176, "learning_rate": 3.933501846281267e-05, "loss": 1.3915, "step": 118 }, { "epoch": 0.048329779673063254, "grad_norm": 0.6064242720603943, "learning_rate": 3.852880399766243e-05, "loss": 1.4333, "step": 119 }, { "epoch": 0.0487359122753579, "grad_norm": 0.631511926651001, "learning_rate": 3.772572564296005e-05, "loss": 1.3669, "step": 120 }, { "epoch": 0.04914204487765255, "grad_norm": 0.6458702683448792, "learning_rate": 3.6926002952309016e-05, "loss": 1.4258, "step": 121 }, { "epoch": 0.049548177479947206, "grad_norm": 0.6667675971984863, "learning_rate": 3.612985456190778e-05, "loss": 1.4117, "step": 122 }, { "epoch": 0.049954310082241854, "grad_norm": 0.6369161605834961, "learning_rate": 3.533749813077677e-05, "loss": 1.3541, "step": 123 }, { "epoch": 0.0503604426845365, "grad_norm": 0.6958560347557068, "learning_rate": 3.4549150281252636e-05, "loss": 1.292, "step": 124 }, { "epoch": 0.05076657528683115, "grad_norm": 0.7173919081687927, "learning_rate": 3.3765026539765834e-05, "loss": 1.3786, "step": 125 }, { "epoch": 0.0511727078891258, "grad_norm": 0.7203386425971985, "learning_rate": 3.298534127791785e-05, "loss": 1.4363, "step": 126 }, { "epoch": 0.05157884049142045, "grad_norm": 0.7156944274902344, "learning_rate": 3.221030765387417e-05, "loss": 1.3003, "step": 127 }, { "epoch": 0.051984973093715095, "grad_norm": 0.7624905109405518, "learning_rate": 3.144013755408895e-05, "loss": 1.3995, "step": 128 }, { "epoch": 0.05239110569600975, "grad_norm": 0.8011417984962463, "learning_rate": 3.0675041535377405e-05, "loss": 1.4126, "step": 129 }, { "epoch": 0.0527972382983044, "grad_norm": 0.7700819969177246, "learning_rate": 2.991522876735154e-05, "loss": 1.3715, "step": 130 }, { "epoch": 0.05320337090059905, "grad_norm": 0.8159439563751221, "learning_rate": 2.916090697523549e-05, "loss": 1.4165, "step": 131 }, { "epoch": 0.053609503502893695, "grad_norm": 0.8066980838775635, "learning_rate": 2.8412282383075363e-05, "loss": 1.3141, "step": 132 }, { "epoch": 0.05401563610518834, "grad_norm": 0.928561270236969, "learning_rate": 2.766955965735968e-05, "loss": 1.2663, "step": 133 }, { "epoch": 0.05442176870748299, "grad_norm": 0.8653779029846191, "learning_rate": 2.693294185106562e-05, "loss": 1.4253, "step": 134 }, { "epoch": 0.05482790130977764, "grad_norm": 0.8156696557998657, "learning_rate": 2.6202630348146324e-05, "loss": 1.2435, "step": 135 }, { "epoch": 0.055234033912072295, "grad_norm": 0.8434756398200989, "learning_rate": 2.547882480847461e-05, "loss": 1.2682, "step": 136 }, { "epoch": 0.05564016651436694, "grad_norm": 0.9540610909461975, "learning_rate": 2.476172311325783e-05, "loss": 1.3011, "step": 137 }, { "epoch": 0.05604629911666159, "grad_norm": 0.8743776679039001, "learning_rate": 2.405152131093926e-05, "loss": 1.3464, "step": 138 }, { "epoch": 0.05645243171895624, "grad_norm": 0.8367823362350464, "learning_rate": 2.3348413563600325e-05, "loss": 1.2398, "step": 139 }, { "epoch": 0.05685856432125089, "grad_norm": 0.9489424824714661, "learning_rate": 2.2652592093878666e-05, "loss": 1.2853, "step": 140 }, { "epoch": 0.057264696923545536, "grad_norm": 0.8658900856971741, "learning_rate": 2.196424713241637e-05, "loss": 1.2019, "step": 141 }, { "epoch": 0.057670829525840184, "grad_norm": 0.8773536682128906, "learning_rate": 2.128356686585282e-05, "loss": 1.1453, "step": 142 }, { "epoch": 0.05807696212813484, "grad_norm": 1.0234720706939697, "learning_rate": 2.061073738537635e-05, "loss": 1.3279, "step": 143 }, { "epoch": 0.05848309473042949, "grad_norm": 0.9431840181350708, "learning_rate": 1.9945942635848748e-05, "loss": 1.2179, "step": 144 }, { "epoch": 0.058889227332724135, "grad_norm": 0.927308976650238, "learning_rate": 1.928936436551661e-05, "loss": 1.1919, "step": 145 }, { "epoch": 0.059295359935018783, "grad_norm": 0.9419561624526978, "learning_rate": 1.8641182076323148e-05, "loss": 1.1926, "step": 146 }, { "epoch": 0.05970149253731343, "grad_norm": 1.0439989566802979, "learning_rate": 1.800157297483417e-05, "loss": 1.2789, "step": 147 }, { "epoch": 0.06010762513960808, "grad_norm": 1.4132856130599976, "learning_rate": 1.7370711923791567e-05, "loss": 1.4584, "step": 148 }, { "epoch": 0.06051375774190273, "grad_norm": 1.8904759883880615, "learning_rate": 1.6748771394307585e-05, "loss": 1.2134, "step": 149 }, { "epoch": 0.06091989034419738, "grad_norm": 3.5856516361236572, "learning_rate": 1.6135921418712956e-05, "loss": 1.8681, "step": 150 }, { "epoch": 0.06091989034419738, "eval_loss": 1.4577025175094604, "eval_runtime": 310.1093, "eval_samples_per_second": 13.373, "eval_steps_per_second": 3.344, "step": 150 }, { "epoch": 0.06132602294649203, "grad_norm": 1.1017037630081177, "learning_rate": 1.553232954407171e-05, "loss": 1.5434, "step": 151 }, { "epoch": 0.06173215554878668, "grad_norm": 1.2320129871368408, "learning_rate": 1.4938160786375572e-05, "loss": 1.5389, "step": 152 }, { "epoch": 0.06213828815108133, "grad_norm": 1.2117505073547363, "learning_rate": 1.435357758543015e-05, "loss": 1.5758, "step": 153 }, { "epoch": 0.06254442075337598, "grad_norm": 1.1583046913146973, "learning_rate": 1.3778739760445552e-05, "loss": 1.5657, "step": 154 }, { "epoch": 0.06295055335567062, "grad_norm": 1.0914018154144287, "learning_rate": 1.3213804466343421e-05, "loss": 1.4844, "step": 155 }, { "epoch": 0.06335668595796527, "grad_norm": 1.020070195198059, "learning_rate": 1.2658926150792322e-05, "loss": 1.4348, "step": 156 }, { "epoch": 0.06376281856025992, "grad_norm": 0.9296380877494812, "learning_rate": 1.2114256511983274e-05, "loss": 1.4725, "step": 157 }, { "epoch": 0.06416895116255457, "grad_norm": 0.8589698672294617, "learning_rate": 1.157994445715706e-05, "loss": 1.5322, "step": 158 }, { "epoch": 0.06457508376484922, "grad_norm": 0.7975783348083496, "learning_rate": 1.1056136061894384e-05, "loss": 1.4754, "step": 159 }, { "epoch": 0.06498121636714388, "grad_norm": 0.7380327582359314, "learning_rate": 1.0542974530180327e-05, "loss": 1.4245, "step": 160 }, { "epoch": 0.06538734896943853, "grad_norm": 0.7264203429222107, "learning_rate": 1.0040600155253765e-05, "loss": 1.5668, "step": 161 }, { "epoch": 0.06579348157173318, "grad_norm": 0.6353253126144409, "learning_rate": 9.549150281252633e-06, "loss": 1.4047, "step": 162 }, { "epoch": 0.06619961417402782, "grad_norm": 0.601177990436554, "learning_rate": 9.068759265665384e-06, "loss": 1.4177, "step": 163 }, { "epoch": 0.06660574677632247, "grad_norm": 0.6236414313316345, "learning_rate": 8.599558442598998e-06, "loss": 1.4479, "step": 164 }, { "epoch": 0.06701187937861712, "grad_norm": 0.6228318810462952, "learning_rate": 8.141676086873572e-06, "loss": 1.3862, "step": 165 }, { "epoch": 0.06741801198091177, "grad_norm": 0.6326121091842651, "learning_rate": 7.695237378953223e-06, "loss": 1.2904, "step": 166 }, { "epoch": 0.06782414458320642, "grad_norm": 0.6374157071113586, "learning_rate": 7.260364370723044e-06, "loss": 1.4229, "step": 167 }, { "epoch": 0.06823027718550106, "grad_norm": 0.6612967252731323, "learning_rate": 6.837175952121306e-06, "loss": 1.5289, "step": 168 }, { "epoch": 0.06863640978779571, "grad_norm": 0.6351006031036377, "learning_rate": 6.425787818636131e-06, "loss": 1.4034, "step": 169 }, { "epoch": 0.06904254239009036, "grad_norm": 0.6338084936141968, "learning_rate": 6.026312439675552e-06, "loss": 1.4047, "step": 170 }, { "epoch": 0.06944867499238501, "grad_norm": 0.6329898238182068, "learning_rate": 5.6388590278194096e-06, "loss": 1.37, "step": 171 }, { "epoch": 0.06985480759467966, "grad_norm": 0.6494223475456238, "learning_rate": 5.263533508961827e-06, "loss": 1.3661, "step": 172 }, { "epoch": 0.0702609401969743, "grad_norm": 0.6772523522377014, "learning_rate": 4.900438493352055e-06, "loss": 1.3983, "step": 173 }, { "epoch": 0.07066707279926897, "grad_norm": 0.690122127532959, "learning_rate": 4.549673247541875e-06, "loss": 1.4469, "step": 174 }, { "epoch": 0.07107320540156362, "grad_norm": 0.6646182537078857, "learning_rate": 4.2113336672471245e-06, "loss": 1.351, "step": 175 }, { "epoch": 0.07147933800385826, "grad_norm": 0.7394789457321167, "learning_rate": 3.885512251130763e-06, "loss": 1.3264, "step": 176 }, { "epoch": 0.07188547060615291, "grad_norm": 0.6804371476173401, "learning_rate": 3.5722980755146517e-06, "loss": 1.342, "step": 177 }, { "epoch": 0.07229160320844756, "grad_norm": 0.7120791673660278, "learning_rate": 3.271776770026963e-06, "loss": 1.2843, "step": 178 }, { "epoch": 0.07269773581074221, "grad_norm": 0.8053888082504272, "learning_rate": 2.9840304941919415e-06, "loss": 1.3445, "step": 179 }, { "epoch": 0.07310386841303686, "grad_norm": 0.8388939499855042, "learning_rate": 2.7091379149682685e-06, "loss": 1.4256, "step": 180 }, { "epoch": 0.0735100010153315, "grad_norm": 0.7814505696296692, "learning_rate": 2.4471741852423237e-06, "loss": 1.4238, "step": 181 }, { "epoch": 0.07391613361762615, "grad_norm": 0.7952286601066589, "learning_rate": 2.1982109232821178e-06, "loss": 1.3003, "step": 182 }, { "epoch": 0.0743222662199208, "grad_norm": 0.8597591519355774, "learning_rate": 1.962316193157593e-06, "loss": 1.2381, "step": 183 }, { "epoch": 0.07472839882221545, "grad_norm": 0.9671509861946106, "learning_rate": 1.7395544861325718e-06, "loss": 1.3072, "step": 184 }, { "epoch": 0.0751345314245101, "grad_norm": 1.1733289957046509, "learning_rate": 1.5299867030334814e-06, "loss": 1.278, "step": 185 }, { "epoch": 0.07554066402680475, "grad_norm": 0.884924054145813, "learning_rate": 1.333670137599713e-06, "loss": 1.2248, "step": 186 }, { "epoch": 0.0759467966290994, "grad_norm": 0.849631667137146, "learning_rate": 1.1506584608200367e-06, "loss": 1.2509, "step": 187 }, { "epoch": 0.07635292923139406, "grad_norm": 0.8570610284805298, "learning_rate": 9.810017062595322e-07, "loss": 1.0759, "step": 188 }, { "epoch": 0.0767590618336887, "grad_norm": 1.0926884412765503, "learning_rate": 8.247462563808817e-07, "loss": 1.3148, "step": 189 }, { "epoch": 0.07716519443598335, "grad_norm": 0.997616708278656, "learning_rate": 6.819348298638839e-07, "loss": 1.3344, "step": 190 }, { "epoch": 0.077571327038278, "grad_norm": 0.9728947281837463, "learning_rate": 5.526064699265753e-07, "loss": 1.1879, "step": 191 }, { "epoch": 0.07797745964057265, "grad_norm": 1.0188639163970947, "learning_rate": 4.367965336512403e-07, "loss": 1.3021, "step": 192 }, { "epoch": 0.0783835922428673, "grad_norm": 1.0173429250717163, "learning_rate": 3.3453668231809286e-07, "loss": 1.2605, "step": 193 }, { "epoch": 0.07878972484516195, "grad_norm": 1.1394449472427368, "learning_rate": 2.458548727494292e-07, "loss": 1.3249, "step": 194 }, { "epoch": 0.0791958574474566, "grad_norm": 1.1786574125289917, "learning_rate": 1.7077534966650766e-07, "loss": 1.2217, "step": 195 }, { "epoch": 0.07960199004975124, "grad_norm": 1.216571569442749, "learning_rate": 1.0931863906127327e-07, "loss": 1.3072, "step": 196 }, { "epoch": 0.08000812265204589, "grad_norm": 1.325181245803833, "learning_rate": 6.150154258476315e-08, "loss": 1.2424, "step": 197 }, { "epoch": 0.08041425525434054, "grad_norm": 1.5836933851242065, "learning_rate": 2.7337132953697554e-08, "loss": 1.4076, "step": 198 }, { "epoch": 0.08082038785663519, "grad_norm": 1.8556915521621704, "learning_rate": 6.834750376549792e-09, "loss": 1.3504, "step": 199 }, { "epoch": 0.08122652045892984, "grad_norm": 3.47357177734375, "learning_rate": 0.0, "loss": 1.5353, "step": 200 }, { "epoch": 0.08122652045892984, "eval_loss": 1.3743650913238525, "eval_runtime": 310.3069, "eval_samples_per_second": 13.364, "eval_steps_per_second": 3.342, "step": 200 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.017027157491712e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }