{ "best_metric": NaN, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.02817298211015636, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0001408649105507818, "grad_norm": NaN, "learning_rate": 1.0100000000000002e-05, "loss": 0.0, "step": 1 }, { "epoch": 0.0001408649105507818, "eval_loss": NaN, "eval_runtime": 995.0245, "eval_samples_per_second": 12.017, "eval_steps_per_second": 3.005, "step": 1 }, { "epoch": 0.0002817298211015636, "grad_norm": NaN, "learning_rate": 2.0200000000000003e-05, "loss": 0.0, "step": 2 }, { "epoch": 0.0004225947316523454, "grad_norm": NaN, "learning_rate": 3.0299999999999998e-05, "loss": 0.0, "step": 3 }, { "epoch": 0.0005634596422031273, "grad_norm": NaN, "learning_rate": 4.0400000000000006e-05, "loss": 0.0, "step": 4 }, { "epoch": 0.000704324552753909, "grad_norm": NaN, "learning_rate": 5.05e-05, "loss": 0.0, "step": 5 }, { "epoch": 0.0008451894633046908, "grad_norm": NaN, "learning_rate": 6.0599999999999996e-05, "loss": 0.0, "step": 6 }, { "epoch": 0.0009860543738554725, "grad_norm": NaN, "learning_rate": 7.07e-05, "loss": 0.0, "step": 7 }, { "epoch": 0.0011269192844062545, "grad_norm": NaN, "learning_rate": 8.080000000000001e-05, "loss": 0.0, "step": 8 }, { "epoch": 0.0012677841949570363, "grad_norm": NaN, "learning_rate": 9.09e-05, "loss": 0.0, "step": 9 }, { "epoch": 0.001408649105507818, "grad_norm": NaN, "learning_rate": 0.000101, "loss": 0.0, "step": 10 }, { "epoch": 0.0015495140160585999, "grad_norm": NaN, "learning_rate": 0.00010099309690211968, "loss": 0.0, "step": 11 }, { "epoch": 0.0016903789266093816, "grad_norm": NaN, "learning_rate": 0.00010097238949571676, "loss": 0.0, "step": 12 }, { "epoch": 0.0018312438371601634, "grad_norm": NaN, "learning_rate": 0.00010093788344198939, "loss": 0.0, "step": 13 }, { "epoch": 0.001972108747710945, "grad_norm": NaN, "learning_rate": 0.00010088958817454812, "loss": 0.0, "step": 14 }, { "epoch": 0.002112973658261727, "grad_norm": NaN, "learning_rate": 0.00010082751689683683, "loss": 0.0, "step": 15 }, { "epoch": 0.002253838568812509, "grad_norm": NaN, "learning_rate": 0.00010075168657852308, "loss": 0.0, "step": 16 }, { "epoch": 0.0023947034793632906, "grad_norm": NaN, "learning_rate": 0.00010066211795085874, "loss": 0.0, "step": 17 }, { "epoch": 0.0025355683899140726, "grad_norm": NaN, "learning_rate": 0.00010055883550101226, "loss": 0.0, "step": 18 }, { "epoch": 0.002676433300464854, "grad_norm": NaN, "learning_rate": 0.00010044186746537416, "loss": 0.0, "step": 19 }, { "epoch": 0.002817298211015636, "grad_norm": NaN, "learning_rate": 0.00010031124582183748, "loss": 0.0, "step": 20 }, { "epoch": 0.0029581631215664177, "grad_norm": NaN, "learning_rate": 0.00010016700628105531, "loss": 0.0, "step": 21 }, { "epoch": 0.0030990280321171997, "grad_norm": NaN, "learning_rate": 0.00010000918827667787, "loss": 0.0, "step": 22 }, { "epoch": 0.0032398929426679813, "grad_norm": NaN, "learning_rate": 9.983783495457178e-05, "loss": 0.0, "step": 23 }, { "epoch": 0.0033807578532187633, "grad_norm": NaN, "learning_rate": 9.96529931610243e-05, "loss": 0.0, "step": 24 }, { "epoch": 0.003521622763769545, "grad_norm": NaN, "learning_rate": 9.945471342993618e-05, "loss": 0.0, "step": 25 }, { "epoch": 0.003662487674320327, "grad_norm": NaN, "learning_rate": 9.92430499690061e-05, "loss": 0.0, "step": 26 }, { "epoch": 0.0038033525848711084, "grad_norm": NaN, "learning_rate": 9.901806064491084e-05, "loss": 0.0, "step": 27 }, { "epoch": 0.00394421749542189, "grad_norm": NaN, "learning_rate": 9.877980696748506e-05, "loss": 0.0, "step": 28 }, { "epoch": 0.0040850824059726724, "grad_norm": NaN, "learning_rate": 9.852835407290526e-05, "loss": 0.0, "step": 29 }, { "epoch": 0.004225947316523454, "grad_norm": NaN, "learning_rate": 9.826377070588204e-05, "loss": 0.0, "step": 30 }, { "epoch": 0.004366812227074236, "grad_norm": NaN, "learning_rate": 9.798612920086614e-05, "loss": 0.0, "step": 31 }, { "epoch": 0.004507677137625018, "grad_norm": NaN, "learning_rate": 9.769550546227278e-05, "loss": 0.0, "step": 32 }, { "epoch": 0.0046485420481758, "grad_norm": NaN, "learning_rate": 9.739197894373021e-05, "loss": 0.0, "step": 33 }, { "epoch": 0.004789406958726581, "grad_norm": NaN, "learning_rate": 9.707563262635793e-05, "loss": 0.0, "step": 34 }, { "epoch": 0.004930271869277363, "grad_norm": NaN, "learning_rate": 9.67465529960804e-05, "loss": 0.0, "step": 35 }, { "epoch": 0.005071136779828145, "grad_norm": NaN, "learning_rate": 9.640483001998271e-05, "loss": 0.0, "step": 36 }, { "epoch": 0.005212001690378927, "grad_norm": NaN, "learning_rate": 9.605055712171443e-05, "loss": 0.0, "step": 37 }, { "epoch": 0.005352866600929708, "grad_norm": NaN, "learning_rate": 9.568383115594856e-05, "loss": 0.0, "step": 38 }, { "epoch": 0.00549373151148049, "grad_norm": NaN, "learning_rate": 9.53047523819024e-05, "loss": 0.0, "step": 39 }, { "epoch": 0.005634596422031272, "grad_norm": NaN, "learning_rate": 9.491342443592769e-05, "loss": 0.0, "step": 40 }, { "epoch": 0.005775461332582054, "grad_norm": NaN, "learning_rate": 9.45099543031775e-05, "loss": 0.0, "step": 41 }, { "epoch": 0.0059163262431328354, "grad_norm": NaN, "learning_rate": 9.40944522883575e-05, "loss": 0.0, "step": 42 }, { "epoch": 0.006057191153683617, "grad_norm": NaN, "learning_rate": 9.366703198556972e-05, "loss": 0.0, "step": 43 }, { "epoch": 0.0061980560642343994, "grad_norm": NaN, "learning_rate": 9.322781024725723e-05, "loss": 0.0, "step": 44 }, { "epoch": 0.006338920974785181, "grad_norm": NaN, "learning_rate": 9.27769071522577e-05, "loss": 0.0, "step": 45 }, { "epoch": 0.006479785885335963, "grad_norm": NaN, "learning_rate": 9.231444597297502e-05, "loss": 0.0, "step": 46 }, { "epoch": 0.006620650795886745, "grad_norm": NaN, "learning_rate": 9.184055314167797e-05, "loss": 0.0, "step": 47 }, { "epoch": 0.006761515706437527, "grad_norm": NaN, "learning_rate": 9.135535821593484e-05, "loss": 0.0, "step": 48 }, { "epoch": 0.006902380616988308, "grad_norm": NaN, "learning_rate": 9.08589938431937e-05, "loss": 0.0, "step": 49 }, { "epoch": 0.00704324552753909, "grad_norm": NaN, "learning_rate": 9.035159572451788e-05, "loss": 0.0, "step": 50 }, { "epoch": 0.00704324552753909, "eval_loss": NaN, "eval_runtime": 839.8948, "eval_samples_per_second": 14.236, "eval_steps_per_second": 3.56, "step": 50 }, { "epoch": 0.007184110438089872, "grad_norm": NaN, "learning_rate": 8.983330257748669e-05, "loss": 0.0, "step": 51 }, { "epoch": 0.007324975348640654, "grad_norm": NaN, "learning_rate": 8.930425609827138e-05, "loss": 0.0, "step": 52 }, { "epoch": 0.007465840259191435, "grad_norm": NaN, "learning_rate": 8.876460092289691e-05, "loss": 0.0, "step": 53 }, { "epoch": 0.007606705169742217, "grad_norm": NaN, "learning_rate": 8.821448458769978e-05, "loss": 0.0, "step": 54 }, { "epoch": 0.007747570080292999, "grad_norm": NaN, "learning_rate": 8.765405748899315e-05, "loss": 0.0, "step": 55 }, { "epoch": 0.00788843499084378, "grad_norm": NaN, "learning_rate": 8.708347284195e-05, "loss": 0.0, "step": 56 }, { "epoch": 0.008029299901394563, "grad_norm": NaN, "learning_rate": 8.650288663871555e-05, "loss": 0.0, "step": 57 }, { "epoch": 0.008170164811945345, "grad_norm": NaN, "learning_rate": 8.591245760576067e-05, "loss": 0.0, "step": 58 }, { "epoch": 0.008311029722496126, "grad_norm": NaN, "learning_rate": 8.531234716048757e-05, "loss": 0.0, "step": 59 }, { "epoch": 0.008451894633046908, "grad_norm": NaN, "learning_rate": 8.470271936709994e-05, "loss": 0.0, "step": 60 }, { "epoch": 0.00859275954359769, "grad_norm": NaN, "learning_rate": 8.408374089174933e-05, "loss": 0.0, "step": 61 }, { "epoch": 0.008733624454148471, "grad_norm": NaN, "learning_rate": 8.345558095697051e-05, "loss": 0.0, "step": 62 }, { "epoch": 0.008874489364699253, "grad_norm": NaN, "learning_rate": 8.281841129541749e-05, "loss": 0.0, "step": 63 }, { "epoch": 0.009015354275250036, "grad_norm": NaN, "learning_rate": 8.217240610291362e-05, "loss": 0.0, "step": 64 }, { "epoch": 0.009156219185800818, "grad_norm": NaN, "learning_rate": 8.151774199082823e-05, "loss": 0.0, "step": 65 }, { "epoch": 0.0092970840963516, "grad_norm": NaN, "learning_rate": 8.085459793779277e-05, "loss": 0.0, "step": 66 }, { "epoch": 0.00943794900690238, "grad_norm": NaN, "learning_rate": 8.018315524076989e-05, "loss": 0.0, "step": 67 }, { "epoch": 0.009578813917453162, "grad_norm": NaN, "learning_rate": 7.950359746548865e-05, "loss": 0.0, "step": 68 }, { "epoch": 0.009719678828003944, "grad_norm": NaN, "learning_rate": 7.881611039625947e-05, "loss": 0.0, "step": 69 }, { "epoch": 0.009860543738554725, "grad_norm": NaN, "learning_rate": 7.812088198518258e-05, "loss": 0.0, "step": 70 }, { "epoch": 0.010001408649105507, "grad_norm": NaN, "learning_rate": 7.741810230076368e-05, "loss": 0.0, "step": 71 }, { "epoch": 0.01014227355965629, "grad_norm": NaN, "learning_rate": 7.670796347595137e-05, "loss": 0.0, "step": 72 }, { "epoch": 0.010283138470207072, "grad_norm": NaN, "learning_rate": 7.599065965560962e-05, "loss": 0.0, "step": 73 }, { "epoch": 0.010424003380757853, "grad_norm": NaN, "learning_rate": 7.526638694344066e-05, "loss": 0.0, "step": 74 }, { "epoch": 0.010564868291308635, "grad_norm": NaN, "learning_rate": 7.453534334837223e-05, "loss": 0.0, "step": 75 }, { "epoch": 0.010705733201859417, "grad_norm": NaN, "learning_rate": 7.379772873042374e-05, "loss": 0.0, "step": 76 }, { "epoch": 0.010846598112410198, "grad_norm": NaN, "learning_rate": 7.305374474606674e-05, "loss": 0.0, "step": 77 }, { "epoch": 0.01098746302296098, "grad_norm": NaN, "learning_rate": 7.230359479309389e-05, "loss": 0.0, "step": 78 }, { "epoch": 0.011128327933511763, "grad_norm": NaN, "learning_rate": 7.154748395501217e-05, "loss": 0.0, "step": 79 }, { "epoch": 0.011269192844062545, "grad_norm": NaN, "learning_rate": 7.078561894497497e-05, "loss": 0.0, "step": 80 }, { "epoch": 0.011410057754613326, "grad_norm": NaN, "learning_rate": 7.001820804926883e-05, "loss": 0.0, "step": 81 }, { "epoch": 0.011550922665164108, "grad_norm": NaN, "learning_rate": 6.924546107037015e-05, "loss": 0.0, "step": 82 }, { "epoch": 0.01169178757571489, "grad_norm": NaN, "learning_rate": 6.846758926958709e-05, "loss": 0.0, "step": 83 }, { "epoch": 0.011832652486265671, "grad_norm": NaN, "learning_rate": 6.768480530930298e-05, "loss": 0.0, "step": 84 }, { "epoch": 0.011973517396816452, "grad_norm": NaN, "learning_rate": 6.689732319483653e-05, "loss": 0.0, "step": 85 }, { "epoch": 0.012114382307367234, "grad_norm": NaN, "learning_rate": 6.610535821593485e-05, "loss": 0.0, "step": 86 }, { "epoch": 0.012255247217918017, "grad_norm": NaN, "learning_rate": 6.530912688791548e-05, "loss": 0.0, "step": 87 }, { "epoch": 0.012396112128468799, "grad_norm": NaN, "learning_rate": 6.450884689247316e-05, "loss": 0.0, "step": 88 }, { "epoch": 0.01253697703901958, "grad_norm": NaN, "learning_rate": 6.37047370181679e-05, "loss": 0.0, "step": 89 }, { "epoch": 0.012677841949570362, "grad_norm": NaN, "learning_rate": 6.289701710061036e-05, "loss": 0.0, "step": 90 }, { "epoch": 0.012818706860121144, "grad_norm": NaN, "learning_rate": 6.208590796236096e-05, "loss": 0.0, "step": 91 }, { "epoch": 0.012959571770671925, "grad_norm": NaN, "learning_rate": 6.127163135255923e-05, "loss": 0.0, "step": 92 }, { "epoch": 0.013100436681222707, "grad_norm": NaN, "learning_rate": 6.045440988629975e-05, "loss": 0.0, "step": 93 }, { "epoch": 0.01324130159177349, "grad_norm": NaN, "learning_rate": 5.9634466983771556e-05, "loss": 0.0, "step": 94 }, { "epoch": 0.013382166502324272, "grad_norm": NaN, "learning_rate": 5.881202680917707e-05, "loss": 0.0, "step": 95 }, { "epoch": 0.013523031412875053, "grad_norm": NaN, "learning_rate": 5.7987314209448023e-05, "loss": 0.0, "step": 96 }, { "epoch": 0.013663896323425835, "grad_norm": NaN, "learning_rate": 5.716055465277449e-05, "loss": 0.0, "step": 97 }, { "epoch": 0.013804761233976616, "grad_norm": NaN, "learning_rate": 5.633197416696411e-05, "loss": 0.0, "step": 98 }, { "epoch": 0.013945626144527398, "grad_norm": NaN, "learning_rate": 5.5501799277648376e-05, "loss": 0.0, "step": 99 }, { "epoch": 0.01408649105507818, "grad_norm": NaN, "learning_rate": 5.467025694635279e-05, "loss": 0.0, "step": 100 }, { "epoch": 0.01408649105507818, "eval_loss": NaN, "eval_runtime": 673.3728, "eval_samples_per_second": 17.757, "eval_steps_per_second": 4.44, "step": 100 }, { "epoch": 0.014227355965628961, "grad_norm": NaN, "learning_rate": 5.383757450844782e-05, "loss": 0.0, "step": 101 }, { "epoch": 0.014368220876179744, "grad_norm": NaN, "learning_rate": 5.300397961099773e-05, "loss": 0.0, "step": 102 }, { "epoch": 0.014509085786730526, "grad_norm": NaN, "learning_rate": 5.216970015052406e-05, "loss": 0.0, "step": 103 }, { "epoch": 0.014649950697281307, "grad_norm": NaN, "learning_rate": 5.133496421070111e-05, "loss": 0.0, "step": 104 }, { "epoch": 0.014790815607832089, "grad_norm": NaN, "learning_rate": 5.05e-05, "loss": 0.0, "step": 105 }, { "epoch": 0.01493168051838287, "grad_norm": NaN, "learning_rate": 4.96650357892989e-05, "loss": 0.0, "step": 106 }, { "epoch": 0.015072545428933652, "grad_norm": NaN, "learning_rate": 4.8830299849475936e-05, "loss": 0.0, "step": 107 }, { "epoch": 0.015213410339484434, "grad_norm": NaN, "learning_rate": 4.799602038900227e-05, "loss": 0.0, "step": 108 }, { "epoch": 0.015354275250035217, "grad_norm": NaN, "learning_rate": 4.716242549155218e-05, "loss": 0.0, "step": 109 }, { "epoch": 0.015495140160585999, "grad_norm": NaN, "learning_rate": 4.632974305364722e-05, "loss": 0.0, "step": 110 }, { "epoch": 0.01563600507113678, "grad_norm": NaN, "learning_rate": 4.549820072235163e-05, "loss": 0.0, "step": 111 }, { "epoch": 0.01577686998168756, "grad_norm": NaN, "learning_rate": 4.4668025833035906e-05, "loss": 0.0, "step": 112 }, { "epoch": 0.015917734892238345, "grad_norm": NaN, "learning_rate": 4.383944534722552e-05, "loss": 0.0, "step": 113 }, { "epoch": 0.016058599802789127, "grad_norm": NaN, "learning_rate": 4.301268579055198e-05, "loss": 0.0, "step": 114 }, { "epoch": 0.016199464713339908, "grad_norm": NaN, "learning_rate": 4.218797319082293e-05, "loss": 0.0, "step": 115 }, { "epoch": 0.01634032962389069, "grad_norm": NaN, "learning_rate": 4.1365533016228466e-05, "loss": 0.0, "step": 116 }, { "epoch": 0.01648119453444147, "grad_norm": NaN, "learning_rate": 4.0545590113700254e-05, "loss": 0.0, "step": 117 }, { "epoch": 0.016622059444992253, "grad_norm": NaN, "learning_rate": 3.972836864744079e-05, "loss": 0.0, "step": 118 }, { "epoch": 0.016762924355543034, "grad_norm": NaN, "learning_rate": 3.891409203763905e-05, "loss": 0.0, "step": 119 }, { "epoch": 0.016903789266093816, "grad_norm": NaN, "learning_rate": 3.810298289938965e-05, "loss": 0.0, "step": 120 }, { "epoch": 0.017044654176644598, "grad_norm": NaN, "learning_rate": 3.72952629818321e-05, "loss": 0.0, "step": 121 }, { "epoch": 0.01718551908719538, "grad_norm": NaN, "learning_rate": 3.649115310752686e-05, "loss": 0.0, "step": 122 }, { "epoch": 0.01732638399774616, "grad_norm": NaN, "learning_rate": 3.5690873112084536e-05, "loss": 0.0, "step": 123 }, { "epoch": 0.017467248908296942, "grad_norm": NaN, "learning_rate": 3.489464178406516e-05, "loss": 0.0, "step": 124 }, { "epoch": 0.017608113818847724, "grad_norm": NaN, "learning_rate": 3.410267680516349e-05, "loss": 0.0, "step": 125 }, { "epoch": 0.017748978729398505, "grad_norm": NaN, "learning_rate": 3.3315194690697024e-05, "loss": 0.0, "step": 126 }, { "epoch": 0.017889843639949287, "grad_norm": NaN, "learning_rate": 3.253241073041291e-05, "loss": 0.0, "step": 127 }, { "epoch": 0.018030708550500072, "grad_norm": NaN, "learning_rate": 3.175453892962985e-05, "loss": 0.0, "step": 128 }, { "epoch": 0.018171573461050854, "grad_norm": NaN, "learning_rate": 3.098179195073118e-05, "loss": 0.0, "step": 129 }, { "epoch": 0.018312438371601635, "grad_norm": NaN, "learning_rate": 3.0214381055025054e-05, "loss": 0.0, "step": 130 }, { "epoch": 0.018453303282152417, "grad_norm": NaN, "learning_rate": 2.9452516044987844e-05, "loss": 0.0, "step": 131 }, { "epoch": 0.0185941681927032, "grad_norm": NaN, "learning_rate": 2.8696405206906116e-05, "loss": 0.0, "step": 132 }, { "epoch": 0.01873503310325398, "grad_norm": NaN, "learning_rate": 2.7946255253933275e-05, "loss": 0.0, "step": 133 }, { "epoch": 0.01887589801380476, "grad_norm": NaN, "learning_rate": 2.7202271269576275e-05, "loss": 0.0, "step": 134 }, { "epoch": 0.019016762924355543, "grad_norm": NaN, "learning_rate": 2.6464656651627787e-05, "loss": 0.0, "step": 135 }, { "epoch": 0.019157627834906325, "grad_norm": NaN, "learning_rate": 2.5733613056559357e-05, "loss": 0.0, "step": 136 }, { "epoch": 0.019298492745457106, "grad_norm": NaN, "learning_rate": 2.5009340344390407e-05, "loss": 0.0, "step": 137 }, { "epoch": 0.019439357656007888, "grad_norm": NaN, "learning_rate": 2.4292036524048648e-05, "loss": 0.0, "step": 138 }, { "epoch": 0.01958022256655867, "grad_norm": NaN, "learning_rate": 2.3581897699236327e-05, "loss": 0.0, "step": 139 }, { "epoch": 0.01972108747710945, "grad_norm": NaN, "learning_rate": 2.287911801481745e-05, "loss": 0.0, "step": 140 }, { "epoch": 0.019861952387660232, "grad_norm": NaN, "learning_rate": 2.2183889603740534e-05, "loss": 0.0, "step": 141 }, { "epoch": 0.020002817298211014, "grad_norm": NaN, "learning_rate": 2.149640253451135e-05, "loss": 0.0, "step": 142 }, { "epoch": 0.0201436822087618, "grad_norm": NaN, "learning_rate": 2.0816844759230112e-05, "loss": 0.0, "step": 143 }, { "epoch": 0.02028454711931258, "grad_norm": NaN, "learning_rate": 2.0145402062207232e-05, "loss": 0.0, "step": 144 }, { "epoch": 0.020425412029863362, "grad_norm": NaN, "learning_rate": 1.9482258009171774e-05, "loss": 0.0, "step": 145 }, { "epoch": 0.020566276940414144, "grad_norm": NaN, "learning_rate": 1.882759389708638e-05, "loss": 0.0, "step": 146 }, { "epoch": 0.020707141850964925, "grad_norm": NaN, "learning_rate": 1.818158870458251e-05, "loss": 0.0, "step": 147 }, { "epoch": 0.020848006761515707, "grad_norm": NaN, "learning_rate": 1.754441904302948e-05, "loss": 0.0, "step": 148 }, { "epoch": 0.02098887167206649, "grad_norm": NaN, "learning_rate": 1.691625910825066e-05, "loss": 0.0, "step": 149 }, { "epoch": 0.02112973658261727, "grad_norm": NaN, "learning_rate": 1.6297280632900087e-05, "loss": 0.0, "step": 150 }, { "epoch": 0.02112973658261727, "eval_loss": NaN, "eval_runtime": 1071.658, "eval_samples_per_second": 11.157, "eval_steps_per_second": 2.79, "step": 150 }, { "epoch": 0.02127060149316805, "grad_norm": NaN, "learning_rate": 1.5687652839512427e-05, "loss": 0.0, "step": 151 }, { "epoch": 0.021411466403718833, "grad_norm": NaN, "learning_rate": 1.5087542394239326e-05, "loss": 0.0, "step": 152 }, { "epoch": 0.021552331314269615, "grad_norm": NaN, "learning_rate": 1.449711336128445e-05, "loss": 0.0, "step": 153 }, { "epoch": 0.021693196224820396, "grad_norm": NaN, "learning_rate": 1.3916527158050007e-05, "loss": 0.0, "step": 154 }, { "epoch": 0.021834061135371178, "grad_norm": NaN, "learning_rate": 1.3345942511006854e-05, "loss": 0.0, "step": 155 }, { "epoch": 0.02197492604592196, "grad_norm": NaN, "learning_rate": 1.2785515412300245e-05, "loss": 0.0, "step": 156 }, { "epoch": 0.02211579095647274, "grad_norm": NaN, "learning_rate": 1.2235399077103106e-05, "loss": 0.0, "step": 157 }, { "epoch": 0.022256655867023526, "grad_norm": NaN, "learning_rate": 1.1695743901728631e-05, "loss": 0.0, "step": 158 }, { "epoch": 0.022397520777574308, "grad_norm": NaN, "learning_rate": 1.1166697422513329e-05, "loss": 0.0, "step": 159 }, { "epoch": 0.02253838568812509, "grad_norm": NaN, "learning_rate": 1.064840427548213e-05, "loss": 0.0, "step": 160 }, { "epoch": 0.02267925059867587, "grad_norm": NaN, "learning_rate": 1.0141006156806303e-05, "loss": 0.0, "step": 161 }, { "epoch": 0.022820115509226652, "grad_norm": NaN, "learning_rate": 9.64464178406516e-06, "loss": 0.0, "step": 162 }, { "epoch": 0.022960980419777434, "grad_norm": NaN, "learning_rate": 9.159446858322036e-06, "loss": 0.0, "step": 163 }, { "epoch": 0.023101845330328215, "grad_norm": NaN, "learning_rate": 8.685554027024989e-06, "loss": 0.0, "step": 164 }, { "epoch": 0.023242710240878997, "grad_norm": NaN, "learning_rate": 8.22309284774231e-06, "loss": 0.0, "step": 165 }, { "epoch": 0.02338357515142978, "grad_norm": NaN, "learning_rate": 7.772189752742756e-06, "loss": 0.0, "step": 166 }, { "epoch": 0.02352444006198056, "grad_norm": NaN, "learning_rate": 7.332968014430274e-06, "loss": 0.0, "step": 167 }, { "epoch": 0.023665304972531342, "grad_norm": NaN, "learning_rate": 6.905547711642518e-06, "loss": 0.0, "step": 168 }, { "epoch": 0.023806169883082123, "grad_norm": NaN, "learning_rate": 6.490045696822492e-06, "loss": 0.0, "step": 169 }, { "epoch": 0.023947034793632905, "grad_norm": NaN, "learning_rate": 6.086575564072307e-06, "loss": 0.0, "step": 170 }, { "epoch": 0.024087899704183686, "grad_norm": NaN, "learning_rate": 5.6952476180976035e-06, "loss": 0.0, "step": 171 }, { "epoch": 0.024228764614734468, "grad_norm": NaN, "learning_rate": 5.316168844051445e-06, "loss": 0.0, "step": 172 }, { "epoch": 0.024369629525285253, "grad_norm": NaN, "learning_rate": 4.949442878285576e-06, "loss": 0.0, "step": 173 }, { "epoch": 0.024510494435836035, "grad_norm": NaN, "learning_rate": 4.5951699800172935e-06, "loss": 0.0, "step": 174 }, { "epoch": 0.024651359346386816, "grad_norm": NaN, "learning_rate": 4.253447003919596e-06, "loss": 0.0, "step": 175 }, { "epoch": 0.024792224256937598, "grad_norm": NaN, "learning_rate": 3.924367373642071e-06, "loss": 0.0, "step": 176 }, { "epoch": 0.02493308916748838, "grad_norm": NaN, "learning_rate": 3.6080210562697984e-06, "loss": 0.0, "step": 177 }, { "epoch": 0.02507395407803916, "grad_norm": NaN, "learning_rate": 3.3044945377272327e-06, "loss": 0.0, "step": 178 }, { "epoch": 0.025214818988589943, "grad_norm": NaN, "learning_rate": 3.013870799133861e-06, "loss": 0.0, "step": 179 }, { "epoch": 0.025355683899140724, "grad_norm": NaN, "learning_rate": 2.736229294117951e-06, "loss": 0.0, "step": 180 }, { "epoch": 0.025496548809691506, "grad_norm": NaN, "learning_rate": 2.4716459270947466e-06, "loss": 0.0, "step": 181 }, { "epoch": 0.025637413720242287, "grad_norm": NaN, "learning_rate": 2.220193032514939e-06, "loss": 0.0, "step": 182 }, { "epoch": 0.02577827863079307, "grad_norm": NaN, "learning_rate": 1.9819393550891686e-06, "loss": 0.0, "step": 183 }, { "epoch": 0.02591914354134385, "grad_norm": NaN, "learning_rate": 1.7569500309938975e-06, "loss": 0.0, "step": 184 }, { "epoch": 0.026060008451894632, "grad_norm": NaN, "learning_rate": 1.5452865700638161e-06, "loss": 0.0, "step": 185 }, { "epoch": 0.026200873362445413, "grad_norm": NaN, "learning_rate": 1.34700683897571e-06, "loss": 0.0, "step": 186 }, { "epoch": 0.026341738272996195, "grad_norm": NaN, "learning_rate": 1.162165045428237e-06, "loss": 0.0, "step": 187 }, { "epoch": 0.02648260318354698, "grad_norm": NaN, "learning_rate": 9.908117233221274e-07, "loss": 0.0, "step": 188 }, { "epoch": 0.02662346809409776, "grad_norm": NaN, "learning_rate": 8.329937189446904e-07, "loss": 0.0, "step": 189 }, { "epoch": 0.026764333004648543, "grad_norm": NaN, "learning_rate": 6.887541781625227e-07, "loss": 0.0, "step": 190 }, { "epoch": 0.026905197915199325, "grad_norm": NaN, "learning_rate": 5.581325346258412e-07, "loss": 0.0, "step": 191 }, { "epoch": 0.027046062825750106, "grad_norm": NaN, "learning_rate": 4.411644989877527e-07, "loss": 0.0, "step": 192 }, { "epoch": 0.027186927736300888, "grad_norm": NaN, "learning_rate": 3.378820491412738e-07, "loss": 0.0, "step": 193 }, { "epoch": 0.02732779264685167, "grad_norm": NaN, "learning_rate": 2.483134214769235e-07, "loss": 0.0, "step": 194 }, { "epoch": 0.02746865755740245, "grad_norm": NaN, "learning_rate": 1.7248310316317272e-07, "loss": 0.0, "step": 195 }, { "epoch": 0.027609522467953233, "grad_norm": NaN, "learning_rate": 1.10411825451886e-07, "loss": 0.0, "step": 196 }, { "epoch": 0.027750387378504014, "grad_norm": NaN, "learning_rate": 6.211655801061078e-08, "loss": 0.0, "step": 197 }, { "epoch": 0.027891252289054796, "grad_norm": NaN, "learning_rate": 2.761050428323453e-08, "loss": 0.0, "step": 198 }, { "epoch": 0.028032117199605577, "grad_norm": NaN, "learning_rate": 6.90309788031529e-09, "loss": 0.0, "step": 199 }, { "epoch": 0.02817298211015636, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 200 }, { "epoch": 0.02817298211015636, "eval_loss": NaN, "eval_runtime": 1347.7679, "eval_samples_per_second": 8.872, "eval_steps_per_second": 2.218, "step": 200 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 3 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.287627183469363e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }