{ "best_metric": 0.05233228579163551, "best_model_checkpoint": "miner_id_24/checkpoint-100", "epoch": 1.9975031210986267, "eval_steps": 25, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.019975031210986267, "grad_norm": 32.32381057739258, "learning_rate": 2e-05, "loss": 6.6435, "step": 1 }, { "epoch": 0.019975031210986267, "eval_loss": 6.483519554138184, "eval_runtime": 6.8225, "eval_samples_per_second": 98.791, "eval_steps_per_second": 6.303, "step": 1 }, { "epoch": 0.039950062421972535, "grad_norm": 32.88056182861328, "learning_rate": 4e-05, "loss": 6.6757, "step": 2 }, { "epoch": 0.0599250936329588, "grad_norm": 31.19997787475586, "learning_rate": 6e-05, "loss": 6.4156, "step": 3 }, { "epoch": 0.07990012484394507, "grad_norm": 23.896907806396484, "learning_rate": 8e-05, "loss": 5.0024, "step": 4 }, { "epoch": 0.09987515605493133, "grad_norm": 13.470758438110352, "learning_rate": 0.0001, "loss": 3.1743, "step": 5 }, { "epoch": 0.1198501872659176, "grad_norm": 9.408984184265137, "learning_rate": 9.997266286704631e-05, "loss": 2.0651, "step": 6 }, { "epoch": 0.13982521847690388, "grad_norm": 9.622489929199219, "learning_rate": 9.989068136093873e-05, "loss": 1.413, "step": 7 }, { "epoch": 0.15980024968789014, "grad_norm": 6.5332932472229, "learning_rate": 9.975414512725057e-05, "loss": 0.7076, "step": 8 }, { "epoch": 0.1797752808988764, "grad_norm": 2.7348451614379883, "learning_rate": 9.956320346634876e-05, "loss": 0.307, "step": 9 }, { "epoch": 0.19975031210986266, "grad_norm": 1.4920560121536255, "learning_rate": 9.931806517013612e-05, "loss": 0.1333, "step": 10 }, { "epoch": 0.21972534332084895, "grad_norm": 0.7912208437919617, "learning_rate": 9.901899829374047e-05, "loss": 0.0924, "step": 11 }, { "epoch": 0.2397003745318352, "grad_norm": 0.4346309304237366, "learning_rate": 9.86663298624003e-05, "loss": 0.0483, "step": 12 }, { "epoch": 0.2596754057428215, "grad_norm": 1.192466139793396, "learning_rate": 9.826044551386744e-05, "loss": 0.0972, "step": 13 }, { "epoch": 0.27965043695380776, "grad_norm": 1.9299390316009521, "learning_rate": 9.780178907671789e-05, "loss": 0.1222, "step": 14 }, { "epoch": 0.299625468164794, "grad_norm": 0.6965848207473755, "learning_rate": 9.729086208503174e-05, "loss": 0.0805, "step": 15 }, { "epoch": 0.3196004993757803, "grad_norm": 0.9696741700172424, "learning_rate": 9.672822322997305e-05, "loss": 0.096, "step": 16 }, { "epoch": 0.33957553058676654, "grad_norm": 0.6358460187911987, "learning_rate": 9.611448774886924e-05, "loss": 0.0818, "step": 17 }, { "epoch": 0.3595505617977528, "grad_norm": 0.5815155506134033, "learning_rate": 9.545032675245813e-05, "loss": 0.0761, "step": 18 }, { "epoch": 0.37952559300873906, "grad_norm": 0.6561976671218872, "learning_rate": 9.473646649103818e-05, "loss": 0.0709, "step": 19 }, { "epoch": 0.3995006242197253, "grad_norm": 1.2013440132141113, "learning_rate": 9.397368756032445e-05, "loss": 0.1275, "step": 20 }, { "epoch": 0.41947565543071164, "grad_norm": 0.5312557816505432, "learning_rate": 9.316282404787871e-05, "loss": 0.0696, "step": 21 }, { "epoch": 0.4394506866416979, "grad_norm": 0.39881420135498047, "learning_rate": 9.230476262104677e-05, "loss": 0.0519, "step": 22 }, { "epoch": 0.45942571785268416, "grad_norm": 0.38855546712875366, "learning_rate": 9.140044155740101e-05, "loss": 0.0509, "step": 23 }, { "epoch": 0.4794007490636704, "grad_norm": 0.5084037780761719, "learning_rate": 9.045084971874738e-05, "loss": 0.0526, "step": 24 }, { "epoch": 0.4993757802746567, "grad_norm": 0.36609482765197754, "learning_rate": 8.945702546981969e-05, "loss": 0.0417, "step": 25 }, { "epoch": 0.4993757802746567, "eval_loss": 0.07570720463991165, "eval_runtime": 6.8153, "eval_samples_per_second": 98.895, "eval_steps_per_second": 6.309, "step": 25 }, { "epoch": 0.519350811485643, "grad_norm": 1.0404666662216187, "learning_rate": 8.842005554284296e-05, "loss": 0.1171, "step": 26 }, { "epoch": 0.5393258426966292, "grad_norm": 0.6350396871566772, "learning_rate": 8.73410738492077e-05, "loss": 0.0752, "step": 27 }, { "epoch": 0.5593008739076155, "grad_norm": 0.5021554827690125, "learning_rate": 8.622126023955446e-05, "loss": 0.0646, "step": 28 }, { "epoch": 0.5792759051186017, "grad_norm": 0.7377936244010925, "learning_rate": 8.506183921362443e-05, "loss": 0.0745, "step": 29 }, { "epoch": 0.599250936329588, "grad_norm": 0.5337192416191101, "learning_rate": 8.386407858128706e-05, "loss": 0.0594, "step": 30 }, { "epoch": 0.6192259675405742, "grad_norm": 0.5033006072044373, "learning_rate": 8.262928807620843e-05, "loss": 0.0501, "step": 31 }, { "epoch": 0.6392009987515606, "grad_norm": 0.4639797806739807, "learning_rate": 8.135881792367686e-05, "loss": 0.0591, "step": 32 }, { "epoch": 0.6591760299625468, "grad_norm": 0.47012022137641907, "learning_rate": 8.005405736415126e-05, "loss": 0.0583, "step": 33 }, { "epoch": 0.6791510611735331, "grad_norm": 0.40740257501602173, "learning_rate": 7.871643313414718e-05, "loss": 0.0393, "step": 34 }, { "epoch": 0.6991260923845194, "grad_norm": 0.4334827959537506, "learning_rate": 7.734740790612136e-05, "loss": 0.0694, "step": 35 }, { "epoch": 0.7191011235955056, "grad_norm": 0.34927257895469666, "learning_rate": 7.594847868906076e-05, "loss": 0.0407, "step": 36 }, { "epoch": 0.7390761548064919, "grad_norm": 0.3759843707084656, "learning_rate": 7.452117519152542e-05, "loss": 0.0333, "step": 37 }, { "epoch": 0.7590511860174781, "grad_norm": 0.3167741596698761, "learning_rate": 7.30670581489344e-05, "loss": 0.0618, "step": 38 }, { "epoch": 0.7790262172284644, "grad_norm": 1.1636722087860107, "learning_rate": 7.158771761692464e-05, "loss": 0.1346, "step": 39 }, { "epoch": 0.7990012484394506, "grad_norm": 0.699979305267334, "learning_rate": 7.008477123264848e-05, "loss": 0.0876, "step": 40 }, { "epoch": 0.818976279650437, "grad_norm": 0.5671290755271912, "learning_rate": 6.855986244591104e-05, "loss": 0.0756, "step": 41 }, { "epoch": 0.8389513108614233, "grad_norm": 0.3836475908756256, "learning_rate": 6.701465872208216e-05, "loss": 0.0673, "step": 42 }, { "epoch": 0.8589263420724095, "grad_norm": 0.3973909020423889, "learning_rate": 6.545084971874738e-05, "loss": 0.0461, "step": 43 }, { "epoch": 0.8789013732833958, "grad_norm": 0.41587159037590027, "learning_rate": 6.387014543809223e-05, "loss": 0.0576, "step": 44 }, { "epoch": 0.898876404494382, "grad_norm": 0.47783681750297546, "learning_rate": 6.227427435703997e-05, "loss": 0.0507, "step": 45 }, { "epoch": 0.9188514357053683, "grad_norm": 0.5148672461509705, "learning_rate": 6.066498153718735e-05, "loss": 0.0676, "step": 46 }, { "epoch": 0.9388264669163545, "grad_norm": 0.5991238355636597, "learning_rate": 5.90440267166055e-05, "loss": 0.0565, "step": 47 }, { "epoch": 0.9588014981273408, "grad_norm": 0.5729100704193115, "learning_rate": 5.74131823855921e-05, "loss": 0.0693, "step": 48 }, { "epoch": 0.978776529338327, "grad_norm": 0.46636906266212463, "learning_rate": 5.577423184847932e-05, "loss": 0.0614, "step": 49 }, { "epoch": 0.9987515605493134, "grad_norm": 0.3745470643043518, "learning_rate": 5.4128967273616625e-05, "loss": 0.0263, "step": 50 }, { "epoch": 0.9987515605493134, "eval_loss": 0.05686701834201813, "eval_runtime": 6.7983, "eval_samples_per_second": 99.142, "eval_steps_per_second": 6.325, "step": 50 }, { "epoch": 1.0187265917602997, "grad_norm": 0.6444295048713684, "learning_rate": 5.247918773366112e-05, "loss": 0.0721, "step": 51 }, { "epoch": 1.038701622971286, "grad_norm": 0.356772780418396, "learning_rate": 5.0826697238317935e-05, "loss": 0.0615, "step": 52 }, { "epoch": 1.058676654182272, "grad_norm": 0.42914852499961853, "learning_rate": 4.917330276168208e-05, "loss": 0.0448, "step": 53 }, { "epoch": 1.0786516853932584, "grad_norm": 0.45043033361434937, "learning_rate": 4.7520812266338885e-05, "loss": 0.0495, "step": 54 }, { "epoch": 1.0986267166042447, "grad_norm": 0.3365642726421356, "learning_rate": 4.5871032726383386e-05, "loss": 0.0594, "step": 55 }, { "epoch": 1.118601747815231, "grad_norm": 0.2681638300418854, "learning_rate": 4.4225768151520694e-05, "loss": 0.0334, "step": 56 }, { "epoch": 1.1385767790262173, "grad_norm": 0.38356465101242065, "learning_rate": 4.2586817614407895e-05, "loss": 0.0523, "step": 57 }, { "epoch": 1.1585518102372034, "grad_norm": 0.40522027015686035, "learning_rate": 4.095597328339452e-05, "loss": 0.0413, "step": 58 }, { "epoch": 1.1785268414481898, "grad_norm": 0.22131456434726715, "learning_rate": 3.933501846281267e-05, "loss": 0.0239, "step": 59 }, { "epoch": 1.198501872659176, "grad_norm": 0.3389541208744049, "learning_rate": 3.772572564296005e-05, "loss": 0.0372, "step": 60 }, { "epoch": 1.2184769038701624, "grad_norm": 0.29863250255584717, "learning_rate": 3.612985456190778e-05, "loss": 0.0212, "step": 61 }, { "epoch": 1.2384519350811485, "grad_norm": 0.22949916124343872, "learning_rate": 3.4549150281252636e-05, "loss": 0.0169, "step": 62 }, { "epoch": 1.2584269662921348, "grad_norm": 0.28763681650161743, "learning_rate": 3.298534127791785e-05, "loss": 0.0261, "step": 63 }, { "epoch": 1.2784019975031211, "grad_norm": 0.37180131673812866, "learning_rate": 3.144013755408895e-05, "loss": 0.0322, "step": 64 }, { "epoch": 1.2983770287141074, "grad_norm": 0.3311071991920471, "learning_rate": 2.991522876735154e-05, "loss": 0.0346, "step": 65 }, { "epoch": 1.3183520599250937, "grad_norm": 0.3131641149520874, "learning_rate": 2.8412282383075363e-05, "loss": 0.0391, "step": 66 }, { "epoch": 1.3383270911360798, "grad_norm": 0.3434888422489166, "learning_rate": 2.693294185106562e-05, "loss": 0.0433, "step": 67 }, { "epoch": 1.3583021223470662, "grad_norm": 0.35058820247650146, "learning_rate": 2.547882480847461e-05, "loss": 0.0353, "step": 68 }, { "epoch": 1.3782771535580525, "grad_norm": 0.23418645560741425, "learning_rate": 2.405152131093926e-05, "loss": 0.0217, "step": 69 }, { "epoch": 1.3982521847690386, "grad_norm": 0.2888217866420746, "learning_rate": 2.2652592093878666e-05, "loss": 0.0274, "step": 70 }, { "epoch": 1.4182272159800249, "grad_norm": 0.39486294984817505, "learning_rate": 2.128356686585282e-05, "loss": 0.0554, "step": 71 }, { "epoch": 1.4382022471910112, "grad_norm": 0.23438122868537903, "learning_rate": 1.9945942635848748e-05, "loss": 0.0198, "step": 72 }, { "epoch": 1.4581772784019975, "grad_norm": 0.29288169741630554, "learning_rate": 1.8641182076323148e-05, "loss": 0.0312, "step": 73 }, { "epoch": 1.4781523096129838, "grad_norm": 0.23595376312732697, "learning_rate": 1.7370711923791567e-05, "loss": 0.0211, "step": 74 }, { "epoch": 1.4981273408239701, "grad_norm": 0.3430394232273102, "learning_rate": 1.6135921418712956e-05, "loss": 0.0217, "step": 75 }, { "epoch": 1.4981273408239701, "eval_loss": 0.05315978825092316, "eval_runtime": 6.797, "eval_samples_per_second": 99.162, "eval_steps_per_second": 6.326, "step": 75 }, { "epoch": 1.5181023720349565, "grad_norm": 0.392977237701416, "learning_rate": 1.4938160786375572e-05, "loss": 0.0447, "step": 76 }, { "epoch": 1.5380774032459426, "grad_norm": 0.2741864323616028, "learning_rate": 1.3778739760445552e-05, "loss": 0.0361, "step": 77 }, { "epoch": 1.5580524344569289, "grad_norm": 0.27243152260780334, "learning_rate": 1.2658926150792322e-05, "loss": 0.0274, "step": 78 }, { "epoch": 1.578027465667915, "grad_norm": 0.40270382165908813, "learning_rate": 1.157994445715706e-05, "loss": 0.0542, "step": 79 }, { "epoch": 1.5980024968789013, "grad_norm": 0.22019416093826294, "learning_rate": 1.0542974530180327e-05, "loss": 0.0271, "step": 80 }, { "epoch": 1.6179775280898876, "grad_norm": 0.3104560077190399, "learning_rate": 9.549150281252633e-06, "loss": 0.0328, "step": 81 }, { "epoch": 1.637952559300874, "grad_norm": 0.2330722212791443, "learning_rate": 8.599558442598998e-06, "loss": 0.0335, "step": 82 }, { "epoch": 1.6579275905118602, "grad_norm": 0.28479820489883423, "learning_rate": 7.695237378953223e-06, "loss": 0.0329, "step": 83 }, { "epoch": 1.6779026217228465, "grad_norm": 0.28051578998565674, "learning_rate": 6.837175952121306e-06, "loss": 0.0266, "step": 84 }, { "epoch": 1.6978776529338329, "grad_norm": 0.23817192018032074, "learning_rate": 6.026312439675552e-06, "loss": 0.024, "step": 85 }, { "epoch": 1.717852684144819, "grad_norm": 0.2637062966823578, "learning_rate": 5.263533508961827e-06, "loss": 0.0312, "step": 86 }, { "epoch": 1.7378277153558053, "grad_norm": 0.263805627822876, "learning_rate": 4.549673247541875e-06, "loss": 0.0224, "step": 87 }, { "epoch": 1.7578027465667914, "grad_norm": 0.24613410234451294, "learning_rate": 3.885512251130763e-06, "loss": 0.0316, "step": 88 }, { "epoch": 1.7777777777777777, "grad_norm": 0.3555324971675873, "learning_rate": 3.271776770026963e-06, "loss": 0.0366, "step": 89 }, { "epoch": 1.797752808988764, "grad_norm": 0.3043622374534607, "learning_rate": 2.7091379149682685e-06, "loss": 0.0401, "step": 90 }, { "epoch": 1.8177278401997503, "grad_norm": 0.46414047479629517, "learning_rate": 2.1982109232821178e-06, "loss": 0.0492, "step": 91 }, { "epoch": 1.8377028714107366, "grad_norm": 0.287507027387619, "learning_rate": 1.7395544861325718e-06, "loss": 0.0219, "step": 92 }, { "epoch": 1.857677902621723, "grad_norm": 0.3227818012237549, "learning_rate": 1.333670137599713e-06, "loss": 0.0388, "step": 93 }, { "epoch": 1.8776529338327093, "grad_norm": 0.397909015417099, "learning_rate": 9.810017062595322e-07, "loss": 0.047, "step": 94 }, { "epoch": 1.8976279650436954, "grad_norm": 0.4243538975715637, "learning_rate": 6.819348298638839e-07, "loss": 0.0484, "step": 95 }, { "epoch": 1.9176029962546817, "grad_norm": 0.26352471113204956, "learning_rate": 4.367965336512403e-07, "loss": 0.0285, "step": 96 }, { "epoch": 1.9375780274656678, "grad_norm": 0.26268190145492554, "learning_rate": 2.458548727494292e-07, "loss": 0.0279, "step": 97 }, { "epoch": 1.957553058676654, "grad_norm": 0.2822859585285187, "learning_rate": 1.0931863906127327e-07, "loss": 0.0192, "step": 98 }, { "epoch": 1.9775280898876404, "grad_norm": 0.30654722452163696, "learning_rate": 2.7337132953697554e-08, "loss": 0.0183, "step": 99 }, { "epoch": 1.9975031210986267, "grad_norm": 0.28469908237457275, "learning_rate": 0.0, "loss": 0.0287, "step": 100 }, { "epoch": 1.9975031210986267, "eval_loss": 0.05233228579163551, "eval_runtime": 6.8792, "eval_samples_per_second": 97.977, "eval_steps_per_second": 6.251, "step": 100 } ], "logging_steps": 1, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 25, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.238145415544832e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }