{ "best_metric": 0.5091546773910522, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.6329113924050633, "eval_steps": 25, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.012658227848101266, "grad_norm": 42.028053283691406, "learning_rate": 2e-05, "loss": 6.0199, "step": 1 }, { "epoch": 0.012658227848101266, "eval_loss": 1.7844791412353516, "eval_runtime": 8.9481, "eval_samples_per_second": 14.975, "eval_steps_per_second": 1.9, "step": 1 }, { "epoch": 0.02531645569620253, "grad_norm": 45.79191970825195, "learning_rate": 4e-05, "loss": 6.0986, "step": 2 }, { "epoch": 0.0379746835443038, "grad_norm": 26.50522804260254, "learning_rate": 6e-05, "loss": 4.7016, "step": 3 }, { "epoch": 0.05063291139240506, "grad_norm": 13.651175498962402, "learning_rate": 8e-05, "loss": 4.4423, "step": 4 }, { "epoch": 0.06329113924050633, "grad_norm": 10.82734489440918, "learning_rate": 0.0001, "loss": 3.682, "step": 5 }, { "epoch": 0.0759493670886076, "grad_norm": 9.763678550720215, "learning_rate": 9.987820251299122e-05, "loss": 3.6171, "step": 6 }, { "epoch": 0.08860759493670886, "grad_norm": 10.851454734802246, "learning_rate": 9.951340343707852e-05, "loss": 3.2633, "step": 7 }, { "epoch": 0.10126582278481013, "grad_norm": 9.515451431274414, "learning_rate": 9.890738003669029e-05, "loss": 2.9692, "step": 8 }, { "epoch": 0.11392405063291139, "grad_norm": 8.546252250671387, "learning_rate": 9.806308479691595e-05, "loss": 3.0759, "step": 9 }, { "epoch": 0.12658227848101267, "grad_norm": 8.0128173828125, "learning_rate": 9.698463103929542e-05, "loss": 3.4095, "step": 10 }, { "epoch": 0.13924050632911392, "grad_norm": 8.757233619689941, "learning_rate": 9.567727288213005e-05, "loss": 2.8278, "step": 11 }, { "epoch": 0.1518987341772152, "grad_norm": 8.649038314819336, "learning_rate": 9.414737964294636e-05, "loss": 2.829, "step": 12 }, { "epoch": 0.16455696202531644, "grad_norm": 8.026497840881348, "learning_rate": 9.24024048078213e-05, "loss": 2.7128, "step": 13 }, { "epoch": 0.17721518987341772, "grad_norm": 7.095391273498535, "learning_rate": 9.045084971874738e-05, "loss": 2.5395, "step": 14 }, { "epoch": 0.189873417721519, "grad_norm": 6.5727715492248535, "learning_rate": 8.83022221559489e-05, "loss": 2.5782, "step": 15 }, { "epoch": 0.20253164556962025, "grad_norm": 9.417537689208984, "learning_rate": 8.596699001693255e-05, "loss": 2.5226, "step": 16 }, { "epoch": 0.21518987341772153, "grad_norm": 6.655349254608154, "learning_rate": 8.345653031794292e-05, "loss": 2.5861, "step": 17 }, { "epoch": 0.22784810126582278, "grad_norm": 6.458268165588379, "learning_rate": 8.07830737662829e-05, "loss": 2.0705, "step": 18 }, { "epoch": 0.24050632911392406, "grad_norm": 7.291686058044434, "learning_rate": 7.795964517353735e-05, "loss": 2.4741, "step": 19 }, { "epoch": 0.25316455696202533, "grad_norm": 6.116868019104004, "learning_rate": 7.500000000000001e-05, "loss": 2.6819, "step": 20 }, { "epoch": 0.26582278481012656, "grad_norm": 5.806187152862549, "learning_rate": 7.191855733945387e-05, "loss": 2.836, "step": 21 }, { "epoch": 0.27848101265822783, "grad_norm": 5.537196159362793, "learning_rate": 6.873032967079561e-05, "loss": 2.5114, "step": 22 }, { "epoch": 0.2911392405063291, "grad_norm": 5.313586235046387, "learning_rate": 6.545084971874738e-05, "loss": 2.25, "step": 23 }, { "epoch": 0.3037974683544304, "grad_norm": 10.902448654174805, "learning_rate": 6.209609477998338e-05, "loss": 2.1196, "step": 24 }, { "epoch": 0.31645569620253167, "grad_norm": 6.734158992767334, "learning_rate": 5.868240888334653e-05, "loss": 2.5926, "step": 25 }, { "epoch": 0.31645569620253167, "eval_loss": 0.5596538186073303, "eval_runtime": 8.8918, "eval_samples_per_second": 15.07, "eval_steps_per_second": 1.912, "step": 25 }, { "epoch": 0.3291139240506329, "grad_norm": 7.093173503875732, "learning_rate": 5.522642316338268e-05, "loss": 2.3286, "step": 26 }, { "epoch": 0.34177215189873417, "grad_norm": 5.931855201721191, "learning_rate": 5.174497483512506e-05, "loss": 1.9256, "step": 27 }, { "epoch": 0.35443037974683544, "grad_norm": 7.020291328430176, "learning_rate": 4.825502516487497e-05, "loss": 2.2209, "step": 28 }, { "epoch": 0.3670886075949367, "grad_norm": 6.310258388519287, "learning_rate": 4.477357683661734e-05, "loss": 2.231, "step": 29 }, { "epoch": 0.379746835443038, "grad_norm": 6.792581081390381, "learning_rate": 4.131759111665349e-05, "loss": 2.2158, "step": 30 }, { "epoch": 0.3924050632911392, "grad_norm": 6.202337265014648, "learning_rate": 3.790390522001662e-05, "loss": 2.0049, "step": 31 }, { "epoch": 0.4050632911392405, "grad_norm": 5.8156914710998535, "learning_rate": 3.4549150281252636e-05, "loss": 2.069, "step": 32 }, { "epoch": 0.4177215189873418, "grad_norm": 5.831786155700684, "learning_rate": 3.12696703292044e-05, "loss": 2.1343, "step": 33 }, { "epoch": 0.43037974683544306, "grad_norm": 6.260862827301025, "learning_rate": 2.8081442660546125e-05, "loss": 2.3229, "step": 34 }, { "epoch": 0.4430379746835443, "grad_norm": 5.3327202796936035, "learning_rate": 2.500000000000001e-05, "loss": 2.0672, "step": 35 }, { "epoch": 0.45569620253164556, "grad_norm": 4.9809417724609375, "learning_rate": 2.2040354826462668e-05, "loss": 1.9, "step": 36 }, { "epoch": 0.46835443037974683, "grad_norm": 6.4525909423828125, "learning_rate": 1.9216926233717085e-05, "loss": 2.0248, "step": 37 }, { "epoch": 0.4810126582278481, "grad_norm": 6.171474933624268, "learning_rate": 1.6543469682057106e-05, "loss": 1.9821, "step": 38 }, { "epoch": 0.4936708860759494, "grad_norm": 4.337746620178223, "learning_rate": 1.4033009983067452e-05, "loss": 1.9903, "step": 39 }, { "epoch": 0.5063291139240507, "grad_norm": 4.888207912445068, "learning_rate": 1.1697777844051105e-05, "loss": 2.0018, "step": 40 }, { "epoch": 0.5189873417721519, "grad_norm": 5.527780532836914, "learning_rate": 9.549150281252633e-06, "loss": 2.0755, "step": 41 }, { "epoch": 0.5316455696202531, "grad_norm": 5.051205158233643, "learning_rate": 7.597595192178702e-06, "loss": 1.979, "step": 42 }, { "epoch": 0.5443037974683544, "grad_norm": 5.09185266494751, "learning_rate": 5.852620357053651e-06, "loss": 2.0654, "step": 43 }, { "epoch": 0.5569620253164557, "grad_norm": 4.7769694328308105, "learning_rate": 4.322727117869951e-06, "loss": 1.7859, "step": 44 }, { "epoch": 0.569620253164557, "grad_norm": 6.080770969390869, "learning_rate": 3.0153689607045845e-06, "loss": 1.8062, "step": 45 }, { "epoch": 0.5822784810126582, "grad_norm": 6.04468297958374, "learning_rate": 1.9369152030840556e-06, "loss": 2.0148, "step": 46 }, { "epoch": 0.5949367088607594, "grad_norm": 6.175866603851318, "learning_rate": 1.0926199633097157e-06, "loss": 1.8943, "step": 47 }, { "epoch": 0.6075949367088608, "grad_norm": 6.0562615394592285, "learning_rate": 4.865965629214819e-07, "loss": 1.9837, "step": 48 }, { "epoch": 0.620253164556962, "grad_norm": 7.458826541900635, "learning_rate": 1.2179748700879012e-07, "loss": 1.5447, "step": 49 }, { "epoch": 0.6329113924050633, "grad_norm": 6.869083881378174, "learning_rate": 0.0, "loss": 2.0644, "step": 50 }, { "epoch": 0.6329113924050633, "eval_loss": 0.5091546773910522, "eval_runtime": 9.1468, "eval_samples_per_second": 14.65, "eval_steps_per_second": 1.859, "step": 50 } ], "logging_steps": 1, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 25, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.475265990669107e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }