{ "best_metric": 10.35035228729248, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.022655188038060717, "eval_steps": 25, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00045310376076121433, "grad_norm": 0.08214962482452393, "learning_rate": 2e-05, "loss": 10.3726, "step": 1 }, { "epoch": 0.00045310376076121433, "eval_loss": 10.372055053710938, "eval_runtime": 6.1256, "eval_samples_per_second": 606.801, "eval_steps_per_second": 75.911, "step": 1 }, { "epoch": 0.0009062075215224287, "grad_norm": 0.0846695676445961, "learning_rate": 4e-05, "loss": 10.3757, "step": 2 }, { "epoch": 0.001359311282283643, "grad_norm": 0.076853908598423, "learning_rate": 6e-05, "loss": 10.3727, "step": 3 }, { "epoch": 0.0018124150430448573, "grad_norm": 0.07561151683330536, "learning_rate": 8e-05, "loss": 10.3751, "step": 4 }, { "epoch": 0.0022655188038060714, "grad_norm": 0.08088299632072449, "learning_rate": 0.0001, "loss": 10.3735, "step": 5 }, { "epoch": 0.002718622564567286, "grad_norm": 0.08890769630670547, "learning_rate": 9.987820251299122e-05, "loss": 10.3709, "step": 6 }, { "epoch": 0.0031717263253285004, "grad_norm": 0.08019179105758667, "learning_rate": 9.951340343707852e-05, "loss": 10.3732, "step": 7 }, { "epoch": 0.0036248300860897147, "grad_norm": 0.09421122074127197, "learning_rate": 9.890738003669029e-05, "loss": 10.3725, "step": 8 }, { "epoch": 0.0040779338468509285, "grad_norm": 0.0971372202038765, "learning_rate": 9.806308479691595e-05, "loss": 10.3685, "step": 9 }, { "epoch": 0.004531037607612143, "grad_norm": 0.09252215176820755, "learning_rate": 9.698463103929542e-05, "loss": 10.3698, "step": 10 }, { "epoch": 0.004984141368373357, "grad_norm": 0.10680188983678818, "learning_rate": 9.567727288213005e-05, "loss": 10.3665, "step": 11 }, { "epoch": 0.005437245129134572, "grad_norm": 0.09999203681945801, "learning_rate": 9.414737964294636e-05, "loss": 10.3684, "step": 12 }, { "epoch": 0.0058903488898957865, "grad_norm": 0.10176604986190796, "learning_rate": 9.24024048078213e-05, "loss": 10.3649, "step": 13 }, { "epoch": 0.006343452650657001, "grad_norm": 0.10781875997781754, "learning_rate": 9.045084971874738e-05, "loss": 10.3683, "step": 14 }, { "epoch": 0.006796556411418215, "grad_norm": 0.11113500595092773, "learning_rate": 8.83022221559489e-05, "loss": 10.366, "step": 15 }, { "epoch": 0.007249660172179429, "grad_norm": 0.11764982342720032, "learning_rate": 8.596699001693255e-05, "loss": 10.3658, "step": 16 }, { "epoch": 0.007702763932940644, "grad_norm": 0.12170328199863434, "learning_rate": 8.345653031794292e-05, "loss": 10.3657, "step": 17 }, { "epoch": 0.008155867693701857, "grad_norm": 0.12587465345859528, "learning_rate": 8.07830737662829e-05, "loss": 10.3624, "step": 18 }, { "epoch": 0.008608971454463071, "grad_norm": 0.14366334676742554, "learning_rate": 7.795964517353735e-05, "loss": 10.3616, "step": 19 }, { "epoch": 0.009062075215224286, "grad_norm": 0.15799346566200256, "learning_rate": 7.500000000000001e-05, "loss": 10.3629, "step": 20 }, { "epoch": 0.0095151789759855, "grad_norm": 0.12464886158704758, "learning_rate": 7.191855733945387e-05, "loss": 10.3621, "step": 21 }, { "epoch": 0.009968282736746714, "grad_norm": 0.13898994028568268, "learning_rate": 6.873032967079561e-05, "loss": 10.3571, "step": 22 }, { "epoch": 0.01042138649750793, "grad_norm": 0.1414911299943924, "learning_rate": 6.545084971874738e-05, "loss": 10.3614, "step": 23 }, { "epoch": 
0.010874490258269144, "grad_norm": 0.13739123940467834, "learning_rate": 6.209609477998338e-05, "loss": 10.3593, "step": 24 }, { "epoch": 0.011327594019030359, "grad_norm": 0.15723730623722076, "learning_rate": 5.868240888334653e-05, "loss": 10.3598, "step": 25 }, { "epoch": 0.011327594019030359, "eval_loss": 10.357394218444824, "eval_runtime": 6.1267, "eval_samples_per_second": 606.693, "eval_steps_per_second": 75.898, "step": 25 }, { "epoch": 0.011780697779791573, "grad_norm": 0.1885799765586853, "learning_rate": 5.522642316338268e-05, "loss": 10.3603, "step": 26 }, { "epoch": 0.012233801540552787, "grad_norm": 0.16406424343585968, "learning_rate": 5.174497483512506e-05, "loss": 10.3557, "step": 27 }, { "epoch": 0.012686905301314002, "grad_norm": 0.17430010437965393, "learning_rate": 4.825502516487497e-05, "loss": 10.3564, "step": 28 }, { "epoch": 0.013140009062075216, "grad_norm": 0.1869587004184723, "learning_rate": 4.477357683661734e-05, "loss": 10.3561, "step": 29 }, { "epoch": 0.01359311282283643, "grad_norm": 0.16364026069641113, "learning_rate": 4.131759111665349e-05, "loss": 10.3571, "step": 30 }, { "epoch": 0.014046216583597644, "grad_norm": 0.13922007381916046, "learning_rate": 3.790390522001662e-05, "loss": 10.357, "step": 31 }, { "epoch": 0.014499320344358859, "grad_norm": 0.1661142110824585, "learning_rate": 3.4549150281252636e-05, "loss": 10.3528, "step": 32 }, { "epoch": 0.014952424105120073, "grad_norm": 0.17683999240398407, "learning_rate": 3.12696703292044e-05, "loss": 10.3551, "step": 33 }, { "epoch": 0.015405527865881287, "grad_norm": 0.15338218212127686, "learning_rate": 2.8081442660546125e-05, "loss": 10.3499, "step": 34 }, { "epoch": 0.0158586316266425, "grad_norm": 0.17775611579418182, "learning_rate": 2.500000000000001e-05, "loss": 10.351, "step": 35 }, { "epoch": 0.016311735387403714, "grad_norm": 0.17701967060565948, "learning_rate": 2.2040354826462668e-05, "loss": 10.3541, "step": 36 }, { "epoch": 0.01676483914816493, "grad_norm": 0.15589460730552673, "learning_rate": 1.9216926233717085e-05, "loss": 10.3524, "step": 37 }, { "epoch": 0.017217942908926143, "grad_norm": 0.14376643300056458, "learning_rate": 1.6543469682057106e-05, "loss": 10.3524, "step": 38 }, { "epoch": 0.017671046669687357, "grad_norm": 0.17480316758155823, "learning_rate": 1.4033009983067452e-05, "loss": 10.3501, "step": 39 }, { "epoch": 0.01812415043044857, "grad_norm": 0.17131978273391724, "learning_rate": 1.1697777844051105e-05, "loss": 10.3545, "step": 40 }, { "epoch": 0.018577254191209785, "grad_norm": 0.14923687279224396, "learning_rate": 9.549150281252633e-06, "loss": 10.3502, "step": 41 }, { "epoch": 0.019030357951971, "grad_norm": 0.1653790920972824, "learning_rate": 7.597595192178702e-06, "loss": 10.3535, "step": 42 }, { "epoch": 0.019483461712732214, "grad_norm": 0.14821237325668335, "learning_rate": 5.852620357053651e-06, "loss": 10.3546, "step": 43 }, { "epoch": 0.01993656547349343, "grad_norm": 0.1650238186120987, "learning_rate": 4.322727117869951e-06, "loss": 10.3489, "step": 44 }, { "epoch": 0.020389669234254643, "grad_norm": 0.13781306147575378, "learning_rate": 3.0153689607045845e-06, "loss": 10.3507, "step": 45 }, { "epoch": 0.02084277299501586, "grad_norm": 0.16543304920196533, "learning_rate": 1.9369152030840556e-06, "loss": 10.3499, "step": 46 }, { "epoch": 0.021295876755777075, "grad_norm": 0.15752767026424408, "learning_rate": 1.0926199633097157e-06, "loss": 10.3518, "step": 47 }, { "epoch": 0.02174898051653829, "grad_norm": 0.1764352172613144, "learning_rate": 
4.865965629214819e-07, "loss": 10.3515, "step": 48 }, { "epoch": 0.022202084277299503, "grad_norm": 0.15777145326137543, "learning_rate": 1.2179748700879012e-07, "loss": 10.3515, "step": 49 }, { "epoch": 0.022655188038060717, "grad_norm": 0.1509462594985962, "learning_rate": 0.0, "loss": 10.35, "step": 50 }, { "epoch": 0.022655188038060717, "eval_loss": 10.35035228729248, "eval_runtime": 6.129, "eval_samples_per_second": 606.456, "eval_steps_per_second": 75.868, "step": 50 } ], "logging_steps": 1, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 25, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4099486777344.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }