|
{ |
|
"best_metric": 5.487072944641113, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-25", |
|
"epoch": 0.01525320317266626, |
|
"eval_steps": 5, |
|
"global_step": 25, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0006101281269066504, |
|
"grad_norm": 3.2737698554992676, |
|
"learning_rate": 2e-05, |
|
"loss": 6.8164, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0006101281269066504, |
|
"eval_loss": 6.713930606842041, |
|
"eval_runtime": 102.4095, |
|
"eval_samples_per_second": 6.747, |
|
"eval_steps_per_second": 3.379, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0012202562538133007, |
|
"grad_norm": 2.322046995162964, |
|
"learning_rate": 4e-05, |
|
"loss": 6.6216, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0018303843807199512, |
|
"grad_norm": 2.4447498321533203, |
|
"learning_rate": 6e-05, |
|
"loss": 6.1817, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0024405125076266015, |
|
"grad_norm": 4.9170451164245605, |
|
"learning_rate": 8e-05, |
|
"loss": 6.615, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.003050640634533252, |
|
"grad_norm": 2.392526388168335, |
|
"learning_rate": 0.0001, |
|
"loss": 6.4283, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.003050640634533252, |
|
"eval_loss": 6.601726531982422, |
|
"eval_runtime": 100.8001, |
|
"eval_samples_per_second": 6.855, |
|
"eval_steps_per_second": 3.433, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0036607687614399025, |
|
"grad_norm": 2.418440341949463, |
|
"learning_rate": 0.00012, |
|
"loss": 6.6246, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.004270896888346553, |
|
"grad_norm": 2.0582504272460938, |
|
"learning_rate": 0.00014, |
|
"loss": 6.1862, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.004881025015253203, |
|
"grad_norm": 1.9576926231384277, |
|
"learning_rate": 0.00016, |
|
"loss": 5.9144, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.005491153142159854, |
|
"grad_norm": 1.743924617767334, |
|
"learning_rate": 0.00018, |
|
"loss": 5.9392, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.006101281269066504, |
|
"grad_norm": 2.0612897872924805, |
|
"learning_rate": 0.0002, |
|
"loss": 6.0787, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.006101281269066504, |
|
"eval_loss": 6.148561954498291, |
|
"eval_runtime": 100.8382, |
|
"eval_samples_per_second": 6.853, |
|
"eval_steps_per_second": 3.431, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.006711409395973154, |
|
"grad_norm": 1.978797435760498, |
|
"learning_rate": 0.00019781476007338058, |
|
"loss": 6.0012, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.007321537522879805, |
|
"grad_norm": 3.436152458190918, |
|
"learning_rate": 0.0001913545457642601, |
|
"loss": 6.7344, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.007931665649786455, |
|
"grad_norm": 1.6734408140182495, |
|
"learning_rate": 0.00018090169943749476, |
|
"loss": 5.6312, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.008541793776693106, |
|
"grad_norm": 2.174579620361328, |
|
"learning_rate": 0.00016691306063588583, |
|
"loss": 5.7046, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.009151921903599756, |
|
"grad_norm": 2.0292463302612305, |
|
"learning_rate": 0.00015000000000000001, |
|
"loss": 5.7352, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.009151921903599756, |
|
"eval_loss": 5.779726028442383, |
|
"eval_runtime": 101.1831, |
|
"eval_samples_per_second": 6.829, |
|
"eval_steps_per_second": 3.42, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.009762050030506406, |
|
"grad_norm": 2.211259603500366, |
|
"learning_rate": 0.00013090169943749476, |
|
"loss": 5.5493, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.010372178157413058, |
|
"grad_norm": 2.099989652633667, |
|
"learning_rate": 0.00011045284632676536, |
|
"loss": 5.8209, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.010982306284319707, |
|
"grad_norm": 2.493408679962158, |
|
"learning_rate": 8.954715367323468e-05, |
|
"loss": 5.1595, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.011592434411226357, |
|
"grad_norm": 1.6317073106765747, |
|
"learning_rate": 6.909830056250527e-05, |
|
"loss": 5.3294, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.012202562538133009, |
|
"grad_norm": 1.6446095705032349, |
|
"learning_rate": 5.000000000000002e-05, |
|
"loss": 5.276, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.012202562538133009, |
|
"eval_loss": 5.523056507110596, |
|
"eval_runtime": 101.0601, |
|
"eval_samples_per_second": 6.838, |
|
"eval_steps_per_second": 3.424, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.012812690665039659, |
|
"grad_norm": 1.8376624584197998, |
|
"learning_rate": 3.308693936411421e-05, |
|
"loss": 5.4096, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.013422818791946308, |
|
"grad_norm": 1.71583890914917, |
|
"learning_rate": 1.9098300562505266e-05, |
|
"loss": 5.3737, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.014032946918852958, |
|
"grad_norm": 1.852288842201233, |
|
"learning_rate": 8.645454235739903e-06, |
|
"loss": 5.3239, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.01464307504575961, |
|
"grad_norm": 1.8837882280349731, |
|
"learning_rate": 2.1852399266194314e-06, |
|
"loss": 5.3845, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.01525320317266626, |
|
"grad_norm": 1.9514257907867432, |
|
"learning_rate": 0.0, |
|
"loss": 5.4464, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.01525320317266626, |
|
"eval_loss": 5.487072944641113, |
|
"eval_runtime": 100.9583, |
|
"eval_samples_per_second": 6.844, |
|
"eval_steps_per_second": 3.427, |
|
"step": 25 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 25, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 2, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 191809506508800.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|