{ "best_metric": 1.9286161661148071, "best_model_checkpoint": "miner_id_24/checkpoint-25", "epoch": 0.0034874799469903047, "eval_steps": 5, "global_step": 25, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0001394991978796122, "grad_norm": 5.412474155426025, "learning_rate": 2.0000000000000003e-06, "loss": 7.833, "step": 1 }, { "epoch": 0.0001394991978796122, "eval_loss": 1.9461240768432617, "eval_runtime": 571.8803, "eval_samples_per_second": 5.279, "eval_steps_per_second": 2.64, "step": 1 }, { "epoch": 0.0002789983957592244, "grad_norm": 3.4513301849365234, "learning_rate": 4.000000000000001e-06, "loss": 8.2523, "step": 2 }, { "epoch": 0.0004184975936388366, "grad_norm": 4.496656894683838, "learning_rate": 6e-06, "loss": 7.7379, "step": 3 }, { "epoch": 0.0005579967915184488, "grad_norm": 3.8950307369232178, "learning_rate": 8.000000000000001e-06, "loss": 7.7308, "step": 4 }, { "epoch": 0.000697495989398061, "grad_norm": 5.047878265380859, "learning_rate": 1e-05, "loss": 7.7515, "step": 5 }, { "epoch": 0.000697495989398061, "eval_loss": 1.9448482990264893, "eval_runtime": 568.2175, "eval_samples_per_second": 5.313, "eval_steps_per_second": 2.657, "step": 5 }, { "epoch": 0.0008369951872776732, "grad_norm": 4.40566349029541, "learning_rate": 1.2e-05, "loss": 8.3306, "step": 6 }, { "epoch": 0.0009764943851572854, "grad_norm": 4.030971050262451, "learning_rate": 1.4e-05, "loss": 8.5676, "step": 7 }, { "epoch": 0.0011159935830368976, "grad_norm": 3.6297662258148193, "learning_rate": 1.6000000000000003e-05, "loss": 8.8764, "step": 8 }, { "epoch": 0.0012554927809165098, "grad_norm": 3.5799789428710938, "learning_rate": 1.8e-05, "loss": 7.0126, "step": 9 }, { "epoch": 0.001394991978796122, "grad_norm": 3.509881019592285, "learning_rate": 2e-05, "loss": 6.605, "step": 10 }, { "epoch": 0.001394991978796122, "eval_loss": 1.9371235370635986, "eval_runtime": 571.2429, "eval_samples_per_second": 5.285, "eval_steps_per_second": 2.643, "step": 10 }, { "epoch": 0.0015344911766757342, "grad_norm": 3.6284492015838623, "learning_rate": 1.9781476007338058e-05, "loss": 6.8203, "step": 11 }, { "epoch": 0.0016739903745553464, "grad_norm": 4.006571292877197, "learning_rate": 1.913545457642601e-05, "loss": 7.1611, "step": 12 }, { "epoch": 0.0018134895724349585, "grad_norm": 3.692206382751465, "learning_rate": 1.8090169943749477e-05, "loss": 6.6696, "step": 13 }, { "epoch": 0.0019529887703145707, "grad_norm": 3.8917338848114014, "learning_rate": 1.6691306063588583e-05, "loss": 6.5384, "step": 14 }, { "epoch": 0.002092487968194183, "grad_norm": 3.3947360515594482, "learning_rate": 1.5000000000000002e-05, "loss": 8.0743, "step": 15 }, { "epoch": 0.002092487968194183, "eval_loss": 1.9318677186965942, "eval_runtime": 572.3091, "eval_samples_per_second": 5.275, "eval_steps_per_second": 2.638, "step": 15 }, { "epoch": 0.0022319871660737953, "grad_norm": 3.9302000999450684, "learning_rate": 1.3090169943749475e-05, "loss": 7.5725, "step": 16 }, { "epoch": 0.0023714863639534072, "grad_norm": 3.275446891784668, "learning_rate": 1.1045284632676535e-05, "loss": 6.8308, "step": 17 }, { "epoch": 0.0025109855618330196, "grad_norm": 3.4003710746765137, "learning_rate": 8.954715367323468e-06, "loss": 6.9789, "step": 18 }, { "epoch": 0.0026504847597126316, "grad_norm": 3.6268935203552246, "learning_rate": 6.909830056250527e-06, "loss": 8.397, "step": 19 }, { "epoch": 0.002789983957592244, "grad_norm": 3.25662899017334, "learning_rate": 5.000000000000003e-06, "loss": 7.659, "step": 20 }, { "epoch": 0.002789983957592244, "eval_loss": 1.9291880130767822, "eval_runtime": 565.3468, "eval_samples_per_second": 5.34, "eval_steps_per_second": 2.671, "step": 20 }, { "epoch": 0.002929483155471856, "grad_norm": 4.072402477264404, "learning_rate": 3.308693936411421e-06, "loss": 6.9555, "step": 21 }, { "epoch": 0.0030689823533514684, "grad_norm": 3.289466142654419, "learning_rate": 1.9098300562505266e-06, "loss": 7.8888, "step": 22 }, { "epoch": 0.0032084815512310803, "grad_norm": 3.7418341636657715, "learning_rate": 8.645454235739903e-07, "loss": 7.4557, "step": 23 }, { "epoch": 0.0033479807491106927, "grad_norm": 3.3926737308502197, "learning_rate": 2.1852399266194312e-07, "loss": 8.2331, "step": 24 }, { "epoch": 0.0034874799469903047, "grad_norm": 3.648900270462036, "learning_rate": 0.0, "loss": 7.5158, "step": 25 }, { "epoch": 0.0034874799469903047, "eval_loss": 1.9286161661148071, "eval_runtime": 562.0679, "eval_samples_per_second": 5.371, "eval_steps_per_second": 2.687, "step": 25 } ], "logging_steps": 1, "max_steps": 25, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 2, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8456631717199872.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }