|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.0008861438122792948, |
|
"eval_steps": 10, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.7722876245585897e-05, |
|
"eval_loss": 1.7361408472061157, |
|
"eval_runtime": 1985.431, |
|
"eval_samples_per_second": 11.966, |
|
"eval_steps_per_second": 5.983, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 8.861438122792948e-05, |
|
"grad_norm": 0.8715835809707642, |
|
"learning_rate": 5e-05, |
|
"loss": 1.6216, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.00017722876245585895, |
|
"grad_norm": 1.0856618881225586, |
|
"learning_rate": 0.0001, |
|
"loss": 1.3852, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.00017722876245585895, |
|
"eval_loss": 1.437419056892395, |
|
"eval_runtime": 2011.3717, |
|
"eval_samples_per_second": 11.812, |
|
"eval_steps_per_second": 5.906, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.00026584314368378843, |
|
"grad_norm": 1.6904109716415405, |
|
"learning_rate": 9.619397662556435e-05, |
|
"loss": 1.2922, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0003544575249117179, |
|
"grad_norm": 1.2493869066238403, |
|
"learning_rate": 8.535533905932738e-05, |
|
"loss": 0.8623, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0003544575249117179, |
|
"eval_loss": 0.7036842107772827, |
|
"eval_runtime": 2008.721, |
|
"eval_samples_per_second": 11.827, |
|
"eval_steps_per_second": 5.914, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0004430719061396474, |
|
"grad_norm": 1.8683321475982666, |
|
"learning_rate": 6.91341716182545e-05, |
|
"loss": 0.7287, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0005316862873675769, |
|
"grad_norm": 1.5432759523391724, |
|
"learning_rate": 5e-05, |
|
"loss": 0.6118, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0005316862873675769, |
|
"eval_loss": 0.5978233218193054, |
|
"eval_runtime": 2010.9302, |
|
"eval_samples_per_second": 11.814, |
|
"eval_steps_per_second": 5.907, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0006203006685955064, |
|
"grad_norm": 1.030396819114685, |
|
"learning_rate": 3.086582838174551e-05, |
|
"loss": 0.6266, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.0007089150498234358, |
|
"grad_norm": 0.6566690802574158, |
|
"learning_rate": 1.4644660940672627e-05, |
|
"loss": 0.5257, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0007089150498234358, |
|
"eval_loss": 0.5642721056938171, |
|
"eval_runtime": 2008.0741, |
|
"eval_samples_per_second": 11.831, |
|
"eval_steps_per_second": 5.916, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0007975294310513653, |
|
"grad_norm": 0.8434397578239441, |
|
"learning_rate": 3.8060233744356633e-06, |
|
"loss": 0.586, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.0008861438122792948, |
|
"grad_norm": 1.120571255683899, |
|
"learning_rate": 0.0, |
|
"loss": 0.5317, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0008861438122792948, |
|
"eval_loss": 0.5591886043548584, |
|
"eval_runtime": 2006.7277, |
|
"eval_samples_per_second": 11.839, |
|
"eval_steps_per_second": 5.92, |
|
"step": 50 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 50, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 13, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8980634932346880.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|