|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.010260970687827069, |
|
"eval_steps": 500, |
|
"global_step": 150, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0006840647125218045, |
|
"grad_norm": 0.6777583360671997, |
|
"learning_rate": 0.0001931034482758621, |
|
"loss": 2.5631, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.001368129425043609, |
|
"grad_norm": 0.529003918170929, |
|
"learning_rate": 0.0001793103448275862, |
|
"loss": 2.2202, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0020521941375654137, |
|
"grad_norm": 0.6383516192436218, |
|
"learning_rate": 0.00016551724137931035, |
|
"loss": 2.2541, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.002736258850087218, |
|
"grad_norm": 0.547764778137207, |
|
"learning_rate": 0.00015172413793103449, |
|
"loss": 2.1831, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.003420323562609023, |
|
"grad_norm": NaN, |
|
"learning_rate": 0.0001393103448275862, |
|
"loss": 2.2025, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0041043882751308275, |
|
"grad_norm": 0.6107162237167358, |
|
"learning_rate": 0.00012551724137931035, |
|
"loss": 2.1387, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.004788452987652632, |
|
"grad_norm": 0.5852159261703491, |
|
"learning_rate": 0.00011172413793103449, |
|
"loss": 2.1362, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.005472517700174436, |
|
"grad_norm": 0.587351381778717, |
|
"learning_rate": 9.793103448275862e-05, |
|
"loss": 2.1783, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.006156582412696241, |
|
"grad_norm": 0.4379996657371521, |
|
"learning_rate": 8.413793103448277e-05, |
|
"loss": 2.0619, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.006840647125218046, |
|
"grad_norm": 0.6348068714141846, |
|
"learning_rate": 7.03448275862069e-05, |
|
"loss": 2.1559, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0075247118377398505, |
|
"grad_norm": 0.5908586382865906, |
|
"learning_rate": 5.6551724137931037e-05, |
|
"loss": 2.0957, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.008208776550261655, |
|
"grad_norm": 0.6663537621498108, |
|
"learning_rate": 4.275862068965518e-05, |
|
"loss": 2.2033, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.008892841262783458, |
|
"grad_norm": 0.5192745923995972, |
|
"learning_rate": 2.8965517241379313e-05, |
|
"loss": 2.0376, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.009576905975305264, |
|
"grad_norm": 0.6954190731048584, |
|
"learning_rate": 1.5172413793103448e-05, |
|
"loss": 2.108, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.010260970687827069, |
|
"grad_norm": 0.6196115016937256, |
|
"learning_rate": 1.3793103448275862e-06, |
|
"loss": 1.9693, |
|
"step": 150 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 150, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.164445885681664e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|