|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.009028733945782453, |
|
"eval_steps": 50, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 4.5143669728912266e-05, |
|
"eval_loss": 2.5635130405426025, |
|
"eval_runtime": 298.4496, |
|
"eval_samples_per_second": 31.252, |
|
"eval_steps_per_second": 7.814, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00045143669728912265, |
|
"grad_norm": 0.6829887628555298, |
|
"learning_rate": 0.0002, |
|
"loss": 2.3672, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0009028733945782453, |
|
"grad_norm": 0.6255219578742981, |
|
"learning_rate": 0.0002, |
|
"loss": 2.174, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.001354310091867368, |
|
"grad_norm": 0.6357229948043823, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1169, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0018057467891564906, |
|
"grad_norm": 0.6617687940597534, |
|
"learning_rate": 0.0002, |
|
"loss": 2.2879, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0022571834864456132, |
|
"grad_norm": 0.6272353529930115, |
|
"learning_rate": 0.0002, |
|
"loss": 2.219, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0022571834864456132, |
|
"eval_loss": 2.1563453674316406, |
|
"eval_runtime": 297.2114, |
|
"eval_samples_per_second": 31.382, |
|
"eval_steps_per_second": 7.846, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.002708620183734736, |
|
"grad_norm": 0.6150534749031067, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0668, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.0031600568810238585, |
|
"grad_norm": 0.629759669303894, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1185, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.003611493578312981, |
|
"grad_norm": 0.5716665983200073, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1695, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.004062930275602104, |
|
"grad_norm": 0.8046770691871643, |
|
"learning_rate": 0.0002, |
|
"loss": 2.002, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.0045143669728912265, |
|
"grad_norm": 0.5744296312332153, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1034, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0045143669728912265, |
|
"eval_loss": 2.102687120437622, |
|
"eval_runtime": 296.9029, |
|
"eval_samples_per_second": 31.414, |
|
"eval_steps_per_second": 7.854, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.004965803670180349, |
|
"grad_norm": 0.6123510599136353, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0783, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.005417240367469472, |
|
"grad_norm": 0.6725351214408875, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0282, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.005868677064758594, |
|
"grad_norm": 0.6317930221557617, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1056, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.006320113762047717, |
|
"grad_norm": 0.6830048561096191, |
|
"learning_rate": 0.0002, |
|
"loss": 2.063, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.00677155045933684, |
|
"grad_norm": 0.6954776644706726, |
|
"learning_rate": 0.0002, |
|
"loss": 1.9812, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.00677155045933684, |
|
"eval_loss": 2.0730745792388916, |
|
"eval_runtime": 296.9969, |
|
"eval_samples_per_second": 31.404, |
|
"eval_steps_per_second": 7.852, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.007222987156625962, |
|
"grad_norm": 0.6882342100143433, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0299, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.007674423853915085, |
|
"grad_norm": 0.6809920072555542, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1699, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.008125860551204208, |
|
"grad_norm": 0.6971638798713684, |
|
"learning_rate": 0.0002, |
|
"loss": 1.9678, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.00857729724849333, |
|
"grad_norm": 0.6451826691627502, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0033, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.009028733945782453, |
|
"grad_norm": 0.739595890045166, |
|
"learning_rate": 0.0002, |
|
"loss": 2.037, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.009028733945782453, |
|
"eval_loss": 2.0487308502197266, |
|
"eval_runtime": 297.0738, |
|
"eval_samples_per_second": 31.396, |
|
"eval_steps_per_second": 7.85, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.890390469967872e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|