|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.14316392269148176, |
|
"eval_steps": 50, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0007158196134574087, |
|
"eval_loss": 0.7471321821212769, |
|
"eval_runtime": 35.0025, |
|
"eval_samples_per_second": 16.827, |
|
"eval_steps_per_second": 8.428, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0071581961345740875, |
|
"grad_norm": 0.3041437268257141, |
|
"learning_rate": 0.0002, |
|
"loss": 0.6256, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.014316392269148175, |
|
"grad_norm": 0.21548660099506378, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4944, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.021474588403722263, |
|
"grad_norm": 0.302517831325531, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5196, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02863278453829635, |
|
"grad_norm": 0.2508035898208618, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4403, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03579098067287044, |
|
"grad_norm": 0.20165234804153442, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4687, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03579098067287044, |
|
"eval_loss": 0.4433478116989136, |
|
"eval_runtime": 33.6585, |
|
"eval_samples_per_second": 17.499, |
|
"eval_steps_per_second": 8.765, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04294917680744453, |
|
"grad_norm": 0.19860732555389404, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4482, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.05010737294201861, |
|
"grad_norm": 0.2684027850627899, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4554, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0572655690765927, |
|
"grad_norm": 0.2177385836839676, |
|
"learning_rate": 0.0002, |
|
"loss": 0.432, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.06442376521116679, |
|
"grad_norm": 0.2141849547624588, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4056, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.07158196134574088, |
|
"grad_norm": 0.22702062129974365, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4283, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07158196134574088, |
|
"eval_loss": 0.4245603382587433, |
|
"eval_runtime": 33.7143, |
|
"eval_samples_per_second": 17.47, |
|
"eval_steps_per_second": 8.75, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07874015748031496, |
|
"grad_norm": 0.23127739131450653, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4524, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.08589835361488905, |
|
"grad_norm": 0.231285959482193, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4068, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.09305654974946313, |
|
"grad_norm": 0.22599922120571136, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4394, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.10021474588403723, |
|
"grad_norm": 0.22092868387699127, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4272, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.1073729420186113, |
|
"grad_norm": 0.2594935894012451, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4254, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1073729420186113, |
|
"eval_loss": 0.4146997332572937, |
|
"eval_runtime": 33.7342, |
|
"eval_samples_per_second": 17.46, |
|
"eval_steps_per_second": 8.745, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1145311381531854, |
|
"grad_norm": 0.25014057755470276, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4102, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.12168933428775948, |
|
"grad_norm": 0.24548886716365814, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4263, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.12884753042233357, |
|
"grad_norm": 0.237931028008461, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4323, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.13600572655690765, |
|
"grad_norm": 0.21747586131095886, |
|
"learning_rate": 0.0002, |
|
"loss": 0.4161, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.14316392269148176, |
|
"grad_norm": 0.24018722772598267, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3944, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.14316392269148176, |
|
"eval_loss": 0.40698060393333435, |
|
"eval_runtime": 33.7394, |
|
"eval_samples_per_second": 17.457, |
|
"eval_steps_per_second": 8.743, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6.5912916934656e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|