|
{ |
|
"best_metric": 1.1256643533706665, |
|
"best_model_checkpoint": "./outputs/checkpoint-1200", |
|
"epoch": 1.6, |
|
"eval_steps": 100, |
|
"global_step": 1200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1585, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 1.2452774047851562, |
|
"eval_runtime": 180.3075, |
|
"eval_samples_per_second": 10.704, |
|
"eval_steps_per_second": 1.342, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0217, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 1.2180681228637695, |
|
"eval_runtime": 177.5054, |
|
"eval_samples_per_second": 10.873, |
|
"eval_steps_per_second": 1.363, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0048, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 1.203295350074768, |
|
"eval_runtime": 177.4062, |
|
"eval_samples_per_second": 10.879, |
|
"eval_steps_per_second": 1.364, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0002, |
|
"loss": 0.992, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 1.190936803817749, |
|
"eval_runtime": 177.5627, |
|
"eval_samples_per_second": 10.869, |
|
"eval_steps_per_second": 1.363, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9767, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 1.1756863594055176, |
|
"eval_runtime": 177.0747, |
|
"eval_samples_per_second": 10.899, |
|
"eval_steps_per_second": 1.367, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9715, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 1.1637059450149536, |
|
"eval_runtime": 177.4052, |
|
"eval_samples_per_second": 10.879, |
|
"eval_steps_per_second": 1.364, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9653, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_loss": 1.1604684591293335, |
|
"eval_runtime": 177.5251, |
|
"eval_samples_per_second": 10.872, |
|
"eval_steps_per_second": 1.363, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9517, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_loss": 1.1489245891571045, |
|
"eval_runtime": 180.9559, |
|
"eval_samples_per_second": 10.666, |
|
"eval_steps_per_second": 1.337, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9397, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_loss": 1.1430203914642334, |
|
"eval_runtime": 178.7234, |
|
"eval_samples_per_second": 10.799, |
|
"eval_steps_per_second": 1.354, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9313, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_loss": 1.1354840993881226, |
|
"eval_runtime": 177.5002, |
|
"eval_samples_per_second": 10.873, |
|
"eval_steps_per_second": 1.363, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9393, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_loss": 1.1310428380966187, |
|
"eval_runtime": 177.407, |
|
"eval_samples_per_second": 10.879, |
|
"eval_steps_per_second": 1.364, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9277, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_loss": 1.1256643533706665, |
|
"eval_runtime": 177.3878, |
|
"eval_samples_per_second": 10.88, |
|
"eval_steps_per_second": 1.364, |
|
"step": 1200 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 2250, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 8.96699552329728e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|