{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 10.416666666666666,
  "eval_steps": 500,
  "global_step": 3000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.87,
      "grad_norm": 4.0434980392456055,
      "learning_rate": 2.2963272120200336e-05,
      "loss": 0.4992,
      "step": 250
    },
    {
      "epoch": 1.74,
      "grad_norm": 0.8384565711021423,
      "learning_rate": 2.087646076794658e-05,
      "loss": 0.0871,
      "step": 500
    },
    {
      "epoch": 1.74,
      "eval_loss": 0.06118601933121681,
      "eval_runtime": 1.5053,
      "eval_samples_per_second": 29.231,
      "eval_steps_per_second": 3.986,
      "step": 500
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.239577054977417,
      "learning_rate": 1.878964941569282e-05,
      "loss": 0.0646,
      "step": 750
    },
    {
      "epoch": 3.47,
      "grad_norm": 1.6355056762695312,
      "learning_rate": 1.6702838063439067e-05,
      "loss": 0.0531,
      "step": 1000
    },
    {
      "epoch": 3.47,
      "eval_loss": 0.04738747701048851,
      "eval_runtime": 1.5005,
      "eval_samples_per_second": 29.323,
      "eval_steps_per_second": 3.999,
      "step": 1000
    },
    {
      "epoch": 4.34,
      "grad_norm": 1.0931097269058228,
      "learning_rate": 1.461602671118531e-05,
      "loss": 0.0524,
      "step": 1250
    },
    {
      "epoch": 5.21,
      "grad_norm": 0.6507939696311951,
      "learning_rate": 1.2529215358931554e-05,
      "loss": 0.0493,
      "step": 1500
    },
    {
      "epoch": 5.21,
      "eval_loss": 0.04496830329298973,
      "eval_runtime": 1.5011,
      "eval_samples_per_second": 29.312,
      "eval_steps_per_second": 3.997,
      "step": 1500
    },
    {
      "epoch": 6.08,
      "grad_norm": 0.834311842918396,
      "learning_rate": 1.0442404006677797e-05,
      "loss": 0.0496,
      "step": 1750
    },
    {
      "epoch": 6.94,
      "grad_norm": 1.1006280183792114,
      "learning_rate": 8.355592654424042e-06,
      "loss": 0.0484,
      "step": 2000
    },
    {
      "epoch": 6.94,
      "eval_loss": 0.04376932606101036,
      "eval_runtime": 1.5008,
      "eval_samples_per_second": 29.318,
      "eval_steps_per_second": 3.998,
      "step": 2000
    },
    {
      "epoch": 7.81,
      "grad_norm": 0.6833261251449585,
      "learning_rate": 6.268781302170284e-06,
      "loss": 0.049,
      "step": 2250
    },
    {
      "epoch": 8.68,
      "grad_norm": 0.6349731087684631,
      "learning_rate": 4.181969949916528e-06,
      "loss": 0.0483,
      "step": 2500
    },
    {
      "epoch": 8.68,
      "eval_loss": 0.04366318881511688,
      "eval_runtime": 1.5018,
      "eval_samples_per_second": 29.298,
      "eval_steps_per_second": 3.995,
      "step": 2500
    },
    {
      "epoch": 9.55,
      "grad_norm": 0.7335835695266724,
      "learning_rate": 2.0951585976627714e-06,
      "loss": 0.0461,
      "step": 2750
    },
    {
      "epoch": 10.42,
      "grad_norm": 0.8647783398628235,
      "learning_rate": 8.347245409015025e-09,
      "loss": 0.048,
      "step": 3000
    },
    {
      "epoch": 10.42,
      "eval_loss": 0.04346328228712082,
      "eval_runtime": 1.4995,
      "eval_samples_per_second": 29.342,
      "eval_steps_per_second": 4.001,
      "step": 3000
    }
  ],
  "logging_steps": 250,
  "max_steps": 3000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 11,
  "save_steps": 500,
  "total_flos": 6.4761311232e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}