{
  "best_metric": 0.7299006581306458,
  "best_model_checkpoint": "./your_output_dir6/checkpoint-1696",
  "epoch": 0.8008814733196915,
  "eval_steps": 212,
  "global_step": 1696,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05,
      "learning_rate": 9.111469032188407e-06,
      "loss": 0.8491,
      "step": 106
    },
    {
      "epoch": 0.1,
      "learning_rate": 6.44278153923165e-06,
      "loss": 0.7704,
      "step": 212
    },
    {
      "epoch": 0.1,
      "eval_loss": 0.799176037311554,
      "eval_runtime": 409.8822,
      "eval_samples_per_second": 79.008,
      "eval_steps_per_second": 4.938,
      "step": 212
    },
    {
      "epoch": 0.15,
      "learning_rate": 5.2605090984469145e-06,
      "loss": 0.7357,
      "step": 318
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.555734516094203e-06,
      "loss": 0.7092,
      "step": 424
    },
    {
      "epoch": 0.2,
      "eval_loss": 0.768510639667511,
      "eval_runtime": 410.3912,
      "eval_samples_per_second": 78.91,
      "eval_steps_per_second": 4.932,
      "step": 424
    },
    {
      "epoch": 0.25,
      "learning_rate": 4.0747728261714994e-06,
      "loss": 0.6798,
      "step": 530
    },
    {
      "epoch": 0.3,
      "learning_rate": 3.7197416560053444e-06,
      "loss": 0.6657,
      "step": 636
    },
    {
      "epoch": 0.3,
      "eval_loss": 0.7549943327903748,
      "eval_runtime": 410.6508,
      "eval_samples_per_second": 78.86,
      "eval_steps_per_second": 4.929,
      "step": 636
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.4438115910909842e-06,
      "loss": 0.6525,
      "step": 742
    },
    {
      "epoch": 0.4,
      "learning_rate": 3.221390769615825e-06,
      "loss": 0.6389,
      "step": 848
    },
    {
      "epoch": 0.4,
      "eval_loss": 0.7454479336738586,
      "eval_runtime": 410.7721,
      "eval_samples_per_second": 78.837,
      "eval_steps_per_second": 4.927,
      "step": 848
    },
    {
      "epoch": 0.45,
      "learning_rate": 3.0371563440628015e-06,
      "loss": 0.6297,
      "step": 954
    },
    {
      "epoch": 0.5,
      "learning_rate": 2.8812994971805396e-06,
      "loss": 0.6181,
      "step": 1060
    },
    {
      "epoch": 0.5,
      "eval_loss": 0.7404582500457764,
      "eval_runtime": 411.5401,
      "eval_samples_per_second": 78.69,
      "eval_steps_per_second": 4.918,
      "step": 1060
    },
    {
      "epoch": 0.55,
      "learning_rate": 2.747211278973781e-06,
      "loss": 0.6017,
      "step": 1166
    },
    {
      "epoch": 0.6,
      "learning_rate": 2.6302545492234573e-06,
      "loss": 0.5909,
      "step": 1272
    },
    {
      "epoch": 0.6,
      "eval_loss": 0.7366061210632324,
      "eval_runtime": 411.9849,
      "eval_samples_per_second": 78.605,
      "eval_steps_per_second": 4.913,
      "step": 1272
    },
    {
      "epoch": 0.65,
      "learning_rate": 2.5270668300275037e-06,
      "loss": 0.5807,
      "step": 1378
    },
    {
      "epoch": 0.7,
      "learning_rate": 2.4351425291892684e-06,
      "loss": 0.5714,
      "step": 1484
    },
    {
      "epoch": 0.7,
      "eval_loss": 0.7348645329475403,
      "eval_runtime": 412.3678,
      "eval_samples_per_second": 78.532,
      "eval_steps_per_second": 4.908,
      "step": 1484
    },
    {
      "epoch": 0.75,
      "learning_rate": 2.352571188076687e-06,
      "loss": 0.5663,
      "step": 1590
    },
    {
      "epoch": 0.8,
      "learning_rate": 2.2778672580471017e-06,
      "loss": 0.5649,
      "step": 1696
    },
    {
      "epoch": 0.8,
      "eval_loss": 0.7299006581306458,
      "eval_runtime": 412.693,
      "eval_samples_per_second": 78.47,
      "eval_steps_per_second": 4.904,
      "step": 1696
    }
  ],
  "logging_steps": 106,
  "max_steps": 2117,
  "num_train_epochs": 1,
  "save_steps": 212,
  "total_flos": 3.3047999005830676e+18,
  "trial_name": null,
  "trial_params": null
}