|
{ |
|
"best_metric": 0.9278258085250854, |
|
"best_model_checkpoint": "./outputs/checkpoint-2200", |
|
"epoch": 2.9333333333333336, |
|
"eval_steps": 100, |
|
"global_step": 2200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9997, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 1.0932432413101196, |
|
"eval_runtime": 721.0711, |
|
"eval_samples_per_second": 2.677, |
|
"eval_steps_per_second": 0.336, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0002, |
|
"loss": 0.897, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 1.0685845613479614, |
|
"eval_runtime": 721.8083, |
|
"eval_samples_per_second": 2.674, |
|
"eval_steps_per_second": 0.335, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8812, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 1.0530365705490112, |
|
"eval_runtime": 727.1346, |
|
"eval_samples_per_second": 2.654, |
|
"eval_steps_per_second": 0.333, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8694, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 1.0397064685821533, |
|
"eval_runtime": 725.2926, |
|
"eval_samples_per_second": 2.661, |
|
"eval_steps_per_second": 0.334, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8549, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 1.02787184715271, |
|
"eval_runtime": 726.5869, |
|
"eval_samples_per_second": 2.656, |
|
"eval_steps_per_second": 0.333, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8498, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 1.0169517993927002, |
|
"eval_runtime": 729.2685, |
|
"eval_samples_per_second": 2.646, |
|
"eval_steps_per_second": 0.332, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8448, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_loss": 1.011763572692871, |
|
"eval_runtime": 729.6091, |
|
"eval_samples_per_second": 2.645, |
|
"eval_steps_per_second": 0.332, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8276, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_loss": 1.0014097690582275, |
|
"eval_runtime": 726.8878, |
|
"eval_samples_per_second": 2.655, |
|
"eval_steps_per_second": 0.333, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.0002, |
|
"loss": 0.814, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_loss": 0.9944414496421814, |
|
"eval_runtime": 729.3383, |
|
"eval_samples_per_second": 2.646, |
|
"eval_steps_per_second": 0.332, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8088, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_loss": 0.986289381980896, |
|
"eval_runtime": 729.5573, |
|
"eval_samples_per_second": 2.645, |
|
"eval_steps_per_second": 0.332, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8149, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_loss": 0.9808956980705261, |
|
"eval_runtime": 729.1444, |
|
"eval_samples_per_second": 2.647, |
|
"eval_steps_per_second": 0.332, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0002, |
|
"loss": 0.805, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_loss": 0.9745151996612549, |
|
"eval_runtime": 728.2022, |
|
"eval_samples_per_second": 2.65, |
|
"eval_steps_per_second": 0.332, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7946, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_loss": 0.9681212902069092, |
|
"eval_runtime": 729.3723, |
|
"eval_samples_per_second": 2.646, |
|
"eval_steps_per_second": 0.332, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7937, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_loss": 0.9620557427406311, |
|
"eval_runtime": 727.4393, |
|
"eval_samples_per_second": 2.653, |
|
"eval_steps_per_second": 0.333, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7863, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.9571696519851685, |
|
"eval_runtime": 726.583, |
|
"eval_samples_per_second": 2.656, |
|
"eval_steps_per_second": 0.333, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 0.0002, |
|
"loss": 0.766, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"eval_loss": 0.9550056457519531, |
|
"eval_runtime": 729.0402, |
|
"eval_samples_per_second": 2.647, |
|
"eval_steps_per_second": 0.332, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7569, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"eval_loss": 0.9508923292160034, |
|
"eval_runtime": 724.9845, |
|
"eval_samples_per_second": 2.662, |
|
"eval_steps_per_second": 0.334, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7573, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_loss": 0.9467226266860962, |
|
"eval_runtime": 729.7266, |
|
"eval_samples_per_second": 2.645, |
|
"eval_steps_per_second": 0.332, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7702, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_loss": 0.9409560561180115, |
|
"eval_runtime": 727.6024, |
|
"eval_samples_per_second": 2.653, |
|
"eval_steps_per_second": 0.333, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7577, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"eval_loss": 0.9364367723464966, |
|
"eval_runtime": 733.1744, |
|
"eval_samples_per_second": 2.632, |
|
"eval_steps_per_second": 0.33, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7525, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_loss": 0.9320885539054871, |
|
"eval_runtime": 728.1051, |
|
"eval_samples_per_second": 2.651, |
|
"eval_steps_per_second": 0.332, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7485, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"eval_loss": 0.9278258085250854, |
|
"eval_runtime": 857.14, |
|
"eval_samples_per_second": 2.252, |
|
"eval_steps_per_second": 0.282, |
|
"step": 2200 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 2250, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 5.6988624815579136e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|