|
{ |
|
"best_metric": 1.9613432884216309, |
|
"best_model_checkpoint": "./outputs/checkpoint-2100", |
|
"epoch": 2.9829545454545454, |
|
"eval_steps": 100, |
|
"global_step": 2100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0002, |
|
"loss": 2.5435, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 2.2340614795684814, |
|
"eval_runtime": 54.1619, |
|
"eval_samples_per_second": 27.547, |
|
"eval_steps_per_second": 3.453, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0002, |
|
"loss": 2.3658, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 2.193166971206665, |
|
"eval_runtime": 51.9149, |
|
"eval_samples_per_second": 28.739, |
|
"eval_steps_per_second": 3.602, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0002, |
|
"loss": 2.335, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 2.1651222705841064, |
|
"eval_runtime": 51.9178, |
|
"eval_samples_per_second": 28.738, |
|
"eval_steps_per_second": 3.602, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0002, |
|
"loss": 2.3016, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 2.1437063217163086, |
|
"eval_runtime": 52.0427, |
|
"eval_samples_per_second": 28.669, |
|
"eval_steps_per_second": 3.593, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.0002, |
|
"loss": 2.2801, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 2.1198127269744873, |
|
"eval_runtime": 51.8718, |
|
"eval_samples_per_second": 28.763, |
|
"eval_steps_per_second": 3.605, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.0002, |
|
"loss": 2.2632, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_loss": 2.100426197052002, |
|
"eval_runtime": 51.8813, |
|
"eval_samples_per_second": 28.758, |
|
"eval_steps_per_second": 3.604, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.0002, |
|
"loss": 2.2421, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_loss": 2.0882084369659424, |
|
"eval_runtime": 51.8914, |
|
"eval_samples_per_second": 28.752, |
|
"eval_steps_per_second": 3.604, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.0002, |
|
"loss": 2.207, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_loss": 2.07395076751709, |
|
"eval_runtime": 51.9153, |
|
"eval_samples_per_second": 28.739, |
|
"eval_steps_per_second": 3.602, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1805, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_loss": 2.0585548877716064, |
|
"eval_runtime": 51.8541, |
|
"eval_samples_per_second": 28.773, |
|
"eval_steps_per_second": 3.606, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1797, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_loss": 2.0468482971191406, |
|
"eval_runtime": 51.913, |
|
"eval_samples_per_second": 28.74, |
|
"eval_steps_per_second": 3.602, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1872, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_loss": 2.037107467651367, |
|
"eval_runtime": 52.0873, |
|
"eval_samples_per_second": 28.644, |
|
"eval_steps_per_second": 3.59, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1651, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_loss": 2.0301010608673096, |
|
"eval_runtime": 51.8485, |
|
"eval_samples_per_second": 28.776, |
|
"eval_steps_per_second": 3.607, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1465, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_loss": 2.0145328044891357, |
|
"eval_runtime": 51.9711, |
|
"eval_samples_per_second": 28.708, |
|
"eval_steps_per_second": 3.598, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1501, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_loss": 2.0102152824401855, |
|
"eval_runtime": 51.9656, |
|
"eval_samples_per_second": 28.711, |
|
"eval_steps_per_second": 3.599, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0982, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"eval_loss": 2.0040762424468994, |
|
"eval_runtime": 51.9149, |
|
"eval_samples_per_second": 28.739, |
|
"eval_steps_per_second": 3.602, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1027, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"eval_loss": 1.9931957721710205, |
|
"eval_runtime": 51.964, |
|
"eval_samples_per_second": 28.712, |
|
"eval_steps_per_second": 3.599, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0865, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"eval_loss": 1.989241123199463, |
|
"eval_runtime": 51.9483, |
|
"eval_samples_per_second": 28.721, |
|
"eval_steps_per_second": 3.6, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0789, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_loss": 1.9806030988693237, |
|
"eval_runtime": 52.181, |
|
"eval_samples_per_second": 28.593, |
|
"eval_steps_per_second": 3.584, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0924, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"eval_loss": 1.9745317697525024, |
|
"eval_runtime": 52.0357, |
|
"eval_samples_per_second": 28.673, |
|
"eval_steps_per_second": 3.594, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0839, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"eval_loss": 1.9676258563995361, |
|
"eval_runtime": 52.0774, |
|
"eval_samples_per_second": 28.65, |
|
"eval_steps_per_second": 3.591, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 0.0002, |
|
"loss": 2.081, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"eval_loss": 1.9613432884216309, |
|
"eval_runtime": 52.027, |
|
"eval_samples_per_second": 28.677, |
|
"eval_steps_per_second": 3.594, |
|
"step": 2100 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 2112, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 6.092531163629568e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|