|
{ |
|
"best_metric": 0.9637255072593689, |
|
"best_model_checkpoint": "./outputs/checkpoint-2200", |
|
"epoch": 2.9333333333333336, |
|
"eval_steps": 100, |
|
"global_step": 2200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0609, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 1.075240969657898, |
|
"eval_runtime": 354.5496, |
|
"eval_samples_per_second": 5.444, |
|
"eval_steps_per_second": 0.683, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9179, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 1.0535345077514648, |
|
"eval_runtime": 353.5007, |
|
"eval_samples_per_second": 5.46, |
|
"eval_steps_per_second": 0.685, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9039, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 1.0421228408813477, |
|
"eval_runtime": 353.3408, |
|
"eval_samples_per_second": 5.462, |
|
"eval_steps_per_second": 0.685, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8945, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 1.033984661102295, |
|
"eval_runtime": 353.6405, |
|
"eval_samples_per_second": 5.458, |
|
"eval_steps_per_second": 0.684, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8815, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 1.0270758867263794, |
|
"eval_runtime": 353.4888, |
|
"eval_samples_per_second": 5.46, |
|
"eval_steps_per_second": 0.685, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8818, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 1.0182862281799316, |
|
"eval_runtime": 353.8429, |
|
"eval_samples_per_second": 5.454, |
|
"eval_steps_per_second": 0.684, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8787, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_loss": 1.0158191919326782, |
|
"eval_runtime": 354.0822, |
|
"eval_samples_per_second": 5.451, |
|
"eval_steps_per_second": 0.683, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8685, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_loss": 1.0099796056747437, |
|
"eval_runtime": 354.7588, |
|
"eval_samples_per_second": 5.44, |
|
"eval_steps_per_second": 0.682, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8637, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_loss": 1.006648063659668, |
|
"eval_runtime": 354.057, |
|
"eval_samples_per_second": 5.451, |
|
"eval_steps_per_second": 0.684, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8554, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_loss": 1.0002124309539795, |
|
"eval_runtime": 354.0353, |
|
"eval_samples_per_second": 5.451, |
|
"eval_steps_per_second": 0.684, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.0002, |
|
"loss": 0.859, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_loss": 0.9991586804389954, |
|
"eval_runtime": 354.265, |
|
"eval_samples_per_second": 5.448, |
|
"eval_steps_per_second": 0.683, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8584, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_loss": 0.9944009184837341, |
|
"eval_runtime": 353.9279, |
|
"eval_samples_per_second": 5.453, |
|
"eval_steps_per_second": 0.684, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.0002, |
|
"loss": 0.852, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_loss": 0.989990234375, |
|
"eval_runtime": 354.4704, |
|
"eval_samples_per_second": 5.445, |
|
"eval_steps_per_second": 0.683, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.0002, |
|
"loss": 0.846, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_loss": 0.9865836501121521, |
|
"eval_runtime": 358.9224, |
|
"eval_samples_per_second": 5.377, |
|
"eval_steps_per_second": 0.674, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.0002, |
|
"loss": 0.847, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.9840803742408752, |
|
"eval_runtime": 354.4171, |
|
"eval_samples_per_second": 5.446, |
|
"eval_steps_per_second": 0.683, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8378, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"eval_loss": 0.9801441431045532, |
|
"eval_runtime": 355.2231, |
|
"eval_samples_per_second": 5.433, |
|
"eval_steps_per_second": 0.681, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8418, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"eval_loss": 0.9762414693832397, |
|
"eval_runtime": 354.5929, |
|
"eval_samples_per_second": 5.443, |
|
"eval_steps_per_second": 0.682, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 0.0002, |
|
"loss": 0.831, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_loss": 0.975382924079895, |
|
"eval_runtime": 378.7821, |
|
"eval_samples_per_second": 5.095, |
|
"eval_steps_per_second": 0.639, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8297, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_loss": 0.9727230072021484, |
|
"eval_runtime": 366.7965, |
|
"eval_samples_per_second": 5.262, |
|
"eval_steps_per_second": 0.66, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8283, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"eval_loss": 0.9686517715454102, |
|
"eval_runtime": 366.7742, |
|
"eval_samples_per_second": 5.262, |
|
"eval_steps_per_second": 0.66, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8303, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_loss": 0.9657241106033325, |
|
"eval_runtime": 368.2461, |
|
"eval_samples_per_second": 5.241, |
|
"eval_steps_per_second": 0.657, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8358, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"eval_loss": 0.9637255072593689, |
|
"eval_runtime": 365.4459, |
|
"eval_samples_per_second": 5.281, |
|
"eval_steps_per_second": 0.662, |
|
"step": 2200 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 2250, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 6.220289728654295e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|