|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"global_step": 7660, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.9878163251157847e-05, |
|
"loss": 2.5054, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 2.3042960166931152, |
|
"eval_runtime": 6.5074, |
|
"eval_samples_per_second": 941.694, |
|
"eval_steps_per_second": 14.752, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.9513096703453338e-05, |
|
"loss": 2.3885, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 2.2446680068969727, |
|
"eval_runtime": 6.5762, |
|
"eval_samples_per_second": 931.841, |
|
"eval_steps_per_second": 14.598, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.8913786142751957e-05, |
|
"loss": 2.3418, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 2.231914758682251, |
|
"eval_runtime": 6.5679, |
|
"eval_samples_per_second": 933.023, |
|
"eval_steps_per_second": 14.617, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 1.809498858405589e-05, |
|
"loss": 2.3045, |
|
"step": 1532 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 2.188257932662964, |
|
"eval_runtime": 6.6128, |
|
"eval_samples_per_second": 926.691, |
|
"eval_steps_per_second": 14.517, |
|
"step": 1532 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 1.7076865540693534e-05, |
|
"loss": 2.2772, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 2.189318895339966, |
|
"eval_runtime": 6.8537, |
|
"eval_samples_per_second": 894.11, |
|
"eval_steps_per_second": 14.007, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 1.588448658094913e-05, |
|
"loss": 2.2543, |
|
"step": 2298 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 2.168344736099243, |
|
"eval_runtime": 6.7894, |
|
"eval_samples_per_second": 902.58, |
|
"eval_steps_per_second": 14.14, |
|
"step": 2298 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 1.4547212032090465e-05, |
|
"loss": 2.2308, |
|
"step": 2681 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 2.1453945636749268, |
|
"eval_runtime": 6.5512, |
|
"eval_samples_per_second": 935.407, |
|
"eval_steps_per_second": 14.654, |
|
"step": 2681 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 1.3097970031670223e-05, |
|
"loss": 2.2139, |
|
"step": 3064 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 2.1402783393859863, |
|
"eval_runtime": 6.5858, |
|
"eval_samples_per_second": 930.487, |
|
"eval_steps_per_second": 14.577, |
|
"step": 3064 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 1.1572445727497194e-05, |
|
"loss": 2.2008, |
|
"step": 3447 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 2.11649489402771, |
|
"eval_runtime": 6.5807, |
|
"eval_samples_per_second": 931.208, |
|
"eval_steps_per_second": 14.588, |
|
"step": 3447 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 1.0008202590865013e-05, |
|
"loss": 2.1937, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 2.1280927658081055, |
|
"eval_runtime": 6.6743, |
|
"eval_samples_per_second": 918.143, |
|
"eval_steps_per_second": 14.383, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 8.443757479222894e-06, |
|
"loss": 2.1778, |
|
"step": 4213 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 2.1188840866088867, |
|
"eval_runtime": 6.8275, |
|
"eval_samples_per_second": 897.543, |
|
"eval_steps_per_second": 14.061, |
|
"step": 4213 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 6.917632223315125e-06, |
|
"loss": 2.1742, |
|
"step": 4596 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 2.121837854385376, |
|
"eval_runtime": 6.6231, |
|
"eval_samples_per_second": 925.24, |
|
"eval_steps_per_second": 14.495, |
|
"step": 4596 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 5.467405091861484e-06, |
|
"loss": 2.1611, |
|
"step": 4979 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 2.099580764770508, |
|
"eval_runtime": 6.7959, |
|
"eval_samples_per_second": 901.723, |
|
"eval_steps_per_second": 14.126, |
|
"step": 4979 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 4.12878548986627e-06, |
|
"loss": 2.1562, |
|
"step": 5362 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 2.099181890487671, |
|
"eval_runtime": 6.8991, |
|
"eval_samples_per_second": 888.236, |
|
"eval_steps_per_second": 13.915, |
|
"step": 5362 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 2.934734674554366e-06, |
|
"loss": 2.1508, |
|
"step": 5745 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 2.1012661457061768, |
|
"eval_runtime": 6.8384, |
|
"eval_samples_per_second": 896.11, |
|
"eval_steps_per_second": 14.038, |
|
"step": 5745 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 1.9146541398261966e-06, |
|
"loss": 2.1469, |
|
"step": 6128 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 2.094507932662964, |
|
"eval_runtime": 6.7332, |
|
"eval_samples_per_second": 910.114, |
|
"eval_steps_per_second": 14.258, |
|
"step": 6128 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 1.0936616538999777e-06, |
|
"loss": 2.1437, |
|
"step": 6511 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 2.089902639389038, |
|
"eval_runtime": 6.6174, |
|
"eval_samples_per_second": 926.05, |
|
"eval_steps_per_second": 14.507, |
|
"step": 6511 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 4.919727764970517e-07, |
|
"loss": 2.1436, |
|
"step": 6894 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 2.082735061645508, |
|
"eval_runtime": 6.5958, |
|
"eval_samples_per_second": 929.079, |
|
"eval_steps_per_second": 14.555, |
|
"step": 6894 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 1.2440308466997952e-07, |
|
"loss": 2.1443, |
|
"step": 7277 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 2.0934832096099854, |
|
"eval_runtime": 6.5708, |
|
"eval_samples_per_second": 932.611, |
|
"eval_steps_per_second": 14.61, |
|
"step": 7277 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 3.3641254104654906e-12, |
|
"loss": 2.1389, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 2.0939252376556396, |
|
"eval_runtime": 6.5634, |
|
"eval_samples_per_second": 933.664, |
|
"eval_steps_per_second": 14.627, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 7660, |
|
"total_flos": 3.24907393591296e+16, |
|
"train_loss": 2.2224227227990374, |
|
"train_runtime": 2024.9575, |
|
"train_samples_per_second": 242.079, |
|
"train_steps_per_second": 3.783 |
|
} |
|
], |
|
"max_steps": 7660, |
|
"num_train_epochs": 20, |
|
"total_flos": 3.24907393591296e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|