|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9999220394480393, |
|
"global_step": 6413, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.610166848588804e-05, |
|
"loss": 1.501, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_gen_len": 38.430038252466275, |
|
"eval_loss": 0.11725818365812302, |
|
"eval_runtime": 1525.1588, |
|
"eval_samples_per_second": 3.257, |
|
"eval_steps_per_second": 0.407, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.220333697177608e-05, |
|
"loss": 0.0972, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_gen_len": 38.57358566539158, |
|
"eval_loss": 0.10414853692054749, |
|
"eval_runtime": 1374.4335, |
|
"eval_samples_per_second": 3.614, |
|
"eval_steps_per_second": 0.452, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.830500545766412e-05, |
|
"loss": 0.0871, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_gen_len": 41.80269780551641, |
|
"eval_loss": 0.09756787866353989, |
|
"eval_runtime": 1416.1217, |
|
"eval_samples_per_second": 3.507, |
|
"eval_steps_per_second": 0.439, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.440667394355216e-05, |
|
"loss": 0.0821, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_gen_len": 38.83108516206966, |
|
"eval_loss": 0.09272466599941254, |
|
"eval_runtime": 1386.2597, |
|
"eval_samples_per_second": 3.583, |
|
"eval_steps_per_second": 0.448, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.05083424294402e-05, |
|
"loss": 0.0796, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_gen_len": 42.82927320314073, |
|
"eval_loss": 0.09029995650053024, |
|
"eval_runtime": 1431.1493, |
|
"eval_samples_per_second": 3.471, |
|
"eval_steps_per_second": 0.434, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.6610010915328243e-05, |
|
"loss": 0.0766, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_gen_len": 41.111938796053956, |
|
"eval_loss": 0.08799975365400314, |
|
"eval_runtime": 1401.8123, |
|
"eval_samples_per_second": 3.543, |
|
"eval_steps_per_second": 0.443, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.271167940121628e-05, |
|
"loss": 0.0741, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_gen_len": 40.73605798268573, |
|
"eval_loss": 0.08718982338905334, |
|
"eval_runtime": 1373.6325, |
|
"eval_samples_per_second": 3.616, |
|
"eval_steps_per_second": 0.452, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.881334788710432e-05, |
|
"loss": 0.0729, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_gen_len": 39.60056372055567, |
|
"eval_loss": 0.08510363847017288, |
|
"eval_runtime": 1369.314, |
|
"eval_samples_per_second": 3.627, |
|
"eval_steps_per_second": 0.454, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.491501637299236e-05, |
|
"loss": 0.0723, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_gen_len": 40.53654117173344, |
|
"eval_loss": 0.08480597287416458, |
|
"eval_runtime": 1394.4001, |
|
"eval_samples_per_second": 3.562, |
|
"eval_steps_per_second": 0.445, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.1016684858880399e-05, |
|
"loss": 0.0729, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_gen_len": 41.251258304811756, |
|
"eval_loss": 0.08257019519805908, |
|
"eval_runtime": 1406.7799, |
|
"eval_samples_per_second": 3.531, |
|
"eval_steps_per_second": 0.441, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.11835334476844e-06, |
|
"loss": 0.071, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_gen_len": 41.85141936782766, |
|
"eval_loss": 0.08206828683614731, |
|
"eval_runtime": 1411.1175, |
|
"eval_samples_per_second": 3.52, |
|
"eval_steps_per_second": 0.44, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.220021830656479e-06, |
|
"loss": 0.0699, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_gen_len": 41.36098248439702, |
|
"eval_loss": 0.08196299523115158, |
|
"eval_runtime": 1408.2137, |
|
"eval_samples_per_second": 3.527, |
|
"eval_steps_per_second": 0.441, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 6413, |
|
"total_flos": 1.2273483536252928e+17, |
|
"train_loss": 0.18819899583558072, |
|
"train_runtime": 25349.1542, |
|
"train_samples_per_second": 4.048, |
|
"train_steps_per_second": 0.253 |
|
} |
|
], |
|
"max_steps": 6413, |
|
"num_train_epochs": 1, |
|
"total_flos": 1.2273483536252928e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|