|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 19.78531558608845, |
|
"eval_steps": 500, |
|
"global_step": 720, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.0001, |
|
"loss": 8.9608, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_bleu": 0.2352, |
|
"eval_bp": 0.828, |
|
"eval_counts_1": 2306, |
|
"eval_counts_2": 50, |
|
"eval_counts_3": 12, |
|
"eval_counts_4": 2, |
|
"eval_exact_match": 0.0, |
|
"eval_f1": 0.0092, |
|
"eval_gen_len": 3.1969, |
|
"eval_loss": 2.8882896900177, |
|
"eval_precisions_1": 12.9, |
|
"eval_precisions_2": 0.319, |
|
"eval_precisions_3": 0.0891, |
|
"eval_precisions_4": 0.0178, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.0081, |
|
"eval_rouge2": 0.0022, |
|
"eval_rougeL": 0.0078, |
|
"eval_rougeLsum": 0.0078, |
|
"eval_runtime": 386.3015, |
|
"eval_samples_per_second": 5.705, |
|
"eval_steps_per_second": 1.426, |
|
"eval_sys_len": 17876, |
|
"eval_totals_1": 17876, |
|
"eval_totals_2": 15672, |
|
"eval_totals_3": 13468, |
|
"eval_totals_4": 11264, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.0001, |
|
"loss": 3.2364, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"eval_bleu": 6.7083, |
|
"eval_bp": 0.9954, |
|
"eval_counts_1": 6125, |
|
"eval_counts_2": 1727, |
|
"eval_counts_3": 687, |
|
"eval_counts_4": 277, |
|
"eval_exact_match": 0.0018, |
|
"eval_f1": 0.2514, |
|
"eval_gen_len": 11.8072, |
|
"eval_loss": 1.9241770505905151, |
|
"eval_precisions_1": 28.9571, |
|
"eval_precisions_2": 9.1144, |
|
"eval_precisions_3": 4.103, |
|
"eval_precisions_4": 1.9051, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.2457, |
|
"eval_rouge2": 0.1026, |
|
"eval_rougeL": 0.2345, |
|
"eval_rougeLsum": 0.2346, |
|
"eval_runtime": 440.0537, |
|
"eval_samples_per_second": 5.008, |
|
"eval_steps_per_second": 1.252, |
|
"eval_sys_len": 21152, |
|
"eval_totals_1": 21152, |
|
"eval_totals_2": 18948, |
|
"eval_totals_3": 16744, |
|
"eval_totals_4": 14540, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.0001, |
|
"loss": 2.4963, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 9.1493, |
|
"eval_bp": 0.752, |
|
"eval_counts_1": 6903, |
|
"eval_counts_2": 2271, |
|
"eval_counts_3": 975, |
|
"eval_counts_4": 409, |
|
"eval_exact_match": 0.01, |
|
"eval_f1": 0.2909, |
|
"eval_gen_len": 12.176, |
|
"eval_loss": 1.6558014154434204, |
|
"eval_precisions_1": 41.7428, |
|
"eval_precisions_2": 15.8446, |
|
"eval_precisions_3": 8.0386, |
|
"eval_precisions_4": 4.1209, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.2966, |
|
"eval_rouge2": 0.1415, |
|
"eval_rougeL": 0.2854, |
|
"eval_rougeLsum": 0.2852, |
|
"eval_runtime": 434.1741, |
|
"eval_samples_per_second": 5.076, |
|
"eval_steps_per_second": 1.269, |
|
"eval_sys_len": 16537, |
|
"eval_totals_1": 16537, |
|
"eval_totals_2": 14333, |
|
"eval_totals_3": 12129, |
|
"eval_totals_4": 9925, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 0.0001, |
|
"loss": 2.2314, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"eval_bleu": 10.187, |
|
"eval_bp": 0.7573, |
|
"eval_counts_1": 7160, |
|
"eval_counts_2": 2440, |
|
"eval_counts_3": 1098, |
|
"eval_counts_4": 501, |
|
"eval_exact_match": 0.0136, |
|
"eval_f1": 0.3069, |
|
"eval_gen_len": 12.157, |
|
"eval_loss": 1.5771422386169434, |
|
"eval_precisions_1": 43.0625, |
|
"eval_precisions_2": 16.9174, |
|
"eval_precisions_3": 8.986, |
|
"eval_precisions_4": 5.0025, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.314, |
|
"eval_rouge2": 0.1535, |
|
"eval_rougeL": 0.3028, |
|
"eval_rougeLsum": 0.3028, |
|
"eval_runtime": 436.5308, |
|
"eval_samples_per_second": 5.049, |
|
"eval_steps_per_second": 1.262, |
|
"eval_sys_len": 16627, |
|
"eval_totals_1": 16627, |
|
"eval_totals_2": 14423, |
|
"eval_totals_3": 12219, |
|
"eval_totals_4": 10015, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 0.0001, |
|
"loss": 2.0578, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"eval_bleu": 11.0621, |
|
"eval_bp": 0.7961, |
|
"eval_counts_1": 7447, |
|
"eval_counts_2": 2625, |
|
"eval_counts_3": 1214, |
|
"eval_counts_4": 566, |
|
"eval_exact_match": 0.0163, |
|
"eval_f1": 0.32, |
|
"eval_gen_len": 12.5585, |
|
"eval_loss": 1.5346813201904297, |
|
"eval_precisions_1": 43.0338, |
|
"eval_precisions_2": 17.383, |
|
"eval_precisions_3": 9.413, |
|
"eval_precisions_4": 5.2932, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3286, |
|
"eval_rouge2": 0.1628, |
|
"eval_rougeL": 0.3146, |
|
"eval_rougeLsum": 0.3146, |
|
"eval_runtime": 444.2911, |
|
"eval_samples_per_second": 4.961, |
|
"eval_steps_per_second": 1.24, |
|
"eval_sys_len": 17305, |
|
"eval_totals_1": 17305, |
|
"eval_totals_2": 15101, |
|
"eval_totals_3": 12897, |
|
"eval_totals_4": 10693, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8928, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"eval_bleu": 11.4063, |
|
"eval_bp": 0.7556, |
|
"eval_counts_1": 7396, |
|
"eval_counts_2": 2659, |
|
"eval_counts_3": 1257, |
|
"eval_counts_4": 611, |
|
"eval_exact_match": 0.0177, |
|
"eval_f1": 0.3234, |
|
"eval_gen_len": 12.1692, |
|
"eval_loss": 1.512817144393921, |
|
"eval_precisions_1": 44.5596, |
|
"eval_precisions_2": 18.473, |
|
"eval_precisions_3": 10.3117, |
|
"eval_precisions_4": 6.1186, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3326, |
|
"eval_rouge2": 0.1684, |
|
"eval_rougeL": 0.3198, |
|
"eval_rougeLsum": 0.3198, |
|
"eval_runtime": 441.07, |
|
"eval_samples_per_second": 4.997, |
|
"eval_steps_per_second": 1.249, |
|
"eval_sys_len": 16598, |
|
"eval_totals_1": 16598, |
|
"eval_totals_2": 14394, |
|
"eval_totals_3": 12190, |
|
"eval_totals_4": 9986, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8573, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"eval_bleu": 11.8292, |
|
"eval_bp": 0.7631, |
|
"eval_counts_1": 7531, |
|
"eval_counts_2": 2758, |
|
"eval_counts_3": 1313, |
|
"eval_counts_4": 641, |
|
"eval_exact_match": 0.0163, |
|
"eval_f1": 0.327, |
|
"eval_gen_len": 12.3035, |
|
"eval_loss": 1.4735780954360962, |
|
"eval_precisions_1": 45.0203, |
|
"eval_precisions_2": 18.9893, |
|
"eval_precisions_3": 10.6575, |
|
"eval_precisions_4": 6.3365, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3349, |
|
"eval_rouge2": 0.1717, |
|
"eval_rougeL": 0.3216, |
|
"eval_rougeLsum": 0.3216, |
|
"eval_runtime": 442.6304, |
|
"eval_samples_per_second": 4.979, |
|
"eval_steps_per_second": 1.245, |
|
"eval_sys_len": 16728, |
|
"eval_totals_1": 16728, |
|
"eval_totals_2": 14524, |
|
"eval_totals_3": 12320, |
|
"eval_totals_4": 10116, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7361, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bleu": 12.2208, |
|
"eval_bp": 0.7747, |
|
"eval_counts_1": 7658, |
|
"eval_counts_2": 2849, |
|
"eval_counts_3": 1368, |
|
"eval_counts_4": 668, |
|
"eval_exact_match": 0.0181, |
|
"eval_f1": 0.3334, |
|
"eval_gen_len": 12.4628, |
|
"eval_loss": 1.4544174671173096, |
|
"eval_precisions_1": 45.2387, |
|
"eval_precisions_2": 19.3494, |
|
"eval_precisions_3": 10.9265, |
|
"eval_precisions_4": 6.4754, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3414, |
|
"eval_rouge2": 0.1762, |
|
"eval_rougeL": 0.3283, |
|
"eval_rougeLsum": 0.3284, |
|
"eval_runtime": 442.3648, |
|
"eval_samples_per_second": 4.982, |
|
"eval_steps_per_second": 1.246, |
|
"eval_sys_len": 16928, |
|
"eval_totals_1": 16928, |
|
"eval_totals_2": 14724, |
|
"eval_totals_3": 12520, |
|
"eval_totals_4": 10316, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7162, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"eval_bleu": 12.4536, |
|
"eval_bp": 0.767, |
|
"eval_counts_1": 7703, |
|
"eval_counts_2": 2891, |
|
"eval_counts_3": 1390, |
|
"eval_counts_4": 694, |
|
"eval_exact_match": 0.0159, |
|
"eval_f1": 0.3374, |
|
"eval_gen_len": 12.4174, |
|
"eval_loss": 1.4459445476531982, |
|
"eval_precisions_1": 45.8648, |
|
"eval_precisions_2": 19.8136, |
|
"eval_precisions_3": 11.2214, |
|
"eval_precisions_4": 6.8153, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3454, |
|
"eval_rouge2": 0.1785, |
|
"eval_rougeL": 0.3325, |
|
"eval_rougeLsum": 0.3323, |
|
"eval_runtime": 436.4836, |
|
"eval_samples_per_second": 5.049, |
|
"eval_steps_per_second": 1.262, |
|
"eval_sys_len": 16795, |
|
"eval_totals_1": 16795, |
|
"eval_totals_2": 14591, |
|
"eval_totals_3": 12387, |
|
"eval_totals_4": 10183, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 9.98, |
|
"learning_rate": 0.0001, |
|
"loss": 1.6589, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 9.98, |
|
"eval_bleu": 12.8553, |
|
"eval_bp": 0.8002, |
|
"eval_counts_1": 7889, |
|
"eval_counts_2": 2983, |
|
"eval_counts_3": 1449, |
|
"eval_counts_4": 719, |
|
"eval_exact_match": 0.0172, |
|
"eval_f1": 0.3435, |
|
"eval_gen_len": 12.7101, |
|
"eval_loss": 1.438312292098999, |
|
"eval_precisions_1": 45.4017, |
|
"eval_precisions_2": 19.6612, |
|
"eval_precisions_3": 11.1737, |
|
"eval_precisions_4": 6.6797, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3519, |
|
"eval_rouge2": 0.1816, |
|
"eval_rougeL": 0.3375, |
|
"eval_rougeLsum": 0.3372, |
|
"eval_runtime": 449.3427, |
|
"eval_samples_per_second": 4.905, |
|
"eval_steps_per_second": 1.226, |
|
"eval_sys_len": 17376, |
|
"eval_totals_1": 17376, |
|
"eval_totals_2": 15172, |
|
"eval_totals_3": 12968, |
|
"eval_totals_4": 10764, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5571, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"eval_bleu": 12.9671, |
|
"eval_bp": 0.7894, |
|
"eval_counts_1": 7889, |
|
"eval_counts_2": 2994, |
|
"eval_counts_3": 1457, |
|
"eval_counts_4": 736, |
|
"eval_exact_match": 0.02, |
|
"eval_f1": 0.3457, |
|
"eval_gen_len": 12.6466, |
|
"eval_loss": 1.4213731288909912, |
|
"eval_precisions_1": 45.9063, |
|
"eval_precisions_2": 19.9853, |
|
"eval_precisions_3": 11.4033, |
|
"eval_precisions_4": 6.9611, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3529, |
|
"eval_rouge2": 0.1845, |
|
"eval_rougeL": 0.3392, |
|
"eval_rougeLsum": 0.3393, |
|
"eval_runtime": 440.5687, |
|
"eval_samples_per_second": 5.003, |
|
"eval_steps_per_second": 1.251, |
|
"eval_sys_len": 17185, |
|
"eval_totals_1": 17185, |
|
"eval_totals_2": 14981, |
|
"eval_totals_3": 12777, |
|
"eval_totals_4": 10573, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 11.98, |
|
"learning_rate": 0.0001, |
|
"loss": 1.5502, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 11.98, |
|
"eval_bleu": 13.0741, |
|
"eval_bp": 0.7712, |
|
"eval_counts_1": 7930, |
|
"eval_counts_2": 3008, |
|
"eval_counts_3": 1477, |
|
"eval_counts_4": 741, |
|
"eval_exact_match": 0.0213, |
|
"eval_f1": 0.3541, |
|
"eval_gen_len": 12.4483, |
|
"eval_loss": 1.4135174751281738, |
|
"eval_precisions_1": 47.0121, |
|
"eval_precisions_2": 20.5128, |
|
"eval_precisions_3": 11.8539, |
|
"eval_precisions_4": 7.225, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3619, |
|
"eval_rouge2": 0.189, |
|
"eval_rougeL": 0.3492, |
|
"eval_rougeLsum": 0.3491, |
|
"eval_runtime": 443.1145, |
|
"eval_samples_per_second": 4.974, |
|
"eval_steps_per_second": 1.243, |
|
"eval_sys_len": 16868, |
|
"eval_totals_1": 16868, |
|
"eval_totals_2": 14664, |
|
"eval_totals_3": 12460, |
|
"eval_totals_4": 10256, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.4564, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_bleu": 14.1014, |
|
"eval_bp": 0.8309, |
|
"eval_counts_1": 8268, |
|
"eval_counts_2": 3200, |
|
"eval_counts_3": 1616, |
|
"eval_counts_4": 837, |
|
"eval_exact_match": 0.0218, |
|
"eval_f1": 0.3647, |
|
"eval_gen_len": 13.2441, |
|
"eval_loss": 1.3942722082138062, |
|
"eval_precisions_1": 46.1152, |
|
"eval_precisions_2": 20.3498, |
|
"eval_precisions_3": 11.9518, |
|
"eval_precisions_4": 7.396, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3729, |
|
"eval_rouge2": 0.1974, |
|
"eval_rougeL": 0.3578, |
|
"eval_rougeLsum": 0.3576, |
|
"eval_runtime": 460.2282, |
|
"eval_samples_per_second": 4.789, |
|
"eval_steps_per_second": 1.197, |
|
"eval_sys_len": 17929, |
|
"eval_totals_1": 17929, |
|
"eval_totals_2": 15725, |
|
"eval_totals_3": 13521, |
|
"eval_totals_4": 11317, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.4522, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"eval_bleu": 13.7526, |
|
"eval_bp": 0.7667, |
|
"eval_counts_1": 8047, |
|
"eval_counts_2": 3130, |
|
"eval_counts_3": 1564, |
|
"eval_counts_4": 811, |
|
"eval_exact_match": 0.0227, |
|
"eval_f1": 0.3627, |
|
"eval_gen_len": 12.515, |
|
"eval_loss": 1.3952871561050415, |
|
"eval_precisions_1": 47.9302, |
|
"eval_precisions_2": 21.4604, |
|
"eval_precisions_3": 12.6323, |
|
"eval_precisions_4": 7.9689, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3712, |
|
"eval_rouge2": 0.197, |
|
"eval_rougeL": 0.3582, |
|
"eval_rougeLsum": 0.3581, |
|
"eval_runtime": 437.5396, |
|
"eval_samples_per_second": 5.037, |
|
"eval_steps_per_second": 1.259, |
|
"eval_sys_len": 16789, |
|
"eval_totals_1": 16789, |
|
"eval_totals_2": 14585, |
|
"eval_totals_3": 12381, |
|
"eval_totals_4": 10177, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 14.98, |
|
"learning_rate": 0.0001, |
|
"loss": 1.407, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 14.98, |
|
"eval_bleu": 14.7315, |
|
"eval_bp": 0.8306, |
|
"eval_counts_1": 8498, |
|
"eval_counts_2": 3358, |
|
"eval_counts_3": 1703, |
|
"eval_counts_4": 877, |
|
"eval_exact_match": 0.0213, |
|
"eval_f1": 0.3772, |
|
"eval_gen_len": 13.2849, |
|
"eval_loss": 1.3759350776672363, |
|
"eval_precisions_1": 47.4139, |
|
"eval_precisions_2": 21.3627, |
|
"eval_precisions_3": 12.6008, |
|
"eval_precisions_4": 7.7535, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3856, |
|
"eval_rouge2": 0.2063, |
|
"eval_rougeL": 0.3709, |
|
"eval_rougeLsum": 0.3706, |
|
"eval_runtime": 453.6157, |
|
"eval_samples_per_second": 4.859, |
|
"eval_steps_per_second": 1.215, |
|
"eval_sys_len": 17923, |
|
"eval_totals_1": 17923, |
|
"eval_totals_2": 15719, |
|
"eval_totals_3": 13515, |
|
"eval_totals_4": 11311, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 15.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.3294, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 15.99, |
|
"eval_bleu": 14.868, |
|
"eval_bp": 0.8044, |
|
"eval_counts_1": 8481, |
|
"eval_counts_2": 3407, |
|
"eval_counts_3": 1721, |
|
"eval_counts_4": 883, |
|
"eval_exact_match": 0.024, |
|
"eval_f1": 0.3822, |
|
"eval_gen_len": 12.9142, |
|
"eval_loss": 1.3775662183761597, |
|
"eval_precisions_1": 48.5989, |
|
"eval_precisions_2": 22.3454, |
|
"eval_precisions_3": 13.1948, |
|
"eval_precisions_4": 8.1465, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3907, |
|
"eval_rouge2": 0.211, |
|
"eval_rougeL": 0.3766, |
|
"eval_rougeLsum": 0.3766, |
|
"eval_runtime": 448.6685, |
|
"eval_samples_per_second": 4.912, |
|
"eval_steps_per_second": 1.228, |
|
"eval_sys_len": 17451, |
|
"eval_totals_1": 17451, |
|
"eval_totals_2": 15247, |
|
"eval_totals_3": 13043, |
|
"eval_totals_4": 10839, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 16.98, |
|
"learning_rate": 0.0001, |
|
"loss": 1.3294, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 16.98, |
|
"eval_bleu": 15.2312, |
|
"eval_bp": 0.835, |
|
"eval_counts_1": 8633, |
|
"eval_counts_2": 3464, |
|
"eval_counts_3": 1767, |
|
"eval_counts_4": 923, |
|
"eval_exact_match": 0.0263, |
|
"eval_f1": 0.3868, |
|
"eval_gen_len": 13.3103, |
|
"eval_loss": 1.380259394645691, |
|
"eval_precisions_1": 47.9505, |
|
"eval_precisions_2": 21.9241, |
|
"eval_precisions_3": 12.9965, |
|
"eval_precisions_4": 8.1022, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3946, |
|
"eval_rouge2": 0.2133, |
|
"eval_rougeL": 0.3801, |
|
"eval_rougeLsum": 0.3798, |
|
"eval_runtime": 456.612, |
|
"eval_samples_per_second": 4.827, |
|
"eval_steps_per_second": 1.207, |
|
"eval_sys_len": 18004, |
|
"eval_totals_1": 18004, |
|
"eval_totals_2": 15800, |
|
"eval_totals_3": 13596, |
|
"eval_totals_4": 11392, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2605, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_bleu": 14.779, |
|
"eval_bp": 0.8255, |
|
"eval_counts_1": 8560, |
|
"eval_counts_2": 3376, |
|
"eval_counts_3": 1695, |
|
"eval_counts_4": 880, |
|
"eval_exact_match": 0.0231, |
|
"eval_f1": 0.3846, |
|
"eval_gen_len": 13.1665, |
|
"eval_loss": 1.3709588050842285, |
|
"eval_precisions_1": 48.009, |
|
"eval_precisions_2": 21.605, |
|
"eval_precisions_3": 12.6285, |
|
"eval_precisions_4": 7.8445, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3922, |
|
"eval_rouge2": 0.2092, |
|
"eval_rougeL": 0.3778, |
|
"eval_rougeLsum": 0.3775, |
|
"eval_runtime": 456.164, |
|
"eval_samples_per_second": 4.832, |
|
"eval_steps_per_second": 1.208, |
|
"eval_sys_len": 17830, |
|
"eval_totals_1": 17830, |
|
"eval_totals_2": 15626, |
|
"eval_totals_3": 13422, |
|
"eval_totals_4": 11218, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2667, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"eval_bleu": 15.0008, |
|
"eval_bp": 0.8257, |
|
"eval_counts_1": 8664, |
|
"eval_counts_2": 3455, |
|
"eval_counts_3": 1733, |
|
"eval_counts_4": 882, |
|
"eval_exact_match": 0.0227, |
|
"eval_f1": 0.3906, |
|
"eval_gen_len": 13.2232, |
|
"eval_loss": 1.3694192171096802, |
|
"eval_precisions_1": 48.5814, |
|
"eval_precisions_2": 22.1049, |
|
"eval_precisions_3": 12.9078, |
|
"eval_precisions_4": 7.8596, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.3987, |
|
"eval_rouge2": 0.2138, |
|
"eval_rougeL": 0.3853, |
|
"eval_rougeLsum": 0.3851, |
|
"eval_runtime": 454.2362, |
|
"eval_samples_per_second": 4.852, |
|
"eval_steps_per_second": 1.213, |
|
"eval_sys_len": 17834, |
|
"eval_totals_1": 17834, |
|
"eval_totals_2": 15630, |
|
"eval_totals_3": 13426, |
|
"eval_totals_4": 11222, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 19.79, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2074, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 19.79, |
|
"eval_bleu": 15.0442, |
|
"eval_bp": 0.8369, |
|
"eval_counts_1": 8770, |
|
"eval_counts_2": 3465, |
|
"eval_counts_3": 1737, |
|
"eval_counts_4": 880, |
|
"eval_exact_match": 0.0227, |
|
"eval_f1": 0.3941, |
|
"eval_gen_len": 13.4424, |
|
"eval_loss": 1.365785837173462, |
|
"eval_precisions_1": 48.6169, |
|
"eval_precisions_2": 21.8819, |
|
"eval_precisions_3": 12.743, |
|
"eval_precisions_4": 7.7011, |
|
"eval_ref_len": 21250, |
|
"eval_rouge1": 0.4025, |
|
"eval_rouge2": 0.215, |
|
"eval_rougeL": 0.3883, |
|
"eval_rougeLsum": 0.3879, |
|
"eval_runtime": 459.1457, |
|
"eval_samples_per_second": 4.8, |
|
"eval_steps_per_second": 1.2, |
|
"eval_sys_len": 18039, |
|
"eval_totals_1": 18039, |
|
"eval_totals_2": 15835, |
|
"eval_totals_3": 13631, |
|
"eval_totals_4": 11427, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 19.79, |
|
"step": 720, |
|
"total_flos": 4.419252384883016e+17, |
|
"train_loss": 2.0875697082943385, |
|
"train_runtime": 23544.6757, |
|
"train_samples_per_second": 7.912, |
|
"train_steps_per_second": 0.031 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 720, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 4.419252384883016e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|