|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 32.25806451612903, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 2.797335147857666, |
|
"eval_rouge1": 0.1301, |
|
"eval_rouge2": 0.0352, |
|
"eval_rougeL": 0.1074, |
|
"eval_rougeLsum": 0.1075, |
|
"eval_runtime": 18.6868, |
|
"eval_samples_per_second": 13.271, |
|
"eval_steps_per_second": 0.856, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 2.568485975265503, |
|
"eval_rouge1": 0.1455, |
|
"eval_rouge2": 0.051, |
|
"eval_rougeL": 0.1189, |
|
"eval_rougeLsum": 0.1187, |
|
"eval_runtime": 16.801, |
|
"eval_samples_per_second": 14.761, |
|
"eval_steps_per_second": 0.952, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 2.475400924682617, |
|
"eval_rouge1": 0.1674, |
|
"eval_rouge2": 0.0692, |
|
"eval_rougeL": 0.1397, |
|
"eval_rougeLsum": 0.1397, |
|
"eval_runtime": 17.3933, |
|
"eval_samples_per_second": 14.258, |
|
"eval_steps_per_second": 0.92, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 2.419504404067993, |
|
"eval_rouge1": 0.1901, |
|
"eval_rouge2": 0.0867, |
|
"eval_rougeL": 0.1586, |
|
"eval_rougeLsum": 0.1587, |
|
"eval_runtime": 16.7871, |
|
"eval_samples_per_second": 14.773, |
|
"eval_steps_per_second": 0.953, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 2.3755078315734863, |
|
"eval_rouge1": 0.1933, |
|
"eval_rouge2": 0.0907, |
|
"eval_rougeL": 0.1617, |
|
"eval_rougeLsum": 0.1619, |
|
"eval_runtime": 16.6712, |
|
"eval_samples_per_second": 14.876, |
|
"eval_steps_per_second": 0.96, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 2.3425652980804443, |
|
"eval_rouge1": 0.1946, |
|
"eval_rouge2": 0.0916, |
|
"eval_rougeL": 0.1634, |
|
"eval_rougeLsum": 0.1636, |
|
"eval_runtime": 17.1282, |
|
"eval_samples_per_second": 14.479, |
|
"eval_steps_per_second": 0.934, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 2.3197405338287354, |
|
"eval_rouge1": 0.1964, |
|
"eval_rouge2": 0.0929, |
|
"eval_rougeL": 0.1646, |
|
"eval_rougeLsum": 0.1648, |
|
"eval_runtime": 16.7039, |
|
"eval_samples_per_second": 14.847, |
|
"eval_steps_per_second": 0.958, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 2.2987782955169678, |
|
"eval_rouge1": 0.1968, |
|
"eval_rouge2": 0.0933, |
|
"eval_rougeL": 0.165, |
|
"eval_rougeLsum": 0.1653, |
|
"eval_runtime": 16.6651, |
|
"eval_samples_per_second": 14.881, |
|
"eval_steps_per_second": 0.96, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 1.6780645161290323e-05, |
|
"loss": 2.7011, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 2.279834270477295, |
|
"eval_rouge1": 0.1969, |
|
"eval_rouge2": 0.0946, |
|
"eval_rougeL": 0.1662, |
|
"eval_rougeLsum": 0.1665, |
|
"eval_runtime": 17.134, |
|
"eval_samples_per_second": 14.474, |
|
"eval_steps_per_second": 0.934, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 2.265596389770508, |
|
"eval_rouge1": 0.1987, |
|
"eval_rouge2": 0.0962, |
|
"eval_rougeL": 0.1672, |
|
"eval_rougeLsum": 0.1673, |
|
"eval_runtime": 17.1955, |
|
"eval_samples_per_second": 14.422, |
|
"eval_steps_per_second": 0.93, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 2.2547566890716553, |
|
"eval_rouge1": 0.1958, |
|
"eval_rouge2": 0.0965, |
|
"eval_rougeL": 0.1655, |
|
"eval_rougeLsum": 0.1657, |
|
"eval_runtime": 16.9264, |
|
"eval_samples_per_second": 14.652, |
|
"eval_steps_per_second": 0.945, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 2.243624210357666, |
|
"eval_rouge1": 0.1965, |
|
"eval_rouge2": 0.096, |
|
"eval_rougeL": 0.1659, |
|
"eval_rougeLsum": 0.166, |
|
"eval_runtime": 16.9191, |
|
"eval_samples_per_second": 14.658, |
|
"eval_steps_per_second": 0.946, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 2.2352294921875, |
|
"eval_rouge1": 0.1964, |
|
"eval_rouge2": 0.0971, |
|
"eval_rougeL": 0.1663, |
|
"eval_rougeLsum": 0.1664, |
|
"eval_runtime": 16.9525, |
|
"eval_samples_per_second": 14.629, |
|
"eval_steps_per_second": 0.944, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 2.2252049446105957, |
|
"eval_rouge1": 0.197, |
|
"eval_rouge2": 0.097, |
|
"eval_rougeL": 0.1664, |
|
"eval_rougeLsum": 0.1664, |
|
"eval_runtime": 16.904, |
|
"eval_samples_per_second": 14.671, |
|
"eval_steps_per_second": 0.947, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 2.2152445316314697, |
|
"eval_rouge1": 0.1954, |
|
"eval_rouge2": 0.0979, |
|
"eval_rougeL": 0.1664, |
|
"eval_rougeLsum": 0.1665, |
|
"eval_runtime": 17.117, |
|
"eval_samples_per_second": 14.489, |
|
"eval_steps_per_second": 0.935, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 2.207918643951416, |
|
"eval_rouge1": 0.1954, |
|
"eval_rouge2": 0.0988, |
|
"eval_rougeL": 0.1674, |
|
"eval_rougeLsum": 0.1677, |
|
"eval_runtime": 17.2029, |
|
"eval_samples_per_second": 14.416, |
|
"eval_steps_per_second": 0.93, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 16.13, |
|
"learning_rate": 1.355483870967742e-05, |
|
"loss": 2.3282, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 2.1986870765686035, |
|
"eval_rouge1": 0.1951, |
|
"eval_rouge2": 0.0995, |
|
"eval_rougeL": 0.1672, |
|
"eval_rougeLsum": 0.1673, |
|
"eval_runtime": 17.1147, |
|
"eval_samples_per_second": 14.49, |
|
"eval_steps_per_second": 0.935, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 2.1938998699188232, |
|
"eval_rouge1": 0.1974, |
|
"eval_rouge2": 0.1015, |
|
"eval_rougeL": 0.1695, |
|
"eval_rougeLsum": 0.1697, |
|
"eval_runtime": 16.7909, |
|
"eval_samples_per_second": 14.77, |
|
"eval_steps_per_second": 0.953, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 2.18984055519104, |
|
"eval_rouge1": 0.1965, |
|
"eval_rouge2": 0.1014, |
|
"eval_rougeL": 0.1691, |
|
"eval_rougeLsum": 0.1693, |
|
"eval_runtime": 16.6689, |
|
"eval_samples_per_second": 14.878, |
|
"eval_steps_per_second": 0.96, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 2.183218240737915, |
|
"eval_rouge1": 0.1963, |
|
"eval_rouge2": 0.0997, |
|
"eval_rougeL": 0.1683, |
|
"eval_rougeLsum": 0.1685, |
|
"eval_runtime": 17.3129, |
|
"eval_samples_per_second": 14.325, |
|
"eval_steps_per_second": 0.924, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 2.1765005588531494, |
|
"eval_rouge1": 0.1966, |
|
"eval_rouge2": 0.0991, |
|
"eval_rougeL": 0.1676, |
|
"eval_rougeLsum": 0.1678, |
|
"eval_runtime": 16.8703, |
|
"eval_samples_per_second": 14.7, |
|
"eval_steps_per_second": 0.948, |
|
"step": 1302 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 2.1725897789001465, |
|
"eval_rouge1": 0.1963, |
|
"eval_rouge2": 0.0989, |
|
"eval_rougeL": 0.1677, |
|
"eval_rougeLsum": 0.1676, |
|
"eval_runtime": 16.7813, |
|
"eval_samples_per_second": 14.778, |
|
"eval_steps_per_second": 0.953, |
|
"step": 1364 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 2.1676828861236572, |
|
"eval_rouge1": 0.1959, |
|
"eval_rouge2": 0.0988, |
|
"eval_rougeL": 0.168, |
|
"eval_rougeLsum": 0.168, |
|
"eval_runtime": 17.3121, |
|
"eval_samples_per_second": 14.325, |
|
"eval_steps_per_second": 0.924, |
|
"step": 1426 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 2.1647536754608154, |
|
"eval_rouge1": 0.1967, |
|
"eval_rouge2": 0.0994, |
|
"eval_rougeL": 0.169, |
|
"eval_rougeLsum": 0.1692, |
|
"eval_runtime": 16.9203, |
|
"eval_samples_per_second": 14.657, |
|
"eval_steps_per_second": 0.946, |
|
"step": 1488 |
|
}, |
|
{ |
|
"epoch": 24.19, |
|
"learning_rate": 1.0329032258064518e-05, |
|
"loss": 2.2281, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 2.15854549407959, |
|
"eval_rouge1": 0.1958, |
|
"eval_rouge2": 0.0988, |
|
"eval_rougeL": 0.1685, |
|
"eval_rougeLsum": 0.1687, |
|
"eval_runtime": 17.1171, |
|
"eval_samples_per_second": 14.488, |
|
"eval_steps_per_second": 0.935, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 2.1557765007019043, |
|
"eval_rouge1": 0.197, |
|
"eval_rouge2": 0.1, |
|
"eval_rougeL": 0.1698, |
|
"eval_rougeLsum": 0.1699, |
|
"eval_runtime": 16.8981, |
|
"eval_samples_per_second": 14.676, |
|
"eval_steps_per_second": 0.947, |
|
"step": 1612 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 2.1530044078826904, |
|
"eval_rouge1": 0.196, |
|
"eval_rouge2": 0.0994, |
|
"eval_rougeL": 0.1685, |
|
"eval_rougeLsum": 0.1687, |
|
"eval_runtime": 16.9499, |
|
"eval_samples_per_second": 14.631, |
|
"eval_steps_per_second": 0.944, |
|
"step": 1674 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 2.1497113704681396, |
|
"eval_rouge1": 0.1971, |
|
"eval_rouge2": 0.101, |
|
"eval_rougeL": 0.1697, |
|
"eval_rougeLsum": 0.1699, |
|
"eval_runtime": 16.8885, |
|
"eval_samples_per_second": 14.685, |
|
"eval_steps_per_second": 0.947, |
|
"step": 1736 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 2.1459004878997803, |
|
"eval_rouge1": 0.1972, |
|
"eval_rouge2": 0.1008, |
|
"eval_rougeL": 0.17, |
|
"eval_rougeLsum": 0.1701, |
|
"eval_runtime": 17.5571, |
|
"eval_samples_per_second": 14.125, |
|
"eval_steps_per_second": 0.911, |
|
"step": 1798 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 2.142939805984497, |
|
"eval_rouge1": 0.1946, |
|
"eval_rouge2": 0.0989, |
|
"eval_rougeL": 0.1677, |
|
"eval_rougeLsum": 0.1678, |
|
"eval_runtime": 17.4108, |
|
"eval_samples_per_second": 14.244, |
|
"eval_steps_per_second": 0.919, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 2.14223051071167, |
|
"eval_rouge1": 0.1958, |
|
"eval_rouge2": 0.1, |
|
"eval_rougeL": 0.1691, |
|
"eval_rougeLsum": 0.1692, |
|
"eval_runtime": 17.3596, |
|
"eval_samples_per_second": 14.286, |
|
"eval_steps_per_second": 0.922, |
|
"step": 1922 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 2.139946699142456, |
|
"eval_rouge1": 0.1952, |
|
"eval_rouge2": 0.0992, |
|
"eval_rougeL": 0.1687, |
|
"eval_rougeLsum": 0.1687, |
|
"eval_runtime": 17.1971, |
|
"eval_samples_per_second": 14.421, |
|
"eval_steps_per_second": 0.93, |
|
"step": 1984 |
|
}, |
|
{ |
|
"epoch": 32.26, |
|
"learning_rate": 7.103225806451613e-06, |
|
"loss": 2.1696, |
|
"step": 2000 |
|
} |
|
], |
|
"max_steps": 3100, |
|
"num_train_epochs": 50, |
|
"total_flos": 8635889668325376.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|