|
{ |
|
"best_metric": 2.214895725250244, |
|
"best_model_checkpoint": "dq158/pingusPongus/checkpoint-15810", |
|
"epoch": 11.0, |
|
"eval_steps": 500, |
|
"global_step": 17391, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0001, |
|
"loss": 3.1034, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.993631921956987e-05, |
|
"loss": 2.6893, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 9.974543908795133e-05, |
|
"loss": 2.5782, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 1.0, |
|
"eval_brevity_penalty": 1.0, |
|
"eval_length_ratio": 1.0, |
|
"eval_loss": 2.409773349761963, |
|
"eval_precisions": [ |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0 |
|
], |
|
"eval_reference_length": 52663, |
|
"eval_runtime": 589.2444, |
|
"eval_samples_per_second": 4.771, |
|
"eval_steps_per_second": 0.597, |
|
"eval_translation_length": 52663, |
|
"step": 1581 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 9.942784582097439e-05, |
|
"loss": 2.4939, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 9.898434840212306e-05, |
|
"loss": 2.4615, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 9.841607652186736e-05, |
|
"loss": 2.4147, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 1.0, |
|
"eval_brevity_penalty": 1.0, |
|
"eval_length_ratio": 1.0, |
|
"eval_loss": 2.320283889770508, |
|
"eval_precisions": [ |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0 |
|
], |
|
"eval_reference_length": 53341, |
|
"eval_runtime": 585.7187, |
|
"eval_samples_per_second": 4.799, |
|
"eval_steps_per_second": 0.601, |
|
"eval_translation_length": 53341, |
|
"step": 3162 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 9.772447770008058e-05, |
|
"loss": 2.3609, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 9.691131359887135e-05, |
|
"loss": 2.3422, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 9.597865553522297e-05, |
|
"loss": 2.3484, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 1.0, |
|
"eval_brevity_penalty": 1.0, |
|
"eval_length_ratio": 1.0, |
|
"eval_loss": 2.2784500122070312, |
|
"eval_precisions": [ |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0 |
|
], |
|
"eval_reference_length": 52556, |
|
"eval_runtime": 583.4704, |
|
"eval_samples_per_second": 4.818, |
|
"eval_steps_per_second": 0.603, |
|
"eval_translation_length": 52556, |
|
"step": 4743 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 9.492887920487015e-05, |
|
"loss": 2.2993, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 9.376465863085263e-05, |
|
"loss": 2.2738, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 9.24889593521603e-05, |
|
"loss": 2.2585, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 1.0, |
|
"eval_brevity_penalty": 1.0, |
|
"eval_length_ratio": 1.0, |
|
"eval_loss": 2.257721424102783, |
|
"eval_precisions": [ |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0 |
|
], |
|
"eval_reference_length": 53121, |
|
"eval_runtime": 580.1345, |
|
"eval_samples_per_second": 4.845, |
|
"eval_steps_per_second": 0.607, |
|
"eval_translation_length": 53121, |
|
"step": 6324 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 9.110503086981956e-05, |
|
"loss": 2.2444, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 8.961639836966304e-05, |
|
"loss": 2.2152, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 8.802685374286608e-05, |
|
"loss": 2.2, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bleu": 1.0, |
|
"eval_brevity_penalty": 1.0, |
|
"eval_length_ratio": 1.0, |
|
"eval_loss": 2.240872383117676, |
|
"eval_precisions": [ |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0 |
|
], |
|
"eval_reference_length": 52857, |
|
"eval_runtime": 579.7989, |
|
"eval_samples_per_second": 4.848, |
|
"eval_steps_per_second": 0.607, |
|
"eval_translation_length": 52857, |
|
"step": 7905 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 8.63404459271232e-05, |
|
"loss": 2.2021, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 8.456147059306759e-05, |
|
"loss": 2.1521, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"learning_rate": 8.26944592022048e-05, |
|
"loss": 2.1533, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bleu": 1.0, |
|
"eval_brevity_penalty": 1.0, |
|
"eval_length_ratio": 1.0, |
|
"eval_loss": 2.2270374298095703, |
|
"eval_precisions": [ |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0 |
|
], |
|
"eval_reference_length": 52650, |
|
"eval_runtime": 581.0567, |
|
"eval_samples_per_second": 4.838, |
|
"eval_steps_per_second": 0.606, |
|
"eval_translation_length": 52650, |
|
"step": 9486 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 8.07441674642325e-05, |
|
"loss": 2.1582, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 7.871556322314834e-05, |
|
"loss": 2.1203, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"learning_rate": 7.661381380300255e-05, |
|
"loss": 2.1193, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"learning_rate": 7.444427284552887e-05, |
|
"loss": 2.106, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_bleu": 1.0, |
|
"eval_brevity_penalty": 1.0, |
|
"eval_length_ratio": 1.0, |
|
"eval_loss": 2.2215259075164795, |
|
"eval_precisions": [ |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0 |
|
], |
|
"eval_reference_length": 53190, |
|
"eval_runtime": 584.4443, |
|
"eval_samples_per_second": 4.81, |
|
"eval_steps_per_second": 0.602, |
|
"eval_translation_length": 53190, |
|
"step": 11067 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"learning_rate": 7.221246667318116e-05, |
|
"loss": 2.0653, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"learning_rate": 6.992408021231241e-05, |
|
"loss": 2.0783, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"learning_rate": 6.758494251235275e-05, |
|
"loss": 2.0813, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bleu": 1.0, |
|
"eval_brevity_penalty": 1.0, |
|
"eval_length_ratio": 1.0, |
|
"eval_loss": 2.219463586807251, |
|
"eval_precisions": [ |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0 |
|
], |
|
"eval_reference_length": 52305, |
|
"eval_runtime": 583.0366, |
|
"eval_samples_per_second": 4.821, |
|
"eval_steps_per_second": 0.604, |
|
"eval_translation_length": 52305, |
|
"step": 12648 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"learning_rate": 6.520101189787285e-05, |
|
"loss": 2.0482, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 8.54, |
|
"learning_rate": 6.27783607913536e-05, |
|
"loss": 2.0496, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"learning_rate": 6.03231602453219e-05, |
|
"loss": 2.0406, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_bleu": 1.0, |
|
"eval_brevity_penalty": 1.0, |
|
"eval_length_ratio": 1.0, |
|
"eval_loss": 2.215216636657715, |
|
"eval_precisions": [ |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0 |
|
], |
|
"eval_reference_length": 52546, |
|
"eval_runtime": 581.8998, |
|
"eval_samples_per_second": 4.831, |
|
"eval_steps_per_second": 0.605, |
|
"eval_translation_length": 52546, |
|
"step": 14229 |
|
}, |
|
{ |
|
"epoch": 9.17, |
|
"learning_rate": 5.784166422325311e-05, |
|
"loss": 2.0341, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 9.49, |
|
"learning_rate": 5.534019366928e-05, |
|
"loss": 2.0094, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"learning_rate": 5.282512040728659e-05, |
|
"loss": 2.0032, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_bleu": 1.0, |
|
"eval_brevity_penalty": 1.0, |
|
"eval_length_ratio": 1.0, |
|
"eval_loss": 2.214895725250244, |
|
"eval_precisions": [ |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0 |
|
], |
|
"eval_reference_length": 52363, |
|
"eval_runtime": 584.3355, |
|
"eval_samples_per_second": 4.811, |
|
"eval_steps_per_second": 0.602, |
|
"eval_translation_length": 52363, |
|
"step": 15810 |
|
}, |
|
{ |
|
"epoch": 10.12, |
|
"learning_rate": 5.030285091039936e-05, |
|
"loss": 1.9783, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 10.44, |
|
"learning_rate": 4.777980998221901e-05, |
|
"loss": 1.9852, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 10.75, |
|
"learning_rate": 4.5262424391360075e-05, |
|
"loss": 1.9982, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_bleu": 1.0, |
|
"eval_brevity_penalty": 1.0, |
|
"eval_length_ratio": 1.0, |
|
"eval_loss": 2.2150418758392334, |
|
"eval_precisions": [ |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0 |
|
], |
|
"eval_reference_length": 53036, |
|
"eval_runtime": 587.6164, |
|
"eval_samples_per_second": 4.784, |
|
"eval_steps_per_second": 0.599, |
|
"eval_translation_length": 53036, |
|
"step": 17391 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 31620, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 1.9050018020838605e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|