{ "best_metric": 2.214895725250244, "best_model_checkpoint": "dq158/pingusPongus/checkpoint-15810", "epoch": 11.0, "eval_steps": 500, "global_step": 17391, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.32, "learning_rate": 0.0001, "loss": 3.1034, "step": 500 }, { "epoch": 0.63, "learning_rate": 9.993631921956987e-05, "loss": 2.6893, "step": 1000 }, { "epoch": 0.95, "learning_rate": 9.974543908795133e-05, "loss": 2.5782, "step": 1500 }, { "epoch": 1.0, "eval_bleu": 1.0, "eval_brevity_penalty": 1.0, "eval_length_ratio": 1.0, "eval_loss": 2.409773349761963, "eval_precisions": [ 1.0, 1.0, 1.0, 1.0 ], "eval_reference_length": 52663, "eval_runtime": 589.2444, "eval_samples_per_second": 4.771, "eval_steps_per_second": 0.597, "eval_translation_length": 52663, "step": 1581 }, { "epoch": 1.27, "learning_rate": 9.942784582097439e-05, "loss": 2.4939, "step": 2000 }, { "epoch": 1.58, "learning_rate": 9.898434840212306e-05, "loss": 2.4615, "step": 2500 }, { "epoch": 1.9, "learning_rate": 9.841607652186736e-05, "loss": 2.4147, "step": 3000 }, { "epoch": 2.0, "eval_bleu": 1.0, "eval_brevity_penalty": 1.0, "eval_length_ratio": 1.0, "eval_loss": 2.320283889770508, "eval_precisions": [ 1.0, 1.0, 1.0, 1.0 ], "eval_reference_length": 53341, "eval_runtime": 585.7187, "eval_samples_per_second": 4.799, "eval_steps_per_second": 0.601, "eval_translation_length": 53341, "step": 3162 }, { "epoch": 2.21, "learning_rate": 9.772447770008058e-05, "loss": 2.3609, "step": 3500 }, { "epoch": 2.53, "learning_rate": 9.691131359887135e-05, "loss": 2.3422, "step": 4000 }, { "epoch": 2.85, "learning_rate": 9.597865553522297e-05, "loss": 2.3484, "step": 4500 }, { "epoch": 3.0, "eval_bleu": 1.0, "eval_brevity_penalty": 1.0, "eval_length_ratio": 1.0, "eval_loss": 2.2784500122070312, "eval_precisions": [ 1.0, 1.0, 1.0, 1.0 ], "eval_reference_length": 52556, "eval_runtime": 583.4704, "eval_samples_per_second": 4.818, "eval_steps_per_second": 0.603, "eval_translation_length": 52556, "step": 4743 }, { "epoch": 3.16, "learning_rate": 9.492887920487015e-05, "loss": 2.2993, "step": 5000 }, { "epoch": 3.48, "learning_rate": 9.376465863085263e-05, "loss": 2.2738, "step": 5500 }, { "epoch": 3.8, "learning_rate": 9.24889593521603e-05, "loss": 2.2585, "step": 6000 }, { "epoch": 4.0, "eval_bleu": 1.0, "eval_brevity_penalty": 1.0, "eval_length_ratio": 1.0, "eval_loss": 2.257721424102783, "eval_precisions": [ 1.0, 1.0, 1.0, 1.0 ], "eval_reference_length": 53121, "eval_runtime": 580.1345, "eval_samples_per_second": 4.845, "eval_steps_per_second": 0.607, "eval_translation_length": 53121, "step": 6324 }, { "epoch": 4.11, "learning_rate": 9.110503086981956e-05, "loss": 2.2444, "step": 6500 }, { "epoch": 4.43, "learning_rate": 8.961639836966304e-05, "loss": 2.2152, "step": 7000 }, { "epoch": 4.74, "learning_rate": 8.802685374286608e-05, "loss": 2.2, "step": 7500 }, { "epoch": 5.0, "eval_bleu": 1.0, "eval_brevity_penalty": 1.0, "eval_length_ratio": 1.0, "eval_loss": 2.240872383117676, "eval_precisions": [ 1.0, 1.0, 1.0, 1.0 ], "eval_reference_length": 52857, "eval_runtime": 579.7989, "eval_samples_per_second": 4.848, "eval_steps_per_second": 0.607, "eval_translation_length": 52857, "step": 7905 }, { "epoch": 5.06, "learning_rate": 8.63404459271232e-05, "loss": 2.2021, "step": 8000 }, { "epoch": 5.38, "learning_rate": 8.456147059306759e-05, "loss": 2.1521, "step": 8500 }, { "epoch": 5.69, "learning_rate": 8.26944592022048e-05, "loss": 2.1533, "step": 9000 }, { "epoch": 6.0, "eval_bleu": 1.0, "eval_brevity_penalty": 1.0, "eval_length_ratio": 1.0, "eval_loss": 2.2270374298095703, "eval_precisions": [ 1.0, 1.0, 1.0, 1.0 ], "eval_reference_length": 52650, "eval_runtime": 581.0567, "eval_samples_per_second": 4.838, "eval_steps_per_second": 0.606, "eval_translation_length": 52650, "step": 9486 }, { "epoch": 6.01, "learning_rate": 8.07441674642325e-05, "loss": 2.1582, "step": 9500 }, { "epoch": 6.33, "learning_rate": 7.871556322314834e-05, "loss": 2.1203, "step": 10000 }, { "epoch": 6.64, "learning_rate": 7.661381380300255e-05, "loss": 2.1193, "step": 10500 }, { "epoch": 6.96, "learning_rate": 7.444427284552887e-05, "loss": 2.106, "step": 11000 }, { "epoch": 7.0, "eval_bleu": 1.0, "eval_brevity_penalty": 1.0, "eval_length_ratio": 1.0, "eval_loss": 2.2215259075164795, "eval_precisions": [ 1.0, 1.0, 1.0, 1.0 ], "eval_reference_length": 53190, "eval_runtime": 584.4443, "eval_samples_per_second": 4.81, "eval_steps_per_second": 0.602, "eval_translation_length": 53190, "step": 11067 }, { "epoch": 7.27, "learning_rate": 7.221246667318116e-05, "loss": 2.0653, "step": 11500 }, { "epoch": 7.59, "learning_rate": 6.992408021231241e-05, "loss": 2.0783, "step": 12000 }, { "epoch": 7.91, "learning_rate": 6.758494251235275e-05, "loss": 2.0813, "step": 12500 }, { "epoch": 8.0, "eval_bleu": 1.0, "eval_brevity_penalty": 1.0, "eval_length_ratio": 1.0, "eval_loss": 2.219463586807251, "eval_precisions": [ 1.0, 1.0, 1.0, 1.0 ], "eval_reference_length": 52305, "eval_runtime": 583.0366, "eval_samples_per_second": 4.821, "eval_steps_per_second": 0.604, "eval_translation_length": 52305, "step": 12648 }, { "epoch": 8.22, "learning_rate": 6.520101189787285e-05, "loss": 2.0482, "step": 13000 }, { "epoch": 8.54, "learning_rate": 6.27783607913536e-05, "loss": 2.0496, "step": 13500 }, { "epoch": 8.86, "learning_rate": 6.03231602453219e-05, "loss": 2.0406, "step": 14000 }, { "epoch": 9.0, "eval_bleu": 1.0, "eval_brevity_penalty": 1.0, "eval_length_ratio": 1.0, "eval_loss": 2.215216636657715, "eval_precisions": [ 1.0, 1.0, 1.0, 1.0 ], "eval_reference_length": 52546, "eval_runtime": 581.8998, "eval_samples_per_second": 4.831, "eval_steps_per_second": 0.605, "eval_translation_length": 52546, "step": 14229 }, { "epoch": 9.17, "learning_rate": 5.784166422325311e-05, "loss": 2.0341, "step": 14500 }, { "epoch": 9.49, "learning_rate": 5.534019366928e-05, "loss": 2.0094, "step": 15000 }, { "epoch": 9.8, "learning_rate": 5.282512040728659e-05, "loss": 2.0032, "step": 15500 }, { "epoch": 10.0, "eval_bleu": 1.0, "eval_brevity_penalty": 1.0, "eval_length_ratio": 1.0, "eval_loss": 2.214895725250244, "eval_precisions": [ 1.0, 1.0, 1.0, 1.0 ], "eval_reference_length": 52363, "eval_runtime": 584.3355, "eval_samples_per_second": 4.811, "eval_steps_per_second": 0.602, "eval_translation_length": 52363, "step": 15810 }, { "epoch": 10.12, "learning_rate": 5.030285091039936e-05, "loss": 1.9783, "step": 16000 }, { "epoch": 10.44, "learning_rate": 4.777980998221901e-05, "loss": 1.9852, "step": 16500 }, { "epoch": 10.75, "learning_rate": 4.5262424391360075e-05, "loss": 1.9982, "step": 17000 }, { "epoch": 11.0, "eval_bleu": 1.0, "eval_brevity_penalty": 1.0, "eval_length_ratio": 1.0, "eval_loss": 2.2150418758392334, "eval_precisions": [ 1.0, 1.0, 1.0, 1.0 ], "eval_reference_length": 53036, "eval_runtime": 587.6164, "eval_samples_per_second": 4.784, "eval_steps_per_second": 0.599, "eval_translation_length": 53036, "step": 17391 } ], "logging_steps": 500, "max_steps": 31620, "num_train_epochs": 20, "save_steps": 500, "total_flos": 1.9050018020838605e+17, "trial_name": null, "trial_params": null }