pingusPongus / last-checkpoint /trainer_state.json
dq158's picture
Training in progress, epoch 11, checkpoint
ca888f2
raw
history blame
9.6 kB
{
"best_metric": 2.214895725250244,
"best_model_checkpoint": "dq158/pingusPongus/checkpoint-15810",
"epoch": 11.0,
"eval_steps": 500,
"global_step": 17391,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.32,
"learning_rate": 0.0001,
"loss": 3.1034,
"step": 500
},
{
"epoch": 0.63,
"learning_rate": 9.993631921956987e-05,
"loss": 2.6893,
"step": 1000
},
{
"epoch": 0.95,
"learning_rate": 9.974543908795133e-05,
"loss": 2.5782,
"step": 1500
},
{
"epoch": 1.0,
"eval_bleu": 1.0,
"eval_brevity_penalty": 1.0,
"eval_length_ratio": 1.0,
"eval_loss": 2.409773349761963,
"eval_precisions": [
1.0,
1.0,
1.0,
1.0
],
"eval_reference_length": 52663,
"eval_runtime": 589.2444,
"eval_samples_per_second": 4.771,
"eval_steps_per_second": 0.597,
"eval_translation_length": 52663,
"step": 1581
},
{
"epoch": 1.27,
"learning_rate": 9.942784582097439e-05,
"loss": 2.4939,
"step": 2000
},
{
"epoch": 1.58,
"learning_rate": 9.898434840212306e-05,
"loss": 2.4615,
"step": 2500
},
{
"epoch": 1.9,
"learning_rate": 9.841607652186736e-05,
"loss": 2.4147,
"step": 3000
},
{
"epoch": 2.0,
"eval_bleu": 1.0,
"eval_brevity_penalty": 1.0,
"eval_length_ratio": 1.0,
"eval_loss": 2.320283889770508,
"eval_precisions": [
1.0,
1.0,
1.0,
1.0
],
"eval_reference_length": 53341,
"eval_runtime": 585.7187,
"eval_samples_per_second": 4.799,
"eval_steps_per_second": 0.601,
"eval_translation_length": 53341,
"step": 3162
},
{
"epoch": 2.21,
"learning_rate": 9.772447770008058e-05,
"loss": 2.3609,
"step": 3500
},
{
"epoch": 2.53,
"learning_rate": 9.691131359887135e-05,
"loss": 2.3422,
"step": 4000
},
{
"epoch": 2.85,
"learning_rate": 9.597865553522297e-05,
"loss": 2.3484,
"step": 4500
},
{
"epoch": 3.0,
"eval_bleu": 1.0,
"eval_brevity_penalty": 1.0,
"eval_length_ratio": 1.0,
"eval_loss": 2.2784500122070312,
"eval_precisions": [
1.0,
1.0,
1.0,
1.0
],
"eval_reference_length": 52556,
"eval_runtime": 583.4704,
"eval_samples_per_second": 4.818,
"eval_steps_per_second": 0.603,
"eval_translation_length": 52556,
"step": 4743
},
{
"epoch": 3.16,
"learning_rate": 9.492887920487015e-05,
"loss": 2.2993,
"step": 5000
},
{
"epoch": 3.48,
"learning_rate": 9.376465863085263e-05,
"loss": 2.2738,
"step": 5500
},
{
"epoch": 3.8,
"learning_rate": 9.24889593521603e-05,
"loss": 2.2585,
"step": 6000
},
{
"epoch": 4.0,
"eval_bleu": 1.0,
"eval_brevity_penalty": 1.0,
"eval_length_ratio": 1.0,
"eval_loss": 2.257721424102783,
"eval_precisions": [
1.0,
1.0,
1.0,
1.0
],
"eval_reference_length": 53121,
"eval_runtime": 580.1345,
"eval_samples_per_second": 4.845,
"eval_steps_per_second": 0.607,
"eval_translation_length": 53121,
"step": 6324
},
{
"epoch": 4.11,
"learning_rate": 9.110503086981956e-05,
"loss": 2.2444,
"step": 6500
},
{
"epoch": 4.43,
"learning_rate": 8.961639836966304e-05,
"loss": 2.2152,
"step": 7000
},
{
"epoch": 4.74,
"learning_rate": 8.802685374286608e-05,
"loss": 2.2,
"step": 7500
},
{
"epoch": 5.0,
"eval_bleu": 1.0,
"eval_brevity_penalty": 1.0,
"eval_length_ratio": 1.0,
"eval_loss": 2.240872383117676,
"eval_precisions": [
1.0,
1.0,
1.0,
1.0
],
"eval_reference_length": 52857,
"eval_runtime": 579.7989,
"eval_samples_per_second": 4.848,
"eval_steps_per_second": 0.607,
"eval_translation_length": 52857,
"step": 7905
},
{
"epoch": 5.06,
"learning_rate": 8.63404459271232e-05,
"loss": 2.2021,
"step": 8000
},
{
"epoch": 5.38,
"learning_rate": 8.456147059306759e-05,
"loss": 2.1521,
"step": 8500
},
{
"epoch": 5.69,
"learning_rate": 8.26944592022048e-05,
"loss": 2.1533,
"step": 9000
},
{
"epoch": 6.0,
"eval_bleu": 1.0,
"eval_brevity_penalty": 1.0,
"eval_length_ratio": 1.0,
"eval_loss": 2.2270374298095703,
"eval_precisions": [
1.0,
1.0,
1.0,
1.0
],
"eval_reference_length": 52650,
"eval_runtime": 581.0567,
"eval_samples_per_second": 4.838,
"eval_steps_per_second": 0.606,
"eval_translation_length": 52650,
"step": 9486
},
{
"epoch": 6.01,
"learning_rate": 8.07441674642325e-05,
"loss": 2.1582,
"step": 9500
},
{
"epoch": 6.33,
"learning_rate": 7.871556322314834e-05,
"loss": 2.1203,
"step": 10000
},
{
"epoch": 6.64,
"learning_rate": 7.661381380300255e-05,
"loss": 2.1193,
"step": 10500
},
{
"epoch": 6.96,
"learning_rate": 7.444427284552887e-05,
"loss": 2.106,
"step": 11000
},
{
"epoch": 7.0,
"eval_bleu": 1.0,
"eval_brevity_penalty": 1.0,
"eval_length_ratio": 1.0,
"eval_loss": 2.2215259075164795,
"eval_precisions": [
1.0,
1.0,
1.0,
1.0
],
"eval_reference_length": 53190,
"eval_runtime": 584.4443,
"eval_samples_per_second": 4.81,
"eval_steps_per_second": 0.602,
"eval_translation_length": 53190,
"step": 11067
},
{
"epoch": 7.27,
"learning_rate": 7.221246667318116e-05,
"loss": 2.0653,
"step": 11500
},
{
"epoch": 7.59,
"learning_rate": 6.992408021231241e-05,
"loss": 2.0783,
"step": 12000
},
{
"epoch": 7.91,
"learning_rate": 6.758494251235275e-05,
"loss": 2.0813,
"step": 12500
},
{
"epoch": 8.0,
"eval_bleu": 1.0,
"eval_brevity_penalty": 1.0,
"eval_length_ratio": 1.0,
"eval_loss": 2.219463586807251,
"eval_precisions": [
1.0,
1.0,
1.0,
1.0
],
"eval_reference_length": 52305,
"eval_runtime": 583.0366,
"eval_samples_per_second": 4.821,
"eval_steps_per_second": 0.604,
"eval_translation_length": 52305,
"step": 12648
},
{
"epoch": 8.22,
"learning_rate": 6.520101189787285e-05,
"loss": 2.0482,
"step": 13000
},
{
"epoch": 8.54,
"learning_rate": 6.27783607913536e-05,
"loss": 2.0496,
"step": 13500
},
{
"epoch": 8.86,
"learning_rate": 6.03231602453219e-05,
"loss": 2.0406,
"step": 14000
},
{
"epoch": 9.0,
"eval_bleu": 1.0,
"eval_brevity_penalty": 1.0,
"eval_length_ratio": 1.0,
"eval_loss": 2.215216636657715,
"eval_precisions": [
1.0,
1.0,
1.0,
1.0
],
"eval_reference_length": 52546,
"eval_runtime": 581.8998,
"eval_samples_per_second": 4.831,
"eval_steps_per_second": 0.605,
"eval_translation_length": 52546,
"step": 14229
},
{
"epoch": 9.17,
"learning_rate": 5.784166422325311e-05,
"loss": 2.0341,
"step": 14500
},
{
"epoch": 9.49,
"learning_rate": 5.534019366928e-05,
"loss": 2.0094,
"step": 15000
},
{
"epoch": 9.8,
"learning_rate": 5.282512040728659e-05,
"loss": 2.0032,
"step": 15500
},
{
"epoch": 10.0,
"eval_bleu": 1.0,
"eval_brevity_penalty": 1.0,
"eval_length_ratio": 1.0,
"eval_loss": 2.214895725250244,
"eval_precisions": [
1.0,
1.0,
1.0,
1.0
],
"eval_reference_length": 52363,
"eval_runtime": 584.3355,
"eval_samples_per_second": 4.811,
"eval_steps_per_second": 0.602,
"eval_translation_length": 52363,
"step": 15810
},
{
"epoch": 10.12,
"learning_rate": 5.030285091039936e-05,
"loss": 1.9783,
"step": 16000
},
{
"epoch": 10.44,
"learning_rate": 4.777980998221901e-05,
"loss": 1.9852,
"step": 16500
},
{
"epoch": 10.75,
"learning_rate": 4.5262424391360075e-05,
"loss": 1.9982,
"step": 17000
},
{
"epoch": 11.0,
"eval_bleu": 1.0,
"eval_brevity_penalty": 1.0,
"eval_length_ratio": 1.0,
"eval_loss": 2.2150418758392334,
"eval_precisions": [
1.0,
1.0,
1.0,
1.0
],
"eval_reference_length": 53036,
"eval_runtime": 587.6164,
"eval_samples_per_second": 4.784,
"eval_steps_per_second": 0.599,
"eval_translation_length": 53036,
"step": 17391
}
],
"logging_steps": 500,
"max_steps": 31620,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 1.9050018020838605e+17,
"trial_name": null,
"trial_params": null
}