{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 5.0,
  "global_step": 23430,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.11,
      "learning_rate": 4.893299189073837e-05,
      "loss": 0.7701,
      "step": 500
    },
    {
      "epoch": 0.21,
      "learning_rate": 4.786598378147674e-05,
      "loss": 0.1282,
      "step": 1000
    },
    {
      "epoch": 0.32,
      "learning_rate": 4.6798975672215114e-05,
      "loss": 0.0914,
      "step": 1500
    },
    {
      "epoch": 0.43,
      "learning_rate": 4.5731967562953484e-05,
      "loss": 0.0791,
      "step": 2000
    },
    {
      "epoch": 0.53,
      "learning_rate": 4.4664959453691854e-05,
      "loss": 0.0656,
      "step": 2500
    },
    {
      "epoch": 0.64,
      "learning_rate": 4.359795134443022e-05,
      "loss": 0.0645,
      "step": 3000
    },
    {
      "epoch": 0.75,
      "learning_rate": 4.253094323516859e-05,
      "loss": 0.0573,
      "step": 3500
    },
    {
      "epoch": 0.85,
      "learning_rate": 4.146393512590696e-05,
      "loss": 0.0566,
      "step": 4000
    },
    {
      "epoch": 0.96,
      "learning_rate": 4.039692701664533e-05,
      "loss": 0.0544,
      "step": 4500
    },
    {
      "epoch": 1.0,
      "eval_bleu": 25.989,
      "eval_gen_len": 18.8655,
      "eval_loss": 0.044239919632673264,
      "eval_runtime": 180.9818,
      "eval_samples_per_second": 5.752,
      "eval_steps_per_second": 2.879,
      "step": 4686
    },
    {
      "epoch": 1.07,
      "learning_rate": 3.93299189073837e-05,
      "loss": 0.0476,
      "step": 5000
    },
    {
      "epoch": 1.17,
      "learning_rate": 3.826291079812207e-05,
      "loss": 0.0442,
      "step": 5500
    },
    {
      "epoch": 1.28,
      "learning_rate": 3.719590268886043e-05,
      "loss": 0.0375,
      "step": 6000
    },
    {
      "epoch": 1.39,
      "learning_rate": 3.6128894579598804e-05,
      "loss": 0.0408,
      "step": 6500
    },
    {
      "epoch": 1.49,
      "learning_rate": 3.5061886470337174e-05,
      "loss": 0.0351,
      "step": 7000
    },
    {
      "epoch": 1.6,
      "learning_rate": 3.3994878361075544e-05,
      "loss": 0.0355,
      "step": 7500
    },
    {
      "epoch": 1.71,
      "learning_rate": 3.2927870251813915e-05,
      "loss": 0.0379,
      "step": 8000
    },
    {
      "epoch": 1.81,
      "learning_rate": 3.1860862142552285e-05,
      "loss": 0.0331,
      "step": 8500
    },
    {
      "epoch": 1.92,
      "learning_rate": 3.0793854033290656e-05,
      "loss": 0.0367,
      "step": 9000
    },
    {
      "epoch": 2.0,
      "eval_bleu": 25.9358,
      "eval_gen_len": 18.8713,
      "eval_loss": 0.03710315003991127,
      "eval_runtime": 179.552,
      "eval_samples_per_second": 5.798,
      "eval_steps_per_second": 2.902,
      "step": 9372
    },
    {
      "epoch": 2.03,
      "learning_rate": 2.9726845924029023e-05,
      "loss": 0.0307,
      "step": 9500
    },
    {
      "epoch": 2.13,
      "learning_rate": 2.8659837814767393e-05,
      "loss": 0.0218,
      "step": 10000
    },
    {
      "epoch": 2.24,
      "learning_rate": 2.7592829705505763e-05,
      "loss": 0.0234,
      "step": 10500
    },
    {
      "epoch": 2.35,
      "learning_rate": 2.6525821596244134e-05,
      "loss": 0.0265,
      "step": 11000
    },
    {
      "epoch": 2.45,
      "learning_rate": 2.54588134869825e-05,
      "loss": 0.0239,
      "step": 11500
    },
    {
      "epoch": 2.56,
      "learning_rate": 2.439180537772087e-05,
      "loss": 0.0261,
      "step": 12000
    },
    {
      "epoch": 2.67,
      "learning_rate": 2.332479726845924e-05,
      "loss": 0.0247,
      "step": 12500
    },
    {
      "epoch": 2.77,
      "learning_rate": 2.2257789159197612e-05,
      "loss": 0.0228,
      "step": 13000
    },
    {
      "epoch": 2.88,
      "learning_rate": 2.1190781049935982e-05,
      "loss": 0.0233,
      "step": 13500
    },
    {
      "epoch": 2.99,
      "learning_rate": 2.0123772940674353e-05,
      "loss": 0.0222,
      "step": 14000
    },
    {
      "epoch": 3.0,
      "eval_bleu": 25.8976,
      "eval_gen_len": 18.8694,
      "eval_loss": 0.037382081151008606,
      "eval_runtime": 178.2936,
      "eval_samples_per_second": 5.839,
      "eval_steps_per_second": 2.922,
      "step": 14058
    },
    {
      "epoch": 3.09,
      "learning_rate": 1.905676483141272e-05,
      "loss": 0.0156,
      "step": 14500
    },
    {
      "epoch": 3.2,
      "learning_rate": 1.7989756722151087e-05,
      "loss": 0.0153,
      "step": 15000
    },
    {
      "epoch": 3.31,
      "learning_rate": 1.6922748612889457e-05,
      "loss": 0.0174,
      "step": 15500
    },
    {
      "epoch": 3.41,
      "learning_rate": 1.5855740503627827e-05,
      "loss": 0.0171,
      "step": 16000
    },
    {
      "epoch": 3.52,
      "learning_rate": 1.4788732394366198e-05,
      "loss": 0.0176,
      "step": 16500
    },
    {
      "epoch": 3.63,
      "learning_rate": 1.3721724285104568e-05,
      "loss": 0.0187,
      "step": 17000
    },
    {
      "epoch": 3.73,
      "learning_rate": 1.2654716175842937e-05,
      "loss": 0.0157,
      "step": 17500
    },
    {
      "epoch": 3.84,
      "learning_rate": 1.1587708066581307e-05,
      "loss": 0.0169,
      "step": 18000
    },
    {
      "epoch": 3.95,
      "learning_rate": 1.0520699957319676e-05,
      "loss": 0.0152,
      "step": 18500
    },
    {
      "epoch": 4.0,
      "eval_bleu": 26.1575,
      "eval_gen_len": 18.8694,
      "eval_loss": 0.040931396186351776,
      "eval_runtime": 182.3446,
      "eval_samples_per_second": 5.709,
      "eval_steps_per_second": 2.857,
      "step": 18744
    },
    {
      "epoch": 4.05,
      "learning_rate": 9.453691848058044e-06,
      "loss": 0.0148,
      "step": 19000
    },
    {
      "epoch": 4.16,
      "learning_rate": 8.386683738796415e-06,
      "loss": 0.0124,
      "step": 19500
    },
    {
      "epoch": 4.27,
      "learning_rate": 7.319675629534785e-06,
      "loss": 0.0148,
      "step": 20000
    },
    {
      "epoch": 4.37,
      "learning_rate": 6.252667520273155e-06,
      "loss": 0.0123,
      "step": 20500
    },
    {
      "epoch": 4.48,
      "learning_rate": 5.1856594110115235e-06,
      "loss": 0.0139,
      "step": 21000
    },
    {
      "epoch": 4.59,
      "learning_rate": 4.118651301749894e-06,
      "loss": 0.0116,
      "step": 21500
    },
    {
      "epoch": 4.69,
      "learning_rate": 3.051643192488263e-06,
      "loss": 0.014,
      "step": 22000
    },
    {
      "epoch": 4.8,
      "learning_rate": 1.9846350832266325e-06,
      "loss": 0.0143,
      "step": 22500
    },
    {
      "epoch": 4.91,
      "learning_rate": 9.176269739650021e-07,
      "loss": 0.0147,
      "step": 23000
    },
    {
      "epoch": 5.0,
      "eval_bleu": 26.0973,
      "eval_gen_len": 18.8694,
      "eval_loss": 0.043235890567302704,
      "eval_runtime": 184.0328,
      "eval_samples_per_second": 5.657,
      "eval_steps_per_second": 2.831,
      "step": 23430
    },
    {
      "epoch": 5.0,
      "step": 23430,
      "total_flos": 2019762272885760.0,
      "train_loss": 0.047856709064706696,
      "train_runtime": 5577.5722,
      "train_samples_per_second": 8.401,
      "train_steps_per_second": 4.201
    }
  ],
  "max_steps": 23430,
  "num_train_epochs": 5,
  "total_flos": 2019762272885760.0,
  "trial_name": null,
  "trial_params": null
}