|
{ |
|
"best_metric": 0.6904399394989014, |
|
"best_model_checkpoint": "nllb_200_distilled_600M_ENtoFO_bsz_64_epochs_10lr7e-05/checkpoint-16500", |
|
"epoch": 5.798947842417985, |
|
"eval_steps": 500, |
|
"global_step": 16500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.4e-05, |
|
"loss": 1.6553, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.8e-05, |
|
"loss": 1.3075, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.2e-05, |
|
"loss": 1.2606, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 5.6e-05, |
|
"loss": 1.2085, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 7e-05, |
|
"loss": 1.1956, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_bleu": 36.3211, |
|
"eval_chrf++": 55.6095, |
|
"eval_gen_len": 17.5404, |
|
"eval_loss": 1.01542067527771, |
|
"eval_runtime": 1596.0286, |
|
"eval_samples_per_second": 4.587, |
|
"eval_steps_per_second": 2.294, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 6.974955277280858e-05, |
|
"loss": 1.1487, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 6.949910554561716e-05, |
|
"loss": 1.1406, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 6.924865831842576e-05, |
|
"loss": 1.1197, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.899821109123434e-05, |
|
"loss": 1.0739, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 6.874776386404293e-05, |
|
"loss": 1.0817, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_bleu": 37.9118, |
|
"eval_chrf++": 57.1775, |
|
"eval_gen_len": 17.6499, |
|
"eval_loss": 0.9288437962532043, |
|
"eval_runtime": 1599.8831, |
|
"eval_samples_per_second": 4.576, |
|
"eval_steps_per_second": 2.288, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.849731663685151e-05, |
|
"loss": 1.0784, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 6.824686940966009e-05, |
|
"loss": 1.0538, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 6.799642218246869e-05, |
|
"loss": 1.0431, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 6.774597495527727e-05, |
|
"loss": 1.0347, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 6.749552772808586e-05, |
|
"loss": 1.0162, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_bleu": 39.3818, |
|
"eval_chrf++": 58.3559, |
|
"eval_gen_len": 17.6375, |
|
"eval_loss": 0.8730005025863647, |
|
"eval_runtime": 1591.8771, |
|
"eval_samples_per_second": 4.599, |
|
"eval_steps_per_second": 2.3, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 6.724508050089444e-05, |
|
"loss": 0.9995, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 6.699463327370304e-05, |
|
"loss": 0.987, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 6.674418604651162e-05, |
|
"loss": 1.0009, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 6.64937388193202e-05, |
|
"loss": 0.9843, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 6.624329159212879e-05, |
|
"loss": 0.974, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_bleu": 40.0752, |
|
"eval_chrf++": 58.9288, |
|
"eval_gen_len": 17.741, |
|
"eval_loss": 0.8381767272949219, |
|
"eval_runtime": 1584.9965, |
|
"eval_samples_per_second": 4.619, |
|
"eval_steps_per_second": 2.31, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 6.599284436493739e-05, |
|
"loss": 0.9649, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 6.574239713774597e-05, |
|
"loss": 0.9511, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 6.549194991055455e-05, |
|
"loss": 0.9689, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 6.524150268336314e-05, |
|
"loss": 0.9512, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.499105545617173e-05, |
|
"loss": 0.9513, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_bleu": 40.6133, |
|
"eval_chrf++": 59.5241, |
|
"eval_gen_len": 17.7642, |
|
"eval_loss": 0.8131038546562195, |
|
"eval_runtime": 1608.2394, |
|
"eval_samples_per_second": 4.552, |
|
"eval_steps_per_second": 2.276, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 6.474060822898032e-05, |
|
"loss": 0.9476, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 6.44901610017889e-05, |
|
"loss": 0.9202, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.423971377459748e-05, |
|
"loss": 0.9423, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 6.398926654740608e-05, |
|
"loss": 0.8949, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 6.373881932021467e-05, |
|
"loss": 0.8405, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_bleu": 40.8822, |
|
"eval_chrf++": 59.9339, |
|
"eval_gen_len": 17.7189, |
|
"eval_loss": 0.7998338937759399, |
|
"eval_runtime": 1592.0637, |
|
"eval_samples_per_second": 4.598, |
|
"eval_steps_per_second": 2.3, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 6.348837209302325e-05, |
|
"loss": 0.8431, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 6.323792486583183e-05, |
|
"loss": 0.8383, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 6.298747763864043e-05, |
|
"loss": 0.838, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 6.273703041144901e-05, |
|
"loss": 0.8527, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 6.24865831842576e-05, |
|
"loss": 0.8252, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"eval_bleu": 41.6082, |
|
"eval_chrf++": 60.3254, |
|
"eval_gen_len": 17.7662, |
|
"eval_loss": 0.7859154939651489, |
|
"eval_runtime": 1613.9109, |
|
"eval_samples_per_second": 4.536, |
|
"eval_steps_per_second": 2.268, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 6.22361359570662e-05, |
|
"loss": 0.836, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 6.198568872987478e-05, |
|
"loss": 0.8274, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 6.173524150268336e-05, |
|
"loss": 0.8257, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 6.148479427549194e-05, |
|
"loss": 0.8301, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 6.123434704830053e-05, |
|
"loss": 0.8235, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_bleu": 41.9785, |
|
"eval_chrf++": 60.615, |
|
"eval_gen_len": 17.6996, |
|
"eval_loss": 0.7718562483787537, |
|
"eval_runtime": 1591.3834, |
|
"eval_samples_per_second": 4.6, |
|
"eval_steps_per_second": 2.301, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 6.098389982110912e-05, |
|
"loss": 0.8335, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 6.073345259391771e-05, |
|
"loss": 0.8184, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 6.048300536672629e-05, |
|
"loss": 0.8139, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 6.0232558139534877e-05, |
|
"loss": 0.8121, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 5.998211091234346e-05, |
|
"loss": 0.8174, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_bleu": 41.9942, |
|
"eval_chrf++": 60.7015, |
|
"eval_gen_len": 17.7548, |
|
"eval_loss": 0.7601388692855835, |
|
"eval_runtime": 1610.8686, |
|
"eval_samples_per_second": 4.545, |
|
"eval_steps_per_second": 2.273, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 5.973166368515206e-05, |
|
"loss": 0.8232, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 5.948121645796064e-05, |
|
"loss": 0.8099, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 5.9230769230769225e-05, |
|
"loss": 0.8154, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 5.898032200357781e-05, |
|
"loss": 0.818, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 5.87298747763864e-05, |
|
"loss": 0.7992, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_bleu": 42.3622, |
|
"eval_chrf++": 61.0481, |
|
"eval_gen_len": 17.8145, |
|
"eval_loss": 0.7486168146133423, |
|
"eval_runtime": 1597.5591, |
|
"eval_samples_per_second": 4.583, |
|
"eval_steps_per_second": 2.292, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 5.847942754919499e-05, |
|
"loss": 0.8087, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 5.822898032200357e-05, |
|
"loss": 0.7958, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 5.7978533094812156e-05, |
|
"loss": 0.8022, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 5.7728085867620747e-05, |
|
"loss": 0.7876, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 5.747763864042934e-05, |
|
"loss": 0.7915, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"eval_bleu": 42.4851, |
|
"eval_chrf++": 61.1145, |
|
"eval_gen_len": 17.7756, |
|
"eval_loss": 0.7351738810539246, |
|
"eval_runtime": 1598.7533, |
|
"eval_samples_per_second": 4.579, |
|
"eval_steps_per_second": 2.29, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 5.722719141323792e-05, |
|
"loss": 0.7795, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 5.6976744186046504e-05, |
|
"loss": 0.8015, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 5.6726296958855094e-05, |
|
"loss": 0.7219, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 5.647584973166368e-05, |
|
"loss": 0.7231, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 5.622540250447227e-05, |
|
"loss": 0.718, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"eval_bleu": 42.5957, |
|
"eval_chrf++": 61.1828, |
|
"eval_gen_len": 17.7144, |
|
"eval_loss": 0.7349444627761841, |
|
"eval_runtime": 1593.2386, |
|
"eval_samples_per_second": 4.595, |
|
"eval_steps_per_second": 2.298, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 5.597495527728085e-05, |
|
"loss": 0.7155, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 5.572450805008944e-05, |
|
"loss": 0.7222, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 5.5474060822898026e-05, |
|
"loss": 0.7113, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 5.522361359570661e-05, |
|
"loss": 0.7067, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 5.497316636851521e-05, |
|
"loss": 0.714, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_bleu": 43.1947, |
|
"eval_chrf++": 61.6389, |
|
"eval_gen_len": 17.7485, |
|
"eval_loss": 0.7279652953147888, |
|
"eval_runtime": 1602.3425, |
|
"eval_samples_per_second": 4.569, |
|
"eval_steps_per_second": 2.285, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 5.472271914132379e-05, |
|
"loss": 0.7284, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 5.4472271914132374e-05, |
|
"loss": 0.7106, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 5.422182468694096e-05, |
|
"loss": 0.7226, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 5.3971377459749555e-05, |
|
"loss": 0.7151, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 5.372093023255814e-05, |
|
"loss": 0.7242, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"eval_bleu": 43.0217, |
|
"eval_chrf++": 61.4, |
|
"eval_gen_len": 17.7472, |
|
"eval_loss": 0.7255465984344482, |
|
"eval_runtime": 1596.9259, |
|
"eval_samples_per_second": 4.584, |
|
"eval_steps_per_second": 2.293, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 5.347048300536672e-05, |
|
"loss": 0.7115, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 5.3220035778175306e-05, |
|
"loss": 0.6996, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 5.296958855098389e-05, |
|
"loss": 0.7226, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 5.2719141323792486e-05, |
|
"loss": 0.7023, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 5.246869409660107e-05, |
|
"loss": 0.7035, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"eval_bleu": 42.9886, |
|
"eval_chrf++": 61.5585, |
|
"eval_gen_len": 17.7513, |
|
"eval_loss": 0.7192216515541077, |
|
"eval_runtime": 1608.427, |
|
"eval_samples_per_second": 4.552, |
|
"eval_steps_per_second": 2.276, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 5.2218246869409654e-05, |
|
"loss": 0.7175, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 5.1967799642218244e-05, |
|
"loss": 0.7164, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 5.171735241502683e-05, |
|
"loss": 0.703, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 5.146690518783542e-05, |
|
"loss": 0.7067, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 5.1216457960644e-05, |
|
"loss": 0.7048, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"eval_bleu": 42.9399, |
|
"eval_chrf++": 61.4851, |
|
"eval_gen_len": 17.7067, |
|
"eval_loss": 0.7168448567390442, |
|
"eval_runtime": 1552.4929, |
|
"eval_samples_per_second": 4.716, |
|
"eval_steps_per_second": 2.358, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 5.096601073345259e-05, |
|
"loss": 0.7127, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 5.0715563506261176e-05, |
|
"loss": 0.7091, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 5.046511627906976e-05, |
|
"loss": 0.7122, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 5.021466905187835e-05, |
|
"loss": 0.6949, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 4.996422182468694e-05, |
|
"loss": 0.685, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_bleu": 43.114, |
|
"eval_chrf++": 61.6028, |
|
"eval_gen_len": 17.844, |
|
"eval_loss": 0.7094260454177856, |
|
"eval_runtime": 1415.2591, |
|
"eval_samples_per_second": 5.173, |
|
"eval_steps_per_second": 2.587, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 4.9713774597495524e-05, |
|
"loss": 0.6618, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 4.946332737030411e-05, |
|
"loss": 0.6417, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 4.92128801431127e-05, |
|
"loss": 0.65, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 4.896243291592129e-05, |
|
"loss": 0.6375, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 4.871198568872987e-05, |
|
"loss": 0.632, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"eval_bleu": 43.3779, |
|
"eval_chrf++": 61.8915, |
|
"eval_gen_len": 17.7121, |
|
"eval_loss": 0.7186790108680725, |
|
"eval_runtime": 1408.2967, |
|
"eval_samples_per_second": 5.198, |
|
"eval_steps_per_second": 2.6, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 4.8461538461538455e-05, |
|
"loss": 0.6434, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 4.821109123434704e-05, |
|
"loss": 0.6354, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 4.7960644007155636e-05, |
|
"loss": 0.6374, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 4.771019677996422e-05, |
|
"loss": 0.6478, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 4.74597495527728e-05, |
|
"loss": 0.6444, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"eval_bleu": 43.0761, |
|
"eval_chrf++": 61.6092, |
|
"eval_gen_len": 17.7518, |
|
"eval_loss": 0.7161450982093811, |
|
"eval_runtime": 1412.5727, |
|
"eval_samples_per_second": 5.183, |
|
"eval_steps_per_second": 2.592, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 4.720930232558139e-05, |
|
"loss": 0.6358, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 4.695885509838998e-05, |
|
"loss": 0.6502, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 4.670840787119857e-05, |
|
"loss": 0.6376, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 4.645796064400715e-05, |
|
"loss": 0.638, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 4.6207513416815735e-05, |
|
"loss": 0.6302, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"eval_bleu": 43.4763, |
|
"eval_chrf++": 61.8105, |
|
"eval_gen_len": 17.7754, |
|
"eval_loss": 0.7070448398590088, |
|
"eval_runtime": 1409.243, |
|
"eval_samples_per_second": 5.195, |
|
"eval_steps_per_second": 2.598, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 4.5957066189624325e-05, |
|
"loss": 0.632, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 4.570661896243291e-05, |
|
"loss": 0.6364, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 4.54561717352415e-05, |
|
"loss": 0.6466, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 4.520572450805009e-05, |
|
"loss": 0.6373, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 4.495527728085867e-05, |
|
"loss": 0.6478, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"eval_bleu": 43.725, |
|
"eval_chrf++": 62.0616, |
|
"eval_gen_len": 17.788, |
|
"eval_loss": 0.705007016658783, |
|
"eval_runtime": 1414.2328, |
|
"eval_samples_per_second": 5.177, |
|
"eval_steps_per_second": 2.589, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 4.470483005366726e-05, |
|
"loss": 0.6516, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 4.445438282647585e-05, |
|
"loss": 0.6334, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 4.420393559928444e-05, |
|
"loss": 0.6542, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 4.395348837209302e-05, |
|
"loss": 0.646, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 4.3703041144901605e-05, |
|
"loss": 0.6374, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"eval_bleu": 43.7206, |
|
"eval_chrf++": 62.1048, |
|
"eval_gen_len": 17.7229, |
|
"eval_loss": 0.6963800191879272, |
|
"eval_runtime": 1413.4358, |
|
"eval_samples_per_second": 5.18, |
|
"eval_steps_per_second": 2.59, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 4.345259391771019e-05, |
|
"loss": 0.6416, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 4.3202146690518786e-05, |
|
"loss": 0.6249, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 4.295169946332737e-05, |
|
"loss": 0.638, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 4.270125223613595e-05, |
|
"loss": 0.6352, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 4.2450805008944536e-05, |
|
"loss": 0.5804, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"eval_bleu": 43.8669, |
|
"eval_chrf++": 62.1364, |
|
"eval_gen_len": 17.7865, |
|
"eval_loss": 0.7024260759353638, |
|
"eval_runtime": 1410.9857, |
|
"eval_samples_per_second": 5.189, |
|
"eval_steps_per_second": 2.595, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 4.220035778175313e-05, |
|
"loss": 0.5799, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 4.194991055456172e-05, |
|
"loss": 0.5852, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 4.16994633273703e-05, |
|
"loss": 0.5801, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 4.1449016100178884e-05, |
|
"loss": 0.5945, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 4.1198568872987475e-05, |
|
"loss": 0.5919, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"eval_bleu": 43.6775, |
|
"eval_chrf++": 61.9586, |
|
"eval_gen_len": 17.8369, |
|
"eval_loss": 0.7032192945480347, |
|
"eval_runtime": 1420.6308, |
|
"eval_samples_per_second": 5.153, |
|
"eval_steps_per_second": 2.577, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 4.094812164579606e-05, |
|
"loss": 0.5928, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 4.069767441860465e-05, |
|
"loss": 0.5923, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 4.044722719141323e-05, |
|
"loss": 0.5943, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 4.019677996422182e-05, |
|
"loss": 0.5905, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 3.9946332737030406e-05, |
|
"loss": 0.5879, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"eval_bleu": 43.9642, |
|
"eval_chrf++": 62.2021, |
|
"eval_gen_len": 17.7873, |
|
"eval_loss": 0.7024480104446411, |
|
"eval_runtime": 1420.746, |
|
"eval_samples_per_second": 5.153, |
|
"eval_steps_per_second": 2.577, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 3.9695885509839e-05, |
|
"loss": 0.5855, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 3.944543828264758e-05, |
|
"loss": 0.5855, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 3.919499105545617e-05, |
|
"loss": 0.5833, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 3.8944543828264754e-05, |
|
"loss": 0.5767, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 3.869409660107334e-05, |
|
"loss": 0.5858, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"eval_bleu": 44.027, |
|
"eval_chrf++": 62.2226, |
|
"eval_gen_len": 17.7678, |
|
"eval_loss": 0.6992958784103394, |
|
"eval_runtime": 1410.8973, |
|
"eval_samples_per_second": 5.189, |
|
"eval_steps_per_second": 2.595, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 3.8443649373881935e-05, |
|
"loss": 0.5775, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 3.819320214669052e-05, |
|
"loss": 0.5825, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 3.79427549194991e-05, |
|
"loss": 0.5851, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 3.7692307692307686e-05, |
|
"loss": 0.5913, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 3.744186046511627e-05, |
|
"loss": 0.5877, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"eval_bleu": 44.1426, |
|
"eval_chrf++": 62.3429, |
|
"eval_gen_len": 17.7805, |
|
"eval_loss": 0.6957116723060608, |
|
"eval_runtime": 1414.8882, |
|
"eval_samples_per_second": 5.174, |
|
"eval_steps_per_second": 2.587, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 3.719141323792487e-05, |
|
"loss": 0.5857, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 3.694096601073345e-05, |
|
"loss": 0.5776, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 3.6690518783542034e-05, |
|
"loss": 0.5985, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 3.644007155635062e-05, |
|
"loss": 0.5855, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 3.618962432915921e-05, |
|
"loss": 0.5895, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"eval_bleu": 44.2097, |
|
"eval_chrf++": 62.4158, |
|
"eval_gen_len": 17.7713, |
|
"eval_loss": 0.6944009065628052, |
|
"eval_runtime": 1417.3776, |
|
"eval_samples_per_second": 5.165, |
|
"eval_steps_per_second": 2.583, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 3.59391771019678e-05, |
|
"loss": 0.581, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 3.568872987477638e-05, |
|
"loss": 0.5835, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 3.543828264758497e-05, |
|
"loss": 0.5512, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 3.5187835420393556e-05, |
|
"loss": 0.5324, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 3.4937388193202146e-05, |
|
"loss": 0.5381, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"eval_bleu": 43.9778, |
|
"eval_chrf++": 62.2087, |
|
"eval_gen_len": 17.8153, |
|
"eval_loss": 0.7013605833053589, |
|
"eval_runtime": 1419.0259, |
|
"eval_samples_per_second": 5.159, |
|
"eval_steps_per_second": 2.58, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 3.468694096601073e-05, |
|
"loss": 0.5364, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"learning_rate": 3.443649373881932e-05, |
|
"loss": 0.543, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 3.4186046511627904e-05, |
|
"loss": 0.5432, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 3.3935599284436494e-05, |
|
"loss": 0.542, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"learning_rate": 3.368515205724508e-05, |
|
"loss": 0.5385, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"eval_bleu": 44.1326, |
|
"eval_chrf++": 62.3372, |
|
"eval_gen_len": 17.8174, |
|
"eval_loss": 0.7036887407302856, |
|
"eval_runtime": 1418.4921, |
|
"eval_samples_per_second": 5.161, |
|
"eval_steps_per_second": 2.581, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"learning_rate": 3.343470483005367e-05, |
|
"loss": 0.5467, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 3.318425760286225e-05, |
|
"loss": 0.5466, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 3.2933810375670835e-05, |
|
"loss": 0.5439, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"learning_rate": 3.2683363148479426e-05, |
|
"loss": 0.5403, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"learning_rate": 3.243291592128801e-05, |
|
"loss": 0.5481, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"eval_bleu": 44.053, |
|
"eval_chrf++": 62.418, |
|
"eval_gen_len": 17.7614, |
|
"eval_loss": 0.6976599097251892, |
|
"eval_runtime": 1417.644, |
|
"eval_samples_per_second": 5.164, |
|
"eval_steps_per_second": 2.582, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"learning_rate": 3.21824686940966e-05, |
|
"loss": 0.5445, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"learning_rate": 3.1932021466905183e-05, |
|
"loss": 0.5518, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"learning_rate": 3.1681574239713774e-05, |
|
"loss": 0.5451, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"learning_rate": 3.143112701252236e-05, |
|
"loss": 0.5387, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"learning_rate": 3.118067978533094e-05, |
|
"loss": 0.5473, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"eval_bleu": 44.2406, |
|
"eval_chrf++": 62.4882, |
|
"eval_gen_len": 17.8263, |
|
"eval_loss": 0.6982511878013611, |
|
"eval_runtime": 1415.9788, |
|
"eval_samples_per_second": 5.17, |
|
"eval_steps_per_second": 2.585, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 3.093023255813953e-05, |
|
"loss": 0.5483, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"learning_rate": 3.0679785330948115e-05, |
|
"loss": 0.537, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"learning_rate": 3.0429338103756705e-05, |
|
"loss": 0.5479, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"learning_rate": 3.0178890876565292e-05, |
|
"loss": 0.5445, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 2.992844364937388e-05, |
|
"loss": 0.5466, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"eval_bleu": 44.2692, |
|
"eval_chrf++": 62.4172, |
|
"eval_gen_len": 17.7783, |
|
"eval_loss": 0.6904399394989014, |
|
"eval_runtime": 1415.8621, |
|
"eval_samples_per_second": 5.171, |
|
"eval_steps_per_second": 2.586, |
|
"step": 16500 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 28450, |
|
"num_train_epochs": 10, |
|
"save_steps": 1500, |
|
"total_flos": 2.2884516243072614e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|