|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 50582, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001, |
|
"loss": 3.3233, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.99998927054457e-05, |
|
"loss": 2.8783, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.999957082224324e-05, |
|
"loss": 2.6917, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.999903435177409e-05, |
|
"loss": 2.7154, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.999828329634069e-05, |
|
"loss": 2.7366, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.999731765916636e-05, |
|
"loss": 2.7151, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.999613744439543e-05, |
|
"loss": 2.6935, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.999474265709312e-05, |
|
"loss": 2.6431, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.999313330324557e-05, |
|
"loss": 2.6171, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.999130938975975e-05, |
|
"loss": 2.6553, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.998927092446351e-05, |
|
"loss": 2.5905, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.99870179161055e-05, |
|
"loss": 2.5851, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.998455037435515e-05, |
|
"loss": 2.7004, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.998186830980259e-05, |
|
"loss": 2.5175, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.997897173395868e-05, |
|
"loss": 2.5745, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 9.997586065925489e-05, |
|
"loss": 2.515, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 9.997253509904324e-05, |
|
"loss": 2.5784, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 9.996899506759634e-05, |
|
"loss": 2.5426, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 9.996524058010725e-05, |
|
"loss": 2.5677, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 9.996127165268938e-05, |
|
"loss": 2.5871, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 9.995708830237652e-05, |
|
"loss": 2.6239, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 9.995269054712269e-05, |
|
"loss": 2.5366, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 9.994807840580211e-05, |
|
"loss": 2.5484, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 9.994325189820907e-05, |
|
"loss": 2.5512, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 9.99382110450579e-05, |
|
"loss": 2.5573, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.993295586798282e-05, |
|
"loss": 2.5584, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.992748638953795e-05, |
|
"loss": 2.5176, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.992180263319706e-05, |
|
"loss": 2.5322, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 9.991590462335362e-05, |
|
"loss": 2.4881, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 9.990979238532059e-05, |
|
"loss": 2.5357, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 9.990346594533036e-05, |
|
"loss": 2.4982, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.989692533053464e-05, |
|
"loss": 2.5111, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 9.989017056900434e-05, |
|
"loss": 2.5439, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.988320168972938e-05, |
|
"loss": 2.4806, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.987601872261871e-05, |
|
"loss": 2.4715, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 9.986862169850007e-05, |
|
"loss": 2.4796, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 9.986101064911984e-05, |
|
"loss": 2.5061, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.985318560714301e-05, |
|
"loss": 2.4945, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 9.984514660615293e-05, |
|
"loss": 2.4611, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 9.983689368065128e-05, |
|
"loss": 2.5054, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.98284268660578e-05, |
|
"loss": 2.5272, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.981974619871019e-05, |
|
"loss": 2.4692, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 9.981085171586402e-05, |
|
"loss": 2.4757, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 9.980174345569246e-05, |
|
"loss": 2.5493, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 9.979242145728618e-05, |
|
"loss": 2.4543, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 9.978288576065315e-05, |
|
"loss": 2.4532, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 9.977313640671853e-05, |
|
"loss": 2.4311, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 9.97631734373244e-05, |
|
"loss": 2.4512, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 9.975299689522967e-05, |
|
"loss": 2.46, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 9.974260682410984e-05, |
|
"loss": 2.5014, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 1.0, |
|
"eval_brevity_penalty": 1.0, |
|
"eval_length_ratio": 1.0, |
|
"eval_loss": 2.3814520835876465, |
|
"eval_precisions": [ |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0 |
|
], |
|
"eval_reference_length": 1439232, |
|
"eval_runtime": 2544.152, |
|
"eval_samples_per_second": 1.105, |
|
"eval_steps_per_second": 1.105, |
|
"eval_translation_length": 1439232, |
|
"step": 25291 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 9.973200326855681e-05, |
|
"loss": 2.3745, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 9.972118627407877e-05, |
|
"loss": 2.2548, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 9.971015588709987e-05, |
|
"loss": 2.1978, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 9.969891215496015e-05, |
|
"loss": 2.1931, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 9.968745512591524e-05, |
|
"loss": 2.1611, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 9.967578484913623e-05, |
|
"loss": 2.2166, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 9.96639013747094e-05, |
|
"loss": 2.1401, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 9.965180475363603e-05, |
|
"loss": 2.1456, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 9.963949503783216e-05, |
|
"loss": 2.2447, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 9.962697228012848e-05, |
|
"loss": 2.2138, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 9.961423653426987e-05, |
|
"loss": 2.1736, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 9.96012878549154e-05, |
|
"loss": 2.2721, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 9.958812629763798e-05, |
|
"loss": 2.1877, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 9.957475191892413e-05, |
|
"loss": 2.1888, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 9.956116477617382e-05, |
|
"loss": 2.1881, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 9.954736492770007e-05, |
|
"loss": 2.22, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 9.95333524327288e-05, |
|
"loss": 2.1357, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 9.951912735139864e-05, |
|
"loss": 2.2336, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 9.950468974476051e-05, |
|
"loss": 2.1933, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 9.949003967477749e-05, |
|
"loss": 2.2787, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 9.947517720432445e-05, |
|
"loss": 2.1992, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 9.946010239718791e-05, |
|
"loss": 2.2127, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 9.944481531806567e-05, |
|
"loss": 2.1832, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 9.942931603256652e-05, |
|
"loss": 2.226, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 9.941360460721e-05, |
|
"loss": 2.2111, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 9.939768110942617e-05, |
|
"loss": 2.233, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 9.938154560755516e-05, |
|
"loss": 2.2399, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 9.93651981708471e-05, |
|
"loss": 2.2297, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 9.934863886946158e-05, |
|
"loss": 2.2551, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 9.93318677744675e-05, |
|
"loss": 2.1899, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 9.931488495784278e-05, |
|
"loss": 2.118, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 9.929769049247397e-05, |
|
"loss": 2.2226, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 9.928028445215595e-05, |
|
"loss": 2.2213, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 9.926266691159165e-05, |
|
"loss": 2.1881, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 9.924483794639175e-05, |
|
"loss": 2.238, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 9.922679763307426e-05, |
|
"loss": 2.2178, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 9.920854604906427e-05, |
|
"loss": 2.1311, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 9.919008327269361e-05, |
|
"loss": 2.2308, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 9.917140938320049e-05, |
|
"loss": 2.2121, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 9.91525244607292e-05, |
|
"loss": 2.1756, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 9.913342858632968e-05, |
|
"loss": 2.2231, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 9.91141218419573e-05, |
|
"loss": 2.197, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 9.909460431047235e-05, |
|
"loss": 2.2141, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 9.907487607563988e-05, |
|
"loss": 2.2276, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 9.905493722212913e-05, |
|
"loss": 2.278, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 9.903478783551334e-05, |
|
"loss": 2.2759, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 9.90144280022693e-05, |
|
"loss": 2.2556, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 9.899385780977695e-05, |
|
"loss": 2.1704, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 9.89730773463191e-05, |
|
"loss": 2.2612, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 9.895208670108096e-05, |
|
"loss": 2.238, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 9.893088596414981e-05, |
|
"loss": 2.2049, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 1.0, |
|
"eval_brevity_penalty": 1.0, |
|
"eval_length_ratio": 1.0, |
|
"eval_loss": 2.3368308544158936, |
|
"eval_precisions": [ |
|
1.0, |
|
1.0, |
|
1.0, |
|
1.0 |
|
], |
|
"eval_reference_length": 1439232, |
|
"eval_runtime": 2375.7581, |
|
"eval_samples_per_second": 1.183, |
|
"eval_steps_per_second": 1.183, |
|
"eval_translation_length": 1439232, |
|
"step": 50582 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 758730, |
|
"num_train_epochs": 30, |
|
"save_steps": 500, |
|
"total_flos": 1.1657977248979354e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|