{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 50582, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 0.0001, "loss": 3.3233, "step": 500 }, { "epoch": 0.04, "learning_rate": 9.99998927054457e-05, "loss": 2.8783, "step": 1000 }, { "epoch": 0.06, "learning_rate": 9.999957082224324e-05, "loss": 2.6917, "step": 1500 }, { "epoch": 0.08, "learning_rate": 9.999903435177409e-05, "loss": 2.7154, "step": 2000 }, { "epoch": 0.1, "learning_rate": 9.999828329634069e-05, "loss": 2.7366, "step": 2500 }, { "epoch": 0.12, "learning_rate": 9.999731765916636e-05, "loss": 2.7151, "step": 3000 }, { "epoch": 0.14, "learning_rate": 9.999613744439543e-05, "loss": 2.6935, "step": 3500 }, { "epoch": 0.16, "learning_rate": 9.999474265709312e-05, "loss": 2.6431, "step": 4000 }, { "epoch": 0.18, "learning_rate": 9.999313330324557e-05, "loss": 2.6171, "step": 4500 }, { "epoch": 0.2, "learning_rate": 9.999130938975975e-05, "loss": 2.6553, "step": 5000 }, { "epoch": 0.22, "learning_rate": 9.998927092446351e-05, "loss": 2.5905, "step": 5500 }, { "epoch": 0.24, "learning_rate": 9.99870179161055e-05, "loss": 2.5851, "step": 6000 }, { "epoch": 0.26, "learning_rate": 9.998455037435515e-05, "loss": 2.7004, "step": 6500 }, { "epoch": 0.28, "learning_rate": 9.998186830980259e-05, "loss": 2.5175, "step": 7000 }, { "epoch": 0.3, "learning_rate": 9.997897173395868e-05, "loss": 2.5745, "step": 7500 }, { "epoch": 0.32, "learning_rate": 9.997586065925489e-05, "loss": 2.515, "step": 8000 }, { "epoch": 0.34, "learning_rate": 9.997253509904324e-05, "loss": 2.5784, "step": 8500 }, { "epoch": 0.36, "learning_rate": 9.996899506759634e-05, "loss": 2.5426, "step": 9000 }, { "epoch": 0.38, "learning_rate": 9.996524058010725e-05, "loss": 2.5677, "step": 9500 }, { "epoch": 0.4, "learning_rate": 9.996127165268938e-05, "loss": 2.5871, "step": 10000 }, { "epoch": 0.42, "learning_rate": 9.995708830237652e-05, "loss": 2.6239, "step": 10500 }, { "epoch": 0.43, "learning_rate": 9.995269054712269e-05, "loss": 2.5366, "step": 11000 }, { "epoch": 0.45, "learning_rate": 9.994807840580211e-05, "loss": 2.5484, "step": 11500 }, { "epoch": 0.47, "learning_rate": 9.994325189820907e-05, "loss": 2.5512, "step": 12000 }, { "epoch": 0.49, "learning_rate": 9.99382110450579e-05, "loss": 2.5573, "step": 12500 }, { "epoch": 0.51, "learning_rate": 9.993295586798282e-05, "loss": 2.5584, "step": 13000 }, { "epoch": 0.53, "learning_rate": 9.992748638953795e-05, "loss": 2.5176, "step": 13500 }, { "epoch": 0.55, "learning_rate": 9.992180263319706e-05, "loss": 2.5322, "step": 14000 }, { "epoch": 0.57, "learning_rate": 9.991590462335362e-05, "loss": 2.4881, "step": 14500 }, { "epoch": 0.59, "learning_rate": 9.990979238532059e-05, "loss": 2.5357, "step": 15000 }, { "epoch": 0.61, "learning_rate": 9.990346594533036e-05, "loss": 2.4982, "step": 15500 }, { "epoch": 0.63, "learning_rate": 9.989692533053464e-05, "loss": 2.5111, "step": 16000 }, { "epoch": 0.65, "learning_rate": 9.989017056900434e-05, "loss": 2.5439, "step": 16500 }, { "epoch": 0.67, "learning_rate": 9.988320168972938e-05, "loss": 2.4806, "step": 17000 }, { "epoch": 0.69, "learning_rate": 9.987601872261871e-05, "loss": 2.4715, "step": 17500 }, { "epoch": 0.71, "learning_rate": 9.986862169850007e-05, "loss": 2.4796, "step": 18000 }, { "epoch": 0.73, "learning_rate": 9.986101064911984e-05, "loss": 2.5061, "step": 18500 }, { "epoch": 0.75, "learning_rate": 9.985318560714301e-05, "loss": 2.4945, "step": 19000 }, { "epoch": 0.77, "learning_rate": 9.984514660615293e-05, "loss": 2.4611, "step": 19500 }, { "epoch": 0.79, "learning_rate": 9.983689368065128e-05, "loss": 2.5054, "step": 20000 }, { "epoch": 0.81, "learning_rate": 9.98284268660578e-05, "loss": 2.5272, "step": 20500 }, { "epoch": 0.83, "learning_rate": 9.981974619871019e-05, "loss": 2.4692, "step": 21000 }, { "epoch": 0.85, "learning_rate": 9.981085171586402e-05, "loss": 2.4757, "step": 21500 }, { "epoch": 0.87, "learning_rate": 9.980174345569246e-05, "loss": 2.5493, "step": 22000 }, { "epoch": 0.89, "learning_rate": 9.979242145728618e-05, "loss": 2.4543, "step": 22500 }, { "epoch": 0.91, "learning_rate": 9.978288576065315e-05, "loss": 2.4532, "step": 23000 }, { "epoch": 0.93, "learning_rate": 9.977313640671853e-05, "loss": 2.4311, "step": 23500 }, { "epoch": 0.95, "learning_rate": 9.97631734373244e-05, "loss": 2.4512, "step": 24000 }, { "epoch": 0.97, "learning_rate": 9.975299689522967e-05, "loss": 2.46, "step": 24500 }, { "epoch": 0.99, "learning_rate": 9.974260682410984e-05, "loss": 2.5014, "step": 25000 }, { "epoch": 1.0, "eval_bleu": 1.0, "eval_brevity_penalty": 1.0, "eval_length_ratio": 1.0, "eval_loss": 2.3814520835876465, "eval_precisions": [ 1.0, 1.0, 1.0, 1.0 ], "eval_reference_length": 1439232, "eval_runtime": 2544.152, "eval_samples_per_second": 1.105, "eval_steps_per_second": 1.105, "eval_translation_length": 1439232, "step": 25291 }, { "epoch": 1.01, "learning_rate": 9.973200326855681e-05, "loss": 2.3745, "step": 25500 }, { "epoch": 1.03, "learning_rate": 9.972118627407877e-05, "loss": 2.2548, "step": 26000 }, { "epoch": 1.05, "learning_rate": 9.971015588709987e-05, "loss": 2.1978, "step": 26500 }, { "epoch": 1.07, "learning_rate": 9.969891215496015e-05, "loss": 2.1931, "step": 27000 }, { "epoch": 1.09, "learning_rate": 9.968745512591524e-05, "loss": 2.1611, "step": 27500 }, { "epoch": 1.11, "learning_rate": 9.967578484913623e-05, "loss": 2.2166, "step": 28000 }, { "epoch": 1.13, "learning_rate": 9.96639013747094e-05, "loss": 2.1401, "step": 28500 }, { "epoch": 1.15, "learning_rate": 9.965180475363603e-05, "loss": 2.1456, "step": 29000 }, { "epoch": 1.17, "learning_rate": 9.963949503783216e-05, "loss": 2.2447, "step": 29500 }, { "epoch": 1.19, "learning_rate": 9.962697228012848e-05, "loss": 2.2138, "step": 30000 }, { "epoch": 1.21, "learning_rate": 9.961423653426987e-05, "loss": 2.1736, "step": 30500 }, { "epoch": 1.23, "learning_rate": 9.96012878549154e-05, "loss": 2.2721, "step": 31000 }, { "epoch": 1.25, "learning_rate": 9.958812629763798e-05, "loss": 2.1877, "step": 31500 }, { "epoch": 1.27, "learning_rate": 9.957475191892413e-05, "loss": 2.1888, "step": 32000 }, { "epoch": 1.29, "learning_rate": 9.956116477617382e-05, "loss": 2.1881, "step": 32500 }, { "epoch": 1.3, "learning_rate": 9.954736492770007e-05, "loss": 2.22, "step": 33000 }, { "epoch": 1.32, "learning_rate": 9.95333524327288e-05, "loss": 2.1357, "step": 33500 }, { "epoch": 1.34, "learning_rate": 9.951912735139864e-05, "loss": 2.2336, "step": 34000 }, { "epoch": 1.36, "learning_rate": 9.950468974476051e-05, "loss": 2.1933, "step": 34500 }, { "epoch": 1.38, "learning_rate": 9.949003967477749e-05, "loss": 2.2787, "step": 35000 }, { "epoch": 1.4, "learning_rate": 9.947517720432445e-05, "loss": 2.1992, "step": 35500 }, { "epoch": 1.42, "learning_rate": 9.946010239718791e-05, "loss": 2.2127, "step": 36000 }, { "epoch": 1.44, "learning_rate": 9.944481531806567e-05, "loss": 2.1832, "step": 36500 }, { "epoch": 1.46, "learning_rate": 9.942931603256652e-05, "loss": 2.226, "step": 37000 }, { "epoch": 1.48, "learning_rate": 9.941360460721e-05, "loss": 2.2111, "step": 37500 }, { "epoch": 1.5, "learning_rate": 9.939768110942617e-05, "loss": 2.233, "step": 38000 }, { "epoch": 1.52, "learning_rate": 9.938154560755516e-05, "loss": 2.2399, "step": 38500 }, { "epoch": 1.54, "learning_rate": 9.93651981708471e-05, "loss": 2.2297, "step": 39000 }, { "epoch": 1.56, "learning_rate": 9.934863886946158e-05, "loss": 2.2551, "step": 39500 }, { "epoch": 1.58, "learning_rate": 9.93318677744675e-05, "loss": 2.1899, "step": 40000 }, { "epoch": 1.6, "learning_rate": 9.931488495784278e-05, "loss": 2.118, "step": 40500 }, { "epoch": 1.62, "learning_rate": 9.929769049247397e-05, "loss": 2.2226, "step": 41000 }, { "epoch": 1.64, "learning_rate": 9.928028445215595e-05, "loss": 2.2213, "step": 41500 }, { "epoch": 1.66, "learning_rate": 9.926266691159165e-05, "loss": 2.1881, "step": 42000 }, { "epoch": 1.68, "learning_rate": 9.924483794639175e-05, "loss": 2.238, "step": 42500 }, { "epoch": 1.7, "learning_rate": 9.922679763307426e-05, "loss": 2.2178, "step": 43000 }, { "epoch": 1.72, "learning_rate": 9.920854604906427e-05, "loss": 2.1311, "step": 43500 }, { "epoch": 1.74, "learning_rate": 9.919008327269361e-05, "loss": 2.2308, "step": 44000 }, { "epoch": 1.76, "learning_rate": 9.917140938320049e-05, "loss": 2.2121, "step": 44500 }, { "epoch": 1.78, "learning_rate": 9.91525244607292e-05, "loss": 2.1756, "step": 45000 }, { "epoch": 1.8, "learning_rate": 9.913342858632968e-05, "loss": 2.2231, "step": 45500 }, { "epoch": 1.82, "learning_rate": 9.91141218419573e-05, "loss": 2.197, "step": 46000 }, { "epoch": 1.84, "learning_rate": 9.909460431047235e-05, "loss": 2.2141, "step": 46500 }, { "epoch": 1.86, "learning_rate": 9.907487607563988e-05, "loss": 2.2276, "step": 47000 }, { "epoch": 1.88, "learning_rate": 9.905493722212913e-05, "loss": 2.278, "step": 47500 }, { "epoch": 1.9, "learning_rate": 9.903478783551334e-05, "loss": 2.2759, "step": 48000 }, { "epoch": 1.92, "learning_rate": 9.90144280022693e-05, "loss": 2.2556, "step": 48500 }, { "epoch": 1.94, "learning_rate": 9.899385780977695e-05, "loss": 2.1704, "step": 49000 }, { "epoch": 1.96, "learning_rate": 9.89730773463191e-05, "loss": 2.2612, "step": 49500 }, { "epoch": 1.98, "learning_rate": 9.895208670108096e-05, "loss": 2.238, "step": 50000 }, { "epoch": 2.0, "learning_rate": 9.893088596414981e-05, "loss": 2.2049, "step": 50500 }, { "epoch": 2.0, "eval_bleu": 1.0, "eval_brevity_penalty": 1.0, "eval_length_ratio": 1.0, "eval_loss": 2.3368308544158936, "eval_precisions": [ 1.0, 1.0, 1.0, 1.0 ], "eval_reference_length": 1439232, "eval_runtime": 2375.7581, "eval_samples_per_second": 1.183, "eval_steps_per_second": 1.183, "eval_translation_length": 1439232, "step": 50582 } ], "logging_steps": 500, "max_steps": 758730, "num_train_epochs": 30, "save_steps": 500, "total_flos": 1.1657977248979354e+17, "trial_name": null, "trial_params": null }