{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 34567, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 0.0001, "loss": 3.3228, "step": 500 }, { "epoch": 0.03, "learning_rate": 9.999994258403258e-05, "loss": 2.8639, "step": 1000 }, { "epoch": 0.04, "learning_rate": 9.99997703362622e-05, "loss": 2.8198, "step": 1500 }, { "epoch": 0.06, "learning_rate": 9.999948325708443e-05, "loss": 2.7858, "step": 2000 }, { "epoch": 0.07, "learning_rate": 9.999908134715859e-05, "loss": 2.7422, "step": 2500 }, { "epoch": 0.09, "learning_rate": 9.999856460740773e-05, "loss": 2.7274, "step": 3000 }, { "epoch": 0.1, "learning_rate": 9.99979330390186e-05, "loss": 2.6958, "step": 3500 }, { "epoch": 0.12, "learning_rate": 9.999718664344171e-05, "loss": 2.6617, "step": 4000 }, { "epoch": 0.13, "learning_rate": 9.999632542239125e-05, "loss": 2.6747, "step": 4500 }, { "epoch": 0.14, "learning_rate": 9.999534937784512e-05, "loss": 2.6564, "step": 5000 }, { "epoch": 0.16, "learning_rate": 9.999425851204496e-05, "loss": 2.585, "step": 5500 }, { "epoch": 0.17, "learning_rate": 9.99930528274961e-05, "loss": 2.6385, "step": 6000 }, { "epoch": 0.19, "learning_rate": 9.999173232696753e-05, "loss": 2.6262, "step": 6500 }, { "epoch": 0.2, "learning_rate": 9.999029701349196e-05, "loss": 2.6055, "step": 7000 }, { "epoch": 0.22, "learning_rate": 9.998874689036583e-05, "loss": 2.5917, "step": 7500 }, { "epoch": 0.23, "learning_rate": 9.998708196114922e-05, "loss": 2.6162, "step": 8000 }, { "epoch": 0.25, "learning_rate": 9.99853022296658e-05, "loss": 2.6188, "step": 8500 }, { "epoch": 0.26, "learning_rate": 9.998340770000302e-05, "loss": 2.5671, "step": 9000 }, { "epoch": 0.27, "learning_rate": 9.998139837651193e-05, "loss": 2.5897, "step": 9500 }, { "epoch": 0.29, "learning_rate": 9.997927426380721e-05, "loss": 2.5414, "step": 10000 }, { "epoch": 0.3, "learning_rate": 9.997703536676718e-05, "loss": 2.5139, "step": 10500 }, { "epoch": 0.32, "learning_rate": 9.997468169053379e-05, "loss": 2.5904, "step": 11000 }, { "epoch": 0.33, "learning_rate": 9.997221324051255e-05, "loss": 2.6288, "step": 11500 }, { "epoch": 0.35, "learning_rate": 9.996963002237263e-05, "loss": 2.598, "step": 12000 }, { "epoch": 0.36, "learning_rate": 9.996693204204674e-05, "loss": 2.5276, "step": 12500 }, { "epoch": 0.38, "learning_rate": 9.996411930573117e-05, "loss": 2.5817, "step": 13000 }, { "epoch": 0.39, "learning_rate": 9.996119181988575e-05, "loss": 2.5316, "step": 13500 }, { "epoch": 0.41, "learning_rate": 9.995814959123386e-05, "loss": 2.4692, "step": 14000 }, { "epoch": 0.42, "learning_rate": 9.995499262676243e-05, "loss": 2.5464, "step": 14500 }, { "epoch": 0.43, "learning_rate": 9.99517209337218e-05, "loss": 2.5222, "step": 15000 }, { "epoch": 0.45, "learning_rate": 9.994833451962592e-05, "loss": 2.5304, "step": 15500 }, { "epoch": 0.46, "learning_rate": 9.994483339225213e-05, "loss": 2.6063, "step": 16000 }, { "epoch": 0.48, "learning_rate": 9.994121755964129e-05, "loss": 2.5286, "step": 16500 }, { "epoch": 0.49, "learning_rate": 9.993748703009764e-05, "loss": 2.5273, "step": 17000 }, { "epoch": 0.51, "learning_rate": 9.993364181218885e-05, "loss": 2.4868, "step": 17500 }, { "epoch": 0.52, "learning_rate": 9.992968191474601e-05, "loss": 2.435, "step": 18000 }, { "epoch": 0.54, "learning_rate": 9.992560734686357e-05, "loss": 2.484, "step": 18500 }, { "epoch": 0.55, "learning_rate": 9.992141811789933e-05, "loss": 2.5301, "step": 19000 }, { "epoch": 0.56, "learning_rate": 9.991711423747445e-05, "loss": 2.4857, "step": 19500 }, { "epoch": 0.58, "learning_rate": 9.991269571547339e-05, "loss": 2.4958, "step": 20000 }, { "epoch": 0.59, "learning_rate": 9.99081625620439e-05, "loss": 2.4757, "step": 20500 }, { "epoch": 0.61, "learning_rate": 9.990351478759696e-05, "loss": 2.544, "step": 21000 }, { "epoch": 0.62, "learning_rate": 9.989875240280689e-05, "loss": 2.4796, "step": 21500 }, { "epoch": 0.64, "learning_rate": 9.989387541861111e-05, "loss": 2.4968, "step": 22000 }, { "epoch": 0.65, "learning_rate": 9.988888384621031e-05, "loss": 2.4426, "step": 22500 }, { "epoch": 0.67, "learning_rate": 9.988377769706834e-05, "loss": 2.4471, "step": 23000 }, { "epoch": 0.68, "learning_rate": 9.987855698291218e-05, "loss": 2.5022, "step": 23500 }, { "epoch": 0.69, "learning_rate": 9.98732217157319e-05, "loss": 2.5202, "step": 24000 }, { "epoch": 0.71, "learning_rate": 9.98677719077807e-05, "loss": 2.5562, "step": 24500 }, { "epoch": 0.72, "learning_rate": 9.986220757157482e-05, "loss": 2.4888, "step": 25000 }, { "epoch": 0.74, "learning_rate": 9.985652871989352e-05, "loss": 2.5049, "step": 25500 }, { "epoch": 0.75, "learning_rate": 9.98507353657791e-05, "loss": 2.4664, "step": 26000 }, { "epoch": 0.77, "learning_rate": 9.984482752253677e-05, "loss": 2.4528, "step": 26500 }, { "epoch": 0.78, "learning_rate": 9.98388052037347e-05, "loss": 2.4577, "step": 27000 }, { "epoch": 0.8, "learning_rate": 9.983266842320402e-05, "loss": 2.4889, "step": 27500 }, { "epoch": 0.81, "learning_rate": 9.982641719503866e-05, "loss": 2.4272, "step": 28000 }, { "epoch": 0.82, "learning_rate": 9.982005153359547e-05, "loss": 2.4783, "step": 28500 }, { "epoch": 0.84, "learning_rate": 9.981357145349406e-05, "loss": 2.4795, "step": 29000 }, { "epoch": 0.85, "learning_rate": 9.98069769696168e-05, "loss": 2.4807, "step": 29500 }, { "epoch": 0.87, "learning_rate": 9.980026809710888e-05, "loss": 2.4951, "step": 30000 }, { "epoch": 0.88, "learning_rate": 9.979344485137813e-05, "loss": 2.5137, "step": 30500 }, { "epoch": 0.9, "learning_rate": 9.978650724809511e-05, "loss": 2.5249, "step": 31000 }, { "epoch": 0.91, "learning_rate": 9.977945530319297e-05, "loss": 2.4092, "step": 31500 }, { "epoch": 0.93, "learning_rate": 9.977228903286746e-05, "loss": 2.4978, "step": 32000 }, { "epoch": 0.94, "learning_rate": 9.976500845357694e-05, "loss": 2.4361, "step": 32500 }, { "epoch": 0.95, "learning_rate": 9.975761358204227e-05, "loss": 2.4774, "step": 33000 }, { "epoch": 0.97, "learning_rate": 9.975010443524679e-05, "loss": 2.4662, "step": 33500 }, { "epoch": 0.98, "learning_rate": 9.974248103043629e-05, "loss": 2.4252, "step": 34000 }, { "epoch": 1.0, "learning_rate": 9.973474338511898e-05, "loss": 2.4689, "step": 34500 }, { "epoch": 1.0, "eval_bleu": 1.0, "eval_brevity_penalty": 1.0, "eval_length_ratio": 1.0, "eval_loss": 2.3501155376434326, "eval_precisions": [ 1.0, 1.0, 1.0, 1.0 ], "eval_reference_length": 1966592, "eval_runtime": 3383.1867, "eval_samples_per_second": 1.135, "eval_steps_per_second": 1.135, "eval_translation_length": 1966592, "step": 34567 } ], "logging_steps": 500, "max_steps": 1037010, "num_train_epochs": 30, "save_steps": 500, "total_flos": 7.966891375696282e+16, "trial_name": null, "trial_params": null }