{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 68219, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 8e-05, "loss": 2.273, "step": 500 }, { "epoch": 0.01, "learning_rate": 7.999957460493864e-05, "loss": 2.2497, "step": 1000 }, { "epoch": 0.02, "learning_rate": 7.999829842880257e-05, "loss": 2.3728, "step": 1500 }, { "epoch": 0.03, "learning_rate": 7.999617149873574e-05, "loss": 2.3576, "step": 2000 }, { "epoch": 0.04, "learning_rate": 7.999319385997746e-05, "loss": 2.3374, "step": 2500 }, { "epoch": 0.04, "learning_rate": 7.998936557586135e-05, "loss": 2.3538, "step": 3000 }, { "epoch": 0.05, "learning_rate": 7.998468672781407e-05, "loss": 2.366, "step": 3500 }, { "epoch": 0.06, "learning_rate": 7.997915741535355e-05, "loss": 2.3321, "step": 4000 }, { "epoch": 0.07, "learning_rate": 7.997277775608694e-05, "loss": 2.2838, "step": 4500 }, { "epoch": 0.07, "learning_rate": 7.996554788570796e-05, "loss": 2.2679, "step": 5000 }, { "epoch": 0.08, "learning_rate": 7.995746795799422e-05, "loss": 2.4041, "step": 5500 }, { "epoch": 0.09, "learning_rate": 7.994853814480376e-05, "loss": 2.3415, "step": 6000 }, { "epoch": 0.1, "learning_rate": 7.99387586360715e-05, "loss": 2.3589, "step": 6500 }, { "epoch": 0.1, "learning_rate": 7.992812963980518e-05, "loss": 2.3203, "step": 7000 }, { "epoch": 0.11, "learning_rate": 7.991665138208094e-05, "loss": 2.3517, "step": 7500 }, { "epoch": 0.12, "learning_rate": 7.990432410703848e-05, "loss": 2.4286, "step": 8000 }, { "epoch": 0.12, "learning_rate": 7.989114807687589e-05, "loss": 2.3227, "step": 8500 }, { "epoch": 0.13, "learning_rate": 7.987712357184408e-05, "loss": 2.3945, "step": 9000 }, { "epoch": 0.14, "learning_rate": 7.98622508902408e-05, "loss": 2.3775, "step": 9500 }, { "epoch": 0.15, "learning_rate": 7.984653034840432e-05, "loss": 2.3522, "step": 10000 }, { "epoch": 0.15, "learning_rate": 7.982996228070671e-05, "loss": 2.334, "step": 10500 }, { "epoch": 0.16, "learning_rate": 7.981254703954664e-05, "loss": 2.3742, "step": 11000 }, { "epoch": 0.17, "learning_rate": 7.979428499534201e-05, "loss": 2.2517, "step": 11500 }, { "epoch": 0.18, "learning_rate": 7.977517653652199e-05, "loss": 2.3994, "step": 12000 }, { "epoch": 0.18, "learning_rate": 7.975522206951876e-05, "loss": 2.3606, "step": 12500 }, { "epoch": 0.19, "learning_rate": 7.973442201875895e-05, "loss": 2.3634, "step": 13000 }, { "epoch": 0.2, "learning_rate": 7.971277682665446e-05, "loss": 2.3061, "step": 13500 }, { "epoch": 0.21, "learning_rate": 7.969028695359319e-05, "loss": 2.4985, "step": 14000 }, { "epoch": 0.21, "learning_rate": 7.966695287792921e-05, "loss": 2.3908, "step": 14500 }, { "epoch": 0.22, "learning_rate": 7.96427750959725e-05, "loss": 2.3605, "step": 15000 }, { "epoch": 0.23, "learning_rate": 7.961775412197857e-05, "loss": 2.347, "step": 15500 }, { "epoch": 0.23, "learning_rate": 7.959189048813735e-05, "loss": 2.3233, "step": 16000 }, { "epoch": 0.24, "learning_rate": 7.95651847445619e-05, "loss": 2.3415, "step": 16500 }, { "epoch": 0.25, "learning_rate": 7.953763745927682e-05, "loss": 2.4679, "step": 17000 }, { "epoch": 0.26, "learning_rate": 7.950924921820606e-05, "loss": 2.443, "step": 17500 }, { "epoch": 0.26, "learning_rate": 7.948002062516052e-05, "loss": 2.5141, "step": 18000 }, { "epoch": 0.27, "learning_rate": 7.944995230182513e-05, "loss": 2.4339, "step": 18500 }, { "epoch": 0.28, "learning_rate": 7.941904488774571e-05, "loss": 2.4308, "step": 19000 }, { "epoch": 0.29, "learning_rate": 7.938729904031533e-05, "loss": 2.4312, "step": 19500 }, { "epoch": 0.29, "learning_rate": 7.93547154347603e-05, "loss": 2.4756, "step": 20000 }, { "epoch": 0.3, "learning_rate": 7.932129476412592e-05, "loss": 2.4426, "step": 20500 }, { "epoch": 0.31, "learning_rate": 7.928703773926155e-05, "loss": 2.4779, "step": 21000 }, { "epoch": 0.32, "learning_rate": 7.925194508880567e-05, "loss": 2.4671, "step": 21500 }, { "epoch": 0.32, "learning_rate": 7.921601755917029e-05, "loss": 2.4473, "step": 22000 }, { "epoch": 0.33, "learning_rate": 7.917925591452508e-05, "loss": 2.3929, "step": 22500 }, { "epoch": 0.34, "learning_rate": 7.914166093678117e-05, "loss": 2.4158, "step": 23000 }, { "epoch": 0.34, "learning_rate": 7.910323342557442e-05, "loss": 2.4607, "step": 23500 }, { "epoch": 0.35, "learning_rate": 7.906397419824855e-05, "loss": 2.4866, "step": 24000 }, { "epoch": 0.36, "learning_rate": 7.902388408983759e-05, "loss": 2.3708, "step": 24500 }, { "epoch": 0.37, "learning_rate": 7.898296395304824e-05, "loss": 2.4718, "step": 25000 }, { "epoch": 0.37, "learning_rate": 7.894121465824175e-05, "loss": 2.4436, "step": 25500 }, { "epoch": 0.38, "learning_rate": 7.889863709341528e-05, "loss": 2.498, "step": 26000 }, { "epoch": 0.39, "learning_rate": 7.885523216418312e-05, "loss": 2.4418, "step": 26500 }, { "epoch": 0.4, "learning_rate": 7.881100079375742e-05, "loss": 2.3653, "step": 27000 }, { "epoch": 0.4, "learning_rate": 7.876594392292848e-05, "loss": 2.5256, "step": 27500 }, { "epoch": 0.41, "learning_rate": 7.872006251004482e-05, "loss": 2.549, "step": 28000 }, { "epoch": 0.42, "learning_rate": 7.867335753099278e-05, "loss": 2.4543, "step": 28500 }, { "epoch": 0.43, "learning_rate": 7.86258299791757e-05, "loss": 2.4647, "step": 29000 }, { "epoch": 0.43, "learning_rate": 7.857748086549292e-05, "loss": 2.5375, "step": 29500 }, { "epoch": 0.44, "learning_rate": 7.852831121831812e-05, "loss": 2.4895, "step": 30000 }, { "epoch": 0.45, "learning_rate": 7.847832208347754e-05, "loss": 2.4156, "step": 30500 }, { "epoch": 0.45, "learning_rate": 7.842751452422775e-05, "loss": 2.5006, "step": 31000 }, { "epoch": 0.46, "learning_rate": 7.8375889621233e-05, "loss": 2.459, "step": 31500 }, { "epoch": 0.47, "learning_rate": 7.83234484725422e-05, "loss": 2.469, "step": 32000 }, { "epoch": 0.48, "learning_rate": 7.827019219356568e-05, "loss": 2.4331, "step": 32500 }, { "epoch": 0.48, "learning_rate": 7.821612191705128e-05, "loss": 2.487, "step": 33000 }, { "epoch": 0.49, "learning_rate": 7.816123879306048e-05, "loss": 2.5139, "step": 33500 }, { "epoch": 0.5, "learning_rate": 7.810554398894376e-05, "loss": 2.5117, "step": 34000 }, { "epoch": 0.51, "learning_rate": 7.804903868931584e-05, "loss": 2.5537, "step": 34500 }, { "epoch": 0.51, "learning_rate": 7.79917240960305e-05, "loss": 2.4394, "step": 35000 }, { "epoch": 0.52, "learning_rate": 7.7933601428155e-05, "loss": 2.4285, "step": 35500 }, { "epoch": 0.53, "learning_rate": 7.78746719219441e-05, "loss": 2.5693, "step": 36000 }, { "epoch": 0.54, "learning_rate": 7.781493683081388e-05, "loss": 2.4932, "step": 36500 }, { "epoch": 0.54, "learning_rate": 7.775439742531495e-05, "loss": 2.4468, "step": 37000 }, { "epoch": 0.55, "learning_rate": 7.769305499310553e-05, "loss": 2.4614, "step": 37500 }, { "epoch": 0.56, "learning_rate": 7.763091083892402e-05, "loss": 2.6549, "step": 38000 }, { "epoch": 0.56, "learning_rate": 7.756796628456121e-05, "loss": 2.4871, "step": 38500 }, { "epoch": 0.57, "learning_rate": 7.750422266883222e-05, "loss": 2.413, "step": 39000 }, { "epoch": 0.58, "learning_rate": 7.743968134754806e-05, "loss": 2.3566, "step": 39500 }, { "epoch": 0.59, "learning_rate": 7.737434369348664e-05, "loss": 2.4936, "step": 40000 }, { "epoch": 0.59, "learning_rate": 7.730821109636379e-05, "loss": 2.453, "step": 40500 }, { "epoch": 0.6, "learning_rate": 7.724128496280346e-05, "loss": 2.4851, "step": 41000 }, { "epoch": 0.61, "learning_rate": 7.717356671630802e-05, "loss": 2.4564, "step": 41500 }, { "epoch": 0.62, "learning_rate": 7.710505779722786e-05, "loss": 2.5133, "step": 42000 }, { "epoch": 0.62, "learning_rate": 7.703575966273073e-05, "loss": 2.4546, "step": 42500 }, { "epoch": 0.63, "learning_rate": 7.696567378677089e-05, "loss": 2.5157, "step": 43000 }, { "epoch": 0.64, "learning_rate": 7.689480166005756e-05, "loss": 2.4248, "step": 43500 }, { "epoch": 0.64, "learning_rate": 7.682314479002344e-05, "loss": 2.4853, "step": 44000 }, { "epoch": 0.65, "learning_rate": 7.67507047007924e-05, "loss": 2.4615, "step": 44500 }, { "epoch": 0.66, "learning_rate": 7.667748293314729e-05, "loss": 2.5391, "step": 45000 }, { "epoch": 0.67, "learning_rate": 7.6603481044497e-05, "loss": 2.4464, "step": 45500 }, { "epoch": 0.67, "learning_rate": 7.652870060884345e-05, "loss": 2.4941, "step": 46000 }, { "epoch": 0.68, "learning_rate": 7.645314321674803e-05, "loss": 2.4708, "step": 46500 }, { "epoch": 0.69, "learning_rate": 7.637681047529781e-05, "loss": 2.5972, "step": 47000 }, { "epoch": 0.7, "learning_rate": 7.629970400807136e-05, "loss": 2.5369, "step": 47500 }, { "epoch": 0.7, "learning_rate": 7.622182545510419e-05, "loss": 2.4348, "step": 48000 }, { "epoch": 0.71, "learning_rate": 7.61431764728539e-05, "loss": 2.4546, "step": 48500 }, { "epoch": 0.72, "learning_rate": 7.606375873416491e-05, "loss": 2.4378, "step": 49000 }, { "epoch": 0.73, "learning_rate": 7.598357392823292e-05, "loss": 2.471, "step": 49500 }, { "epoch": 0.73, "learning_rate": 7.590262376056896e-05, "loss": 2.4677, "step": 50000 }, { "epoch": 0.74, "learning_rate": 7.58209099529631e-05, "loss": 2.4245, "step": 50500 }, { "epoch": 0.75, "learning_rate": 7.573843424344783e-05, "loss": 2.4734, "step": 51000 }, { "epoch": 0.75, "learning_rate": 7.565519838626113e-05, "loss": 2.4158, "step": 51500 }, { "epoch": 0.76, "learning_rate": 7.557120415180916e-05, "loss": 2.5098, "step": 52000 }, { "epoch": 0.77, "learning_rate": 7.548645332662853e-05, "loss": 2.5478, "step": 52500 }, { "epoch": 0.78, "learning_rate": 7.540094771334835e-05, "loss": 2.4502, "step": 53000 }, { "epoch": 0.78, "learning_rate": 7.531468913065192e-05, "loss": 2.4264, "step": 53500 }, { "epoch": 0.79, "learning_rate": 7.522767941323798e-05, "loss": 2.4903, "step": 54000 }, { "epoch": 0.8, "learning_rate": 7.513992041178174e-05, "loss": 2.4486, "step": 54500 }, { "epoch": 0.81, "learning_rate": 7.505141399289549e-05, "loss": 2.5171, "step": 55000 }, { "epoch": 0.81, "learning_rate": 7.496216203908891e-05, "loss": 2.5396, "step": 55500 }, { "epoch": 0.82, "learning_rate": 7.487216644872901e-05, "loss": 2.4514, "step": 56000 }, { "epoch": 0.83, "learning_rate": 7.478142913599978e-05, "loss": 2.4017, "step": 56500 }, { "epoch": 0.84, "learning_rate": 7.468995203086146e-05, "loss": 2.4591, "step": 57000 }, { "epoch": 0.84, "learning_rate": 7.459773707900946e-05, "loss": 2.5764, "step": 57500 }, { "epoch": 0.85, "learning_rate": 7.450478624183306e-05, "loss": 2.5013, "step": 58000 }, { "epoch": 0.86, "learning_rate": 7.441110149637363e-05, "loss": 2.51, "step": 58500 }, { "epoch": 0.86, "learning_rate": 7.431668483528254e-05, "loss": 2.3992, "step": 59000 }, { "epoch": 0.87, "learning_rate": 7.422153826677887e-05, "loss": 2.4671, "step": 59500 }, { "epoch": 0.88, "learning_rate": 7.412566381460662e-05, "loss": 2.5362, "step": 60000 }, { "epoch": 0.89, "learning_rate": 7.402906351799175e-05, "loss": 2.4981, "step": 60500 }, { "epoch": 0.89, "learning_rate": 7.39317394315987e-05, "loss": 2.4574, "step": 61000 }, { "epoch": 0.9, "learning_rate": 7.383369362548674e-05, "loss": 2.4777, "step": 61500 }, { "epoch": 0.91, "learning_rate": 7.373492818506597e-05, "loss": 2.4628, "step": 62000 }, { "epoch": 0.92, "learning_rate": 7.363544521105292e-05, "loss": 2.4506, "step": 62500 }, { "epoch": 0.92, "learning_rate": 7.353524681942585e-05, "loss": 2.5644, "step": 63000 }, { "epoch": 0.93, "learning_rate": 7.343433514137987e-05, "loss": 2.5131, "step": 63500 }, { "epoch": 0.94, "learning_rate": 7.333271232328141e-05, "loss": 2.5575, "step": 64000 }, { "epoch": 0.95, "learning_rate": 7.32303805266227e-05, "loss": 2.5901, "step": 64500 }, { "epoch": 0.95, "learning_rate": 7.312734192797583e-05, "loss": 2.5701, "step": 65000 }, { "epoch": 0.96, "learning_rate": 7.302359871894635e-05, "loss": 2.5201, "step": 65500 }, { "epoch": 0.97, "learning_rate": 7.291915310612666e-05, "loss": 2.5402, "step": 66000 }, { "epoch": 0.97, "learning_rate": 7.281400731104918e-05, "loss": 2.4797, "step": 66500 }, { "epoch": 0.98, "learning_rate": 7.2708163570139e-05, "loss": 2.4836, "step": 67000 }, { "epoch": 0.99, "learning_rate": 7.260162413466636e-05, "loss": 2.4707, "step": 67500 }, { "epoch": 1.0, "learning_rate": 7.249439127069873e-05, "loss": 2.4334, "step": 68000 }, { "epoch": 1.0, "eval_bleu": 1.0, "eval_brevity_penalty": 1.0, "eval_length_ratio": 1.0, "eval_loss": 2.243044853210449, "eval_precisions": [ 1.0, 1.0, 1.0, 1.0 ], "eval_reference_length": 7761920, "eval_runtime": 17988.3779, "eval_samples_per_second": 0.843, "eval_steps_per_second": 0.421, "eval_translation_length": 7761920, "step": 68219 } ], "logging_steps": 500, "max_steps": 341095, "num_train_epochs": 5, "save_steps": 5000, "total_flos": 3.144579296777994e+17, "trial_name": null, "trial_params": null }