{ "best_metric": 1.5654487609863281, "best_model_checkpoint": "dq158/pingusPongus/checkpoint-6323", "epoch": 13.0, "eval_steps": 500, "global_step": 82199, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "learning_rate": 5e-06, "loss": 1.8585, "step": 500 }, { "epoch": 0.16, "learning_rate": 4.999805607800008e-06, "loss": 1.823, "step": 1000 }, { "epoch": 0.24, "learning_rate": 4.999222461430692e-06, "loss": 1.8388, "step": 1500 }, { "epoch": 0.32, "learning_rate": 4.998250651579336e-06, "loss": 1.8372, "step": 2000 }, { "epoch": 0.4, "learning_rate": 4.996890329375747e-06, "loss": 1.8066, "step": 2500 }, { "epoch": 0.47, "learning_rate": 4.995141706368742e-06, "loss": 1.8485, "step": 3000 }, { "epoch": 0.55, "learning_rate": 4.993005054493262e-06, "loss": 1.8243, "step": 3500 }, { "epoch": 0.63, "learning_rate": 4.990480706028073e-06, "loss": 1.8278, "step": 4000 }, { "epoch": 0.71, "learning_rate": 4.987569053544098e-06, "loss": 1.8126, "step": 4500 }, { "epoch": 0.79, "learning_rate": 4.98427054984336e-06, "loss": 1.8277, "step": 5000 }, { "epoch": 0.87, "learning_rate": 4.980585707888573e-06, "loss": 1.8475, "step": 5500 }, { "epoch": 0.95, "learning_rate": 4.976515100723365e-06, "loss": 1.8441, "step": 6000 }, { "epoch": 1.0, "eval_bleu": 1.0, "eval_brevity_penalty": 1.0, "eval_length_ratio": 1.0, "eval_loss": 1.5654487609863281, "eval_precisions": [ 1.0, 1.0, 1.0, 1.0 ], "eval_reference_length": 52412, "eval_runtime": 683.1649, "eval_samples_per_second": 4.115, "eval_steps_per_second": 1.029, "eval_translation_length": 52412, "step": 6323 }, { "epoch": 1.03, "learning_rate": 4.972059361383162e-06, "loss": 1.8281, "step": 6500 }, { "epoch": 1.11, "learning_rate": 4.9672191827967395e-06, "loss": 1.8431, "step": 7000 }, { "epoch": 1.19, "learning_rate": 4.961995317678472e-06, "loss": 1.8261, "step": 7500 }, { "epoch": 1.27, "learning_rate": 4.9563885784112645e-06, "loss": 1.8253, "step": 8000 }, { "epoch": 1.34, "learning_rate": 4.950399836920221e-06, "loss": 1.847, "step": 8500 }, { "epoch": 1.42, "learning_rate": 4.944030024537049e-06, "loss": 1.8209, "step": 9000 }, { "epoch": 1.5, "learning_rate": 4.937280131855223e-06, "loss": 1.8153, "step": 9500 }, { "epoch": 1.58, "learning_rate": 4.930151208575933e-06, "loss": 1.8591, "step": 10000 }, { "epoch": 1.66, "learning_rate": 4.9226443633448426e-06, "loss": 1.7892, "step": 10500 }, { "epoch": 1.74, "learning_rate": 4.91476076357968e-06, "loss": 1.8601, "step": 11000 }, { "epoch": 1.82, "learning_rate": 4.906501635288687e-06, "loss": 1.8231, "step": 11500 }, { "epoch": 1.9, "learning_rate": 4.8978682628799575e-06, "loss": 1.7805, "step": 12000 }, { "epoch": 1.98, "learning_rate": 4.888861988961698e-06, "loss": 1.8429, "step": 12500 }, { "epoch": 2.0, "eval_bleu": 1.0, "eval_brevity_penalty": 1.0, "eval_length_ratio": 1.0, "eval_loss": 1.5659914016723633, "eval_precisions": [ 1.0, 1.0, 1.0, 1.0 ], "eval_reference_length": 52542, "eval_runtime": 676.0746, "eval_samples_per_second": 4.158, "eval_steps_per_second": 1.04, "eval_translation_length": 52542, "step": 12646 }, { "epoch": 2.06, "learning_rate": 4.879484214133427e-06, "loss": 1.838, "step": 13000 }, { "epoch": 2.14, "learning_rate": 4.8697363967681696e-06, "loss": 1.8539, "step": 13500 }, { "epoch": 2.21, "learning_rate": 4.8596200527856564e-06, "loss": 1.8081, "step": 14000 }, { "epoch": 2.29, "learning_rate": 4.849136755416576e-06, "loss": 1.8135, "step": 14500 }, { "epoch": 2.37, "learning_rate": 4.838288134957921e-06, "loss": 1.8273, "step": 15000 }, { "epoch": 2.45, "learning_rate": 4.827075878519448e-06, "loss": 1.8316, "step": 15500 }, { "epoch": 2.53, "learning_rate": 4.815501729761316e-06, "loss": 1.8101, "step": 16000 }, { "epoch": 2.61, "learning_rate": 4.803567488622915e-06, "loss": 1.8257, "step": 16500 }, { "epoch": 2.69, "learning_rate": 4.791275011042958e-06, "loss": 1.8036, "step": 17000 }, { "epoch": 2.77, "learning_rate": 4.778626208670853e-06, "loss": 1.8235, "step": 17500 }, { "epoch": 2.85, "learning_rate": 4.765623048569417e-06, "loss": 1.8133, "step": 18000 }, { "epoch": 2.93, "learning_rate": 4.752267552908968e-06, "loss": 1.8316, "step": 18500 }, { "epoch": 3.0, "eval_bleu": 1.0, "eval_brevity_penalty": 1.0, "eval_length_ratio": 1.0, "eval_loss": 1.5685368776321411, "eval_precisions": [ 1.0, 1.0, 1.0, 1.0 ], "eval_reference_length": 52485, "eval_runtime": 677.2148, "eval_samples_per_second": 4.151, "eval_steps_per_second": 1.038, "eval_translation_length": 52485, "step": 18969 }, { "epoch": 3.0, "learning_rate": 4.738561798652854e-06, "loss": 1.7842, "step": 19000 }, { "epoch": 3.08, "learning_rate": 4.724507917234451e-06, "loss": 1.8069, "step": 19500 }, { "epoch": 3.16, "learning_rate": 4.710108094225704e-06, "loss": 1.7776, "step": 20000 }, { "epoch": 3.24, "learning_rate": 4.695364568997228e-06, "loss": 1.8232, "step": 20500 }, { "epoch": 3.32, "learning_rate": 4.680279634370071e-06, "loss": 1.8125, "step": 21000 }, { "epoch": 3.4, "learning_rate": 4.664855636259134e-06, "loss": 1.841, "step": 21500 }, { "epoch": 3.48, "learning_rate": 4.649094973308358e-06, "loss": 1.8519, "step": 22000 }, { "epoch": 3.56, "learning_rate": 4.633000096517698e-06, "loss": 1.8293, "step": 22500 }, { "epoch": 3.64, "learning_rate": 4.61657350886196e-06, "loss": 1.8121, "step": 23000 }, { "epoch": 3.72, "learning_rate": 4.5998177649015565e-06, "loss": 1.7916, "step": 23500 }, { "epoch": 3.8, "learning_rate": 4.582735470385229e-06, "loss": 1.8352, "step": 24000 }, { "epoch": 3.87, "learning_rate": 4.56532928184483e-06, "loss": 1.7778, "step": 24500 }, { "epoch": 3.95, "learning_rate": 4.547601906182184e-06, "loss": 1.815, "step": 25000 }, { "epoch": 4.0, "eval_bleu": 1.0, "eval_brevity_penalty": 1.0, "eval_length_ratio": 1.0, "eval_loss": 1.5691736936569214, "eval_precisions": [ 1.0, 1.0, 1.0, 1.0 ], "eval_reference_length": 52469, "eval_runtime": 676.8094, "eval_samples_per_second": 4.153, "eval_steps_per_second": 1.039, "eval_translation_length": 52469, "step": 25292 }, { "epoch": 4.03, "learning_rate": 4.529556100248137e-06, "loss": 1.8259, "step": 25500 }, { "epoch": 4.11, "learning_rate": 4.511194670413822e-06, "loss": 1.8127, "step": 26000 }, { "epoch": 4.19, "learning_rate": 4.49252047213423e-06, "loss": 1.7847, "step": 26500 }, { "epoch": 4.27, "learning_rate": 4.473536409504151e-06, "loss": 1.8137, "step": 27000 }, { "epoch": 4.35, "learning_rate": 4.454245434806545e-06, "loss": 1.8443, "step": 27500 }, { "epoch": 4.43, "learning_rate": 4.4346505480534205e-06, "loss": 1.7972, "step": 28000 }, { "epoch": 4.51, "learning_rate": 4.4147547965192934e-06, "loss": 1.8035, "step": 28500 }, { "epoch": 4.59, "learning_rate": 4.394561274267293e-06, "loss": 1.7983, "step": 29000 }, { "epoch": 4.67, "learning_rate": 4.374073121667992e-06, "loss": 1.8227, "step": 29500 }, { "epoch": 4.74, "learning_rate": 4.3532935249110366e-06, "loss": 1.7775, "step": 30000 }, { "epoch": 4.82, "learning_rate": 4.3322257155096496e-06, "loss": 1.8181, "step": 30500 }, { "epoch": 4.9, "learning_rate": 4.310872969798085e-06, "loss": 1.8333, "step": 31000 }, { "epoch": 4.98, "learning_rate": 4.289238608422115e-06, "loss": 1.8452, "step": 31500 }, { "epoch": 5.0, "eval_bleu": 1.0, "eval_brevity_penalty": 1.0, "eval_length_ratio": 1.0, "eval_loss": 1.56978178024292, "eval_precisions": [ 1.0, 1.0, 1.0, 1.0 ], "eval_reference_length": 52449, "eval_runtime": 676.5149, "eval_samples_per_second": 4.155, "eval_steps_per_second": 1.039, "eval_translation_length": 52449, "step": 31615 }, { "epoch": 5.06, "learning_rate": 4.267325995822624e-06, "loss": 1.8142, "step": 32000 }, { "epoch": 5.14, "learning_rate": 4.2451385397123864e-06, "loss": 1.8047, "step": 32500 }, { "epoch": 5.22, "learning_rate": 4.222679690546128e-06, "loss": 1.8006, "step": 33000 }, { "epoch": 5.3, "learning_rate": 4.199952940983926e-06, "loss": 1.7971, "step": 33500 }, { "epoch": 5.38, "learning_rate": 4.176961825348059e-06, "loss": 1.825, "step": 34000 }, { "epoch": 5.46, "learning_rate": 4.1537099190733656e-06, "loss": 1.8121, "step": 34500 }, { "epoch": 5.54, "learning_rate": 4.130200838151217e-06, "loss": 1.8179, "step": 35000 }, { "epoch": 5.61, "learning_rate": 4.106438238567183e-06, "loss": 1.8005, "step": 35500 }, { "epoch": 5.69, "learning_rate": 4.08242581573247e-06, "loss": 1.8367, "step": 36000 }, { "epoch": 5.77, "learning_rate": 4.058167303909241e-06, "loss": 1.8062, "step": 36500 }, { "epoch": 5.85, "learning_rate": 4.033666475629881e-06, "loss": 1.8092, "step": 37000 }, { "epoch": 5.93, "learning_rate": 4.008927141110319e-06, "loss": 1.7638, "step": 37500 }, { "epoch": 6.0, "eval_bleu": 1.0, "eval_brevity_penalty": 1.0, "eval_length_ratio": 1.0, "eval_loss": 1.5709949731826782, "eval_precisions": [ 1.0, 1.0, 1.0, 1.0 ], "eval_reference_length": 52568, "eval_runtime": 677.5535, "eval_samples_per_second": 4.149, "eval_steps_per_second": 1.038, "eval_translation_length": 52568, "step": 37938 }, { "epoch": 6.01, "learning_rate": 3.9839531476574855e-06, "loss": 1.801, "step": 38000 }, { "epoch": 6.09, "learning_rate": 3.958748379071004e-06, "loss": 1.7813, "step": 38500 }, { "epoch": 6.17, "learning_rate": 3.933316755039209e-06, "loss": 1.7742, "step": 39000 }, { "epoch": 6.25, "learning_rate": 3.9076622305295755e-06, "loss": 1.7852, "step": 39500 }, { "epoch": 6.33, "learning_rate": 3.88178879517367e-06, "loss": 1.8312, "step": 40000 }, { "epoch": 6.41, "learning_rate": 3.855700472646708e-06, "loss": 1.8144, "step": 40500 }, { "epoch": 6.48, "learning_rate": 3.82940132004182e-06, "loss": 1.7856, "step": 41000 }, { "epoch": 6.56, "learning_rate": 3.8028954272391116e-06, "loss": 1.8139, "step": 41500 }, { "epoch": 6.64, "learning_rate": 3.7761869162696334e-06, "loss": 1.8018, "step": 42000 }, { "epoch": 6.72, "learning_rate": 3.7492799406743512e-06, "loss": 1.7771, "step": 42500 }, { "epoch": 6.8, "learning_rate": 3.722178684858209e-06, "loss": 1.8217, "step": 43000 }, { "epoch": 6.88, "learning_rate": 3.6948873634394e-06, "loss": 1.8276, "step": 43500 }, { "epoch": 6.96, "learning_rate": 3.667410220593933e-06, "loss": 1.8267, "step": 44000 }, { "epoch": 7.0, "eval_bleu": 1.0, "eval_brevity_penalty": 1.0, "eval_length_ratio": 1.0, "eval_loss": 1.571208119392395, "eval_precisions": [ 1.0, 1.0, 1.0, 1.0 ], "eval_reference_length": 52554, "eval_runtime": 677.3503, "eval_samples_per_second": 4.15, "eval_steps_per_second": 1.038, "eval_translation_length": 52554, "step": 44261 }, { "epoch": 7.04, "learning_rate": 3.639751529395606e-06, "loss": 1.7645, "step": 44500 }, { "epoch": 7.12, "learning_rate": 3.611915591151483e-06, "loss": 1.8167, "step": 45000 }, { "epoch": 7.2, "learning_rate": 3.5839067347329844e-06, "loss": 1.808, "step": 45500 }, { "epoch": 7.28, "learning_rate": 3.5557293159026845e-06, "loss": 1.7742, "step": 46000 }, { "epoch": 7.35, "learning_rate": 3.5273877166369326e-06, "loss": 1.784, "step": 46500 }, { "epoch": 7.43, "learning_rate": 3.4988863444443942e-06, "loss": 1.7732, "step": 47000 }, { "epoch": 7.51, "learning_rate": 3.4702296316806243e-06, "loss": 1.8029, "step": 47500 }, { "epoch": 7.59, "learning_rate": 3.4414220348587744e-06, "loss": 1.8167, "step": 48000 }, { "epoch": 7.67, "learning_rate": 3.412468033956543e-06, "loss": 1.8037, "step": 48500 }, { "epoch": 7.75, "learning_rate": 3.3833721317194756e-06, "loss": 1.7689, "step": 49000 }, { "epoch": 7.83, "learning_rate": 3.3541388529607303e-06, "loss": 1.8414, "step": 49500 }, { "epoch": 7.91, "learning_rate": 3.324772743857404e-06, "loss": 1.795, "step": 50000 }, { "epoch": 7.99, "learning_rate": 3.2952783712435406e-06, "loss": 1.8108, "step": 50500 }, { "epoch": 8.0, "eval_bleu": 1.0, "eval_brevity_penalty": 1.0, "eval_length_ratio": 1.0, "eval_loss": 1.5723803043365479, "eval_precisions": [ 1.0, 1.0, 1.0, 1.0 ], "eval_reference_length": 52356, "eval_runtime": 677.354, "eval_samples_per_second": 4.15, "eval_steps_per_second": 1.038, "eval_translation_length": 52356, "step": 50584 }, { "epoch": 8.07, "learning_rate": 3.265660321899923e-06, "loss": 1.8002, "step": 51000 }, { "epoch": 8.14, "learning_rate": 3.235923201840768e-06, "loss": 1.7785, "step": 51500 }, { "epoch": 8.22, "learning_rate": 3.2060716355974274e-06, "loss": 1.7734, "step": 52000 }, { "epoch": 8.3, "learning_rate": 3.1761102654992106e-06, "loss": 1.8028, "step": 52500 }, { "epoch": 8.38, "learning_rate": 3.1460437509514345e-06, "loss": 1.7929, "step": 53000 }, { "epoch": 8.46, "learning_rate": 3.115876767710828e-06, "loss": 1.8039, "step": 53500 }, { "epoch": 8.54, "learning_rate": 3.0856140071583806e-06, "loss": 1.8066, "step": 54000 }, { "epoch": 8.62, "learning_rate": 3.0552601755697765e-06, "loss": 1.7612, "step": 54500 }, { "epoch": 8.7, "learning_rate": 3.024819993383493e-06, "loss": 1.8281, "step": 55000 }, { "epoch": 8.78, "learning_rate": 2.9942981944667193e-06, "loss": 1.7766, "step": 55500 }, { "epoch": 8.86, "learning_rate": 2.963699525379166e-06, "loss": 1.8176, "step": 56000 }, { "epoch": 8.94, "learning_rate": 2.933028744634912e-06, "loss": 1.79, "step": 56500 }, { "epoch": 9.0, "eval_bleu": 1.0, "eval_brevity_penalty": 1.0, "eval_length_ratio": 1.0, "eval_loss": 1.5721025466918945, "eval_precisions": [ 1.0, 1.0, 1.0, 1.0 ], "eval_reference_length": 52640, "eval_runtime": 678.5345, "eval_samples_per_second": 4.143, "eval_steps_per_second": 1.036, "eval_translation_length": 52640, "step": 56907 }, { "epoch": 9.01, "learning_rate": 2.9022906219623958e-06, "loss": 1.8089, "step": 57000 }, { "epoch": 9.09, "learning_rate": 2.871489937562647e-06, "loss": 1.7695, "step": 57500 }, { "epoch": 9.17, "learning_rate": 2.8406314813659073e-06, "loss": 1.7845, "step": 58000 }, { "epoch": 9.25, "learning_rate": 2.8097200522867294e-06, "loss": 1.7954, "step": 58500 }, { "epoch": 9.33, "learning_rate": 2.7787604574776745e-06, "loss": 1.8204, "step": 59000 }, { "epoch": 9.41, "learning_rate": 2.747757511581739e-06, "loss": 1.7786, "step": 59500 }, { "epoch": 9.49, "learning_rate": 2.716716035983611e-06, "loss": 1.7995, "step": 60000 }, { "epoch": 9.57, "learning_rate": 2.685640858059876e-06, "loss": 1.8058, "step": 60500 }, { "epoch": 9.65, "learning_rate": 2.6545368104282955e-06, "loss": 1.7961, "step": 61000 }, { "epoch": 9.73, "learning_rate": 2.623408730196268e-06, "loss": 1.7866, "step": 61500 }, { "epoch": 9.81, "learning_rate": 2.592261458208591e-06, "loss": 1.7643, "step": 62000 }, { "epoch": 9.88, "learning_rate": 2.5610998382946463e-06, "loss": 1.7679, "step": 62500 }, { "epoch": 9.96, "learning_rate": 2.529928716515112e-06, "loss": 1.8195, "step": 63000 }, { "epoch": 10.0, "eval_bleu": 1.0, "eval_brevity_penalty": 1.0, "eval_length_ratio": 1.0, "eval_loss": 1.5732102394104004, "eval_precisions": [ 1.0, 1.0, 1.0, 1.0 ], "eval_reference_length": 52485, "eval_runtime": 677.793, "eval_samples_per_second": 4.147, "eval_steps_per_second": 1.037, "eval_translation_length": 52485, "step": 63230 }, { "epoch": 10.04, "learning_rate": 2.498752940408342e-06, "loss": 1.7938, "step": 63500 }, { "epoch": 10.12, "learning_rate": 2.4675773582364977e-06, "loss": 1.7688, "step": 64000 }, { "epoch": 10.2, "learning_rate": 2.436406818231583e-06, "loss": 1.7701, "step": 64500 }, { "epoch": 10.28, "learning_rate": 2.4052461678414753e-06, "loss": 1.7821, "step": 65000 }, { "epoch": 10.36, "learning_rate": 2.37410025297608e-06, "loss": 1.8251, "step": 65500 }, { "epoch": 10.44, "learning_rate": 2.342973917253726e-06, "loss": 1.7384, "step": 66000 }, { "epoch": 10.52, "learning_rate": 2.3118720012479183e-06, "loss": 1.8001, "step": 66500 }, { "epoch": 10.6, "learning_rate": 2.280799341734556e-06, "loss": 1.8386, "step": 67000 }, { "epoch": 10.68, "learning_rate": 2.249760770939754e-06, "loss": 1.8098, "step": 67500 }, { "epoch": 10.75, "learning_rate": 2.218761115788362e-06, "loss": 1.8059, "step": 68000 }, { "epoch": 10.83, "learning_rate": 2.1878051971533093e-06, "loss": 1.757, "step": 68500 }, { "epoch": 10.91, "learning_rate": 2.156897829105898e-06, "loss": 1.8037, "step": 69000 }, { "epoch": 10.99, "learning_rate": 2.1260438181671446e-06, "loss": 1.7714, "step": 69500 }, { "epoch": 11.0, "eval_bleu": 1.0, "eval_brevity_penalty": 1.0, "eval_length_ratio": 1.0, "eval_loss": 1.5735211372375488, "eval_precisions": [ 1.0, 1.0, 1.0, 1.0 ], "eval_reference_length": 52469, "eval_runtime": 678.026, "eval_samples_per_second": 4.146, "eval_steps_per_second": 1.037, "eval_translation_length": 52469, "step": 69553 }, { "epoch": 11.07, "learning_rate": 2.0952479625603017e-06, "loss": 1.7783, "step": 70000 }, { "epoch": 11.15, "learning_rate": 2.0645150514646657e-06, "loss": 1.7443, "step": 70500 }, { "epoch": 11.23, "learning_rate": 2.0338498642707977e-06, "loss": 1.7678, "step": 71000 }, { "epoch": 11.31, "learning_rate": 2.0032571698372577e-06, "loss": 1.7786, "step": 71500 }, { "epoch": 11.39, "learning_rate": 1.9727417257489874e-06, "loss": 1.7768, "step": 72000 }, { "epoch": 11.47, "learning_rate": 1.9423082775774337e-06, "loss": 1.7953, "step": 72500 }, { "epoch": 11.55, "learning_rate": 1.9119615581425524e-06, "loss": 1.7715, "step": 73000 }, { "epoch": 11.62, "learning_rate": 1.881706286776785e-06, "loss": 1.8047, "step": 73500 }, { "epoch": 11.7, "learning_rate": 1.8515471685911402e-06, "loss": 1.7781, "step": 74000 }, { "epoch": 11.78, "learning_rate": 1.821488893743488e-06, "loss": 1.8197, "step": 74500 }, { "epoch": 11.86, "learning_rate": 1.7915361367091677e-06, "loss": 1.8159, "step": 75000 }, { "epoch": 11.94, "learning_rate": 1.7616935555540475e-06, "loss": 1.8004, "step": 75500 }, { "epoch": 12.0, "eval_bleu": 1.0, "eval_brevity_penalty": 1.0, "eval_length_ratio": 1.0, "eval_loss": 1.5739296674728394, "eval_precisions": [ 1.0, 1.0, 1.0, 1.0 ], "eval_reference_length": 52457, "eval_runtime": 676.9983, "eval_samples_per_second": 4.152, "eval_steps_per_second": 1.038, "eval_translation_length": 52457, "step": 75876 }, { "epoch": 12.02, "learning_rate": 1.7319657912101309e-06, "loss": 1.7871, "step": 76000 }, { "epoch": 12.1, "learning_rate": 1.7023574667538268e-06, "loss": 1.7728, "step": 76500 }, { "epoch": 12.18, "learning_rate": 1.6728731866869999e-06, "loss": 1.792, "step": 77000 }, { "epoch": 12.26, "learning_rate": 1.6435175362209033e-06, "loss": 1.8009, "step": 77500 }, { "epoch": 12.34, "learning_rate": 1.6142950805631178e-06, "loss": 1.751, "step": 78000 }, { "epoch": 12.41, "learning_rate": 1.5852103642075995e-06, "loss": 1.7877, "step": 78500 }, { "epoch": 12.49, "learning_rate": 1.5562679102279453e-06, "loss": 1.7936, "step": 79000 }, { "epoch": 12.57, "learning_rate": 1.5274722195740005e-06, "loss": 1.7884, "step": 79500 }, { "epoch": 12.65, "learning_rate": 1.4988277703718882e-06, "loss": 1.7617, "step": 80000 }, { "epoch": 12.73, "learning_rate": 1.4703390172276072e-06, "loss": 1.7916, "step": 80500 }, { "epoch": 12.81, "learning_rate": 1.4420103905342767e-06, "loss": 1.7773, "step": 81000 }, { "epoch": 12.89, "learning_rate": 1.4138462957831472e-06, "loss": 1.7798, "step": 81500 }, { "epoch": 12.97, "learning_rate": 1.3858511128784937e-06, "loss": 1.7658, "step": 82000 }, { "epoch": 13.0, "eval_bleu": 1.0, "eval_brevity_penalty": 1.0, "eval_length_ratio": 1.0, "eval_loss": 1.5740926265716553, "eval_precisions": [ 1.0, 1.0, 1.0, 1.0 ], "eval_reference_length": 52468, "eval_runtime": 677.6429, "eval_samples_per_second": 4.148, "eval_steps_per_second": 1.037, "eval_translation_length": 52468, "step": 82199 } ], "logging_steps": 500, "max_steps": 126460, "num_train_epochs": 20, "save_steps": 500, "total_flos": 2.251365766099108e+17, "trial_name": null, "trial_params": null }