{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 34567,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 0.0001,
      "loss": 3.3228,
      "step": 500
    },
    {
      "epoch": 0.03,
      "learning_rate": 9.999994258403258e-05,
      "loss": 2.8639,
      "step": 1000
    },
    {
      "epoch": 0.04,
      "learning_rate": 9.99997703362622e-05,
      "loss": 2.8198,
      "step": 1500
    },
    {
      "epoch": 0.06,
      "learning_rate": 9.999948325708443e-05,
      "loss": 2.7858,
      "step": 2000
    },
    {
      "epoch": 0.07,
      "learning_rate": 9.999908134715859e-05,
      "loss": 2.7422,
      "step": 2500
    },
    {
      "epoch": 0.09,
      "learning_rate": 9.999856460740773e-05,
      "loss": 2.7274,
      "step": 3000
    },
    {
      "epoch": 0.1,
      "learning_rate": 9.99979330390186e-05,
      "loss": 2.6958,
      "step": 3500
    },
    {
      "epoch": 0.12,
      "learning_rate": 9.999718664344171e-05,
      "loss": 2.6617,
      "step": 4000
    },
    {
      "epoch": 0.13,
      "learning_rate": 9.999632542239125e-05,
      "loss": 2.6747,
      "step": 4500
    },
    {
      "epoch": 0.14,
      "learning_rate": 9.999534937784512e-05,
      "loss": 2.6564,
      "step": 5000
    },
    {
      "epoch": 0.16,
      "learning_rate": 9.999425851204496e-05,
      "loss": 2.585,
      "step": 5500
    },
    {
      "epoch": 0.17,
      "learning_rate": 9.99930528274961e-05,
      "loss": 2.6385,
      "step": 6000
    },
    {
      "epoch": 0.19,
      "learning_rate": 9.999173232696753e-05,
      "loss": 2.6262,
      "step": 6500
    },
    {
      "epoch": 0.2,
      "learning_rate": 9.999029701349196e-05,
      "loss": 2.6055,
      "step": 7000
    },
    {
      "epoch": 0.22,
      "learning_rate": 9.998874689036583e-05,
      "loss": 2.5917,
      "step": 7500
    },
    {
      "epoch": 0.23,
      "learning_rate": 9.998708196114922e-05,
      "loss": 2.6162,
      "step": 8000
    },
    {
      "epoch": 0.25,
      "learning_rate": 9.99853022296658e-05,
      "loss": 2.6188,
      "step": 8500
    },
    {
      "epoch": 0.26,
      "learning_rate": 9.998340770000302e-05,
      "loss": 2.5671,
      "step": 9000
    },
    {
      "epoch": 0.27,
      "learning_rate": 9.998139837651193e-05,
      "loss": 2.5897,
      "step": 9500
    },
    {
      "epoch": 0.29,
      "learning_rate": 9.997927426380721e-05,
      "loss": 2.5414,
      "step": 10000
    },
    {
      "epoch": 0.3,
      "learning_rate": 9.997703536676718e-05,
      "loss": 2.5139,
      "step": 10500
    },
    {
      "epoch": 0.32,
      "learning_rate": 9.997468169053379e-05,
      "loss": 2.5904,
      "step": 11000
    },
    {
      "epoch": 0.33,
      "learning_rate": 9.997221324051255e-05,
      "loss": 2.6288,
      "step": 11500
    },
    {
      "epoch": 0.35,
      "learning_rate": 9.996963002237263e-05,
      "loss": 2.598,
      "step": 12000
    },
    {
      "epoch": 0.36,
      "learning_rate": 9.996693204204674e-05,
      "loss": 2.5276,
      "step": 12500
    },
    {
      "epoch": 0.38,
      "learning_rate": 9.996411930573117e-05,
      "loss": 2.5817,
      "step": 13000
    },
    {
      "epoch": 0.39,
      "learning_rate": 9.996119181988575e-05,
      "loss": 2.5316,
      "step": 13500
    },
    {
      "epoch": 0.41,
      "learning_rate": 9.995814959123386e-05,
      "loss": 2.4692,
      "step": 14000
    },
    {
      "epoch": 0.42,
      "learning_rate": 9.995499262676243e-05,
      "loss": 2.5464,
      "step": 14500
    },
    {
      "epoch": 0.43,
      "learning_rate": 9.99517209337218e-05,
      "loss": 2.5222,
      "step": 15000
    },
    {
      "epoch": 0.45,
      "learning_rate": 9.994833451962592e-05,
      "loss": 2.5304,
      "step": 15500
    },
    {
      "epoch": 0.46,
      "learning_rate": 9.994483339225213e-05,
      "loss": 2.6063,
      "step": 16000
    },
    {
      "epoch": 0.48,
      "learning_rate": 9.994121755964129e-05,
      "loss": 2.5286,
      "step": 16500
    },
    {
      "epoch": 0.49,
      "learning_rate": 9.993748703009764e-05,
      "loss": 2.5273,
      "step": 17000
    },
    {
      "epoch": 0.51,
      "learning_rate": 9.993364181218885e-05,
      "loss": 2.4868,
      "step": 17500
    },
    {
      "epoch": 0.52,
      "learning_rate": 9.992968191474601e-05,
      "loss": 2.435,
      "step": 18000
    },
    {
      "epoch": 0.54,
      "learning_rate": 9.992560734686357e-05,
      "loss": 2.484,
      "step": 18500
    },
    {
      "epoch": 0.55,
      "learning_rate": 9.992141811789933e-05,
      "loss": 2.5301,
      "step": 19000
    },
    {
      "epoch": 0.56,
      "learning_rate": 9.991711423747445e-05,
      "loss": 2.4857,
      "step": 19500
    },
    {
      "epoch": 0.58,
      "learning_rate": 9.991269571547339e-05,
      "loss": 2.4958,
      "step": 20000
    },
    {
      "epoch": 0.59,
      "learning_rate": 9.99081625620439e-05,
      "loss": 2.4757,
      "step": 20500
    },
    {
      "epoch": 0.61,
      "learning_rate": 9.990351478759696e-05,
      "loss": 2.544,
      "step": 21000
    },
    {
      "epoch": 0.62,
      "learning_rate": 9.989875240280689e-05,
      "loss": 2.4796,
      "step": 21500
    },
    {
      "epoch": 0.64,
      "learning_rate": 9.989387541861111e-05,
      "loss": 2.4968,
      "step": 22000
    },
    {
      "epoch": 0.65,
      "learning_rate": 9.988888384621031e-05,
      "loss": 2.4426,
      "step": 22500
    },
    {
      "epoch": 0.67,
      "learning_rate": 9.988377769706834e-05,
      "loss": 2.4471,
      "step": 23000
    },
    {
      "epoch": 0.68,
      "learning_rate": 9.987855698291218e-05,
      "loss": 2.5022,
      "step": 23500
    },
    {
      "epoch": 0.69,
      "learning_rate": 9.98732217157319e-05,
      "loss": 2.5202,
      "step": 24000
    },
    {
      "epoch": 0.71,
      "learning_rate": 9.98677719077807e-05,
      "loss": 2.5562,
      "step": 24500
    },
    {
      "epoch": 0.72,
      "learning_rate": 9.986220757157482e-05,
      "loss": 2.4888,
      "step": 25000
    },
    {
      "epoch": 0.74,
      "learning_rate": 9.985652871989352e-05,
      "loss": 2.5049,
      "step": 25500
    },
    {
      "epoch": 0.75,
      "learning_rate": 9.98507353657791e-05,
      "loss": 2.4664,
      "step": 26000
    },
    {
      "epoch": 0.77,
      "learning_rate": 9.984482752253677e-05,
      "loss": 2.4528,
      "step": 26500
    },
    {
      "epoch": 0.78,
      "learning_rate": 9.98388052037347e-05,
      "loss": 2.4577,
      "step": 27000
    },
    {
      "epoch": 0.8,
      "learning_rate": 9.983266842320402e-05,
      "loss": 2.4889,
      "step": 27500
    },
    {
      "epoch": 0.81,
      "learning_rate": 9.982641719503866e-05,
      "loss": 2.4272,
      "step": 28000
    },
    {
      "epoch": 0.82,
      "learning_rate": 9.982005153359547e-05,
      "loss": 2.4783,
      "step": 28500
    },
    {
      "epoch": 0.84,
      "learning_rate": 9.981357145349406e-05,
      "loss": 2.4795,
      "step": 29000
    },
    {
      "epoch": 0.85,
      "learning_rate": 9.98069769696168e-05,
      "loss": 2.4807,
      "step": 29500
    },
    {
      "epoch": 0.87,
      "learning_rate": 9.980026809710888e-05,
      "loss": 2.4951,
      "step": 30000
    },
    {
      "epoch": 0.88,
      "learning_rate": 9.979344485137813e-05,
      "loss": 2.5137,
      "step": 30500
    },
    {
      "epoch": 0.9,
      "learning_rate": 9.978650724809511e-05,
      "loss": 2.5249,
      "step": 31000
    },
    {
      "epoch": 0.91,
      "learning_rate": 9.977945530319297e-05,
      "loss": 2.4092,
      "step": 31500
    },
    {
      "epoch": 0.93,
      "learning_rate": 9.977228903286746e-05,
      "loss": 2.4978,
      "step": 32000
    },
    {
      "epoch": 0.94,
      "learning_rate": 9.976500845357694e-05,
      "loss": 2.4361,
      "step": 32500
    },
    {
      "epoch": 0.95,
      "learning_rate": 9.975761358204227e-05,
      "loss": 2.4774,
      "step": 33000
    },
    {
      "epoch": 0.97,
      "learning_rate": 9.975010443524679e-05,
      "loss": 2.4662,
      "step": 33500
    },
    {
      "epoch": 0.98,
      "learning_rate": 9.974248103043629e-05,
      "loss": 2.4252,
      "step": 34000
    },
    {
      "epoch": 1.0,
      "learning_rate": 9.973474338511898e-05,
      "loss": 2.4689,
      "step": 34500
    },
    {
      "epoch": 1.0,
      "eval_bleu": 1.0,
      "eval_brevity_penalty": 1.0,
      "eval_length_ratio": 1.0,
      "eval_loss": 2.3501155376434326,
      "eval_precisions": [
        1.0,
        1.0,
        1.0,
        1.0
      ],
      "eval_reference_length": 1966592,
      "eval_runtime": 3383.1867,
      "eval_samples_per_second": 1.135,
      "eval_steps_per_second": 1.135,
      "eval_translation_length": 1966592,
      "step": 34567
    }
  ],
  "logging_steps": 500,
  "max_steps": 1037010,
  "num_train_epochs": 30,
  "save_steps": 500,
  "total_flos": 7.966891375696282e+16,
  "trial_name": null,
  "trial_params": null
}