|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 25.31645569620253, |
|
"eval_steps": 500, |
|
"global_step": 42000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.969861362266426e-05, |
|
"loss": 3.7006, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.939722724532851e-05, |
|
"loss": 3.5872, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.909584086799277e-05, |
|
"loss": 3.4617, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 12.3227, |
|
"eval_gen_len": 54.6075, |
|
"eval_loss": 3.1113245487213135, |
|
"eval_runtime": 122.2496, |
|
"eval_samples_per_second": 3.272, |
|
"eval_steps_per_second": 0.409, |
|
"step": 1659 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 4.8794454490657024e-05, |
|
"loss": 3.298, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 4.849306811332128e-05, |
|
"loss": 3.2018, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 4.8191681735985535e-05, |
|
"loss": 3.1014, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 15.8487, |
|
"eval_gen_len": 50.1125, |
|
"eval_loss": 2.8111488819122314, |
|
"eval_runtime": 92.4044, |
|
"eval_samples_per_second": 4.329, |
|
"eval_steps_per_second": 0.541, |
|
"step": 3318 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 4.789029535864979e-05, |
|
"loss": 2.9998, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 4.7588908981314046e-05, |
|
"loss": 2.883, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 4.7287522603978304e-05, |
|
"loss": 2.8409, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 20.5509, |
|
"eval_gen_len": 43.98, |
|
"eval_loss": 2.617112398147583, |
|
"eval_runtime": 70.0674, |
|
"eval_samples_per_second": 5.709, |
|
"eval_steps_per_second": 0.714, |
|
"step": 4977 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 4.6986136226642556e-05, |
|
"loss": 2.8043, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 4.6684749849306815e-05, |
|
"loss": 2.6486, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 4.638336347197107e-05, |
|
"loss": 2.6127, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 4.6081977094635326e-05, |
|
"loss": 2.5718, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 21.5273, |
|
"eval_gen_len": 40.8575, |
|
"eval_loss": 2.4335193634033203, |
|
"eval_runtime": 62.0368, |
|
"eval_samples_per_second": 6.448, |
|
"eval_steps_per_second": 0.806, |
|
"step": 6636 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 4.5780590717299585e-05, |
|
"loss": 2.4535, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 4.547920433996384e-05, |
|
"loss": 2.4269, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 4.5177817962628096e-05, |
|
"loss": 2.3852, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bleu": 24.0185, |
|
"eval_gen_len": 38.945, |
|
"eval_loss": 2.2908990383148193, |
|
"eval_runtime": 53.5509, |
|
"eval_samples_per_second": 7.47, |
|
"eval_steps_per_second": 0.934, |
|
"step": 8295 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"learning_rate": 4.487643158529235e-05, |
|
"loss": 2.3305, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"learning_rate": 4.45750452079566e-05, |
|
"loss": 2.2361, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"learning_rate": 4.427365883062086e-05, |
|
"loss": 2.2201, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bleu": 25.0722, |
|
"eval_gen_len": 38.4525, |
|
"eval_loss": 2.2150681018829346, |
|
"eval_runtime": 47.2306, |
|
"eval_samples_per_second": 8.469, |
|
"eval_steps_per_second": 1.059, |
|
"step": 9954 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 4.397227245328511e-05, |
|
"loss": 2.1955, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 4.367088607594937e-05, |
|
"loss": 2.0928, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 6.63, |
|
"learning_rate": 4.336949969861363e-05, |
|
"loss": 2.0947, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 6.93, |
|
"learning_rate": 4.306811332127788e-05, |
|
"loss": 2.0583, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_bleu": 26.051, |
|
"eval_gen_len": 40.0775, |
|
"eval_loss": 2.1219234466552734, |
|
"eval_runtime": 55.6386, |
|
"eval_samples_per_second": 7.189, |
|
"eval_steps_per_second": 0.899, |
|
"step": 11613 |
|
}, |
|
{ |
|
"epoch": 7.23, |
|
"learning_rate": 4.276672694394214e-05, |
|
"loss": 1.9657, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"learning_rate": 4.246534056660639e-05, |
|
"loss": 1.9594, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"learning_rate": 4.216395418927065e-05, |
|
"loss": 1.9464, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bleu": 27.8486, |
|
"eval_gen_len": 39.54, |
|
"eval_loss": 2.0415802001953125, |
|
"eval_runtime": 50.0785, |
|
"eval_samples_per_second": 7.987, |
|
"eval_steps_per_second": 0.998, |
|
"step": 13272 |
|
}, |
|
{ |
|
"epoch": 8.14, |
|
"learning_rate": 4.186256781193491e-05, |
|
"loss": 1.8901, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 8.44, |
|
"learning_rate": 4.1561181434599153e-05, |
|
"loss": 1.8331, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 8.74, |
|
"learning_rate": 4.125979505726341e-05, |
|
"loss": 1.8273, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_bleu": 28.6882, |
|
"eval_gen_len": 38.97, |
|
"eval_loss": 1.9714975357055664, |
|
"eval_runtime": 47.8353, |
|
"eval_samples_per_second": 8.362, |
|
"eval_steps_per_second": 1.045, |
|
"step": 14931 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"learning_rate": 4.095840867992767e-05, |
|
"loss": 1.8071, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 9.34, |
|
"learning_rate": 4.065702230259192e-05, |
|
"loss": 1.724, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 9.64, |
|
"learning_rate": 4.035563592525618e-05, |
|
"loss": 1.7173, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"learning_rate": 4.0054249547920434e-05, |
|
"loss": 1.7341, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_bleu": 29.4158, |
|
"eval_gen_len": 39.27, |
|
"eval_loss": 1.922670602798462, |
|
"eval_runtime": 48.3901, |
|
"eval_samples_per_second": 8.266, |
|
"eval_steps_per_second": 1.033, |
|
"step": 16590 |
|
}, |
|
{ |
|
"epoch": 10.25, |
|
"learning_rate": 3.975286317058469e-05, |
|
"loss": 1.6432, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 10.55, |
|
"learning_rate": 3.945147679324895e-05, |
|
"loss": 1.6414, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 10.85, |
|
"learning_rate": 3.9150090415913203e-05, |
|
"loss": 1.6285, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_bleu": 29.6336, |
|
"eval_gen_len": 39.7025, |
|
"eval_loss": 1.8723887205123901, |
|
"eval_runtime": 49.1746, |
|
"eval_samples_per_second": 8.134, |
|
"eval_steps_per_second": 1.017, |
|
"step": 18249 |
|
}, |
|
{ |
|
"epoch": 11.15, |
|
"learning_rate": 3.884870403857746e-05, |
|
"loss": 1.5753, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 11.45, |
|
"learning_rate": 3.8547317661241714e-05, |
|
"loss": 1.5525, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 11.75, |
|
"learning_rate": 3.8245931283905966e-05, |
|
"loss": 1.5466, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_bleu": 31.3296, |
|
"eval_gen_len": 39.8675, |
|
"eval_loss": 1.816349744796753, |
|
"eval_runtime": 49.6256, |
|
"eval_samples_per_second": 8.06, |
|
"eval_steps_per_second": 1.008, |
|
"step": 19908 |
|
}, |
|
{ |
|
"epoch": 12.06, |
|
"learning_rate": 3.7944544906570225e-05, |
|
"loss": 1.5254, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 12.36, |
|
"learning_rate": 3.764315852923448e-05, |
|
"loss": 1.4676, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 12.66, |
|
"learning_rate": 3.7341772151898736e-05, |
|
"loss": 1.4678, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 12.96, |
|
"learning_rate": 3.7040385774562995e-05, |
|
"loss": 1.4607, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_bleu": 31.7515, |
|
"eval_gen_len": 38.405, |
|
"eval_loss": 1.7929939031600952, |
|
"eval_runtime": 44.5172, |
|
"eval_samples_per_second": 8.985, |
|
"eval_steps_per_second": 1.123, |
|
"step": 21567 |
|
}, |
|
{ |
|
"epoch": 13.26, |
|
"learning_rate": 3.6738999397227247e-05, |
|
"loss": 1.3787, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 13.56, |
|
"learning_rate": 3.6437613019891505e-05, |
|
"loss": 1.4049, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 13.86, |
|
"learning_rate": 3.613622664255576e-05, |
|
"loss": 1.385, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_bleu": 32.458, |
|
"eval_gen_len": 39.4675, |
|
"eval_loss": 1.7518789768218994, |
|
"eval_runtime": 49.1331, |
|
"eval_samples_per_second": 8.141, |
|
"eval_steps_per_second": 1.018, |
|
"step": 23226 |
|
}, |
|
{ |
|
"epoch": 14.17, |
|
"learning_rate": 3.5834840265220016e-05, |
|
"loss": 1.3403, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 14.47, |
|
"learning_rate": 3.553345388788427e-05, |
|
"loss": 1.3166, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 14.77, |
|
"learning_rate": 3.523206751054853e-05, |
|
"loss": 1.321, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_bleu": 32.9411, |
|
"eval_gen_len": 38.8025, |
|
"eval_loss": 1.7194263935089111, |
|
"eval_runtime": 45.6686, |
|
"eval_samples_per_second": 8.759, |
|
"eval_steps_per_second": 1.095, |
|
"step": 24885 |
|
}, |
|
{ |
|
"epoch": 15.07, |
|
"learning_rate": 3.493068113321278e-05, |
|
"loss": 1.2976, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 15.37, |
|
"learning_rate": 3.462929475587703e-05, |
|
"loss": 1.2358, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 15.67, |
|
"learning_rate": 3.432790837854129e-05, |
|
"loss": 1.2592, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 15.97, |
|
"learning_rate": 3.402652200120555e-05, |
|
"loss": 1.2662, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_bleu": 33.8478, |
|
"eval_gen_len": 39.1275, |
|
"eval_loss": 1.6950603723526, |
|
"eval_runtime": 49.9911, |
|
"eval_samples_per_second": 8.001, |
|
"eval_steps_per_second": 1.0, |
|
"step": 26544 |
|
}, |
|
{ |
|
"epoch": 16.27, |
|
"learning_rate": 3.37251356238698e-05, |
|
"loss": 1.1963, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 16.58, |
|
"learning_rate": 3.342374924653406e-05, |
|
"loss": 1.2002, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 16.88, |
|
"learning_rate": 3.312236286919831e-05, |
|
"loss": 1.1939, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_bleu": 34.5277, |
|
"eval_gen_len": 39.0225, |
|
"eval_loss": 1.685713529586792, |
|
"eval_runtime": 49.4943, |
|
"eval_samples_per_second": 8.082, |
|
"eval_steps_per_second": 1.01, |
|
"step": 28203 |
|
}, |
|
{ |
|
"epoch": 17.18, |
|
"learning_rate": 3.282097649186257e-05, |
|
"loss": 1.1459, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 17.48, |
|
"learning_rate": 3.251959011452683e-05, |
|
"loss": 1.1326, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 17.78, |
|
"learning_rate": 3.221820373719108e-05, |
|
"loss": 1.1406, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_bleu": 35.8691, |
|
"eval_gen_len": 38.76, |
|
"eval_loss": 1.6470690965652466, |
|
"eval_runtime": 45.2962, |
|
"eval_samples_per_second": 8.831, |
|
"eval_steps_per_second": 1.104, |
|
"step": 29862 |
|
}, |
|
{ |
|
"epoch": 18.08, |
|
"learning_rate": 3.191681735985534e-05, |
|
"loss": 1.1292, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 18.38, |
|
"learning_rate": 3.161543098251959e-05, |
|
"loss": 1.071, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 18.69, |
|
"learning_rate": 3.1314044605183844e-05, |
|
"loss": 1.0918, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"learning_rate": 3.10126582278481e-05, |
|
"loss": 1.0759, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_bleu": 36.4448, |
|
"eval_gen_len": 38.6925, |
|
"eval_loss": 1.6456927061080933, |
|
"eval_runtime": 46.4772, |
|
"eval_samples_per_second": 8.606, |
|
"eval_steps_per_second": 1.076, |
|
"step": 31521 |
|
}, |
|
{ |
|
"epoch": 19.29, |
|
"learning_rate": 3.0711271850512355e-05, |
|
"loss": 1.0193, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 19.59, |
|
"learning_rate": 3.0409885473176613e-05, |
|
"loss": 1.0248, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 19.89, |
|
"learning_rate": 3.010849909584087e-05, |
|
"loss": 1.0378, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_bleu": 37.2905, |
|
"eval_gen_len": 38.945, |
|
"eval_loss": 1.6285927295684814, |
|
"eval_runtime": 49.741, |
|
"eval_samples_per_second": 8.042, |
|
"eval_steps_per_second": 1.005, |
|
"step": 33180 |
|
}, |
|
{ |
|
"epoch": 20.19, |
|
"learning_rate": 2.9807112718505124e-05, |
|
"loss": 0.9915, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 20.49, |
|
"learning_rate": 2.9505726341169383e-05, |
|
"loss": 0.9848, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 20.8, |
|
"learning_rate": 2.9204339963833638e-05, |
|
"loss": 0.9851, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_bleu": 38.4264, |
|
"eval_gen_len": 38.7175, |
|
"eval_loss": 1.5997543334960938, |
|
"eval_runtime": 44.5032, |
|
"eval_samples_per_second": 8.988, |
|
"eval_steps_per_second": 1.124, |
|
"step": 34839 |
|
}, |
|
{ |
|
"epoch": 21.1, |
|
"learning_rate": 2.8902953586497894e-05, |
|
"loss": 0.97, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 21.4, |
|
"learning_rate": 2.8601567209162146e-05, |
|
"loss": 0.9436, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 21.7, |
|
"learning_rate": 2.83001808318264e-05, |
|
"loss": 0.9372, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_bleu": 37.9614, |
|
"eval_gen_len": 38.9425, |
|
"eval_loss": 1.607030987739563, |
|
"eval_runtime": 47.0014, |
|
"eval_samples_per_second": 8.51, |
|
"eval_steps_per_second": 1.064, |
|
"step": 36498 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"learning_rate": 2.7998794454490656e-05, |
|
"loss": 0.9437, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 22.3, |
|
"learning_rate": 2.7697408077154912e-05, |
|
"loss": 0.8917, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 22.6, |
|
"learning_rate": 2.7396021699819167e-05, |
|
"loss": 0.8692, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 22.91, |
|
"learning_rate": 2.7094635322483426e-05, |
|
"loss": 0.9191, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_bleu": 38.8655, |
|
"eval_gen_len": 38.8825, |
|
"eval_loss": 1.5746939182281494, |
|
"eval_runtime": 50.4993, |
|
"eval_samples_per_second": 7.921, |
|
"eval_steps_per_second": 0.99, |
|
"step": 38157 |
|
}, |
|
{ |
|
"epoch": 23.21, |
|
"learning_rate": 2.679324894514768e-05, |
|
"loss": 0.8555, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 23.51, |
|
"learning_rate": 2.6491862567811937e-05, |
|
"loss": 0.8533, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 23.81, |
|
"learning_rate": 2.6190476190476192e-05, |
|
"loss": 0.8673, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_bleu": 39.4605, |
|
"eval_gen_len": 39.0175, |
|
"eval_loss": 1.5650146007537842, |
|
"eval_runtime": 50.8092, |
|
"eval_samples_per_second": 7.873, |
|
"eval_steps_per_second": 0.984, |
|
"step": 39816 |
|
}, |
|
{ |
|
"epoch": 24.11, |
|
"learning_rate": 2.5889089813140448e-05, |
|
"loss": 0.841, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 24.41, |
|
"learning_rate": 2.5587703435804706e-05, |
|
"loss": 0.8155, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 24.71, |
|
"learning_rate": 2.5286317058468955e-05, |
|
"loss": 0.811, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_bleu": 39.6804, |
|
"eval_gen_len": 38.77, |
|
"eval_loss": 1.5603779554367065, |
|
"eval_runtime": 45.7389, |
|
"eval_samples_per_second": 8.745, |
|
"eval_steps_per_second": 1.093, |
|
"step": 41475 |
|
}, |
|
{ |
|
"epoch": 25.02, |
|
"learning_rate": 2.4984930681133214e-05, |
|
"loss": 0.8335, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 25.32, |
|
"learning_rate": 2.468354430379747e-05, |
|
"loss": 0.7688, |
|
"step": 42000 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 82950, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"total_flos": 1.0127259403812864e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|