epoch = 1.0 total_flos = 4.801972373497037e+16 train_loss = 0.6182623529500156 train_runtime = 1876.03 train_samples = 69399 train_samples_per_second = 36.992 train_steps_per_second = 2.312