epoch = 2.0 total_flos = 9.602043799869562e+16 train_loss = 0.42129573760465744 train_runtime = 3759.5481 train_samples = 69399 train_samples_per_second = 36.919 train_steps_per_second = 2.308