---
# Flat configuration mapping: one setting per line.
# NOTE(review): the meanings below are inferred from the key names only;
# confirm against the code that consumes this file.

# Presumably the upper bound on the vocabulary size (integer).
max_vocab_size: 320000

# Presumably the number of documents to process (integer).
num_docs: 20000000

# Presumably the end-of-sequence marker. Single-quoted so the `|` and `>`
# characters are always read as literal text and the value stays a string.
eos_token: '<|endoftext|>'