# NOTE(review): the meaning of each key below is inferred from its name only;
# confirm against the code that consumes this file.

# Presumably an upper bound on the vocabulary size — TODO confirm.
max_vocab_size: 320000

# Presumably the number of documents to use — TODO confirm.
num_docs: 9500000

# Quoted so the value is always read as a literal string, never as YAML syntax.
eos_token: '<|endoftext|>'