# Launch run_mlm_local.py through the `deepspeed` launcher.
#
# Each option sits on its own line and every continuation backslash is the
# LAST character on its line. A backslash followed by a space is an escaped
# space, not a line continuation: it would glue a leading blank onto the next
# option (e.g. " --model_config_id") so the script would not see it as a flag.
#
# --max_steps is written as a plain integer (1000000) instead of "1000_000" so
# it does not depend on the argument parser accepting underscore separators.
#
# NOTE(review): the meaning of the individual flags is defined by
# run_mlm_local.py, which is not visible here; ds_config_zero3.json is
# presumably a DeepSpeed ZeRO stage-3 config and must be resolvable from the
# working directory — confirm against the training script.
deepspeed run_mlm_local.py \
  --model_config_id "bert-base-uncased" \
  --dataset_id "chaoyan/processed_bert_dataset" \
  --tokenizer_id "chaoyan/bert-base-uncased-cat_tokenizer" \
  --repository_id "bert-base-uncased-cat_model" \
  --max_steps 1000000 \
  --per_device_train_batch_size 64 \
  --learning_rate 5e-5 \
  --deepspeed ds_config_zero3.json