uer committed on
Commit fbfe743
1 Parent(s): 8d5c52d

Update README.md

Files changed (1)
  1. README.md +32 -32
README.md CHANGED
@@ -48,24 +48,24 @@ Taking the case of T5-Small
  Stage1:

  ```
- python3 preprocess.py --corpus_path corpora/cluecorpussmall.txt \\
- --vocab_path models/google_zh_with_sentinel_vocab.txt \\
- --dataset_path cluecorpussmall_t5_seq128_dataset.pt \\
- --processes_num 32 --seq_length 128 \\
+ python3 preprocess.py --corpus_path corpora/cluecorpussmall.txt \
+ --vocab_path models/google_zh_with_sentinel_vocab.txt \
+ --dataset_path cluecorpussmall_t5_seq128_dataset.pt \
+ --processes_num 32 --seq_length 128 \
  --dynamic_masking --target t5
  ```

  ```
- python3 pretrain.py --dataset_path cluecorpussmall_t5_seq128_dataset.pt \\
- --vocab_path models/google_zh_with_sentinel_vocab.txt \\
- --config_path models/t5/small_config.json \\
- --output_model_path models/cluecorpussmall_t5_seq128_model.bin \\
- --world_size 8 --gpu_ranks 0 1 2 3 4 5 6 7 \\
- --total_steps 1000000 --save_checkpoint_steps 100000 --report_steps 50000 \\
- --learning_rate 1e-3 --batch_size 64 \\
- --span_masking --span_geo_prob 0.3 --span_max_length 5 \\
- --embedding word --relative_position_embedding --remove_embedding_layernorm --tgt_embedding word \\
- --encoder transformer --mask fully_visible --layernorm_positioning pre --decoder transformer \\
+ python3 pretrain.py --dataset_path cluecorpussmall_t5_seq128_dataset.pt \
+ --vocab_path models/google_zh_with_sentinel_vocab.txt \
+ --config_path models/t5/small_config.json \
+ --output_model_path models/cluecorpussmall_t5_seq128_model.bin \
+ --world_size 8 --gpu_ranks 0 1 2 3 4 5 6 7 \
+ --total_steps 1000000 --save_checkpoint_steps 100000 --report_steps 50000 \
+ --learning_rate 1e-3 --batch_size 64 \
+ --span_masking --span_geo_prob 0.3 --span_max_length 5 \
+ --embedding word --relative_position_embedding --remove_embedding_layernorm --tgt_embedding word \
+ --encoder transformer --mask fully_visible --layernorm_positioning pre --decoder transformer \
  --target t5 --tie_weights

  ```
@@ -73,34 +73,34 @@ python3 pretrain.py --dataset_path cluecorpussmall_t5_seq128_dataset.pt \\
  Stage2:

  ```
- python3 preprocess.py --corpus_path corpora/cluecorpussmall.txt \\
- --vocab_path models/google_zh_with_sentinel_vocab.txt \\
- --dataset_path cluecorpussmall_t5_seq512_dataset.pt \\
- --processes_num 32 --seq_length 512 \\
+ python3 preprocess.py --corpus_path corpora/cluecorpussmall.txt \
+ --vocab_path models/google_zh_with_sentinel_vocab.txt \
+ --dataset_path cluecorpussmall_t5_seq512_dataset.pt \
+ --processes_num 32 --seq_length 512 \
  --dynamic_masking --target t5
  ```

  ```
- python3 pretrain.py --dataset_path cluecorpussmall_t5_seq512_dataset.pt \\
- --pretrained_model_path models/cluecorpussmall_t5_seq128_model.bin-1000000 \\
- --vocab_path models/google_zh_with_sentinel_vocab.txt \\
- --config_path models/t5/small_config.json \\
- --output_model_path models/cluecorpussmall_t5_seq512_model.bin \\
- --world_size 8 --gpu_ranks 0 1 2 3 4 5 6 7 \\
- --total_steps 250000 --save_checkpoint_steps 50000 --report_steps 10000 \\
- --learning_rate 5e-4 --batch_size 16 \\
- --span_masking --span_geo_prob 0.3 --span_max_length 5 \\
- --embedding word --relative_position_embedding --remove_embedding_layernorm --tgt_embedding word \\
- --encoder transformer --mask fully_visible --layernorm_positioning pre --decoder transformer \\
+ python3 pretrain.py --dataset_path cluecorpussmall_t5_seq512_dataset.pt \
+ --pretrained_model_path models/cluecorpussmall_t5_seq128_model.bin-1000000 \
+ --vocab_path models/google_zh_with_sentinel_vocab.txt \
+ --config_path models/t5/small_config.json \
+ --output_model_path models/cluecorpussmall_t5_seq512_model.bin \
+ --world_size 8 --gpu_ranks 0 1 2 3 4 5 6 7 \
+ --total_steps 250000 --save_checkpoint_steps 50000 --report_steps 10000 \
+ --learning_rate 5e-4 --batch_size 16 \
+ --span_masking --span_geo_prob 0.3 --span_max_length 5 \
+ --embedding word --relative_position_embedding --remove_embedding_layernorm --tgt_embedding word \
+ --encoder transformer --mask fully_visible --layernorm_positioning pre --decoder transformer \
  --target t5 --tie_weights
  ```

  Finally, we convert the pre-trained model into Huggingface's format:

  ```
- python3 scripts/convert_t5_from_uer_to_huggingface.py --input_model_path cluecorpussmall_t5_seq512_model.bin-250000 \\
- --output_model_path pytorch_model.bin \\
- --layers_num 6 \\
+ python3 scripts/convert_t5_from_uer_to_huggingface.py --input_model_path cluecorpussmall_t5_seq512_model.bin-250000 \
+ --output_model_path pytorch_model.bin \
+ --layers_num 6 \
+ --type t5
  ```
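
After conversion, the checkpoint can be loaded with Hugging Face Transformers. A minimal sketch, assuming pytorch_model.bin is placed in a local directory together with a matching config.json and the sentinel vocabulary saved as vocab.txt; the directory name is illustrative. UER's Chinese T5 models use a BERT-style character vocabulary, so BertTokenizer is used rather than the SentencePiece-based T5Tokenizer:

```python
# Minimal sketch of loading the converted checkpoint with Hugging Face
# Transformers. The directory name is hypothetical; it is assumed to hold
# pytorch_model.bin, a matching config.json, and the vocabulary as vocab.txt.
from transformers import BertTokenizer, T5ForConditionalGeneration

model_dir = "cluecorpussmall_t5_small"  # hypothetical local directory

tokenizer = BertTokenizer.from_pretrained(model_dir)
model = T5ForConditionalGeneration.from_pretrained(model_dir)

# T5 denoising: sentinel tokens (extra0, extra1, ...) mark masked spans
# that the decoder is asked to fill in.
inputs = tokenizer("中国的首都是extra0京", return_tensors="pt")
outputs = model.generate(**inputs, max_length=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=False))
```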
106