Commit: Fix tokenizer config saving

Changed file: tokenization_chatglm.py (+9 −3)
@@ -170,9 +170,9 @@ class ChatGLMTokenizer(PreTrainedTokenizer):

Before (removed lines marked with `-`):

    170         vocab_file,
    171         do_lower_case=False,
    172         remove_space=False,
    173 -       bos_token='sop',
    174 -       eos_token='eos',      # NOTE: value truncated in the page extraction; likely 'eos' — verify against repo history
    175 -       eop_token='eop',
    176         mask_token='[MASK]',
    177         gmask_token='[gMASK]',
    178         padding_side="left",
@@ -183,6 +183,12 @@ class ChatGLMTokenizer(PreTrainedTokenizer):

Before (the special-token kwargs were NOT forwarded to `super().__init__`, so they were missing from the saved tokenizer config):

    183             do_lower_case=do_lower_case,
    184             remove_space=remove_space,
    185             padding_side=padding_side,
    186             **kwargs
    187         )
    188
After (added lines marked with `+`; token strings now use the bracketed forms):

    170         vocab_file,
    171         do_lower_case=False,
    172         remove_space=False,
    173 +       bos_token='<sop>',
    174 +       eos_token='</s>',
    175 +       eop_token='<eop>',
    176         mask_token='[MASK]',
    177         gmask_token='[gMASK]',
    178         padding_side="left",
After (the special-token kwargs are now forwarded to `super().__init__`, so `PreTrainedTokenizer` records them and they are persisted when the tokenizer config is saved — this is the fix named in the commit title):

    183             do_lower_case=do_lower_case,
    184             remove_space=remove_space,
    185             padding_side=padding_side,
    186 +           bos_token=bos_token,
    187 +           eos_token=eos_token,
    188 +           eop_token=eop_token,
    189 +           mask_token=mask_token,
    190 +           gmask_token=gmask_token,
    191 +           num_image_tokens=num_image_tokens,
    192             **kwargs
    193         )
    194