zxdu20 committed on
Commit
7e69b85
1 Parent(s): 9324de7

Fix tokenizer config saving

Browse files
Files changed (1) hide show
  1. tokenization_chatglm.py +9 -3
tokenization_chatglm.py CHANGED
@@ -170,9 +170,9 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
170
  vocab_file,
171
  do_lower_case=False,
172
  remove_space=False,
173
- bos_token='sop',
174
- eos_token='eos',
175
- eop_token='eop',
176
  mask_token='[MASK]',
177
  gmask_token='[gMASK]',
178
  padding_side="left",
@@ -183,6 +183,12 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
183
  do_lower_case=do_lower_case,
184
  remove_space=remove_space,
185
  padding_side=padding_side,
 
 
 
 
 
 
186
  **kwargs
187
  )
188
 
 
170
  vocab_file,
171
  do_lower_case=False,
172
  remove_space=False,
173
+ bos_token='<sop>',
174
+ eos_token='</s>',
175
+ eop_token='<eop>',
176
  mask_token='[MASK]',
177
  gmask_token='[gMASK]',
178
  padding_side="left",
 
183
  do_lower_case=do_lower_case,
184
  remove_space=remove_space,
185
  padding_side=padding_side,
186
+ bos_token=bos_token,
187
+ eos_token=eos_token,
188
+ eop_token=eop_token,
189
+ mask_token=mask_token,
190
+ gmask_token=gmask_token,
191
+ num_image_tokens=num_image_tokens,
192
  **kwargs
193
  )
194