Commit 8224bd2 (parent: 43821e4) committed by shibing624

Update tokenization_baichuan.py

Files changed (1):
  1. tokenization_baichuan.py  +6 -5
tokenization_baichuan.py CHANGED
@@ -52,6 +52,12 @@ class BaichuanTokenizer(PreTrainedTokenizer):
         eos_token = AddedToken(eos_token, lstrip=False, rstrip=False) if isinstance(eos_token, str) else eos_token
         unk_token = AddedToken(unk_token, lstrip=False, rstrip=False) if isinstance(unk_token, str) else unk_token
         pad_token = AddedToken(pad_token, lstrip=False, rstrip=False) if isinstance(pad_token, str) else pad_token
+
+        self.vocab_file = vocab_file
+        self.add_bos_token = add_bos_token
+        self.add_eos_token = add_eos_token
+        self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
+        self.sp_model.Load(vocab_file)
         super().__init__(
             bos_token=bos_token,
             eos_token=eos_token,
@@ -63,11 +69,6 @@ class BaichuanTokenizer(PreTrainedTokenizer):
             clean_up_tokenization_spaces=clean_up_tokenization_spaces,
             **kwargs,
         )
-        self.vocab_file = vocab_file
-        self.add_bos_token = add_bos_token
-        self.add_eos_token = add_eos_token
-        self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
-        self.sp_model.Load(vocab_file)
 
     def __getstate__(self):
         state = self.__dict__.copy()
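The reordering follows a common pattern for SentencePiece-backed tokenizers: recent transformers releases have PreTrainedTokenizer.__init__ consult the vocabulary (for example when registering special tokens), so self.sp_model likely needs to be loaded before super().__init__() runs. Below is a minimal sketch of that ordering; the class name, defaults, and helper methods are illustrative assumptions, not the full BaichuanTokenizer.

# Minimal sketch (assumed names/defaults, not the actual Baichuan code):
# load the SentencePiece model before super().__init__(), since the base
# class may look tokens up in the vocab during its own initialization.
import sentencepiece as spm
from transformers import PreTrainedTokenizer


class SketchSPTokenizer(PreTrainedTokenizer):
    def __init__(
        self,
        vocab_file,
        unk_token="<unk>",
        bos_token="<s>",
        eos_token="</s>",
        pad_token=None,
        add_bos_token=True,
        add_eos_token=False,
        sp_model_kwargs=None,
        **kwargs,
    ):
        self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
        # Set every attribute the base class might need *before* the super call.
        self.vocab_file = vocab_file
        self.add_bos_token = add_bos_token
        self.add_eos_token = add_eos_token
        self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
        self.sp_model.Load(vocab_file)
        super().__init__(
            bos_token=bos_token,
            eos_token=eos_token,
            unk_token=unk_token,
            pad_token=pad_token,
            **kwargs,
        )

    @property
    def vocab_size(self):
        return self.sp_model.get_piece_size()

    def get_vocab(self):
        # token -> id mapping built from the SentencePiece model.
        return {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size)}

    def _tokenize(self, text):
        return self.sp_model.encode(text, out_type=str)

    def _convert_token_to_id(self, token):
        return self.sp_model.piece_to_id(token)

    def _convert_id_to_token(self, index):
        return self.sp_model.id_to_piece(index)

With this layout, anything the base constructor touches (get_vocab, _convert_token_to_id, and so on) depends only on state that is already set, which is the same ordering the commit applies to BaichuanTokenizer itself.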