Remove image tokens when decoding
Browse files
- tokenization_chatglm.py +1 -0
tokenization_chatglm.py
CHANGED
@@ -130,6 +130,7 @@ class SPTokenizer:
|
|
130 |
|
131 |
def decode(self, text_ids: List[int], special_tokens=False) -> str:
|
132 |
ids = [int(_id) - self.num_image_tokens for _id in text_ids]
|
|
|
133 |
text = self._get_text_tokenizer(encode_special_tokens=special_tokens).decode(ids)
|
134 |
text = text.replace("<n>", "\n")
|
135 |
text = text.replace(SPTokenizer.get_tab_token(), "\t")
|
|
|
130 |
|
131 |
def decode(self, text_ids: List[int], special_tokens=False) -> str:
|
132 |
ids = [int(_id) - self.num_image_tokens for _id in text_ids]
|
133 |
+ ids = [_id for _id in ids if _id >= 0]
|
134 |
text = self._get_text_tokenizer(encode_special_tokens=special_tokens).decode(ids)
|
135 |
text = text.replace("<n>", "\n")
|
136 |
text = text.replace(SPTokenizer.get_tab_token(), "\t")
|