Pull request #98: "accelerate tokenizer"
Opened by: lugim
File changed: tokenization_chatglm.py (+3 −0)
@@ -439,5 +439,8 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
             encoded_inputs["position_ids"] = np.pad(encoded_inputs["position_ids"],
                                                     pad_width=[(0, 0), (difference, 0)])
             encoded_inputs[self.model_input_names[0]] = [self.pad_token_id] * difference + required_input
+        encoded_inputs["attention_mask"] = encoded_inputs["attention_mask"].tolist()
+        encoded_inputs["position_ids"] = encoded_inputs["position_ids"].tolist()
+
 
         return encoded_inputs