Fix generation with latest transformers
#1
by kylesayrs
- modeling_deepseek.py +1 -1
- tokenization_moonshot.py +4 -1
modeling_deepseek.py
CHANGED
@@ -1653,7 +1653,7 @@ class DeepseekV3ForCausalLM(DeepseekV3PreTrainedModel):
         if isinstance(past_key_values, Cache):
             cache_length = past_key_values.get_seq_length()
             past_length = past_key_values.seen_tokens
-            max_cache_length = past_key_values.get_max_length()
+            max_cache_length = past_key_values.get_max_cache_shape()
         else:
             cache_length = past_length = past_key_values[0][0].shape[2]
             max_cache_length = None
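
Background for the one-line fix above: recent transformers releases removed Cache.get_max_length() in favor of Cache.get_max_cache_shape(), so on current versions the original call fails with an AttributeError during generate(). A minimal version-tolerant sketch, not part of this PR (the helper name _max_cache_length is hypothetical):

# Hypothetical helper, sketched for illustration; assumes past_key_values is a
# transformers Cache instance exposing one of the two accessors.
def _max_cache_length(past_key_values):
    if hasattr(past_key_values, "get_max_cache_shape"):
        # Newer transformers: replacement for the removed get_max_length().
        return past_key_values.get_max_cache_shape()
    # Older transformers still expose the original accessor.
    return past_key_values.get_max_length()

Both accessors return None for dynamically sized caches, so the surrounding max_cache_length logic behaves the same on either transformers version.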
tokenization_moonshot.py
CHANGED
@@ -12,6 +12,7 @@ from typing import (
     Union,
     Optional,
 )
+import torch
 from shutil import copyfile
 import numpy as np
 from tiktoken.load import load_tiktoken_bpe
@@ -226,8 +227,10 @@ class TikTokenTokenizer(PreTrainedTokenizer):
         if len(kwargs) > 0:
             return super().decode(token_ids, **kwargs)
 
-        if type(token_ids) is int:
+        if isinstance(token_ids, int):
             token_ids = [token_ids]
+        if isinstance(token_ids, torch.Tensor):
+            token_ids = token_ids.tolist()
 
         return self.model.decode(cast(List[int], token_ids))
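
The tokenizer change makes decode() accept what generate() actually returns: generated ids come back as a torch.Tensor, while tiktoken's decode() only takes a plain list of ints. A short usage sketch, assuming tok is an instance of the patched TikTokenTokenizer (the variable name is illustrative):

import torch

# With the patch, all three call styles reach tiktoken as a list of ints:
tok.decode(42)                       # bare int is wrapped into [42]
tok.decode([42, 43])                 # list of ints passes through unchanged
tok.decode(torch.tensor([42, 43]))   # tensor row from generate() is converted via .tolist()

Converting up front with .tolist() keeps these calls on the direct tiktoken path rather than routing them through the generic super().decode() fallback that is only used when extra kwargs are passed.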