Crystalcareai
committed on
Update modeling_gemmoe.py
Browse files- modeling_gemmoe.py +12 -1
modeling_gemmoe.py
CHANGED
@@ -553,6 +553,17 @@ class GemmoeSdpaAttention(GemmoeAttention):
|
|
553 |
SDPA API.
|
554 |
"""
|
555 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
556 |
def forward(
|
557 |
self,
|
558 |
hidden_states: torch.Tensor,
|
@@ -578,7 +589,7 @@ class GemmoeSdpaAttention(GemmoeAttention):
|
|
578 |
output_attentions=output_attentions,
|
579 |
use_cache=use_cache,
|
580 |
cache_position=cache_position,
|
581 |
-
|
582 |
|
583 |
bsz, q_len, _ = hidden_states.size()
|
584 |
|
|
|
553 |
SDPA API.
|
554 |
"""
|
555 |
|
556 |
+
def repeat_kv(self, x, n_rep):
    """
    Duplicate every key/value head `n_rep` times along the head axis.

    Equivalent to torch.repeat_interleave(x, dim=1, repeats=n_rep): a tensor of
    shape (batch, num_key_value_heads, seqlen, head_dim) becomes
    (batch, num_key_value_heads * n_rep, seqlen, head_dim), with each head's
    copies laid out consecutively.
    """
    bsz, n_kv_heads, seq_len, hdim = x.shape
    # One repetition means nothing to do — hand the tensor back untouched.
    if n_rep == 1:
        return x
    # Insert a broadcast axis, expand it (a view, no copy), then fold the
    # repetition axis into the head axis with a single reshape.
    expanded = x[:, :, None, :, :].expand(bsz, n_kv_heads, n_rep, seq_len, hdim)
    return expanded.reshape(bsz, n_kv_heads * n_rep, seq_len, hdim)
|
566 |
+
|
567 |
def forward(
|
568 |
self,
|
569 |
hidden_states: torch.Tensor,
|
|
|
589 |
output_attentions=output_attentions,
|
590 |
use_cache=use_cache,
|
591 |
cache_position=cache_position,
|
592 |
+
)
|
593 |
|
594 |
bsz, q_len, _ = hidden_states.size()
|
595 |
|