Crystalcareai
/

GemMoE-Beta-1

Text Generation

Model card Files and versions Community

Crystalcareai committed on Mar 13, 2024

Commit

1318de9

·

verified ·

1 Parent(s): cfc4ccd

Update modeling_gemmoe.py

Files changed (1) hide show

modeling_gemmoe.py +3 -3

modeling_gemmoe.py CHANGED Viewed

@@ -702,10 +702,10 @@ class GemmoeSparseMoeBlock(nn.Module):
             expert_indices = (selected_experts[token_indices] == expert_idx).nonzero(as_tuple=True)[1]
             current_hidden_states *= top_routing_weights[token_indices, expert_indices, None]
-            final_hidden_states.index_add_(0, token_indices, current_hidden_states)
-        final_hidden_states = final_hidden_states.reshape(batch_size, sequence_length, hidden_dim)
-        return final_hidden_states, router_logits
 class GemmoeDecoderLayer(nn.Module):

             expert_indices = (selected_experts[token_indices] == expert_idx).nonzero(as_tuple=True)[1]
             current_hidden_states *= top_routing_weights[token_indices, expert_indices, None]
+            # Cast current_hidden_states to the same data type as final_hidden_states
+            current_hidden_states = current_hidden_states.to(final_hidden_states.dtype)
+            final_hidden_states.index_add_(0, token_indices, current_hidden_states)
 class GemmoeDecoderLayer(nn.Module):