Update modeling_bitllama.py
modeling_bitllama.py CHANGED (+5 -1)
```diff
@@ -253,9 +253,13 @@ def weight_quant(w):
 
 
 class BitLinear(nn.Linear):
+    def __init__(self, in_features, out_features, bias=True):
+        super().__init__(in_features, out_features, bias=bias)
+        self.norm = LlamaRMSNorm(in_features)
+
     def forward(self, x):
         w = self.weight
-        x_norm =
+        x_norm = self.norm(x)
         x_quant = x_norm + (activation_quant(x_norm) - x_norm).detach()
         w_quant = w + (weight_quant(w) - w).detach()
         return F.linear(x_quant, w_quant)
```
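The change makes the RMS normalization a persistent submodule: `LlamaRMSNorm(in_features)` is built once in `__init__` and reused as `self.norm` in `forward`. For context, here is a minimal, self-contained sketch of the resulting layer. The bodies of `activation_quant` and `weight_quant` (defined earlier in the file, per the hunk header) are assumptions following the usual BitNet b1.58 recipe (8-bit per-token activations, ternary weights), and `RMSNorm` is a stand-in for `transformers`' `LlamaRMSNorm`:

```python
import torch
import torch.nn as nn
import torch.nn.functional as F


def activation_quant(x):
    # Assumed: per-token absmax quantization of activations to 8 bits.
    scale = 127.0 / x.abs().max(dim=-1, keepdim=True).values.clamp(min=1e-5)
    return (x * scale).round().clamp(-128, 127) / scale


def weight_quant(w):
    # Assumed: ternary {-1, 0, +1} weights with a per-tensor absmean scale.
    scale = 1.0 / w.abs().mean().clamp(min=1e-5)
    return (w * scale).round().clamp(-1, 1) / scale


class RMSNorm(nn.Module):
    # Stand-in for transformers' LlamaRMSNorm.
    def __init__(self, dim, eps=1e-6):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(dim))
        self.eps = eps

    def forward(self, x):
        variance = x.pow(2).mean(-1, keepdim=True)
        return self.weight * x * torch.rsqrt(variance + self.eps)


class BitLinear(nn.Linear):
    def __init__(self, in_features, out_features, bias=True):
        super().__init__(in_features, out_features, bias=bias)
        self.norm = RMSNorm(in_features)  # LlamaRMSNorm in the actual file

    def forward(self, x):
        w = self.weight
        x_norm = self.norm(x)
        # Straight-through estimator: quantized values in the forward pass,
        # identity gradient in the backward pass.
        x_quant = x_norm + (activation_quant(x_norm) - x_norm).detach()
        w_quant = w + (weight_quant(w) - w).detach()
        return F.linear(x_quant, w_quant)


layer = BitLinear(64, 128)
out = layer(torch.randn(2, 16, 64))
print(out.shape)  # torch.Size([2, 16, 128])
```

The `x + (quant(x) - x).detach()` pattern is a straight-through estimator: the forward pass sees the quantized values, while gradients flow back as if quantization were the identity, which keeps the full-precision master weights trainable.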