Mrw33554432
/

bitLinear-phi-1.5

Text Generation

text-generation-inference

Inference Endpoints

Model card Files Files and versions Community

Mrw33554432 committed on Apr 12

Commit

89f541f

•

1 Parent(s): d013df6

Upload 2 files

Files changed (2) hide show

bitlinear.py +31 -0
replace_hf.py +49 -0

bitlinear.py ADDED Viewed

	@@ -0,0 +1,31 @@

+import torch.nn.functional as F
+from torch import Tensor, nn
def weight_quant(w):
    """Quantize a weight tensor to scaled ternary values.

    The tensor is multiplied by ``scale = 1 / mean(|w|)`` (the mean is
    floored at 1e-5 so the reciprocal stays finite for all-zero weights),
    rounded, clipped to [-1, 1], and finally divided by ``scale`` again so
    the result lives on the same magnitude as the input.

    Based on
    https://github.com/microsoft/unilm/blob/master/bitnet/The-Era-of-1-bit-LLMs__Training_Tips_Code_FAQ.pdf.
    Per the original author's note, the trailing '/ scale' differs slightly
    from the paper and is crucial for training (7.5 loss vs 2.5).

    Args:
        w: The weight tensor to quantize. It is not modified.

    Returns:
        A new tensor with the same shape as ``w`` whose elements are
        -1, 0 or 1 divided by ``scale``.
    """
    mean_magnitude = w.abs().mean().clamp(min=1e-5)
    scale = 1.0 / mean_magnitude
    ternary = (w * scale).round().clamp(-1, 1)
    return ternary / scale
class BitLinear(nn.Linear):
    """
    A modified version of bit linear: a drop-in ``nn.Linear`` that applies
    ``weight_quant`` to its weight on every forward pass. Only the weight is
    quantized; the input and the bias are used as-is.
    """

    def forward(self, x: Tensor) -> Tensor:
        """
        Forward pass of the BitLinear layer, applying quantization to weights.

        Args:
            x (Tensor): The input tensor.

        Returns:
            Tensor: ``F.linear`` of ``x`` with the quantized weight and the
            layer's bias.
        """
        w = self.weight
        # Straight-through estimator: the forward value equals
        # weight_quant(w), while the gradient reaches ``w`` as if the
        # quantization were the identity. The quantization is computed on a
        # detached view so autograd never records a graph for a branch that
        # contributes no gradient.
        w_frozen = w.detach()
        w_quant = w + (weight_quant(w_frozen) - w_frozen)
        return F.linear(x, w_quant, self.bias)

replace_hf.py ADDED Viewed

	@@ -0,0 +1,49 @@

+import gc
+import torch
+from torch import nn
+from bitlinear import BitLinear
+# Adapted from https://github.com/kyegomez/BitNet/blob/main/bitnet/replace_hf.py
def _replace_linear_children(model, keep_param: bool):
    """Recursively swap each nn.Linear child of ``model`` for a BitLinear, skipping names containing 'head'."""
    # Iterate over a snapshot: attributes of ``model`` are reassigned below.
    for name, module in list(model.named_children()):
        if not isinstance(module, nn.Linear):
            # Not a linear layer itself; descend into its children.
            _replace_linear_children(module, keep_param)
            continue
        if 'head' in name:
            # Leave output heads (e.g. lm_head) as ordinary nn.Linear layers.
            continue

        has_bias = module.bias is not None
        # Build the replacement on the same device and with the same dtype
        # as the layer it replaces, so a model that was already moved to an
        # accelerator or cast to half precision stays consistent.
        bit_linear = BitLinear(
            in_features=module.in_features,
            out_features=module.out_features,
            bias=has_bias,
            device=module.weight.device,
            dtype=module.weight.dtype,
        )
        if keep_param:
            # Carry over the original weight and bias; otherwise the fresh
            # random initialisation of the new layer is kept.
            with torch.no_grad():
                bit_linear.weight.copy_(module.weight)
                if has_bias:
                    bit_linear.bias.copy_(module.bias)
        setattr(model, name, bit_linear)


def replace_linear_in_hf(model, keep_param: bool):
    """
    Replaces all instances of nn.Linear in the given model with BitLinear,
    except layers whose attribute name contains 'head' (e.g. lm_head).

    The model is modified in place. Each replacement layer is created on the
    device and with the dtype of the original layer's weight. After the whole
    module tree has been processed, garbage collection is triggered once and,
    when CUDA is available, the CUDA cache is emptied.

    Args:
        model (nn.Module): The model to modify.
        keep_param (bool): If true, the new layers keep the weights and biases
            of the layers they replace. If false, the new layers use randomly
            initialised weights (for training).

    Returns:
        None
    """
    _replace_linear_children(model, keep_param)
    # Release the replaced layers once, rather than at every recursion level.
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()