Mrw33554432
/

bitLinear-phi-1.5

Text Generation

text-generation-inference

Inference Endpoints

Model card Files Files and versions Community

Mrw33554432 committed on Apr 12

Commit

9dd1412

•

1 Parent(s): 89f541f

Update bitlinear.py

Files changed (1) hide show

bitlinear.py +2 -2

bitlinear.py CHANGED Viewed

@@ -5,8 +5,8 @@ from torch import Tensor, nn
 def weight_quant(w):
     """
     from https://github.com/microsoft/unilm/blob/master/bitnet/The-Era-of-1-bit-LLMs__Training_Tips_Code_FAQ.pdf,
-    This is a little bit different from paper by adding '/ scale' in the end,
-    which is super crucial for training (7.5 loss vs 2.5)
     """
     scale = 1.0 / w.abs().mean().clamp_(min=1e-5)
     u = (w * scale).round().clamp_(-1, 1) / scale

 def weight_quant(w):
     """
     from https://github.com/microsoft/unilm/blob/master/bitnet/The-Era-of-1-bit-LLMs__Training_Tips_Code_FAQ.pdf,
+    This is a little bit different from paper by adding '/ scale' in the end, as released by the paper author.
+    which is super crucial for training (7.5 loss vs 2.5).
     """
     scale = 1.0 / w.abs().mean().clamp_(min=1e-5)
     u = (w * scale).round().clamp_(-1, 1) / scale