Mrw33554432
committed on
Commit
•
9dd1412
1
Parent(s):
89f541f
Update bitlinear.py
Browse files
- bitlinear.py +2 -2
bitlinear.py
CHANGED
@@ -5,8 +5,8 @@ from torch import Tensor, nn
|
|
5 |
def weight_quant(w):
|
6 |
"""
|
7 |
from https://github.com/microsoft/unilm/blob/master/bitnet/The-Era-of-1-bit-LLMs__Training_Tips_Code_FAQ.pdf,
|
8 |
-
This is a little bit different from paper by adding '/ scale' in the end,
|
9 |
-
which is super crucial for training (7.5 loss vs 2.5)
|
10 |
"""
|
11 |
scale = 1.0 / w.abs().mean().clamp_(min=1e-5)
|
12 |
u = (w * scale).round().clamp_(-1, 1) / scale
|
|
|
5 |
def weight_quant(w):
|
6 |
"""
|
7 |
from https://github.com/microsoft/unilm/blob/master/bitnet/The-Era-of-1-bit-LLMs__Training_Tips_Code_FAQ.pdf,
|
8 |
+
This is a little bit different from paper by adding '/ scale' in the end, as released by the paper author.
|
9 |
+
which is super crucial for training (7.5 loss vs 2.5).
|
10 |
"""
|
11 |
scale = 1.0 / w.abs().mean().clamp_(min=1e-5)
|
12 |
u = (w * scale).round().clamp_(-1, 1) / scale
|