How to conduct exl2 measurements?

#1
by icoderzqliu - opened

What are the details of conducting exl2 measurements?

Just measure as usual:

python convert.py -i model_dir -o tmp_dir -om model_dir/measurement.json

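Then apply the following patch to conversion/optimize.py to list each layer's bpw for the top solution (it prints the layers sorted from highest to lowest bpw and writes that ranking to layer_rank.json):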

diff --git a/conversion/optimize.py b/conversion/optimize.py
index b346855..6d1ebf6 100644
--- a/conversion/optimize.py
+++ b/conversion/optimize.py
@@ -1,6 +1,7 @@
 from conversion.qparams import QParams
 import math
 import itertools
+import json

 def optimize(job, save_fn, model):

@@ -207,6 +208,7 @@ def optimize(job, save_fn, model):
     print(" -- Quantization strategy:")

     job["strategy"] = {}
+    layer_bpw = {}
     for layer_ in range(num_layers):

         k1 = "model.layers." + str(layer_) + ".self_attn"
@@ -214,10 +216,23 @@ def optimize(job, save_fn, model):
         p1 = params[layer_ * 2][f_solution[layer_ * 2]]
         p2 = params[layer_ * 2 + 1][f_solution[layer_ * 2 + 1]]

+        total_bpw = 0
         for (k, p, n) in zip((k1, k2), (p1, p2), (numel_attn, numel_mlp)):
             job["strategy"][k] = p
             bpw = p["total_bits"] / n
+            total_bpw += bpw
             err = 1 - p["accuracy"]
             print(f" --   {k:50} {bpw:1.4f} bpw - exp. error: {err:1.8f}")
-
-    xx = 0
\ No newline at end of file
+        layer_bpw[layer_] = total_bpw
+
+    t = 0
+    best = []
+    layers = []
+    for i, bpw in reversed(sorted(layer_bpw.items(), key=lambda x: x[1])):
+        print(f"Layer: {i} {bpw}")
+        layers.append(i)
+
+    with open('layer_rank.json', 'w') as f:
+        json.dump(layers, f)
+    xx = 0

Just measure as usual:

python convert.py -i model_dir -o tmp_dir -om model_dir/measurement.json

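Then apply the following patch to conversion/optimize.py to list each layer's bpw for the top solution (it prints the layers sorted from highest to lowest bpw and writes that ranking to layer_rank.json):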

diff --git a/conversion/optimize.py b/conversion/optimize.py
index b346855..6d1ebf6 100644
--- a/conversion/optimize.py
+++ b/conversion/optimize.py
@@ -1,6 +1,7 @@
 from conversion.qparams import QParams
 import math
 import itertools
+import json

 def optimize(job, save_fn, model):

@@ -207,6 +208,7 @@ def optimize(job, save_fn, model):
     print(" -- Quantization strategy:")

     job["strategy"] = {}
+    layer_bpw = {}
     for layer_ in range(num_layers):

         k1 = "model.layers." + str(layer_) + ".self_attn"
@@ -214,10 +216,23 @@ def optimize(job, save_fn, model):
         p1 = params[layer_ * 2][f_solution[layer_ * 2]]
         p2 = params[layer_ * 2 + 1][f_solution[layer_ * 2 + 1]]

+        total_bpw = 0
         for (k, p, n) in zip((k1, k2), (p1, p2), (numel_attn, numel_mlp)):
             job["strategy"][k] = p
             bpw = p["total_bits"] / n
+            total_bpw += bpw
             err = 1 - p["accuracy"]
             print(f" --   {k:50} {bpw:1.4f} bpw - exp. error: {err:1.8f}")
-
-    xx = 0
\ No newline at end of file
+        layer_bpw[layer_] = total_bpw
+
+    t = 0
+    best = []
+    layers = []
+    for i, bpw in reversed(sorted(layer_bpw.items(), key=lambda x: x[1])):
+        print(f"Layer: {i} {bpw}")
+        layers.append(i)
+
+    with open('layer_rank.json', 'w') as f:
+        json.dump(layers, f)
+    xx = 0

Thank you for your reply! Where can I get this convert.py file?

It's part of exllamav2: https://github.com/turboderp/exllamav2

Sign up or log in to comment