Update config.json for Transformers GPTQ support
config.json CHANGED: +29 -1
@@ -31,6 +31,34 @@
   "vocab_size": 32000,
   "quantization_config": {
     "bits": 4,
+    "modules_in_block_to_quantize": [
+      ["self_attn.k_proj", "self_attn.v_proj", "self_attn.q_proj"],
+      ["self_attn.o_proj"],
+      [
+        "block_sparse_moe.experts.0.w1",
+        "block_sparse_moe.experts.1.w1",
+        "block_sparse_moe.experts.2.w1",
+        "block_sparse_moe.experts.3.w1",
+        "block_sparse_moe.experts.4.w1",
+        "block_sparse_moe.experts.5.w1",
+        "block_sparse_moe.experts.6.w1",
+        "block_sparse_moe.experts.7.w1",
+        "block_sparse_moe.experts.0.w3",
+        "block_sparse_moe.experts.1.w3",
+        "block_sparse_moe.experts.2.w3",
+        "block_sparse_moe.experts.3.w3",
+        "block_sparse_moe.experts.4.w3",
+        "block_sparse_moe.experts.5.w3",
+        "block_sparse_moe.experts.6.w3",
+        "block_sparse_moe.experts.7.w3"],
+      ["block_sparse_moe.experts.0.w2",
+        "block_sparse_moe.experts.1.w2",
+        "block_sparse_moe.experts.2.w2",
+        "block_sparse_moe.experts.3.w2",
+        "block_sparse_moe.experts.4.w2",
+        "block_sparse_moe.experts.5.w2",
+        "block_sparse_moe.experts.6.w2",
+        "block_sparse_moe.experts.7.w2"]],
     "group_size": -1,
     "damp_percent": 0.1,
     "desc_act": true,
@@ -40,4 +68,4 @@
   "model_file_base_name": "model",
   "quant_method": "gptq"
   }
-}
+}
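For context, a minimal sketch of how the updated config is consumed: Transformers reads "quantization_config" (including the new "modules_in_block_to_quantize" groups, which list the linear layers to quantize within each decoder block) directly from config.json when the checkpoint is loaded, so no extra arguments should be needed. The repo id below is a placeholder, not this repository's actual name, and the snippet assumes a GPTQ backend (optimum / auto-gptq) is installed.

# Hedged usage sketch: load the GPTQ checkpoint with Transformers.
# "your-org/your-model-GPTQ" is a placeholder repo id, not the real one.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "your-org/your-model-GPTQ"  # placeholder

tokenizer = AutoTokenizer.from_pretrained(model_id)
# Transformers picks up the GPTQ settings from config.json's
# "quantization_config", including "modules_in_block_to_quantize".
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")

inputs = tokenizer("Hello", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))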