Update config.json for Transformers GPTQ support
config.json CHANGED: +29 -1
@@ -31,6 +31,34 @@
   "vocab_size": 32000,
   "quantization_config": {
     "bits": 4,
+    "modules_in_block_to_quantize": [
+      ["self_attn.k_proj", "self_attn.v_proj", "self_attn.q_proj"],
+      ["self_attn.o_proj"],
+      [
+        "block_sparse_moe.experts.0.w1",
+        "block_sparse_moe.experts.1.w1",
+        "block_sparse_moe.experts.2.w1",
+        "block_sparse_moe.experts.3.w1",
+        "block_sparse_moe.experts.4.w1",
+        "block_sparse_moe.experts.5.w1",
+        "block_sparse_moe.experts.6.w1",
+        "block_sparse_moe.experts.7.w1",
+        "block_sparse_moe.experts.0.w3",
+        "block_sparse_moe.experts.1.w3",
+        "block_sparse_moe.experts.2.w3",
+        "block_sparse_moe.experts.3.w3",
+        "block_sparse_moe.experts.4.w3",
+        "block_sparse_moe.experts.5.w3",
+        "block_sparse_moe.experts.6.w3",
+        "block_sparse_moe.experts.7.w3"],
+      ["block_sparse_moe.experts.0.w2",
+        "block_sparse_moe.experts.1.w2",
+        "block_sparse_moe.experts.2.w2",
+        "block_sparse_moe.experts.3.w2",
+        "block_sparse_moe.experts.4.w2",
+        "block_sparse_moe.experts.5.w2",
+        "block_sparse_moe.experts.6.w2",
+        "block_sparse_moe.experts.7.w2"]],
     "group_size": -1,
     "damp_percent": 0.1,
     "desc_act": true,
@@ -40,4 +68,4 @@
   "model_file_base_name": "model",
   "quant_method": "gptq"
   }
-}
+}
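For context, a minimal sketch of how the updated config is consumed: Transformers reads "quantization_config" (including the new "modules_in_block_to_quantize" groups, which list the linear layers to quantize within each decoder block) directly from config.json when the checkpoint is loaded, so no extra arguments should be needed. The repo id below is a placeholder, not this repository's actual name, and the snippet assumes a GPTQ backend (optimum / auto-gptq) is installed.

# Hedged usage sketch: load the GPTQ checkpoint with Transformers.
# "your-org/your-model-GPTQ" is a placeholder repo id, not the real one.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "your-org/your-model-GPTQ"  # placeholder

tokenizer = AutoTokenizer.from_pretrained(model_id)
# Transformers picks up the GPTQ settings from config.json's
# "quantization_config", including "modules_in_block_to_quantize".
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")

inputs = tokenizer("Hello", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))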