Update README.md
README.md (CHANGED)
````diff
@@ -22,9 +22,11 @@ model_id = 'mobiuslabsgmbh/Mixtral-8x7B-Instruct-v0.1-hf-attn-4bit-moe-2bit-meta
 from hqq.engine.hf import HQQModelForCausalLM, AutoTokenizer
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = HQQModelForCausalLM.from_quantized(model_id)
-
+
+#Optional: set backend/compile
 from hqq.core.quantize import *
-HQQLinear.set_backend(HQQBackend.
+HQQLinear.set_backend(HQQBackend.ATEN_BACKPROP)
+
 #Text Generation
 prompt = "<s> [INST] How do I build a car? [/INST] "
 inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False)
````
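Read together with the surrounding README, the first hunk leaves the inference example looking roughly like the sketch below. The generation call, the `max_new_tokens` value, and the `.to('cuda')` device placement are illustrative assumptions and are not part of this diff; the repo id is truncated in the hunk header, so it is left truncated here as well.

```python
import torch
from hqq.engine.hf import HQQModelForCausalLM, AutoTokenizer
from hqq.core.quantize import *

# Repo id as shown (truncated) in the hunk header above; use the full id from the README.
model_id  = 'mobiuslabsgmbh/Mixtral-8x7B-Instruct-v0.1-hf-attn-4bit-moe-2bit-meta'
tokenizer = AutoTokenizer.from_pretrained(model_id)
model     = HQQModelForCausalLM.from_quantized(model_id)

#Optional: set backend/compile
HQQLinear.set_backend(HQQBackend.ATEN_BACKPROP)

#Text Generation
prompt = "<s> [INST] How do I build a car? [/INST] "
inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False)

# Assumption: the wrapped model exposes the standard transformers generate();
# device placement may differ when meta-data offloading is enabled.
with torch.inference_mode():
    outputs = model.generate(**inputs.to('cuda'), max_new_tokens=256)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```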
````diff
@@ -64,4 +66,5 @@ quant_config['block_sparse_moe.experts.w3'] = experts_params
 #Quantize
 model.quantize_model(quant_config=quant_config, compute_dtype=torch.float16);
 model.eval();
-```
+```
+
````
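The second hunk only adjusts the tail of the quantization example, but its header carries the context line `quant_config['block_sparse_moe.experts.w3'] = experts_params`. The sketch below shows one plausible way such a per-layer config could be assembled with hqq's `BaseQuantizeConfig` before the `quantize_model` call shown above; the bit-widths, group sizes, and all layer names other than `w3` are assumptions inferred from the repo name (attn-4bit, moe-2bit), not taken from this diff.

```python
from hqq.core.quantize import BaseQuantizeConfig

# Assumed settings: 4-bit attention, 2-bit experts (per the repo name).
attn_params    = BaseQuantizeConfig(nbits=4, group_size=64)
experts_params = BaseQuantizeConfig(nbits=2, group_size=16)

quant_config = {}
# Attention projections (layer names assumed from the Mixtral architecture)
quant_config['self_attn.q_proj'] = attn_params
quant_config['self_attn.k_proj'] = attn_params
quant_config['self_attn.v_proj'] = attn_params
quant_config['self_attn.o_proj'] = attn_params
# Mixture-of-experts weights; w3 matches the context line in the hunk header
quant_config['block_sparse_moe.experts.w1'] = experts_params
quant_config['block_sparse_moe.experts.w2'] = experts_params
quant_config['block_sparse_moe.experts.w3'] = experts_params

# Then, as in the README:
#   model.quantize_model(quant_config=quant_config, compute_dtype=torch.float16)
#   model.eval()
```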