mobicham committed on
Commit
3d7f303
1 Parent(s): 6d3c872

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +6 -3
README.md CHANGED
@@ -22,9 +22,11 @@ model_id = 'mobiuslabsgmbh/Mixtral-8x7B-Instruct-v0.1-hf-attn-4bit-moe-2bit-meta
22
  from hqq.engine.hf import HQQModelForCausalLM, AutoTokenizer
23
  tokenizer = AutoTokenizer.from_pretrained(model_id)
24
  model = HQQModelForCausalLM.from_quantized(model_id)
25
- #Optional
 
26
  from hqq.core.quantize import *
27
- HQQLinear.set_backend(HQQBackend.PYTORCH_COMPILE)
 
28
  #Text Generation
29
  prompt = "<s> [INST] How do I build a car? [/INST] "
30
  inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False)
@@ -64,4 +66,5 @@ quant_config['block_sparse_moe.experts.w3'] = experts_params
64
  #Quantize
65
  model.quantize_model(quant_config=quant_config, compute_dtype=torch.float16);
66
  model.eval();
67
- ```
 
 
22
  from hqq.engine.hf import HQQModelForCausalLM, AutoTokenizer
23
  tokenizer = AutoTokenizer.from_pretrained(model_id)
24
  model = HQQModelForCausalLM.from_quantized(model_id)
25
+
26
+ #Optional: set backend/compile
27
  from hqq.core.quantize import *
28
+ HQQLinear.set_backend(HQQBackend.ATEN_BACKPROP)
29
+
30
  #Text Generation
31
  prompt = "<s> [INST] How do I build a car? [/INST] "
32
  inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False)
 
66
  #Quantize
67
  model.quantize_model(quant_config=quant_config, compute_dtype=torch.float16);
68
  model.eval();
69
+ ```
70
+