Lin-K76 committed
Commit 3a04ae8 · verified · 1 Parent(s): 0035cbf

Update README.md

Files changed (1):
  1. README.md +6 -1
README.md CHANGED
@@ -84,7 +84,12 @@ ds = load_dataset("mgoin/ultrachat_2k", split="train_sft").select(range(512))
  examples = [tokenizer.apply_chat_template(batch["messages"], tokenize=False) for batch in ds]
  examples = tokenizer(examples, padding=True, truncation=True, return_tensors="pt").to("cuda")

- quantize_config = BaseQuantizeConfig(quant_method="fp8", activation_scheme="static")
+ quantize_config = BaseQuantizeConfig(
+     quant_method="fp8",
+     activation_scheme="static",
+     ignore_patterns=["re:.*lm_head"],
+ )
+

  model = AutoFP8ForCausalLM.from_pretrained(
      pretrained_model_dir, quantize_config=quantize_config
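
For context, the change above adds `ignore_patterns=["re:.*lm_head"]`, which tells the quantizer to skip any module whose name matches the regex (here, the model's `lm_head` output projection), leaving it in its original precision. Below is a minimal end-to-end sketch of how this config fits into an AutoFP8 quantization run; the model name, output directory, and the `quantize()`/`save_quantized()` calls are not part of this diff hunk and are assumptions based on typical AutoFP8 usage.

```python
# Sketch only: everything outside the quantize_config block is assumed,
# not taken from this diff hunk.
from datasets import load_dataset
from transformers import AutoTokenizer
from auto_fp8 import AutoFP8ForCausalLM, BaseQuantizeConfig

pretrained_model_dir = "meta-llama/Meta-Llama-3-8B-Instruct"  # assumed example model
quantized_model_dir = "Meta-Llama-3-8B-Instruct-FP8"          # assumed output path

tokenizer = AutoTokenizer.from_pretrained(pretrained_model_dir, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token

# Small calibration set used to compute static activation scales (as in the hunk context).
ds = load_dataset("mgoin/ultrachat_2k", split="train_sft").select(range(512))
examples = [tokenizer.apply_chat_template(batch["messages"], tokenize=False) for batch in ds]
examples = tokenizer(examples, padding=True, truncation=True, return_tensors="pt").to("cuda")

# The updated config: FP8 weights, static activation scales, and lm_head left unquantized.
quantize_config = BaseQuantizeConfig(
    quant_method="fp8",
    activation_scheme="static",
    ignore_patterns=["re:.*lm_head"],
)

model = AutoFP8ForCausalLM.from_pretrained(
    pretrained_model_dir, quantize_config=quantize_config
)
model.quantize(examples)                   # assumed API: calibrate and convert weights to FP8
model.save_quantized(quantized_model_dir)  # assumed API: write the quantized checkpoint
```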