PhantHive committed
Commit 3287b3a
1 parent: 8783dd5

Update app.py

Files changed (1): app.py  +8 -1
app.py CHANGED
@@ -7,10 +7,17 @@ import torch
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model_id = "phearion/bigbrain-v0.0.1"
 
+bnb_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_compute_dtype=torch.bfloat16
+)
+
 # Load models and tokenizer efficiently
 config = PeftConfig.from_pretrained("phearion/bigbrain-v0.0.1")
 tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
-model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
+model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, quantization_config=bnb_config)
 
 # Load the Lora model
 model = PeftModel.from_pretrained(model, model_id)
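
For reference, a minimal sketch of how the loading code in app.py fits together after this change. The import lines are an assumption: the hunk starts below the import block, so the diff does not show whether BitsAndBytesConfig is already imported from transformers; it must be for the added code to run.

# Sketch of the updated loading path, not the verbatim file contents.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig  # BitsAndBytesConfig import assumed
from peft import PeftConfig, PeftModel

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_id = "phearion/bigbrain-v0.0.1"

# 4-bit NF4 quantization with double quantization and bfloat16 compute,
# matching the BitsAndBytesConfig added in the diff above.
# Requires the bitsandbytes package and a CUDA GPU.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Read the adapter config to find the base model, then load tokenizer and
# the quantized base model.
config = PeftConfig.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    quantization_config=bnb_config,
)

# Attach the LoRA adapter weights on top of the quantized base model.
model = PeftModel.from_pretrained(model, model_id)

Loading the base model in 4-bit before attaching the LoRA adapter (QLoRA-style inference) is what the +8 -1 change accomplishes: the base weights stay quantized in GPU memory while the adapter is applied on top.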