Update app.py
app.py CHANGED
@@ -7,10 +7,17 @@ import torch
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model_id = "phearion/bigbrain-v0.0.1"
 
+bnb_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_compute_dtype=torch.bfloat16
+)
+
 # Load models and tokenizer efficiently
 config = PeftConfig.from_pretrained("phearion/bigbrain-v0.0.1")
 tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
-model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
+model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, quantization_config=bnb_config)
 
 # Load the Lora model
 model = PeftModel.from_pretrained(model, model_id)
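For context, here is a minimal, self-contained sketch of what the updated loading path in app.py might look like with its imports in place. The import lines, the `device_map="auto"` argument, and the generation snippet at the end are assumptions for illustration; only the quantization config and the loading calls appear in this commit.

```python
import torch
from peft import PeftConfig, PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "phearion/bigbrain-v0.0.1"

# 4-bit NF4 quantization with nested (double) quantization of the
# quantization constants; matrix multiplications run in bfloat16
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Resolve the base model from the PEFT adapter's config
config = PeftConfig.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Load the base model in 4-bit. device_map="auto" (an assumption, not in
# the diff) lets accelerate place the quantized weights, so no manual
# model.to(device) call is needed afterwards.
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    quantization_config=bnb_config,
    device_map="auto",
)

# Attach the LoRA adapter on top of the quantized base model
model = PeftModel.from_pretrained(model, model_id)

# Hypothetical usage: generate a short completion
inputs = tokenizer("Hello, world!", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```

Loading in NF4 with double quantization roughly quarters the memory needed for the base model's weights compared with 16-bit loading, while the bfloat16 compute dtype keeps the actual matrix multiplications in a higher-precision format.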