Spaces:
Runtime error
Runtime error
Commit
·
347544f
1
Parent(s):
69a54ae
Added accelerate
Browse files
app.py
CHANGED
@@ -6,6 +6,7 @@ import gradio as gr
|
|
6 |
# Loading PEFT model
|
7 |
PEFT_MODEL = "gunjanjoshi/llama2-7b-sharded-bf16-finetuned-mental-health-conversational"
|
8 |
|
|
|
9 |
bnb_config = BitsAndBytesConfig(
|
10 |
load_in_4bit=True,
|
11 |
bnb_4bit_quant_type="nf4",
|
@@ -18,7 +19,7 @@ peft_base_model = AutoModelForCausalLM.from_pretrained(
|
|
18 |
config.base_model_name_or_path,
|
19 |
return_dict=True,
|
20 |
quantization_config=bnb_config,
|
21 |
-
device_map="auto",
|
22 |
trust_remote_code=True,
|
23 |
)
|
24 |
|
@@ -35,9 +36,9 @@ system_message = """You are a helpful and and truthful psychology and psychother
|
|
35 |
|
36 |
def generate_response(user_input):
|
37 |
formatted = f"<s>[INST] <<SYS>>{system_message}<</SYS>>{user_input} [/INST]"
|
38 |
-
input_ids = peft_tokenizer(formatted, return_tensors="pt", truncation=True, max_length=1024).input_ids
|
39 |
outputs = peft_model.generate(input_ids=input_ids, do_sample=True, top_p=0.9, temperature=0.95, max_length=2048)
|
40 |
-
translated_output = peft_tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)[0][len(formatted)-1:]
|
41 |
return translated_output
|
42 |
|
43 |
with gr.Blocks() as demo:
|
|
|
6 |
# Loading PEFT model
|
7 |
PEFT_MODEL = "gunjanjoshi/llama2-7b-sharded-bf16-finetuned-mental-health-conversational"
|
8 |
|
9 |
+
# Modify BitsAndBytesConfig for CPU
|
10 |
bnb_config = BitsAndBytesConfig(
|
11 |
load_in_4bit=True,
|
12 |
bnb_4bit_quant_type="nf4",
|
|
|
19 |
config.base_model_name_or_path,
|
20 |
return_dict=True,
|
21 |
quantization_config=bnb_config,
|
22 |
+
device_map="cpu", # Ensure this is set to CPU
|
23 |
trust_remote_code=True,
|
24 |
)
|
25 |
|
|
|
36 |
|
37 |
def generate_response(user_input):
|
38 |
formatted = f"<s>[INST] <<SYS>>{system_message}<</SYS>>{user_input} [/INST]"
|
39 |
+
input_ids = peft_tokenizer(formatted, return_tensors="pt", truncation=True, max_length=1024).input_ids
|
40 |
outputs = peft_model.generate(input_ids=input_ids, do_sample=True, top_p=0.9, temperature=0.95, max_length=2048)
|
41 |
+
translated_output = peft_tokenizer.batch_decode(outputs.detach().numpy(), skip_special_tokens=True)[0][len(formatted)-1:]
|
42 |
return translated_output
|
43 |
|
44 |
with gr.Blocks() as demo:
|