---
library_name: peft
base_model: NousResearch/Llama-2-7b-chat-hf
license: apache-2.0
datasets:
- mlabonne/guanaco-llama2-1k
pipeline_tag: text-generation
language:
- en
tags:
- qlora
---

The snippet below loads the Llama 2 7B chat base model in float16, applies the QLoRA adapter from this repository, merges the adapter weights into the base model, and runs a quick generation test.

```py
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch

base_model_name = "NousResearch/Llama-2-7b-chat-hf"
qlora_model_adapter = "sartajbhuvaji/llama-2-7b-resonate-v1"
device_map = {"": 0}  # place the whole model on GPU 0

# Load the base model in half precision
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map=device_map,
)

# Apply the QLoRA adapter, then fold its weights into the base model
model = PeftModel.from_pretrained(base_model, qlora_model_adapter)
model = model.merge_and_unload()

tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Generate with the Llama 2 chat prompt template: <s>[INST] ... [/INST]
prompt = "What is a large language model?"
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=2000)
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]["generated_text"])
```
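
After `merge_and_unload()` the result is a plain `transformers` model, so the merged weights can be saved and later reloaded without any PEFT dependency. A minimal sketch; the output directory name is a hypothetical choice, not part of this repository:

```py
# Hypothetical local path; any directory works (or use push_to_hub instead)
merged_dir = "llama-2-7b-resonate-v1-merged"

# The merged model and tokenizer save like a regular transformers checkpoint
model.save_pretrained(merged_dir)
tokenizer.save_pretrained(merged_dir)
```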
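
As an alternative to the two-step load above, `peft`'s `AutoPeftModelForCausalLM` resolves the base model from the adapter config and loads both in one call. A sketch under the same single-GPU, float16 assumptions:

```py
from peft import AutoPeftModelForCausalLM
import torch

# PEFT reads the base model name from the adapter config and loads both
model = AutoPeftModelForCausalLM.from_pretrained(
    "sartajbhuvaji/llama-2-7b-resonate-v1",
    torch_dtype=torch.float16,
    device_map={"": 0},  # same single-GPU placement as above
)
model = model.merge_and_unload()  # optional: fold the adapter weights in
```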