---
base_model: Qwen/Qwen2-0.5B-Instruct
datasets:
- timdettmers/openassistant-guanaco
library_name: peft
---

# Model Card for SeppeHousen/qwen-0.5B-qlora-guanaco

Qwen2-0.5B-Instruct fine-tuned with QLoRA on the timdettmers/openassistant-guanaco dataset.

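The exact training script is not part of this card; as a rough illustration, the sketch below shows a typical QLoRA setup for this base model and dataset using `peft` and `bitsandbytes`. The hyperparameters (rank, alpha, dropout, target modules) are illustrative assumptions, not the recorded training values.

```python
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# QLoRA: load the frozen base model in 4-bit NF4 and train LoRA adapters on top.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)
base = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2-0.5B-Instruct", quantization_config=bnb_config
)
base = prepare_model_for_kbit_training(base)

# Illustrative LoRA settings; the actual run may have used different values.
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    task_type="CAUSAL_LM",
)
model = get_peft_model(base, lora_config)
model.print_trainable_parameters()  # only the small adapter matrices are trainable
```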
# Quickstart

The example below runs in Google Colab on the free-tier GPU:
```python
!pip install transformers accelerate bitsandbytes peft

import torch
from peft import PeftModel, PeftConfig
from transformers import AutoTokenizer, AutoModelForCausalLM

# Read the adapter config to find the base model it was trained on.
config = PeftConfig.from_pretrained("SeppeHousen/qwen-0.5B-qlora-guanaco")

# Load the base model in half precision, then attach the LoRA adapter.
base_model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,  # Qwen/Qwen2-0.5B-Instruct
    torch_dtype=torch.float16,
)
model = PeftModel.from_pretrained(base_model, "SeppeHousen/qwen-0.5B-qlora-guanaco")
tokenizer = AutoTokenizer.from_pretrained("SeppeHousen/qwen-0.5B-qlora-guanaco")
model.to("cuda")

messages = [
    {
        "role": "system",
        "content": "You are a friendly chatbot who is always happy to help",
    },
    {"role": "user", "content": "Arrr, tell me what the weather be like today!"},
]

# Apply the model's chat template and move the prompt to the GPU.
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to("cuda")

outputs = model.generate(
    input_ids=input_ids,
    max_new_tokens=512,
    do_sample=True,
    temperature=0.7,
    top_k=50,
    top_p=0.95,
)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True)[0])
```
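The model above generates through the PEFT wrapper. If you want a standalone model (for example, to drop the `peft` dependency at inference time), the adapter can be merged into the base weights with peft's `merge_and_unload`; the output directory below is a hypothetical name, not a published repo:

```python
# Merge the LoRA adapter into the base weights; the result behaves like a
# plain transformers model and no longer needs peft at inference time.
merged_model = model.merge_and_unload()
merged_model.save_pretrained("qwen-0.5B-guanaco-merged")  # hypothetical local path
tokenizer.save_pretrained("qwen-0.5B-guanaco-merged")
```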