Update README.md
## Problem:
If we do not set the device in the pipeline, we first get this warning:
```
UserWarning: You are calling .generate() with the `input_ids` being on a device type different than your model's device. `input_ids` is on cpu, whereas the model is on cuda. You may experience unexpected behaviors or slower generation. Please make sure that you have put `input_ids` to the correct device by calling for example input_ids = input_ids.to('cuda') before running `.generate()`.
```
After that, generation fails with this error:
```
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument index in method wrapper_CUDA__index_select)
```
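
The warning already names the generic remedy: move the input tensors to the model's device before calling `.generate()`. A minimal self-contained sketch of that pattern (using `gpt2` purely for illustration, not the model from this README):

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Pick the GPU when available, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2").to(device)

# Move the tokenized prompt onto the same device as the model
inputs = tokenizer("Tell me about AI", return_tensors="pt").to(device)
output_ids = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(output_ids[0]))
```

Doing this by hand at every call site is easy to forget, which is what the change below avoids.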
## Solution:
Set the device explicitly when loading the model and tokenizer and when constructing the pipeline, so that all tensors end up on the same device. The change, as a unified diff:
````diff
@@ -105,13 +105,17 @@ pip3 install git+https://github.com/casper-hansen/AutoAWQ.git@1c5ccc791fa2cb0697
 ```python
 from awq import AutoAWQForCausalLM
 from transformers import AutoTokenizer
+import torch
+
+device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+
 
 model_name_or_path = "TheBloke/Mistral-7B-v0.1-AWQ"
 
 # Load model
 model = AutoAWQForCausalLM.from_quantized(model_name_or_path, fuse_layers=True,
-                                          trust_remote_code=False, safetensors=True)
-tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=False)
+                                          trust_remote_code=False, safetensors=True, device=device)
+tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=False, device=device)
 
 prompt = "Tell me about AI"
 prompt_template=f'''{prompt}
@@ -152,7 +156,8 @@ pipe = pipeline(
     temperature=0.7,
     top_p=0.95,
     top_k=40,
-    repetition_penalty=1.1
+    repetition_penalty=1.1,
+    device=device
 )
 
 print(pipe(prompt_template)[0]['generated_text'])
````
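
For reference, here is a sketch of the patched example with both hunks applied in one place. The `pipeline(...)` arguments that fall outside the hunks (the task name, `model=`, `tokenizer=`, `max_new_tokens`, `do_sample`) are assumptions based on the usual pattern in this README and should be checked against the full file:

```python
import torch
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer, pipeline

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

model_name_or_path = "TheBloke/Mistral-7B-v0.1-AWQ"

# Load model and tokenizer on the chosen device (the "+" side of the first hunk)
model = AutoAWQForCausalLM.from_quantized(model_name_or_path, fuse_layers=True,
                                          trust_remote_code=False, safetensors=True, device=device)
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=False, device=device)

prompt = "Tell me about AI"
prompt_template = f'''{prompt}
'''

# Arguments not visible in the hunks are assumptions, not taken from the diff
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    do_sample=True,
    temperature=0.7,
    top_p=0.95,
    top_k=40,
    repetition_penalty=1.1,
    device=device,  # the "+" side of the second hunk
)

print(pipe(prompt_template)[0]['generated_text'])
```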