zR
committed on
Commit
·
d681afc
1
Parent(s):
fc15412
cuda
Browse files
app.py
CHANGED
@@ -26,7 +26,7 @@ def predict(history, max_length, top_p, temperature):
|
|
26 |
messages = preprocess_messages(history)
|
27 |
model_inputs = tokenizer.apply_chat_template(
|
28 |
messages, add_generation_prompt=True, tokenize=True, return_tensors="pt", return_dict=True
|
29 |
-
)
|
30 |
streamer = TextIteratorStreamer(tokenizer, timeout=60, skip_prompt=True, skip_special_tokens=True)
|
31 |
generate_kwargs = {
|
32 |
"input_ids": model_inputs["input_ids"],
|
|
|
26 |
messages = preprocess_messages(history)
|
27 |
model_inputs = tokenizer.apply_chat_template(
|
28 |
messages, add_generation_prompt=True, tokenize=True, return_tensors="pt", return_dict=True
|
29 |
+
).to(model.device)
|
30 |
streamer = TextIteratorStreamer(tokenizer, timeout=60, skip_prompt=True, skip_special_tokens=True)
|
31 |
generate_kwargs = {
|
32 |
"input_ids": model_inputs["input_ids"],
|