Spaces:

hsuwill000
/

Qwen2.5-0.5B-Instruct-openvino-4bit

Runtime error

App Files Files Community

hsuwill000 committed on Feb 5

Commit

79cac17

verified ·

1 Parent(s): 24dfad1

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -7

app.py CHANGED Viewed

@@ -9,20 +9,39 @@ model = OVModelForCausalLM.from_pretrained(model_id, device_map="auto")
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 # 建立生成管道
-pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
 def respond(message, history):
     # Merge the current message with the message history
     # NOTE(review): the second assignment below overwrites the first, so the
     # history-based value is never used.
-    input_text = message if not history else history[-1]["content"] + " " + message
-    input_text = message+",(450字內回覆)"
     # Get the model's response
-    response = pipe(input_text, max_length=512, truncation=True, num_return_sequences=1)
-    reply = response[0]['generated_text']
     # Return the new message format
     print(f"Message: {message}")
-    print(f"Reply: {reply}")
-    return reply
 # 設定 Gradio 的聊天界面
 demo = gr.ChatInterface(fn=respond, title="Qwen2.5-3B-Instruct-openvino", description="Qwen2.5-3B-Instruct-openvino", type='messages')

 tokenizer = AutoTokenizer.from_pretrained(model_id)
 # 建立生成管道
+#pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
 def respond(message, history):
     """Generate a single-turn reply to ``message`` using the chat template.

     Args:
         message: The latest user message text passed in by the chat interface.
         history: Earlier conversation turns supplied by the chat interface.
             Accepted for interface compatibility but not used; each reply is
             generated from ``message`` alone.

     Returns:
         The decoded text of the newly generated tokens (prompt tokens removed).
     """
     # The list must be bound to the same name that is handed to
     # apply_chat_template below; the previous version built it as
     # ``input_text`` but passed the undefined name ``messages``, which raised
     # NameError on every call.
     messages = [
         {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
         {"role": "user", "content": message},
     ]
     # Render the conversation to a single prompt string (not token ids) and
     # append the generation prompt.
     text = tokenizer.apply_chat_template(
         messages,
         tokenize=False,
         add_generation_prompt=True,
     )
     model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
     generated_ids = model.generate(
         **model_inputs,
         max_new_tokens=512,
     )
     # Drop the leading prompt-length tokens from each output sequence so that
     # only the newly generated part is decoded.
     new_token_ids = [
         output_ids[len(input_ids):]
         for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
     ]
     response = tokenizer.batch_decode(new_token_ids, skip_special_tokens=True)[0]
     print(f"Message: {message}")
     print(f"Reply: {response}")
     return response
 # 設定 Gradio 的聊天界面
 demo = gr.ChatInterface(fn=respond, title="Qwen2.5-3B-Instruct-openvino", description="Qwen2.5-3B-Instruct-openvino", type='messages')