hsuwill000 committed
Commit 79cac17 · verified · 1 parent: 24dfad1

Update app.py

Files changed (1)
  1. app.py +26 -7
app.py CHANGED
@@ -9,20 +9,39 @@ model = OVModelForCausalLM.from_pretrained(model_id, device_map="auto")
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 
 # Build the generation pipeline
-pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
+#pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
 
 def respond(message, history):
     # Merge the current message with the conversation history
-    input_text = message if not history else history[-1]["content"] + " " + message
-    input_text = message + ",(450字內回覆)"  # i.e., "reply within 450 characters"
+    #input_text = message if not history else history[-1]["content"] + " " + message
+    #input_text = message + ",(450字內回覆)"  # i.e., "reply within 450 characters"
+    messages = [
+        {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
+        {"role": "user", "content": message}
+    ]
+    text = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True
+    )
+    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
+    generated_ids = model.generate(
+        **model_inputs,
+        max_new_tokens=512
+    )
+
     # Get the model's response
-    response = pipe(input_text, max_length=512, truncation=True, num_return_sequences=1)
-    reply = response[0]['generated_text']
+    #response = pipe(input_text, max_length=512, truncation=True, num_return_sequences=1)
+    #reply = response[0]['generated_text']
+    generated_ids = [
+        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+    ]
+    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
 
     # Return the reply in the new message format
     print(f"Message: {message}")
-    print(f"Reply: {reply}")
-    return reply
+    print(f"Reply: {response}")
+    return response
 
 # Set up the Gradio chat interface
 demo = gr.ChatInterface(fn=respond, title="Qwen2.5-3B-Instruct-openvino", description="Qwen2.5-3B-Instruct-openvino", type='messages')
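
For reference, Qwen2.5's chat template is ChatML-style, so the text string that apply_chat_template(..., add_generation_prompt=True) builds for a single user turn looks roughly like the following (the exact markers come from the template bundled with the tokenizer, and the user message "Hello!" here is a made-up example):

<|im_start|>system
You are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>
<|im_start|>user
Hello!<|im_end|>
<|im_start|>assistant

Because add_generation_prompt=True leaves the prompt open at the assistant turn, model.generate() continues from there, and slicing each output sequence at len(input_ids) keeps only that continuation for decoding.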
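
The hunk starts at line 9, so the imports and model setup above it are not shown in this diff. As a minimal sketch, the whole app.py after this commit might look like the code below; the import lines, the model_id value, and the final demo.launch() call are assumptions inferred from the identifiers used in the hunk, not the file's actual header:

import gradio as gr
from optimum.intel import OVModelForCausalLM  # assumed import path
from transformers import AutoTokenizer

model_id = "hsuwill000/Qwen2.5-3B-Instruct-openvino"  # assumption; the real value sits above the hunk

model = OVModelForCausalLM.from_pretrained(model_id, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_id)

def respond(message, history):
    # Build the chat prompt from the model's chat template
    messages = [
        {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
        {"role": "user", "content": message}
    ]
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
    # Generate, then drop the prompt tokens so only the new completion is decoded
    generated_ids = model.generate(**model_inputs, max_new_tokens=512)
    generated_ids = [
        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    return tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

demo = gr.ChatInterface(fn=respond, title="Qwen2.5-3B-Instruct-openvino", description="Qwen2.5-3B-Instruct-openvino", type='messages')
demo.launch()  # assumption; the launch call is below the hunk

Decoding only the tokens after the prompt avoids echoing the templated prompt back into the chat window, which the earlier pipeline-based version was prone to, since text-generation pipelines include the prompt in generated_text by default.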