Russal committed
Commit 4670ca1
1 Parent(s): e114a8f

Update README.md

Files changed (1)
  1. README.md +52 -12

README.md CHANGED
@@ -15,20 +15,60 @@ inference: false
 Baichuan-13B-Instruction is the instruction-tuned version of the Baichuan-13B model series; the pretrained model is available at [Baichuan-13B-Base](https://huggingface.co/baichuan-inc/Baichuan-13B-Base).
 
 
-## Usage
-
-The following is an example of chatting with Baichuan-13B-Chat. The expected output is "乔戈里峰。世界第二高峰———乔戈里峰西方登山者称其为k2峰,海拔高度是8611米,位于喀喇昆仑山脉的中巴边境上" (Mount Qogir, known to Western climbers as K2: the world's second-highest peak, 8,611 m, on the China-Pakistan border in the Karakoram range).
-```python
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
-from transformers.generation.utils import GenerationConfig
-
-# Load the tokenizer and model through Baichuan's remote code, in fp16 across available GPUs
-tokenizer = AutoTokenizer.from_pretrained("AlpachinoNLP/Baichuan-13B-Instruction", use_fast=False, trust_remote_code=True)
-model = AutoModelForCausalLM.from_pretrained("AlpachinoNLP/Baichuan-13B-Instruction", device_map="auto", torch_dtype=torch.float16, trust_remote_code=True)
-model.generation_config = GenerationConfig.from_pretrained("AlpachinoNLP/Baichuan-13B-Instruction")
-
-# Ask: "Which is the second-highest mountain in the world?"
-messages = [{"role": "Human", "content": "世界上第二高的山峰是哪座"}]
-response = model.chat(tokenizer, messages)
-print(response)
-```
+## Demo
+
+The following is a Gradio demo for the model:
+
+```python
+import gradio as gr
+from transformers import AutoTokenizer, AutoModelForCausalLM
+
+tokenizer = AutoTokenizer.from_pretrained("AlpachinoNLP/Baichuan-13B-Instruction", trust_remote_code=True, use_fast=False)
+model = AutoModelForCausalLM.from_pretrained("AlpachinoNLP/Baichuan-13B-Instruction", trust_remote_code=True).half()
+model.cuda()
+
+def generate(histories, max_new_tokens=2048, do_sample=True, top_p=0.95, temperature=0.35, repetition_penalty=1.1):
+    # Serialize the chat history into the "Human:/Assistant:" prompt format,
+    # e.g. a single turn becomes "\nHuman:<question>\n\nAssistant:<answer>"
+    prompt = ""
+    for history in histories:
+        history_with_identity = "\nHuman:" + history[0] + "\n\nAssistant:" + history[1]
+        prompt += history_with_identity
+    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
+    outputs = model.generate(
+        input_ids=input_ids,
+        max_new_tokens=max_new_tokens,
+        early_stopping=True,
+        do_sample=do_sample,
+        top_p=top_p,
+        temperature=temperature,
+        repetition_penalty=repetition_penalty,
+    )
+    rets = tokenizer.batch_decode(outputs, skip_special_tokens=True)
+    # Strip the echoed prompt so only the newly generated reply is returned
+    generate_text = rets[0].replace(prompt, "")
+    return generate_text
+
+with gr.Blocks() as demo:
+    chatbot = gr.Chatbot()
+    msg = gr.Textbox()
+    clear = gr.Button("clear")
+
+    def user(user_message, history):
+        # Append the user's turn with an empty assistant slot, then clear the textbox
+        return "", history + [[user_message, ""]]
+
+    def bot(history):
+        # Fill in the assistant slot of the latest turn
+        bot_message = generate(history)
+        history[-1][1] = bot_message
+        return history
+
+    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
+        bot, chatbot, chatbot
+    )
+    clear.click(lambda: None, None, chatbot, queue=False)
+
+if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0")
+```
 
 ## Quantized Deployment
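
The body of the Quantized Deployment (量化部署) section falls outside this hunk. As a reference point, here is a minimal int8 loading sketch; it assumes this fine-tune inherits the `quantize()` helper exposed by Baichuan-13B's `trust_remote_code` modeling file, which is an assumption for this particular checkpoint:

```python
# Sketch under an assumption: AlpachinoNLP/Baichuan-13B-Instruction is presumed
# to inherit the quantize() helper from Baichuan-13B's remote modeling code.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(
    "AlpachinoNLP/Baichuan-13B-Instruction", use_fast=False, trust_remote_code=True
)
model = AutoModelForCausalLM.from_pretrained(
    "AlpachinoNLP/Baichuan-13B-Instruction",
    torch_dtype=torch.float16,
    trust_remote_code=True,
)
# Weight-only int8 quantization, then move to GPU; this roughly halves
# GPU memory use relative to fp16 at some cost in output quality.
model = model.quantize(8).cuda()
```

Quantizing on the fly after an fp16 load mirrors the pattern documented in Baichuan-13B's own README; `model.quantize(4)` follows the same pattern for int4 if memory is tighter.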