shibing624 committed · Commit ee443b3 · Parent(s): 1fa90a2

Update README.md
README.md CHANGED
````diff
@@ -80,7 +80,7 @@ The following `bitsandbytes` quantization config was used during training:
 - [shibing624/textgen](https://github.com/shibing624/textgen)
 - [shibing624/MedicalGPT](https://github.com/shibing624/MedicalGPT)
 
-Use the textgen library ([textgen](https://github.com/shibing624/textgen)), which can run LLaMA models:
+Use the textgen library ([textgen](https://github.com/shibing624/textgen)), which can run Baichuan/LLaMA models:
 
 Install package:
 ```shell
@@ -114,10 +114,10 @@ import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
 
 
-model = AutoModelForCausalLM.from_pretrained("baichuan-inc/Baichuan-13B-Chat", device_map='auto', trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained("baichuan-inc/Baichuan-13B-Chat", device_map='auto', torch_dtype=torch.float16, trust_remote_code=True)
 model.generation_config = GenerationConfig.from_pretrained("baichuan-inc/Baichuan-13B-Chat", trust_remote_code=True)
 tokenizer = AutoTokenizer.from_pretrained("baichuan-inc/Baichuan-13B-Chat", trust_remote_code=True)
-device =
+device = torch.device(0) if torch.cuda.is_available() else torch.device("cpu")
 
 def generate_prompt(instruction):
     return f"""A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.USER: {instruction} ASSISTANT: """
@@ -129,9 +129,9 @@ for s in sents:
     inputs = tokenizer(q, return_tensors="pt")
     inputs = inputs.to(device)
 
-    generate_ids =
+    generate_ids = model.generate(
         **inputs,
-        max_new_tokens=
+        max_new_tokens=512,
     )
 
     output = tokenizer.batch_decode(generate_ids, skip_special_tokens=True)[0]
@@ -164,7 +164,7 @@ vicuna-baichuan-13b-chat
 └── tokenizer.model
 ```
 
-
+- Inference GPU: 27 GB
 ### Inference Examples
````
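The hunks above show only the fragments that changed. Stitched together, the inference snippet as it reads after this commit is roughly the sketch below. The `sents` test sentence, the `q = generate_prompt(s)` line, and the final `print` fall outside the hunks, so they are assumptions for illustration, not part of the commit.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

# Load Baichuan-13B-Chat in float16, sharded across the visible GPUs.
model = AutoModelForCausalLM.from_pretrained(
    "baichuan-inc/Baichuan-13B-Chat",
    device_map='auto',
    torch_dtype=torch.float16,
    trust_remote_code=True,
)
model.generation_config = GenerationConfig.from_pretrained(
    "baichuan-inc/Baichuan-13B-Chat", trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained(
    "baichuan-inc/Baichuan-13B-Chat", trust_remote_code=True
)
device = torch.device(0) if torch.cuda.is_available() else torch.device("cpu")


def generate_prompt(instruction):
    return f"""A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.USER: {instruction} ASSISTANT: """


sents = ["介绍下北京"]  # hypothetical test input; the real list sits outside the hunks
for s in sents:
    q = generate_prompt(s)  # assumed: the hunk only shows tokenizer(q, ...)
    inputs = tokenizer(q, return_tensors="pt")
    inputs = inputs.to(device)

    generate_ids = model.generate(
        **inputs,
        max_new_tokens=512,
    )

    output = tokenizer.batch_decode(generate_ids, skip_special_tokens=True)[0]
    print(output)  # assumed
```

With `device_map='auto'` the weights are dispatched across the available GPUs, so the inputs only need to reach the first device; that is what moving them to `torch.device(0)` accomplishes here.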
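As a sanity check on the new "Inference GPU: 27 GB" line: with `torch_dtype=torch.float16`, 13B parameters × 2 bytes comes to about 26 GB for the weights alone, so a footprint of roughly 27 GB including the KV cache and activations at modest generation lengths is plausible.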