zRzRzRzR committed
Commit 269b8ba • 1 Parent(s): 8dc9e30
Update README.md
README.md CHANGED
@@ -106,7 +106,7 @@ with torch.no_grad():
 print(tokenizer.decode(outputs[0], skip_special_tokens=True))
 ```
 
-Use
+Use the vLLM backend for inference:
 
 ```python
 from transformers import AutoTokenizer
@@ -116,9 +116,6 @@ from vllm import LLM, SamplingParams
 # max_model_len, tp_size = 1048576, 4
 
 # GLM-4-9B-Chat
-from transformers import AutoTokenizer
-from vllm import LLM, SamplingParams
-
 # If you encounter OOM, it is recommended to reduce max_model_len or increase tp_size
 max_model_len, tp_size = 131072, 1
 model_name = "THUDM/glm-4-9b-chat"
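The hunks above only show the top of the README's vLLM snippet (imports, `max_model_len`, `tp_size`, `model_name`). Below is a minimal sketch of how such a snippet could be completed end to end; the prompt text, sampling settings, and the exact `LLM(...)` arguments beyond those shown in the diff are assumptions for illustration, not content taken from this commit.

```python
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams

# GLM-4-9B-Chat
# If you encounter OOM, it is recommended to reduce max_model_len or increase tp_size
max_model_len, tp_size = 131072, 1
model_name = "THUDM/glm-4-9b-chat"

# Hypothetical single-turn chat message; any prompt works here.
messages = [{"role": "user", "content": "Hello, please introduce yourself briefly."}]

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
llm = LLM(
    model=model_name,
    tensor_parallel_size=tp_size,
    max_model_len=max_model_len,
    trust_remote_code=True,
)

# Render the chat template to a plain string prompt for vLLM.
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)

# Sampling settings are illustrative, not prescribed by the diff.
sampling_params = SamplingParams(temperature=0.95, max_tokens=1024)

outputs = llm.generate(prompts=[prompt], sampling_params=sampling_params)
print(outputs[0].outputs[0].text)
```

`tp_size = 1` with `max_model_len = 131072` targets a single-GPU setup for the 128K-context chat model; the commented-out `max_model_len, tp_size = 1048576, 4` line in the diff corresponds to the 1M-context variant spread across four GPUs.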