Commit e2ebb7c by huseinzol05
Parent(s): 81a4cf5
Upload autoawq-mallam-1.1b.ipynb

autoawq-mallam-1.1b.ipynb (ADDED, +547 -0)
In [4]:
# !pip3 install https://github.com/casper-hansen/AutoAWQ/releases/download/v0.1.8/autoawq-0.1.8+cu118-cp310-cp310-linux_x86_64.whl
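The commented install above pins a prebuilt AutoAWQ 0.1.8 wheel for CUDA 11.8 and CPython 3.10. A minimal alternative sketch, assuming the same release is also published on PyPI with wheels compatible with the local CUDA/Python setup (otherwise keep the pinned wheel URL):

# Sketch, not from the notebook: generic install of the same AutoAWQ release.
# Assumes a PyPI wheel compatible with the local CUDA and Python versions.
# !pip3 install autoawq==0.1.8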
In [5]:
from awq import AutoAWQForCausalLM
from transformers import AutoConfig, AwqConfig, AutoTokenizer, AutoModelForCausalLM
import torch

model_path = 'mesolitica/mallam-1.1b-20k-instructions-rag'

In [6]:
model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype = torch.bfloat16)

(download progress: config.json 655 B, model.safetensors 2.25 GB, generation_config.json 111 B)

In [7]:
model.save_pretrained('./test', safe_serialization = False)

In [8]:
model = AutoAWQForCausalLM.from_pretrained('./test')

In [9]:
quant_path = 'mallam-1.1b-20k-instructions-rag-awq'
quant_config = { "zero_point": True, "q_group_size": 128, "w_bit": 4, "version": "GEMM" }

tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model.quantize(tokenizer, quant_config=quant_config, calib_data = 'mesolitica/malaysian-calibration')

(download progress: tokenizer_config.json 1.69 kB, tokenizer.json 1.34 MB, special_tokens_map.json 638 B, calibration data 351 MB; generating train split)
AWQ: 100%|██████████| 22/22 [02:28<00:00,  6.75s/it]
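For a quick sanity check of the 4-bit GEMM checkpoint before pushing it anywhere, AutoAWQ can load the quantized folder back directly. A minimal sketch, assuming AutoAWQ 0.1.x's from_quantized API and the quant_path folder written by save_quantized in the next cell:

# Sketch (not part of the original notebook): reload the AWQ checkpoint with AutoAWQ itself.
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

quant_path = 'mallam-1.1b-20k-instructions-rag-awq'   # folder produced by save_quantized
awq_model = AutoAWQForCausalLM.from_quantized(quant_path, fuse_layers=True)
awq_tokenizer = AutoTokenizer.from_pretrained(quant_path)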
In [10]:
model.save_quantized(quant_path, safetensors = False)
tokenizer.save_pretrained(quant_path)

WARNING:root:`quant_config.json` is being deprecated in the future in favor of quantization_config in config.json.
Out[10]:
('mallam-1.1b-20k-instructions-rag-awq/tokenizer_config.json',
 'mallam-1.1b-20k-instructions-rag-awq/special_tokens_map.json',
 'mallam-1.1b-20k-instructions-rag-awq/tokenizer.json')

In [12]:
tokenizer.push_to_hub('mesolitica/mallam-1.1b-20k-instructions-rag-AWQ')

Out[12]:
CommitInfo(commit_url='https://huggingface.co/mesolitica/mallam-1.1b-20k-instructions-rag-AWQ/commit/ef848a832fb7b21f0bf4b808a83af300ed07beca', commit_message='Upload tokenizer', commit_description='', oid='ef848a832fb7b21f0bf4b808a83af300ed07beca', pr_url=None, pr_revision=None, pr_num=None)

In [13]:
quantization_config = AwqConfig(
    bits=quant_config['w_bit'],
    group_size=quant_config['q_group_size'],
    zero_point=quant_config['zero_point'],
    backend='autoawq',
    version=quant_config['version'].lower(),
)

config = AutoConfig.from_pretrained(model_path)
config.quantization_config = quantization_config

config.push_to_hub('mesolitica/mallam-1.1b-20k-instructions-rag-AWQ')

Out[13]:
CommitInfo(commit_url='https://huggingface.co/mesolitica/mallam-1.1b-20k-instructions-rag-AWQ/commit/40c9e6295642ca0e5cdc4b8ff8888a73c1ce25d3', commit_message='Upload config', commit_description='', oid='40c9e6295642ca0e5cdc4b8ff8888a73c1ce25d3', pr_url=None, pr_revision=None, pr_num=None)
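Embedding the AwqConfig into config.json and pushing it is what lets plain transformers recognise the repo as an AWQ checkpoint, which is also what the quant_config.json deprecation warning above is pointing at. A small sketch of the complementary step, assumed rather than taken from the notebook:

# Sketch (assumption, not in the notebook): also write the patched config next to the
# quantized weights locally, so the folder itself loads in transformers without
# relying on the separate quant_config.json.
config.save_pretrained(quant_path)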
In [16]:
!ls mallam-1.1b-20k-instructions-rag-awq

config.json		quant_config.json	 tokenizer_config.json
generation_config.json	special_tokens_map.json
pytorch_model.bin	tokenizer.json

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
To disable this warning, you can either:
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)

In [14]:
from huggingface_hub import HfApi

api = HfApi()

In [17]:
api.upload_file(
    path_or_fileobj='mallam-1.1b-20k-instructions-rag-awq/pytorch_model.bin',
    path_in_repo="pytorch_model.bin",
    repo_id='mesolitica/mallam-1.1b-20k-instructions-rag-AWQ',
    repo_type="model",
)

(upload progress: pytorch_model.bin 778 MB)
Out[17]:
CommitInfo(commit_url='https://huggingface.co/mesolitica/mallam-1.1b-20k-instructions-rag-AWQ/commit/4caae4a226e3dd569ac467c5099cd159ac5fa3e0', commit_message='Upload pytorch_model.bin with huggingface_hub', commit_description='', oid='4caae4a226e3dd569ac467c5099cd159ac5fa3e0', pr_url=None, pr_revision=None, pr_num=None)

In [18]:
api.upload_file(
    path_or_fileobj='mallam-1.1b-20k-instructions-rag-awq/quant_config.json',
    path_in_repo="quant_config.json",
    repo_id='mesolitica/mallam-1.1b-20k-instructions-rag-AWQ',
    repo_type="model",
)

Out[18]:
CommitInfo(commit_url='https://huggingface.co/mesolitica/mallam-1.1b-20k-instructions-rag-AWQ/commit/81a4cf5cdf2ecd38d74354f558af0d2896a24b7d', commit_message='Upload quant_config.json with huggingface_hub', commit_description='', oid='81a4cf5cdf2ecd38d74354f558af0d2896a24b7d', pr_url=None, pr_revision=None, pr_num=None)
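The two upload_file calls push the weights and quant_config one file at a time. A minimal sketch of the equivalent single call, assuming the installed huggingface_hub version provides upload_folder:

# Sketch (assumption: a recent huggingface_hub): push the whole quantized folder at once.
api.upload_folder(
    folder_path='mallam-1.1b-20k-instructions-rag-awq',
    repo_id='mesolitica/mallam-1.1b-20k-instructions-rag-AWQ',
    repo_type='model',
)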
In [19]:
quantized_model = AutoModelForCausalLM.from_pretrained('mesolitica/mallam-1.1b-20k-instructions-rag-AWQ')
_ = quantized_model.cuda()

(download progress: config.json 909 B, pytorch_model.bin 778 MB)
You have loaded an AWQ model on CPU and have a CUDA device available, make sure to set your model on a GPU device in order to run your model.
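The warning appears because the AWQ checkpoint is first materialised on CPU and only then moved with .cuda(). A minimal sketch of loading straight onto the GPU instead, assuming accelerate is installed so that device_map is honoured:

# Sketch (not in the notebook; assumes accelerate is installed).
quantized_model = AutoModelForCausalLM.from_pretrained(
    'mesolitica/mallam-1.1b-20k-instructions-rag-AWQ',
    device_map='cuda',
)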
In [23]:
messages = [
    {'role': 'user', 'content': 'kwsp tu apa'}
]
tokenizer.apply_chat_template(messages, tokenize = False)

Out[23]:
'<s>[INST] kwsp tu apa [/INST]'

In [26]:
messages = [
    {'role': 'user', 'content': 'KWSP tu apa?'}
]
prompt = tokenizer.apply_chat_template(messages, tokenize = False)
inputs = tokenizer([prompt], return_tensors='pt', add_special_tokens=False).to('cuda')

In [27]:
%%time

generate_kwargs = dict(
    inputs,
    max_new_tokens=1024,
    top_p=0.95,
    top_k=50,
    temperature=0.9,
    do_sample=True,
    num_beams=1,
)
r = quantized_model.generate(**generate_kwargs)
tokenizer.decode(r[0])

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
CPU times: user 4.65 s, sys: 3.73 ms, total: 4.65 s
Wall time: 4.65 s
Out[27]:
'<s> [INST] KWSP tu apa? [/INST]Kumpulan Wang Simpanan Pekerja (KWSP) atau EPF adalah tabung simpanan wang pekerja di Malaysia. KWSP ialah institusi keselamatan sosial yang disediakan oleh kerajaan Malaysia untuk membantu pekerja Malaysia menguruskan dana persaraan mereka.\n\nKWSP bertujuan untuk menyediakan sumber kewangan untuk membantu pekerja Malaysia melabur dan menyimpan untuk masa depan mereka. Ia mempunyai objektif utama untuk mencapai matlamat ini, iaitu memastikan ahli KWSP mempunyai simpanan yang mencukupi untuk memenuhi keperluan persaraan mereka.\n\nKWSP menyediakan faedah persaraan yang berbeza, seperti KWSP Prihatin dan KWSP i-Sinar. Simpanan KWSP yang dikhaskan boleh digunakan untuk tujuan yang berbeza, seperti membiayai pendidikan anak-anak, membeli rumah, atau memenuhi keperluan perubatan.\n\nSecara amnya, ahli KWSP layak untuk pelbagai jenis manfaat persaraan yang mereka boleh perolehi, bergantung pada umur, tahap kecacatan, dan sektor pekerjaan mereka. Ini termasuk faedah KWSP, bantuan bayaran pencen, serta kemudahan simpanan tambahan.\n\nSekiranya anda ingin mengetahui lebih lanjut mengenai KWSP atau menyertai KWSP, anda boleh menghubungi cawangan KWSP terdekat atau layari laman web KWSP untuk mendapatkan maklumat lanjut.</s>'
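The full answer above takes about 4.7 s of wall time because the whole sequence is generated before decoding. A minimal sketch of streaming the same generation token by token with transformers' TextStreamer, reusing the inputs and quantized_model defined above:

# Sketch (not in the notebook): print tokens as they are generated.
from transformers import TextStreamer

streamer = TextStreamer(tokenizer, skip_prompt=True)
_ = quantized_model.generate(
    **inputs,
    streamer=streamer,
    max_new_tokens=1024,
    top_p=0.95,
    top_k=50,
    temperature=0.9,
    do_sample=True,
)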
(Notebook kernel: Python 3 (ipykernel), Python 3.10.12; nbformat 4, minor 5.)