Update README.md
Browse files
README.md
CHANGED
@@ -5,15 +5,30 @@ tags:
|
|
5 |
- llama3
|
6 |
---
|
7 |
|
8 |
-
This is a model that has been AWQ quantized and converted to run on the NPU installed in the Ryzen AI PC (for example, Ryzen 9 7940HS Processor) (for Windows environment)
|
9 |
|
10 |
-
For information on setting up Ryzen AI for LLMs, see [Running LLM on AMD NPU Hardware](https://www.hackster.io/gharada2013/running-llm-on-amd-npu-hardware-19322f).
|
11 |
|
12 |
-
The following
|
13 |
|
14 |
### setup
|
|
|
15 |
```
|
16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
```
|
18 |
|
19 |
### Sample Script
|
@@ -58,8 +73,8 @@ if __name__ == "__main__":
|
|
58 |
p.cpu_affinity([0, 1, 2, 3])
|
59 |
torch.set_num_threads(4)
|
60 |
|
61 |
-
tokenizer = AutoTokenizer.from_pretrained("
|
62 |
-
ckpt = "pytorch_llama3_8b_w_bit_4_awq_lm_amd.pt"
|
63 |
terminators = [
|
64 |
tokenizer.eos_token_id,
|
65 |
tokenizer.convert_tokens_to_ids("<|eot_id|>")
|
|
|
5 |
- llama3
|
6 |
---
|
7 |
|
8 |
+
This is a model that has been AWQ quantized and converted to run on the NPU installed in the Ryzen AI PC (for example, Ryzen 9 7940HS Processor) (for Windows environment)
|
9 |
|
10 |
+
For information on setting up Ryzen AI for LLMs in Windows 11, see [Running LLM on AMD NPU Hardware](https://www.hackster.io/gharada2013/running-llm-on-amd-npu-hardware-19322f).
|
11 |
|
12 |
+
The following sample assumes that the setup on the above page has been completed.
|
13 |
|
14 |
### setup
|
15 |
+
In a cmd window:
|
16 |
```
|
17 |
+
conda activate ryzenai-transformers
|
18 |
+
<your_install_path>\RyzenAI-SW\example\transformers\setup.bat
|
19 |
+
git lfs install
|
20 |
+
git clone https://huggingface.co/dahara1/llama3-8b-amd-npu
|
21 |
+
cd llama3-8b-amd-npu
|
22 |
+
git lfs pull
|
23 |
+
cd ..
|
24 |
+
copy <your_install_path>\RyzenAI-SW\example\transformers\models\llama2\modeling_llama_amd.py .
|
25 |
+
|
26 |
+
# set up Runtime. see [Runtime Setup](https://ryzenai.docs.amd.com/en/latest/runtime_setup.html)
|
27 |
+
set XLNX_VART_FIRMWARE=<your_install_path>\voe-4.0-win_amd64\1x4.xclbin
|
28 |
+
set NUM_OF_DPU_RUNNERS=1
|
29 |
+
|
30 |
+
# save the sample script below as llama3-test.py (UTF-8 encoding)
|
31 |
+
python llama3-test.py
|
32 |
```
|
33 |
|
34 |
### Sample Script
|
|
|
73 |
p.cpu_affinity([0, 1, 2, 3])
|
74 |
torch.set_num_threads(4)
|
75 |
|
76 |
+
tokenizer = AutoTokenizer.from_pretrained("llama3-8b-amd-npu")
|
77 |
+
ckpt = "llama3-8b-amd-npu/pytorch_llama3_8b_w_bit_4_awq_lm_amd.pt"
|
78 |
terminators = [
|
79 |
tokenizer.eos_token_id,
|
80 |
tokenizer.convert_tokens_to_ids("<|eot_id|>")
|