tool: automatically download llama.cpp and model files; run chat completions server (win, cuda)
Browse files
DeepSeek-V2-Chat.q2_k.gguf/download_and_run.bat
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
@echo off
setlocal

:: Create the model directory (only if it is missing, so reruns do not print
:: a "already exists" error) and enter it. Abort if the directory cannot be
:: entered so nothing is downloaded into, or run from, the wrong location.
if not exist "DeepSeek-V2-Chat.Q2_K.gguf\" mkdir "DeepSeek-V2-Chat.Q2_K.gguf"
cd "DeepSeek-V2-Chat.Q2_K.gguf" || exit /b 1
|
7 |
+
|
8 |
+
:: Download the five GGUF shards of the model.
::  - A shard that is already present is skipped, so rerunning the script
::    does not fetch it again.
::  - Each shard is written to a ".part" file and renamed only after the
::    download succeeded, so an interrupted transfer is never mistaken for a
::    complete shard on the next run.
::  - The progress bar is disabled because rendering it slows
::    Invoke-WebRequest considerably on Windows PowerShell 5.1.
::  - The script stops on the first failed download.
for %%i in (1 2 3 4 5) do (
    if not exist "DeepSeek-V2-Chat.Q2_K-0000%%i-of-00005.gguf" (
        powershell -NoProfile -Command "$ProgressPreference = 'SilentlyContinue'; Invoke-WebRequest -Uri 'https://huggingface.co/leafspark/DeepSeek-V2-Chat-GGUF/resolve/main/DeepSeek-V2-Chat.q2_k.gguf/DeepSeek-V2-Chat.Q2_K-0000%%i-of-00005.gguf?download=true' -OutFile 'DeepSeek-V2-Chat.Q2_K-0000%%i-of-00005.gguf.part'" || exit /b 1
        move /y "DeepSeek-V2-Chat.Q2_K-0000%%i-of-00005.gguf.part" "DeepSeek-V2-Chat.Q2_K-0000%%i-of-00005.gguf" >nul || exit /b 1
    )
)
|
12 |
+
|
13 |
+
:: Download the llama.cpp (AVX2, CPU) binaries and unpack them into the
:: current directory. The script stops if either step fails.
:: -Force is required on Expand-Archive: without it existing files are not
:: overwritten and a second run of this script reports errors.
powershell -NoProfile -Command "$ProgressPreference = 'SilentlyContinue'; Invoke-WebRequest -Uri 'https://github.com/ggerganov/llama.cpp/releases/download/b2961/llama-b2961-bin-win-avx2-x64.zip' -OutFile 'llama-b2961-bin-win-avx2-x64.zip'" || exit /b 1
powershell -NoProfile -Command "Expand-Archive -Force -ErrorAction Stop -Path 'llama-b2961-bin-win-avx2-x64.zip' -DestinationPath '.'" || exit /b 1
|
16 |
+
|
17 |
+
:: Check for an NVIDIA GPU and, if one is present, download the CUDA runtime
:: and the CUDA build of llama.cpp and unpack them over the CPU build.
::
:: The detection runs as a single-line PowerShell command whose exit code
:: (0 = NVIDIA adapter found, 1 = none) is tested by cmd.exe; a quoted
:: argument cannot span several lines of a batch file.
:: Quoting notes for the query:
::  - the WQL string literal uses single quotes, doubled ('') because it sits
::    inside a single-quoted PowerShell string, so no double quote ends the
::    outer cmd.exe quoting early;
::  - the WQL wildcard % is written %% because a lone % is consumed by batch
::    variable expansion.
powershell -NoProfile -Command "if (Get-WmiObject -Query 'select * from Win32_VideoController where Description like ''NVIDIA%%''') { exit 0 } else { exit 1 }"
if not errorlevel 1 (
    powershell -NoProfile -Command "$ProgressPreference = 'SilentlyContinue'; Invoke-WebRequest -Uri 'https://github.com/ggerganov/llama.cpp/releases/download/b2961/cudart-llama-bin-win-cu11.7.1-x64.zip' -OutFile 'cudart-llama-bin-win-cu11.7.1-x64.zip'" || exit /b 1
    powershell -NoProfile -Command "$ProgressPreference = 'SilentlyContinue'; Invoke-WebRequest -Uri 'https://github.com/ggerganov/llama.cpp/releases/download/b2961/llama-b2961-bin-win-cuda-cu11.7.1-x64.zip' -OutFile 'llama-b2961-bin-win-cuda-cu11.7.1-x64.zip'" || exit /b 1
    rem -Force lets the CUDA binaries replace the CPU binaries unpacked earlier.
    powershell -NoProfile -Command "Expand-Archive -Force -ErrorAction Stop -Path 'cudart-llama-bin-win-cu11.7.1-x64.zip' -DestinationPath '.'" || exit /b 1
    powershell -NoProfile -Command "Expand-Archive -Force -ErrorAction Stop -Path 'llama-b2961-bin-win-cuda-cu11.7.1-x64.zip' -DestinationPath '.'" || exit /b 1
)
|
24 |
+
|
25 |
+
:: Start the llama.cpp HTTP server.
::  -m       first shard of the split model; llama.cpp locates the remaining
::           "-0000N-of-00005" shards next to it. (The name used previously,
::           DeepSeek-V2-Chat.q2_k.gguf, is never downloaded by this script.)
::  -c 4096  context size in tokens.
::  --mlock  keep the model in RAM instead of letting it be paged out.
:: NOTE(review): the former "-i" flag was dropped; it is an option of the
:: interactive "main" program and server.exe is expected to reject it as an
:: unknown argument - confirm against "server.exe --help" for build b2961.
server.exe ^
    -m DeepSeek-V2-Chat.Q2_K-00001-of-00005.gguf ^
    -c 4096 ^
    --mlock

endlocal
|