leafspark's picture
tool: automatically download llama.cpp and model files; run chat completions server (win, cuda)
179c68f verified
raw
history blame
1.58 kB
@echo off
setlocal
:: Purpose: download the split DeepSeek-V2-Chat Q2_K GGUF model plus llama.cpp
:: b2961 Windows binaries, then start the chat-completions server.
:: Requires: Windows with PowerShell (uses Invoke-WebRequest / Expand-Archive).

:: Create and enter the working directory. Guard mkdir so re-runs don't fail,
:: and abort if cd fails so we never download into the wrong location.
if not exist "DeepSeek-V2-Chat.Q2_K.gguf" mkdir "DeepSeek-V2-Chat.Q2_K.gguf"
cd /d "DeepSeek-V2-Chat.Q2_K.gguf" || exit /b 1

:: Download the five GGUF split files. llama.cpp is given the first split and
:: discovers the remaining ones automatically.
for %%i in (1 2 3 4 5) do (
  powershell -NoProfile -Command "Invoke-WebRequest -Uri 'https://huggingface.co/leafspark/DeepSeek-V2-Chat-GGUF/resolve/main/DeepSeek-V2-Chat.q2_k.gguf/DeepSeek-V2-Chat.Q2_K-0000%%i-of-00005.gguf?download=true' -OutFile 'DeepSeek-V2-Chat.Q2_K-0000%%i-of-00005.gguf'"
)

:: Download and extract the CPU (AVX2) llama.cpp build. -Force lets the script
:: be re-run without Expand-Archive failing on already-extracted files.
powershell -NoProfile -Command "Invoke-WebRequest -Uri 'https://github.com/ggerganov/llama.cpp/releases/download/b2961/llama-b2961-bin-win-avx2-x64.zip' -OutFile 'llama-b2961-bin-win-avx2-x64.zip'"
powershell -NoProfile -Command "Expand-Archive -Path 'llama-b2961-bin-win-avx2-x64.zip' -DestinationPath . -Force"

:: If an NVIDIA GPU is present, fetch the CUDA build plus the CUDA runtime and
:: extract them over the CPU build (-Force overwrites the AVX2 server.exe).
:: NOTE: this must be a single line -- cmd.exe does not continue a quoted
:: string across lines, so the original multi-line form never executed; the
:: inner WQL double quotes also broke the outer quoting, and a bare % is
:: consumed by batch percent-expansion. Using Get-CimInstance with the
:: PowerShell 'NVIDIA*' wildcard avoids all three problems.
powershell -NoProfile -Command "if (Get-CimInstance Win32_VideoController | Where-Object { $_.Name -like 'NVIDIA*' }) { Invoke-WebRequest -Uri 'https://github.com/ggerganov/llama.cpp/releases/download/b2961/cudart-llama-bin-win-cu11.7.1-x64.zip' -OutFile 'cudart-llama-bin-win-cu11.7.1-x64.zip'; Invoke-WebRequest -Uri 'https://github.com/ggerganov/llama.cpp/releases/download/b2961/llama-b2961-bin-win-cuda-cu11.7.1-x64.zip' -OutFile 'llama-b2961-bin-win-cuda-cu11.7.1-x64.zip'; Expand-Archive -Path 'cudart-llama-bin-win-cu11.7.1-x64.zip' -DestinationPath . -Force; Expand-Archive -Path 'llama-b2961-bin-win-cuda-cu11.7.1-x64.zip' -DestinationPath . -Force }"

:: Launch the server against the FIRST split. The original passed
:: 'DeepSeek-V2-Chat.q2_k.gguf', a file that is never downloaded, so the
:: server could not start; llama.cpp loads the -00002..-00005 splits itself.
server.exe ^
  -m DeepSeek-V2-Chat.Q2_K-00001-of-00005.gguf ^
  -c 4096 ^
  -i ^
  --mlock
endlocal