@echo off
setlocal

:: Create the working directory (unless it already exists) and switch into it.
:: All downloads below are written to, and the server is started from, this
:: directory, so stop immediately if it cannot be entered.
if not exist "DeepSeek-V2-Chat.Q2_K.gguf\" mkdir "DeepSeek-V2-Chat.Q2_K.gguf"
cd /d "DeepSeek-V2-Chat.Q2_K.gguf" || (
    echo Failed to enter directory DeepSeek-V2-Chat.Q2_K.gguf 1>&2
    exit /b 1
)

:: Download the five GGUF shards (DeepSeek-V2-Chat.Q2_K-0000N-of-00005.gguf)
:: from Hugging Face into the current directory.
:: powershell.exe exits non-zero when Invoke-WebRequest fails; abort on the
:: first failure so later steps never run against an incomplete set of shards.
for %%i in (1 2 3 4 5) do (
    powershell -NoProfile -Command "Invoke-WebRequest -Uri 'https://huggingface.co/leafspark/DeepSeek-V2-Chat-GGUF/resolve/main/DeepSeek-V2-Chat.q2_k.gguf/DeepSeek-V2-Chat.Q2_K-0000%%i-of-00005.gguf?download=true' -OutFile 'DeepSeek-V2-Chat.Q2_K-0000%%i-of-00005.gguf'" || (
        echo Download of GGUF shard %%i failed 1>&2
        exit /b 1
    )
)

:: Download the llama.cpp release b2961 binaries (Windows x64, AVX2 build) and
:: unpack them into the current directory. Each step aborts the script on
:: failure.
powershell -NoProfile -Command "Invoke-WebRequest -Uri 'https://github.com/ggerganov/llama.cpp/releases/download/b2961/llama-b2961-bin-win-avx2-x64.zip' -OutFile 'llama-b2961-bin-win-avx2-x64.zip'" || exit /b 1

:: -Force overwrites files left behind by an earlier run; without it
:: Expand-Archive reports an error for every file that already exists.
powershell -NoProfile -Command "Expand-Archive -Path 'llama-b2961-bin-win-avx2-x64.zip' -DestinationPath . -Force" || exit /b 1

:: If an NVIDIA display adapter is present, download the CUDA 11.7.1 runtime
:: and the CUDA build of llama.cpp b2961 and unpack both into the current
:: directory.
::
:: The detection runs as a single-line PowerShell command and reports its
:: result through the process exit code (0 = adapter whose Description starts
:: with "NVIDIA" was found). cmd cannot hand a quoted string that spans several
:: lines to powershell.exe, and nested double quotes or a bare percent sign
:: inside the command would be mangled by cmd before PowerShell sees them.
powershell -NoProfile -Command "if (Get-WmiObject Win32_VideoController | Where-Object { $_.Description -like 'NVIDIA*' }) { exit 0 } else { exit 1 }"
if not errorlevel 1 (
    powershell -NoProfile -Command "Invoke-WebRequest -Uri 'https://github.com/ggerganov/llama.cpp/releases/download/b2961/cudart-llama-bin-win-cu11.7.1-x64.zip' -OutFile 'cudart-llama-bin-win-cu11.7.1-x64.zip'" || exit /b 1
    powershell -NoProfile -Command "Invoke-WebRequest -Uri 'https://github.com/ggerganov/llama.cpp/releases/download/b2961/llama-b2961-bin-win-cuda-cu11.7.1-x64.zip' -OutFile 'llama-b2961-bin-win-cuda-cu11.7.1-x64.zip'" || exit /b 1

    rem -Force allows these archives to overwrite same-named files that are
    rem already present in this directory, e.g. from an earlier extraction.
    powershell -NoProfile -Command "Expand-Archive -Path 'cudart-llama-bin-win-cu11.7.1-x64.zip' -DestinationPath . -Force" || exit /b 1
    powershell -NoProfile -Command "Expand-Archive -Path 'llama-b2961-bin-win-cuda-cu11.7.1-x64.zip' -DestinationPath . -Force" || exit /b 1
)

:: Start the llama.cpp server on the downloaded model with a 4096-token
:: context (-c 4096) and --mlock.
::
:: -m names the first shard, DeepSeek-V2-Chat.Q2_K-00001-of-00005.gguf, which
:: is the file the download step actually writes; no file called
:: DeepSeek-V2-Chat.q2_k.gguf is ever created in this directory. llama.cpp is
:: expected to locate the remaining -0000N-of-00005 shards from the first one.
::
:: The --override-kv options set the listed deepseek2.* metadata keys to the
:: given integer values.
::
:: NOTE(review): -i is carried over unchanged from the original command line;
:: confirm that server.exe in release b2961 accepts it.
::
:: The ^ continuations must be on directly consecutive lines: an empty line
:: after a ^ ends the command early.
server.exe ^
    -m DeepSeek-V2-Chat.Q2_K-00001-of-00005.gguf ^
    -c 4096 ^
    -i ^
    --mlock ^
    --override-kv deepseek2.attention.q_lora_rank=int:1536 ^
    --override-kv deepseek2.attention.kv_lora_rank=int:512 ^
    --override-kv deepseek2.expert_shared_count=int:2 ^
    --override-kv deepseek2.expert_feed_forward_length=int:1536 ^
    --override-kv deepseek2.leading_dense_block_count=int:1

endlocal