#!/bin/bash
# Autorun for DeepSeek-V2-Chat Q2_K on Linux/macOS (CPU builds, no CUDA):
# downloads the GGUF shards and llama.cpp binaries, then starts the server.

# Create and change to the working directory
mkdir -p DeepSeek-V2-Chat.Q2_K.gguf
cd DeepSeek-V2-Chat.Q2_K.gguf || exit 1
# Download the GGUF files
for i in {1..5}; do
wget "https://huggingface.co/leafspark/DeepSeek-V2-Chat-GGUF/resolve/main/DeepSeek-V2-Chat.q2_k.gguf/DeepSeek-V2-Chat.Q2_K-0000$i-of-00005.gguf?download=true" -O DeepSeek-V2-Chat.Q2_K-0000$i-of-00005.gguf
done
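# Sanity check (added guard, not in the original script): confirm all five
# shards downloaded and are non-empty before fetching binaries.
for i in {1..5}; do
  [ -s "DeepSeek-V2-Chat.Q2_K-0000$i-of-00005.gguf" ] || { echo "Shard $i missing or empty" >&2; exit 1; }
done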
# Download the llama.cpp binaries based on the OS
case "$(uname -s)" in
Linux)
wget https://github.com/ggerganov/llama.cpp/releases/download/b2961/llama-b2961-bin-ubuntu-x64.zip
unzip llama-b2961-bin-ubuntu-x64.zip -d .
;;
Darwin)
if [[ $(uname -m) == 'arm64' ]]; then
wget https://github.com/ggerganov/llama.cpp/releases/download/b2961/llama-b2961-bin-macos-arm64.zip
unzip llama-b2961-bin-macos-arm64.zip -d .
else
wget https://github.com/ggerganov/llama.cpp/releases/download/b2961/llama-b2961-bin-macos-x64.zip
unzip llama-b2961-bin-macos-x64.zip -d .
fi
;;
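  *)
    # Added default arm (not in the original script): no prebuilt binaries are
    # fetched for other platforms, so fail fast rather than at ./server below.
    echo "Unsupported OS: $(uname -s)" >&2
    exit 1
    ;;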
esac
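# Defensive assumption, not in the original script: unzip may not restore the
# executable bit depending on how the release zip was packed.
chmod +x ./server 2>/dev/null || true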
# Execute the server command
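# --override-kv patches GGUF metadata at load time (format: key=type:value);
# the overrides below supply DeepSeek-V2 attention/MoE parameters that this
# GGUF conversion appears to be missing or mis-reporting.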
# Point -m at the first shard; llama.cpp resolves the remaining splits
# (-00002- through -00005-) automatically.
./server \
  -m DeepSeek-V2-Chat.Q2_K-00001-of-00005.gguf \
  -c 4096 \
  --mlock \
  --override-kv deepseek2.attention.q_lora_rank=int:1536 \
  --override-kv deepseek2.attention.kv_lora_rank=int:512 \
  --override-kv deepseek2.expert_shared_count=int:2 \
  --override-kv deepseek2.expert_feed_forward_length=int:1536 \
  --override-kv deepseek2.leading_dense_block_count=int:1
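# Once the server is listening (default: http://127.0.0.1:8080), a quick smoke
# test against its /completion endpoint might look like:
#   curl http://127.0.0.1:8080/completion -d '{"prompt": "Hello", "n_predict": 32}'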