#!/bin/bash
# Autorun for DeepSeek-V2-Chat Q2_K on Linux/macOS (CPU builds, no CUDA):
# downloads the GGUF shards and llama.cpp binaries, then starts the server.

# Create and change to the working directory
mkdir -p DeepSeek-V2-Chat.Q2_K.gguf
cd DeepSeek-V2-Chat.Q2_K.gguf || exit 1
# Download the GGUF files
for i in {1..5}; do
wget "https://huggingface.co/leafspark/DeepSeek-V2-Chat-GGUF/resolve/main/DeepSeek-V2-Chat.q2_k.gguf/DeepSeek-V2-Chat.Q2_K-0000$i-of-00005.gguf?download=true" -O DeepSeek-V2-Chat.Q2_K-0000$i-of-00005.gguf
done
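# Sanity check (added guard, not in the original script): confirm all five
# shards downloaded and are non-empty before fetching binaries.
for i in {1..5}; do
  [ -s "DeepSeek-V2-Chat.Q2_K-0000$i-of-00005.gguf" ] || { echo "Shard $i missing or empty" >&2; exit 1; }
done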
# Download the llama.cpp binaries based on the OS
case "$(uname -s)" in
Linux)
wget https://github.com/ggerganov/llama.cpp/releases/download/b2961/llama-b2961-bin-ubuntu-x64.zip
unzip llama-b2961-bin-ubuntu-x64.zip -d .
;;
Darwin)
if [[ $(uname -m) == 'arm64' ]]; then
wget https://github.com/ggerganov/llama.cpp/releases/download/b2961/llama-b2961-bin-macos-arm64.zip
unzip llama-b2961-bin-macos-arm64.zip -d .
else
wget https://github.com/ggerganov/llama.cpp/releases/download/b2961/llama-b2961-bin-macos-x64.zip
unzip llama-b2961-bin-macos-x64.zip -d .
fi
;;
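  *)
    # Added default arm (not in the original script): no prebuilt binaries are
    # fetched for other platforms, so fail fast rather than at ./server below.
    echo "Unsupported OS: $(uname -s)" >&2
    exit 1
    ;;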
esac
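# Defensive assumption, not in the original script: unzip may not restore the
# executable bit depending on how the release zip was packed.
chmod +x ./server 2>/dev/null || true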
# Execute the server command
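# --override-kv patches GGUF metadata at load time (format: key=type:value);
# the overrides below supply DeepSeek-V2 attention/MoE parameters that this
# GGUF conversion appears to be missing or mis-reporting.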
# Point -m at the first shard; llama.cpp resolves the remaining splits
# (-00002- through -00005-) automatically.
./server \
  -m DeepSeek-V2-Chat.Q2_K-00001-of-00005.gguf \
  -c 4096 \
  --mlock \
  --override-kv deepseek2.attention.q_lora_rank=int:1536 \
  --override-kv deepseek2.attention.kv_lora_rank=int:512 \
  --override-kv deepseek2.expert_shared_count=int:2 \
  --override-kv deepseek2.expert_feed_forward_length=int:1536 \
  --override-kv deepseek2.leading_dense_block_count=int:1
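# Once the server is listening (default: http://127.0.0.1:8080), a quick smoke
# test against its /completion endpoint might look like:
#   curl http://127.0.0.1:8080/completion -d '{"prompt": "Hello", "n_predict": 32}'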