leafspark
/

DeepSeek-V2-Chat-GGUF

Text Generation

Inference Endpoints

Model card Files Files and versions Community

DeepSeek-V2-Chat-GGUF / DeepSeek-V2-Chat.q2_k.gguf /download_and_run.bat

leafspark's picture

tool: add kv overrides to start command (win)

5618137 verified 6 months ago

1.88 kB

	@echo off
	:: Downloads the five DeepSeek-V2-Chat Q2_K GGUF shards and the llama.cpp
	:: b2961 Windows binaries into .\DeepSeek-V2-Chat.Q2_K.gguf, adds the CUDA
	:: build when an NVIDIA video controller is reported, then starts server.exe
	:: with the DeepSeek-V2 metadata overrides.
	setlocal

	:: Create and change to the directory (safe to re-run: only create it when missing)
	if not exist "DeepSeek-V2-Chat.Q2_K.gguf\" mkdir "DeepSeek-V2-Chat.Q2_K.gguf"
	cd /d "DeepSeek-V2-Chat.Q2_K.gguf" || exit /b 1

	:: Download the GGUF files; stop if a shard fails so the server is never
	:: started against an incomplete model
	for %%i in (1 2 3 4 5) do (
		powershell -NoProfile -Command "Invoke-WebRequest -Uri 'https://huggingface.co/leafspark/DeepSeek-V2-Chat-GGUF/resolve/main/DeepSeek-V2-Chat.q2_k.gguf/DeepSeek-V2-Chat.Q2_K-0000%%i-of-00005.gguf?download=true' -OutFile 'DeepSeek-V2-Chat.Q2_K-0000%%i-of-00005.gguf'" || exit /b 1
	)

	:: Download the llama.cpp binaries (-Force lets a re-run overwrite earlier extractions)
	powershell -NoProfile -Command "Invoke-WebRequest -Uri 'https://github.com/ggerganov/llama.cpp/releases/download/b2961/llama-b2961-bin-win-avx2-x64.zip' -OutFile 'llama-b2961-bin-win-avx2-x64.zip'" || exit /b 1
	powershell -NoProfile -Command "Expand-Archive -Path 'llama-b2961-bin-win-avx2-x64.zip' -DestinationPath . -Force" || exit /b 1

	:: Check for GPU and download additional files if needed.
	:: The probe is a single-line PowerShell command (cmd.exe cannot continue a
	:: quoted string across lines) and uses -like 'NVIDIA*' so that no nested
	:: double quotes or batch-special percent signs are required. It exits 0
	:: when an NVIDIA video controller is found and 1 otherwise.
	powershell -NoProfile -Command "if (Get-WmiObject Win32_VideoController | Where-Object { $_.Description -like 'NVIDIA*' }) { exit 0 } else { exit 1 }"
	if not errorlevel 1 (
		rem The CUDA build is optional: report failures but keep going with the CPU build.
		powershell -NoProfile -Command "Invoke-WebRequest -Uri 'https://github.com/ggerganov/llama.cpp/releases/download/b2961/cudart-llama-bin-win-cu11.7.1-x64.zip' -OutFile 'cudart-llama-bin-win-cu11.7.1-x64.zip'" || echo Warning: could not download the CUDA runtime archive.
		powershell -NoProfile -Command "Invoke-WebRequest -Uri 'https://github.com/ggerganov/llama.cpp/releases/download/b2961/llama-b2961-bin-win-cuda-cu11.7.1-x64.zip' -OutFile 'llama-b2961-bin-win-cuda-cu11.7.1-x64.zip'" || echo Warning: could not download the CUDA llama.cpp archive.
		powershell -NoProfile -Command "Expand-Archive -Path 'cudart-llama-bin-win-cu11.7.1-x64.zip' -DestinationPath . -Force" || echo Warning: could not extract the CUDA runtime archive.
		powershell -NoProfile -Command "Expand-Archive -Path 'llama-b2961-bin-win-cuda-cu11.7.1-x64.zip' -DestinationPath . -Force" || echo Warning: could not extract the CUDA llama.cpp archive.
	)

	:: Execute the server command.
	:: -m points at the first downloaded shard; the remaining shards sit next to it.
	:: NOTE(review): -i is kept from the original command line; confirm that this
	:: server.exe build accepts it.
	server.exe ^
	-m DeepSeek-V2-Chat.Q2_K-00001-of-00005.gguf ^
	-c 4096 ^
	-i ^
	--mlock ^
	--override-kv deepseek2.attention.q_lora_rank=int:1536 ^
	--override-kv deepseek2.attention.kv_lora_rank=int:512 ^
	--override-kv deepseek2.expert_shared_count=int:2 ^
	--override-kv deepseek2.expert_feed_forward_length=int:1536 ^
	--override-kv deepseek2.leading_dense_block_count=int:1

	endlocal