#!/usr/bin/env bash
# Launch the llama-cpp-python OpenAI-compatible API server.
#
# Requires: python3 with the llama_cpp package installed, and a GGML v3
# model file at model/ggmlv3-model.bin (path is relative to the CWD).
set -euo pipefail

# Raise the locked-memory limit so llama.cpp can mlock the model pages
# (prevents the model from being swapped out). Needs sufficient
# privileges (root or a matching limits.conf entry) to succeed.
ulimit -l unlimited

# -B: don't write .pyc files.
# NOTE(review): --embedding False relies on the server's CLI parsing the
# literal string "False" as a boolean — confirm against the installed
# llama_cpp.server version.
python3 -B -m llama_cpp.server \
  --model model/ggmlv3-model.bin \
  --n_threads 2 \
  --embedding False