#!/bin/sh
# Launch the llama-cpp-python OpenAI-compatible server.
# Note: the original file was collapsed onto one line, which made every
# command part of the shebang line's trailing comment — nothing ran.
set -eu

# Raise the locked-memory limit so the model can be mlock'ed into RAM.
# Best-effort: this fails without sufficient privileges (CAP_IPC_LOCK or a
# raised memlock hard limit), in which case we warn and start anyway.
ulimit -l unlimited || printf 'warning: could not raise memlock limit; --mlock may fail\n' >&2

# exec replaces the shell so signals (SIGTERM etc.) reach the server directly.
exec python3 -B -m llama_cpp.server \
  --model model/ggmlv3-model.bin \
  --n_threads 2 \
  --embedding False