FROM nvidia/cuda:11.8.0-devel-ubuntu22.04

# Build toolchain + Python in a single layer.
# - apt-get (not apt): stable CLI for scripts (hadolint DL3027)
# - --no-install-recommends + list cleanup keep the layer small
# - wget is included here, so no separate install layer is needed
RUN apt-get update && apt-get install -y --no-install-recommends \
      build-essential \
      git \
      libopenblas-dev \
      python3-pip \
      wget \
    && rm -rf /var/lib/apt/lists/*
# Non-root runtime user; UID 1000 matches the Hugging Face Spaces convention.
RUN useradd -m -u 1000 user

# Everything below runs unprivileged.
USER user

ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# WORKDIR creates the directory if it does not exist.
WORKDIR $HOME/app

# Copy the build context, owned by the non-root user so the app can write to it.
COPY --chown=user . $HOME/app
# Build-time parameters. NOTE(review): none of these are referenced below —
# kept for backward compatibility with existing `--build-arg` invocations.
ARG MODEL_PART_A
ARG MODEL_PART_B
ARG MODEL_NAME
ARG ADDITIONAL

# NOTE(review): unpinned clone — consider checking out a fixed tag/commit
# so rebuilds are reproducible.
RUN git clone https://github.com/theroyallab/tabbyAPI

WORKDIR $HOME/app/tabbyAPI

# One layer for Python deps; --no-cache-dir keeps pip's wheel cache
# out of the image (hadolint DL3042).
RUN pip install --no-cache-dir -q -r requirements.txt && \
    pip install --no-cache-dir -q huggingface-hub
# Generate the TabbyAPI config in one layer.
# host is 0.0.0.0 so the API is reachable from outside the container —
# the original 127.0.0.1 binds loopback only, making published ports useless.
RUN printf '%s\n' \
      'network:' \
      '  host: 0.0.0.0' \
      '  port: 5000' \
      '  disable_auth: False' \
      '' \
      'logging:' \
      '  prompt: False' \
      '  generation_params: False' \
      '' \
      'sampling:' \
      '  override_preset: null' \
      '' \
      'developer:' \
      '  unsafe_launch: False' \
      '' \
      'model:' \
      '  model_dir: models' \
      '  model_name: goliath-120b-gptq' \
      '  use_dummy_models: False' \
      > config.yml
# WORKDIR creates missing path components, so no `mkdir -p` layer is needed.
WORKDIR $HOME/app/tabbyAPI/models/goliath-120b-gptq

# hf-transfer accelerates large downloads (huggingface-hub is already installed).
RUN pip install --no-cache-dir huggingface-hub hf-transfer

ENV HF_HUB_ENABLE_HF_TRANSFER=1

# Bake the model into the image. Remove the download cache in the SAME layer —
# otherwise a second full copy of the (very large) model stays in the image.
RUN huggingface-cli download TheBloke/goliath-120b-gptq --local-dir ./ --local-dir-use-symlinks False --cache-dir ~/cache && \
    rm -rf ~/cache
WORKDIR $HOME/app/tabbyAPI

# Documentation only (does not publish the port); matches port: 5000 in config.yml.
EXPOSE 5000

# Exec form: python3 runs as PID 1 and receives SIGTERM from `docker stop`.
CMD ["python3", "main.py"]