File size: 865 Bytes
73cc25e
 
 
 
 
 
 
 
 
 
 
 
e87aa8c
73cc25e
 
 
1dd7ff1
73cc25e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# syntax=docker/dockerfile:1

# Grab a fresh copy of the Python image
FROM python:3.10-slim

# Install build and runtime dependencies.
# --no-install-recommends keeps the image lean; the apt lists are removed
# in the SAME layer so the package cache never persists into the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        libopenblas-dev \
        ninja-build \
    && rm -rf /var/lib/apt/lists/*

# Build llama-cpp-python (with its server extra) against OpenBLAS.
# --no-cache-dir keeps pip's wheel cache out of the layer.
# NOTE(review): llama-cpp-python is unpinned — pin a known-good version
# (e.g. llama-cpp-python[server]==X.Y.Z) for reproducible builds.
RUN pip install --no-cache-dir -U pip setuptools wheel && \
    CMAKE_ARGS="-DLLAMA_OPENBLAS=on" FORCE_CMAKE=1 \
    pip install --no-cache-dir llama-cpp-python[server]

# Download model.
# -f makes curl exit non-zero on an HTTP error instead of saving the
# error page as the "model" file (which would only fail at runtime).
# NOTE(review): the download is unpinned and unchecksummed — consider
# verifying a sha256 here, or ADD --checksum=sha256:... (BuildKit).
RUN mkdir model && \
    curl -fL https://huggingface.co/TheBloke/h2ogpt-4096-llama2-13B-chat-GGML/resolve/main/h2ogpt-4096-llama2-13b-chat.ggmlv3.q5_K_S.bin -o model/ggmlv3-model.bin

# Copy the server start script; --chmod sets the executable bit in the
# same layer, replacing the separate RUN chmod (which duplicated the file
# in a second layer).
COPY --chmod=755 ./start_server.sh ./start_server.sh

# Bind address and port read by the server at runtime.
ENV HOST=0.0.0.0 \
    PORT=7860

# Expose a port for the server (documentation only — publish with -p/-P
# at `docker run` time).
EXPOSE ${PORT}

# NOTE(review): the container runs as root and everything lives in `/` —
# once start_server.sh's path assumptions are confirmed, add a WORKDIR
# (e.g. /app) and a non-root USER.

# Run the server start script.
CMD ["/bin/sh", "./start_server.sh"]