finetuned_LLM_with_RAG / Dockerfile.txt
Hadeel11's picture
Add Dockerfile for Hugging Face deployment
d7c7a6c
raw
history blame
324 Bytes
# syntax=docker/dockerfile:1
# Serve a fine-tuned Hugging Face model over an HTTP inference API.
# NOTE(review): :latest is not reproducible — pin a specific tag or digest
# (e.g. huggingface/transformers-pytorch-gpu:<version>@sha256:…) — TODO confirm a valid tag.
FROM huggingface/transformers-pytorch-gpu:latest

# Upgrade pip and install transformers in a single layer;
# --no-cache-dir keeps the pip wheel cache out of the image (hadolint DL3042).
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir transformers

# Runtime configuration — overridable at `docker run -e MODEL_NAME=...`.
# MODEL_NAME: Hub repo id of the model to serve.
# USE_FP16:   half-precision toggle (read by the serving stack — verify it is honored).
ENV MODEL_NAME="Hadeel11/your-finetuned-model" \
    USE_FP16=True

# Expose the port for the inference API.
# (Documentation only — publish with `docker run -p 8080:8080`.)
EXPOSE 8080

# Run the TGI server.
# A shell wrapper is required here: exec-form CMD does NOT expand ${MODEL_NAME}
# (it would be passed literally). `exec` replaces the shell so the server is
# PID 1 and receives SIGTERM from `docker stop`.
CMD ["/bin/sh", "-c", "exec python3 -m transformers_serve --model-id \"${MODEL_NAME}\""]