# pdfextraction / Dockerfile
# Last commit: "Update Dockerfile" by Spanicin (340e9ec, verified)
# Base image: the official Python 3.11 image, "slim" variant on Debian bullseye.
# NOTE(review): the tag is not pinned to a patch version or digest, so a rebuild
# may pick up a newer 3.11.x image than the previous build used.
FROM python:3.11-slim-bullseye
# Runtime environment (persisted into the running container):
#   HF_HOME            - Hugging Face cache root; points at the cache
#                        directory created later in this file.
#   PYTHONIOENCODING   - force UTF-8 for Python's stdin/stdout/stderr.
#   PYTHONUNBUFFERED   - disable stdout/stderr buffering so logs appear
#                        immediately in the container output.
#   TESSERACT_PATH     - location of the tesseract binary; presumably read
#                        by the application (confirm against app code).
ENV HF_HOME="/app/cache" \
    PYTHONIOENCODING=UTF-8 \
    PYTHONUNBUFFERED=1 \
    TESSERACT_PATH="/usr/bin/tesseract"
# Use /app as the working directory for every following RUN, COPY and CMD.
# WORKDIR creates the directory if it does not already exist.
WORKDIR /app
# Install system dependencies in a single layer:
#   libgl1        - OpenGL runtime library (presumably required by an
#                   imaging dependency such as OpenCV; confirm against
#                   requirements.txt)
#   poppler-data  - encoding data for poppler; listed explicitly because it
#                   is only a "Recommends" of the poppler packages and would
#                   otherwise be dropped by --no-install-recommends
#   poppler-utils - PDF command-line tools (pdftotext, pdftoppm, ...)
#   tesseract-ocr - OCR engine, installed at /usr/bin/tesseract
# --no-install-recommends keeps optional packages out of the image, and the
# apt lists are removed in the same layer so they never reach the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        libgl1 \
        poppler-data \
        poppler-utils \
        tesseract-ocr \
    && rm -rf /var/lib/apt/lists/*
# Create the cache directory and open its permissions to every user.
# NOTE(review): mode 777 is presumably here so that a non-root runtime UID can
# write to the cache; if the runtime user is known, a targeted chown would be
# a tighter alternative. Confirm before changing.
RUN mkdir -p /app/cache \
    && chmod -R 777 /app/cache
# Install Python dependencies BEFORE copying the application source.
# Only requirements.txt is copied for this step, so the (slow) pip layer is
# rebuilt only when the dependency list changes, not on every source edit.
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application files into the working directory.
COPY . /app
# Document the port the server listens on. EXPOSE is metadata only; it does
# not publish the port by itself.
EXPOSE 7860

# Start the application with Gunicorn (exec form, so Gunicorn receives
# signals directly):
#   -b 0.0.0.0:7860  bind on all interfaces, port 7860
#   --timeout 300    allow a worker up to 300 seconds before it is restarted
#   app:app          serve the "app" object from the "app" module
CMD ["gunicorn", "-b", "0.0.0.0:7860", "--timeout", "300", "app:app"]