amaye15 committed on
Commit
48a0b50
·
1 Parent(s): 774982a

Basic docker

Browse files
Files changed (2) hide show
  1. Dockerfile +11 -135
  2. requirements.txt +0 -21
Dockerfile CHANGED
@@ -1,140 +1,16 @@
1
- # # Basic version (recommended)
2
- # docker pull unclecode/crawl4ai:basic-amd64
3
- # docker run -p 7860:7860 unclecode/crawl4ai:basic-amd64
4
 
5
- # syntax=docker/dockerfile:1.4
6
-
7
- ARG TARGETPLATFORM
8
- ARG BUILDPLATFORM
9
-
10
- # Other build arguments
11
- ARG PYTHON_VERSION=3.10
12
-
13
- # Base stage with system dependencies
14
- FROM python:${PYTHON_VERSION}-slim as base
15
-
16
- # Declare ARG variables again within the build stage
17
- ARG INSTALL_TYPE=pass
18
- ARG ENABLE_GPU=false
19
-
20
- # Platform-specific labels
21
- LABEL maintainer="unclecode"
22
- LABEL description="🔥🕷️ Crawl4AI: Open-source LLM Friendly Web Crawler & scraper"
23
- LABEL version="1.0"
24
-
25
- # Environment setup
26
- ENV PYTHONUNBUFFERED=1 \
27
- PYTHONDONTWRITEBYTECODE=1 \
28
- PIP_NO_CACHE_DIR=1 \
29
- PIP_DISABLE_PIP_VERSION_CHECK=1 \
30
- PIP_DEFAULT_TIMEOUT=100 \
31
- DEBIAN_FRONTEND=noninteractive
32
-
33
- # Install system dependencies
34
- RUN apt-get update && apt-get install -y --no-install-recommends \
35
- build-essential \
36
- curl \
37
- wget \
38
- gnupg \
39
- git \
40
- cmake \
41
- pkg-config \
42
- python3-dev \
43
- libjpeg-dev \
44
- libpng-dev \
45
- && rm -rf /var/lib/apt/lists/*
46
-
47
- # Playwright system dependencies for Linux
48
- RUN apt-get update && apt-get install -y --no-install-recommends \
49
- libglib2.0-0 \
50
- libnss3 \
51
- libnspr4 \
52
- libatk1.0-0 \
53
- libatk-bridge2.0-0 \
54
- libcups2 \
55
- libdrm2 \
56
- libdbus-1-3 \
57
- libxcb1 \
58
- libxkbcommon0 \
59
- libx11-6 \
60
- libxcomposite1 \
61
- libxdamage1 \
62
- libxext6 \
63
- libxfixes3 \
64
- libxrandr2 \
65
- libgbm1 \
66
- libpango-1.0-0 \
67
- libcairo2 \
68
- libasound2 \
69
- libatspi2.0-0 \
70
  && rm -rf /var/lib/apt/lists/*
71
 
72
- # GPU support if enabled and architecture is supported
73
- RUN if [ "$ENABLE_GPU" = "true" ] && [ "$TARGETPLATFORM" = "linux/amd64" ] ; then \
74
- apt-get update && apt-get install -y --no-install-recommends \
75
- nvidia-cuda-toolkit \
76
- && rm -rf /var/lib/apt/lists/* ; \
77
- else \
78
- echo "Skipping NVIDIA CUDA Toolkit installation (unsupported platform or GPU disabled)"; \
79
- fi
80
-
81
- # Create and set working directory
82
- WORKDIR /app
83
-
84
- # Copy the entire project
85
- COPY . .
86
-
87
- # Install base requirements
88
- RUN pip install --no-cache-dir -r requirements.txt
89
-
90
- # Install required library for FastAPI
91
- RUN pip install fastapi uvicorn psutil
92
-
93
- # Install ML dependencies first for better layer caching
94
- RUN if [ "$INSTALL_TYPE" = "all" ] ; then \
95
- pip install --no-cache-dir \
96
- torch \
97
- torchvision \
98
- torchaudio \
99
- scikit-learn \
100
- nltk \
101
- transformers \
102
- tokenizers && \
103
- python -m nltk.downloader punkt stopwords ; \
104
- fi
105
-
106
- # Install the package
107
- RUN if [ "$INSTALL_TYPE" = "all" ] ; then \
108
- pip install ".[all]" && \
109
- python -m crawl4ai.model_loader ; \
110
- elif [ "$INSTALL_TYPE" = "torch" ] ; then \
111
- pip install ".[torch]" ; \
112
- elif [ "$INSTALL_TYPE" = "transformer" ] ; then \
113
- pip install ".[transformer]" && \
114
- python -m crawl4ai.model_loader ; \
115
- else \
116
- pip install "." ; \
117
- fi
118
-
119
- # Install MkDocs and required plugins
120
- RUN pip install --no-cache-dir \
121
- mkdocs \
122
- mkdocs-material \
123
- mkdocs-terminal \
124
- pymdown-extensions
125
-
126
- # Build MkDocs documentation
127
- RUN mkdocs build
128
-
129
- # Install Playwright and browsers
130
- RUN if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
131
- playwright install chromium; \
132
- elif [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
133
- playwright install chromium; \
134
- fi
135
 
136
- # Expose port
137
- EXPOSE 8000 11235 9222 8080 7860
138
 
139
- # Start the FastAPI server
140
- CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
 
1
+ # Use an official base image (e.g., Ubuntu) as the starting point
2
+ FROM ubuntu:latest
 
3
 
4
+ # Install Docker CLI (optional, only if you want to run Docker commands inside the container)
5
+ RUN apt-get update && apt-get install -y \
6
+ docker.io \
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  && rm -rf /var/lib/apt/lists/*
8
 
9
+ # Pull the unclecode/crawl4ai:basic image
10
+ RUN docker pull unclecode/crawl4ai:basic
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
+ # Expose the port that the application will run on
13
+ EXPOSE 11235
14
 
15
+ # Run the container
16
+ CMD ["docker", "run", "-p", "11235:11235", "unclecode/crawl4ai:basic"]
requirements.txt DELETED
@@ -1,21 +0,0 @@
1
- # Note: These requirements are also specified in pyproject.toml
2
- # This file is kept for development environment setup and compatibility
3
- aiosqlite~=0.20
4
- lxml~=5.3
5
- litellm>=1.53.1
6
- numpy>=1.26.0,<3
7
- pillow~=10.4
8
- playwright>=1.49.0
9
- python-dotenv~=1.0
10
- requests~=2.26
11
- beautifulsoup4~=4.12
12
- tf-playwright-stealth>=1.1.0
13
- xxhash~=3.4
14
- rank-bm25~=0.2
15
- aiofiles>=24.1.0
16
- colorama~=0.4
17
- snowballstemmer~=2.2
18
- pydantic>=2.10
19
- pyOpenSSL>=24.3.0
20
- psutil>=6.1.1
21
- nltk>=3.9.1