+# Prototype: Minimal CUDA base image instead of full L4T JetPack
+# Comparing l4t-cuda vs l4t-jetpack for size and maintainability
+
+# Stage 1: Builder (use JetPack for CUDA development tools like nvcc)
+# JetPack includes CUDA 12.6, nvcc, cuDNN, TensorRT - everything needed for compilation
 FROM nvcr.io/nvidia/l4t-jetpack:r36.4.0 AS builder
 
 ARG DEBIAN_FRONTEND=noninteractive
 ENV LANG=en_US.UTF-8
 
 WORKDIR /app
 
+# Install build dependencies and CUDA development tools
 RUN apt-get update -y && \
     apt-get install -y --no-install-recommends \
+    build-essential \
+    cmake \
+    ninja-build \
+    file \
+    libopenblas0 \
+    libproj-dev \
+    libsqlite3-dev \
+    libtiff-dev \
+    libcurl4-openssl-dev \
+    libssl-dev \
+    zlib1g-dev \
+    wget \
+    curl \
+    ca-certificates \
+    git \
+    python3-dev \
+    python3-pip \
     libxext6 \
     libopencv-dev \
-    uvicorn \
-    python3-pip \
-    git \
-    libgdal-dev \
     libvips-dev \
-    wget \
-    rustc \
-    cargo \
-    curl \
-    cmake \
-    ninja-build \
+    pkg-config \
     && rm -rf /var/lib/apt/lists/*
 
+# Remove any pre-installed GDAL
+RUN apt-get update && apt-get remove -y libgdal-dev gdal-bin libgdal30 2>/dev/null || true && rm -rf /var/lib/apt/lists/*
+
+# Compile GDAL 3.11.5 from source with Ninja build system
+RUN wget https://github.com/OSGeo/gdal/releases/download/v3.11.5/gdal-3.11.5.tar.gz && \
+    tar -xzf gdal-3.11.5.tar.gz && \
+    cd gdal-3.11.5 && \
+    mkdir build && cd build && \
+    cmake .. \
+    -GNinja \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DCMAKE_INSTALL_PREFIX=/usr/local \
+    -DBUILD_PYTHON_BINDINGS=OFF \
+    && \
+    ninja && \
+    ninja install && \
+    ldconfig && \
+    cd ../.. && \
+    rm -rf gdal-3.11.5 gdal-3.11.5.tar.gz
+
+# Verify GDAL installation
+RUN gdal-config --version && \
+    test "$(gdal-config --version | cut -d. -f1,2)" = "3.11" || (echo "GDAL version mismatch!" && exit 1)
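+# e.g. "gdal-config --version" prints 3.11.5 and "cut -d. -f1,2" trims it to 3.11,
+# so any 3.11.x patch release passes this check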
+
+# Install CMake 3.30.5 for building extensions
 RUN wget -q https://github.com/Kitware/CMake/releases/download/v3.30.5/cmake-3.30.5-linux-aarch64.sh && \
     chmod +x cmake-3.30.5-linux-aarch64.sh && \
     ./cmake-3.30.5-linux-aarch64.sh --prefix=/usr/local --skip-license && \
     rm cmake-3.30.5-linux-aarch64.sh
 
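+# Illustrative check (not part of the build): "cmake --version" should now report 3.30.5
+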
+# Install uv for fast package installation
 RUN curl -LsSf https://astral.sh/uv/install.sh | env INSTALLER_NO_MODIFY_PATH=1 sh && \
-    ln -s /root/.local/bin/uv /usr/local/bin/uv
+    ln -s /root/.local/bin/uv /usr/local/bin/uv && \
+    uv --version
 
+# Copy requirements files
 COPY requirements/requirements.sam.txt \
     requirements/requirements.clip.txt \
     requirements/requirements.http.txt \
@@ -45,41 +87,43 @@ COPY requirements/requirements.sam.txt \
     requirements/requirements.easyocr.txt \
     ./
 
+# Install PyTorch 2.8.0 with CUDA 12.6 support from jetson-ai-lab.io
 RUN python3 -m pip install --upgrade pip && \
-    python3 -m pip install "torch>=2.8.0" "torchvision>=0.15.2 " \
+    python3 -m pip install "torch>=2.8.0" "torchvision>=0.23.0" \
     --index-url https://pypi.jetson-ai-lab.io/jp6/cu126
 
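+# Sanity check (illustrative, not run at build time):
+#   python3 -c "import torch; print(torch.__version__, torch.cuda.is_available())"
+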
+# Install Python dependencies with uv
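+# (--index-strategy unsafe-best-match lets uv pick the best candidate version
+# across both PyPI and the Jetson extra index, rather than stopping at the first index)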
 RUN uv pip install --system --break-system-packages --index-strategy unsafe-best-match \
     --extra-index-url https://pypi.jetson-ai-lab.io/jp6/cu126 \
     -r _requirements.txt \
-    -r requirements.sam.txt \
-    -r requirements.clip.txt \
+    -r requirements.jetson.txt \
     -r requirements.http.txt \
+    -r requirements.clip.txt \
+    -r requirements.transformers.txt \
+    -r requirements.sam.txt \
     -r requirements.gaze.txt \
     -r requirements.groundingdino.txt \
-    -r requirements.doctr.txt \
     -r requirements.yolo_world.txt \
-    -r requirements.transformers.txt \
-    -r requirements.jetson.txt \
+    -r requirements.doctr.txt \
     -r requirements.sdk.http.txt \
     -r requirements.easyocr.txt \
     jupyterlab \
     "setuptools<=75.5.0" \
     packaging \
-    numpy \
     && rm -rf ~/.cache/uv
 
+# Build onnxruntime from source with CUDA and TensorRT support
 WORKDIR /tmp
 RUN git clone --recursive --branch v1.20.0 https://github.com/microsoft/onnxruntime.git /tmp/onnxruntime
 
 WORKDIR /tmp/onnxruntime
-
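+# Patch a pinned dependency commit in onnxruntime's cmake/deps.txt
+# (assumption: the original pin no longer resolves for the v1.20.0 tree)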
 RUN sed -i 's/be8be39fdbc6e60e94fa7870b280707069b5b81a/32b145f525a8308d7ab1c09388b2e288312d8eba/g' cmake/deps.txt
 
+# JetPack already has all CUDA, cuDNN, and TensorRT libs - no need to copy
 RUN ./build.sh \
     --config Release \
     --build_dir build/cuda12 \
-    --parallel 4 \
+    --parallel 12 \
     --use_cuda \
     --cuda_version 12.6 \
     --cuda_home /usr/local/cuda \
@@ -97,31 +141,7 @@ RUN ./build.sh \
 
 RUN uv pip install --system --break-system-packages /tmp/onnxruntime/build/cuda12/Release/dist/onnxruntime_gpu-*.whl
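+# Sanity check (illustrative, not run at build time): the CUDA and TensorRT
+# execution providers should appear in the output of
+#   python3 -c "import onnxruntime; print(onnxruntime.get_available_providers())"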
 
-FROM nvcr.io/nvidia/l4t-jetpack:r36.4.0 AS runtime
-
-ARG DEBIAN_FRONTEND=noninteractive
-ENV LANG=en_US.UTF-8
-
-WORKDIR /app
-
-COPY --from=builder /usr/local/lib/python3.10 /usr/local/lib/python3.10
-COPY --from=builder /usr/local/bin /usr/local/bin
-
-RUN apt-get update -y && \
-    apt-get install -y --no-install-recommends \
-    libxext6 \
-    libopencv-dev \
-    uvicorn \
-    python3-pip \
-    git \
-    libgdal-dev \
-    libvips-dev \
-    wget \
-    rustc \
-    cargo \
-    curl \
-    && rm -rf /var/lib/apt/lists/*
-
+# Build and install inference packages (core, gpu, cli, sdk)
 WORKDIR /build
 COPY . .
 RUN ln -sf /usr/bin/python3 /usr/bin/python || true
@@ -140,15 +160,91 @@ RUN python -m pip install --break-system-packages --no-deps dist/inference_gpu*.
     dist/inference_sdk*.whl \
     "setuptools<=75.5.0"
 
-WORKDIR /notebooks
-COPY examples/notebooks .
+WORKDIR /app
+COPY requirements/requirements.http.txt requirements.txt
+
+# Runtime stage - minimal CUDA runtime with only necessary libraries
+FROM nvcr.io/nvidia/l4t-cuda:12.6.11-runtime
+
+ARG DEBIAN_FRONTEND=noninteractive
+ENV LANG=en_US.UTF-8
+
+WORKDIR /app
+
+# Create python symlink for inference CLI compatibility
+RUN ln -sf /usr/bin/python3 /usr/bin/python
 
-WORKDIR /app/
+# Install runtime dependencies only (no -dev packages)
+RUN apt-get update -y && \
+    apt-get install -y --no-install-recommends \
+    file \
+    libopenblas0 \
+    libproj22 \
+    libsqlite3-0 \
+    libtiff5 \
+    libcurl4 \
+    libssl3 \
+    zlib1g \
+    libgomp1 \
+    python3 \
+    python3-pip \
+    libxext6 \
+    libopencv-core4.5d \
+    libopencv-imgproc4.5d \
+    libvips42 \
+    libglib2.0-0 \
+    libsm6 \
+    libjpeg-turbo8 \
+    libpng16-16 \
+    libexpat1 \
+    ca-certificates \
+    curl \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy compiled GDAL from builder
+COPY --from=builder /usr/local/bin/gdal* /usr/local/bin/
+COPY --from=builder /usr/local/bin/ogr* /usr/local/bin/
+COPY --from=builder /usr/local/bin/gnm* /usr/local/bin/
+COPY --from=builder /usr/local/lib/libgdal* /usr/local/lib/
+COPY --from=builder /usr/local/include/gdal* /usr/local/include/
+COPY --from=builder /usr/local/share/gdal /usr/local/share/gdal
+
+# Set GDAL environment variables
+ENV GDAL_DATA=/usr/local/share/gdal
+ENV LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH
+
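+# Illustrative check: "gdalinfo --version" in the final image should report GDAL 3.11.5
+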
+# Copy cuDNN, CUDA, and TensorRT libraries from builder (JetPack)
+# for PyTorch and onnxruntime compatibility
+COPY --from=builder /usr/lib/aarch64-linux-gnu/libcudnn*.so* /usr/local/cuda/lib64/
+COPY --from=builder /usr/include/aarch64-linux-gnu/cudnn*.h /usr/local/cuda/include/
+COPY --from=builder /usr/local/cuda/targets/aarch64-linux/lib/libcupti*.so* /usr/local/cuda/lib64/
+COPY --from=builder /usr/local/cuda/targets/aarch64-linux/lib/libnvToolsExt*.so* /usr/local/cuda/lib64/
+
+# TensorRT libraries (for onnxruntime)
+COPY --from=builder /usr/lib/aarch64-linux-gnu/libnvinfer*.so* /usr/local/cuda/lib64/
+COPY --from=builder /usr/lib/aarch64-linux-gnu/libnvonnxparser*.so* /usr/local/cuda/lib64/
+COPY --from=builder /usr/lib/aarch64-linux-gnu/libnvparsers*.so* /usr/local/cuda/lib64/
+
+# Update library paths and cache
+ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
+RUN ldconfig
+
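+# Illustrative check: "ldconfig -p | grep -E 'libcudnn|libnvinfer'" should list
+# the libraries copied above
+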
+# Copy Python packages and CLI tools from builder
+COPY --from=builder /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages
+COPY --from=builder /usr/local/bin/inference /usr/local/bin/inference
+
+# Set Python path
+ENV PYTHONPATH=/usr/local/lib/python3.10/dist-packages:$PYTHONPATH
+
+# Copy application code
 COPY inference inference
+COPY inference_cli inference_cli
+COPY inference_sdk inference_sdk
 COPY docker/config/gpu_http.py gpu_http.py
 
-ENV VERSION_CHECK_MODE=continuous \
-    PROJECT=roboflow-platform \
+# Environment variables for inference server
+ENV VERSION_CHECK_MODE=once \
+    CORE_MODEL_SAM2_ENABLED=True \
     NUM_WORKERS=1 \
     HOST=0.0.0.0 \
     PORT=9001 \
@@ -160,17 +256,14 @@ ENV VERSION_CHECK_MODE=continuous \
     WORKFLOWS_STEP_EXECUTION_MODE=local \
     WORKFLOWS_MAX_CONCURRENT_STEPS=4 \
     API_LOGGING_ENABLED=True \
-    LMM_ENABLED=True \
-    CORE_MODEL_SAM2_ENABLED=True \
-    CORE_MODEL_OWLV2_ENABLED=True \
-    ENABLE_STREAM_API=True \
-    ENABLE_PROMETHEUS=True \
-    STREAM_API_PRELOADED_PROCESSES=2 \
-    RUNS_ON_JETSON=True \
-    PYTHONPATH=/app:$PYTHONPATH
-
-RUN mkdir -p /tmp/ort_cache
+    DISABLE_WORKFLOW_ENDPOINTS=false
 
-EXPOSE 9001
+# Add label with versions for comparison
+LABEL org.opencontainers.image.description="Inference Server - Jetson 6.2.0 (CUDA base prototype)" \
+    org.opencontainers.image.base.name="nvcr.io/nvidia/l4t-cuda:12.6.11-runtime" \
+    cuda.version="12.6.11" \
+    cudnn.source="l4t-jetpack:r36.4.0" \
+    gdal.version="3.11.5" \
+    pytorch.version="2.8.0"
 
-ENTRYPOINT uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
+ENTRYPOINT ["/bin/sh", "-c", "python3 -m uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT"]
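+
+# Illustrative usage (image tag and flags are assumptions, not from this repo):
+#   docker build -t inference-jetson-cuda-prototype .
+#   docker run --rm --runtime nvidia -p 9001:9001 inference-jetson-cuda-prototype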