vllm-project · simon-mo · Jul 25, 2025 · Jul 22, 2025
diff --git a/.github/workflows/lint-and-deploy.yaml b/.github/workflows/lint-and-deploy.yaml
@@ -7,7 +7,7 @@ permissions:
 
 jobs:
   lint-and-deploy:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-24.04-arm
     steps:
       - name: Checkout
         uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

diff --git a/docker/Dockerfile.arm b/docker/Dockerfile.arm
@@ -1,4 +1,11 @@
-# This vLLM Dockerfile is used to construct image that can build and run vLLM on x86 CPU platform.
+# This vLLM Dockerfile is used to build images that can run vLLM on both x86_64 and arm64 CPU platforms.
+#
+# Supported platforms:
+#   - linux/amd64 (x86_64)
+#   - linux/arm64 (aarch64)
+#
+# Use the `--platform` option with `docker buildx build` to specify the target architecture, e.g.:
+#   docker buildx build --platform=linux/arm64 -f docker/Dockerfile.cpu .
 #
 # Build targets:
 #   vllm-openai (default): used for serving deployment
@@ -53,7 +60,20 @@ RUN --mount=type=cache,target=/root/.cache/uv \
     uv pip install --upgrade pip && \
     uv pip install -r requirements/cpu.txt
 
-ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:/opt/venv/lib/libiomp5.so:$LD_PRELOAD"
+ARG TARGETARCH
+ENV TARGETARCH=${TARGETARCH}
+
+RUN if [ "$TARGETARCH" = "arm64" ]; then \
+        PRELOAD_PATH="/usr/lib/aarch64-linux-gnu/libtcmalloc_minimal.so.4"; \
+    else \
+        PRELOAD_PATH="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:/opt/venv/lib/libiomp5.so"; \
+    fi && \
+    echo "export LD_PRELOAD=$PRELOAD_PATH" >> ~/.bashrc
+# The RUN above appends the export to ~/.bashrc: arm64 preloads tcmalloc only; any other TARGETARCH gets x86_64 tcmalloc plus libiomp5.
+# Use bash for the following RUN steps. NOTE(review): `bash -c` is non-interactive and normally skips ~/.bashrc - confirm the export applies.
+SHELL ["/bin/bash", "-c"]
+# NOTE(review): ENV expands ${LD_PRELOAD} from Dockerfile ENV/ARG values, not from ~/.bashrc, so this may set it to empty - confirm.
+ENV LD_PRELOAD=${LD_PRELOAD}
 
 RUN echo 'ulimit -c 0' >> ~/.bashrc
 

diff --git a/docs/getting_started/installation/cpu/arm.inc.md b/docs/getting_started/installation/cpu/arm.inc.md
@@ -33,7 +33,7 @@ Testing has been conducted on AWS Graviton3 instances for compatibility.
 # --8<-- [end:pre-built-images]
 # --8<-- [start:build-image-from-source]
 ```bash
-docker build -f docker/Dockerfile.arm \
+docker build -f docker/Dockerfile.cpu \
         --tag vllm-cpu-env .
 
 # Launching OpenAI server

diff --git a/requirements/cpu.txt b/requirements/cpu.txt
@@ -10,7 +10,8 @@ setuptools>=77.0.3,<80.0.0
 --extra-index-url https://download.pytorch.org/whl/cpu
 torch==2.6.0+cpu; platform_machine == "x86_64" # torch>2.6.0+cpu has performance regression on x86 platform, see https://github.com/pytorch/pytorch/pull/151218
 torch==2.7.0; platform_system == "Darwin"
-torch==2.7.0; platform_machine == "ppc64le" or platform_machine == "aarch64"
+torch==2.7.0; platform_machine == "ppc64le"
+torch==2.6.0; platform_machine == "aarch64" # for arm64 CPUs, torch 2.7.0 has an issue: https://github.com/vllm-project/vllm/issues/17960
 
 # required for the image processor of minicpm-o-2_6, this must be updated alongside torch
 torchaudio; platform_machine != "ppc64le" and platform_machine != "s390x"
@@ -25,3 +26,6 @@ datasets # for benchmark scripts
 intel-openmp==2024.2.1; platform_machine == "x86_64"
 intel_extension_for_pytorch==2.6.0; platform_machine == "x86_64" # torch>2.6.0+cpu has performance regression on x86 platform, see https://github.com/pytorch/pytorch/pull/151218
 triton==3.2.0; platform_machine == "x86_64" # Triton is required for torch 2.6+cpu, as it is imported in torch.compile.
+
+# Use this to gather CPU info and optimize based on ARM Neoverse cores
+py-cpuinfo; platform_machine == "aarch64"