diff --git a/dlc_developer_config.toml b/dlc_developer_config.toml
index 7c6a3dd04658..3e2e4d20f9b5 100644
--- a/dlc_developer_config.toml
+++ b/dlc_developer_config.toml
@@ -15,7 +15,7 @@ neuronx_mode = false
 graviton_mode = false
 # Please only set it to true if you are preparing a ARM64 related PR
 # Do remember to revert it back to false before merging any PR (including ARM64 dedicated PR)
-arm64_mode = false
+arm64_mode = true
 # Please only set it to True if you are preparing a HABANA related PR
 # Do remember to revert it back to False before merging any PR (including HABANA dedicated PR)
 habana_mode = false
@@ -37,7 +37,7 @@ deep_canary_mode = false
 [build]
 # Add in frameworks you would like to build. By default, builds are disabled unless you specify building an image.
 # available frameworks - ["base", "vllm", "autogluon", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"]
-build_frameworks = []
+build_frameworks = ["vllm"]
 
 
 # By default we build both training and inference containers. Set true/false values to determine which to build.
diff --git a/miscellaneous_scripts/build_requirements.txt b/miscellaneous_scripts/build_requirements.txt
new file mode 100644
index 000000000000..eb3416aca407
--- /dev/null
+++ b/miscellaneous_scripts/build_requirements.txt
@@ -0,0 +1,10 @@
+# Should be mirrored in pyproject.toml
+cmake>=3.26.1
+ninja
+packaging>=24.2
+setuptools>=77.0.3,<80.0.0
+setuptools-scm>=8
+torch==2.7.1
+wheel
+jinja2>=3.1.6
+regex
\ No newline at end of file
diff --git a/miscellaneous_scripts/requirements.txt b/miscellaneous_scripts/requirements.txt
new file mode 100644
index 000000000000..825269842b4b
--- /dev/null
+++ b/miscellaneous_scripts/requirements.txt
@@ -0,0 +1,60 @@
+regex # Replace re for higher-performance regex matching
+cachetools
+psutil
+sentencepiece  # Required for LLaMA tokenizer.
+numpy
+requests >= 2.26.0
+tqdm
+blake3
+py-cpuinfo
+transformers >= 4.55.2
+tokenizers >= 0.21.1  # Required for fast incremental detokenization.
+protobuf # Required by LlamaTokenizer.
+fastapi[standard] >= 0.115.0 # Required by FastAPI's form models in the OpenAI API server's audio transcriptions endpoint.
+aiohttp
+openai >= 1.99.1  # For Responses API with reasoning content
+pydantic >= 2.11.7
+prometheus_client >= 0.18.0
+pillow  # Required for image processing
+prometheus-fastapi-instrumentator >= 7.0.0
+tiktoken >= 0.6.0  # Required for DBRX tokenizer
+lm-format-enforcer >= 0.10.11, < 0.11
+llguidance >= 0.7.11, < 0.8.0  # Structured-output backend; installed on every platform (no environment marker)
+# required for outlines backend disk cache
+diskcache == 5.6.3
+lark == 1.2.2
+xgrammar == 0.1.21  # Structured-output backend; installed on every platform (no environment marker)
+typing_extensions >= 4.10
+filelock >= 3.16.1 # need to contain https://github.com/tox-dev/filelock/pull/317
+partial-json-parser # used for parsing partial JSON outputs
+pyzmq >= 25.0.0
+msgspec
+gguf >= 0.13.0
+mistral_common[image,audio] >= 1.8.2
+opencv-python-headless >= 4.11.0    # required for video IO
+pyyaml
+six>=1.16.0; python_version > '3.11' # transitive dependency of pandas that needs to be the latest version for python 3.12
+setuptools>=77.0.3,<80; python_version > '3.11' # Setuptools is used by triton, we need to ensure a modern version is installed for 3.12+ so that it does not try to import distutils, which was removed in 3.12
+einops # Required for Qwen2-VL.
+compressed-tensors == 0.11.0 # required for compressed-tensors
+depyf==0.19.0 # required for profiling and debugging with compilation config
+cloudpickle # allows pickling lambda functions in model_executor/models/registry.py
+watchfiles # required for http server to monitor the updates of TLS files
+python-json-logger # Used by logging as per examples/others/logging_configuration.md
+scipy # Required for phi-4-multimodal-instruct
+ninja # Required for xgrammar, rocm, tpu, xpu
+pybase64 # fast base64 implementation
+cbor2 # Required for cross-language serialization of hashable objects
+setproctitle # Used to set process names for better debugging and monitoring
+openai-harmony >= 0.0.3  # Required for gpt-oss
+
+numba == 0.61.2; python_version > '3.9'
+
+# Dependencies for NVIDIA GPUs
+ray[cgraph]>=2.48.0 # Ray Compiled Graph, required for pipeline parallelism in V1.
+torch==2.8
+torchaudio==2.8
+# These must be updated alongside torch
+torchvision==0.23 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
+# https://github.com/facebookresearch/xformers/releases/tag/v0.0.31
+xformers==0.0.31; platform_system == 'Linux' and platform_machine == 'x86_64'  # Requires PyTorch >= 2.7
\ No newline at end of file
diff --git a/scripts/install_efa.sh b/scripts/install_efa.sh
index 9ca36fa18c5d..c12f40ebdd0d 100644
--- a/scripts/install_efa.sh
+++ b/scripts/install_efa.sh
@@ -72,7 +72,7 @@ function install_efa {
     apt-get autoremove -y
     rm -rf /var/lib/apt/lists/*
     ldconfig
-    check_libnccl_net_so
+    # check_libnccl_net_so
 }
 
 # idiomatic parameter and option handling in sh
diff --git a/vllm/CHANGELOG.md b/vllm/CHANGELOG.md
index 945cced31d4d..847aaf48642c 100644
--- a/vllm/CHANGELOG.md
+++ b/vllm/CHANGELOG.md
@@ -2,6 +2,17 @@
 
 All notable changes to vLLM Deep Learning Containers will be documented in this file.
 
+## [0.10.1.1] - 2025-08-04
+### Updated
+- vllm/vllm-openai version `v0.10.1.1`
+- EFA installer version `1.43.1`
+- Architecture ARM64
+### Sample ECR URI
+```
+763104351884.dkr.ecr.us-east-1.amazonaws.com/vllm:0.10-gpu-py312-arm64
+763104351884.dkr.ecr.us-east-1.amazonaws.com/vllm:0.10.1.1-gpu-py312-cu128-ubuntu22.04-arm64
+```
+
 ## [0.10.0] - 2025-08-04
 ### Updated
 - vllm/vllm-openai version `v0.10.0`
diff --git a/vllm/arm64/gpu/Dockerfile.arm64 b/vllm/arm64/gpu/Dockerfile.arm64
new file mode 100644
index 000000000000..be18fff511d9
--- /dev/null
+++ b/vllm/arm64/gpu/Dockerfile.arm64
@@ -0,0 +1,308 @@
+
+ARG CUDA_VERSION=12.8.1
+ARG PYTHON_VERSION=3.12
+# Base images: BUILD_BASE_IMAGE backs the base and build stages, FINAL_BASE_IMAGE the runtime stages.
+ARG BUILD_BASE_IMAGE=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04
+ARG FINAL_BASE_IMAGE=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04
+# Optional deadsnakes APT mirror and its GPG key; when the mirror is unset the deadsnakes PPA is added instead.
+ARG DEADSNAKES_MIRROR_URL
+ARG DEADSNAKES_GPGKEY_URL
+# Bootstrap script that each stage pipes into the target interpreter to install pip.
+ARG GET_PIP_URL="https://bootstrap.pypa.io/get-pip.py"
+# Package index overrides; the UV_* arguments default to their PIP_* counterparts.
+ARG PIP_INDEX_URL
+ARG PIP_EXTRA_INDEX_URL
+ARG UV_INDEX_URL=${PIP_INDEX_URL}
+ARG UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
+# PyTorch wheel indexes; the install steps append a cu<major><minor> suffix derived from CUDA_VERSION.
+ARG PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl
+ARG PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL=https://download.pytorch.org/whl/nightly
+# Keyring provider setting for pip; the uv setting defaults to the same value.
+ARG PIP_KEYRING_PROVIDER=disabled
+ARG UV_KEYRING_PROVIDER=${PIP_KEYRING_PROVIDER}
+# When set to "true", the vllm-openai-base stage also installs lmcache.
+ARG INSTALL_KV_CONNECTORS=false
+
+#################### BASE BUILD IMAGE ####################
+# prepare basic build environment
+FROM ${BUILD_BASE_IMAGE} AS base
+ARG CUDA_VERSION
+ARG PYTHON_VERSION
+ARG TARGETPLATFORM
+ARG INSTALL_KV_CONNECTORS=false
+ENV DEBIAN_FRONTEND=noninteractive
+
+ARG DEADSNAKES_MIRROR_URL
+ARG DEADSNAKES_GPGKEY_URL
+ARG GET_PIP_URL
+
+# Install the requested Python from deadsnakes (custom mirror if set, otherwise the PPA with retries) and bootstrap pip
+RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
+    && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
+    && apt-get update -y \
+    && apt-get install -y ccache software-properties-common git curl sudo \
+    && if [ -n "${DEADSNAKES_MIRROR_URL}" ] ; then \
+        if [ -n "${DEADSNAKES_GPGKEY_URL}" ] ; then \
+            mkdir -p -m 0755 /etc/apt/keyrings ; \
+            curl -L "${DEADSNAKES_GPGKEY_URL}" | gpg --dearmor > /etc/apt/keyrings/deadsnakes.gpg ; \
+            sudo chmod 644 /etc/apt/keyrings/deadsnakes.gpg ; \
+            echo "deb [signed-by=/etc/apt/keyrings/deadsnakes.gpg] ${DEADSNAKES_MIRROR_URL} $(lsb_release -cs) main" > /etc/apt/sources.list.d/deadsnakes.list ; \
+        fi ; \
+    else \
+        for i in 1 2 3; do \
+            add-apt-repository -y ppa:deadsnakes/ppa && break || \
+            { echo "Attempt $i failed, retrying in 5s..."; sleep 5; }; \
+        done ; \
+    fi \
+    && apt-get update -y \
+    && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv \
+    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \
+    && update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} \
+    && ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config \
+    && curl -sS "${GET_PIP_URL}" | python${PYTHON_VERSION} \
+    && python3 --version && python3 -m pip --version
+
+ARG PIP_INDEX_URL UV_INDEX_URL
+ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
+ARG PYTORCH_CUDA_INDEX_BASE_URL
+ARG PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL
+ARG PIP_KEYRING_PROVIDER UV_KEYRING_PROVIDER
+
+
+RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/
+
+WORKDIR /workspace
+# Install the runtime requirements with pip; --system is a uv-only flag that pip rejects, so it is not passed.
+COPY requirements.txt requirements.txt
+RUN python3 -m pip install -r requirements.txt \
+    --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
+
+# cuda arch list used by torch
+ARG torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0 10.0 12.0'
+ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
+#################### BASE BUILD IMAGE ####################
+
+#################### WHEEL BUILD IMAGE ####################
+FROM base AS build
+ARG TARGETPLATFORM
+
+ARG PIP_INDEX_URL UV_INDEX_URL
+ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
+ARG PYTORCH_CUDA_INDEX_BASE_URL
+
+# Build-time toolchain (cmake, ninja, setuptools, torch, ...) used to compile the vLLM wheel.
+COPY build_requirements.txt build_requirements.txt
+RUN python3 -m pip install -r build_requirements.txt \
+    --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
+
+ARG VLLM_VERSION="v0.10.1.1"
+
+# Clone into a dedicated directory: /workspace already holds the copied requirements files,
+# and git refuses to clone into a non-empty directory.
+RUN git clone https://github.com/vllm-project/vllm.git /workspace/vllm && \
+    git -C /workspace/vllm checkout ${VLLM_VERSION}
+
+WORKDIR /workspace/vllm
+
+ARG vllm_target_device="cuda"
+ENV VLLM_TARGET_DEVICE=${vllm_target_device}
+# Compile from source and write the wheel to /workspace/dist, the path the vllm-base stage installs from.
+RUN export VLLM_USE_PRECOMPILED="false" && \
+        export VLLM_DOCKER_BUILD_CONTEXT=1 && \
+        python3 setup.py bdist_wheel --dist-dir=/workspace/dist --py-limited-api=cp38
+
+#################### EXTENSION Build IMAGE ####################
+
+#################### vLLM installation IMAGE ####################
+# image with vLLM installed
+FROM ${FINAL_BASE_IMAGE} AS vllm-base
+ARG CUDA_VERSION
+ARG PYTHON_VERSION
+ARG INSTALL_KV_CONNECTORS=false
+WORKDIR /vllm-workspace
+ENV DEBIAN_FRONTEND=noninteractive
+ARG TARGETPLATFORM
+
+SHELL ["/bin/bash", "-c"]
+
+ARG DEADSNAKES_MIRROR_URL
+ARG DEADSNAKES_GPGKEY_URL
+ARG GET_PIP_URL
+
+RUN PYTHON_VERSION_STR=$(echo ${PYTHON_VERSION} | sed 's/\.//g') && \
+    echo "export PYTHON_VERSION_STR=${PYTHON_VERSION_STR}" >> /etc/environment
+
+# Install runtime OS packages and the requested Python from deadsnakes (custom mirror if set, else the PPA), then bootstrap pip
+RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
+    && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
+    && apt-get update -y \
+    && apt-get install -y ccache software-properties-common git curl wget sudo vim python3-pip \
+    && apt-get install -y ffmpeg libsm6 libxext6 libgl1 \
+    && if [ -n "${DEADSNAKES_MIRROR_URL}" ] ; then \
+        if [ -n "${DEADSNAKES_GPGKEY_URL}" ] ; then \
+            mkdir -p -m 0755 /etc/apt/keyrings ; \
+            curl -L "${DEADSNAKES_GPGKEY_URL}" | gpg --dearmor > /etc/apt/keyrings/deadsnakes.gpg ; \
+            sudo chmod 644 /etc/apt/keyrings/deadsnakes.gpg ; \
+            echo "deb [signed-by=/etc/apt/keyrings/deadsnakes.gpg] ${DEADSNAKES_MIRROR_URL} $(lsb_release -cs) main" > /etc/apt/sources.list.d/deadsnakes.list ; \
+        fi ; \
+    else \
+        for i in 1 2 3; do \
+            add-apt-repository -y ppa:deadsnakes/ppa && break || \
+            { echo "Attempt $i failed, retrying in 5s..."; sleep 5; }; \
+        done ; \
+    fi \
+    && apt-get update -y \
+    && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv libibverbs-dev \
+    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \
+    && update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} \
+    && ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config \
+    && curl -sS "${GET_PIP_URL}" | python${PYTHON_VERSION} \
+    && python3 --version && python3 -m pip --version
+
+ARG PIP_INDEX_URL UV_INDEX_URL
+ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
+ARG PYTORCH_CUDA_INDEX_BASE_URL
+ARG PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL
+ARG PIP_KEYRING_PROVIDER UV_KEYRING_PROVIDER
+
+
+RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/
+# Preinstall torch/torchvision from the PyTorch nightly index for the configured CUDA version (plain pip: no --system flag).
+RUN python3 -m pip install \
+            --index-url ${PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') \
+            "torch==2.8.0.*" "torchvision==0.22.0.*"
+# pytorch_triton pre-release, pinned to an exact build, from the same nightly index.
+RUN python3 -m pip install \
+            --index-url ${PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') \
+            --pre pytorch_triton==3.3.0+gitab727c40
+
+# Bind-mount the wheel produced by the build stage (needs BuildKit) and install it; this also pulls in torch and the other runtime deps.
+RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
+    python3 -m pip install dist/*.whl --verbose \
+        --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
+
+# FlashInfer source repository and ref to build from. Each build argument is declared
+# exactly once so that its effective default is unambiguous.
+ARG FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git"
+ARG FLASHINFER_GIT_REF="v0.2.12"
+
+# Install FlashInfer
+# "true" compiles the FlashInfer kernels ahead of time (AOT) during the image build;
+# any other value installs FlashInfer in JIT mode.
+ARG FLASHINFER_AOT_COMPILE="true"
+# Build FlashInfer from source; in AOT mode only the optional cubin download may fail without failing the build.
+RUN bash -c '. /etc/environment && \
+    git clone --depth 1 --recursive --shallow-submodules \
+        --branch ${FLASHINFER_GIT_REF} \
+        ${FLASHINFER_GIT_REPO} flashinfer && \
+    cd flashinfer && \
+    if [ "${FLASHINFER_AOT_COMPILE}" = "true" ]; then \
+        if [[ "${CUDA_VERSION}" == 11.* ]]; then \
+            FI_TORCH_CUDA_ARCH_LIST="7.5 8.0 8.9"; \
+        elif [[ "${CUDA_VERSION}" == 12.[0-7]* ]]; then \
+            FI_TORCH_CUDA_ARCH_LIST="7.5 8.0 8.9 9.0a"; \
+        else \
+            FI_TORCH_CUDA_ARCH_LIST="7.5 8.0 8.9 9.0a 10.0a 12.0"; \
+        fi && \
+        echo "🏗️  Installing FlashInfer with AOT compilation for arches: ${FI_TORCH_CUDA_ARCH_LIST}" && \
+        TORCH_CUDA_ARCH_LIST="${FI_TORCH_CUDA_ARCH_LIST}" python3 -m flashinfer.aot && \
+        TORCH_CUDA_ARCH_LIST="${FI_TORCH_CUDA_ARCH_LIST}" python3 -m pip install --no-build-isolation . \
+            --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d ".") && \
+        { TORCH_CUDA_ARCH_LIST="${FI_TORCH_CUDA_ARCH_LIST}" python3 -m flashinfer --download-cubin || \
+            echo "WARNING: Failed to download flashinfer cubins."; }; \
+    else \
+        echo "🏗️  Installing FlashInfer without AOT compilation in JIT mode" && \
+        python3 -m pip install . \
+            --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d "."); \
+    fi && \
+    cd .. && \
+    rm -rf flashinfer'
+# Build toolchain (cmake, ninja, setuptools, wheel, ...) in the runtime image; presumably for the DeepGEMM source build below.
+# NOTE(review): build_requirements.txt pins torch==2.7.1, which would replace the torch 2.8 build installed earlier - confirm.
+COPY build_requirements.txt build_requirements.txt
+RUN python3 -m pip install -r build_requirements.txt \
+        --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
+
+# Install DeepGEMM from source when CUDA is 12.8 or newer (any later major version included); installed with pip since uv is not present
+ARG DEEPGEMM_GIT_REPO="https://github.com/deepseek-ai/DeepGEMM.git"
+ARG DEEPGEMM_GIT_REF="7b6b5563b9d4c1ae07ffbce7f78ad3ac9204827c"
+RUN bash -c ' \
+    . /etc/environment \
+    && CUDA_MAJOR="${CUDA_VERSION%%.*}" \
+    && CUDA_MINOR="${CUDA_VERSION#${CUDA_MAJOR}.}" \
+    && CUDA_MINOR="${CUDA_MINOR%%.*}" \
+    && if [ "$CUDA_MAJOR" -gt 12 ] || { [ "$CUDA_MAJOR" -eq 12 ] && [ "$CUDA_MINOR" -ge 8 ]; }; then \
+        git clone --recursive --shallow-submodules ${DEEPGEMM_GIT_REPO} deepgemm \
+        && echo "🏗️  Building DeepGEMM" \
+        && cd deepgemm \
+        && git checkout ${DEEPGEMM_GIT_REF} \
+        && rm -rf build dist \
+        && rm -rf *.egg-info \
+        && python3 setup.py bdist_wheel \
+        && python3 -m pip install dist/*.whl \
+        && cd .. \
+        && rm -rf deepgemm; \
+    else \
+        echo "Skipping DeepGEMM installation (requires CUDA 12.8+ but got ${CUDA_VERSION})"; \
+    fi'
+#################### vLLM installation IMAGE ####################
+
+#################### OPENAI API SERVER ####################
+# base openai image with additional requirements, for any subsequent openai-style images
+FROM vllm-base AS vllm-openai-base
+ARG TARGETPLATFORM
+ARG INSTALL_KV_CONNECTORS=false
+
+ARG PIP_INDEX_URL UV_INDEX_URL
+ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
+
+
+# Install additional dependencies for the OpenAI API server; lmcache is added only when INSTALL_KV_CONNECTORS is "true"
+RUN if [ "$INSTALL_KV_CONNECTORS" = "true" ]; then \
+        python3 -m pip install lmcache; \
+    fi; \
+    python3 -m pip install accelerate hf_transfer modelscope "bitsandbytes>=0.42.0" 'timm==0.9.10' boto3 runai-model-streamer "runai-model-streamer[s3]"
+
+ENV VLLM_USAGE_SOURCE=production-docker-image
+
+FROM vllm-openai-base AS final
+ARG PYTHON="python3"
+LABEL maintainer="Amazon AI"
+LABEL dlc_major_version="1"
+ENV DEBIAN_FRONTEND=noninteractive \
+    LANG=C.UTF-8 \
+    LC_ALL=C.UTF-8 \
+    DLC_CONTAINER_TYPE=base \
+    # PYTHONDONTWRITEBYTECODE/PYTHONUNBUFFERED: no .pyc files are written and stdout/stderr are unbuffered (good for logging)
+    # LD_LIBRARY_PATH uses the aarch64 multiarch directory for the OFI NCCL plugin because this is the ARM64 image
+    PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1 \
+    PYTHONIOENCODING=UTF-8 \
+    LD_LIBRARY_PATH="/usr/local/lib:/opt/amazon/ofi-nccl/lib/aarch64-linux-gnu:/opt/amazon/openmpi/lib:/opt/amazon/efa/lib:/usr/local/cuda/lib64:${LD_LIBRARY_PATH}" \
+    PATH="/opt/amazon/openmpi/bin:/opt/amazon/efa/bin:/usr/local/cuda/bin:${PATH}"
+
+WORKDIR /
+
+# Ship the DLC helper scripts and the entrypoint in /usr/local/bin, make them executable, and source the telemetry hook from /etc/bash.bashrc.
+COPY deep_learning_container.py bash_telemetry.sh dockerd_entrypoint.sh /usr/local/bin/
+
+RUN chmod +x \
+        /usr/local/bin/deep_learning_container.py \
+        /usr/local/bin/bash_telemetry.sh \
+        /usr/local/bin/dockerd_entrypoint.sh && \
+    echo 'source /usr/local/bin/bash_telemetry.sh' >> /etc/bash.bashrc
+
+# RUN mkdir -p /tmp/nvjpeg \
+#     && cd /tmp/nvjpeg \
+#     && wget https://developer.download.nvidia.com/compute/cuda/redist/libnvjpeg/linux-x86_64/libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz \
+#     && tar -xvf libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz \
+#     && rm -rf /usr/local/cuda/targets/x86_64-linux/lib/libnvjpeg* \
+#     && rm -rf /usr/local/cuda/targets/x86_64-linux/include/nvjpeg.h \
+#     && cp libnvjpeg-linux-x86_64-12.4.0.76-archive/lib/libnvjpeg* /usr/local/cuda/targets/x86_64-linux/lib/ \
+#     && cp libnvjpeg-linux-x86_64-12.4.0.76-archive/include/* /usr/local/cuda/targets/x86_64-linux/include/ \
+#     && rm -rf /tmp/nvjpeg \ 
+#       # remove cuobjdump and nvdisasm
+#     && rm -rf /usr/local/cuda/bin/cuobjdump* \ 
+#     && rm -rf /usr/local/cuda/bin/nvdisasm*  
+
+ENTRYPOINT ["/usr/local/bin/dockerd_entrypoint.sh"] 
\ No newline at end of file
diff --git a/vllm/buildspec-arm64.yml b/vllm/buildspec-arm64.yml
index a4752553d1d0..1aaaad5277fa 100644
--- a/vllm/buildspec-arm64.yml
+++ b/vllm/buildspec-arm64.yml
@@ -2,7 +2,7 @@ account_id: &ACCOUNT_ID <set-$ACCOUNT_ID-in-environment>
 prod_account_id: &PROD_ACCOUNT_ID 763104351884
 region: &REGION <set-$REGION-in-environment>
 framework: &FRAMEWORK vllm
-version: &VERSION "0.10.0"
+version: &VERSION "0.10.1.1"
 short_version: &SHORT_VERSION "0.10"
 arch_type: &ARCH_TYPE arm64
 autopatch_build: "False"
@@ -39,9 +39,9 @@ images:
     python_version: &DOCKER_PYTHON_VERSION py3
     tag_python_version: &TAG_PYTHON_VERSION py312
     os_version: &OS_VERSION ubuntu22.04
-    tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-ec2" ]
-    latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-ec2" ]
-    docker_file: !join [ *FRAMEWORK, /, *ARCH_TYPE, /, *DEVICE_TYPE, /Dockerfile ]
+    tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-arm64" ]
+    latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-arm64" ]
+    docker_file: !join [ *FRAMEWORK, /, *ARCH_TYPE, /, *DEVICE_TYPE, /Dockerfile.arm64 ]
     target: final
     build: true
     enable_common_stage_build: false
@@ -49,4 +49,5 @@ images:
       test_platforms:
         - sanity
         - security
+        - ec2
         - eks
\ No newline at end of file