diff --git a/dlc_developer_config.toml b/dlc_developer_config.toml
index 7c6a3dd04658..3e2e4d20f9b5 100644
--- a/dlc_developer_config.toml
+++ b/dlc_developer_config.toml
@@ -15,7 +15,7 @@ neuronx_mode = false
 graviton_mode = false
 # Please only set it to true if you are preparing a ARM64 related PR
 # Do remember to revert it back to false before merging any PR (including ARM64 dedicated PR)
-arm64_mode = false
+arm64_mode = true
 # Please only set it to True if you are preparing a HABANA related PR
 # Do remember to revert it back to False before merging any PR (including HABANA dedicated PR)
 habana_mode = false
@@ -37,7 +37,7 @@ deep_canary_mode = false
 [build]
 # Add in frameworks you would like to build. By default, builds are disabled unless you specify building an image.
 # available frameworks - ["base", "vllm", "autogluon", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"]
-build_frameworks = []
+build_frameworks = ["vllm"]
 
 
 # By default we build both training and inference containers. Set true/false values to determine which to build.
diff --git a/miscellaneous_scripts/build_requirements.txt b/miscellaneous_scripts/build_requirements.txt
new file mode 100644
index 000000000000..eb3416aca407
--- /dev/null
+++ b/miscellaneous_scripts/build_requirements.txt
@@ -0,0 +1,11 @@
+# Should be mirrored in pyproject.toml
+cmake>=3.26.1
+ninja
+packaging>=24.2
+setuptools>=77.0.3,<80.0.0
+setuptools-scm>=8
+# NOTE(review): requirements.txt pins torch==2.8; confirm the 2.7.1 build-time pin is intended.
+torch==2.7.1
+wheel
+jinja2>=3.1.6
+regex
diff --git a/miscellaneous_scripts/requirements.txt b/miscellaneous_scripts/requirements.txt
new file mode 100644
index 000000000000..825269842b4b
--- /dev/null
+++ b/miscellaneous_scripts/requirements.txt
@@ -0,0 +1,60 @@
+regex # Replace re for higher-performance regex matching
+cachetools
+psutil
+sentencepiece # Required for LLaMA tokenizer.
+numpy
+requests >= 2.26.0
+tqdm
+blake3
+py-cpuinfo
+transformers >= 4.55.2
+tokenizers >= 0.21.1 # Required for fast incremental detokenization.
+protobuf # Required by LlamaTokenizer.
+fastapi[standard] >= 0.115.0 # Required by FastAPI's form models in the OpenAI API server's audio transcriptions endpoint.
+aiohttp
+openai >= 1.99.1 # For Responses API with reasoning content
+pydantic >= 2.11.7
+prometheus_client >= 0.18.0
+pillow # Required for image processing
+prometheus-fastapi-instrumentator >= 7.0.0
+tiktoken >= 0.6.0 # Required for DBRX tokenizer
+lm-format-enforcer >= 0.10.11, < 0.11
+llguidance >= 0.7.11, < 0.8.0
+# required for outlines backend disk cache
+diskcache == 5.6.3
+lark == 1.2.2
+xgrammar == 0.1.21
+typing_extensions >= 4.10
+filelock >= 3.16.1 # need to contain https://github.com/tox-dev/filelock/pull/317
+partial-json-parser # used for parsing partial JSON outputs
+pyzmq >= 25.0.0
+msgspec
+gguf >= 0.13.0
+mistral_common[image,audio] >= 1.8.2
+opencv-python-headless >= 4.11.0 # required for video IO
+pyyaml
+six>=1.16.0; python_version > '3.11' # transitive dependency of pandas that needs to be the latest version for python 3.12
+setuptools>=77.0.3,<80; python_version > '3.11' # Setuptools is used by triton, we need to ensure a modern version is installed for 3.12+ so that it does not try to import distutils, which was removed in 3.12
+einops # Required for Qwen2-VL.
+compressed-tensors == 0.11.0 # required for compressed-tensors
+depyf==0.19.0 # required for profiling and debugging with compilation config
+cloudpickle # allows pickling lambda functions in model_executor/models/registry.py
+watchfiles # required for http server to monitor the updates of TLS files
+python-json-logger # Used by logging as per examples/others/logging_configuration.md
+scipy # Required for phi-4-multimodal-instruct
+ninja # Required for xgrammar, rocm, tpu, xpu
+pybase64 # fast base64 implementation
+cbor2 # Required for cross-language serialization of hashable objects
+setproctitle # Used to set process names for better debugging and monitoring
+openai-harmony >= 0.0.3 # Required for gpt-oss
+
+numba == 0.61.2; python_version > '3.9'
+
+# Dependencies for NVIDIA GPUs
+ray[cgraph]>=2.48.0 # Ray Compiled Graph, required for pipeline parallelism in V1.
+torch==2.8
+torchaudio==2.8
+# These must be updated alongside torch
+torchvision==0.23 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
+# https://github.com/facebookresearch/xformers/releases/tag/v0.0.31
+xformers==0.0.31; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch >= 2.7
diff --git a/scripts/install_efa.sh b/scripts/install_efa.sh
index 9ca36fa18c5d..c12f40ebdd0d 100644
--- a/scripts/install_efa.sh
+++ b/scripts/install_efa.sh
@@ -72,7 +72,8 @@ function install_efa {
     apt-get autoremove -y
     rm -rf /var/lib/apt/lists/*
     ldconfig
-    check_libnccl_net_so
+    # NOTE(review): this call is disabled by this change; confirm whether it must be re-enabled before merge.
+    # check_libnccl_net_so
 }
 
 # idiomatic parameter and option handling in sh
diff --git a/vllm/CHANGELOG.md b/vllm/CHANGELOG.md
index 945cced31d4d..847aaf48642c 100644
--- a/vllm/CHANGELOG.md
+++ b/vllm/CHANGELOG.md
@@ -2,6 +2,17 @@
 
 All notable changes to vLLM Deep Learning Containers will be documented in this file.
 
+## [0.10.1.1] - 2025-08-04
+### Updated
+- vLLM version `v0.10.1.1` (built from source)
+- EFA installer version `1.43.1`
+- Architecture ARM64
+### Sample ECR URI
+```
+763104351884.dkr.ecr.us-east-1.amazonaws.com/vllm:0.10-gpu-py312-arm64
+763104351884.dkr.ecr.us-east-1.amazonaws.com/vllm:0.10.1.1-gpu-py312-cu128-ubuntu22.04-arm64
+```
+
 ## [0.10.0] - 2025-08-04
 ### Updated
 - vllm/vllm-openai version `v0.10.0`
diff --git a/vllm/arm64/gpu/Dockerfile.arm64 b/vllm/arm64/gpu/Dockerfile.arm64
new file mode 100644
index 000000000000..be18fff511d9
--- /dev/null
+++ b/vllm/arm64/gpu/Dockerfile.arm64
@@ -0,0 +1,303 @@
+# vLLM GPU image for ARM64: builds the vLLM wheel from source, then layers the
+# OpenAI API server dependencies and the DLC entrypoint on top.
+ARG CUDA_VERSION=12.8.1
+ARG PYTHON_VERSION=3.12
+
+ARG BUILD_BASE_IMAGE=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04
+ARG FINAL_BASE_IMAGE=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04
+
+ARG DEADSNAKES_MIRROR_URL
+ARG DEADSNAKES_GPGKEY_URL
+
+ARG GET_PIP_URL="https://bootstrap.pypa.io/get-pip.py"
+
+ARG PIP_INDEX_URL
+ARG PIP_EXTRA_INDEX_URL
+ARG UV_INDEX_URL=${PIP_INDEX_URL}
+ARG UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
+
+ARG PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl
+ARG PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL=https://download.pytorch.org/whl/nightly
+
+ARG PIP_KEYRING_PROVIDER=disabled
+ARG UV_KEYRING_PROVIDER=${PIP_KEYRING_PROVIDER}
+
+ARG INSTALL_KV_CONNECTORS=false
+
+#################### BASE BUILD IMAGE ####################
+# prepare basic build environment
+FROM ${BUILD_BASE_IMAGE} AS base
+ARG CUDA_VERSION
+ARG PYTHON_VERSION
+ARG TARGETPLATFORM
+ARG INSTALL_KV_CONNECTORS=false
+ENV DEBIAN_FRONTEND=noninteractive
+
+ARG DEADSNAKES_MIRROR_URL
+ARG DEADSNAKES_GPGKEY_URL
+ARG GET_PIP_URL
+
+# Install Python and other dependencies
+RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
+    && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
+    && apt-get update -y \
+    && apt-get install -y ccache software-properties-common git curl sudo \
+    && if [ ! -z "${DEADSNAKES_MIRROR_URL}" ] ; then \
+        if [ ! -z "${DEADSNAKES_GPGKEY_URL}" ] ; then \
+            mkdir -p -m 0755 /etc/apt/keyrings ; \
+            curl -L ${DEADSNAKES_GPGKEY_URL} | gpg --dearmor > /etc/apt/keyrings/deadsnakes.gpg ; \
+            sudo chmod 644 /etc/apt/keyrings/deadsnakes.gpg ; \
+            echo "deb [signed-by=/etc/apt/keyrings/deadsnakes.gpg] ${DEADSNAKES_MIRROR_URL} $(lsb_release -cs) main" > /etc/apt/sources.list.d/deadsnakes.list ; \
+        fi ; \
+    else \
+        for i in 1 2 3; do \
+            add-apt-repository -y ppa:deadsnakes/ppa && break || \
+            { echo "Attempt $i failed, retrying in 5s..."; sleep 5; }; \
+        done ; \
+    fi \
+    && apt-get update -y \
+    && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv \
+    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \
+    && update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} \
+    && ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config \
+    && curl -sS ${GET_PIP_URL} | python${PYTHON_VERSION} \
+    && python3 --version && python3 -m pip --version
+
+ARG PIP_INDEX_URL UV_INDEX_URL
+ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
+ARG PYTORCH_CUDA_INDEX_BASE_URL
+ARG PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL
+ARG PIP_KEYRING_PROVIDER UV_KEYRING_PROVIDER
+
+RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/
+
+WORKDIR /workspace
+
+COPY requirements.txt requirements.txt
+# pip is bootstrapped from get-pip.py, which has no --system option (that flag belongs to uv).
+RUN python3 -m pip install -r requirements.txt \
+    --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
+
+# cuda arch list used by torch
+ARG torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0 10.0 12.0'
+ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
+#################### BASE BUILD IMAGE ####################
+
+#################### WHEEL BUILD IMAGE ####################
+FROM base AS build
+ARG TARGETPLATFORM
+
+ARG PIP_INDEX_URL UV_INDEX_URL
+ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
+ARG PYTORCH_CUDA_INDEX_BASE_URL
+
+COPY build_requirements.txt build_requirements.txt
+
+RUN python3 -m pip install -r build_requirements.txt \
+    --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
+
+ARG VLLM_VERSION="v0.10.1.1"
+
+# Clone vLLM into its own directory: /workspace already holds the copied
+# requirements files, and git refuses to clone into a non-empty directory.
+RUN git clone https://github.com/vllm-project/vllm.git /workspace/vllm && \
+    git -C /workspace/vllm checkout ${VLLM_VERSION}
+
+WORKDIR /workspace/vllm
+
+ARG vllm_target_device="cuda"
+ENV VLLM_TARGET_DEVICE=${vllm_target_device}
+# The wheel is written to /workspace/dist, which the vllm-base stage copies from.
+RUN export VLLM_USE_PRECOMPILED="false" && \
+    export VLLM_DOCKER_BUILD_CONTEXT=1 && \
+    python3 setup.py bdist_wheel --dist-dir=/workspace/dist --py-limited-api=cp38
+
+#################### WHEEL BUILD IMAGE ####################
+
+#################### vLLM installation IMAGE ####################
+# image with vLLM installed
+FROM ${FINAL_BASE_IMAGE} AS vllm-base
+ARG CUDA_VERSION
+ARG PYTHON_VERSION
+ARG INSTALL_KV_CONNECTORS=false
+WORKDIR /vllm-workspace
+ENV DEBIAN_FRONTEND=noninteractive
+ARG TARGETPLATFORM
+
+SHELL ["/bin/bash", "-c"]
+
+ARG DEADSNAKES_MIRROR_URL
+ARG DEADSNAKES_GPGKEY_URL
+ARG GET_PIP_URL
+
+RUN PYTHON_VERSION_STR=$(echo ${PYTHON_VERSION} | sed 's/\.//g') && \
+    echo "export PYTHON_VERSION_STR=${PYTHON_VERSION_STR}" >> /etc/environment
+
+# Install Python and other dependencies
+RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
+    && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
+    && apt-get update -y \
+    && apt-get install -y ccache software-properties-common git curl wget sudo vim python3-pip \
+    && apt-get install -y ffmpeg libsm6 libxext6 libgl1 \
+    && if [ ! -z "${DEADSNAKES_MIRROR_URL}" ] ; then \
+        if [ ! -z "${DEADSNAKES_GPGKEY_URL}" ] ; then \
+            mkdir -p -m 0755 /etc/apt/keyrings ; \
+            curl -L ${DEADSNAKES_GPGKEY_URL} | gpg --dearmor > /etc/apt/keyrings/deadsnakes.gpg ; \
+            sudo chmod 644 /etc/apt/keyrings/deadsnakes.gpg ; \
+            echo "deb [signed-by=/etc/apt/keyrings/deadsnakes.gpg] ${DEADSNAKES_MIRROR_URL} $(lsb_release -cs) main" > /etc/apt/sources.list.d/deadsnakes.list ; \
+        fi ; \
+    else \
+        for i in 1 2 3; do \
+            add-apt-repository -y ppa:deadsnakes/ppa && break || \
+            { echo "Attempt $i failed, retrying in 5s..."; sleep 5; }; \
+        done ; \
+    fi \
+    && apt-get update -y \
+    && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv libibverbs-dev \
+    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 \
+    && update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} \
+    && ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config \
+    && curl -sS ${GET_PIP_URL} | python${PYTHON_VERSION} \
+    && python3 --version && python3 -m pip --version
+
+ARG PIP_INDEX_URL UV_INDEX_URL
+ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
+ARG PYTORCH_CUDA_INDEX_BASE_URL
+ARG PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL
+ARG PIP_KEYRING_PROVIDER UV_KEYRING_PROVIDER
+
+RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/
+
+# NOTE(review): requirements.txt pins torchvision==0.23 for torch 2.8; confirm the 0.22.0.* pin below is intended.
+RUN python3 -m pip install \
+    --index-url ${PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') \
+    "torch==2.8.0.*" "torchvision==0.22.0.*"
+
+RUN python3 -m pip install \
+    --index-url ${PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') \
+    --pre pytorch_triton==3.3.0+gitab727c40
+
+# Install vllm wheel first, so that torch etc will be installed.
+# COPY --from is used rather than a RUN --mount bind so the step does not depend on BuildKit.
+COPY --from=build /workspace/dist /vllm-workspace/dist
+RUN python3 -m pip install dist/*.whl --verbose \
+    --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
+
+# Install FlashInfer
+ARG FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git"
+ARG FLASHINFER_GIT_REF="v0.2.12"
+ARG FLASHINFER_AOT_COMPILE="true"
+
+RUN bash -c '. /etc/environment && \
+    git clone --depth 1 --recursive --shallow-submodules \
+        --branch ${FLASHINFER_GIT_REF} \
+        ${FLASHINFER_GIT_REPO} flashinfer && \
+    cd flashinfer && \
+    if [ "${FLASHINFER_AOT_COMPILE}" = "true" ]; then \
+        if [[ "${CUDA_VERSION}" == 11.* ]]; then \
+            FI_TORCH_CUDA_ARCH_LIST="7.5 8.0 8.9"; \
+        elif [[ "${CUDA_VERSION}" == 12.[0-7]* ]]; then \
+            FI_TORCH_CUDA_ARCH_LIST="7.5 8.0 8.9 9.0a"; \
+        else \
+            FI_TORCH_CUDA_ARCH_LIST="7.5 8.0 8.9 9.0a 10.0a 12.0"; \
+        fi && \
+        echo "🏗️  Installing FlashInfer with AOT compilation for arches: ${FI_TORCH_CUDA_ARCH_LIST}" && \
+        TORCH_CUDA_ARCH_LIST="${FI_TORCH_CUDA_ARCH_LIST}" python3 -m flashinfer.aot && \
+        TORCH_CUDA_ARCH_LIST="${FI_TORCH_CUDA_ARCH_LIST}" python3 -m pip install --no-build-isolation . \
+            --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d ".") && \
+        TORCH_CUDA_ARCH_LIST="${FI_TORCH_CUDA_ARCH_LIST}" python3 -m flashinfer --download-cubin || \
+            echo "WARNING: Failed to download flashinfer cubins."; \
+    else \
+        echo "🏗️  Installing FlashInfer without AOT compilation in JIT mode" && \
+        python3 -m pip install . \
+            --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d "."); \
+    fi && \
+    cd .. && \
+    rm -rf flashinfer'
+
+COPY build_requirements.txt build_requirements.txt
+RUN python3 -m pip install -r build_requirements.txt \
+    --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
+
+# Install DeepGEMM from source
+ARG DEEPGEMM_GIT_REPO="https://github.com/deepseek-ai/DeepGEMM.git"
+ARG DEEPGEMM_GIT_REF="7b6b5563b9d4c1ae07ffbce7f78ad3ac9204827c"
+RUN bash -c ' \
+    . /etc/environment \
+    && CUDA_MAJOR="${CUDA_VERSION%%.*}" \
+    && CUDA_MINOR="${CUDA_VERSION#${CUDA_MAJOR}.}" \
+    && CUDA_MINOR="${CUDA_MINOR%%.*}" \
+    && if [ "$CUDA_MAJOR" -ge 12 ] && [ "$CUDA_MINOR" -ge 8 ]; then \
+        git clone --recursive --shallow-submodules ${DEEPGEMM_GIT_REPO} deepgemm \
+        && echo "🏗️  Building DeepGEMM" \
+        && cd deepgemm \
+        && git checkout ${DEEPGEMM_GIT_REF} \
+        && rm -rf build dist \
+        && rm -rf *.egg-info \
+        && python3 setup.py bdist_wheel \
+        && python3 -m pip install dist/*.whl \
+        && cd .. \
+        && rm -rf deepgemm; \
+    else \
+        echo "Skipping DeepGEMM installation (requires CUDA 12.8+ but got ${CUDA_VERSION})"; \
+    fi'
+#################### vLLM installation IMAGE ####################
+
+#################### OPENAI API SERVER ####################
+# base openai image with additional requirements, for any subsequent openai-style images
+FROM vllm-base AS vllm-openai-base
+ARG TARGETPLATFORM
+ARG INSTALL_KV_CONNECTORS=false
+
+ARG PIP_INDEX_URL UV_INDEX_URL
+ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
+
+# install additional dependencies for openai api server
+RUN if [ "$INSTALL_KV_CONNECTORS" = "true" ]; then \
+        python3 -m pip install lmcache; \
+    fi; \
+    python3 -m pip install accelerate hf_transfer modelscope "bitsandbytes>=0.42.0" 'timm==0.9.10' boto3 runai-model-streamer 'runai-model-streamer[s3]'
+
+ENV VLLM_USAGE_SOURCE=production-docker-image
+
+FROM vllm-openai-base AS final
+ARG PYTHON="python3"
+LABEL maintainer="Amazon AI"
+LABEL dlc_major_version="1"
+ENV DEBIAN_FRONTEND=noninteractive \
+    LANG=C.UTF-8 \
+    LC_ALL=C.UTF-8 \
+    DLC_CONTAINER_TYPE=base \
+    # Python won't try to write .pyc or .pyo files on the import of source modules
+    # Force stdin, stdout and stderr to be totally unbuffered. Good for logging
+    PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1 \
+    PYTHONIOENCODING=UTF-8 \
+    LD_LIBRARY_PATH="/usr/local/lib:/opt/amazon/ofi-nccl/lib/aarch64-linux-gnu:/opt/amazon/openmpi/lib:/opt/amazon/efa/lib:/usr/local/cuda/lib64:${LD_LIBRARY_PATH}" \
+    PATH="/opt/amazon/openmpi/bin:/opt/amazon/efa/bin:/usr/local/cuda/bin:${PATH}"
+
+WORKDIR /
+
+COPY deep_learning_container.py /usr/local/bin/deep_learning_container.py
+COPY bash_telemetry.sh /usr/local/bin/bash_telemetry.sh
+COPY dockerd_entrypoint.sh /usr/local/bin/dockerd_entrypoint.sh
+
+RUN chmod +x /usr/local/bin/deep_learning_container.py && \
+    chmod +x /usr/local/bin/bash_telemetry.sh && \
+    chmod +x /usr/local/bin/dockerd_entrypoint.sh && \
+    echo 'source /usr/local/bin/bash_telemetry.sh' >> /etc/bash.bashrc
+
+# RUN mkdir -p /tmp/nvjpeg \
+#     && cd /tmp/nvjpeg \
+#     && wget https://developer.download.nvidia.com/compute/cuda/redist/libnvjpeg/linux-x86_64/libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz \
+#     && tar -xvf libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz \
+#     && rm -rf /usr/local/cuda/targets/x86_64-linux/lib/libnvjpeg* \
+#     && rm -rf /usr/local/cuda/targets/x86_64-linux/include/nvjpeg.h \
+#     && cp libnvjpeg-linux-x86_64-12.4.0.76-archive/lib/libnvjpeg* /usr/local/cuda/targets/x86_64-linux/lib/ \
+#     && cp libnvjpeg-linux-x86_64-12.4.0.76-archive/include/* /usr/local/cuda/targets/x86_64-linux/include/ \
+#     && rm -rf /tmp/nvjpeg \
+# # remove cuobjdump and nvdisasm
+#     && rm -rf /usr/local/cuda/bin/cuobjdump* \
+#     && rm -rf /usr/local/cuda/bin/nvdisasm*
+
+ENTRYPOINT ["/usr/local/bin/dockerd_entrypoint.sh"]
diff --git a/vllm/buildspec-arm64.yml b/vllm/buildspec-arm64.yml
index a4752553d1d0..1aaaad5277fa 100644
--- a/vllm/buildspec-arm64.yml
+++ b/vllm/buildspec-arm64.yml
@@ -2,7 +2,7 @@ account_id: &ACCOUNT_ID <set-$ACCOUNT_ID-in-environment>
 prod_account_id: &PROD_ACCOUNT_ID 763104351884
 region: &REGION <set-$REGION-in-environment>
 framework: &FRAMEWORK vllm
-version: &VERSION "0.10.0"
+version: &VERSION "0.10.1.1"
 short_version: &SHORT_VERSION "0.10"
 arch_type: &ARCH_TYPE arm64
 autopatch_build: "False"
@@ -39,9 +39,9 @@ images:
     python_version: &DOCKER_PYTHON_VERSION py3
     tag_python_version: &TAG_PYTHON_VERSION py312
     os_version: &OS_VERSION ubuntu22.04
-    tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-ec2" ]
-    latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-ec2" ]
-    docker_file: !join [ *FRAMEWORK, /, *ARCH_TYPE, /, *DEVICE_TYPE, /Dockerfile ]
+    tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-arm64" ]
+    latest_release_tag: !join [ *VERSION, "-", *DEVICE_TYPE, "-", *TAG_PYTHON_VERSION, "-", *CUDA_VERSION, "-", *OS_VERSION, "-arm64" ]
+    docker_file: !join [ *FRAMEWORK, /, *ARCH_TYPE, /, *DEVICE_TYPE, /Dockerfile.arm64 ]
     target: final
     build: true
     enable_common_stage_build: false
@@ -49,4 +49,5 @@ images:
     test_platforms:
       - sanity
       - security
+      - ec2
       - eks
\ No newline at end of file