Skip to content

Commit e017120

Browse files
committed
[ci] fix wheel names for arm wheels (#24898)
Signed-off-by: simon-mo <[email protected]>
1 parent 26b999c commit e017120

File tree

5 files changed

+36
-23
lines changed

5 files changed

+36
-23
lines changed

.buildkite/release-pipeline.yaml

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,22 @@
11
steps:
22
# aarch64 + CUDA builds. PyTorch 2.8 aarch64 + CUDA wheel is only available on CUDA 12.9
33
- label: "Build arm64 wheel - CUDA 12.9"
4+
depends_on: ~
45
id: build-wheel-arm64-cuda-12-9
56
agents:
67
queue: arm64_cpu_queue_postmerge
78
commands:
89
# #NOTE: torch_cuda_arch_list is derived from upstream PyTorch build files here:
910
# https://github.com/pytorch/pytorch/blob/main/.ci/aarch64_linux/aarch64_ci_build.sh#L7
10-
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg torch_cuda_arch_list='8.7 9.0 10.0+PTX 12.0' --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
11+
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg VLLM_MAIN_CUDA_VERSION=12.9 --build-arg torch_cuda_arch_list='8.7 9.0 10.0+PTX 12.0' --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
1112
- "mkdir artifacts"
1213
- "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
1314
- "bash .buildkite/scripts/upload-wheels.sh"
1415
env:
1516
DOCKER_BUILDKIT: "1"
1617

17-
- block: "Build CUDA 12.8 wheel"
18-
key: block-build-cu128-wheel
19-
2018
- label: "Build wheel - CUDA 12.8"
21-
depends_on: block-build-cu128-wheel
19+
depends_on: ~
2220
id: build-wheel-cuda-12-8
2321
agents:
2422
queue: cpu_queue_postmerge
@@ -30,12 +28,8 @@ steps:
3028
env:
3129
DOCKER_BUILDKIT: "1"
3230

33-
- block: "Build CUDA 12.6 wheel"
34-
key: block-build-cu126-wheel
35-
depends_on: ~
36-
3731
- label: "Build wheel - CUDA 12.6"
38-
depends_on: block-build-cu126-wheel
32+
depends_on: ~
3933
id: build-wheel-cuda-12-6
4034
agents:
4135
queue: cpu_queue_postmerge
@@ -102,8 +96,6 @@ steps:
10296
depends_on:
10397
- create-multi-arch-manifest
10498
- build-wheel-cuda-12-8
105-
- build-wheel-cuda-12-6
106-
- build-wheel-cuda-12-9
10799
id: annotate-release-workflow
108100
agents:
109101
queue: cpu_queue_postmerge

.buildkite/scripts/annotate-release.sh

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,18 +14,33 @@ buildkite-agent annotate --style 'info' --context 'release-workflow' << EOF
1414
To download the wheel:
1515
\`\`\`
1616
aws s3 cp s3://vllm-wheels/${RELEASE_VERSION}/vllm-${RELEASE_VERSION}-cp38-abi3-manylinux1_x86_64.whl .
17+
aws s3 cp s3://vllm-wheels/${RELEASE_VERSION}/vllm-${RELEASE_VERSION}-cp38-abi3-manylinux2014_aarch64.whl .
18+
1719
aws s3 cp s3://vllm-wheels/${RELEASE_VERSION}+cu126/vllm-${RELEASE_VERSION}+cu126-cp38-abi3-manylinux1_x86_64.whl .
18-
aws s3 cp s3://vllm-wheels/${RELEASE_VERSION}+cu118/vllm-${RELEASE_VERSION}+cu118-cp38-abi3-manylinux1_x86_64.whl .
20+
aws s3 cp s3://vllm-wheels/${RELEASE_VERSION}+cu129/vllm-${RELEASE_VERSION}+cu129-cp38-abi3-manylinux1_x86_64.whl .
1921
\`\`\`
2022
2123
To download and upload the image:
2224
2325
\`\`\`
24-
docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:${BUILDKITE_COMMIT}
25-
docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${BUILDKITE_COMMIT} vllm/vllm-openai
26-
docker tag vllm/vllm-openai vllm/vllm-openai:latest
27-
docker tag vllm/vllm-openai vllm/vllm-openai:v${RELEASE_VERSION}
28-
docker push vllm/vllm-openai:latest
29-
docker push vllm/vllm-openai:v${RELEASE_VERSION}
26+
docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:${BUILDKITE_COMMIT}-x86_64
27+
docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:${BUILDKITE_COMMIT}-aarch64
28+
29+
docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${BUILDKITE_COMMIT}-x86_64 vllm/vllm-openai:x86_64
30+
docker tag vllm/vllm-openai:x86_64 vllm/vllm-openai:latest-x86_64
31+
docker tag vllm/vllm-openai:x86_64 vllm/vllm-openai:v${RELEASE_VERSION}-x86_64
32+
docker push vllm/vllm-openai:latest-x86_64
33+
docker push vllm/vllm-openai:v${RELEASE_VERSION}-x86_64
34+
35+
docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:${BUILDKITE_COMMIT}-aarch64 vllm/vllm-openai:aarch64
36+
docker tag vllm/vllm-openai:aarch64 vllm/vllm-openai:latest-aarch64
37+
docker tag vllm/vllm-openai:aarch64 vllm/vllm-openai:v${RELEASE_VERSION}-aarch64
38+
docker push vllm/vllm-openai:latest-aarch64
39+
docker push vllm/vllm-openai:v${RELEASE_VERSION}-aarch64
40+
41+
docker manifest create vllm/vllm-openai:latest vllm/vllm-openai:latest-x86_64 vllm/vllm-openai:latest-aarch64 --amend
42+
docker manifest create vllm/vllm-openai:v${RELEASE_VERSION} vllm/vllm-openai:v${RELEASE_VERSION}-x86_64 vllm/vllm-openai:v${RELEASE_VERSION}-aarch64 --amend
43+
docker manifest push vllm/vllm-openai:latest
44+
docker manifest push vllm/vllm-openai:v${RELEASE_VERSION}
3045
\`\`\`
3146
EOF

docker/Dockerfile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,7 @@ ARG SCCACHE_S3_NO_CREDENTIALS=0
196196

197197
# Flag to control whether to use pre-built vLLM wheels
198198
ARG VLLM_USE_PRECOMPILED=""
199+
ARG VLLM_MAIN_CUDA_VERSION=""
199200

200201
# if USE_SCCACHE is set, use sccache to speed up compilation
201202
RUN --mount=type=cache,target=/root/.cache/uv \
@@ -213,6 +214,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
213214
&& export SCCACHE_IDLE_TIMEOUT=0 \
214215
&& export CMAKE_BUILD_TYPE=Release \
215216
&& export VLLM_USE_PRECOMPILED="${VLLM_USE_PRECOMPILED}" \
217+
&& export VLLM_MAIN_CUDA_VERSION="${VLLM_MAIN_CUDA_VERSION}" \
216218
&& export VLLM_DOCKER_BUILD_CONTEXT=1 \
217219
&& sccache --show-stats \
218220
&& python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38 \

setup.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,6 @@ def load_module_from_path(module_name, path):
5656
# fallback to cpu
5757
VLLM_TARGET_DEVICE = "cpu"
5858

59-
MAIN_CUDA_VERSION = "12.8"
60-
6159

6260
def is_sccache_available() -> bool:
6361
return which("sccache") is not None and \
@@ -507,15 +505,15 @@ def get_vllm_version() -> str:
507505
version += f"{sep}precompiled"
508506
else:
509507
cuda_version = str(get_nvcc_cuda_version())
510-
if cuda_version != MAIN_CUDA_VERSION:
508+
if cuda_version != envs.VLLM_MAIN_CUDA_VERSION:
511509
cuda_version_str = cuda_version.replace(".", "")[:3]
512510
# skip this for source tarball, required for pypi
513511
if "sdist" not in sys.argv:
514512
version += f"{sep}cu{cuda_version_str}"
515513
elif _is_hip():
516514
# Get the Rocm Version
517515
rocm_version = get_rocm_version() or torch.version.hip
518-
if rocm_version and rocm_version != MAIN_CUDA_VERSION:
516+
if rocm_version and rocm_version != envs.VLLM_MAIN_CUDA_VERSION:
519517
version += f"{sep}rocm{rocm_version.replace('.', '')[:3]}"
520518
elif _is_tpu():
521519
version += f"{sep}tpu"

vllm/envs.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@
7070
VLLM_VIDEO_LOADER_BACKEND: str = "opencv"
7171
VLLM_MM_INPUT_CACHE_GIB: int = 4
7272
VLLM_TARGET_DEVICE: str = "cuda"
73+
VLLM_MAIN_CUDA_VERSION: str = "12.8"
7374
MAX_JOBS: Optional[str] = None
7475
NVCC_THREADS: Optional[str] = None
7576
VLLM_USE_PRECOMPILED: bool = False
@@ -246,6 +247,11 @@ def get_vllm_port() -> Optional[int]:
246247
"VLLM_TARGET_DEVICE":
247248
lambda: os.getenv("VLLM_TARGET_DEVICE", "cuda").lower(),
248249

250+
# Main CUDA version of vLLM, supporting [12.6, 12.8, 12.9],
251+
# 12.8 is the default. This follows PyTorch but can be overridden.
252+
"VLLM_MAIN_CUDA_VERSION":
253+
lambda: os.getenv("VLLM_MAIN_CUDA_VERSION", "").lower() or "12.8",
254+
249255
# Maximum number of compilation jobs to run in parallel.
250256
# By default this is the number of CPUs
251257
"MAX_JOBS":

0 commit comments

Comments (0)