diff --git a/docker/Dockerfile b/docker/Dockerfile index b87401c59357..0cd2cfad66fd 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -386,6 +386,8 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist # Install FlashInfer from source ARG FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git" +# Keep FLASHINFER_GIT_REF in sync with the flashinfer_python pin in https://github.com/vllm-project/vllm/blob/main/requirements/cuda.txt +# We use `--force-reinstall --no-deps` to avoid issues with the existing FlashInfer wheel. ARG FLASHINFER_GIT_REF="v0.2.9rc2" RUN --mount=type=cache,target=/root/.cache/uv bash - <<'BASH' . /etc/environment @@ -408,7 +410,7 @@ RUN --mount=type=cache,target=/root/.cache/uv bash - <<'BASH' TORCH_CUDA_ARCH_LIST="${FI_TORCH_CUDA_ARCH_LIST}" \ python3 -m flashinfer.aot TORCH_CUDA_ARCH_LIST="${FI_TORCH_CUDA_ARCH_LIST}" \ - uv pip install --system --no-build-isolation . + uv pip install --system --no-build-isolation --force-reinstall --no-deps . popd rm -rf flashinfer BASH diff --git a/requirements/cuda.txt b/requirements/cuda.txt index c1273b224eab..5557c868acaf 100644 --- a/requirements/cuda.txt +++ b/requirements/cuda.txt @@ -12,3 +12,5 @@ torchaudio==2.7.1 torchvision==0.22.1 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version # https://github.com/facebookresearch/xformers/releases/tag/v0.0.31 xformers==0.0.31; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch >= 2.7 +# Keep this pin in sync with FLASHINFER_GIT_REF in docker/Dockerfile +flashinfer_python==0.2.9rc2 \ No newline at end of file