62 commits
8543a61
Add sync worker to detect changes and merge with fork
Zhuul Apr 23, 2025
fe88462
Merge pull request #1 from Zhuul/add-sync-worker
Zhuul Apr 23, 2025
1b6563f
Merge remote-tracking branch 'upstream/main'
github-actions[bot] Apr 23, 2025
a5817be
Merge remote-tracking branch 'upstream/main'
github-actions[bot] Apr 24, 2025
35558b2
Merge remote-tracking branch 'upstream/main'
github-actions[bot] Apr 25, 2025
80f3d61
Merge remote-tracking branch 'upstream/main'
github-actions[bot] Apr 26, 2025
bf0272d
Merge remote-tracking branch 'upstream/main'
github-actions[bot] Apr 27, 2025
ee6a510
Merge remote-tracking branch 'upstream/main'
github-actions[bot] Apr 28, 2025
c451ce7
Merge remote-tracking branch 'upstream/main'
github-actions[bot] Apr 29, 2025
140da88
Merge remote-tracking branch 'upstream/main'
github-actions[bot] Apr 30, 2025
1d76899
Add improved error handling and retry mechanism
Zhuul May 6, 2025
a6407db
Merge branch 'vllm-project:main' into main
Zhuul May 6, 2025
0f29a43
Merge remote-tracking branch 'upstream/main'
github-actions[bot] May 7, 2025
beef967
Merge remote-tracking branch 'upstream/main'
github-actions[bot] May 8, 2025
c47d363
Merge remote-tracking branch 'upstream/main'
github-actions[bot] May 9, 2025
f33b888
Merge remote-tracking branch 'upstream/main'
github-actions[bot] May 10, 2025
be139dd
Merge remote-tracking branch 'upstream/main'
github-actions[bot] May 11, 2025
019e834
Merge remote-tracking branch 'upstream/main'
github-actions[bot] May 12, 2025
deda756
Merge remote-tracking branch 'upstream/main'
github-actions[bot] May 13, 2025
8458f5e
Update LICENSE
Zhuul May 13, 2025
61192e6
Merge remote-tracking branch 'upstream/main'
github-actions[bot] May 13, 2025
e82b373
Create test_vllm.py
Zhuul May 14, 2025
c513e85
Revert "Update LICENSE"
Zhuul May 14, 2025
4ffea43
Update sync_with_upstream.yml
Zhuul Jul 28, 2025
8221a4d
Update sync_with_upstream.yml
Zhuul Jul 28, 2025
4ba01a2
Merge branch 'vllm-project:main' into main
Zhuul Jul 28, 2025
4b16052
[Enhancement] Add run-vllm-dev.ps1 script for launching vLLM developm…
Zhuul Jul 28, 2025
ce1ca96
Add troubleshooting and setup scripts for WSL2 + Podman + GPU
Zhuul Aug 8, 2025
982a0d2
Update sync_with_upstream.yml
Zhuul Aug 8, 2025
1a79898
quick fix
Zhuul Aug 8, 2025
2b84d21
Merge branch 'vllm-project:main' into main
Zhuul Aug 8, 2025
670a06d
Merge branch 'main' of https://github.com/Zhuul/vllm
Zhuul Aug 8, 2025
31d2d18
Update sync_with_upstream.yml
Zhuul Aug 13, 2025
9de7e16
feat: Add RTX 5090 (sm_120) support and container optimizations
Zhuul Aug 13, 2025
d1db810
build: add ENABLE_MACHETE option + fix arch list duplication for sm_1…
Zhuul Aug 13, 2025
004c22d
chore(extras): keep all dev-container and helper changes in extras/ o…
Zhuul Aug 14, 2025
1560347
chore(sync): restore repo to upstream/main except extras/; revert loc…
Zhuul Aug 14, 2025
da32b3b
feat: Add enhanced Podman scripts for vLLM development with GPU support
Zhuul Aug 14, 2025
95a3dcb
chore: restore .buildkite/run-multi-node-test.sh to upstream/main
Zhuul Aug 14, 2025
b477863
ci: use GITHUB_TOKEN in sync_with_upstream workflow and set permissions
Zhuul Aug 14, 2025
899799e
ci: revert sync_with_upstream to GH_PAT-based auth
Zhuul Aug 14, 2025
dacb7c0
ci: trigger sync_with_upstream workflow
Zhuul Aug 14, 2025
ba5f570
Merge remote-tracking branch 'upstream/main'
Zhuul Aug 14, 2025
2f61bd9
Delete extras/CONTAINER_SETUP_COMPLETE.md
Zhuul Aug 14, 2025
059bd49
extras: CUDA 12.9 dev container w/ cu129 nightly; enable sm_120; add …
Zhuul Aug 14, 2025
a3507b1
enhance Dockerfile and setup scripts for improved build performance a…
Zhuul Aug 14, 2025
05f8feb
refactor: streamline Dockerfile dependencies and enhance run script w…
Zhuul Aug 14, 2025
91aac9f
Merge branch 'main' of https://github.com/Zhuul/vllm
Zhuul Aug 14, 2025
334f5ee
Update sync_with_upstream.yml
Zhuul Aug 14, 2025
d4a7d41
enhance: add NCCL support and improve dev setup scripts for better GP…
Zhuul Aug 14, 2025
2c7a70b
Merge branch 'main' of https://github.com/Zhuul/vllm
Zhuul Aug 14, 2025
9ae0e5d
enhance: add rsync to Dockerfile dependencies and improve dev setup s…
Zhuul Aug 14, 2025
d7e0b0e
enhance: add progress display option for dev setup scripts and update…
Zhuul Aug 14, 2025
a261e7f
enhance: make progress watcher fully opt-in, disabling auto-enable on…
Zhuul Aug 14, 2025
ee74860
enhance: add initial empty files for various scripts and documentatio…
Zhuul Aug 14, 2025
5839c61
enhance: add WorkVolume and WorkDirHost parameters to dev launcher sc…
Zhuul Aug 16, 2025
2df711b
cleanup: remove deprecated test scripts to streamline the codebase
Zhuul Aug 16, 2025
4f67b52
feat: Introduce Podman support and deprecate legacy scripts
Zhuul Aug 16, 2025
7978e52
Merge branch 'vllm-project:main' into main
Zhuul Aug 16, 2025
66d45f7
Merge branch 'vllm-project:main' into main
Zhuul Aug 16, 2025
e152b58
Merge branch 'vllm-project:main' into main
Zhuul Aug 19, 2025
a79e209
Merge remote-tracking branch 'upstream/main'
Zhuul Aug 31, 2025
1 change: 1 addition & 0 deletions .github/ci-trigger-20250814-1
@@ -0,0 +1 @@
trigger: sync_with_upstream
80 changes: 80 additions & 0 deletions .github/workflows/sync_with_upstream.yml
@@ -0,0 +1,80 @@
name: Sync with Upstream

on:
  schedule:
    - cron: '0 0 * * *' # Runs daily at midnight
  push:
    branches:
      - main

jobs:
  sync:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Git
        run: |
          git config --global user.name 'Zhuul'
          git config --global user.email '[email protected]'

      - name: Add upstream remote
        run: git remote add upstream https://github.com/vllm-project/vllm.git

      - name: Fetch upstream changes
        run: git fetch upstream

      - name: Merge upstream changes
        id: merge
        run: |
          git checkout main
          git merge upstream/main || {
            echo "Merge conflict detected. Creating a new branch for manual resolution."
            git checkout -b "merge-conflict-$(date +%Y%m%d%H%M%S)"
            git push origin HEAD
            echo "conflict=true" >> "$GITHUB_OUTPUT"
            exit 1
          }
          echo "conflict=false" >> "$GITHUB_OUTPUT"

      - name: Check for workflow file changes
        id: workflow_change
        run: |
          if git diff --name-only upstream/main | grep '^.github/workflows/'; then
            echo "workflow_changed=true" >> "$GITHUB_OUTPUT"
          else
            echo "workflow_changed=false" >> "$GITHUB_OUTPUT"
          fi

      - name: Set up PAT authentication
        env:
          GH_PAT: ${{ secrets.GH_PAT }}
        run: |
          git remote set-url origin "https://Zhuul:${GH_PAT}@github.com/Zhuul/vllm.git"

      - name: Push changes if no workflow files changed
        if: steps.workflow_change.outputs.workflow_changed == 'false' && steps.merge.outputs.conflict == 'false'
        run: git push origin main

      - name: Create Pull Request for workflow file changes
        if: steps.workflow_change.outputs.workflow_changed == 'true' && steps.merge.outputs.conflict == 'false'
        uses: peter-evans/create-pull-request@v6
        with:
          token: ${{ secrets.GH_PAT }}
          commit-message: "Sync with upstream: update workflow files"
          title: "Sync with upstream: update workflow files"
          body: |
            This PR was automatically created because workflow files were updated while syncing with upstream.
            Please review and merge.
          branch: workflow-sync-${{ github.run_id }}
          base: main

      - name: Send notification if merge conflict
        if: steps.merge.outputs.conflict == 'true'
        run: |
          echo "Merge conflict detected. Manual intervention required."
          # Add your notification logic here (e.g., send an email, create an issue, etc.)

Check warning on line 65 in .github/workflows/sync_with_upstream.yml (Codacy Static Code Analysis): an action sourced from a third-party repository on GitHub is not pinned to a full-length commit SHA; pinning an action to a full-length commit SHA is currently the only way to use an action as an immutable release.
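Since the workflow runs on every push to `main` (plus the nightly schedule), the `.github/ci-trigger-20250814-1` marker added above is presumably how a sync run was nudged manually. A hedged sketch of that pattern, with an illustrative file name:

```bash
# Hypothetical sketch of the trigger-file pattern seen in this PR; the file name is illustrative.
stamp=$(date +%Y%m%d-%H%M)
echo "trigger: sync_with_upstream" > ".github/ci-trigger-${stamp}"
git add ".github/ci-trigger-${stamp}"
git commit -m "ci: trigger sync_with_upstream workflow"
git push origin main   # the push to main starts the Sync with Upstream workflow
```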
2 changes: 2 additions & 0 deletions docs/contributing/README.md
@@ -31,6 +31,8 @@ See <gh-file:LICENSE>.
Depending on the kind of development you'd like to do (e.g. Python, CUDA), you can choose to build vLLM with or without compilation.
Check out the [building from source][build-from-source] documentation for details.

For a containerized developer workflow, see the Podman-first dev guide: `docs/contributing/podman-dev.md`.

For an optimized workflow when iterating on C++/CUDA kernels, see the [Incremental Compilation Workflow](./incremental_build.md) for recommendations.

### Building the docs with MkDocs
41 changes: 41 additions & 0 deletions docs/contributing/podman-dev.md
@@ -0,0 +1,41 @@
---
title: Podman-first Development Environment
---

This guide documents the Podman-first development workflow for building vLLM from source with CUDA and PyTorch nightly.

Primary entrypoint

- Windows (PowerShell): `./extras/podman/run.ps1`
- Linux/macOS (bash): `extras/podman/run.sh`

Legacy launchers at `extras/run-vllm-dev.ps1` and `extras/run-vllm-dev.sh` are deprecated and forward to the Podman wrappers.

Prerequisites

- Podman with GPU CDI enabled (on Windows, use Podman Desktop + WSL; ensure NVIDIA drivers and CUDA are installed on the host).
- Optional named volume for build/work space, e.g., `vllm-work`.

Quick start

Windows (PowerShell):

```powershell
./extras/podman/run.ps1 -Build
./extras/podman/run.ps1 -GPUCheck
./extras/podman/run.ps1 -Setup -WorkVolume vllm-work -Progress
```

Linux/macOS (bash):

```bash
extras/podman/run.sh --build
extras/podman/run.sh --gpu-check
extras/podman/run.sh --setup --work-volume vllm-work --progress
```

Notes

- The image is based on the CUDA 12.9 UBI9 image and installs the PyTorch nightly cu129 wheels first to ensure the latest GPU arch support (including sm_120 when present).
- The setup step performs an editable vLLM install without downgrading torch-family packages.
- Use a named Podman volume for `/opt/work` to avoid `/tmp` tmpfs pressure and to speed up rebuilds (see the sketch below).
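Building on the last note, a minimal sketch of what the wrappers do under the hood: pre-create the named work volume and attach it at `/opt/work` (the image tag and exact flags are assumptions; `run.ps1`/`run.sh` remain the supported entrypoints):

```bash
# Hypothetical sketch; extras/podman/run.sh wraps this, and the image tag is an assumption.
podman volume create vllm-work        # persistent build/work space, reused across rebuilds

# CDI GPU passthrough, named volume on /opt/work, source tree on /workspace.
podman run --rm -it \
    --device nvidia.com/gpu=all \
    -v vllm-work:/opt/work \
    -v "$PWD":/workspace \
    localhost/vllm-dev:latest bash
```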
39 changes: 39 additions & 0 deletions extras/.dockerignore
@@ -0,0 +1,39 @@
# Reduce build context to avoid Windows Podman tar write issues
.git
.github
.vscode
.venv
venv
node_modules
build
dist
csrc/
vllm/
benchmarks/
docs/
examples/
tests/
**/__pycache__
**/*.pyc
**/*.pyo
**/*.pyd
**/*.so
**/*.o
**/*.a
**/*.dll
**/*.dylib
extras/build.log
extras/*.bak
extras/tools/
extras/run-vllm-dev-*.ps1
extras/run-vllm-dev-*.sh
extras/*wsl*
extras/*docker*.ps1

!extras/Dockerfile
!extras/run-vllm-dev.ps1
!extras/run-vllm-dev.sh
!extras/dev-setup.sh
requirements/
pyproject.toml
setup.py
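For context, a hedged sketch of how this ignore file could be applied when building from the repo root (the `--ignorefile` usage and image tag are assumptions; the actual invocation lives in `extras/podman/build.sh`):

```bash
# Hypothetical invocation; the real extras/podman/build.sh may differ.
# --ignorefile lets podman honour this file when the build context is the repo root.
podman build \
    --ignorefile extras/.dockerignore \
    -f extras/Dockerfile \
    -t localhost/vllm-dev:latest \
    .
```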
168 changes: 168 additions & 0 deletions extras/Dockerfile
@@ -0,0 +1,168 @@
# vLLM Development Container with GPU Support
# Uses vLLM's own requirements for automatic dependency management

FROM nvidia/cuda:12.9.1-cudnn-devel-ubi9

# Set CUDA environment variables for build tools
ENV CUDA_HOME=/usr/local/cuda
ENV CUDA_ROOT=/usr/local/cuda
ENV PATH=$CUDA_HOME/bin:$PATH
ENV LD_LIBRARY_PATH=$CUDA_HOME/lib64:$LD_LIBRARY_PATH
ENV CUDA_TOOLKIT_ROOT_DIR=$CUDA_HOME
ENV CUDNN_LIBRARY_PATH=/usr/lib64
ENV CUDNN_INCLUDE_PATH=/usr/include

# Install system packages with additional CUDA development libraries
RUN dnf update -y && dnf install --allowerasing -y \
python3 python3-pip python3-devel \
git gcc gcc-c++ cmake ninja-build \
make patch which findutils tar rsync \
wget curl vim nano \
&& dnf clean all

# Create symlinks for python
RUN ln -sf /usr/bin/python3 /usr/bin/python

# Create a non-root user for development
RUN useradd -m -s /bin/bash vllmuser && \
echo "vllmuser ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers

# Install essential system tools
RUN dnf install -y hostname iproute iputils

# Add NVIDIA Machine Learning repo for RHEL9/UBI9 and install NCCL runtime/devel
# Needed for PyTorch nightly cu129 to avoid ncclCommWindowRegister symbol errors
# Install NCCL runtime/devel from the CUDA repository available in the base image
RUN set -euxo pipefail \
&& dnf makecache -y \
&& (dnf install -y libnccl libnccl-devel || dnf install -y libnccl-2 libnccl-devel-2) \
&& dnf clean all

# Set working directory and adjust ownership
WORKDIR /workspace
RUN chown -R vllmuser:vllmuser /workspace

# Create build directories with proper permissions
RUN mkdir -p /workspace/.deps && chown -R vllmuser:vllmuser /workspace/.deps && \
mkdir -p /tmp/vllm-build && chmod 777 /tmp/vllm-build && \
mkdir -p /opt/work && chmod 777 /opt/work && \
mkdir -p /home/vllmuser/.cache && chown -R vllmuser:vllmuser /home/vllmuser/.cache && \
mkdir -p /home/vllmuser/.ccache && chown -R vllmuser:vllmuser /home/vllmuser/.ccache && \
mkdir -p /home/vllmuser/.cmake && chown -R vllmuser:vllmuser /home/vllmuser/.cmake && \
chmod -R 755 /workspace && \
chmod -R 777 /tmp

# Switch to the non-root user
USER vllmuser

# Create and activate virtual environment
ENV VIRTUAL_ENV=/home/vllmuser/venv
RUN python3 -m venv $VIRTUAL_ENV
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

# Set pip configuration
ENV PIP_DISABLE_PIP_VERSION_CHECK=1
ENV PIP_NO_CACHE_DIR=1
ENV PYTHONUNBUFFERED=1
ENV PIP_DEFAULT_TIMEOUT=120
ENV PIP_RETRIES=5
ENV PIP_PREFER_BINARY=1

# Upgrade pip and setuptools to latest versions
RUN pip install --upgrade pip "setuptools>=61" wheel

COPY requirements/ /tmp/requirements/

# Install PyTorch nightly first (includes latest GPU arch support such as Blackwell sm_120 when present)
RUN pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu129

# Install modern build tools and vLLM's build dependencies and CUDA deps early,
# but sanitize requirements to avoid downgrading torch-family or forcing xformers pins.
COPY pyproject.toml /tmp/pyproject.toml
RUN set -euxo pipefail \
&& cd /tmp \
&& pip install "setuptools>=61" "setuptools-scm>=8" build wheel ninja cmake \
&& mkdir -p /tmp/requirements_sanitized \
&& for f in build.txt cuda.txt common.txt; do \
if [ -f "/tmp/requirements/$f" ]; then \
sed -E '/^(torch|torchvision|torchaudio|xformers)\b/Id' "/tmp/requirements/$f" > "/tmp/requirements_sanitized/$f"; \
fi; \
done \
&& pip install --pre \
-r /tmp/requirements_sanitized/build.txt \
-r /tmp/requirements_sanitized/cuda.txt \
-r /tmp/requirements_sanitized/common.txt \
&& pip install --pre --upgrade \
torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu129

# Install minimal development extras
RUN pip install pytest pytest-asyncio ipython

# Note: vLLM will be installed from source in development mode via dev-setup.sh
# This ensures compatibility with the PyTorch nightly build

# Create activation script for easy virtual environment access
RUN echo '#!/bin/bash' > /home/vllmuser/activate_venv.sh && \
echo 'source /home/vllmuser/venv/bin/activate' >> /home/vllmuser/activate_venv.sh && \
echo 'echo "Virtual environment activated: $VIRTUAL_ENV"' >> /home/vllmuser/activate_venv.sh && \
echo 'echo "Python version: $(python --version)"' >> /home/vllmuser/activate_venv.sh && \
echo 'echo "PyTorch version: $(python -c \"import torch; print(torch.__version__)\")"' >> /home/vllmuser/activate_venv.sh && \
echo 'echo "CUDA available: $(python -c \"import torch; print(torch.cuda.is_available())\")"' >> /home/vllmuser/activate_venv.sh && \
chmod +x /home/vllmuser/activate_venv.sh

# Ensure virtual environment is activated in .bashrc
RUN echo 'source /home/vllmuser/venv/bin/activate' >> /home/vllmuser/.bashrc && \
echo 'echo "🐍 Python virtual environment activated"' >> /home/vllmuser/.bashrc && \
echo 'echo "🚀 Ready for vLLM development!"' >> /home/vllmuser/.bashrc

# Create development helper script that uses current workspace requirements
RUN echo '#!/bin/bash' > /home/vllmuser/setup_vllm_dev.sh && \
echo 'echo "🔧 Setting up vLLM for development..."' >> /home/vllmuser/setup_vllm_dev.sh && \
echo 'cd /workspace' >> /home/vllmuser/setup_vllm_dev.sh && \
echo '# Use temporary build directory to avoid permission issues' >> /home/vllmuser/setup_vllm_dev.sh && \
echo 'export TMPDIR=/tmp/vllm-build' >> /home/vllmuser/setup_vllm_dev.sh && \
echo 'mkdir -p "$TMPDIR" && chmod 777 "$TMPDIR"' >> /home/vllmuser/setup_vllm_dev.sh && \
echo 'export CMAKE_BUILD_PARALLEL_LEVEL=4' >> /home/vllmuser/setup_vllm_dev.sh && \
echo 'export VLLM_INSTALL_PUNICA_KERNELS=0' >> /home/vllmuser/setup_vllm_dev.sh && \
echo 'export MAX_JOBS=4' >> /home/vllmuser/setup_vllm_dev.sh && \
echo '# Install current workspace requirements first' >> /home/vllmuser/setup_vllm_dev.sh && \
echo 'if [ -f requirements/common.txt ]; then pip install -r requirements/common.txt; fi' >> /home/vllmuser/setup_vllm_dev.sh && \
echo '# Use temporary directory for CMake build files' >> /home/vllmuser/setup_vllm_dev.sh && \
echo 'FETCHCONTENT_BASE_DIR="$TMPDIR/deps" pip install -e . --no-deps --no-build-isolation --verbose' >> /home/vllmuser/setup_vllm_dev.sh && \
echo 'echo "✅ vLLM installed in editable mode!"' >> /home/vllmuser/setup_vllm_dev.sh && \
echo 'python -c "import vllm; print(\"vLLM version:\", vllm.__version__)"' >> /home/vllmuser/setup_vllm_dev.sh && \
chmod +x /home/vllmuser/setup_vllm_dev.sh

# Add environment variables for better CUDA memory management and build optimization
ENV PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
# Do not pin a single GPU here; let runtime inject device selection
# ENV CUDA_VISIBLE_DEVICES=0
ENV CMAKE_BUILD_PARALLEL_LEVEL=4
ENV VLLM_INSTALL_PUNICA_KERNELS=0
ENV MAX_JOBS=4

# Enable ccache for faster rebuilds
ENV CCACHE_DIR=/home/vllmuser/.ccache
ENV CCACHE_MAXSIZE=10G
ENV PATH=/usr/lib64/ccache:$PATH

# CUDA arch list including legacy + latest (sm_120) so builds cover both older and newest GPUs.
ENV TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0 12.0"
# Do not force-disable Machete; allow upstream defaults. User may still pass -e CMAKE_ARGS for custom CMake settings.
ENV CMAKE_ARGS=""

# WSL2-specific CUDA environment configuration
ENV NVIDIA_VISIBLE_DEVICES=all
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
ENV LD_LIBRARY_PATH=/usr/local/cuda/compat:/usr/lib/wsl/drivers:/usr/lib/wsl/lib:/usr/lib/x86_64-linux-gnu:/usr/local/cuda/lib64:/usr/local/cuda/lib:$LD_LIBRARY_PATH

# Add runtime library detection script
RUN echo '#!/bin/bash' > /home/vllmuser/check_cuda_libs.sh && \
echo 'echo "=== CUDA Library Check ==="' >> /home/vllmuser/check_cuda_libs.sh && \
echo 'echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH"' >> /home/vllmuser/check_cuda_libs.sh && \
echo 'echo "Searching for CUDA libraries..."' >> /home/vllmuser/check_cuda_libs.sh && \
echo 'find /usr/lib/wsl -name "libcuda.so*" 2>/dev/null | head -3 || echo "No WSL CUDA libs"' >> /home/vllmuser/check_cuda_libs.sh && \
echo 'ldconfig -p | grep cuda | head -3 || echo "No CUDA in ldconfig"' >> /home/vllmuser/check_cuda_libs.sh && \
echo 'echo "PyTorch CUDA status:"' >> /home/vllmuser/check_cuda_libs.sh && \
echo 'python -c "import torch; print(f\"CUDA available: {torch.cuda.is_available()}\"); print(f\"Device count: {torch.cuda.device_count()}\")" 2>/dev/null || echo "PyTorch not available"' >> /home/vllmuser/check_cuda_libs.sh && \
chmod +x /home/vllmuser/check_cuda_libs.sh
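For orientation, a hedged sketch of exercising the helper script this image bakes in (the image tag is assumed and the CDI device syntax matches podman 4.x+; the `extras/podman` wrappers are the supported path):

```bash
# Hypothetical direct invocation; runs the baked-in CUDA library check with the
# source tree mounted at /workspace, as the setup helper expects.
podman run --rm -it \
    --device nvidia.com/gpu=all \
    -v "$PWD":/workspace \
    localhost/vllm-dev:latest \
    bash -lc '/home/vllmuser/check_cuda_libs.sh'
```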
50 changes: 50 additions & 0 deletions extras/README.md
@@ -0,0 +1,50 @@
# extras/ overview

This directory hosts all non-core assets: container/build tooling, configs, testing, storage helpers, and optional patches. The goals are clarity, single-responsibility, and easy extension without touching the vLLM core.

Suggested layout (implemented here):

- podman/ — Podman-specific build/launch wrappers and helpers
- configs/ — Centralized, declarative versions and build configuration
- secrets/ — Gitignored area for local tokens/config (not committed)
- testing/ — Test/benchmark harness, matrices, and results
- storage/ — External volumes and cache management helpers
- patches/ — Optional patch/plug-in mechanism for controlled tweaks

Primary entrypoint: use `extras/podman/` as the canonical way to build and run the dev container.

Deprecation: the legacy launchers `extras/run-vllm-dev.sh` and `extras/run-vllm-dev.ps1` are deprecated and now forward to the Podman wrappers. Please switch to `extras/podman/run.sh` (Linux/macOS) or `extras/podman/run.ps1` (Windows).

## Quick start

- Edit `extras/configs/build.env` to set CUDA/UBI/Python defaults.
- Use `extras/podman/build.sh` to build images with those defaults.
- Use `extras/podman/run.ps1` (Windows) or `extras/podman/run.sh` (Linux/macOS) to run the dev container.

Examples

- Windows (PowerShell):
    - Build image: `./extras/podman/run.ps1 -Build`
    - GPU check: `./extras/podman/run.ps1 -GPUCheck`
    - Setup build: `./extras/podman/run.ps1 -Setup -WorkVolume vllm-work -Progress`

- Linux/macOS (bash):
    - Build image: `extras/podman/run.sh --build`
    - GPU check: `extras/podman/run.sh --gpu-check`
    - Setup build: `extras/podman/run.sh --setup --work-volume vllm-work --progress`

Check annotations on lines 27-29 and 32-34 in extras/README.md (GitHub Actions pre-commit, Codacy Static Code Analysis): hard tabs at column 1 and unordered list indentation of 1 instead of the expected 4.

## Secrets

Place tokens in `extras/secrets/` per its README and never commit them. Load them into your shell session or bind-mount them into containers, as sketched below.
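For instance, a hedged sketch of both approaches (the file and variable names are illustrative, not part of this PR):

```bash
# Hypothetical: extras/secrets/hf.env holding e.g. HF_TOKEN=... (gitignored, never committed).
set -a && source extras/secrets/hf.env && set +a   # export into the current session only

# Or hand the file straight to the container instead of baking it into the image.
podman run --rm -it --env-file extras/secrets/hf.env localhost/vllm-dev:latest bash
```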

## Testing

See `extras/testing/README.md` for defining a matrix, recording results, and comparing runs.

## Storage

See `extras/storage/README.md` for model/cache volume guidance for performance and reproducibility.

## Patches

If you need to tweak upstream vLLM without forking, use `extras/patches/` to stage diffs and apply them during build.
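For example, a hedged sketch of staging and re-applying such a patch (the file name and the apply step are illustrative; how patches hook into the build is defined under `extras/patches/`):

```bash
# Hypothetical: capture a local tweak as a patch kept outside the core tree.
git diff -- csrc/ vllm/ > extras/patches/0001-local-tweak.patch

# Later (for example during image build or dev setup), re-apply it on a clean checkout.
git apply --check extras/patches/0001-local-tweak.patch && \
    git apply extras/patches/0001-local-tweak.patch
```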