diff --git a/.github/ci-trigger-20250814-1 b/.github/ci-trigger-20250814-1
new file mode 100644
index 000000000000..8ca993aa58b2
--- /dev/null
+++ b/.github/ci-trigger-20250814-1
@@ -0,0 +1 @@
+trigger: sync_with_upstream
diff --git a/.github/workflows/sync_with_upstream.yml b/.github/workflows/sync_with_upstream.yml
new file mode 100644
index 000000000000..df1048a43833
--- /dev/null
+++ b/.github/workflows/sync_with_upstream.yml
@@ -0,0 +1,80 @@
+name: Sync with Upstream
+
+on:
+  schedule:
+    - cron: '0 0 * * *'  # Runs daily at midnight UTC
+  push:
+    branches:
+      - main
+
+jobs:
+  sync:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Set up Git
+        run: |
+          git config --global user.name 'Zhuul'
+          git config --global user.email '40538530+Zhuul@users.noreply.github.com'
+
+      - name: Add upstream remote
+        run: git remote add upstream https://github.com/vllm-project/vllm.git
+
+      - name: Fetch upstream changes
+        run: git fetch upstream
+
+      - name: Merge upstream changes
+        id: merge
+        run: |
+          git checkout main
+          git merge upstream/main || {
+            echo "Merge conflict detected. Creating a new branch for manual resolution."
+            git checkout -b "merge-conflict-$(date +%Y%m%d%H%M%S)"
+            git push origin HEAD
+            echo "conflict=true" >> "$GITHUB_OUTPUT"
+            exit 1
+          }
+          echo "conflict=false" >> "$GITHUB_OUTPUT"
+
+      - name: Check for workflow file changes
+        id: workflow_change
+        run: |
+          if git diff --name-only upstream/main | grep -q '^\.github/workflows/'; then
+            echo "workflow_changed=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "workflow_changed=false" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Set up PAT authentication
+        env:
+          GH_PAT: ${{ secrets.GH_PAT }}
+        run: |
+          git remote set-url origin "https://Zhuul:${GH_PAT}@github.com/Zhuul/vllm.git"
+
+      - name: Push changes if no workflow files changed
+        if: steps.workflow_change.outputs.workflow_changed == 'false' && steps.merge.outputs.conflict == 'false'
+        run: git push origin main
+
+      - name: Create Pull Request for workflow file changes
+        if: steps.workflow_change.outputs.workflow_changed == 'true' && steps.merge.outputs.conflict == 'false'
+        uses: peter-evans/create-pull-request@v6
+        with:
+          token: ${{ secrets.GH_PAT }}
+          commit-message: "Sync with upstream: update workflow files"
+          title: "Sync with upstream: update workflow files"
+          body: |
+            This PR was automatically created because workflow files were updated while syncing with upstream.
+            Please review and merge.
+          branch: workflow-sync-${{ github.run_id }}
+          base: main
+
+      # failure() is required here: the merge step exits non-zero on conflict, so a
+      # plain expression (implicit success()) would never let this step run.
+      - name: Send notification if merge conflict
+        if: failure() && steps.merge.outputs.conflict == 'true'
+        run: |
+          echo "Merge conflict detected. Manual intervention required."
+          # Add your notification logic here (e.g., send an email, create an issue, etc.)
diff --git a/.gitignore b/.gitignore
index 465935d488f8..a5bd3740e844 100644
--- a/.gitignore
+++ b/.gitignore
@@ -209,4 +209,5 @@ shellcheck*/
 csrc/moe/marlin_moe_wna16/kernel_*
 
 # Ignore ep_kernels_workspace folder
-ep_kernels_workspace/
\ No newline at end of file
+ep_kernels_workspace/node_modules/
+package*.json
diff --git a/docs/contributing/README.md b/docs/contributing/README.md
index 5a2a70d57e85..1c2a31cf895c 100644
--- a/docs/contributing/README.md
+++ b/docs/contributing/README.md
@@ -31,6 +31,8 @@ See .
 
 Depending on the kind of development you'd like to do (e.g. Python, CUDA), you can choose to build vLLM with or without compilation. Check out the [building from source][build-from-source] documentation for details.
 
+For a containerized developer workflow, see the Podman-first development guide at `docs/contributing/podman-dev.md`.
+
 For an optimized workflow when iterating on C++/CUDA kernels, see the [Incremental Compilation Workflow](./incremental_build.md) for recommendations.
 
 ### Building the docs with MkDocs
diff --git a/docs/contributing/podman-dev.md b/docs/contributing/podman-dev.md
new file mode 100644
index 000000000000..881e495f8421
--- /dev/null
+++ b/docs/contributing/podman-dev.md
@@ -0,0 +1,41 @@
+---
+title: Podman-first Development Environment
+---
+
+This guide documents the Podman-first development workflow for building vLLM from source with CUDA and PyTorch nightly.
+
+## Primary entrypoint
+
+- Windows (PowerShell): `./extras/podman/run.ps1`
+- Linux/macOS (bash): `extras/podman/run.sh`
+
+Legacy launchers at `extras/run-vllm-dev.ps1` and `extras/run-vllm-dev.sh` are deprecated and forward to the Podman wrappers.
+
+## Prerequisites
+
+- Podman with GPU CDI enabled (on Windows, use Podman Desktop + WSL; ensure NVIDIA drivers and CUDA are installed on the host).
+- Optionally, a named volume for build/work space, e.g. `vllm-work`.
+
+## Quick start
+
+Windows (PowerShell):
+
+```powershell
+./extras/podman/run.ps1 -Build
+./extras/podman/run.ps1 -GPUCheck
+./extras/podman/run.ps1 -Setup -WorkVolume vllm-work -Progress
+```
+
+Linux/macOS (bash):
+
+```bash
+extras/podman/run.sh --build
+extras/podman/run.sh --gpu-check
+extras/podman/run.sh --setup --work-volume vllm-work --progress
+```
+
+## Notes
+
+- The image defaults to CUDA 13.0 on a Rocky Linux 9 base (`ubi9` is also supported; see `extras/configs/build.env`) and installs the matching PyTorch nightly wheels first so the newest GPU architectures (including sm_120 when present) are covered.
+- The setup step performs an editable vLLM install without downgrading torch-family packages.
+- Use a named Podman volume for `/opt/work` to avoid `/tmp` tmpfs pressure and to speed up rebuilds.
diff --git a/extras/.dockerignore b/extras/.dockerignore
new file mode 100644
index 000000000000..60a8d81a82c1
--- /dev/null
+++ b/extras/.dockerignore
@@ -0,0 +1,39 @@
+# Reduce build context to avoid Windows Podman tar write issues
+.git
+.github
+.vscode
+.venv
+venv
+node_modules
+build
+dist
+csrc/
+vllm/
+benchmarks/
+docs/
+examples/
+tests/
+**/__pycache__
+**/*.pyc
+**/*.pyo
+**/*.pyd
+**/*.so
+**/*.o
+**/*.a
+**/*.dll
+**/*.dylib
+extras/build.log
+extras/*.bak
+extras/tools/
+extras/run-vllm-dev-*.ps1
+extras/run-vllm-dev-*.sh
+extras/*wsl*
+extras/*docker*.ps1
+
+!extras/Dockerfile
+!extras/run-vllm-dev.ps1
+!extras/run-vllm-dev.sh
+!extras/dev-setup.sh
+# Keep files required by COPY steps in extras/Dockerfile
+!requirements/
+!pyproject.toml
+!setup.py
diff --git a/extras/Dockerfile b/extras/Dockerfile
new file mode 100644
index 000000000000..052da8390c6d
--- /dev/null
+++ b/extras/Dockerfile
@@ -0,0 +1,259 @@
+# vLLM Development Container with GPU Support
+# Uses vLLM's own requirements for automatic dependency management
+
+# Build-time args to control CUDA/OS base and PyTorch nightly index
+ARG CUDA_VERSION=13.0.0
+ARG UBI_VERSION=9
+ARG TORCH_CUDA_INDEX=cu130
+# Base flavor for CUDA image: e.g. 'rockylinux9' (default) or 'ubi9'
+ARG BASE_FLAVOR=rockylinux9
+
+# Switchable base: defaults to Rocky Linux to avoid subscription-gated repos
+FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-${BASE_FLAVOR}
+
+# Set CUDA environment variables for build tools
+ENV CUDA_HOME=/usr/local/cuda
+ENV CUDA_ROOT=/usr/local/cuda
+ENV PATH=$CUDA_HOME/bin:$PATH
+ENV LD_LIBRARY_PATH=$CUDA_HOME/lib64:$LD_LIBRARY_PATH
+ENV CUDA_TOOLKIT_ROOT_DIR=$CUDA_HOME
+ENV CUDNN_LIBRARY_PATH=/usr/lib64
+ENV CUDNN_INCLUDE_PATH=/usr/include
+
+# Install system packages with additional CUDA development libraries
+RUN dnf update -y && dnf install --allowerasing -y \
+    python3 python3-pip python3-devel \
+    git gcc gcc-c++ cmake \
+    make patch which findutils tar rsync \
+    wget curl vim nano pkgconfig \
+    zlib-devel bzip2 bzip2-devel xz xz-devel libffi-devel \
+    openssl-devel sqlite-devel \
+    && (dnf install -y readline-devel || true) \
+    && dnf clean all
+
+# Prefer Python 3.12 from packages if available (fallback to system python3)
+RUN dnf install -y python3.12 python3.12-devel || true
+
+# Ensure /usr/bin/python exists for compatibility
+RUN ln -sf $(command -v python3) /usr/bin/python || true
+
+# Create a non-root user for development
+RUN useradd -m -s /bin/bash vllmuser && \
+    echo "vllmuser ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers
+
+# Install essential system tools
+RUN dnf install -y hostname iproute iputils
+
+ARG REQUIRE_FFMPEG=1
+# Multimedia and image libs with optional ffmpeg-devel enforcement.
+# Install EPEL and RPM Fusion repos for EL (9/10) and pull ffmpeg/ffmpeg-devel from there.
+# When REQUIRE_FFMPEG=1, fail the build if ffmpeg is still unavailable.
+RUN set -euxo pipefail \
+    && (dnf install -y dnf-plugins-core || true) \
+    && (dnf config-manager --set-enabled crb || true) \
+    && (dnf makecache -y || true) \
+    && . /etc/os-release \
+    && ELVER="${VERSION_ID%%.*}" \
+    && echo "[Dockerfile] Detected Enterprise Linux major version: ${ELVER}" \
+    && dnf install -y \
+        libjpeg-turbo-devel libpng-devel zlib-devel freetype-devel \
+        libsndfile libsndfile-devel sox sox-devel || true \
+    && if [ "${REQUIRE_FFMPEG}" = "1" ]; then \
+        echo "[Dockerfile] Enabling EPEL and RPM Fusion for ffmpeg (EL${ELVER})"; \
+        dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-${ELVER}.noarch.rpm; \
+        dnf install -y https://download1.rpmfusion.org/free/el/rpmfusion-free-release-${ELVER}.noarch.rpm; \
+        dnf install -y https://download1.rpmfusion.org/nonfree/el/rpmfusion-nonfree-release-${ELVER}.noarch.rpm; \
+        dnf makecache -y; \
+        dnf install -y ffmpeg ffmpeg-devel; \
+        command -v ffmpeg >/dev/null 2>&1; \
+    else \
+        # Best-effort install when not enforced
+        (dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-${ELVER}.noarch.rpm || true); \
+        (dnf install -y https://download1.rpmfusion.org/free/el/rpmfusion-free-release-${ELVER}.noarch.rpm || true); \
+        (dnf install -y https://download1.rpmfusion.org/nonfree/el/rpmfusion-nonfree-release-${ELVER}.noarch.rpm || true); \
+        (dnf makecache -y || true); \
+        (dnf install -y ffmpeg ffmpeg-devel || true); \
+    fi \
+    && (dnf install -y --enablerepo=crb ninja-build || \
+        dnf install -y --enablerepo=crb ninja || \
+        dnf install -y ninja-build || \
+        dnf install -y ninja || true) \
+    && dnf clean all || true
+
+# Install NCCL runtime/devel from the CUDA repository available in the base image.
+# Needed so the PyTorch nightly wheels find a matching NCCL (avoids ncclCommWindowRegister symbol errors).
+RUN set -euxo pipefail \
+    && dnf makecache -y \
+    && (dnf install -y libnccl libnccl-devel || dnf install -y libnccl-2 libnccl-devel-2) \
+    && dnf clean all
+
+# Set working directory and adjust ownership
+WORKDIR /workspace
+RUN chown -R vllmuser:vllmuser /workspace
+
+# Create build directories with proper permissions
+RUN mkdir -p /workspace/.deps && chown -R vllmuser:vllmuser /workspace/.deps && \
+    mkdir -p /tmp/vllm-build && chmod 777 /tmp/vllm-build && \
+    mkdir -p /opt/work && chmod 777 /opt/work && \
+    mkdir -p /home/vllmuser/.cache && chown -R vllmuser:vllmuser /home/vllmuser/.cache && \
+    mkdir -p /home/vllmuser/.ccache && chown -R vllmuser:vllmuser /home/vllmuser/.ccache && \
+    mkdir -p /home/vllmuser/.cmake && chown -R vllmuser:vllmuser /home/vllmuser/.cmake && \
+    chmod -R 755 /workspace && \
+    chmod -R 777 /tmp
+
+# Switch to the non-root user
+USER vllmuser
+
+# Create and activate a virtual environment using the best available Python (3.12 preferred)
+ENV VIRTUAL_ENV=/home/vllmuser/venv
+RUN PY_BIN="$(command -v python3.12 || command -v python3)" && "$PY_BIN" -m venv $VIRTUAL_ENV
+ENV PATH="$VIRTUAL_ENV/bin:$PATH"
+
+# Set pip configuration
+ENV PIP_DISABLE_PIP_VERSION_CHECK=1
+ENV PIP_NO_CACHE_DIR=1
+ENV PYTHONUNBUFFERED=1
+ENV PIP_DEFAULT_TIMEOUT=120
+ENV PIP_RETRIES=5
+ENV PIP_PREFER_BINARY=1
+
+# CUDA arch list including legacy + latest so builds cover both older and newest GPUs.
+# Can be overridden at build time with: --build-arg TORCH_CUDA_ARCH_LIST="..."
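+# Example (illustrative): trim the list to just Ada (8.9) and Hopper (9.0) for a
+# much faster local build:
+#   podman build -f extras/Dockerfile --build-arg TORCH_CUDA_ARCH_LIST="8.9 9.0" -t vllm-dev:latest .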
+ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0 12.0 13.0"
+ENV TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST}"
+
+# Upgrade pip and setuptools to current versions
+# (quote the specifier so the shell does not treat '>' as a redirection)
+RUN pip install --upgrade pip "setuptools>=61" wheel
+
+COPY requirements/ /tmp/requirements/
+
+# Install PyTorch nightly first (includes latest GPU arch support such as Blackwell sm_120 when present)
+ARG TORCH_CUDA_INDEX
+RUN pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/${TORCH_CUDA_INDEX}
+
+RUN pip install --pre torchvision --index-url https://download.pytorch.org/whl/nightly/${TORCH_CUDA_INDEX}
+RUN pip install --pre torchaudio --index-url https://download.pytorch.org/whl/nightly/${TORCH_CUDA_INDEX}
+
+# Install PyAV for torchvision video I/O (read_video) compatibility
+RUN pip install --upgrade av
+
+# Install TorchCodec to support torchaudio.load on recent nightlies
+RUN set -euxo pipefail \
+    && (pip install --pre torchcodec \
+        || pip install torchcodec \
+        || pip install --no-deps 'git+https://github.com/pytorch/torchcodec@main')
+
+# Install modern build tools and vLLM's build dependencies and CUDA deps early,
+# but sanitize requirements to avoid downgrading torch-family or forcing xformers pins.
+COPY pyproject.toml /tmp/pyproject.toml
+RUN set -euxo pipefail \
+    && cd /tmp \
+    && pip install "setuptools>=61" "setuptools-scm>=8" build wheel ninja cmake \
+    && mkdir -p /tmp/requirements_sanitized \
+    && for f in build.txt cuda.txt common.txt; do \
+        if [ -f "/tmp/requirements/$f" ]; then \
+            sed -E '/^(torch|torchvision|torchaudio|xformers)\b/Id' "/tmp/requirements/$f" > "/tmp/requirements_sanitized/$f"; \
+        fi; \
+    done \
+    && pip install --pre \
+        -r /tmp/requirements_sanitized/build.txt \
+        -r /tmp/requirements_sanitized/cuda.txt \
+        -r /tmp/requirements_sanitized/common.txt \
+    && pip install --pre --upgrade \
+        torch --index-url https://download.pytorch.org/whl/nightly/${TORCH_CUDA_INDEX}
+
+# Install minimal development extras
+RUN pip install pytest pytest-asyncio ipython
+
+# Note: vLLM will be installed from source in development mode via dev-setup.sh
+# This ensures compatibility with the PyTorch nightly build
+
+# Create activation script for easy virtual environment access
+RUN echo '#!/bin/bash' > /home/vllmuser/activate_venv.sh && \
+    echo 'source /home/vllmuser/venv/bin/activate' >> /home/vllmuser/activate_venv.sh && \
+    echo 'echo "Virtual environment activated: $VIRTUAL_ENV"' >> /home/vllmuser/activate_venv.sh && \
+    echo 'echo "Python version: $(python --version)"' >> /home/vllmuser/activate_venv.sh && \
+    echo 'echo "PyTorch version: $(python -c \"import torch; print(torch.__version__)\")"' >> /home/vllmuser/activate_venv.sh && \
+    echo 'echo "CUDA available: $(python -c \"import torch; print(torch.cuda.is_available())\")"' >> /home/vllmuser/activate_venv.sh && \
+    chmod +x /home/vllmuser/activate_venv.sh
+
+# Ensure virtual environment is activated in .bashrc
+RUN echo 'source /home/vllmuser/venv/bin/activate' >> /home/vllmuser/.bashrc && \
+    echo 'echo "🐍 Python virtual environment activated"' >> /home/vllmuser/.bashrc && \
+    echo 'echo "🚀 Ready for vLLM development!"' >> /home/vllmuser/.bashrc
+
+# Create development helper script that uses current workspace requirements
+RUN echo '#!/bin/bash' > /home/vllmuser/setup_vllm_dev.sh && \
+    echo 'echo "🔧 Setting up vLLM for development..."' >> /home/vllmuser/setup_vllm_dev.sh && \
+    echo 'cd /workspace' >> /home/vllmuser/setup_vllm_dev.sh && \
+    echo '# Use temporary build directory to avoid permission issues' >> /home/vllmuser/setup_vllm_dev.sh && \
+    echo 'export TMPDIR=/tmp/vllm-build' >> /home/vllmuser/setup_vllm_dev.sh && \
+    echo 'mkdir -p "$TMPDIR" && chmod 777 "$TMPDIR"' >> /home/vllmuser/setup_vllm_dev.sh && \
+    echo 'export CMAKE_BUILD_PARALLEL_LEVEL=4' >> /home/vllmuser/setup_vllm_dev.sh && \
+    echo 'export VLLM_INSTALL_PUNICA_KERNELS=0' >> /home/vllmuser/setup_vllm_dev.sh && \
+    echo 'export MAX_JOBS=4' >> /home/vllmuser/setup_vllm_dev.sh && \
+    echo '# Install current workspace requirements first' >> /home/vllmuser/setup_vllm_dev.sh && \
+    echo 'if [ -f requirements/common.txt ]; then pip install -r requirements/common.txt; fi' >> /home/vllmuser/setup_vllm_dev.sh && \
+    echo '# Use temporary directory for CMake build files' >> /home/vllmuser/setup_vllm_dev.sh && \
+    echo 'FETCHCONTENT_BASE_DIR="$TMPDIR/deps" pip install -e . --no-deps --no-build-isolation --verbose' >> /home/vllmuser/setup_vllm_dev.sh && \
+    echo 'echo "✅ vLLM installed in editable mode!"' >> /home/vllmuser/setup_vllm_dev.sh && \
+    echo 'python -c "import vllm; print(\"vLLM version:\", vllm.__version__)"' >> /home/vllmuser/setup_vllm_dev.sh && \
+    chmod +x /home/vllmuser/setup_vllm_dev.sh
+
+# Provide a helper to apply repo patches against the mounted /workspace
+# Create under /usr/local/bin as root, then switch back to non-root user
+USER root
+RUN printf '%s\n' \
+    '#!/usr/bin/env bash' \
+    'set -euo pipefail' \
+    'cd /workspace 2>/dev/null || exit 0' \
+    'SCRIPT=./extras/patches/apply_patches.sh' \
+    'if [ -f "$SCRIPT" ]; then' \
+    '  echo "[apply-patches] Running $SCRIPT"' \
+    '  # Copy to temp and normalize EOL to avoid permission errors on mounted FS' \
+    '  TMP_SCRIPT=$(mktemp /tmp/apply_patches.XXXXXX.sh)' \
+    '  tr -d '\''\r'\'' < "$SCRIPT" > "$TMP_SCRIPT" || cp "$SCRIPT" "$TMP_SCRIPT"' \
+    '  chmod +x "$TMP_SCRIPT"' \
+    '  bash "$TMP_SCRIPT" || {' \
+    '    echo "[apply-patches] Warning: patch apply failed (continuing)" >&2; exit 0; }' \
+    'fi' \
+    > /usr/local/bin/apply-vllm-patches && \
+    chmod +x /usr/local/bin/apply-vllm-patches
+USER vllmuser
+
+# Add environment variables for better CUDA memory management and build optimization.
+# Use the new variable name to avoid deprecation warnings.
+ENV PYTORCH_ALLOC_CONF=expandable_segments:True
+# Do not pin a single GPU here; let runtime inject device selection
+# ENV CUDA_VISIBLE_DEVICES=0
+ENV CMAKE_BUILD_PARALLEL_LEVEL=4
+ENV VLLM_INSTALL_PUNICA_KERNELS=0
+ENV MAX_JOBS=4
+
+# Enable ccache for faster rebuilds
+ENV CCACHE_DIR=/home/vllmuser/.ccache
+ENV CCACHE_MAXSIZE=10G
+ENV PATH=/usr/lib64/ccache:$PATH
+
+# (TORCH_CUDA_ARCH_LIST defined earlier)
+# Do not force-disable Machete; allow upstream defaults. Users may still pass -e CMAKE_ARGS for custom CMake settings.
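+# Example (illustrative): pass a standard CMake flag at container start, e.g. for a
+# debug-friendly kernel build:
+#   podman run -e CMAKE_ARGS="-DCMAKE_BUILD_TYPE=RelWithDebInfo" ... vllm-dev:latest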
+ENV CMAKE_ARGS=""
+
+# WSL2-specific CUDA environment configuration
+ENV NVIDIA_VISIBLE_DEVICES=all
+ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
+ENV LD_LIBRARY_PATH=/usr/local/cuda/compat:/usr/lib/wsl/drivers:/usr/lib/wsl/lib:/usr/lib/x86_64-linux-gnu:/usr/local/cuda/lib64:/usr/local/cuda/lib:$LD_LIBRARY_PATH
+
+# Add runtime library detection script
+RUN echo '#!/bin/bash' > /home/vllmuser/check_cuda_libs.sh && \
+    echo 'echo "=== CUDA Library Check ==="' >> /home/vllmuser/check_cuda_libs.sh && \
+    echo 'echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH"' >> /home/vllmuser/check_cuda_libs.sh && \
+    echo 'echo "Searching for CUDA libraries..."' >> /home/vllmuser/check_cuda_libs.sh && \
+    echo 'find /usr/lib/wsl -name "libcuda.so*" 2>/dev/null | head -3 || echo "No WSL CUDA libs"' >> /home/vllmuser/check_cuda_libs.sh && \
+    echo 'ldconfig -p | grep cuda | head -3 || echo "No CUDA in ldconfig"' >> /home/vllmuser/check_cuda_libs.sh && \
+    echo 'echo "PyTorch CUDA status:"' >> /home/vllmuser/check_cuda_libs.sh && \
+    echo 'python -c "import torch; print(f\"CUDA available: {torch.cuda.is_available()}\"); print(f\"Device count: {torch.cuda.device_count()}\")" 2>/dev/null || echo "PyTorch not available"' >> /home/vllmuser/check_cuda_libs.sh && \
+    chmod +x /home/vllmuser/check_cuda_libs.sh
diff --git a/extras/README.md b/extras/README.md
new file mode 100644
index 000000000000..b8e8576084fa
--- /dev/null
+++ b/extras/README.md
@@ -0,0 +1,50 @@
+# extras/ overview
+
+This directory hosts all non-core assets: container/build tooling, configs, testing, storage helpers, and optional patches. The goals are clarity, single responsibility, and easy extension without touching the vLLM core.
+
+Suggested layout (implemented here):
+
+- podman/ — Podman-specific build/launch wrappers and helpers
+- configs/ — Centralized, declarative versions and build configuration
+- secrets/ — Gitignored area for local tokens/config (not committed)
+- testing/ — Test/benchmark harness, matrices, and results
+- storage/ — External volumes and cache management helpers
+- patches/ — Optional patch/plug-in mechanism for controlled tweaks
+
+Primary entrypoint: use `extras/podman/` as the canonical way to build and run the dev container.
+
+Deprecation: the legacy launchers `extras/run-vllm-dev.sh` and `extras/run-vllm-dev.ps1` are deprecated and now forward to the Podman wrappers. Please switch to `extras/podman/run.sh` (Linux/macOS) or `extras/podman/run.ps1` (Windows).
+
+## Quick start
+
+- Edit `extras/configs/build.env` to set CUDA/UBI/Python defaults.
+- Use `extras/podman/build.sh` to build images with those defaults.
+- Use `extras/podman/run.ps1` (Windows) or `extras/podman/run.sh` (Linux/macOS) to run the dev container.
+
+Examples:
+
+- Windows (PowerShell):
+    - Build image: `./extras/podman/run.ps1 -Build`
+    - GPU check: `./extras/podman/run.ps1 -GPUCheck`
+    - Setup build: `./extras/podman/run.ps1 -Setup -WorkVolume vllm-work -Progress`
+
+- Linux/macOS (bash):
+    - Build image: `extras/podman/run.sh --build`
+    - GPU check: `extras/podman/run.sh --gpu-check`
+    - Setup build: `extras/podman/run.sh --setup --work-volume vllm-work --progress`
+
+## Secrets
+
+Place tokens in `extras/secrets/` per its README and never commit them. Load them in your session or bind-mount them into containers.
+
+## Testing
+
+See `extras/testing/README.md` for defining a matrix, recording results, and comparing runs.
+
+## Storage
+
+See `extras/storage/README.md` for model/cache volume guidance for performance and reproducibility.
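+
+For example, the named work volume used in the quick start can be created once and reused; `vllm-work` is just the default name from `extras/configs/build.env`:
+
+```bash
+podman volume create vllm-work
+podman volume inspect vllm-work --format '{{.Mountpoint}}'
+```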
+
+## Patches
+
+If you need to tweak upstream vLLM without forking, use `extras/patches/` to stage diffs and apply them during build.
diff --git a/extras/configs/README.md b/extras/configs/README.md
new file mode 100644
index 000000000000..98ef0f02f786
--- /dev/null
+++ b/extras/configs/README.md
@@ -0,0 +1,9 @@
+# configs README
+
+This folder centralizes editable configuration for images/builds:
+
+- build.env: Bash-exported defaults (CUDA/UBI/Python/vLLM tag, arch list, volumes)
+- build.yaml (optional): YAML equivalent for tools that prefer structured configs
+- versions.json (optional): Machine-friendly manifest for automation
+
+Consumers (scripts/Containerfiles) should read values from here and allow runtime overrides via environment variables.
diff --git a/extras/configs/build.env b/extras/configs/build.env
new file mode 100644
index 000000000000..42e5f71b11c6
--- /dev/null
+++ b/extras/configs/build.env
@@ -0,0 +1,35 @@
+# Build configuration
+#
+# Scripts should source this file to obtain default versions.
+# Values can be overridden by environment variables provided at runtime.
+
+# CUDA / UBI / Python baselines
+export CUDA_VERSION=${CUDA_VERSION:-13.0.0}
+export UBI_VERSION=${UBI_VERSION:-9}
+export PYTHON_VERSION=${PYTHON_VERSION:-3.12}
+export BASE_FLAVOR=${BASE_FLAVOR:-rockylinux9}
+
+# vLLM branch/tag to use inside the container when cloning or referring
+export VLLM_TAG=${VLLM_TAG:-main}
+
+# Architectures (space separated) for PyTorch/NVCC.
+# Blackwell sm_120 is covered by the 12.0 entry; trim the list to speed up builds.
+export TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST:-"7.0 7.5 8.0 8.6 8.9 9.0 12.0 13.0"}
+
+# Named volume for build scratch/work dir (Podman recommended)
+export VLLM_WORK_VOLUME=${VLLM_WORK_VOLUME:-vllm-work}
+export VLLM_WORK_DIR_CONTAINER=${VLLM_WORK_DIR_CONTAINER:-/opt/work}
+
+# Image naming
+export VLLM_BASE_IMAGE=${VLLM_BASE_IMAGE:-"nvidia/cuda:${CUDA_VERSION}-cudnn-devel-${BASE_FLAVOR}"}
+export VLLM_IMAGE_TAG=${VLLM_IMAGE_TAG:-"vllm-cuda${CUDA_VERSION}-ubi${UBI_VERSION}"}
+
+# Torch family components: wheels only (nightly index). No source build fallbacks.
+export INSTALL_TORCHVISION=${INSTALL_TORCHVISION:-1}
+export INSTALL_TORCHAUDIO=${INSTALL_TORCHAUDIO:-1}
+
+# FFMPEG optional enforcement for torchaudio features.
+# Set to 1 to enable RPM Fusion repos and install ffmpeg/ffmpeg-devel; the build will fail if unavailable.
+# Set to 0 to attempt a best-effort install and fall back to building torchaudio without FFMPEG when headers are missing.
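+# Example (illustrative): disable enforcement for a single build without editing this file:
+#   podman build -f extras/Dockerfile --build-arg REQUIRE_FFMPEG=0 -t vllm-dev:latest .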
+#export REQUIRE_FFMPEG=${REQUIRE_FFMPEG:-0}
+export REQUIRE_FFMPEG=${REQUIRE_FFMPEG:-1}
\ No newline at end of file
diff --git a/extras/configs/build.yaml b/extras/configs/build.yaml
new file mode 100644
index 000000000000..277737dd92df
--- /dev/null
+++ b/extras/configs/build.yaml
@@ -0,0 +1,11 @@
+cuda:
+  version: "13.0.0"
+  tag: "latest"
+ubi:
+  version: "9"
+  tag: "latest"
+python:
+  version: "3.12"
+  tag: "latest"
+vllm:
+  tag: main
diff --git a/extras/patches/0001-cumem-alloc-env-fallback.diff b/extras/patches/0001-cumem-alloc-env-fallback.diff
new file mode 100644
index 000000000000..c2a322024961
--- /dev/null
+++ b/extras/patches/0001-cumem-alloc-env-fallback.diff
@@ -0,0 +1,14 @@
+diff --git a/vllm/device_allocator/cumem.py b/vllm/device_allocator/cumem.py
+--- a/vllm/device_allocator/cumem.py
++++ b/vllm/device_allocator/cumem.py
+@@ -140,7 +140,9 @@ class CuMemAllocator:
+         return CuMemAllocator.instance
+ 
+     def __init__(self):
+-        conf = os.environ.get("PYTORCH_CUDA_ALLOC_CONF", "")
++        # Prefer new env var; fall back to deprecated one for compatibility
++        conf = os.environ.get("PYTORCH_ALLOC_CONF",
++                              os.environ.get("PYTORCH_CUDA_ALLOC_CONF", ""))
+         assert "expandable_segments:True" not in conf, \
+             ("Expandable segments are not compatible with memory pool. "
+              "Please track https://github.com/pytorch/pytorch/issues/147851 "
diff --git a/extras/patches/README.md b/extras/patches/README.md
new file mode 100644
index 000000000000..ff4f662c4588
--- /dev/null
+++ b/extras/patches/README.md
@@ -0,0 +1,5 @@
+# Patches and plugins scaffolding
+
+- Place unified diffs (*.diff) here.
+- Use `apply_patches.sh` to apply them before building.
+- Optionally, add Python plugins under `plugin/` and load dynamically at runtime.
diff --git a/extras/patches/apply_patches.sh b/extras/patches/apply_patches.sh
new file mode 100644
index 000000000000..2c4ca43d45c8
--- /dev/null
+++ b/extras/patches/apply_patches.sh
@@ -0,0 +1,84 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# If CRLF detected, re-exec a normalized temp copy to avoid editing mounted files
+if grep -q $'\r' "$0" 2>/dev/null; then
+    TMP_SELF=$(mktemp /tmp/apply_patches_self.XXXXXX.sh)
+    tr -d '\r' < "$0" > "$TMP_SELF" || cp "$0" "$TMP_SELF"
+    chmod +x "$TMP_SELF"
+    exec "$TMP_SELF" "$@"
+fi
+
+PATCH_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
+ROOT_DIR=$(cd -- "${PATCH_DIR}/../.." &>/dev/null && pwd)
+
+shopt -s nullglob
+PATCHES=(${PATCH_DIR}/*.diff)
+shopt -u nullglob
+
+if [ ${#PATCHES[@]} -eq 0 ]; then
+    echo "[patches] No patches found; nothing to apply."
+    exit 0
+fi
+
+pushd "${ROOT_DIR}" >/dev/null
+for p in "${PATCHES[@]}"; do
+    echo "[patches] Applying ${p}"
+    # Validate the patch header (git format-patch or plain git diff) and normalize EOL to a temp file
+    if ! head -n 1 "$p" | grep -Eq '^(From |diff --git)'; then
+        echo "[patches] Warning: ${p} does not look like a git patch; trying anyway" >&2
+    fi
+    TMP_PATCH=$(mktemp /tmp/patch.XXXXXX.diff)
+    tr -d '\r' < "$p" > "$TMP_PATCH" || cp "$p" "$TMP_PATCH"
+    if ! git apply --check "$TMP_PATCH" 2>/dev/null; then
+        echo "[patches] Check failed for ${p}"
+        # Fallback: targeted edit for cumem allocator env var change
+        case "$(basename "$p")" in
+            0001-cumem-alloc-env-fallback.diff)
+                echo "[patches] Attempting fallback edit for cumem allocator"
+                # Run inside an if-condition so set -e does not abort before we can report failure
+                if ! python - <<'PY'
+import io, os, sys
+PATH = os.path.join('vllm', 'device_allocator', 'cumem.py')
+try:
+    with io.open(PATH, 'r', encoding='utf-8', newline='') as f:
+        src = f.read()
+except FileNotFoundError:
+    sys.exit(1)
+
+target = 'conf = os.environ.get("PYTORCH_CUDA_ALLOC_CONF", "")'
+if 'PYTORCH_ALLOC_CONF' in src:
+    print('[patches] cumem already uses PYTORCH_ALLOC_CONF; skipping')
+    sys.exit(0)
+
+if target in src:
+    prefix = src.split(target)[0].split('\n')[-1]
+    indent = ' ' * (len(prefix) - len(prefix.lstrip(' ')))
+    replacement = (
+        f"{indent}# Prefer new env var; fall back to deprecated one for compatibility\n"
+        f"{indent}conf = os.environ.get(\"PYTORCH_ALLOC_CONF\",\n"
+        f"{indent}                      os.environ.get(\"PYTORCH_CUDA_ALLOC_CONF\", \"\"))"
+    )
+    new_src = src.replace(target, replacement)
+    with io.open(PATH, 'w', encoding='utf-8', newline='\n') as f:
+        f.write(new_src)
+    print('[patches] Applied cumem allocator fallback edit')
+    sys.exit(0)
+else:
+    print('[patches] Could not find target line in cumem.py; no changes made')
+    sys.exit(1)
+PY
+                then
+                    echo "[patches] Fallback edit failed" >&2; exit 1
+                fi
+                ;;
+            *)
+                exit 1
+                ;;
+        esac
+    else
+        git apply "$TMP_PATCH"
+    fi
+done
+popd >/dev/null
+
+echo "[patches] Done."
diff --git a/extras/podman/Containerfile b/extras/podman/Containerfile
new file mode 100644
index 000000000000..d42bef4b344e
--- /dev/null
+++ b/extras/podman/Containerfile
@@ -0,0 +1,11 @@
+# syntax=docker/dockerfile:1.7-labs
+
+# Delegator Containerfile.
+# Build using the canonical Dockerfile in extras/ to avoid duplication.
+
+FROM scratch AS noop
+
+# Usage:
+#   podman build -f extras/Dockerfile -t vllm-dev:latest .
+# or from this folder (the wrapper script does this for you):
+#   bash build.sh
diff --git a/extras/podman/README.md b/extras/podman/README.md
new file mode 100644
index 000000000000..fb0c361203f2
--- /dev/null
+++ b/extras/podman/README.md
@@ -0,0 +1,12 @@
+# Podman helpers for vLLM
+
+This folder contains Podman-specific wrappers. They preserve back-compat by calling the existing scripts in `extras/` when present.
+
+- Containerfile: Thin wrapper that defers to `extras/Dockerfile` by default.
+- build.sh: Builds the image using values from `../configs/build.env`.
+- entrypoint/: Optional entrypoint scripts used inside containers.
+- scripts/: Utility helpers for Podman machine/GPU/volumes.
+
+See `extras/README.md` for usage.
+
+Documentation: see `docs/contributing/podman-dev.md` for the Podman-first workflow and deprecation notes for legacy launchers.
diff --git a/extras/podman/build.sh b/extras/podman/build.sh
new file mode 100644
index 000000000000..a4ec5f445825
--- /dev/null
+++ b/extras/podman/build.sh
@@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Why: Back-compat wrapper that sources central config and builds using the canonical Dockerfile.
+
+SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
+ROOT_DIR=$(cd -- "${SCRIPT_DIR}/../.." &>/dev/null && pwd)
+CONFIG_DIR="${SCRIPT_DIR}/../configs"
+
+# shellcheck source=../configs/build.env
+if [ -f "${CONFIG_DIR}/build.env" ]; then
+    # shellcheck disable=SC1091
+    source "${CONFIG_DIR}/build.env"
+fi
+
+# Fallbacks mirror the defaults in extras/configs/build.env
+CUDA_VERSION=${CUDA_VERSION:-13.0.0}
+UBI_VERSION=${UBI_VERSION:-9}
+BASE_FLAVOR=${BASE_FLAVOR:-rockylinux9}
+VLLM_IMAGE_TAG=${VLLM_IMAGE_TAG:-"vllm-cuda${CUDA_VERSION}-ubi${UBI_VERSION}"}
+
+CONTEXT="${ROOT_DIR}"
+DOCKERFILE_REL="extras/Dockerfile"
+
+echo "[podman/build] Building image ${VLLM_IMAGE_TAG} with CUDA=${CUDA_VERSION}, UBI=${UBI_VERSION}, BASE=${BASE_FLAVOR}"
+
+podman build \
+    --build-arg CUDA_VERSION="${CUDA_VERSION}" \
+    --build-arg UBI_VERSION="${UBI_VERSION}" \
+    --build-arg BASE_FLAVOR="${BASE_FLAVOR}" \
+    -t "${VLLM_IMAGE_TAG}" \
+    -f "${DOCKERFILE_REL}" \
+    "${CONTEXT}"
+
+echo "[podman/build] Done -> ${VLLM_IMAGE_TAG}"
diff --git a/extras/podman/dev-setup.sh b/extras/podman/dev-setup.sh
new file mode 100644
index 000000000000..153d03b90710
--- /dev/null
+++ b/extras/podman/dev-setup.sh
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+# Robust setup entrypoint: prefer extras/dev-setup.sh,
+# otherwise use the image-provided /home/vllmuser/setup_vllm_dev.sh.
+set -euo pipefail
+
+SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
+EXTRAS_DIR=$(cd -- "${SCRIPT_DIR}/.." &>/dev/null && pwd)
+
+# 1) Current canonical path
+if [[ -f "${EXTRAS_DIR}/dev-setup.sh" ]]; then
+    chmod +x "${EXTRAS_DIR}/dev-setup.sh" 2>/dev/null || true
+    exec "${EXTRAS_DIR}/dev-setup.sh" "$@"
+fi
+
+# 2) Fallback to the image-provided helper
+if [[ -f /home/vllmuser/setup_vllm_dev.sh ]]; then
+    exec /home/vllmuser/setup_vllm_dev.sh "$@"
+fi
+
+echo "[setup] No setup script found at extras/dev-setup.sh, and no image helper present." >&2
+exit 1
diff --git a/extras/podman/entrypoint/apply-patches-then-exec.sh b/extras/podman/entrypoint/apply-patches-then-exec.sh
new file mode 100644
index 000000000000..30196ad5e695
--- /dev/null
+++ b/extras/podman/entrypoint/apply-patches-then-exec.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Apply repo patches if available; best-effort, EOL normalization is handled inside the helper.
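+# Typical wiring (see extras/podman/run.ps1, which sets this script as the container
+# entrypoint):
+#   podman run --entrypoint /workspace/extras/podman/entrypoint/apply-patches-then-exec.sh \
+#     vllm-dev:latest bash -lc '<command>'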
+if command -v apply-vllm-patches >/dev/null 2>&1; then
+    apply-vllm-patches || true
+fi
+
+exec "$@"
diff --git a/extras/podman/run.ps1 b/extras/podman/run.ps1
new file mode 100644
index 000000000000..7aee56fe9bc4
--- /dev/null
+++ b/extras/podman/run.ps1
@@ -0,0 +1,257 @@
+#!/usr/bin/env pwsh
+[CmdletBinding()] param(
+    [switch]$Build,
+    [switch]$Interactive,
+    [string]$Command = "",
+    [switch]$Setup,
+    [switch]$GPUCheck,
+    [switch]$Mirror,
+    [switch]$Recreate,
+    [string]$WorkVolume = "",
+    [string]$WorkDirHost = "",
+    [switch]$Progress,
+    [switch]$NoCache,
+    [switch]$Pull,
+    [switch]$Help
+)
+
+if ($Help) {
+    Write-Host "Usage: extras/podman/run.ps1 [options]"
+    Write-Host "  -Build        Build the dev image (reads extras/configs/build.env)"
+    Write-Host "  -Interactive  Start an interactive shell"
+    Write-Host "  -Command      Run a command inside the dev container"
+    Write-Host "  -Setup        Run project setup inside the container"
+    Write-Host "  -GPUCheck     Run a CUDA/Torch sanity check"
+    Write-Host "  -Mirror       Use local mirror registries if configured"
+    Write-Host "  -Recreate     Recreate the container if running"
+    Write-Host "  -WorkVolume   Named volume to mount at /opt/work"
+    Write-Host "  -WorkDirHost  Host dir to mount at /opt/work"
+    Write-Host "  -Progress     Show progress bars in setup"
+    Write-Host "  -NoCache      Build image without using cache"
+    Write-Host "  -Pull         Always attempt to pull newer base image"
+    return
+}
+
+if (-not $Interactive -and [string]::IsNullOrEmpty($Command) -and -not $GPUCheck -and -not $Setup) { $Interactive = $true }
+
+if (-not (Get-Command podman -ErrorAction SilentlyContinue)) { Write-Host "❌ Podman not found in PATH" -ForegroundColor Red; exit 1 }
+
+$ContainerName = "vllm-dev"
+$ImageTag = "vllm-dev:latest"
+$SourceDir = (Get-Location).Path
+
+Write-Host "🐋 vLLM Dev Container (Podman)" -ForegroundColor Green
+
+if ($Build) {
+    Write-Host "🔨 Building image (honoring extras/configs/build.env)..." -ForegroundColor Yellow
+    $configPath = Join-Path $SourceDir "extras/configs/build.env"
+    $dockerfilePath = Join-Path $SourceDir "extras/Dockerfile"
+    $requireFfmpegArg = '1'
+    function Get-DockerArgDefault([string]$name, [string]$fallback) {
+        if (Test-Path $dockerfilePath) {
+            $df = Get-Content -Raw -Path $dockerfilePath
+            $m = [regex]::Match($df, "(?m)^\s*ARG\s+${name}\s*=\s*([^\r\n]+)")
+            if ($m.Success) {
+                return $m.Groups[1].Value.Trim()
+            }
+        }
+        return $fallback
+    }
+    # Seed from the Dockerfile's ARG defaults so a missing build.env still yields usable values
+    $cudaVer = Get-DockerArgDefault 'CUDA_VERSION' '13.0.0'
+    $baseFlavor = Get-DockerArgDefault 'BASE_FLAVOR' 'rockylinux9'
+    $archList = Get-DockerArgDefault 'TORCH_CUDA_ARCH_LIST' '7.0 7.5 8.0 8.6 8.9 9.0 12.0 13.0'
+    if (Test-Path $configPath) {
+        $cfg = Get-Content -Raw -Path $configPath
+        function Get-EnvDefault([string]$name, [string]$fallback) {
+            # Match a line like: export NAME=VALUE
+            $line = [regex]::Match($cfg, "(?m)^\s*export\s+${name}\s*=\s*([^\r\n]+)")
+            if (-not $line.Success) { return $fallback }
+            $val = $line.Groups[1].Value.Trim()
+            # Strip wrapping quotes if present
+            if (($val.StartsWith('"') -and $val.EndsWith('"')) -or ($val.StartsWith("'") -and $val.EndsWith("'"))) { $val = $val.Substring(1, $val.Length-2) }
+            # If value is Bash-style ${NAME:-default}, extract the default
+            if ($val.StartsWith('${') -and $val.Contains(':-')) {
+                $idx = $val.IndexOf(':-'); $end = $val.IndexOf('}', $idx)
+                if ($idx -ge 0 -and $end -gt $idx) {
+                    $def = $val.Substring($idx+2, $end-($idx+2)).Trim()
+                    if (($def.StartsWith('"') -and $def.EndsWith('"')) -or ($def.StartsWith("'") -and $def.EndsWith("'"))) { $def = $def.Substring(1, $def.Length-2) }
+                    return $def
+                }
+            }
+            return $val
+        }
+        $cudaVer = Get-EnvDefault -name 'CUDA_VERSION' -fallback $cudaVer
+        $baseFlavor = Get-EnvDefault -name 'BASE_FLAVOR' -fallback $baseFlavor
+        $archList = Get-EnvDefault -name 'TORCH_CUDA_ARCH_LIST' -fallback $archList
+        # Torchvision/torchaudio are wheel-only installs now, so no extra refs are read here
+        $requireFfmpeg = Get-EnvDefault -name 'REQUIRE_FFMPEG' -fallback '1'
+        if ($requireFfmpeg -match '^[01]$') { $requireFfmpegArg = $requireFfmpeg } else { $requireFfmpegArg = '1' }
+    }
+    # Derive the PyTorch nightly index from the CUDA version (e.g., 13.0 -> cu130, 12.9 -> cu129)
+    $torchCudaIndex = if ($cudaVer -match '^13\.') { 'cu130' } elseif ($cudaVer -match '^12\.9') { 'cu129' } else {
+        $parts = $cudaVer.Split('.')
+        if ($parts.Length -ge 2) { 'cu' + $parts[0] + $parts[1] } else { 'cu129' }
+    }
+    Write-Host ("Config: CUDA={0} BASE_FLAVOR={1} TORCH_CUDA_INDEX={2} ARCH_LIST=({3})" -f $cudaVer,$baseFlavor,$torchCudaIndex,$archList) -ForegroundColor DarkGray
+    $buildCmd = @("build","-f","extras/Dockerfile",
+        "--build-arg","CUDA_VERSION=$cudaVer",
+        "--build-arg","BASE_FLAVOR=$baseFlavor",
+        "--build-arg","TORCH_CUDA_INDEX=$torchCudaIndex",
+        "--build-arg","TORCH_CUDA_ARCH_LIST=$archList",
+        "--build-arg","REQUIRE_FFMPEG=$requireFfmpegArg",
+        "-t",$ImageTag,".")
+    # Use cache by default; add --no-cache only when requested
+    if ($NoCache) { $buildCmd = @($buildCmd[0],"--no-cache") + $buildCmd[1..($buildCmd.Length-1)] }
+    if ($Pull) { $buildCmd = @($buildCmd[0],"--pull=always") + $buildCmd[1..($buildCmd.Length-1)] }
+    & podman @buildCmd
+    if ($LASTEXITCODE -ne 0) { Write-Host "❌ Build failed" -ForegroundColor Red; exit 1 }
+    Write-Host "✅ Build ok" -ForegroundColor Green
+}
+
+# Already running?
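+# podman ps (without -a) lists running containers only, so a non-empty exact match
+# below means we exec into the existing container instead of starting a new one.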
+$running = podman ps --filter "name=$ContainerName" --format "{{.Names}}" 2>$null
+
+if ($Recreate -and $running -eq $ContainerName) {
+    Write-Host "♻️ Removing existing container '$ContainerName'" -ForegroundColor Yellow
+    podman rm -f $ContainerName | Out-Null
+    $running = $null
+}
+
+if ($running -eq $ContainerName) {
+    if ($GPUCheck) {
+        Write-Host "🔍 GPU check (existing container)" -ForegroundColor Yellow
+        $cmd = @'
+source /home/vllmuser/venv/bin/activate && python - <<'PY'
+import torch, os
+print("PyTorch:", getattr(torch,"__version__","n/a"))
+print("CUDA:", torch.cuda.is_available())
+print("Devices:", torch.cuda.device_count() if torch.cuda.is_available() else 0)
+print("LD_LIBRARY_PATH:", os.environ.get("LD_LIBRARY_PATH"))
+if torch.cuda.is_available():
+    try:
+        print("GPU 0:", torch.cuda.get_device_name(0))
+    except Exception as e:
+        print("GPU name error:", e)
+PY
+nvidia-smi || true
+'@
+        $cmd = "export NVIDIA_VISIBLE_DEVICES=all; " + $cmd
+        podman exec $ContainerName bash -lc $cmd
+        exit $LASTEXITCODE
+    }
+    if ($Setup) {
+        Write-Host "🔧 Running dev setup in existing container" -ForegroundColor Yellow
+        $envs = @()
+        if ($Mirror) { $envs += @('LOCAL_MIRROR=1') }
+        if ($Progress) { $envs += @('PROGRESS_WATCH=1') }
+        $envs += @('NVIDIA_VISIBLE_DEVICES=all')
+        $envStr = ($envs | ForEach-Object { "export $_;" }) -join ' '
+        $cmd = "$envStr chmod +x ./extras/dev-setup.sh 2>/dev/null || true; ./extras/dev-setup.sh"
+        if ($Progress) { podman exec -it $ContainerName bash -lc $cmd } else { podman exec $ContainerName bash -lc $cmd }
+        exit $LASTEXITCODE
+    }
+    if ($Command) {
+        Write-Host "🚀 Running command in existing container" -ForegroundColor Green
+        $runCmd = "source /home/vllmuser/venv/bin/activate && $Command"
+        podman exec $ContainerName bash -c $runCmd
+        exit $LASTEXITCODE
+    }
+    $resp = Read-Host "Attach to running container? [Y/n]"
+    if ($resp -eq "" -or $resp -match '^[Yy]$') { podman exec -it $ContainerName bash; exit $LASTEXITCODE } else { exit 0 }
+}
+
+# Ensure image exists
+podman image exists $ImageTag
+if ($LASTEXITCODE -ne 0) { Write-Host "❌ Image missing. Use -Build." -ForegroundColor Red; exit 1 }
+
+# Base args (no default /tmp tmpfs; can be enabled via VLLM_TMPFS_TMP_SIZE)
+$runArgs = @("run","--rm","--security-opt=label=disable","--shm-size","8g","-v","${SourceDir}:/workspace:Z")
+# Request the GPU via CDI (flags may appear in any order before the image name)
+$runArgs += @("--device=nvidia.com/gpu=all")
+if (-not [string]::IsNullOrWhiteSpace($WorkVolume)) { $runArgs += @('-v',"${WorkVolume}:/opt/work:Z") }
+elseif ($WorkDirHost -and (Test-Path $WorkDirHost)) { $runArgs += @('-v',"${WorkDirHost}:/opt/work:Z") }
+$runArgs += @('-w','/workspace','--name',"$ContainerName",'--user','vllmuser','--env','ENGINE=podman')
+# Use a tiny entrypoint to apply patches before executing the requested command
+$runArgs += @('--entrypoint','/workspace/extras/podman/entrypoint/apply-patches-then-exec.sh')
+
+$tmpfsSize = [Environment]::GetEnvironmentVariable('VLLM_TMPFS_TMP_SIZE')
+if (-not [string]::IsNullOrEmpty($tmpfsSize) -and $tmpfsSize -ne '0') { $runArgs += @('--tmpfs',"/tmp:size=$tmpfsSize") }
+
+# WSL GPU: map /dev/dxg and mount WSL libs
+$runArgs += @('--device','/dev/dxg','-v','/usr/lib/wsl:/usr/lib/wsl:ro')
+if ($Mirror) { $runArgs += @('--env','LOCAL_MIRROR=1') }
+foreach ($ev in 'NVIDIA_VISIBLE_DEVICES','NVIDIA_DRIVER_CAPABILITIES','NVIDIA_REQUIRE_CUDA') {
+    $val = [Environment]::GetEnvironmentVariable($ev)
+    if ($val) { $runArgs += @('--env',"$ev=$val") }
+}
+$runArgs += @('--env','ENGINE=podman','--env','NVIDIA_VISIBLE_DEVICES=all','--env','NVIDIA_DRIVER_CAPABILITIES=compute,utility','--env','NVIDIA_REQUIRE_CUDA=')
+
+if ($GPUCheck) {
+    $pyDiag = @'
+import json, torch, os
+out = {
+    "torch_version": getattr(torch, "__version__", "n/a"),
+    "torch_cuda_version": getattr(getattr(torch, "version", None), "cuda", "n/a"),
+    "cuda_available": torch.cuda.is_available(),
+    "ld_library_path": os.environ.get("LD_LIBRARY_PATH"),
+}
+try:
+    out["device_count"] = torch.cuda.device_count()
+except Exception as e:
+    out["device_count_error"] = str(e)
+if out["cuda_available"] and out.get("device_count", 0) > 0:
+    try:
+        cap = torch.cuda.get_device_capability(0)
+        out["device_0"] = {"name": torch.cuda.get_device_name(0), "capability": f"sm_{cap[0]}{cap[1]}"}
+    except Exception as e:
+        out["device_0_error"] = str(e)
+else:
+    out["diagnostics"] = ["Missing /dev/nvidia* or podman machine without GPU passthrough"]
+print(json.dumps(out, indent=2))
+'@
+    $pyB64 = [Convert]::ToBase64String([Text.Encoding]::UTF8.GetBytes($pyDiag))
+    $gpuScript = @'
+echo '=== GPU Check ==='
+which nvidia-smi && nvidia-smi || echo 'nvidia-smi unavailable'
+echo '--- /dev/nvidia* ---'
+ls -l /dev/nvidia* 2>/dev/null || echo 'no /dev/nvidia* nodes'
+echo '--- Environment (NVIDIA_*) ---'
+env | grep -E '^NVIDIA_' || echo 'no NVIDIA_* env vars'
+if [ "$NVIDIA_VISIBLE_DEVICES" = "void" ]; then echo 'WARN: NVIDIA_VISIBLE_DEVICES=void (no GPU mapped)'; fi
+echo '--- LD_LIBRARY_PATH ---'
+echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH"
+source /home/vllmuser/venv/bin/activate 2>/dev/null || true
+echo __PY_B64__ | base64 -d > /tmp/gpucheck.py
+python /tmp/gpucheck.py || true
+rm -f /tmp/gpucheck.py
+'@
+    $gpuScript = "export NVIDIA_VISIBLE_DEVICES=all; export LD_LIBRARY_PATH=/usr/lib/wsl/lib:/usr/lib/wsl/drivers:`$LD_LIBRARY_PATH; " + ($gpuScript -replace '__PY_B64__', $pyB64) -replace "`r",""
+    $runArgs += @('--user','root', $ImageTag,'bash','-lc',$gpuScript)
+} elseif ($Setup) {
+    # Use the robust setup entrypoint that finds the right script (extras/dev-setup.sh or the image helper)
+    $prefix = 'for f in ./extras/dev-setup.sh ./extras/podman/dev-setup.sh; do if [ -f "$f" ]; then sed -i "s/\r$//" "$f" || true; fi; done; chmod +x ./extras/podman/dev-setup.sh 2>/dev/null || true; apply-vllm-patches || true; '
+    $envPrefix = ''
+    if ($Mirror) { $envPrefix += 'export LOCAL_MIRROR=1; ' }
+    if ($Progress) { $envPrefix += 'export PROGRESS_WATCH=1; ' }
+    $envPrefix += 'export TMPDIR=/opt/work/tmp; export TMP=/opt/work/tmp; export TEMP=/opt/work/tmp; mkdir -p /opt/work/tmp; '
+    $setupCmd = $prefix + $envPrefix + "./extras/podman/dev-setup.sh"
+    if ($Progress) { $runArgs += @('-it', $ImageTag, 'bash','-lc', $setupCmd) } else { $runArgs += @($ImageTag, 'bash','-lc', $setupCmd) }
+    Write-Host "🔧 Running dev setup" -ForegroundColor Green
+} elseif ($Interactive -and -not $Command) {
+    $runArgs += @('-it',$ImageTag,'bash')
+    Write-Host "🚀 Interactive shell" -ForegroundColor Green
+} elseif ($Command) {
+    $runArgs += @($ImageTag,'bash','-lc',"export LD_LIBRARY_PATH=/usr/lib/wsl/lib:/usr/lib/wsl/drivers:`$LD_LIBRARY_PATH; source /home/vllmuser/venv/bin/activate && $Command")
+    Write-Host "🚀 Running command" -ForegroundColor Green
+} else {
+    $runArgs += @($ImageTag)
+}
+
+Write-Host "Command: podman $($runArgs -join ' ')" -ForegroundColor Gray
+& podman @runArgs
+
+if ($LASTEXITCODE -eq 0 -and $Interactive) { Write-Host "Exited cleanly" -ForegroundColor Green }
diff --git a/extras/podman/run.sh b/extras/podman/run.sh
new file mode 100644
index 000000000000..ddafbcc578d0
--- /dev/null
+++ b/extras/podman/run.sh
@@ -0,0 +1,158 @@
+#!/usr/bin/env bash
+# Unified lightweight vLLM dev container launcher (Podman-first, Linux/macOS)
+set -euo pipefail
+
+IMAGE_TAG="vllm-dev:latest"
+CONTAINER_NAME="vllm-dev"
+SOURCE_DIR="$(pwd)"
+
+show_help() {
+    cat <<EOF
+Usage: extras/podman/run.sh [options]
+  --build              Build the dev image
+  --gpu-check          Run a CUDA/Torch sanity check
+  --setup              Run project setup inside the container
+  --mirror             Use local mirror registries if configured
+  --progress           Show progress bars in setup
+  --work-volume NAME   Named volume to mount at /opt/work
+  --command CMD        Run a command inside the dev container
+  --help               Show this help
+EOF
+}
+
+BUILD=0; GPU_CHECK=0; SETUP=0; MIRROR=0; PROGRESS=0
+WORK_VOLUME=""; CMD=""
+
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        --build) BUILD=1; shift ;;
+        --gpu-check) GPU_CHECK=1; shift ;;
+        --setup) SETUP=1; shift ;;
+        --mirror) MIRROR=1; shift ;;
+        --progress) PROGRESS=1; shift ;;
+        --work-volume) WORK_VOLUME="$2"; shift 2 ;;
+        --command) CMD="$2"; shift 2 ;;
+        --help|-h) show_help; exit 0 ;;
+        *) echo "Unknown option: $1" >&2; show_help; exit 1 ;;
+    esac
+done
+
+if ! command -v podman >/dev/null 2>&1; then
+    echo "Error: podman not found in PATH" >&2
+    exit 1
+fi
+
+echo "[vLLM] Engine: podman  Image: $IMAGE_TAG  Container: $CONTAINER_NAME"
+
+if [[ $BUILD -eq 1 ]]; then
+    echo "[vLLM] Building image..."
+    if ! podman build -f extras/Dockerfile -t "$IMAGE_TAG" .; then
+        echo "[vLLM] Build failed" >&2
+        exit 1
+    fi
+    echo "[vLLM] Build complete"
+fi
+
+# If container running, attach / exec
+RUNNING=$(podman ps --filter "name=${CONTAINER_NAME}" --format '{{.Names}}' 2>/dev/null || true)
+
+if [[ "$RUNNING" == "$CONTAINER_NAME" ]]; then
+    if [[ $GPU_CHECK -eq 1 ]]; then
+        echo "[vLLM] GPU check (existing container)"
+        exec podman exec "$CONTAINER_NAME" bash -lc 'source /home/vllmuser/venv/bin/activate 2>/dev/null || true; which nvidia-smi && nvidia-smi || true; python - <<PY
+import torch
+print("PyTorch:", getattr(torch, "__version__", "n/a"))
+print("CUDA:", torch.cuda.is_available())
+PY'
+    fi
+    if [[ $SETUP -eq 1 ]]; then
+        if [[ $MIRROR -eq 1 ]]; then
+            exec podman exec "$CONTAINER_NAME" bash -lc 'export LOCAL_MIRROR=1; chmod +x ./extras/dev-setup.sh 2>/dev/null || true; ./extras/dev-setup.sh'
+        else
+            exec podman exec "$CONTAINER_NAME" bash -lc 'chmod +x ./extras/dev-setup.sh 2>/dev/null || true; ./extras/dev-setup.sh'
+        fi
+    fi
+    if [[ -n "$CMD" ]]; then
+        echo "[vLLM] Exec command in existing container"
+        podman exec "$CONTAINER_NAME" bash -lc "source /home/vllmuser/venv/bin/activate 2>/dev/null || true; $CMD"
+        exit $?
+    fi
+    read -r -p "Attach to running container ${CONTAINER_NAME}? [Y/n] " RESP || true
+    if [[ -z "$RESP" || "$RESP" =~ ^[Yy]$ ]]; then
+        exec podman exec -it "$CONTAINER_NAME" bash
+    else
+        exit 0
+    fi
+fi
+
+# Ensure image exists if not building
+if [[ $BUILD -ne 1 ]]; then
+    if ! podman image exists "$IMAGE_TAG"; then
+        echo "Image $IMAGE_TAG missing. Use --build." >&2; exit 1
+    fi
+fi
+
+# Base run args
+RUN_ARGS=(run --rm --device=nvidia.com/gpu=all --security-opt=label=disable --shm-size 8g --name "$CONTAINER_NAME" -v "${SOURCE_DIR}:/workspace:Z" -w /workspace --user vllmuser --env ENGINE=podman)
+
+# Prefer named volume for /opt/work if provided
+if [[ -n "$WORK_VOLUME" ]]; then
+    RUN_ARGS+=(-v "${WORK_VOLUME}:/opt/work:Z")
+fi
+
+# Allow configurable /tmp tmpfs size via VLLM_TMPFS_TMP_SIZE (default 0=disabled)
+TMPFS_SIZE="${VLLM_TMPFS_TMP_SIZE:-0}"
+if [[ -n "$TMPFS_SIZE" && "$TMPFS_SIZE" != "0" ]]; then
+    RUN_ARGS+=(--tmpfs "/tmp:size=${TMPFS_SIZE}")
+fi
+
+# Ensure sane NVIDIA env defaults inside container to avoid 'void' and missing caps
+RUN_ARGS+=(--env "NVIDIA_VISIBLE_DEVICES=${NVIDIA_VISIBLE_DEVICES:-all}" \
+    --env "NVIDIA_DRIVER_CAPABILITIES=${NVIDIA_DRIVER_CAPABILITIES:-compute,utility}" \
+    --env "NVIDIA_REQUIRE_CUDA=")
+
+if [[ $GPU_CHECK -eq 1 ]]; then
+    GPU_SCRIPT=$'echo "=== GPU Check ==="; which nvidia-smi && nvidia-smi || echo "nvidia-smi unavailable"; echo "--- /dev/nvidia* ---"; ls -l /dev/nvidia* 2>/dev/null || echo "no /dev/nvidia* nodes"; echo "--- Environment (NVIDIA_*) ---"; env | grep -E "^NVIDIA_" || echo "no NVIDIA_* env vars"; if [ "$NVIDIA_VISIBLE_DEVICES" = "void" ]; then echo "WARN: NVIDIA_VISIBLE_DEVICES=void (no GPU mapped)"; fi; echo "--- LD_LIBRARY_PATH ---"; echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH"; source /home/vllmuser/venv/bin/activate 2>/dev/null || true; python - <<PY\nimport json, torch, os\nout={\'torch_version\':getattr(torch,\'__version__\',\'n/a\'),\'torch_cuda_version\':getattr(getattr(torch,\'version\',None),\'cuda\',\'n/a\'),\'cuda_available\':torch.cuda.is_available(),\'ld_library_path\':os.environ.get(\'LD_LIBRARY_PATH\')}\ntry:\n\tout[\'device_count\']=torch.cuda.device_count()\nexcept Exception as e:\n\tout[\'device_count_error\']=str(e)\nif out[\'cuda_available\'] and out.get(\'device_count\',0)>0:\n\ttry:\n\t\tcap=torch.cuda.get_device_capability(0)\n\t\tout[\'device_0\']={\'name\':torch.cuda.get_device_name(0),\'capability\':f"sm_{cap[0]}{cap[1]}"}\n\texcept Exception as e:\n\t\tout[\'device_0_error\']=str(e)\nelse:\n\tout[\'diagnostics\']=[\'Missing /dev/nvidia* or podman machine without GPU passthrough\']\nprint(json.dumps(out,indent=2))\nPY'
+    RUN_ARGS+=("$IMAGE_TAG" bash -lc "$GPU_SCRIPT")
+elif [[ $SETUP -eq 1 ]]; then
+    if [[ $MIRROR -eq 1 ]]; then
+        RUN_ARGS+=(--env LOCAL_MIRROR=1)
+    fi
+    if [[ $PROGRESS -eq 1 ]]; then
+        RUN_ARGS+=(--env PROGRESS_WATCH=1)
+        RUN_ARGS+=("-it" "$IMAGE_TAG" bash -lc 'chmod +x ./extras/podman/dev-setup.sh 2>/dev/null || true; ./extras/podman/dev-setup.sh')
+    else
+        RUN_ARGS+=("$IMAGE_TAG" bash -lc 'chmod +x ./extras/podman/dev-setup.sh 2>/dev/null || true; ./extras/podman/dev-setup.sh')
+    fi
+elif [[ -n "$CMD" ]]; then
+    RUN_ARGS+=("$IMAGE_TAG" bash -lc "source /home/vllmuser/venv/bin/activate 2>/dev/null || true; $CMD")
+else
+    RUN_ARGS+=("-it" "$IMAGE_TAG" bash)
+    echo "[vLLM] Interactive shell. Helpful inside container:"
+    echo "  ./extras/dev-setup.sh   # Build/install editable vLLM"
+    echo "  python -c 'import torch;print(torch.cuda.is_available())'"
+    echo "  python -c 'import vllm'"
+fi
+
+echo "[vLLM] Command: podman ${RUN_ARGS[*]}"
+exec podman "${RUN_ARGS[@]}"
diff --git a/extras/podman/scripts/gpu_status.sh b/extras/podman/scripts/gpu_status.sh
new file mode 100644
index 000000000000..a50c78b01c03
--- /dev/null
+++ b/extras/podman/scripts/gpu_status.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Helper to show GPU/CDI status under Podman (Linux/WSL)
+
+podman info --format json | jq '.host' || podman info || true
+
+# Show CDI devices if available
+podman cdi list || true
diff --git a/extras/secrets/.gitignore b/extras/secrets/.gitignore
new file mode 100644
index 000000000000..d4895ec18947
--- /dev/null
+++ b/extras/secrets/.gitignore
@@ -0,0 +1,4 @@
+# Ensure this directory stays out of git; keep this file only.
+*
+!.gitignore
+!README.md
diff --git a/extras/secrets/README.md b/extras/secrets/README.md
new file mode 100644
index 000000000000..b519087af198
--- /dev/null
+++ b/extras/secrets/README.md
@@ -0,0 +1,12 @@
+# secrets directory
+
+This directory is gitignored and intended for local-only secret material such as model hub tokens.
+
+Files are expected to be simple KEY=VALUE lines that can be sourced by shell scripts.
+
+Examples:
+
+- hf-credentials.env
+- cn-modelhub-credentials.env
+
+Do NOT commit secrets.
diff --git a/extras/storage/README.md b/extras/storage/README.md
new file mode 100644
index 000000000000..d106b6d7378c
--- /dev/null
+++ b/extras/storage/README.md
@@ -0,0 +1,7 @@
+# Storage helpers
+
+Declare and manage external volumes for models and caches.
+
+- storage-config.yaml: Declarative host/container paths
+- setup_local.sh: Helper to prepare a local volume or directory
+- scripts/: Utilities for warmup, cache management, mounts
diff --git a/extras/storage/scripts/warm_cache.sh b/extras/storage/scripts/warm_cache.sh
new file mode 100644
index 000000000000..1d97b7f044f6
--- /dev/null
+++ b/extras/storage/scripts/warm_cache.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Placeholder for cache warmup logic.
+# Example usage: ./warm_cache.sh meta-llama/Llama-3-8B /models
+MODEL_ID=${1:-meta-llama/Llama-3-8B}
+TARGET=${2:-/models}
+mkdir -p "$TARGET"
+echo "(scaffold) Would warm cache for $MODEL_ID under $TARGET"
diff --git a/extras/storage/setup_local.sh b/extras/storage/setup_local.sh
new file mode 100644
index 000000000000..101826bc7396
--- /dev/null
+++ b/extras/storage/setup_local.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Prepare a local directory for models and ensure reasonable permissions.
+TARGET=${1:-/mnt/ml-models}
+mkdir -p "$TARGET"
+chmod 775 "$TARGET" || true
+
+echo "Model storage prepared at: $TARGET"
diff --git a/extras/storage/storage-config.yaml b/extras/storage/storage-config.yaml
new file mode 100644
index 000000000000..90310b572b3c
--- /dev/null
+++ b/extras/storage/storage-config.yaml
@@ -0,0 +1,4 @@
+model_volume:
+  path_host: "/mnt/ml-models"
+  path_container: "/models"
+  shared: true
diff --git a/extras/testing/README.md b/extras/testing/README.md
new file mode 100644
index 000000000000..2c64d538ac97
--- /dev/null
+++ b/extras/testing/README.md
@@ -0,0 +1,7 @@
+# Testing and benchmarking harness
+
+- Define a matrix of models/environments in `test_matrix.yaml`.
+- Run via `python extras/testing/run_tests.py --output-dir extras/testing/results/$(date +%F_%H-%M)`.
+- Store results in `results/` with timestamps for regression tracking.
+
+This scaffolding is intentionally minimal; models and benchmarks can be added incrementally.
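+
+A hypothetical first session (result paths are illustrative):
+
+```bash
+python extras/testing/run_tests.py --models Example-Llama3-8B \
+    --output-dir extras/testing/results/baseline
+python extras/testing/compare_results.py \
+    extras/testing/results/baseline/scaffold.json \
+    extras/testing/results/later/scaffold.json
+```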
diff --git a/extras/testing/compare_results.py b/extras/testing/compare_results.py
new file mode 100644
index 000000000000..f6c91bdd6667
--- /dev/null
+++ b/extras/testing/compare_results.py
@@ -0,0 +1,31 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from __future__ import annotations
+
+import argparse
+import json
+
+
+def load(path: str) -> dict:
+    with open(path, encoding="utf-8") as f:
+        return json.load(f)
+
+
+def main() -> int:
+    p = argparse.ArgumentParser()
+    p.add_argument("a")
+    p.add_argument("b")
+    args = p.parse_args()
+
+    A = load(args.a)
+    B = load(args.b)
+
+    # Placeholder comparison: print keys that differ
+    diffs = sorted(set(A.keys()) ^ set(B.keys()))
+    print(json.dumps({"diff_keys": diffs}))
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/extras/testing/run_tests.py b/extras/testing/run_tests.py
new file mode 100644
index 000000000000..131521c0dbac
--- /dev/null
+++ b/extras/testing/run_tests.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Minimal, non-destructive test harness that prints a JSON line per test.
+This is a scaffold; integrate with your local launchers or CI as needed.
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import os
+from datetime import datetime
+
+
+def main() -> int:
+    p = argparse.ArgumentParser()
+    p.add_argument("--cuda-version",
+                   default=os.getenv("CUDA_VERSION", "12.9.1"))
+    p.add_argument("--ubi-version", default=os.getenv("UBI_VERSION", "9.4"))
+    p.add_argument("--models", default="Example-Llama3-8B")
+    p.add_argument("--output-dir",
+                   default=os.path.join("extras", "testing", "results",
+                                        datetime.now().strftime("%F_%H-%M")))
+    args = p.parse_args()
+
+    os.makedirs(args.output_dir, exist_ok=True)
+
+    result = {
+        "ts": datetime.utcnow().isoformat() + "Z",
+        "cuda": args.cuda_version,
+        "ubi": args.ubi_version,
+        "models": args.models.split(","),
+        "status": "scaffold",
+        "notes": "Integrate with vLLM server/client to collect real metrics.",
+    }
+
+    out_path = os.path.join(args.output_dir, "scaffold.json")
+    with open(out_path, "w", encoding="utf-8") as f:
+        json.dump(result, f, indent=2)
+
+    print(json.dumps({"written": out_path}))
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/extras/testing/test_matrix.yaml b/extras/testing/test_matrix.yaml
new file mode 100644
index 000000000000..270e7ff5ec13
--- /dev/null
+++ b/extras/testing/test_matrix.yaml
@@ -0,0 +1,16 @@
+models:
+  - name: Example-Llama3-8B
+    id: meta-llama/Llama-3-8B
+    chat_template: chat_templates/llama-3-instruct.jinja
+    params:
+      max_tokens: 64
+      temperature: 0.7
+
+environments:
+  - cuda: 12.9.1
+    ubi: 9.4
+
+benchmarks:
+  - name: inference_speed
+    input: "Summarize: vLLM extras modularization plan."
+    metrics: [latency_ms, tokens_per_sec]
diff --git a/vllm/device_allocator/cumem.py b/vllm/device_allocator/cumem.py
index 7963fb15c419..69f38fd0a178 100644
--- a/vllm/device_allocator/cumem.py
+++ b/vllm/device_allocator/cumem.py
@@ -143,7 +143,9 @@ def get_instance() -> "CuMemAllocator":
         return CuMemAllocator.instance
 
     def __init__(self):
-        conf = os.environ.get("PYTORCH_CUDA_ALLOC_CONF", "")
+        # Prefer new env var; fall back to deprecated one for compatibility
+        conf = os.environ.get("PYTORCH_ALLOC_CONF",
+                              os.environ.get("PYTORCH_CUDA_ALLOC_CONF", ""))
         assert "expandable_segments:True" not in conf, \
             ("Expandable segments are not compatible with memory pool. "
              "Please track https://github.com/pytorch/pytorch/issues/147851 "
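
A standalone sketch (not part of the patch) of the lookup order the cumem change implements; the new variable wins whenever both are set, and the sample values are just real PyTorch allocator options used for illustration:

```bash
export PYTORCH_CUDA_ALLOC_CONF="max_split_size_mb:128"  # deprecated name
export PYTORCH_ALLOC_CONF="backend:native"              # preferred name
python - <<'PY'
import os
# Same lookup order as the patched CuMemAllocator.__init__
conf = os.environ.get("PYTORCH_ALLOC_CONF",
                      os.environ.get("PYTORCH_CUDA_ALLOC_CONF", ""))
print(conf)  # -> backend:native
PY
```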