diff --git a/.github/ci-trigger-20250814-1 b/.github/ci-trigger-20250814-1
new file mode 100644
index 000000000000..8ca993aa58b2
--- /dev/null
+++ b/.github/ci-trigger-20250814-1
@@ -0,0 +1 @@
+trigger: sync_with_upstream
diff --git a/.github/workflows/sync_with_upstream.yml b/.github/workflows/sync_with_upstream.yml
new file mode 100644
index 000000000000..df1048a43833
--- /dev/null
+++ b/.github/workflows/sync_with_upstream.yml
@@ -0,0 +1,80 @@
+name: Sync with Upstream
+
+on:
+  schedule:
+    - cron: '0 0 * * *' # Runs daily at midnight
+  push:
+    branches:
+      - main
+
+jobs:
+  sync:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Set up Git
+        run: |
+          git config --global user.name 'Zhuul'
+          git config --global user.email '40538530+Zhuul@users.noreply.github.com'
+
+      - name: Add upstream remote
+        run: git remote add upstream https://github.com/vllm-project/vllm.git
+
+      - name: Fetch upstream changes
+        run: git fetch upstream
+
+      - name: Merge upstream changes
+        id: merge
+        run: |
+          git checkout main
+          git merge upstream/main || {
+            echo "Merge conflict detected. Creating a new branch for manual resolution."
+            echo "conflict=true" >> "$GITHUB_OUTPUT"  # record first: a failing checkout/push below must not lose the flag
+            git checkout -b "merge-conflict-$(date +%Y%m%d%H%M%S)"
+            git push origin HEAD
+            exit 1  # fail the job so the push / pull-request steps are skipped
+          }
+          echo "conflict=false" >> "$GITHUB_OUTPUT"
+
+      - name: Check for workflow file changes
+        id: workflow_change
+        run: |
+          if git diff --name-only origin/main HEAD | grep '^\.github/workflows/'; then  # only files the merge itself changed
+            echo "workflow_changed=true" >> "$GITHUB_OUTPUT"
+          else
+            echo "workflow_changed=false" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Set up PAT authentication
+        env:
+          GH_PAT: ${{ secrets.GH_PAT }}
+        run: |
+          git remote set-url origin "https://Zhuul:${GH_PAT}@github.com/Zhuul/vllm.git"
+
+      - name: Push changes if no workflow files changed
+        if: steps.workflow_change.outputs.workflow_changed == 'false' && steps.merge.outputs.conflict == 'false'
+        run: git push origin main
+
+      - name: Create Pull Request for workflow file changes
+        if: steps.workflow_change.outputs.workflow_changed == 'true' && steps.merge.outputs.conflict == 'false'
+        uses: peter-evans/create-pull-request@v6
+        with:
+          token: ${{ secrets.GH_PAT }}
+          commit-message: "Sync with upstream: update workflow files"
+          title: "Sync with upstream: update workflow files"
+          body: |
+            This PR was automatically created because workflow files were updated while syncing with upstream.
+            Please review and merge.
+          branch: workflow-sync-${{ github.run_id }}
+          base: main
+
+      - name: Send notification if merge conflict
+        if: failure() && steps.merge.outputs.conflict == 'true'  # failure() is required: the merge step exits 1 on conflict
+        run: |
+          echo "Merge conflict detected. Manual intervention required."
+          # Add your notification logic here (e.g., send an email, create an issue, etc.)
diff --git a/docs/contributing/README.md b/docs/contributing/README.md
index 5a2a70d57e85..1c2a31cf895c 100644
--- a/docs/contributing/README.md
+++ b/docs/contributing/README.md
@@ -31,6 +31,8 @@ See <gh-file:LICENSE>.
 Depending on the kind of development you'd like to do (e.g. Python, CUDA), you can choose to build vLLM with or without compilation.
 Check out the [building from source][build-from-source] documentation for details.
 
+For a containerized developer workflow, see the [Podman-first Development Environment](./podman-dev.md) guide.
+
 For an optimized workflow when iterating on C++/CUDA kernels, see the [Incremental Compilation Workflow](./incremental_build.md) for recommendations.
 
 ### Building the docs with MkDocs
diff --git a/docs/contributing/podman-dev.md b/docs/contributing/podman-dev.md
new file mode 100644
index 000000000000..881e495f8421
--- /dev/null
+++ b/docs/contributing/podman-dev.md
@@ -0,0 +1,41 @@
+---
+title: Podman-first Development Environment
+---
+
+This guide documents the Podman-first development workflow for building vLLM from source with CUDA and PyTorch nightly.
+
+Primary entrypoint
+
+- Windows (PowerShell): `./extras/podman/run.ps1`
+- Linux/macOS (bash): `extras/podman/run.sh`
+
+Legacy launchers at `extras/run-vllm-dev.ps1` and `extras/run-vllm-dev.sh` are deprecated and forward to the Podman wrappers.
+
+Prerequisites
+
+- Podman with GPU CDI enabled (on Windows, use Podman Desktop + WSL; ensure NVIDIA drivers and CUDA are installed on the host).
+- Optional named volume for build/work space, e.g., `vllm-work`.
+
+Quick start
+
+Windows (PowerShell):
+
+```powershell
+./extras/podman/run.ps1 -Build
+./extras/podman/run.ps1 -GPUCheck
+./extras/podman/run.ps1 -Setup -WorkVolume vllm-work -Progress
+```
+
+Linux/macOS (bash):
+
+```bash
+extras/podman/run.sh --build
+extras/podman/run.sh --gpu-check
+extras/podman/run.sh --setup --work-volume vllm-work --progress
+```
+
+Notes
+
+- The image uses CUDA 12.9 UBI9 and installs PyTorch nightly cu129 first to ensure latest GPU arch support (including sm_120 when present).
+- The setup step performs an editable vLLM install without downgrading torch family packages.
+- Use a named Podman volume for `/opt/work` to avoid `/tmp` tmpfs pressure and to speed up rebuilds.
diff --git a/extras/.dockerignore b/extras/.dockerignore
new file mode 100644
index 000000000000..60a8d81a82c1
--- /dev/null
+++ b/extras/.dockerignore
@@ -0,0 +1,39 @@
+# Reduce build context to avoid Windows Podman tar write issues
+.git
+.github
+.vscode
+.venv
+venv
+node_modules
+build
+dist
+csrc/
+vllm/
+benchmarks/
+docs/
+examples/
+tests/
+**/__pycache__
+**/*.pyc
+**/*.pyo
+**/*.pyd
+**/*.so
+**/*.o
+**/*.a
+**/*.dll
+**/*.dylib
+extras/build.log
+extras/*.bak
+extras/tools/
+extras/run-vllm-dev-*.ps1
+extras/run-vllm-dev-*.sh
+extras/*wsl*
+extras/*docker*.ps1
+
+!extras/Dockerfile
+!extras/run-vllm-dev.ps1
+!extras/run-vllm-dev.sh
+!extras/dev-setup.sh
+!requirements/
+!pyproject.toml
+!setup.py
diff --git a/extras/Dockerfile b/extras/Dockerfile
new file mode 100644
index 000000000000..6a5f5a6e4e9d
--- /dev/null
+++ b/extras/Dockerfile
@@ -0,0 +1,168 @@
+# vLLM Development Container with GPU Support
+# Uses vLLM's own requirements for automatic dependency management
+
+FROM nvidia/cuda:12.9.1-cudnn-devel-ubi9
+
+# Set CUDA environment variables for build tools
+ENV CUDA_HOME=/usr/local/cuda
+ENV CUDA_ROOT=/usr/local/cuda
+ENV PATH=$CUDA_HOME/bin:$PATH
+ENV LD_LIBRARY_PATH=$CUDA_HOME/lib64:$LD_LIBRARY_PATH
+ENV CUDA_TOOLKIT_ROOT_DIR=$CUDA_HOME
+ENV CUDNN_LIBRARY_PATH=/usr/lib64
+ENV CUDNN_INCLUDE_PATH=/usr/include
+
+# Install system packages with additional CUDA development libraries
+RUN dnf update -y && dnf install --allowerasing -y \
+    python3 python3-pip python3-devel \
+    git gcc gcc-c++ cmake ninja-build \
+    make patch which findutils tar rsync \
+    wget curl vim nano \
+    && dnf clean all
+
+# Create symlinks for python
+RUN ln -sf /usr/bin/python3 /usr/bin/python
+
+# Create a non-root user for development
+RUN useradd -m -s /bin/bash vllmuser && \
+    echo "vllmuser ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers
+
+# Install essential system tools
+RUN dnf install -y hostname iproute iputils
+
+# Add NVIDIA Machine Learning repo for RHEL9/UBI9 and install NCCL runtime/devel
+# Needed for PyTorch nightly cu129 to avoid ncclCommWindowRegister symbol errors
+# Install NCCL runtime/devel from the CUDA repository available in the base image
+RUN set -euxo pipefail \
+    && dnf makecache -y \
+    && (dnf install -y libnccl libnccl-devel || dnf install -y libnccl-2 libnccl-devel-2) \
+    && dnf clean all
+
+# Set working directory and adjust ownership
+WORKDIR /workspace
+RUN chown -R vllmuser:vllmuser /workspace
+
+# Create build directories with proper permissions
+RUN mkdir -p /workspace/.deps && chown -R vllmuser:vllmuser /workspace/.deps && \
+    mkdir -p /tmp/vllm-build && chmod 777 /tmp/vllm-build && \
+    mkdir -p /opt/work && chmod 777 /opt/work && \
+    mkdir -p /home/vllmuser/.cache && chown -R vllmuser:vllmuser /home/vllmuser/.cache && \
+    mkdir -p /home/vllmuser/.ccache && chown -R vllmuser:vllmuser /home/vllmuser/.ccache && \
+    mkdir -p /home/vllmuser/.cmake && chown -R vllmuser:vllmuser /home/vllmuser/.cmake && \
+    chmod -R 755 /workspace && \
+    chmod -R 777 /tmp
+
+# Switch to the non-root user
+USER vllmuser
+
+# Create and activate virtual environment
+ENV VIRTUAL_ENV=/home/vllmuser/venv
+RUN python3 -m venv $VIRTUAL_ENV
+ENV PATH="$VIRTUAL_ENV/bin:$PATH"
+
+# Set pip configuration
+ENV PIP_DISABLE_PIP_VERSION_CHECK=1
+ENV PIP_NO_CACHE_DIR=1
+ENV PYTHONUNBUFFERED=1
+ENV PIP_DEFAULT_TIMEOUT=120
+ENV PIP_RETRIES=5
+ENV PIP_PREFER_BINARY=1
+
+# Upgrade pip, setuptools and wheel (the specifier is quoted so the shell does not treat ">=61" as a redirect)
+RUN pip install --upgrade pip "setuptools>=61" wheel
+
+COPY requirements/ /tmp/requirements/
+
+# Install PyTorch nightly first (includes latest GPU arch support such as Blackwell sm_120 when present)
+RUN pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu129
+
+# Install modern build tools and vLLM's build dependencies and CUDA deps early,
+# but sanitize requirements to avoid downgrading torch-family or forcing xformers pins.
+COPY pyproject.toml /tmp/pyproject.toml
+RUN set -euxo pipefail \
+        && cd /tmp \
+        && pip install "setuptools>=61" "setuptools-scm>=8" build wheel ninja cmake \
+        && mkdir -p /tmp/requirements_sanitized \
+        && for f in build.txt cuda.txt common.txt; do \
+                 if [ -f "/tmp/requirements/$f" ]; then \
+                     sed -E '/^(torch|torchvision|torchaudio|xformers)\b/Id' "/tmp/requirements/$f" > "/tmp/requirements_sanitized/$f"; \
+                 fi; \
+             done \
+    && pip install --pre \
+        -r /tmp/requirements_sanitized/build.txt \
+        -r /tmp/requirements_sanitized/cuda.txt \
+        -r /tmp/requirements_sanitized/common.txt \
+    && pip install --pre --upgrade \
+        torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu129
+
+# Install minimal development extras
+RUN pip install pytest pytest-asyncio ipython
+
+# Note: vLLM will be installed from source in development mode via dev-setup.sh
+# This ensures compatibility with the PyTorch nightly build
+
+# Create activation script for easy virtual environment access (no backslash-escaped quotes: echo writes them literally)
+RUN echo '#!/bin/bash' > /home/vllmuser/activate_venv.sh && \
+    echo 'source /home/vllmuser/venv/bin/activate' >> /home/vllmuser/activate_venv.sh && \
+    echo 'echo "Virtual environment activated: $VIRTUAL_ENV"' >> /home/vllmuser/activate_venv.sh && \
+    echo 'echo "Python version: $(python --version)"' >> /home/vllmuser/activate_venv.sh && \
+    echo 'echo "PyTorch version: $(python -c "import torch; print(torch.__version__)")"' >> /home/vllmuser/activate_venv.sh && \
+    echo 'echo "CUDA available: $(python -c "import torch; print(torch.cuda.is_available())")"' >> /home/vllmuser/activate_venv.sh && \
+    chmod +x /home/vllmuser/activate_venv.sh
+
+# Ensure virtual environment is activated in .bashrc
+RUN echo 'source /home/vllmuser/venv/bin/activate' >> /home/vllmuser/.bashrc && \
+    echo 'echo "🐍 Python virtual environment activated"' >> /home/vllmuser/.bashrc && \
+    echo 'echo "🚀 Ready for vLLM development!"' >> /home/vllmuser/.bashrc
+
+# Create development helper script that uses current workspace requirements
+RUN echo '#!/bin/bash' > /home/vllmuser/setup_vllm_dev.sh && \
+    echo 'echo "🔧 Setting up vLLM for development..."' >> /home/vllmuser/setup_vllm_dev.sh && \
+    echo 'cd /workspace' >> /home/vllmuser/setup_vllm_dev.sh && \
+    echo '# Use temporary build directory to avoid permission issues' >> /home/vllmuser/setup_vllm_dev.sh && \
+    echo 'export TMPDIR=/tmp/vllm-build' >> /home/vllmuser/setup_vllm_dev.sh && \
+    echo 'mkdir -p "$TMPDIR" && chmod 777 "$TMPDIR"' >> /home/vllmuser/setup_vllm_dev.sh && \
+    echo 'export CMAKE_BUILD_PARALLEL_LEVEL=4' >> /home/vllmuser/setup_vllm_dev.sh && \
+    echo 'export VLLM_INSTALL_PUNICA_KERNELS=0' >> /home/vllmuser/setup_vllm_dev.sh && \
+    echo 'export MAX_JOBS=4' >> /home/vllmuser/setup_vllm_dev.sh && \
+    echo '# Install current workspace requirements first' >> /home/vllmuser/setup_vllm_dev.sh && \
+    echo 'if [ -f requirements/common.txt ]; then pip install -r requirements/common.txt; fi' >> /home/vllmuser/setup_vllm_dev.sh && \
+    echo '# Use temporary directory for CMake build files' >> /home/vllmuser/setup_vllm_dev.sh && \
+    echo 'FETCHCONTENT_BASE_DIR="$TMPDIR/deps" pip install -e . --no-deps --no-build-isolation --verbose' >> /home/vllmuser/setup_vllm_dev.sh && \
+    echo 'echo "✅ vLLM installed in editable mode!"' >> /home/vllmuser/setup_vllm_dev.sh && \
+    echo 'python -c "import vllm; print(\"vLLM version:\", vllm.__version__)"' >> /home/vllmuser/setup_vllm_dev.sh && \
+    chmod +x /home/vllmuser/setup_vllm_dev.sh
+
+# Add environment variables for better CUDA memory management and build optimization
+ENV PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
+# Do not pin a single GPU here; let runtime inject device selection
+# ENV CUDA_VISIBLE_DEVICES=0
+ENV CMAKE_BUILD_PARALLEL_LEVEL=4
+ENV VLLM_INSTALL_PUNICA_KERNELS=0
+ENV MAX_JOBS=4
+
+# Enable ccache for faster rebuilds
+ENV CCACHE_DIR=/home/vllmuser/.ccache
+ENV CCACHE_MAXSIZE=10G
+ENV PATH=/usr/lib64/ccache:$PATH
+
+# CUDA arch list including legacy + latest (sm_120) so builds cover both older and newest GPUs.
+ENV TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0 12.0"
+# Do not force-disable Machete; allow upstream defaults. User may still pass -e CMAKE_ARGS for custom CMake settings.
+ENV CMAKE_ARGS=""
+
+# WSL2-specific CUDA environment configuration
+ENV NVIDIA_VISIBLE_DEVICES=all
+ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
+ENV LD_LIBRARY_PATH=/usr/local/cuda/compat:/usr/lib/wsl/drivers:/usr/lib/wsl/lib:/usr/lib/x86_64-linux-gnu:/usr/local/cuda/lib64:/usr/local/cuda/lib:$LD_LIBRARY_PATH
+
+# Add runtime library detection script
+RUN echo '#!/bin/bash' > /home/vllmuser/check_cuda_libs.sh && \
+    echo 'echo "=== CUDA Library Check ==="' >> /home/vllmuser/check_cuda_libs.sh && \
+    echo 'echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH"' >> /home/vllmuser/check_cuda_libs.sh && \
+    echo 'echo "Searching for CUDA libraries..."' >> /home/vllmuser/check_cuda_libs.sh && \
+    echo 'find /usr/lib/wsl -name "libcuda.so*" 2>/dev/null | head -3 || echo "No WSL CUDA libs"' >> /home/vllmuser/check_cuda_libs.sh && \
+    echo 'ldconfig -p | grep cuda | head -3 || echo "No CUDA in ldconfig"' >> /home/vllmuser/check_cuda_libs.sh && \
+    echo 'echo "PyTorch CUDA status:"' >> /home/vllmuser/check_cuda_libs.sh && \
+    echo 'python -c "import torch; print(f\"CUDA available: {torch.cuda.is_available()}\"); print(f\"Device count: {torch.cuda.device_count()}\")" 2>/dev/null || echo "PyTorch not available"' >> /home/vllmuser/check_cuda_libs.sh && \
+    chmod +x /home/vllmuser/check_cuda_libs.sh
diff --git a/extras/README.md b/extras/README.md
new file mode 100644
index 000000000000..bafd5a6dc4ca
--- /dev/null
+++ b/extras/README.md
@@ -0,0 +1,50 @@
+# extras/ overview
+
+This directory hosts all non-core assets: container/build tooling, configs, testing, storage helpers, and optional patches. The goals are clarity, single-responsibility, and easy extension without touching the vLLM core.
+
+Suggested layout (implemented here):
+
+- podman/ — Podman-specific build/launch wrappers and helpers
+- configs/ — Centralized, declarative versions and build configuration
+- secrets/ — Gitignored area for local tokens/config (not committed)
+- testing/ — Test/benchmark harness, matrices, and results
+- storage/ — External volumes and cache management helpers
+- patches/ — Optional patch/plug-in mechanism for controlled tweaks
+
+Primary entrypoint: use `extras/podman/` as the canonical way to build and run the dev container.
+
+Deprecation: the legacy launchers `extras/run-vllm-dev.sh` and `extras/run-vllm-dev.ps1` are deprecated and now forward to the Podman wrappers. Please switch to `extras/podman/run.sh` (Linux/macOS) or `extras/podman/run.ps1` (Windows).
+
+## Quick start
+
+- Edit `extras/configs/build.env` to set CUDA/UBI/Python defaults.
+- Use `extras/podman/build.sh` to build images with those defaults.
+- Use `extras/podman/run.ps1` (Windows) or `extras/podman/run.sh` (Linux/macOS) to run the dev container.
+
+Examples
+
+- Windows (PowerShell):
+    - Build image: `./extras/podman/run.ps1 -Build`
+    - GPU check: `./extras/podman/run.ps1 -GPUCheck`
+    - Setup build: `./extras/podman/run.ps1 -Setup -WorkVolume vllm-work -Progress`
+
+- Linux/macOS (bash):
+    - Build image: `extras/podman/run.sh --build`
+    - GPU check: `extras/podman/run.sh --gpu-check`
+    - Setup build: `extras/podman/run.sh --setup --work-volume vllm-work --progress`
+
+## Secrets
+
+Place tokens in `extras/secrets/` per its README and never commit them. Load them in session or bind-mount into containers.
+
+## Testing
+
+See `extras/testing/README.md` for defining a matrix, recording results, and comparing runs.
+
+## Storage
+
+See `extras/storage/README.md` for model/cache volume guidance for performance and reproducibility.
+
+## Patches
+
+If you need to tweak upstream vLLM without forking, use `extras/patches/` to stage diffs and apply them during build.
diff --git a/extras/configs/README.md b/extras/configs/README.md
new file mode 100644
index 000000000000..98ef0f02f786
--- /dev/null
+++ b/extras/configs/README.md
@@ -0,0 +1,9 @@
+# configs README
+
+This folder centralizes editable configuration for images/builds:
+
+- build.env: Bash-exported defaults (CUDA/UBI/Python/vLLM tag, arch list, volumes)
+- build.yaml (optional): YAML equivalent for tools that prefer structured configs
+- versions.json (optional): Machine-friendly manifest for automation
+
+Consumers (scripts/Containerfiles) should read values from here and allow runtime overrides via environment variables.
diff --git a/extras/configs/build.env b/extras/configs/build.env
new file mode 100644
index 000000000000..37babe3a18d0
--- /dev/null
+++ b/extras/configs/build.env
@@ -0,0 +1,24 @@
+# Build configuration
+#
+# Scripts should source this file to obtain default versions.
+# Values can be overridden by environment variables provided at runtime.
+
+# CUDA / UBI / Python baselines
+export CUDA_VERSION=${CUDA_VERSION:-12.9.1}
+export UBI_VERSION=${UBI_VERSION:-9}
+export PYTHON_VERSION=${PYTHON_VERSION:-3.11}
+
+# vLLM branch/tag to use inside the container when cloning or referring
+export VLLM_TAG=${VLLM_TAG:-main}
+
+# Architectures (space separated) for PyTorch/NVCC
+# Include Blackwell sm_120 via TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0 12.0"
+export TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST:-"7.0 7.5 8.0 8.6 8.9 9.0 12.0"}
+
+# Named volume for build scratch/work dir (Podman recommended)
+export VLLM_WORK_VOLUME=${VLLM_WORK_VOLUME:-vllm-work}
+export VLLM_WORK_DIR_CONTAINER=${VLLM_WORK_DIR_CONTAINER:-/opt/work}
+
+# Image naming
+export VLLM_BASE_IMAGE=${VLLM_BASE_IMAGE:-"nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubi9"}
+export VLLM_IMAGE_TAG=${VLLM_IMAGE_TAG:-"vllm-cuda${CUDA_VERSION}-ubi${UBI_VERSION}"}
diff --git a/extras/configs/build.yaml b/extras/configs/build.yaml
new file mode 100644
index 000000000000..277737dd92df
--- /dev/null
+++ b/extras/configs/build.yaml
@@ -0,0 +1,11 @@
+cuda:
+  version: "12.9.1"
+  tag: "latest"
+ubi:
+  version: "9.4"
+  tag: "latest"
+python:
+  version: "3.11"
+  tag: "latest"
+vllm:
+  tag: main
diff --git a/extras/old/build-from-source.sh b/extras/old/build-from-source.sh
new file mode 100644
index 000000000000..58db6e19e37e
--- /dev/null
+++ b/extras/old/build-from-source.sh
@@ -0,0 +1,50 @@
+#!/usr/bin/env bash
+set -euo pipefail
+set -x
+
+# Activate venv if present
+if [ -f /home/vllmuser/venv/bin/activate ]; then
+  source /home/vllmuser/venv/bin/activate || true
+fi
+
+# Temporary build dirs to avoid permission issues
+export TMPDIR=${TMPDIR:-/tmp/vllm-build}
+umask 0002
+mkdir -p "$TMPDIR" || true
+chmod 777 "$TMPDIR" || true
+export FETCHCONTENT_BASE_DIR="${FETCHCONTENT_BASE_DIR:-$TMPDIR/deps}"
+
+# Parallelism and CUDA arch list (include Blackwell sm_120 == 12.0)
+export CMAKE_BUILD_PARALLEL_LEVEL=${CMAKE_BUILD_PARALLEL_LEVEL:-4}
+export MAX_JOBS=${MAX_JOBS:-4}
+export NVCC_THREADS=${NVCC_THREADS:-2}
+export TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-7.0 7.5 8.0 8.6 8.9 9.0 12.0}"
+
+# Keep FA2/FA3 and machete enabled by default
+export VLLM_DISABLE_FA3=${VLLM_DISABLE_FA3:-0}   # 0=build FA3
+export FA3_MEMORY_SAFE_MODE=${FA3_MEMORY_SAFE_MODE:-0}
+
+echo "=== Build env ==="
+echo "TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST"
+echo "FETCHCONTENT_BASE_DIR=$FETCHCONTENT_BASE_DIR"
+echo "CMAKE_BUILD_PARALLEL_LEVEL=$CMAKE_BUILD_PARALLEL_LEVEL MAX_JOBS=$MAX_JOBS NVCC_THREADS=$NVCC_THREADS"
+
+python - << 'PY'
+import os, torch
+print('torch', torch.__version__)
+print('cuda_version', torch.version.cuda)
+print('cuda_available', torch.cuda.is_available())
+print('arch_list', os.getenv('TORCH_CUDA_ARCH_LIST'))
+PY
+
+# Ensure core build tools present (setup will also ensure, this is harmless)
+python -m pip install -r requirements/build.txt -q
+
+# Run editable build with verbose logs and capture output
+mkdir -p extras
+set +e
+python -m pip install -e . --no-build-isolation -vv |& tee extras/build.log
+status=${PIPESTATUS[0]}
+set -e
+echo "=== pip exited with code: $status ==="
+exit $status
diff --git a/extras/old/dev-setup.sh b/extras/old/dev-setup.sh
new file mode 100644
index 000000000000..9e3edb1da6f4
--- /dev/null
+++ b/extras/old/dev-setup.sh
@@ -0,0 +1,319 @@
+#!/bin/bash
+# dev-setup.sh - Set up vLLM development environment using nightly wheels
+set -euo pipefail
+
+echo "=== vLLM Development Environment Setup ==="
+echo "Container: $(hostname)"
+echo "User: $(whoami)"
+echo "Working directory: $(pwd)"
+echo ""
+
+# Activate virtual environment
+echo "🐍 Activating Python virtual environment..."
+source /home/vllmuser/venv/bin/activate
+echo "Virtual environment: $VIRTUAL_ENV"
+echo "Python version: $(python --version)"
+echo ""
+
+# Check current PyTorch
+echo "📦 Current PyTorch:"
+python -c "import torch; print(f'PyTorch: {torch.__version__}'); print(f'CUDA available: {torch.cuda.is_available()}')" 2>/dev/null || echo "PyTorch not installed"
+echo ""
+
+### Optional: build from a local mirror to avoid slow Windows/virtiofs mounts during heavy C++ builds
+if [ "${LOCAL_MIRROR:-0}" = "1" ]; then
+    echo "📁 LOCAL_MIRROR=1 -> Copying sources for faster builds..."
+    DEST="/opt/work"
+    if ! mkdir -p "$DEST" 2>/dev/null; then
+        echo "⚠️  No permission to create $DEST, falling back to /tmp/work"
+        DEST="/tmp/work"
+        mkdir -p "$DEST"
+    fi
+    echo "   ➜ Mirror destination: $DEST"
+    # Ensure destination doesn't have a stray .git folder that could cause permission errors
+    rm -rf "$DEST/.git" 2>/dev/null || true
+    # Use tar pipeline but avoid preserving ownership/permissions/timestamps to prevent utime errors on Windows mounts
+    # Exclude .git to avoid permission issues and speed up copy
+    if ! tar -C /workspace --exclude='.git' -cf - . | tar -C "$DEST" -xf - --no-same-owner --no-same-permissions 2>/dev/null; then
+        echo "   ⚠️  tar copy failed (likely timestamp/perm issue). Falling back to rsync/cp ..."
+        shopt -s dotglob
+        if command -v rsync >/dev/null 2>&1; then
+            rsync -a --delete --exclude='.git' /workspace/ "$DEST"/ 2>/dev/null || true
+        else
+            for f in /workspace/*; do
+                bname="$(basename "$f")"
+                [ "$bname" = ".git" ] && continue
+                cp -R "$f" "$DEST"/ 2>/dev/null || true
+            done
+        fi
+        shopt -u dotglob
+    fi
+    export VLLM_SRC_DIR="$DEST"
+else
+    export VLLM_SRC_DIR=/workspace
+fi
+echo "Source dir for build: ${VLLM_SRC_DIR}"
+
+# Ensure a large, persistent temporary directory for heavy builds (pip/CMake use $TMPDIR)
+# Default to /opt/work/tmp unless user overrides via VLLM_TMPDIR/TMPDIR
+if [ -n "${VLLM_TMPDIR:-}" ]; then
+    export TMPDIR="$VLLM_TMPDIR"
+fi
+if [ -z "${TMPDIR:-}" ] || [[ "$TMPDIR" == "/tmp"* ]]; then
+    export TMPDIR="/opt/work/tmp"
+fi
+export TMP="$TMPDIR"; export TEMP="$TMPDIR"
+mkdir -p "$TMPDIR" 2>/dev/null || true
+echo "Using TMPDIR=$TMPDIR for build temps"
+
+# Install PyTorch with CUDA 12.9 for RTX 5090 support
+echo "🚀 Installing PyTorch nightly (CUDA 12.9 toolchain) ..."
+pip uninstall -y torch torchvision torchaudio 2>/dev/null || true
+pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu129
+
+# Create a constraints file to prevent downgrades of any currently installed package.
+# Use format "name>=version" to allow upgrades but disallow downgrades. Avoid third-party deps.
+CONSTRAINTS_FILE="/tmp/pip-constraints-installed.txt"
+python - <<'PY'
+try:
+    from importlib.metadata import distributions
+except Exception:  # py39 backport
+    from importlib_metadata import distributions  # type: ignore
+
+exclude = {pkg.lower() for pkg in (
+    'pip', 'setuptools', 'wheel'
+)}
+lines = []
+for d in distributions():
+    name = (d.metadata.get('Name') or '').strip()
+    if not name or name.lower() in exclude:
+        continue
+    ver = (d.version or '').strip()
+    if not ver:
+        continue
+    # Remove local version suffix (after '+') to keep constraint parser happy
+    pv = ver.split('+', 1)[0]
+    norm = name.lower().replace('_', '-')
+    lines.append(f"{norm}>={pv}")
+with open('/tmp/pip-constraints-installed.txt','w') as f:
+    f.write('\n'.join(sorted(set(lines))))
+print('📌 Constraints written to /tmp/pip-constraints-installed.txt (count):', len(lines))
+PY
+export PIP_CONSTRAINT="$CONSTRAINTS_FILE"
+echo "Using PIP_CONSTRAINT=$PIP_CONSTRAINT"
+
+# Set CUDA architecture list; include latest (sm_120) so builds are forward-compatible if such GPU is present.
+echo "🔧 Configuring CUDA architectures (legacy + latest)..."
+export TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0 12.0"
+echo "TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST"
+
+# Verify PyTorch version and CUDA capabilities
+echo "🔍 Verifying PyTorch installation..."
+python -c "
+import torch
+print(f'PyTorch version: {torch.__version__}')
+print(f'CUDA version: {torch.version.cuda}')
+print(f'CUDA available: {torch.cuda.is_available()}')
+if torch.cuda.is_available():
+    try:
+        device_props = torch.cuda.get_device_properties(0)
+        print(f'GPU: {torch.cuda.get_device_name(0)}')
+        print(f'Compute Capability: {device_props.major}.{device_props.minor}')
+        print(f'Memory: {device_props.total_memory // 1024**3} GB')
+        if device_props.major >= 12:  # sm_120 is compute capability 12.0; 9.x must not match
+            print('🎉 RTX 50xx series detected - sm_120 support available!')
+        else:
+            print(f'Detected GPU architecture: sm_{device_props.major}{device_props.minor}')
+    except Exception as e:
+        print(f'GPU details unavailable: {e}')
+        print('Note: This is common in containers - GPU access might need container restart')
+"
+echo ""
+
+echo "📦 Preparing to install vLLM from source (editable)..."
+pip uninstall vllm -y 2>/dev/null || true
+
+# Preinstall pinned deps to avoid long resolver work (esp. numba/llvmlite)
+echo "📋 Installing pinned requirements (build + cuda + common), sanitized to keep torch nightly..."
+mkdir -p /tmp/requirements_sanitized
+for f in build.txt cuda.txt common.txt; do
+    if [ -f "requirements/$f" ]; then
+        sed -E '/^(torch|torchvision|torchaudio|xformers)\b/Id' "requirements/$f" > "/tmp/requirements_sanitized/$f"
+    fi
+done
+pip install --pre \
+    -r /tmp/requirements_sanitized/build.txt \
+    -r /tmp/requirements_sanitized/cuda.txt \
+    -r /tmp/requirements_sanitized/common.txt
+
+# Reinstall PyTorch nightly to override any accidental downgrade from requirements
+echo "♻️  Ensuring PyTorch stays on nightly cu129 after requirements..."
+pip install --pre --upgrade \
+    torch torchvision torchaudio \
+    --index-url https://download.pytorch.org/whl/nightly/cu129
+
+# Optionally install xformers if requested; otherwise skip to avoid pin conflicts with torch nightlies.
+if [ "${WITH_XFORMERS:-0}" = "1" ]; then
+    echo "➕ Installing xformers (may override torch constraints)..."
+    pip install --pre xformers -f https://download.pytorch.org/whl/nightly/cu129/torch_nightly.html || true
+else
+    echo "⏭️  Skipping xformers (set WITH_XFORMERS=1 to include)"
+fi
+
+# Build environment tuning
+export VLLM_TARGET_DEVICE=cuda
+export SETUPTOOLS_SCM_PRETEND_VERSION="0.10.1.dev+cu129"
+# Place large build/dependency artifacts on /opt/work to avoid small /tmp tmpfs exhaustion
+export VLLM_BUILD_ROOT=${VLLM_BUILD_ROOT:-/opt/work}
+export FETCHCONTENT_BASE_DIR="$VLLM_BUILD_ROOT/vllm-build/deps"
+mkdir -p "$FETCHCONTENT_BASE_DIR"
+
+# ccache for faster rebuilds
+export CCACHE_DIR=/home/vllmuser/.ccache
+export CCACHE_MAXSIZE=10G
+export PATH=/usr/lib64/ccache:$PATH
+command -v ccache >/dev/null 2>&1 && ccache -s || true
+
+# Respect user-provided MAX_JOBS; otherwise derive a conservative default to avoid FA3 OOM (signal 9)
+if [ -z "${MAX_JOBS:-}" ]; then
+    # Derive from available cores but cap to 4 and adjust for memory pressure
+    CORES=$(nproc 2>/dev/null || echo 4)
+    # Read MemTotal (kB); if < 32GB, use 2; if < 16GB use 1
+    MEM_KB=$(grep -i MemTotal /proc/meminfo 2>/dev/null | awk '{print $2}')
+    if [ -n "$MEM_KB" ]; then
+        if [ "$MEM_KB" -lt 16000000 ]; then
+            MAX_JOBS=1
+        elif [ "$MEM_KB" -lt 32000000 ]; then
+            MAX_JOBS=2
+        else
+            MAX_JOBS=$(( CORES < 4 ? CORES : 4 ))
+        fi
+    else
+        MAX_JOBS=$(( CORES < 4 ? CORES : 4 ))
+    fi
+fi
+export MAX_JOBS
+
+# Allow an optional memory safe mode specifically for heavy FA3 compilation (can be toggled externally)
+if [ "${FA3_MEMORY_SAFE_MODE:-0}" = "1" ]; then
+    echo "⚠️  FA3_MEMORY_SAFE_MODE=1 -> Forcing MAX_JOBS=1 and NVCC_THREADS=1 to reduce peak RAM during compilation"
+    export MAX_JOBS=1
+    export NVCC_THREADS=1
+else
+    # If user has not set NVCC_THREADS, keep it low (2) to reduce per-translation-unit memory usage
+    if [ -z "${NVCC_THREADS:-}" ]; then
+        export NVCC_THREADS=2
+    fi
+fi
+
+# We no longer pass custom CMAKE_ARGS that refer to removed/unsupported options (e.g. ENABLE_MACHETE) to avoid noise.
+unset CMAKE_ARGS 2>/dev/null || true
+# Enable ccache via CMake compiler launchers (C/C++/CUDA) and enable verbose messages
+export CMAKE_ARGS="${CMAKE_ARGS:+$CMAKE_ARGS }-DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -DCMAKE_VERBOSE_MAKEFILE=ON -DCMAKE_RULE_MESSAGES=ON"
+export NINJA_STATUS="[%f/%t %o/sec] "
+export CMAKE_COLOR_DIAGNOSTICS=ON
+
+# By default we DO NOT disable FA3; user may export VLLM_DISABLE_FA3=1 before invoking this script to skip it.
+if [ -z "${VLLM_DISABLE_FA3:-}" ]; then
+    export VLLM_DISABLE_FA3=0
+fi
+
+echo "🔧 Build environment configured:"
+echo "  TORCH_CUDA_ARCH_LIST: $TORCH_CUDA_ARCH_LIST"
+echo "  MAX_JOBS: $MAX_JOBS"
+echo "  NVCC_THREADS: ${NVCC_THREADS:-unset}"
+echo "  FETCHCONTENT_BASE_DIR: $FETCHCONTENT_BASE_DIR"
+echo "  VLLM_DISABLE_FA3: $VLLM_DISABLE_FA3 (0=build FA3, 1=skip)"
+echo "  FA3_MEMORY_SAFE_MODE: ${FA3_MEMORY_SAFE_MODE:-0}"
+
+# Build and install vLLM
+echo "🏗️  Building vLLM from source (no dependency resolution)..."
+cd "$VLLM_SRC_DIR"
+# Ensure pip/CMake use our larger build root for temp files
+export TMPDIR="$VLLM_BUILD_ROOT/tmp"
+export CMAKE_BUILD_PARALLEL_LEVEL=${CMAKE_BUILD_PARALLEL_LEVEL:-$MAX_JOBS}
+mkdir -p "$TMPDIR" 2>/dev/null || true
+LOG_DST="$VLLM_SRC_DIR/extras/build.log"
+mkdir -p "$(dirname "$LOG_DST")" 2>/dev/null || true
+set -o pipefail
+TIMEFORMAT='⏱  Build time: %3lR'
+# Progress watcher is fully opt-in now (no auto-enable on TTY)
+PROGRESS_WATCH=${PROGRESS_WATCH:-0}
+
+# Optional lightweight progress watcher: echoes lines like "[25/341] ..." as they appear
+WATCH_PID=""
+if [ "$PROGRESS_WATCH" = "1" ]; then
+    echo "🪄 Progress watcher enabled (looking for [x/total] in build.log)"
+    (
+        # tail -F waits for file to appear; --pid ensures it exits with this script
+        tail --pid=$$ -n +1 -F "$LOG_DST" 2>/dev/null | \
+        awk 'match($0,/\[[0-9]+\/[0-9]+\]/){
+                     ts=strftime("%H:%M:%S");
+                     # print a compact, updating status line
+                     printf("\r[%s] %s", ts, substr($0, RSTART, RLENGTH));
+                     fflush(stdout);
+                 } END { print "" }'
+    ) &
+    WATCH_PID=$!
+fi
+
+# Prefer line-buffered output for better streaming through tee if stdbuf exists
+if command -v stdbuf >/dev/null 2>&1; then
+    time stdbuf -oL -eL pip install --no-build-isolation --no-deps -e . -vv | tee "$LOG_DST"
+else
+    time pip install --no-build-isolation --no-deps -e . -vv | tee "$LOG_DST"
+fi
+
+# Cleanup watcher so we leave the cursor nicely
+if [ -n "${WATCH_PID}" ]; then
+    kill "$WATCH_PID" 2>/dev/null || true
+    echo "" >&2
+fi
+echo "📄 Build log: $LOG_DST"
+
+if [ $? -eq 0 ]; then
+    echo "✅ vLLM editable install completed successfully"
+else
+    echo "❌ Failed to install vLLM"
+    exit 1
+fi
+
+echo ""
+echo "🧪 Testing vLLM installation..."
+python -c "import vllm; print('vLLM version:', vllm.__version__)"
+
+echo ""
+echo "🎮 Testing GPU support..."
+python -c "
+import torch
+print('CUDA available:', torch.cuda.is_available())
+if torch.cuda.is_available():
+    print('GPU count:', torch.cuda.device_count())
+    try:
+        print('Current GPU:', torch.cuda.get_device_name(0))
+    except Exception as e:
+        print('GPU name unavailable (container GPU access issue)')
+else:
+    print('No GPU detected - check container GPU mounting')
+"
+
+echo ""
+echo "📁 vLLM Development Environment Ready!"
+echo "======================================"
+echo "Source code: /workspace"
+echo "Virtual env: $VIRTUAL_ENV"
+echo "GPU support: $(python -c 'import torch; print(torch.cuda.is_available())')"
+echo ""
+echo "🛠️  Quick Commands:"
+echo "  python -c 'import vllm'                    # Test vLLM import"
+echo "  python -c 'import torch; print(torch.cuda.is_available())'  # Test CUDA"
+echo "  nvidia-smi                                 # Check GPU status"
+echo ""
+echo "🚀 Ready for vLLM development!"
+echo "- Edit code: files are mounted from host"
+echo "- Test changes: python -m pytest tests/"
+echo "- Test environment: python /workspace/extras/final_environment_test.py"
+echo "- Run vLLM: python -m vllm.entrypoints.openai.api_server"
+echo "- SSH access: ssh vllmuser@localhost -p 2222 (password: vllmdev)"
+echo ""
+echo "✨ Happy coding!"
diff --git a/extras/old/run-vllm-dev.ps1 b/extras/old/run-vllm-dev.ps1
new file mode 100644
index 000000000000..55820ff7471d
--- /dev/null
+++ b/extras/old/run-vllm-dev.ps1
@@ -0,0 +1,6 @@
+#!/usr/bin/env pwsh
+# Deprecated: please use extras/podman/run.ps1. This script forwards for back-compat.
+param([Parameter(ValueFromRemainingArguments=$true)] [string[]]$Args)
+$pod = Join-Path (Split-Path -Parent $PSScriptRoot) (Join-Path 'podman' 'run.ps1')  # this file lives in extras/old; the launcher is in extras/podman
+if (-not (Test-Path $pod)) { Write-Error "Missing: $pod"; exit 1 }
+& $pod @Args
diff --git a/extras/old/run-vllm-dev.sh b/extras/old/run-vllm-dev.sh
new file mode 100644
index 000000000000..b5a8a906ad06
--- /dev/null
+++ b/extras/old/run-vllm-dev.sh
@@ -0,0 +1,6 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Deprecated: please use extras/podman/run.sh. This script forwards for back-compat.
+SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}" )" &>/dev/null && pwd)
+exec "${SCRIPT_DIR}/../podman/run.sh" "$@"  # this file lives in extras/old; the launcher is in extras/podman
diff --git a/extras/old/test-vllm-container.ps1 b/extras/old/test-vllm-container.ps1
new file mode 100644
index 000000000000..61852551c124
--- /dev/null
+++ b/extras/old/test-vllm-container.ps1
@@ -0,0 +1,32 @@
+# vLLM Container Test Script: runs two throwaway containers and checks their exit codes.
+# Run this from the vLLM workspace directory
+
+Write-Host "🚀 Testing vLLM Container Environment..." -ForegroundColor Green
+Write-Host ("=" * 50)
+
+# Test 1: container starts and torch sees a GPU (python exits 1 when CUDA is unavailable)
+Write-Host "`n📋 Test 1: Container and GPU Access" -ForegroundColor Yellow
+& podman run --rm --device=nvidia.com/gpu=all vllm-dev-fixed:v2 bash -c 'source /home/vllmuser/venv/bin/activate; cd /tmp; python -c "import sys, torch; ok = torch.cuda.is_available(); print(ok); sys.exit(0 if ok else 1)"'
+
+if ($LASTEXITCODE -eq 0) {
+    Write-Host "✅ Container and GPU access working!" -ForegroundColor Green
+} else {
+    Write-Host "❌ Container or GPU access failed!" -ForegroundColor Red
+    exit 1
+}
+
+# Test 2: vLLM installation (the import itself fails with a non-zero exit code)
+Write-Host "`n📋 Test 2: vLLM Installation" -ForegroundColor Yellow
+& podman run --rm --device=nvidia.com/gpu=all vllm-dev-fixed:v2 bash -c 'source /home/vllmuser/venv/bin/activate; cd /tmp; python -c "import vllm; print(vllm.__version__)"'
+
+if ($LASTEXITCODE -eq 0) {
+    Write-Host "✅ vLLM installation working!" -ForegroundColor Green
+} else {
+    Write-Host "❌ vLLM installation failed!" -ForegroundColor Red
+    exit 1
+}
+
+Write-Host "`n🎉 SUCCESS: vLLM container environment is fully functional!" -ForegroundColor Green
+Write-Host "`n📖 Usage:" -ForegroundColor Cyan
+Write-Host '  podman run --rm -it --device=nvidia.com/gpu=all -v "${PWD}:/workspace" vllm-dev-fixed:v2' -ForegroundColor White
+Write-Host "`n📚 Documentation: See CONTAINER_SETUP_COMPLETE.md for detailed usage guide" -ForegroundColor Cyan
diff --git a/extras/patches/README.md b/extras/patches/README.md
new file mode 100644
index 000000000000..ff4f662c4588
--- /dev/null
+++ b/extras/patches/README.md
@@ -0,0 +1,5 @@
+# Patches and plugins scaffolding
+
+- Place unified diffs (*.diff) here.
+- Use `apply_patches.sh` to apply them before building.
+- Optionally, add Python plugins under `plugin/` and load dynamically at runtime.
diff --git a/extras/patches/apply_patches.sh b/extras/patches/apply_patches.sh
new file mode 100644
index 000000000000..70437f0bd645
--- /dev/null
+++ b/extras/patches/apply_patches.sh
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+PATCH_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
+ROOT_DIR=$(cd -- "${PATCH_DIR}/../.." &>/dev/null && pwd)
+
+shopt -s nullglob
+PATCHES=(${PATCH_DIR}/*.diff)
+shopt -u nullglob
+
+if [ ${#PATCHES[@]} -eq 0 ]; then
+  echo "[patches] No patches found; nothing to apply."
+  exit 0
+fi
+
+pushd "${ROOT_DIR}" >/dev/null
+for p in "${PATCHES[@]}"; do
+  echo "[patches] Applying ${p}"
+  git apply --check "${p}"
+  git apply "${p}"
+ done
+popd >/dev/null
+
+echo "[patches] Done."
diff --git a/extras/podman/Containerfile b/extras/podman/Containerfile
new file mode 100644
index 000000000000..d42bef4b344e
--- /dev/null
+++ b/extras/podman/Containerfile
@@ -0,0 +1,11 @@
+# syntax=docker/dockerfile:1.7-labs
+
+# Delegator Containerfile.
+# Build using the canonical Dockerfile in extras/ to avoid duplication.
+# This file only declares an empty placeholder stage; it does not build the dev image.
+FROM scratch AS noop
+
+# Usage:
+#   podman build -f extras/Dockerfile -t vllm-dev:latest .
+# or from this folder (wrapper script does this for you):
+#   bash build.sh
diff --git a/extras/podman/README.md b/extras/podman/README.md
new file mode 100644
index 000000000000..fb0c361203f2
--- /dev/null
+++ b/extras/podman/README.md
@@ -0,0 +1,12 @@
+# Podman helpers for vLLM
+
+This folder contains Podman-specific wrappers. They preserve back-compat by calling the existing scripts in `extras/` when present.
+
+- Containerfile: Placeholder stub (`FROM scratch`); real builds use `extras/Dockerfile`, which `build.sh` passes to `podman build`.
+- build.sh: Builds the image using values from `../configs/build.env`.
+- entrypoint/: Optional entrypoint scripts used inside containers.
+- scripts/: Utility helpers for Podman machine/GPU/volumes.
+
+See README for usage.
+
+Documentation: see `docs/contributing/podman-dev.md` for the Podman-first workflow and deprecation notes for legacy launchers.
diff --git a/extras/podman/build.sh b/extras/podman/build.sh
new file mode 100644
index 000000000000..a4ec5f445825
--- /dev/null
+++ b/extras/podman/build.sh
@@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Why: Back-compat wrapper that sources central config and builds using the canonical Dockerfile.
+
+SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
+ROOT_DIR=$(cd -- "${SCRIPT_DIR}/../.." &>/dev/null && pwd)
+CONFIG_DIR="${SCRIPT_DIR}/../configs"
+
+# Optional overrides: values set in build.env take precedence over the defaults below.
+if [ -f "${CONFIG_DIR}/build.env" ]; then
+  # shellcheck disable=SC1091
+  source "${CONFIG_DIR}/build.env"
+fi
+
+CUDA_VERSION=${CUDA_VERSION:-12.9.1}
+UBI_VERSION=${UBI_VERSION:-9}
+VLLM_IMAGE_TAG=${VLLM_IMAGE_TAG:-"vllm-cuda${CUDA_VERSION}-ubi${UBI_VERSION}"}
+
+CONTEXT="${ROOT_DIR}"
+DOCKERFILE="${ROOT_DIR}/extras/Dockerfile"  # absolute, so the build does not depend on the caller's cwd
+
+echo "[podman/build] Building image ${VLLM_IMAGE_TAG} with CUDA=${CUDA_VERSION}, UBI=${UBI_VERSION}"
+
+podman build \
+  --build-arg CUDA_VERSION="${CUDA_VERSION}" \
+  --build-arg UBI_VERSION="${UBI_VERSION}" \
+  -t "${VLLM_IMAGE_TAG}" \
+  -f "${DOCKERFILE}" \
+  "${CONTEXT}"
+
+echo "[podman/build] Done -> ${VLLM_IMAGE_TAG}"
diff --git a/extras/podman/dev-setup.sh b/extras/podman/dev-setup.sh
new file mode 100644
index 000000000000..09eea6079a02
--- /dev/null
+++ b/extras/podman/dev-setup.sh
@@ -0,0 +1,35 @@
+#!/usr/bin/env bash
+# Robust setup entrypoint: prefer extras/dev-setup.sh, fallback to extras/old/dev-setup.sh,
+# otherwise use the image-provided /home/vllmuser/setup_vllm_dev.sh.
+set -euo pipefail
+
+SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}" )" &>/dev/null && pwd)
+EXTRAS_DIR=$(cd -- "${SCRIPT_DIR}/.." &>/dev/null && pwd)
+
+# try_exec TARGET [ARGS...]
+#   TARGET  path of a candidate setup script
+#   ARGS    arguments forwarded unchanged to TARGET
+# Replaces this process with TARGET when it is a regular file; otherwise returns
+# normally so the caller can try the next candidate.
+try_exec() {
+	local target="$1"
+	shift  # drop TARGET so it is not forwarded to itself as the first argument
+	if [[ -f "$target" ]]; then
+		# Best effort: a chmod failure is ignored and exec is attempted anyway.
+		chmod +x "$target" 2>/dev/null || true
+		exec "$target" "$@"
+	fi
+}
+
+# 1) Current canonical path
+try_exec "${EXTRAS_DIR}/dev-setup.sh" "$@"
+
+# 2) Legacy archived location
+try_exec "${EXTRAS_DIR}/old/dev-setup.sh" "$@"
+
+# 3) Fallback to image helper
+try_exec /home/vllmuser/setup_vllm_dev.sh "$@"
+
+# Reached only when none of the candidates above exists.
+echo "[setup] No setup script found at extras/dev-setup.sh or extras/old/dev-setup.sh, and no image helper present." >&2
+exit 1
diff --git a/extras/podman/run.ps1 b/extras/podman/run.ps1
new file mode 100644
index 000000000000..6724db007417
--- /dev/null
+++ b/extras/podman/run.ps1
@@ -0,0 +1,180 @@
+#!/usr/bin/env pwsh
+[CmdletBinding()] param(
+	[switch]$Build,
+	[switch]$Interactive,
+	[string]$Command = "",
+	[switch]$Setup,
+	[switch]$GPUCheck,
+	[switch]$Mirror,
+	[switch]$Recreate,
+	[string]$WorkVolume = "",
+	[string]$WorkDirHost = "",
+	[switch]$Progress,
+	[switch]$Help
+)
+
+if ($Help) {  # usage summary only, then stop
+	Write-Host "Usage: extras/podman/run.ps1 [-Build] [-Interactive] [-Command <cmd>] [-Setup] [-GPUCheck] [-Mirror] [-Recreate] [-WorkVolume <name>] [-WorkDirHost <path>] [-Progress]"
+	exit 0
+}
+# With no explicit mode (-Command, -GPUCheck, -Setup) fall back to an interactive shell.
+$noModeRequested = [string]::IsNullOrEmpty($Command) -and -not $GPUCheck -and -not $Setup
+if ($noModeRequested -and -not $Interactive) { $Interactive = $true }
+$podmanCmd = Get-Command podman -ErrorAction SilentlyContinue
+if (-not $podmanCmd) { Write-Host "❌ Podman not found in PATH" -ForegroundColor Red; exit 1 }
+
+$ContainerName = "vllm-dev"
+$ImageTag = "vllm-dev:latest"
+$SourceDir = (Get-Location).Path
+
+Write-Host "🐋 vLLM Dev Container (Podman)" -ForegroundColor Green
+if ($Build) {
+	Write-Host "🔨 Building image..." -ForegroundColor Yellow
+	& podman build -f extras/Dockerfile -t $ImageTag .
+	if ($LASTEXITCODE -ne 0) { Write-Host "❌ Build failed" -ForegroundColor Red; exit 1 }
+	Write-Host "✅ Build ok" -ForegroundColor Green
+}
+
+# Already running? (the mode handlers below act on the existing container and exit)
+$running = podman ps --filter "name=$ContainerName" --format "{{.Names}}" 2>$null
+
+if ($Recreate -and $running -eq $ContainerName) {
+	Write-Host "♻️  Removing existing container '$ContainerName'" -ForegroundColor Yellow
+	podman rm -f $ContainerName | Out-Null
+	$running = $null
+}
+
+if ($running -eq $ContainerName) {
+	if ($GPUCheck) {
+		Write-Host "🔍 GPU check (existing container)" -ForegroundColor Yellow
+		$cmd = @'
+source /home/vllmuser/venv/bin/activate && python - <<'PY'
+import torch, os
+print("PyTorch:", getattr(torch,"__version__","n/a"))
+print("CUDA:", torch.cuda.is_available())
+print("Devices:", torch.cuda.device_count() if torch.cuda.is_available() else 0)
+print("LD_LIBRARY_PATH:", os.environ.get("LD_LIBRARY_PATH"))
+if torch.cuda.is_available():
+		try:
+				print("GPU 0:", torch.cuda.get_device_name(0))
+		except Exception as e:
+				print("GPU name error:", e)
+PY
+nvidia-smi || true
+'@
+		$cmd = "export NVIDIA_VISIBLE_DEVICES=all; " + ($cmd -replace "`r","")  # drop CRs (CRLF checkout) so bash still sees the bare 'PY' terminator
+		podman exec $ContainerName bash -lc $cmd
+		exit $LASTEXITCODE
+	}
+	if ($Setup) {
+		Write-Host "🔧 Running dev setup in existing container" -ForegroundColor Yellow
+		$envs = @()
+		if ($Mirror) { $envs += @('LOCAL_MIRROR=1') }
+		if ($Progress) { $envs += @('PROGRESS_WATCH=1') }
+		$envs += @('NVIDIA_VISIBLE_DEVICES=all')
+		$envStr = ($envs | ForEach-Object { "export $_;" }) -join ' '
+		$cmd = "$envStr chmod +x ./extras/dev-setup.sh 2>/dev/null || true; ./extras/dev-setup.sh"
+		if ($Progress) { podman exec -it $ContainerName bash -lc $cmd } else { podman exec $ContainerName bash -lc $cmd }
+		exit $LASTEXITCODE
+	}
+	if ($Command) {
+		Write-Host "🚀 Running command in existing container" -ForegroundColor Green
+		$runCmd = "source /home/vllmuser/venv/bin/activate && $Command"
+		podman exec $ContainerName bash -c $runCmd
+		exit $LASTEXITCODE
+	}
+	$resp = Read-Host "Attach to running container? [Y/n]"
+	if ($resp -eq "" -or $resp -match '^[Yy]$') { podman exec -it $ContainerName bash; exit $LASTEXITCODE } else { exit 0 }
+}
+
+# Ensure image exists
+podman image exists $ImageTag
+if ($LASTEXITCODE -ne 0) { Write-Host "❌ Image missing. Use -Build." -ForegroundColor Red; exit 1 }
+
+# Base args: GPU requested via CDI; no default /tmp tmpfs (enable with VLLM_TMPFS_TMP_SIZE)
+$runArgs = @("run","--rm","--security-opt=label=disable","--device=nvidia.com/gpu=all","--shm-size","8g","-v","${SourceDir}:/workspace:Z")
+if (-not [string]::IsNullOrWhiteSpace($WorkVolume)) { $runArgs += @('-v',"${WorkVolume}:/opt/work:Z") }
+elseif ($WorkDirHost -and (Test-Path $WorkDirHost)) { $runArgs += @('-v',"${WorkDirHost}:/opt/work:Z") }
+$runArgs += @('-w','/workspace','--name',"$ContainerName",'--user','vllmuser','--env','ENGINE=podman')
+
+$tmpfsSize = [Environment]::GetEnvironmentVariable('VLLM_TMPFS_TMP_SIZE')
+if (-not [string]::IsNullOrEmpty($tmpfsSize) -and $tmpfsSize -ne '0') { $runArgs += @('--tmpfs',"/tmp:size=$tmpfsSize") }
+
+# WSL GPU: map /dev/dxg and mount WSL libs
+$runArgs += @('--device','/dev/dxg','-v','/usr/lib/wsl:/usr/lib/wsl:ro')
+if ($Mirror) { $runArgs += @('--env','LOCAL_MIRROR=1') }
+
+# NVIDIA env: a value set on the host wins, otherwise use the default (same rule as run.sh).
+$nvidiaDefaults = [ordered]@{ NVIDIA_VISIBLE_DEVICES = 'all'; NVIDIA_DRIVER_CAPABILITIES = 'compute,utility' }
+foreach ($ev in $nvidiaDefaults.Keys) {
+	$val = [Environment]::GetEnvironmentVariable($ev)
+	if (-not $val) { $val = $nvidiaDefaults[$ev] }
+	$runArgs += @('--env',"$ev=$val")
+}
+$runArgs += @('--env','NVIDIA_REQUIRE_CUDA=')
+
+if ($GPUCheck) {  # diagnostics mode: run a one-off container that prints GPU/driver/torch information
+	$pyDiag = @'
+import json, torch, os
+out = {
+		"torch_version": getattr(torch, "__version__", "n/a"),
+		"torch_cuda_version": getattr(getattr(torch, "version", None), "cuda", "n/a"),
+		"cuda_available": torch.cuda.is_available(),
+		"ld_library_path": os.environ.get("LD_LIBRARY_PATH"),
+}
+try:
+		out["device_count"] = torch.cuda.device_count()
+except Exception as e:
+		out["device_count_error"] = str(e)
+if out["cuda_available"] and out.get("device_count", 0) > 0:
+		try:
+				cap = torch.cuda.get_device_capability(0)
+				out["device_0"] = {"name": torch.cuda.get_device_name(0), "capability": f"sm_{cap[0]}{cap[1]}"}
+		except Exception as e:
+				out["device_0_error"] = str(e)
+else:
+		out["diagnostics"] = ["Missing /dev/nvidia* or podman machine without GPU passthrough"]
+print(json.dumps(out, indent=2))
+'@
+	$pyB64 = [Convert]::ToBase64String([Text.Encoding]::UTF8.GetBytes($pyDiag))  # embedded as base64; the bash script below decodes it to /tmp/gpucheck.py
+	$gpuScript = @'
+echo '=== GPU Check ==='
+which nvidia-smi && nvidia-smi || echo 'nvidia-smi unavailable'
+echo '--- /dev/nvidia* ---'
+ls -l /dev/nvidia* 2>/dev/null || echo 'no /dev/nvidia* nodes'
+echo '--- Environment (NVIDIA_*) ---'
+env | grep -E '^NVIDIA_' || echo 'no NVIDIA_* env vars'
+if [ "$NVIDIA_VISIBLE_DEVICES" = "void" ]; then echo 'WARN: NVIDIA_VISIBLE_DEVICES=void (no GPU mapped)'; fi
+echo '--- LD_LIBRARY_PATH ---'
+echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH"
+source /home/vllmuser/venv/bin/activate 2>/dev/null || true
+echo __PY_B64__ | base64 -d > /tmp/gpucheck.py
+python /tmp/gpucheck.py || true
+rm -f /tmp/gpucheck.py
+'@
+	$gpuScript = "export NVIDIA_VISIBLE_DEVICES=all; export LD_LIBRARY_PATH=/usr/lib/wsl/lib:/usr/lib/wsl/drivers:`$LD_LIBRARY_PATH; " + ($gpuScript -replace '__PY_B64__', $pyB64) -replace "`r",""  # prepend exports, inject the payload, strip CRs
+	$runArgs += @('--user','root', $ImageTag,'bash','-lc',$gpuScript)  # NOTE(review): second --user; presumably overrides the earlier vllmuser - confirm
+} elseif ($Setup) {  # setup mode: run the setup wrapper in a fresh container
+	# Use robust setup entrypoint that finds the right script (extras/dev-setup.sh, extras/old/dev-setup.sh, or image helper)
+	$prefix = "chmod +x ./extras/podman/dev-setup.sh 2>/dev/null || true; "
+	$envPrefix = ''
+	if ($Mirror) { $envPrefix += 'export LOCAL_MIRROR=1; ' }
+	if ($Progress) { $envPrefix += 'export PROGRESS_WATCH=1; ' }
+	$envPrefix += 'export TMPDIR=/opt/work/tmp; export TMP=/opt/work/tmp; export TEMP=/opt/work/tmp; mkdir -p /opt/work/tmp; '  # temp dirs point at /opt/work/tmp, created first
+		$setupCmd = $prefix + $envPrefix + "./extras/podman/dev-setup.sh"
+	if ($Progress) { $runArgs += @('-it', $ImageTag, 'bash','-lc', $setupCmd) } else { $runArgs += @($ImageTag, 'bash','-lc', $setupCmd) }  # -it only with -Progress
+	Write-Host "🔧 Running dev setup" -ForegroundColor Green
+} elseif ($Interactive -and -not $Command) {  # interactive shell
+	$runArgs += @('-it',$ImageTag,'bash')
+	Write-Host "🚀 Interactive shell" -ForegroundColor Green
+} elseif ($Command) {  # one-off command, run with the venv activated
+	$runArgs += @($ImageTag,'bash','-lc',"source /home/vllmuser/venv/bin/activate && $Command")
+	Write-Host "🚀 Running command" -ForegroundColor Green
+} else {  # no mode matched: run the image with no explicit command
+	$runArgs += @($ImageTag)
+}
+
+Write-Host "Command: podman $($runArgs -join ' ')" -ForegroundColor Gray  # echo the assembled command line before running it
+& podman @runArgs
+
+if ($LASTEXITCODE -eq 0 -and $Interactive) { Write-Host "Exited cleanly" -ForegroundColor Green }
diff --git a/extras/podman/run.sh b/extras/podman/run.sh
new file mode 100644
index 000000000000..ddafbcc578d0
--- /dev/null
+++ b/extras/podman/run.sh
@@ -0,0 +1,158 @@
+#!/usr/bin/env bash
+# Unified lightweight vLLM dev container launcher (Podman-first, Linux/macOS)
+set -euo pipefail
+
+IMAGE_TAG="vllm-dev:latest"
+CONTAINER_NAME="vllm-dev"
+SOURCE_DIR="$(pwd)"
+
+show_help() {  # print usage to stdout; the heredoc is unquoted so ${CONTAINER_NAME} is expanded
+	cat <<EOF
+Usage: ./extras/podman/run.sh [options]
+
+Options:
+	-b, --build        Build (or rebuild) the image first
+	-c, --command CMD  Run CMD inside container then exit
+	-g, --gpu-check    Run lightweight GPU diagnostics inside container
+	-s, --setup        Run ./extras/dev-setup.sh inside container
+	-p, --progress     Enable in-place progress display during setup
+	-m, --mirror       Copy sources into container (LOCAL_MIRROR=1) for faster build on slow mounts
+	--work-volume NAME Mount named volume NAME at /opt/work (preferred for large builds)
+	-n, --name NAME    Override container name (default: ${CONTAINER_NAME})
+	-h, --help         Show this help and exit
+
+Interactive shell is default if no command/gpu-check/setup specified.
+Examples:
+	extras/podman/run.sh -b
+	extras/podman/run.sh -c "python -c 'import torch;print(torch.cuda.is_available())'"
+	extras/podman/run.sh -g
+EOF
+}
+
+BUILD=0
+GPU_CHECK=0
+SETUP=0
+CMD=""
+MIRROR=0
+PROGRESS=0
+WORK_VOLUME=""
+need_value() { [[ $# -ge 2 ]] || { echo "Option $1 requires a value" >&2; show_help; exit 1; }; }  # else 'shift 2' would abort silently under set -e
+while [[ $# -gt 0 ]]; do
+	case "$1" in
+		-b|--build) BUILD=1; shift ;;
+		-c|--command) need_value "$@"; CMD="$2"; shift 2 ;;
+		-g|--gpu-check) GPU_CHECK=1; shift ;;
+		-s|--setup) SETUP=1; shift ;;
+		-h|--help) show_help; exit 0 ;;
+		-m|--mirror) MIRROR=1; shift ;;
+		--work-volume) need_value "$@"; WORK_VOLUME="$2"; shift 2 ;;
+		-n|--name) need_value "$@"; CONTAINER_NAME="$2"; shift 2 ;;
+		-p|--progress) PROGRESS=1; shift ;;
+		*) echo "Unknown option: $1" >&2; show_help; exit 1 ;;
+	esac
+done
+
+if ! command -v podman >/dev/null 2>&1; then
+	echo "Error: podman not found in PATH" >&2
+	exit 1
+fi
+
+echo "[vLLM] Engine: podman  Image: $IMAGE_TAG  Container: $CONTAINER_NAME"
+
+if [[ $BUILD -eq 1 ]]; then
+	echo "[vLLM] Building image..."
+	if ! podman build -f extras/Dockerfile -t "$IMAGE_TAG" .; then
+		echo "[vLLM] Build failed" >&2
+		exit 1
+	fi
+	echo "[vLLM] Build complete"
+fi
+
+# If container running, attach / exec. The name filter is a regex, so anchor it for an exact match.
+RUNNING=$(podman ps --filter "name=^${CONTAINER_NAME}\$" --format '{{.Names}}' 2>/dev/null || true)
+
+if [[ "$RUNNING" == "$CONTAINER_NAME" ]]; then
+	if [[ $GPU_CHECK -eq 1 ]]; then
+		echo "[vLLM] GPU check (existing container)"
+		exec podman exec "$CONTAINER_NAME" bash -lc 'source /home/vllmuser/venv/bin/activate 2>/dev/null || true; which nvidia-smi && nvidia-smi || true; python - <<PY
+import torch, os
+print("PyTorch:", getattr(torch, "__version__", "n/a"))
+print("CUDA available:", torch.cuda.is_available())
+print("Devices:", torch.cuda.device_count() if torch.cuda.is_available() else 0)
+if torch.cuda.is_available():
+		try: print("GPU 0:", torch.cuda.get_device_name(0))
+		except Exception as e: print("GPU name error:", e)
+PY'
+	fi
+	if [[ $SETUP -eq 1 ]]; then
+		echo "[vLLM] Running dev setup in existing container"
+		# Same switches as run.ps1: LOCAL_MIRROR for --mirror, PROGRESS_WATCH plus a TTY for --progress.
+		SETUP_CMD='chmod +x ./extras/dev-setup.sh 2>/dev/null || true; ./extras/dev-setup.sh'
+		if [[ $MIRROR -eq 1 ]]; then SETUP_CMD="export LOCAL_MIRROR=1; ${SETUP_CMD}"; fi
+		if [[ $PROGRESS -eq 1 ]]; then
+			exec podman exec -it "$CONTAINER_NAME" bash -lc "export PROGRESS_WATCH=1; ${SETUP_CMD}"
+		fi
+		exec podman exec "$CONTAINER_NAME" bash -lc "$SETUP_CMD"
+	fi
+	if [[ -n "$CMD" ]]; then
+		echo "[vLLM] Exec command in existing container"
+		exec podman exec "$CONTAINER_NAME" bash -lc "source /home/vllmuser/venv/bin/activate 2>/dev/null || true; $CMD"
+	fi
+	read -r -p "Attach to running container ${CONTAINER_NAME}? [Y/n] " RESP || true
+	if [[ -z "$RESP" || "$RESP" =~ ^[Yy]$ ]]; then
+		exec podman exec -it "$CONTAINER_NAME" bash
+	fi
+	exit 0
+fi
+
+# Without --build the image must already be present locally.
+if [[ $BUILD -ne 1 ]] && ! podman image exists "$IMAGE_TAG"; then
+	echo "Image $IMAGE_TAG missing. Use --build." >&2; exit 1
+fi
+
+# Arguments shared by every mode; mode-specific ones are appended further down.
+RUN_ARGS=(
+	run --rm --device=nvidia.com/gpu=all --security-opt=label=disable --shm-size 8g
+	--name "$CONTAINER_NAME" -v "${SOURCE_DIR}:/workspace:Z" -w /workspace
+	--user vllmuser --env ENGINE=podman
+)
+
+# Named volume at /opt/work, only when --work-volume was given.
+[[ -z "$WORK_VOLUME" ]] || RUN_ARGS+=(-v "${WORK_VOLUME}:/opt/work:Z")
+
+# tmpfs on /tmp only when VLLM_TMPFS_TMP_SIZE is set to something other than 0.
+TMPFS_SIZE="${VLLM_TMPFS_TMP_SIZE:-0}"
+if [[ "$TMPFS_SIZE" != "0" ]]; then
+	RUN_ARGS+=(--tmpfs "/tmp:size=${TMPFS_SIZE}")
+fi
+
+# NVIDIA env: host values win for the first two, with defaults otherwise; NVIDIA_REQUIRE_CUDA is always emptied.
+RUN_ARGS+=(--env "NVIDIA_VISIBLE_DEVICES=${NVIDIA_VISIBLE_DEVICES:-all}")
+RUN_ARGS+=(--env "NVIDIA_DRIVER_CAPABILITIES=${NVIDIA_DRIVER_CAPABILITIES:-compute,utility}")
+RUN_ARGS+=(--env "NVIDIA_REQUIRE_CUDA=")
+
+# Pick exactly one mode, in priority order: GPU check, setup, one-off command, interactive shell.
+if [[ $GPU_CHECK -eq 1 ]]; then
+	GPU_SCRIPT=$'echo "=== GPU Check ==="; which nvidia-smi && nvidia-smi || echo "nvidia-smi unavailable"; echo "--- /dev/nvidia* ---"; ls -l /dev/nvidia* 2>/dev/null || echo "no /dev/nvidia* nodes"; echo "--- Environment (NVIDIA_*) ---"; env | grep -E "^NVIDIA_" || echo "no NVIDIA_* env vars"; if [ "$NVIDIA_VISIBLE_DEVICES" = "void" ]; then echo "WARN: NVIDIA_VISIBLE_DEVICES=void (no GPU mapped)"; fi; echo "--- LD_LIBRARY_PATH ---"; echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH"; source /home/vllmuser/venv/bin/activate 2>/dev/null || true; python - <<PY\nimport json,torch,os\nout={\n \t\'torch_version\':getattr(torch,\'__version__\',\'n/a\'),\n \t\'torch_cuda_version\':getattr(getattr(torch,\'version\',None),\'cuda\',\'n/a\'),\n \t\'cuda_available\':torch.cuda.is_available(),\n \t\'ld_library_path\':os.environ.get(\'LD_LIBRARY_PATH\')\n}\ntry: out[\'device_count\']=torch.cuda.device_count()\nexcept Exception as e: out[\'device_count_error\']=str(e)\nif out[\'cuda_available\'] and out.get(\'device_count\',0)>0:\n\ttry:\n\t\tcap=torch.cuda.get_device_capability(0)\n\t\tout[\'device_0\']={\'name\':torch.cuda.get_device_name(0),\'capability\':f"sm_{cap[0]}{cap[1]}"}\n\texcept Exception as e:\n\t\tout[\'device_0_error\']=str(e)\nelse:\n\tout[\'diagnostics\']=[\'Missing /dev/nvidia* or podman machine without GPU passthrough\']\nprint(json.dumps(out,indent=2))\nPY'
+	RUN_ARGS+=("$IMAGE_TAG" bash -lc "$GPU_SCRIPT")
+elif [[ $SETUP -eq 1 ]]; then
+	# Setup goes through the extras/podman/dev-setup.sh wrapper; flags become env vars,
+	# and --progress additionally requests an interactive TTY.
+	SETUP_CMD='chmod +x ./extras/podman/dev-setup.sh 2>/dev/null || true; ./extras/podman/dev-setup.sh'
+	if [[ $MIRROR -eq 1 ]]; then RUN_ARGS+=(--env LOCAL_MIRROR=1); fi
+	if [[ $PROGRESS -eq 1 ]]; then RUN_ARGS+=(--env PROGRESS_WATCH=1 "-it"); fi
+	RUN_ARGS+=("$IMAGE_TAG" bash -lc "$SETUP_CMD")
+elif [[ -n "$CMD" ]]; then
+	RUN_ARGS+=("$IMAGE_TAG" bash -lc "source /home/vllmuser/venv/bin/activate 2>/dev/null || true; $CMD")
+else
+	RUN_ARGS+=("-it" "$IMAGE_TAG" bash)
+	echo "[vLLM] Interactive shell. Helpful inside container:"
+	echo "  ./extras/dev-setup.sh            # Build/install editable vLLM"
+	echo "  python -c 'import torch;print(torch.cuda.is_available())'"
+	echo "  python -c 'import vllm'"
+fi
+
+# Show the assembled command, then replace this shell with podman.
+echo "[vLLM] Command: podman ${RUN_ARGS[*]}"
+exec podman "${RUN_ARGS[@]}"
diff --git a/extras/podman/scripts/gpu_status.sh b/extras/podman/scripts/gpu_status.sh
new file mode 100644
index 000000000000..a50c78b01c03
--- /dev/null
+++ b/extras/podman/scripts/gpu_status.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Helper to show GPU/CDI status under Podman (Linux/WSL)
+# Host section of `podman info` via jq; falls back to the full plain output when that pipeline fails.
+podman info --format json | jq '.host' || podman info || true
+
+# CDI devices: try nvidia-ctk first. NOTE(review): confirm `podman cdi list` exists in the targeted Podman release.
+nvidia-ctk cdi list 2>/dev/null || podman cdi list || true
diff --git a/extras/secrets/.gitignore b/extras/secrets/.gitignore
new file mode 100644
index 000000000000..d4895ec18947
--- /dev/null
+++ b/extras/secrets/.gitignore
@@ -0,0 +1,4 @@
+# Ensure this directory stays out of git; keep only this file and README.md.
+*
+!.gitignore
+!README.md
diff --git a/extras/secrets/README.md b/extras/secrets/README.md
new file mode 100644
index 000000000000..ec4e155665e8
--- /dev/null
+++ b/extras/secrets/README.md
@@ -0,0 +1,11 @@
+# secrets directory
+
+This directory is gitignored and intended for local-only secret material such as model hub tokens.
+
+Files are expected to be simple KEY=VALUE lines that can be sourced by shell scripts.
+
+Examples:
+- hf-credentials.env
+- cn-modelhub-credentials.env
+
+Do NOT commit secrets. See README for details.
diff --git a/extras/storage/README.md b/extras/storage/README.md
new file mode 100644
index 000000000000..d106b6d7378c
--- /dev/null
+++ b/extras/storage/README.md
@@ -0,0 +1,7 @@
+# Storage helpers
+
+Declare and manage external volumes for models and caches.
+
+- storage-config.yaml: Declarative host/container paths
+- setup_local.sh: Helper to prepare a local volume or directory
+- scripts/: Utilities for warmup, cache management, mounts
diff --git a/extras/storage/scripts/warm_cache.sh b/extras/storage/scripts/warm_cache.sh
new file mode 100644
index 000000000000..1d97b7f044f6
--- /dev/null
+++ b/extras/storage/scripts/warm_cache.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Scaffold for cache warm-up: creates the target directory and prints what would be warmed.
+# Usage: ./warm_cache.sh [MODEL_ID] [TARGET]   e.g. ./warm_cache.sh meta-llama/Llama-3-8B /models
+model_id="${1:-meta-llama/Llama-3-8B}"
+target_dir="${2:-/models}"
+mkdir -p "$target_dir"
+printf '%s\n' "(scaffold) Would warm cache for $model_id under $target_dir"
diff --git a/extras/storage/setup_local.sh b/extras/storage/setup_local.sh
new file mode 100644
index 000000000000..101826bc7396
--- /dev/null
+++ b/extras/storage/setup_local.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Prepare a local directory for models and ensure reasonable permissions.
+TARGET=${1:-/mnt/ml-models}
+mkdir -p "$TARGET"
+chmod 775 "$TARGET" || true
+
+echo "Model storage prepared at: $TARGET"
diff --git a/extras/storage/storage-config.yaml b/extras/storage/storage-config.yaml
new file mode 100644
index 000000000000..90310b572b3c
--- /dev/null
+++ b/extras/storage/storage-config.yaml
@@ -0,0 +1,4 @@
+model_volume:
+  path_host: "/mnt/ml-models"
+  path_container: "/models"
+  shared: true
diff --git a/extras/testing/README.md b/extras/testing/README.md
new file mode 100644
index 000000000000..2c64d538ac97
--- /dev/null
+++ b/extras/testing/README.md
@@ -0,0 +1,7 @@
+# Testing and benchmarking harness
+
+- Define a matrix of models/environments in `test_matrix.yaml`.
+- Run via `python extras/testing/run_tests.py --output-dir extras/testing/results/$(date +%F_%H-%M)`.
+- Store results in `results/` with timestamps for regression tracking.
+
+This scaffolding is intentionally minimal; models and benchmarks can be added incrementally.
diff --git a/extras/testing/compare_results.py b/extras/testing/compare_results.py
new file mode 100644
index 000000000000..628e08e7d72c
--- /dev/null
+++ b/extras/testing/compare_results.py
@@ -0,0 +1,27 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+import argparse
+import json
+import os
+import sys
+
+def load(path: str) -> dict:
+    with open(path, "r", encoding="utf-8") as f:
+        return json.load(f)
+
+def main() -> int:
+    p = argparse.ArgumentParser()
+    p.add_argument("a")
+    p.add_argument("b")
+    args = p.parse_args()
+
+    A = load(args.a)
+    B = load(args.b)
+
+    # Placeholder comparison: print keys that differ
+    diffs = sorted(set(A.keys()) ^ set(B.keys()))
+    print(json.dumps({"diff_keys": diffs}))
+    return 0
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/extras/testing/run_tests.py b/extras/testing/run_tests.py
new file mode 100644
index 000000000000..0e58573bb8d0
--- /dev/null
+++ b/extras/testing/run_tests.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+"""
+Minimal, non-destructive test harness that prints a JSON line per test.
+This is a scaffold; integrate with your local launchers or CI as needed.
+"""
+from __future__ import annotations
+import argparse
+import json
+import os
+import sys
+from datetime import datetime
+
+
+def main() -> int:
+    p = argparse.ArgumentParser()
+    p.add_argument("--cuda-version", default=os.getenv("CUDA_VERSION", "12.9.1"))
+    p.add_argument("--ubi-version", default=os.getenv("UBI_VERSION", "9.4"))
+    p.add_argument("--models", default="Example-Llama3-8B")
+    p.add_argument("--output-dir", default=os.path.join("extras", "testing", "results", datetime.now().strftime("%F_%H-%M")))
+    args = p.parse_args()
+
+    os.makedirs(args.output_dir, exist_ok=True)
+
+    result = {
+        "ts": datetime.utcnow().isoformat() + "Z",
+        "cuda": args.cuda_version,
+        "ubi": args.ubi_version,
+        "models": args.models.split(","),
+        "status": "scaffold",
+        "notes": "Integrate with vLLM server/client to collect real metrics.",
+    }
+
+    out_path = os.path.join(args.output_dir, "scaffold.json")
+    with open(out_path, "w", encoding="utf-8") as f:
+        json.dump(result, f, indent=2)
+
+    print(json.dumps({"written": out_path}))
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/extras/testing/test_matrix.yaml b/extras/testing/test_matrix.yaml
new file mode 100644
index 000000000000..270e7ff5ec13
--- /dev/null
+++ b/extras/testing/test_matrix.yaml
@@ -0,0 +1,16 @@
+models:
+  - name: Example-Llama3-8B
+    id: meta-llama/Llama-3-8B
+    chat_template: chat_templates/llama-3-instruct.jinja
+    params:
+      max_tokens: 64
+      temperature: 0.7
+
+environments:
+  - cuda: "12.9.1"  # quoted: versions are strings, not numbers
+    ubi: "9.4"  # unquoted 9.4 would load as a float
+
+benchmarks:
+  - name: inference_speed
+    input: "Summarize: vLLM extras modularization plan."
+    metrics: [latency_ms, tokens_per_sec]