#!/usr/bin/env bash
set -euo pipefail

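# Usage: run from the repository root (or set ROOT_DIR to point at it), e.g.
#   ROOT_DIR=/workspace bash extras/patches/apply_patches.sh
# (script path/name assumed; patches are read from ROOT_DIR/extras/patches or from
# the directory next to this script)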
# Normalize CRLF and re-exec if needed
if grep -q $'\r' "$0" 2>/dev/null; then
  TMP_SELF=$(mktemp /tmp/apply_patches_self.XXXXXX.sh)
  tr -d '\r' < "$0" > "$TMP_SELF" || cp "$0" "$TMP_SELF"
  chmod +x "$TMP_SELF" 2>/dev/null || true
  exec "$TMP_SELF" "$@"
fi

# Resolve paths
SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
# Treat current working directory as repo root (wrapper cd's to /workspace)
ROOT_DIR=${ROOT_DIR:-$(pwd)}
# Prefer patches from repo under ./extras/patches; fall back to script dir (e.g., /tmp copy)
PRIMARY_PATCH_DIR="${ROOT_DIR}/extras/patches"
PATCH_DIR="$PRIMARY_PATCH_DIR"
if [ ! -d "$PATCH_DIR" ] || ! ls "$PATCH_DIR"/*.diff >/dev/null 2>&1; then
  PATCH_DIR="$SCRIPT_DIR"
fi

pushd "$ROOT_DIR" >/dev/null

shopt -s nullglob
PATCHES=("${PATCH_DIR}"/*.diff)
shopt -u nullglob

echo "[patches] Using ROOT_DIR=$ROOT_DIR"
echo "[patches] Scanning ${PATCH_DIR} for .diff files"
echo "[patches] Found ${#PATCHES[@]} .diff file(s) in ${PATCH_DIR}"
for pp in "${PATCHES[@]}"; do echo " - $(basename "$pp")"; done

for p in "${PATCHES[@]}"; do
  echo "[patches] Applying ${p}"
  # Normalize EOL to a temp patch file
  TMP_PATCH=$(mktemp /tmp/patch.XXXXXX.diff)
  tr -d '\r' < "$p" > "$TMP_PATCH" 2>/dev/null || cp "$p" "$TMP_PATCH"
  if git apply --check "$TMP_PATCH" 2>/dev/null; then
    git apply "$TMP_PATCH" || true
    continue
  fi
  echo "[patches] git apply check failed for $(basename "$p"); attempting fallback if known"
  case "$(basename "$p")" in
    0001-cumem-alloc-env-fallback.diff)
      echo "[patches] Fallback: update cumem allocator env var preference"
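      # Recent PyTorch releases prefer PYTORCH_ALLOC_CONF and treat PYTORCH_CUDA_ALLOC_CONF
      # as a legacy alias; the edit below reads the new variable first and falls back to the old one.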
      python - <<'PY'
import io, os
path = os.path.join('vllm','device_allocator','cumem.py')
try:
    with io.open(path, 'r', encoding='utf-8', newline='') as f:
        src = f.read()
except FileNotFoundError:
    raise SystemExit(0)
if 'PYTORCH_ALLOC_CONF' in src:
    print('[patches] cumem already prefers PYTORCH_ALLOC_CONF; skipping')
    raise SystemExit(0)
needle = 'conf = os.environ.get("PYTORCH_CUDA_ALLOC_CONF", "")'
if needle in src:
    new = src.replace(needle,
        'conf = os.environ.get("PYTORCH_ALLOC_CONF",\n'
        '    os.environ.get("PYTORCH_CUDA_ALLOC_CONF", ""))')
    with io.open(path, 'w', encoding='utf-8', newline='\n') as f:
        f.write(new)
    print('[patches] Applied cumem env var fallback edit')
else:
    print('[patches] cumem pattern not found; skipping')
PY
      ;;
    0002-cub-reduce-to-sum-cuda13.diff)
      echo "[patches] Fallback will be handled by the post-pass rewrite"
      ;;
    *)
      echo "[patches] Unknown patch; skipping fallback"
      ;;
  esac
done

echo "[patches] Post-pass: normalize CUB reductions to device lambdas for CUDA 13"
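# Newer CCCL releases (as bundled with CUDA 13) drop the legacy cub::Sum/Max/Min functor
# types, so matching call sites are rewritten to generic __device__ lambdas instead.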
python - <<'PY'
import io, os, re

files = []
for root, _, names in os.walk('csrc'):
    for n in names:
        if n.endswith(('.cu', '.cuh')):
            files.append(os.path.join(root, n))

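# Map a CUB reduction kind to an equivalent generic __device__ lambda.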
def lam_for(op: str) -> str:
    if op == 'Sum':
        return '[] __device__ (auto a, auto b) { return a + b; }'
    if op == 'Max':
        return '[] __device__ (auto a, auto b) { return a > b ? a : b; }'
    return '[] __device__ (auto a, auto b) { return a < b ? a : b; }'

# Patterns
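# pat_method matches the shorthand form, e.g. BlockReduce(ts).Sum(x) / .Max(x) / .Min(x);
# pat_functor matches the generic form, e.g. BlockReduce(ts).Reduce(x, cub::Sum()).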
pat_method = re.compile(r'(BlockReduce\([^)]*\))\s*\.\s*(Sum|Max|Min)\(\s*([\s\S]*?)\s*\)', re.DOTALL)
pat_functor = re.compile(r'(BlockReduce\([^)]*\))\s*\.\s*Reduce\(\s*([\s\S]*?)\s*,\s*cub::(Sum|Max|Min)\s*(?:\(\)|\{\})\s*([\s\S]*?)\)', re.DOTALL)

changed_any = False
for path in files:
    try:
        with io.open(path, 'r', encoding='utf-8', newline='') as f:
            src = f.read()
    except FileNotFoundError:
        continue

    new_src = src

    # Replace method form first
    def repl_method(m):
        recv, op, expr = m.group(1), m.group(2), (m.group(3) or '').strip()
        return f"{recv}.Reduce({expr}, {lam_for(op)})"
    new_src = pat_method.sub(repl_method, new_src)

    # Replace functor form
    def repl_functor(m):
        recv, expr, op, tail = m.group(1), (m.group(2) or '').strip(), m.group(3), (m.group(4) or '').rstrip()
        return f"{recv}.Reduce({expr}, {lam_for(op)}{tail})"
    new_src = pat_functor.sub(repl_functor, new_src)

    if new_src != src:
        with io.open(path, 'w', encoding='utf-8', newline='\n') as f:
            f.write(new_src)
        print(f"[patches] Rewrote CUB reductions in {path}")
        changed_any = True

if not changed_any:
    print('[patches] Post-pass: no changes (already applied)')
PY

# Also relax cumem allocator assert to allow user opting into expandable segments
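# vLLM's cumem allocator asserts that expandable_segments:True is absent from the allocator
# config; dropping that assert leaves the trade-off to users who explicitly opt in.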
python - <<'PY'
import io, os, re
path = os.path.join('vllm','device_allocator','cumem.py')
try:
    with io.open(path, 'r', encoding='utf-8') as f:
        src = f.read()
except FileNotFoundError:
    print('[patches] cumem.py not found; skipping assert relax')
else:
    new_src = src
    # Remove the multi-line assert block guarding expandable_segments
    new_src = re.sub(
        r"assert\s+\"expandable_segments:True\"\s+not\s+in\s+conf,\s*\\\n\s*\(.*?\)\s*\n",
        "",
        new_src,
        flags=re.DOTALL,
    )
    # If a single-line variant exists, remove it too
    new_src = re.sub(
        r"^\s*assert\s+\"expandable_segments:True\".*$\n",
        "",
        new_src,
        flags=re.MULTILINE,
    )
    if new_src != src:
        with io.open(path, 'w', encoding='utf-8', newline='\n') as f:
            f.write(new_src)
        print('[patches] Relaxed expandable_segments assert in vllm/device_allocator/cumem.py')
    else:
        print('[patches] No expandable_segments assert to relax (already updated)')
PY

# Ensure FlashMLA target sees CUDA CCCL/targets include dirs for <cuda/std/...>
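# FlashMLA sources include <cuda/std/...> headers; if the toolkit include directories are not
# propagated to the extension target, the host compiler may fail to locate them.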
python - <<'PY'
import io, os, re
path = os.path.join('cmake','external_projects','flashmla.cmake')
try:
    with io.open(path, 'r', encoding='utf-8') as f:
        src = f.read()
except FileNotFoundError:
    print('[patches] flashmla.cmake not found; skipping CCCL include fix')
else:
    if 'target_include_directories(_flashmla_C PRIVATE ${CUDAToolkit_INCLUDE_DIRS})' in src:
        print('[patches] FlashMLA already includes CUDAToolkit include dirs')
    else:
        # Insert after the WITH_SOABI) line of define_gpu_extension_target block
        new_src = re.sub(
            r"(define_gpu_extension_target\([\s\S]*?_flashmla_C[\s\S]*?WITH_SOABI\))",
            r"\1\n  if(CUDAToolkit_INCLUDE_DIRS)\n    target_include_directories(_flashmla_C PRIVATE ${CUDAToolkit_INCLUDE_DIRS})\n  endif()",
            src,
            count=1,
        )
        if new_src != src:
            with io.open(path, 'w', encoding='utf-8', newline='\n') as f:
                f.write(new_src)
            print('[patches] Added CUDAToolkit include dirs to FlashMLA target')
        else:
            # Fallback: append at the end of the file, unless FlashMLA is only a stub custom target
            if 'add_custom_target(_flashmla_C)' in src:
                print('[patches] FlashMLA is a stub custom target here; skipping include dir append')
            else:
                appended = src + "\nif(CUDAToolkit_INCLUDE_DIRS)\n  target_include_directories(_flashmla_C PRIVATE ${CUDAToolkit_INCLUDE_DIRS})\nendif()\n"
                with io.open(path, 'w', encoding='utf-8', newline='\n') as f:
                    f.write(appended)
                print('[patches] Appended CUDAToolkit include dirs to FlashMLA target (fallback)')
PY

# Strengthen FlashMLA include path injection: add explicit libcudacxx targets paths if missing
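# Newer toolkits can place the libcu++/CCCL headers under .../include/cccl or the
# targets/<arch>-linux/include tree, so several candidate roots are probed below.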
python - <<'PY'
import io, os, re
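# The marker below tags the injected CMake block so re-running this script stays idempotent.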
MARK = '# _flashmla_C_LIBCUDACXX_INJECT'
path = os.path.join('cmake','external_projects','flashmla.cmake')
try:
    with io.open(path, 'r', encoding='utf-8') as f:
        src = f.read()
except FileNotFoundError:
    print('[patches] flashmla.cmake not found; skipping libcudacxx inject')
else:
    if MARK in src:
        print('[patches] FlashMLA libcudacxx include injection already present')
    else:
        inject_block = f"""
{MARK}
if(CUDAToolkit_ROOT)
  # Explicit libcudacxx include roots for <cuda/std/...>
  foreach(_cxx_inc
    "${{CUDAToolkit_ROOT}}/targets/x86_64-linux/include"
    "${{CUDAToolkit_ROOT}}/targets/x86_64-linux/include/cccl"
    "${{CUDAToolkit_ROOT}}/include/cccl"
  )
    if(EXISTS "${{_cxx_inc}}")
      target_include_directories(_flashmla_C PRIVATE "${{_cxx_inc}}")
    endif()
  endforeach()
endif()
""".strip('\n') + '\n'

        # Heuristic: place after first existing target_include_directories for _flashmla_C or after define block
        pattern = r'(target_include_directories\(_flashmla_C[^\n]*\n)'
        m = re.search(pattern, src)
        if m:
            pos = m.end(1)
            new_src = src[:pos] + inject_block + src[pos:]
        else:
            # Append near end
            new_src = src + '\n' + inject_block + '\n'
        with io.open(path, 'w', encoding='utf-8', newline='\n') as f:
            f.write(new_src)
        print('[patches] Injected explicit libcudacxx include directories into FlashMLA target')
PY

popd >/dev/null

echo "[patches] Done."