Skip to content

Commit 2dd72d2

Browse files
authored
update flashinfer to v0.2.9rc1 (#21485)
Signed-off-by: Weiliang Liu <[email protected]>
1 parent a6c7fb8 commit 2dd72d2

File tree

3 files changed

+6
-15
lines changed

3 files changed

+6
-15
lines changed

docker/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -386,7 +386,7 @@ RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist
386386

387387
# Install FlashInfer from source
388388
ARG FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git"
389-
ARG FLASHINFER_GIT_REF="v0.2.8"
389+
ARG FLASHINFER_GIT_REF="v0.2.9rc1"
390390
RUN --mount=type=cache,target=/root/.cache/uv bash - <<'BASH'
391391
. /etc/environment
392392
git clone --depth 1 --recursive --shallow-submodules \

vllm/attention/backends/flashinfer.py

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1169,16 +1169,12 @@ def forward(
11691169
query=decode_query,
11701170
kv_cache=kv_cache.permute(*stride_order),
11711171
workspace_buffer=workspace_buffer,
1172-
num_heads=num_heads,
1173-
num_kv_heads=num_kv_heads,
1174-
scale=softmax_scale,
11751172
block_tables=attn_metadata.block_tables,
11761173
seq_lens=decode_meta.seq_lens_tensor,
1177-
block_size=attn_metadata.page_size,
11781174
max_seq_len=attn_metadata.max_decode_seq_len,
1179-
kv_cache_dtype=kv_cache_dtype,
1180-
k_scale=layer._k_scale_float,
1181-
v_scale=layer._v_scale_float)
1175+
bmm1_scale=layer._k_scale_float * softmax_scale,
1176+
bmm2_scale=layer._v_scale_float,
1177+
)
11821178

11831179
if prefill_output is None and decode_output is not None:
11841180
# Decode only batch.

vllm/v1/attention/backends/flashinfer.py

Lines changed: 2 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -678,15 +678,10 @@ def forward(
678678
query=decode_query,
679679
kv_cache=kv_cache_permute,
680680
workspace_buffer=attn_metadata.workspace_buffer,
681-
num_heads=self.num_heads,
682-
num_kv_heads=self.num_kv_heads,
683-
scale=self.scale,
684681
block_tables=block_tables_decode,
685682
seq_lens=seq_lens_decode,
686-
block_size=attn_metadata.page_size,
687683
max_seq_len=attn_metadata.max_seq_len,
688-
kv_cache_dtype=self.kv_cache_dtype,
689-
k_scale=layer._k_scale_float,
690-
v_scale=layer._v_scale_float,
684+
bmm1_scale=layer._k_scale_float * self.scale,
685+
bmm2_scale=layer._v_scale_float,
691686
))
692687
return output_padded

0 commit comments

Comments (0)