Commit 5fe781a (parent: 839d822)

Fix attention ops interface error

Signed-off-by: Xiao Yu <[email protected]>

File tree

1 file changed: +2 additions, -1 deletion

vllm/_custom_ops.py

Lines changed: 2 additions & 1 deletion
@@ -117,8 +117,9 @@ def paged_attention_rocm(
     k_scale: torch.Tensor,
     v_scale: torch.Tensor,
     fp8_out_scale: Optional[torch.Tensor] = None,
-    mfma_type: str = envs.is_set("VLLM_USE_FP8_MFMA") ? "fp8" : "f16",
+    mfma_type: str = "fp8" if envs.is_set("VLLM_USE_FP8_MFMA") else "f16",
 ) -> None:
+    print(f"\nmfma_type: {mfma_type}")
     torch.ops._rocm_C.paged_attention(out, exp_sum, max_logits, tmp_out, query,
                                       key_cache, value_cache, num_kv_heads,
                                       scale, block_tables, seq_lens,
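
For context: the removed default used C-style ternary syntax (cond ? a : b), which is a SyntaxError in Python; the replacement uses Python's conditional expression, a if cond else b. Below is a minimal standalone sketch of the same default-argument pattern, for illustration only: os.environ.get stands in for vLLM's envs helper, and paged_attention_stub is a hypothetical name, not the actual interface.

import os

# Sketch of the fixed pattern (assumption: os.environ.get stands in for
# vLLM's envs helper). A Python conditional expression supplies the default;
# note that it is evaluated once, at function-definition time.
def paged_attention_stub(
    num_kv_heads: int,
    mfma_type: str = "fp8" if os.environ.get("VLLM_USE_FP8_MFMA") else "f16",
) -> None:
    print(f"mfma_type: {mfma_type}")

paged_attention_stub(8)                   # env-derived default
paged_attention_stub(8, mfma_type="f16")  # explicit override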
