
Commit 860f3e0

Address pre-commit hooks
Signed-off-by: Matthew Bonanni <[email protected]>
1 parent 8ae24a3 commit 860f3e0

3 files changed, 4 insertions(+), 6 deletions(-)

vllm/attention/ops/flashmla.py

Lines changed: 2 additions & 2 deletions
@@ -83,8 +83,8 @@ def flash_mla_with_kvcache(
         softmax_scale: float. The scaling of QK^T before applying softmax.
             Default to 1 / sqrt(head_dim).
         causal: bool. Whether to apply causal attention mask.
-        descale_q: (batch_size), torch.float32. Descaling factors for Q, used for fp8 quantization.
-        descale_k: (batch_size), torch.float32. Descaling factors for K, used for fp8 quantization.
+        descale_q: (batch_size), torch.float32. Descaling factors for Q.
+        descale_k: (batch_size), torch.float32. Descaling factors for K.
 
     Return:
         out: (batch_size, seq_len_q, num_heads_q, head_dim_v).
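
For context, the descale_q / descale_k arguments documented in this hunk are per-batch float32 descaling factors for fp8-quantized Q and K. The following is a minimal, self-contained sketch of that quantize/descale relationship only; the shapes, the per-batch absmax scale choice, and the printout are illustrative assumptions, not the vLLM kernel call site (requires a PyTorch build with float8 dtypes).

import torch

# Hypothetical shapes; this illustrates fp8 descaling, not vLLM's kernel call.
batch_size, seq_len_q, num_heads_q, head_dim = 2, 1, 16, 64
q = torch.randn(batch_size, seq_len_q, num_heads_q, head_dim)

# One scale per batch element: fp8 stores q / descale_q, and the consumer later
# multiplies by descale_q (shape (batch_size,), torch.float32) to restore magnitude.
fp8_max = torch.finfo(torch.float8_e4m3fn).max
descale_q = (q.abs().amax(dim=(1, 2, 3)) / fp8_max).to(torch.float32)
q_fp8 = (q / descale_q.view(-1, 1, 1, 1)).to(torch.float8_e4m3fn)

# Dequantizing with the descale factor approximately recovers the original Q.
q_back = q_fp8.to(torch.float32) * descale_q.view(-1, 1, 1, 1)
print("max abs reconstruction error:", (q - q_back).abs().max().item())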

vllm/v1/attention/backends/mla/cutlass_mla.py

Lines changed: 1 addition & 2 deletions
@@ -7,8 +7,7 @@
 import torch
 
 import vllm._custom_ops as ops
-from vllm.attention.backends.abstract import (AttentionLayer,
-                                              AttentionType,
+from vllm.attention.backends.abstract import (AttentionLayer, AttentionType,
                                               is_quantized_kv_cache)
 from vllm.logger import init_logger
 from vllm.v1.attention.backends.mla.common import (MLACommonBackend,

vllm/v1/attention/backends/mla/triton_mla.py

Lines changed: 1 addition & 2 deletions
@@ -6,8 +6,7 @@
 import torch
 
 from vllm import envs
-from vllm.attention.backends.abstract import (AttentionLayer,
-                                              AttentionType,
+from vllm.attention.backends.abstract import (AttentionLayer, AttentionType,
                                               is_quantized_kv_cache)
 from vllm.attention.ops.triton_decode_attention import decode_attention_fwd
 from vllm.attention.ops.triton_flash_attention import triton_attention
