
Commit 860f3e0

Address pre-commit hooks
Signed-off-by: Matthew Bonanni <[email protected]>
1 parent 8ae24a3 commit 860f3e0

3 files changed, 4 insertions(+), 6 deletions(-)

vllm/attention/ops/flashmla.py

Lines changed: 2 additions & 2 deletions
@@ -83,8 +83,8 @@ def flash_mla_with_kvcache(
         softmax_scale: float. The scaling of QK^T before applying softmax.
             Default to 1 / sqrt(head_dim).
         causal: bool. Whether to apply causal attention mask.
-        descale_q: (batch_size), torch.float32. Descaling factors for Q, used for fp8 quantization.
-        descale_k: (batch_size), torch.float32. Descaling factors for K, used for fp8 quantization.
+        descale_q: (batch_size), torch.float32. Descaling factors for Q.
+        descale_k: (batch_size), torch.float32. Descaling factors for K.
 
     Return:
         out: (batch_size, seq_len_q, num_heads_q, head_dim_v).
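
For context, the descale_q / descale_k arguments documented in this hunk are per-batch float32 descaling factors for fp8-quantized Q and K. The following is a minimal, self-contained sketch of that quantize/descale relationship only; the shapes, the per-batch absmax scale choice, and the printout are illustrative assumptions, not the vLLM kernel call site (requires a PyTorch build with float8 dtypes).

import torch

# Hypothetical shapes; this illustrates fp8 descaling, not vLLM's kernel call.
batch_size, seq_len_q, num_heads_q, head_dim = 2, 1, 16, 64
q = torch.randn(batch_size, seq_len_q, num_heads_q, head_dim)

# One scale per batch element: fp8 stores q / descale_q, and the consumer later
# multiplies by descale_q (shape (batch_size,), torch.float32) to restore magnitude.
fp8_max = torch.finfo(torch.float8_e4m3fn).max
descale_q = (q.abs().amax(dim=(1, 2, 3)) / fp8_max).to(torch.float32)
q_fp8 = (q / descale_q.view(-1, 1, 1, 1)).to(torch.float8_e4m3fn)

# Dequantizing with the descale factor approximately recovers the original Q.
q_back = q_fp8.to(torch.float32) * descale_q.view(-1, 1, 1, 1)
print("max abs reconstruction error:", (q - q_back).abs().max().item())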

vllm/v1/attention/backends/mla/cutlass_mla.py

Lines changed: 1 addition & 2 deletions
@@ -7,8 +7,7 @@
 import torch
 
 import vllm._custom_ops as ops
-from vllm.attention.backends.abstract import (AttentionLayer,
-                                              AttentionType,
+from vllm.attention.backends.abstract import (AttentionLayer, AttentionType,
                                               is_quantized_kv_cache)
 from vllm.logger import init_logger
 from vllm.v1.attention.backends.mla.common import (MLACommonBackend,

vllm/v1/attention/backends/mla/triton_mla.py

Lines changed: 1 addition & 2 deletions
@@ -6,8 +6,7 @@
 import torch
 
 from vllm import envs
-from vllm.attention.backends.abstract import (AttentionLayer,
-                                              AttentionType,
+from vllm.attention.backends.abstract import (AttentionLayer, AttentionType,
                                               is_quantized_kv_cache)
 from vllm.attention.ops.triton_decode_attention import decode_attention_fwd
 from vllm.attention.ops.triton_flash_attention import triton_attention
