Merged
3 changes: 2 additions & 1 deletion tests/kernels/attention/test_deepgemm_attention.py
@@ -6,7 +6,7 @@
 import torch
 
 from vllm.platforms import current_platform
-from vllm.utils import cdiv, has_deep_gemm
+from vllm.utils import cdiv
 from vllm.utils.deep_gemm import (
     _ceil_to_ue8m0,
     calc_diff,
@@ -15,6 +15,7 @@
     get_num_sms,
     get_paged_mqa_logits_metadata,
 )
+from vllm.utils.import_utils import has_deep_gemm
 
 
 def kv_cache_cast_to_fp8(x: torch.Tensor) -> torch.Tensor:

2 changes: 1 addition & 1 deletion tests/kernels/moe/modular_kernel_tools/common.py
@@ -23,7 +23,7 @@
     FusedMoEQuantConfig,
 )
 from vllm.model_executor.layers.fused_moe.fused_moe import fused_topk
-from vllm.utils import has_deep_ep, has_deep_gemm, has_pplx
+from vllm.utils.import_utils import has_deep_ep, has_deep_gemm, has_pplx
 
 from .mk_objects import (
     TestMoEQuantConfig,

2 changes: 1 addition & 1 deletion tests/kernels/moe/modular_kernel_tools/mk_objects.py
@@ -35,9 +35,9 @@
     cutlass_fp8_supported,
 )
 from vllm.platforms import current_platform
-from vllm.utils import has_deep_ep, has_deep_gemm, has_pplx
 from vllm.utils.deep_gemm import is_deep_gemm_supported
 from vllm.utils.flashinfer import has_flashinfer_cutlass_fused_moe
+from vllm.utils.import_utils import has_deep_ep, has_deep_gemm, has_pplx
 
 
 @dataclass

2 changes: 1 addition & 1 deletion tests/kernels/moe/parallel_utils.py
@@ -15,7 +15,7 @@
 from torch.multiprocessing import spawn  # pyright: ignore[reportPrivateImportUsage]
 from typing_extensions import ParamSpec
 
-from vllm.utils import has_deep_ep
+from vllm.utils.import_utils import has_deep_ep
 from vllm.utils.network_utils import get_open_port
 
 if has_deep_ep():

2 changes: 1 addition & 1 deletion tests/kernels/moe/test_block_fp8.py
@@ -21,11 +21,11 @@
     modular_triton_fused_moe,
 )
 from vllm.platforms import current_platform
-from vllm.utils import has_deep_gemm
 from vllm.utils.deep_gemm import (
     get_mk_alignment_for_contiguous_layout,
     is_deep_gemm_e8m0_used,
 )
+from vllm.utils.import_utils import has_deep_gemm
 
 dg_available = has_deep_gemm()
 

2 changes: 1 addition & 1 deletion tests/kernels/moe/test_deepep_deepgemm_moe.py
@@ -21,8 +21,8 @@
 from vllm.model_executor.layers.fused_moe.fused_moe import fused_experts
 from vllm.model_executor.layers.fused_moe.modular_kernel import FusedMoEModularKernel
 from vllm.platforms import current_platform
-from vllm.utils import has_deep_ep, has_deep_gemm
 from vllm.utils.deep_gemm import is_deep_gemm_e8m0_used, is_deep_gemm_supported
+from vllm.utils.import_utils import has_deep_ep, has_deep_gemm
 
 from ...utils import multi_gpu_test
 from .parallel_utils import ProcessGroupInfo, parallel_launch

2 changes: 1 addition & 1 deletion tests/kernels/moe/test_deepep_moe.py
@@ -21,7 +21,7 @@
     per_token_group_quant_fp8,
 )
 from vllm.platforms import current_platform
-from vllm.utils import has_deep_ep
+from vllm.utils.import_utils import has_deep_ep
 
 from ...utils import multi_gpu_test
 from .parallel_utils import ProcessGroupInfo, parallel_launch

2 changes: 1 addition & 1 deletion tests/kernels/moe/test_gpt_oss_triton_kernels.py
@@ -6,7 +6,7 @@
 import torch
 import torch.nn.functional as F
 
-from vllm.utils import has_triton_kernels
+from vllm.utils.import_utils import has_triton_kernels
 
 if not has_triton_kernels():
     pytest.skip(

2 changes: 1 addition & 1 deletion tests/kernels/moe/test_modular_kernel_combinations.py
@@ -13,8 +13,8 @@
 import vllm.model_executor.layers.fused_moe.modular_kernel as mk
 from vllm.config import VllmConfig, set_current_vllm_config
 from vllm.platforms import current_platform
-from vllm.utils import has_deep_ep, has_deep_gemm, has_pplx
 from vllm.utils.flashinfer import has_flashinfer_cutlass_fused_moe
+from vllm.utils.import_utils import has_deep_ep, has_deep_gemm, has_pplx
 from vllm.utils.torch_utils import cuda_device_count_stateless
 
 from .modular_kernel_tools.common import (

2 changes: 1 addition & 1 deletion tests/kernels/quantization/test_block_fp8.py
@@ -18,12 +18,12 @@
     w8a8_triton_block_scaled_mm,
 )
 from vllm.platforms import current_platform
-from vllm.utils import has_deep_gemm
 from vllm.utils.deep_gemm import (
     fp8_gemm_nt,
     get_col_major_tma_aligned_tensor,
     per_block_cast_to_fp8,
 )
+from vllm.utils.import_utils import has_deep_gemm
 
 if current_platform.get_device_capability() < (9, 0):
     pytest.skip("FP8 Triton requires CUDA 9.0 or higher", allow_module_level=True)

1 change: 0 additions & 1 deletion tests/models/quantization/test_fp8.py
@@ -12,7 +12,6 @@
from vllm.attention.utils.fa_utils import flash_attn_supports_fp8
from vllm.platforms import current_platform
from vllm.utils import STR_BACKEND_ENV_VAR

from ..utils import check_logprobs_close


2 changes: 1 addition & 1 deletion vllm/distributed/device_communicators/all2all.py
@@ -9,8 +9,8 @@
 from vllm.distributed import get_dp_group, get_ep_group
 from vllm.forward_context import get_forward_context
 from vllm.logger import init_logger
-from vllm.utils import has_deep_ep, has_pplx
 from vllm.utils.flashinfer import has_flashinfer_all2all
+from vllm.utils.import_utils import has_deep_ep, has_pplx
 
 from .base_device_communicator import All2AllManagerBase, Cache
 

3 changes: 2 additions & 1 deletion vllm/model_executor/layers/fused_moe/config.py
@@ -14,8 +14,9 @@
     OCP_MX_Scheme,
 )
 from vllm.model_executor.layers.quantization.utils.quant_utils import GroupShape
-from vllm.utils import cdiv, has_triton_kernels
+from vllm.utils import cdiv
 from vllm.utils.flashinfer import has_flashinfer_cutlass_fused_moe
+from vllm.utils.import_utils import has_triton_kernels
 
 logger = init_logger(__name__)
 

2 changes: 1 addition & 1 deletion vllm/model_executor/layers/fused_moe/deep_gemm_moe.py
@@ -26,12 +26,12 @@
 from vllm.model_executor.layers.quantization.utils.fp8_utils import (
     per_token_group_quant_fp8,
 )
-from vllm.utils import has_deep_gemm
 from vllm.utils.deep_gemm import (
     get_mk_alignment_for_contiguous_layout,
     m_grouped_fp8_gemm_nt_contiguous,
 )
 from vllm.utils.func_utils import run_once
+from vllm.utils.import_utils import has_deep_gemm
 
 logger = init_logger(__name__)
 

@@ -13,7 +13,7 @@
     TopKWeightAndReduceNoOP,
 )
 from vllm.triton_utils import tl, triton
-from vllm.utils import has_triton_kernels
+from vllm.utils.import_utils import has_triton_kernels
 
 logger = init_logger(__name__)
 

3 changes: 2 additions & 1 deletion vllm/model_executor/layers/fused_moe/layer.py
@@ -55,8 +55,9 @@
 from vllm.model_executor.utils import set_weight_attrs
 from vllm.platforms import current_platform
 from vllm.platforms.interface import CpuArchEnum
-from vllm.utils import cdiv, has_deep_ep, has_pplx, round_up
+from vllm.utils import cdiv, round_up
 from vllm.utils.flashinfer import has_flashinfer_cutlass_fused_moe
+from vllm.utils.import_utils import has_deep_ep, has_pplx
 from vllm.utils.torch_utils import current_stream, direct_register_custom_op
 from vllm.v1.worker.ubatching import dbo_current_ubatch_id
 

2 changes: 1 addition & 1 deletion vllm/model_executor/layers/quantization/fp8.py
@@ -93,7 +93,6 @@
 from vllm.model_executor.utils import set_weight_attrs
 from vllm.platforms import current_platform
 from vllm.scalar_type import scalar_types
-from vllm.utils import has_deep_gemm
 from vllm.utils.deep_gemm import (
     fp8_gemm_nt,
     get_col_major_tma_aligned_tensor,
@@ -102,6 +101,7 @@
     should_use_deepgemm_for_fp8_linear,
 )
 from vllm.utils.flashinfer import has_flashinfer_moe
+from vllm.utils.import_utils import has_deep_gemm
 
 if TYPE_CHECKING:
     from vllm.model_executor.models.utils import WeightsMapper

6 changes: 2 additions & 4 deletions vllm/model_executor/layers/quantization/mxfp4.py
@@ -48,11 +48,9 @@
 from vllm.model_executor.utils import set_weight_attrs
 from vllm.platforms import current_platform
 from vllm.scalar_type import scalar_types
-from vllm.utils import (
-    has_triton_kernels,
-    round_up,
-)
+from vllm.utils import round_up
 from vllm.utils.flashinfer import has_flashinfer
+from vllm.utils.import_utils import has_triton_kernels
 from vllm.utils.torch_utils import is_torch_equal_or_newer
 
 logger = init_logger(__name__)

41 changes: 0 additions & 41 deletions vllm/utils/__init__.py
@@ -5,7 +5,6 @@
 import datetime
 import enum
 import getpass
-import importlib
 import inspect
 import json
 import multiprocessing
@@ -1062,46 +1061,6 @@ def check_use_alibi(model_config: ModelConfig) -> bool:
     )
 
 
-@cache
-def _has_module(module_name: str) -> bool:
-    """Return True if *module_name* can be found in the current environment.
-
-    The result is cached so that subsequent queries for the same module incur
-    no additional overhead.
-    """
-    return importlib.util.find_spec(module_name) is not None
-
-
-def has_pplx() -> bool:
-    """Whether the optional `pplx_kernels` package is available."""
-
-    return _has_module("pplx_kernels")
-
-
-def has_deep_ep() -> bool:
-    """Whether the optional `deep_ep` package is available."""
-
-    return _has_module("deep_ep")
-
-
-def has_deep_gemm() -> bool:
-    """Whether the optional `deep_gemm` package is available."""
-
-    return _has_module("deep_gemm")
-
-
-def has_triton_kernels() -> bool:
-    """Whether the optional `triton_kernels` package is available."""
-
-    return _has_module("triton_kernels")
-
-
-def has_tilelang() -> bool:
-    """Whether the optional `tilelang` package is available."""
-
-    return _has_module("tilelang")
-
-
 def set_process_title(
     name: str, suffix: str = "", prefix: str = envs.VLLM_PROCESS_NAME_PREFIX
 ) -> None:

3 changes: 2 additions & 1 deletion vllm/utils/deep_gemm.py
@@ -16,7 +16,8 @@
 import vllm.envs as envs
 from vllm.logger import logger
 from vllm.platforms import current_platform
-from vllm.utils import cdiv, has_deep_gemm
+from vllm.utils import cdiv
+from vllm.utils.import_utils import has_deep_gemm
 
 
 @functools.cache

36 changes: 36 additions & 0 deletions vllm/utils/import_utils.py
@@ -324,3 +324,39 @@ def __dir__(self) -> list[str]:
         if self._module is None:
             self._module = self._load()
         return dir(self._module)
+
+
+# Optional dependency detection utilities
+@cache
+def _has_module(module_name: str) -> bool:
+    """Return True if *module_name* can be found in the current environment.
+    The result is cached so that subsequent queries for the same module incur
+    no additional overhead.
+    """
+    return importlib.util.find_spec(module_name) is not None
+
+
+def has_pplx() -> bool:
+    """Whether the optional `pplx_kernels` package is available."""
+    return _has_module("pplx_kernels")
+
+
+def has_deep_ep() -> bool:
+    """Whether the optional `deep_ep` package is available."""
+    return _has_module("deep_ep")
+
+
+def has_deep_gemm() -> bool:
+    """Whether the optional `deep_gemm` package is available."""
+    return _has_module("deep_gemm")
+
+
+def has_triton_kernels() -> bool:
+    """Whether the optional `triton_kernels` package is available."""
+    return _has_module("triton_kernels")
+
+
+def has_tilelang() -> bool:
+    """Whether the optional `tilelang` package is available."""
+    return _has_module("tilelang")

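The helpers consolidated in vllm/utils/import_utils.py above are thin wrappers over a cached importlib.util.find_spec lookup, so optional backends can be probed repeatedly at negligible cost. Below is a minimal usage sketch of the new import path, mirroring the gating pattern in the test files changed above; the skip message and the pick_gemm_backend helper are illustrative only and not part of this PR.

import pytest

from vllm.utils.import_utils import has_deep_gemm, has_triton_kernels

# Skip an entire test module when an optional backend is absent,
# the same pattern used in tests/kernels/moe/test_gpt_oss_triton_kernels.py.
if not has_triton_kernels():
    pytest.skip("triton_kernels is not installed", allow_module_level=True)


def pick_gemm_backend() -> str:
    # has_deep_gemm() only checks importability via importlib.util.find_spec;
    # the result is cached, so repeated calls are cheap.
    return "deep_gemm" if has_deep_gemm() else "triton"
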
2 changes: 1 addition & 1 deletion vllm/v1/worker/gpu_ubatch_wrapper.py
@@ -22,7 +22,7 @@
 from vllm.logger import init_logger
 from vllm.platforms import current_platform
 from vllm.sequence import IntermediateTensors
-from vllm.utils import has_deep_gemm
+from vllm.utils.import_utils import has_deep_gemm
 from vllm.v1.worker.ubatching import UBatchContext, make_ubatch_contexts
 
 logger = init_logger(__name__)