5 changes: 4 additions & 1 deletion tests/models/language/pooling/test_embedding.py
@@ -7,7 +7,7 @@
 from vllm.config import PoolerConfig
 from vllm.platforms import current_platform
 
-from ...utils import check_embeddings_close
+from ...utils import check_embeddings_close, check_transformers_version
 
 
 @pytest.fixture(autouse=True)
@@ -56,6 +56,9 @@ def test_models(
     model,
     monkeypatch,
 ) -> None:
+    if model == "Alibaba-NLP/gte-Qwen2-1.5B-instruct":
+        check_transformers_version(model, max_transformers_version="4.53.2")
+
     if model == "BAAI/bge-multilingual-gemma2" and current_platform.is_rocm():
         # ROCm Triton FA does not currently support sliding window attention
         # switch to use ROCm CK FA backend
9 changes: 9 additions & 0 deletions tests/models/language/pooling/test_gte.py
@@ -4,6 +4,7 @@
 
 import pytest
 
+from ...utils import check_transformers_version
 from .embed_utils import EmbedModelInfo, correctness_test_embed_models
 from .mteb_utils import mteb_test_embed_models
 
@@ -60,6 +61,10 @@
 @pytest.mark.parametrize("model_info", MODELS)
 def test_embed_models_mteb(hf_runner, vllm_runner,
                            model_info: EmbedModelInfo) -> None:
+    if model_info.name == "Alibaba-NLP/gte-Qwen2-1.5B-instruct":
+        check_transformers_version(model_info.name,
+                                   max_transformers_version="4.53.2")
+
     vllm_extra_kwargs: dict[str, Any] = {}
     if model_info.architecture == "GteNewModel":
         vllm_extra_kwargs["hf_overrides"] = {"architectures": ["GteNewModel"]}
@@ -72,6 +77,10 @@ def test_embed_models_mteb(hf_runner, vllm_runner,
 def test_embed_models_correctness(hf_runner, vllm_runner,
                                   model_info: EmbedModelInfo,
                                   example_prompts) -> None:
+    if model_info.name == "Alibaba-NLP/gte-Qwen2-1.5B-instruct":
+        check_transformers_version(model_info.name,
+                                   max_transformers_version="4.53.2")
+
     vllm_extra_kwargs: dict[str, Any] = {}
     if model_info.architecture == "GteNewModel":
         vllm_extra_kwargs["hf_overrides"] = {"architectures": ["GteNewModel"]}
4 changes: 4 additions & 0 deletions tests/models/language/pooling/test_reward.py
@@ -10,6 +10,7 @@
 from vllm.platforms import current_platform
 
 from ....conftest import HfRunner
+from ...utils import check_transformers_version
 
 
 @pytest.fixture(autouse=True)
@@ -86,6 +87,9 @@ def test_prm_models(
     dtype: str,
     monkeypatch,
 ) -> None:
+    check_transformers_version("Qwen/Qwen2.5-Math-PRM-7B",
+                               max_transformers_version="4.53.2")
+
     if current_platform.is_cpu() and os.environ.get("VLLM_USE_V1", "0") == "0":
         pytest.skip("CPU only supports V1")
 
11 changes: 11 additions & 0 deletions tests/models/utils.py
@@ -412,3 +412,14 @@ def dummy_hf_overrides(
     })
 
     return hf_config
+
+
+def check_transformers_version(model: str,
+                               min_transformers_version: Optional[str] = None,
+                               max_transformers_version: Optional[str] = None):
+    from .registry import _HfExamplesInfo
+
+    return _HfExamplesInfo(model,
+                           min_transformers_version=min_transformers_version,
+                           max_transformers_version=max_transformers_version
+                           ).check_transformers_version(on_fail="skip")
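
For context: the new helper defers to the registry's _HfExamplesInfo with on_fail="skip", so a transformers install outside the given bounds skips the test rather than failing it. Below is a minimal sketch of the version gate this presumably performs; the real logic lives in tests/models/registry.py (not part of this diff), and the name check_transformers_version_sketch is hypothetical.

# Minimal sketch (assumed) of the skip behavior behind
# _HfExamplesInfo.check_transformers_version(on_fail="skip").
from typing import Optional

import pytest
import transformers
from packaging.version import Version


def check_transformers_version_sketch(
        model: str,
        min_transformers_version: Optional[str] = None,
        max_transformers_version: Optional[str] = None) -> None:
    installed = Version(transformers.__version__)
    if (min_transformers_version is not None
            and installed < Version(min_transformers_version)):
        # Installed transformers is older than the model requires.
        pytest.skip(f"{model} requires transformers>={min_transformers_version}, "
                    f"found {installed}")
    if (max_transformers_version is not None
            and installed > Version(max_transformers_version)):
        # Installed transformers is newer than the model supports, e.g. the
        # 4.53.2 cap applied to Alibaba-NLP/gte-Qwen2-1.5B-instruct above.
        pytest.skip(f"{model} requires transformers<={max_transformers_version}, "
                    f"found {installed}")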