8 changes: 0 additions & 8 deletions tests/models/language/pooling/test_classification.py
@@ -6,14 +6,6 @@
 
 from vllm.platforms import current_platform
 
-# TODO: enable when float32 is supported by V1
-# @pytest.fixture(autouse=True)
-# def v1(run_with_both_engines):
-#     # Simple autouse wrapper to run both engines for each test
-#     # This can be promoted up to conftest.py to run for every
-#     # test in a package
-#     pass
-
 
 @pytest.mark.parametrize(
     "model",
10 changes: 0 additions & 10 deletions tests/models/language/pooling/test_gte.py
@@ -56,17 +56,10 @@
                    enable_test=False),
 ]
 
-V1FlashAttentionImpNotSupported = [
-    "Alibaba-NLP/gte-Qwen2-1.5B-instruct", "Alibaba-NLP/gte-modernbert-base"
-]
-
 
 @pytest.mark.parametrize("model_info", MODELS)
 def test_embed_models_mteb(hf_runner, vllm_runner, model_info: EmbedModelInfo,
                            monkeypatch) -> None:
-    if model_info.name in V1FlashAttentionImpNotSupported:
-        monkeypatch.setenv("VLLM_USE_V1", "0")
-
     vllm_extra_kwargs: dict[str, Any] = {}
     if model_info.architecture == "GteNewModel":
         vllm_extra_kwargs["hf_overrides"] = {"architectures": ["GteNewModel"]}

@@ -79,9 +72,6 @@ def test_embed_models_mteb(hf_runner, vllm_runner, model_info: EmbedModelInfo,
 def test_embed_models_correctness(hf_runner, vllm_runner,
                                   model_info: EmbedModelInfo, example_prompts,
                                   monkeypatch) -> None:
-    if model_info.name in V1FlashAttentionImpNotSupported:
-        monkeypatch.setenv("VLLM_USE_V1", "0")
-
     vllm_extra_kwargs: dict[str, Any] = {}
     if model_info.architecture == "GteNewModel":
         vllm_extra_kwargs["hf_overrides"] = {"architectures": ["GteNewModel"]}
13 changes: 0 additions & 13 deletions tests/models/language/pooling/test_jina.py
@@ -4,7 +4,6 @@
 
 import pytest
 
-import vllm.envs as envs
 from vllm import PoolingParams
 
 from ...utils import EmbedModelInfo, RerankModelInfo

@@ -24,14 +23,6 @@
 ]
 
 
-@pytest.fixture(autouse=True)
-def v1(run_with_both_engines):
-    # Simple autouse wrapper to run both engines for each test
-    # This can be promoted up to conftest.py to run for every
-    # test in a package
-    pass
-
-
 @pytest.mark.parametrize("model_info", EMBEDDING_MODELS)
 def test_embed_models_mteb(hf_runner, vllm_runner,
                            model_info: EmbedModelInfo) -> None:

@@ -63,10 +54,6 @@ def hf_model_callback(model):
 @pytest.mark.parametrize("model_info", RERANK_MODELS)
 def test_rerank_models_mteb(hf_runner, vllm_runner,
                             model_info: RerankModelInfo) -> None:
-    if (model_info.architecture == "XLMRobertaForSequenceClassification"
-            and envs.VLLM_USE_V1):
-        pytest.skip("Not supported yet")
-
     mteb_test_rerank_models(hf_runner, vllm_runner, model_info)
5 changes: 5 additions & 0 deletions vllm/config.py
@@ -4885,6 +4885,11 @@
         if self.model_config is None:
             return
 
+        # Avoid running try_verify_and_update_config multiple times
+        if getattr(self.model_config, "config_updated", False):
+            return
+        self.model_config.config_updated = True
+
         architecture = self.model_config.architecture
         if architecture is None:
             return

GitHub Actions / pre-commit annotation on line 4891 in vllm/config.py (reported 10 times):
"ModelConfig" has no attribute "config_updated"  [attr-defined]
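The pre-commit failure is a mypy error: `config_updated` is attached to `self.model_config` dynamically but never declared on `ModelConfig`, so the type checker rejects the attribute access. A minimal sketch of one way to clear it, assuming a dataclass-style `ModelConfig` (only the field name comes from the diff; the class shape here is illustrative, not vLLM's actual definition):

from dataclasses import dataclass


@dataclass
class ModelConfig:
    # ...existing fields elided...

    # Declaring the flag on the class (instead of setting it dynamically in
    # try_verify_and_update_config) gives mypy a declared attribute to check,
    # which would resolve the [attr-defined] failure above.
    config_updated: bool = False

With the field declared up front, the `getattr(self.model_config, "config_updated", False)` fallback in the new guard could also become a plain `self.model_config.config_updated` read, since the `False` default is then guaranteed.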
6 changes: 2 additions & 4 deletions vllm/model_executor/models/bert_with_rope.py
@@ -8,7 +8,6 @@
 from transformers import PretrainedConfig
 
 from vllm.attention import Attention, AttentionType
-from vllm.compilation.decorators import support_torch_compile
 from vllm.config import CacheConfig, VllmConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.activation import (get_act_and_mul_fn,

@@ -364,7 +363,6 @@ def forward(self, positions: torch.Tensor, hidden_states: torch.Tensor):
         return hidden_states
 
 
-@support_torch_compile
 class BertWithRopeEncoder(nn.Module):
 
     def __init__(self,

@@ -398,7 +396,7 @@ def forward(
         return hidden_states
 
 
-class BertWithRope(nn.Module, SupportsV0Only, SupportsQuant):
+class BertWithRope(nn.Module, SupportsQuant):
     hf_to_vllm_mapper = WeightsMapper(orig_to_new_prefix={"model.": ""})
 
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):

@@ -468,7 +466,7 @@ def load_weights(self, weights: Iterable[tuple[str,
     return loaded_params
 
 
-class NomicBertModel(BertWithRope):
+class NomicBertModel(BertWithRope, SupportsV0Only):
     # for https://huggingface.co/nomic-ai/nomic-bert-2048
 
     hf_to_vllm_mapper = WeightsMapper(
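The last two hunks narrow the `SupportsV0Only` marker from the whole `BertWithRope` family to `NomicBertModel` alone, so only that subclass stays pinned to the V0 engine. A minimal sketch of the marker-interface pattern this relies on (the class bodies and the `requires_v0` helper are illustrative, not vLLM's actual dispatch code):

class SupportsV0Only:
    """Marker mixin: the model can only run on the V0 engine."""


class BertWithRope:
    """Stand-in for the real model; no longer carries the marker."""


class NomicBertModel(BertWithRope, SupportsV0Only):
    """Still V0-only after this change."""


def requires_v0(model_cls: type) -> bool:
    # Dispatch code can detect the marker with a plain issubclass() check.
    return issubclass(model_cls, SupportsV0Only)


assert not requires_v0(BertWithRope)  # BertWithRope family now eligible for V1
assert requires_v0(NomicBertModel)    # nomic-bert-2048 stays on V0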
2 changes: 0 additions & 2 deletions vllm/model_executor/models/modernbert.py
@@ -8,7 +8,6 @@
 from transformers import ModernBertConfig
 
 from vllm.attention import Attention, AttentionType
-from vllm.compilation.decorators import support_torch_compile
 from vllm.config import VllmConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.linear import (QKVParallelLinear,

@@ -200,7 +199,6 @@ def forward(
         return hidden_states
 
 
-@support_torch_compile
 class ModernBertModel(nn.Module):
     hf_to_vllm_mapper = WeightsMapper(
         orig_to_new_prefix={"layers.": "encoder_layer.layers."})