5656 enable_test = False ),
5757]
5858
59- V1FlashAttentionImpNotSupported = [
60- "Alibaba-NLP/gte-Qwen2-1.5B-instruct" , "Alibaba-NLP/gte-modernbert-base"
61- ]
62-
6359
6460@pytest .mark .parametrize ("model_info" , MODELS )
65- def test_embed_models_mteb (hf_runner , vllm_runner , model_info : EmbedModelInfo ,
66- monkeypatch ) -> None :
67- if model_info .name in V1FlashAttentionImpNotSupported :
68- monkeypatch .setenv ("VLLM_USE_V1" , "0" )
69-
61+ def test_embed_models_mteb (hf_runner , vllm_runner ,
62+ model_info : EmbedModelInfo ) -> None :
7063 vllm_extra_kwargs : dict [str , Any ] = {}
7164 if model_info .architecture == "GteNewModel" :
7265 vllm_extra_kwargs ["hf_overrides" ] = {"architectures" : ["GteNewModel" ]}
@@ -77,11 +70,8 @@ def test_embed_models_mteb(hf_runner, vllm_runner, model_info: EmbedModelInfo,
7770
7871@pytest .mark .parametrize ("model_info" , MODELS )
7972def test_embed_models_correctness (hf_runner , vllm_runner ,
80- model_info : EmbedModelInfo , example_prompts ,
81- monkeypatch ) -> None :
82- if model_info .name in V1FlashAttentionImpNotSupported :
83- monkeypatch .setenv ("VLLM_USE_V1" , "0" )
84-
73+ model_info : EmbedModelInfo ,
74+ example_prompts ) -> None :
8575 vllm_extra_kwargs : dict [str , Any ] = {}
8676 if model_info .architecture == "GteNewModel" :
8777 vllm_extra_kwargs ["hf_overrides" ] = {"architectures" : ["GteNewModel" ]}
0 commit comments