Commit 4f882be

[Model] Siglip2 Model Support (#27566)
Signed-off-by: piood <[email protected]>
1 parent 9273754

4 files changed: +18 -6 lines changed

docs/models/supported_models.md

Lines changed: 1 addition & 1 deletion

@@ -775,7 +775,7 @@ The following table lists those that are tested in vLLM.
 | `CLIPModel` | CLIP | T / I | `openai/clip-vit-base-patch32`, `openai/clip-vit-large-patch14`, etc. | | |
 | `LlavaNextForConditionalGeneration`<sup>C</sup> | LLaVA-NeXT-based | T / I | `royokong/e5-v` | | ✅︎ |
 | `Phi3VForCausalLM`<sup>C</sup> | Phi-3-Vision-based | T + I | `TIGER-Lab/VLM2Vec-Full` | | ✅︎ |
-| `SiglipModel` | SigLIP | T / I | `google/siglip-base-patch16-224` | | |
+| `SiglipModel` | SigLIP, SigLIP2 | T / I | `google/siglip-base-patch16-224`, `google/siglip2-base-patch16-224` | | |
 | `*ForConditionalGeneration`<sup>C</sup>, `*ForCausalLM`<sup>C</sup>, etc. | Generative models | \* | N/A | \* | \* |

 <sup>C</sup> Automatically converted into an embedding model via `--convert embed`. ([details](./pooling_models.md#model-conversion))
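
Since `SiglipModel` is listed as a native embedding model (no `--convert embed` needed), the new checkpoint can be used directly for pooling. A minimal offline-embedding sketch, assuming a vLLM build that includes this commit; the `runner="pooling"` argument and the `LLM.embed` call follow vLLM's pooling-model docs and may differ across versions:

```python
# Sketch: offline text embedding with the newly supported SigLIP2
# checkpoint. Argument names are assumptions from vLLM's pooling docs.
from vllm import LLM

llm = LLM(model="google/siglip2-base-patch16-224", runner="pooling")
(output,) = llm.embed(["a photo of a cat"])
print(len(output.outputs.embedding))  # embedding dimensionality
```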

tests/models/multimodal/pooling/test_siglip.py

Lines changed: 1 addition & 1 deletion

@@ -19,7 +19,7 @@
     }
 )

-MODELS = ["google/siglip-base-patch16-224"]
+MODELS = ["google/siglip-base-patch16-224", "google/siglip2-base-patch16-224"]


 def _run_test(

vllm/model_executor/models/siglip.py

Lines changed: 5 additions & 3 deletions

@@ -174,9 +174,11 @@ class SiglipMultiModalProcessor(BaseMultiModalProcessor[SiglipProcessingInfo]):
     @cached_property
     def image_token_id(self) -> int:
         tokenizer = self.info.get_tokenizer()
-        dummy_token_id = 0
-
-        assert dummy_token_id not in tokenizer.all_special_ids
+        dummy_token_id = next(
+            token_id
+            for token_id in range(tokenizer.vocab_size)
+            if token_id not in tokenizer.all_special_ids
+        )

         return dummy_token_id
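
The motivation for this hunk: the old code hard-coded `dummy_token_id = 0` and asserted it was not a special token, which holds for SigLIP's tokenizer but not for SigLIP2, whose Gemma-style tokenizer reserves the lowest ids (including 0) for special tokens, so the assert would fire. The generator expression instead picks the first non-special id. A standalone sketch with a toy tokenizer stub (the stub and its ids are illustrative only, not the real tokenizer):

```python
# Toy illustration of the dummy-token selection above. The stub mimics
# a tokenizer (like SigLIP2's) whose lowest ids are special tokens.
class ToyTokenizer:
    vocab_size = 5
    all_special_ids = [0, 1]  # special tokens occupy the lowest ids here

tokenizer = ToyTokenizer()
dummy_token_id = next(
    token_id
    for token_id in range(tokenizer.vocab_size)
    if token_id not in tokenizer.all_special_ids
)
assert dummy_token_id == 2  # first id that is not special
```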

vllm/transformers_utils/config.py

Lines changed: 11 additions & 1 deletion

@@ -26,7 +26,10 @@
 )
 from transformers import GenerationConfig, PretrainedConfig
 from transformers.models.auto.image_processing_auto import get_image_processor_config
-from transformers.models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING_NAMES
+from transformers.models.auto.modeling_auto import (
+    MODEL_FOR_CAUSAL_LM_MAPPING_NAMES,
+    MODEL_MAPPING_NAMES,
+)
 from transformers.models.auto.tokenization_auto import get_tokenizer_config
 from transformers.utils import CONFIG_NAME as HF_CONFIG_NAME

@@ -616,6 +619,13 @@ def get_config(
         model_type = MODEL_FOR_CAUSAL_LM_MAPPING_NAMES[config.model_type]
         config.update({"architectures": [model_type]})

+    # Architecture mapping for models without explicit architectures field
+    if not config.architectures:
+        if config.model_type not in MODEL_MAPPING_NAMES:
+            raise ValueError(f"Cannot find architecture name for {config.model_type}")
+        model_type = MODEL_MAPPING_NAMES[config.model_type]
+        config.update({"architectures": [model_type]})
+
     # ModelOpt 0.31.0 and after saves the quantization config in the model
     # config file.
     quantization_config = config_dict.get("quantization_config", None)
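
The effect of this fallback: configs that lack an explicit `architectures` field (as with the SigLIP2 checkpoints targeted here, per the comment in the hunk) are resolved through Transformers' base-model mapping, which pairs each `model_type` with its `AutoModel` class name. A self-contained sketch of the same lookup; the bare `PretrainedConfig` is a stand-in for a real Hub config, and the `siglip2` entry assumes a Transformers version that includes SigLIP2:

```python
# Sketch of the fallback above, outside vLLM. MODEL_MAPPING_NAMES pairs
# a model_type with its AutoModel class name, e.g. "siglip2" -> "Siglip2Model".
from transformers import PretrainedConfig
from transformers.models.auto.modeling_auto import MODEL_MAPPING_NAMES

config = PretrainedConfig(model_type="siglip2")  # stand-in: no `architectures` set
if not config.architectures:
    if config.model_type not in MODEL_MAPPING_NAMES:
        raise ValueError(f"Cannot find architecture name for {config.model_type}")
    config.update({"architectures": [MODEL_MAPPING_NAMES[config.model_type]]})

print(config.architectures)  # ['Siglip2Model']
```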
