From 1aadbf37d48ad548ecc192583987d594d796477f Mon Sep 17 00:00:00 2001 From: Yeonsil Yoon Date: Tue, 30 Sep 2025 11:52:09 -0700 Subject: [PATCH 01/12] Ovis 2 5 (#1993) ## Essential Elements of an Effective PR Description Checklist - [x] The purpose of the PR, such as "Fix some issue (link existing issues this PR will resolve)". - [x] The test plan, such as providing test command. - [ ] The test results, such as pasting the results comparison before and after, or e2e results ## Purpose Add support for the Ovis2.5 vision-language model: a new `ovis2_5` model implementation backed by the `Siglip2Navit` vision encoder, an `Ovis2_5Processor`, image-placeholder handling in `chat_utils`, a multi-image offline-inference example, and Jenkins lm-eval/vision test configs. ## Test Plan Run the new CI configs: `cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-ovis.txt -t 1` (GSM8K accuracy) and `cd .jenkins/vision && bash run-tests.sh -c configs/models-ovis.txt -t 1` (multimodal generation). ## Test Result --------- Co-authored-by: Christopher Manteuffel --- .../lm-eval-harness/configs/models-ovis.txt | 1 + .../lm-eval-harness/configs/ovis2_5-9b.yaml | 12 + .jenkins/test_config.yaml | 107 +-- .jenkins/vision/configs/models-ovis.txt | 1 + .jenkins/vision/configs/ovis2_5-9b.yaml | 7 + .jenkins/vision/test_enc_dec_model.py | 2 + .../vision_language_multi_image.py | 31 + .../generation/vlm_utils/model_utils.py | 57 ++ vllm/entrypoints/chat_utils.py | 3 +- vllm/model_executor/models/ovis2_5.py | 566 ++++++++++++++++ vllm/model_executor/models/registry.py | 1 + vllm/model_executor/models/siglip2navit.py | 626 ++++++++++++++++++ vllm/transformers_utils/config.py | 16 + .../transformers_utils/processors/__init__.py | 3 +- vllm/transformers_utils/processors/ovis2_5.py | 458 +++++++++++++ 15 files changed, 1841 insertions(+), 50 deletions(-) create mode 100644 .jenkins/lm-eval-harness/configs/models-ovis.txt create mode 100644 .jenkins/lm-eval-harness/configs/ovis2_5-9b.yaml create mode 100644 .jenkins/vision/configs/models-ovis.txt create mode 100644 .jenkins/vision/configs/ovis2_5-9b.yaml create mode 100644 vllm/model_executor/models/ovis2_5.py create mode 100644 vllm/model_executor/models/siglip2navit.py create mode 100644 vllm/transformers_utils/processors/ovis2_5.py diff --git a/.jenkins/lm-eval-harness/configs/models-ovis.txt b/.jenkins/lm-eval-harness/configs/models-ovis.txt new file mode 100644 index 000000000000..c20ecf534544 --- /dev/null +++ b/.jenkins/lm-eval-harness/configs/models-ovis.txt @@ -0,0 +1 @@ +ovis2_5-9b.yaml \ No newline at end of file diff --git a/.jenkins/lm-eval-harness/configs/ovis2_5-9b.yaml b/.jenkins/lm-eval-harness/configs/ovis2_5-9b.yaml new file mode 100644 index 000000000000..19546a4bd3e1 --- /dev/null +++ b/.jenkins/lm-eval-harness/configs/ovis2_5-9b.yaml @@ -0,0 +1,12 @@ +model_name: "/mnt/weka/data/llm/aidc-ai/ovis2.5-9b" +tasks: +- name: "gsm8k" + metrics: + - name: "exact_match,strict-match" + value: 0.700 + - name: "exact_match,flexible-extract" + value: 0.700 +limit: 256 +num_fewshot: 8 +dtype: "bfloat16" +trust_remote_code: True diff --git a/.jenkins/test_config.yaml b/.jenkins/test_config.yaml index 0ac488e1b150..d2c55a038363 100644 --- a/.jenkins/test_config.yaml +++ b/.jenkins/test_config.yaml @@ -5,54 +5,59 @@ stages: - name: v0_gsm8k_small_g3_tp1_part1 flavor: g3 command: >- - export PT_HPU_LAZY_MODE=1 && + export PT_HPU_LAZY_MODE=1 && cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small.txt -t 1 - name: v0_gsm8k_small_g3_tp1_part2 flavor: g3 command: >- - export PT_HPU_LAZY_MODE=1 && + export PT_HPU_LAZY_MODE=1 && cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small-2.txt -t 1 - name: v0_gsm8k_small_g3_tp1_part3 flavor: g3 command: >- - export PT_HPU_LAZY_MODE=1 && + export PT_HPU_LAZY_MODE=1 && cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small-3.txt -t 1 - name: v0_gsm8k_small_g3_tp2 flavor: g3.s command: >- - export PT_HPU_LAZY_MODE=1 && + export PT_HPU_LAZY_MODE=1 && cd 
.jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small.txt -t 2 - name: v0_gsm8k_small_g2_tp1 flavor: g2 command: >- - export PT_HPU_LAZY_MODE=1 && + export PT_HPU_LAZY_MODE=1 && cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small.txt -t 1 - name: v0_gsm8k_small_g2_tp2 flavor: g2.s command: >- - export PT_HPU_LAZY_MODE=1 && + export PT_HPU_LAZY_MODE=1 && cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small.txt -t 2 - name: v0_gsm8k_g2_deepseek-v2-lite_tp1 flavor: g3 command: >- - export PT_HPU_LAZY_MODE=1 && + export PT_HPU_LAZY_MODE=1 && cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-deepseek.txt -t 1 - name: v0_gsm8k_g3_gemma3_tp1 flavor: g3.s command: >- export PT_HPU_LAZY_MODE=1 && cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-gemma.txt -t 1 + - name: v0_gsm8k_g3_ovis2_5_tp1 + flavor: g3.s + command: >- + export PT_HPU_LAZY_MODE=1 && export VLLM_SKIP_WARMUP=true && + cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-ovis.txt -t 1 - name: test_gsm8k_small_models_apc steps: - name: gsm8k_small_g3_tp1_apc flavor: g3 command: >- - export VLLM_CONTIGUOUS_PA=false && + export VLLM_CONTIGUOUS_PA=false && cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small.txt -t 1 -a - name: gsm8k_small_g2_tp1_apc flavor: g2 command: >- - export VLLM_CONTIGUOUS_PA=false && + export VLLM_CONTIGUOUS_PA=false && cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small.txt -t 1 -a - name: test_gsm8k_small_models_merged_prefill steps: @@ -66,100 +71,100 @@ stages: - name: v0_gsm8k_large_g3_tp2_part1 flavor: g3.s command: >- - export PT_HPU_LAZY_MODE=1 && + export PT_HPU_LAZY_MODE=1 && cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-large.txt -t 2 - name: v0_gsm8k_large_g3_tp2_part2 flavor: g3.s command: >- - export PT_HPU_LAZY_MODE=1 && + export PT_HPU_LAZY_MODE=1 && cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-large-2.txt -t 2 - name: v0_gsm8k_large_g2_tp4 flavor: g2.m command: >- - export PT_HPU_LAZY_MODE=1 && + export PT_HPU_LAZY_MODE=1 && cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-large.txt -t 4 - name: test_gsm8k_fp8 steps: - name: gsm8k_small_g3_tp1_fp8 flavor: g3 command: >- - cd .jenkins/lm-eval-harness && - PT_HPU_LAZY_MODE=1 + cd .jenkins/lm-eval-harness && + PT_HPU_LAZY_MODE=1 bash run-tests.sh -c configs/models-fp8-g3-tp1.txt -t 1 # - name: gsm8k_small_g3_tp2_fp8 # flavor: g3.s # command: >- - # cd .jenkins/lm-eval-harness && - # PT_HPU_LAZY_MODE=1 + # cd .jenkins/lm-eval-harness && + # PT_HPU_LAZY_MODE=1 # bash run-tests.sh -c configs/models-fp8.txt -t 2 - name: test_gsm8k_fp8_bypass_inc steps: - name: gsm8k_fp8_llama4_scout_g3_tp2_compressed_tensor flavor: g3.s command: >- - cd .jenkins/lm-eval-harness && - PT_HPU_LAZY_MODE=1 + cd .jenkins/lm-eval-harness && + PT_HPU_LAZY_MODE=1 bash run-tests.sh -c configs/models-fp8-compressedtensor.txt -t 2 - name: gsm8k_fp8_qwen3_30B_g3_tp1_block_scale_dynamic flavor: g3 command: >- - cd .jenkins/lm-eval-harness && - PT_HPU_LAZY_MODE=1 + cd .jenkins/lm-eval-harness && + PT_HPU_LAZY_MODE=1 bash run-tests.sh -c configs/models-fp8-blockfp8.txt -t 1 - name: gsm8k_fp8_qwen3_30B_g3_tp1_block_scale_dequant flavor: g3 command: >- - cd .jenkins/lm-eval-harness && - PT_HPU_LAZY_MODE=1 VLLM_HPU_FORCE_CHANNEL_FP8=0 + cd .jenkins/lm-eval-harness && + PT_HPU_LAZY_MODE=1 VLLM_HPU_FORCE_CHANNEL_FP8=0 bash run-tests.sh -c configs/models-fp8-blockfp8.txt -t 1 - name: 
test_gsm8k_mss steps: - name: gsm8k_small_g3_tp1_mss flavor: g3 command: >- - cd .jenkins/lm-eval-harness && - PT_HPU_LAZY_MODE=1 + cd .jenkins/lm-eval-harness && + PT_HPU_LAZY_MODE=1 bash run-tests.sh -c configs/models-mss.txt -t 1 - name: gsm8k_small_g2_tp1_mss flavor: g2 command: >- - cd .jenkins/lm-eval-harness && - PT_HPU_LAZY_MODE=1 + cd .jenkins/lm-eval-harness && + PT_HPU_LAZY_MODE=1 bash run-tests.sh -c configs/models-mss.txt -t 1 - name: gsm8k_small_g3_tp2_mss flavor: g3.s command: >- - cd .jenkins/lm-eval-harness && - PT_HPU_LAZY_MODE=1 + cd .jenkins/lm-eval-harness && + PT_HPU_LAZY_MODE=1 bash run-tests.sh -c configs/models-mss.txt -t 2 - name: gsm8k_small_g2_tp2_mss flavor: g2.s command: >- - cd .jenkins/lm-eval-harness && - PT_HPU_LAZY_MODE=1 + cd .jenkins/lm-eval-harness && + PT_HPU_LAZY_MODE=1 bash run-tests.sh -c configs/models-mss.txt -t 2 - name: gsm8k_small_g2_tp1_spec_decode flavor: g2 command: >- - cd .jenkins/lm-eval-harness && - PT_HPU_LAZY_MODE=1 + cd .jenkins/lm-eval-harness && + PT_HPU_LAZY_MODE=1 bash run-tests.sh -c configs/models-mss.txt -t 1 - name: test_gsm8k_spec_decode steps: # - name: gsm8k_small_g2_tp1_mlp_spec_decode # flavor: g2 # command: >- - # PT_HPU_LAZY_MODE=1 VLLM_CONTIGUOUS_PA=false VLLM_SKIP_WARMUP=True + # PT_HPU_LAZY_MODE=1 VLLM_CONTIGUOUS_PA=false VLLM_SKIP_WARMUP=True # pytest -v tests/spec_decode/e2e/test_mlp_correctness.py::test_mlp_e2e_greedy_correctness - name: gsm8k_small_g2_tp1_medusa_spec_decode flavor: g2 command: >- - PT_HPU_LAZY_MODE=1 VLLM_CONTIGUOUS_PA=false VLLM_SKIP_WARMUP=True + PT_HPU_LAZY_MODE=1 VLLM_CONTIGUOUS_PA=false VLLM_SKIP_WARMUP=True pytest -v tests/spec_decode/e2e/test_medusa_correctness.py::test_medusa_e2e_greedy_correctness - name: gsm8k_small_g2_tp1_eagle_spec_decode flavor: g2 command: >- - PT_HPU_LAZY_MODE=1 VLLM_CONTIGUOUS_PA=false VLLM_SKIP_WARMUP=True + PT_HPU_LAZY_MODE=1 VLLM_CONTIGUOUS_PA=false VLLM_SKIP_WARMUP=True pytest -v tests/spec_decode/e2e/test_eagle_correctness.py::test_eagle_e2e_greedy_correctness #TODO(kwisniewski98) temporary disable test, until model specific for Gaudi2 is uploaded to test infrastructure # - name: test_deepseek_mtp @@ -167,38 +172,38 @@ stages: # - name: test_deepseek_mtp_correctness # flavor: g3 # command: >- - # PT_HPU_LAZY_MODE=1 VLLM_CONTIGUOUS_PA=false VLLM_SKIP_WARMUP=True + # PT_HPU_LAZY_MODE=1 VLLM_CONTIGUOUS_PA=false VLLM_SKIP_WARMUP=True # pytest -v tests/spec_decode/e2e/test_mtp_correctness.py::test_mtp_e2e_greedy_correctness - name: tests_lora steps: - name: test_llama_lora flavor: g2 command: >- - PT_HPU_LAZY_MODE=1 VLLM_SKIP_WARMUP=true + PT_HPU_LAZY_MODE=1 VLLM_SKIP_WARMUP=true pytest -v tests/lora/test_llama_hpu.py::test_llama_lora_1x - name: test_multilora flavor: g2 command: >- - PT_HPU_LAZY_MODE=1 VLLM_SKIP_WARMUP=true + PT_HPU_LAZY_MODE=1 VLLM_SKIP_WARMUP=true pytest -v tests/lora/test_multilora_hpu.py::test_llama_multilora_1x # - name: test_long_context # flavor: g2 # command: >- - # PT_HPU_LAZY_MODE=1 VLLM_SKIP_WARMUP=true + # PT_HPU_LAZY_MODE=1 VLLM_SKIP_WARMUP=true # pytest -v tests/lora/test_long_context_hpu.py::test_quality - name: tests_multimodal steps: - name: multimodal_small_g3_tp1 flavor: g3 command: >- - cd .jenkins/vision && - PT_HPU_LAZY_MODE=1 + cd .jenkins/vision && + PT_HPU_LAZY_MODE=1 bash run-tests.sh -c configs/models-small.txt -t 1 - name: multimodal_small_g3_tp2 flavor: g3.s command: >- cd .jenkins/vision && - PT_HPU_LAZY_MODE=1 + PT_HPU_LAZY_MODE=1 bash run-tests.sh -c configs/models-small.txt -t 2 - name: multimodal_qwen_tp1 
flavor: g3.s @@ -210,13 +215,13 @@ stages: flavor: g3 command: >- cd .jenkins/vision && - PT_HPU_LAZY_MODE=1 + PT_HPU_LAZY_MODE=1 bash run-tests.sh -c configs/models-mss.txt -t 1 - name: multimodal_small_g3_tp2_mss flavor: g3.s command: >- cd .jenkins/vision && - PT_HPU_LAZY_MODE=1 + PT_HPU_LAZY_MODE=1 bash run-tests.sh -c configs/models-mss.txt -t 2 - name: multimodal_llama4_scout_g3_tp2_ep flavor: g3.s @@ -230,17 +235,23 @@ stages: cd .jenkins/vision && PT_HPU_LAZY_MODE=1 bash run-tests.sh -c configs/models-gemma.txt -t 1 + - name: multimodal_ovis2_5_g3_tp1_ep + flavor: g3.s + command: >- + cd .jenkins/vision && + PT_HPU_LAZY_MODE=1 VLLM_SKIP_WARMUP=true + bash run-tests.sh -c configs/models-ovis.txt -t 1 - name: tests_int4_quantization steps: - name: test_awq flavor: g2 command: >- - PT_HPU_LAZY_MODE=1 VLLM_SKIP_WARMUP=true + PT_HPU_LAZY_MODE=1 VLLM_SKIP_WARMUP=true pytest -v tests/quantization/test_awq.py::test_awq - name: test_gptq flavor: g2 command: >- - PT_HPU_LAZY_MODE=1 VLLM_SKIP_WARMUP=true + PT_HPU_LAZY_MODE=1 VLLM_SKIP_WARMUP=true pytest -v tests/quantization/test_gptq.py::test_gptq - name: tests_guided_decode steps: @@ -248,8 +259,8 @@ stages: flavor: g2 command: >- pip install -e tests/vllm_test_utils && - export VLLM_SKIP_WARMUP=true && PT_HPU_LAZY_MODE=1 - pytest -v tests/entrypoints/llm/test_lazy_outlines.py -s -vvv --log-cli-level=INFO + export VLLM_SKIP_WARMUP=true && PT_HPU_LAZY_MODE=1 + pytest -v tests/entrypoints/llm/test_lazy_outlines.py -s -vvv --log-cli-level=INFO # - name: test_guided_generate # flavor: g2 # command: >- diff --git a/.jenkins/vision/configs/models-ovis.txt b/.jenkins/vision/configs/models-ovis.txt new file mode 100644 index 000000000000..c20ecf534544 --- /dev/null +++ b/.jenkins/vision/configs/models-ovis.txt @@ -0,0 +1 @@ +ovis2_5-9b.yaml \ No newline at end of file diff --git a/.jenkins/vision/configs/ovis2_5-9b.yaml b/.jenkins/vision/configs/ovis2_5-9b.yaml new file mode 100644 index 000000000000..8c86fc59245d --- /dev/null +++ b/.jenkins/vision/configs/ovis2_5-9b.yaml @@ -0,0 +1,7 @@ +model_name: "/mnt/weka/data/llm/aidc-ai/ovis2.5-9b" +dtype: "bfloat16" +max_model_len: 32768 +max_num_seqs: 32 +num_prompts: 4 +limit_mm_per_prompt_image: 5 +trust_remote_code: True diff --git a/.jenkins/vision/test_enc_dec_model.py b/.jenkins/vision/test_enc_dec_model.py index a1571c64f41a..2e021a2ebb70 100644 --- a/.jenkins/vision/test_enc_dec_model.py +++ b/.jenkins/vision/test_enc_dec_model.py @@ -24,6 +24,7 @@ def fail_on_exit(): def launch_enc_dec_model(config, question, images): model_name = config.get('model_name') dtype = config.get('dtype', 'bfloat16') + trust_remote_code = config.get('trust_remote_code', False) max_num_seqs = config.get('max_num_seqs', 128) max_model_len = config.get('max_model_len', 4096) enforce_eager = config.get('enforce_eager', False) @@ -41,6 +42,7 @@ def launch_enc_dec_model(config, question, images): enable_expert_parallel=enable_expert_parallel, enforce_eager=enforce_eager, limit_mm_per_prompt={"image": limit_mm_per_prompt_image}, + trust_remote_code=trust_remote_code, ) tokenizer = AutoTokenizer.from_pretrained(model_name) diff --git a/examples/offline_inference/vision_language_multi_image.py b/examples/offline_inference/vision_language_multi_image.py index ea7a793d026b..899f0a009532 100644 --- a/examples/offline_inference/vision_language_multi_image.py +++ b/examples/offline_inference/vision_language_multi_image.py @@ -460,6 +460,36 @@ def load_ovis(question: str, image_urls: list[str]) -> ModelRequestData: ) +# ovis2_5 +def 
load_ovis2_5(question: str, image_urls: list[str]) -> ModelRequestData: + model_name = "AIDC-AI/Ovis2.5-2B" + + engine_args = EngineArgs( + model=model_name, + max_model_len=8192, + max_num_seqs=2, + trust_remote_code=True, + dtype="half", + limit_mm_per_prompt={"image": len(image_urls)}, + ) + + placeholders = "\n".join( + f"Image-{i}: <image>\n" for i, _ in enumerate(image_urls, start=1) + ) + messages = [{"role": "user", "content": f"{placeholders}\n{question}"}] + + tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True) + prompt = tokenizer.apply_chat_template( + messages, tokenize=False, add_generation_prompt=True + ) + + return ModelRequestData( + engine_args=engine_args, + prompt=prompt, + image_data=[fetch_image(url) for url in image_urls], + ) + + def load_pixtral_hf(question: str, image_urls: list[str]) -> ModelRequestData: model_name = "mistral-community/pixtral-12b" @@ -742,6 +772,7 @@ def load_tarsier(question: str, image_urls: list[str]) -> ModelRequestData: "mllama": load_mllama, "NVLM_D": load_nvlm_d, "ovis": load_ovis, + "ovis2_5": load_ovis2_5, "phi3_v": load_phi3v, "phi4_mm": load_phi4mm, "pixtral_hf": load_pixtral_hf, diff --git a/tests/models/multimodal/generation/vlm_utils/model_utils.py b/tests/models/multimodal/generation/vlm_utils/model_utils.py index af4c72f44b67..2a937c56fbb9 100644 --- a/tests/models/multimodal/generation/vlm_utils/model_utils.py +++ b/tests/models/multimodal/generation/vlm_utils/model_utils.py @@ -788,6 +788,63 @@ def processor(*args, text="", images=None, **kwargs): return hf_model +def ovis2_5_patch_hf_runner(hf_model: HfRunner) -> HfRunner: + """Patches and returns an instance of the HfRunner to use for Ovis2.5.""" + hf_model.model.get_output_embeddings = lambda: \ + hf_model.model.llm.get_output_embeddings() + + def processor(*args, text="", images=None, videos=None, **kwargs): + if images is None: + images = [] + else: + images = [images] if isinstance(images, Image) else images + if videos is None: + videos = [] + else: + videos = [videos] if isinstance(videos, np.ndarray) else videos + videos = [[Image.fromarray(frame) for frame in vid] + for vid in videos] + + prompt_start_and_end = { + "qwen2": ("<|im_start|>user\n", "<|im_end|>\n"), + "llama": + ("<|start_header_id|>user<|end_header_id|>\n\n", "<|eot_id|>"), + "gemma2": ("<start_of_turn>user\n", "<end_of_turn>\n"), + } + for start, end in prompt_start_and_end.values(): + if start in text and end in text: + text = text.split(start)[1].split(end)[0] + break + + images_message = [{"type": "image", "image": img} for img in images] + videos_message = [{"type": "video", "video": vid} for vid in videos] + + messages = [{ + "role": + "user", + "content": [ + *images_message, + *videos_message, + { + "type": "text", + "text": text + }, + ], + }] + + input_ids, pixel_values, grid_thws = hf_model.model.preprocess_inputs( + messages=messages, enable_thinking=True) + inputs = { + "inputs": input_ids, + "pixel_values": pixel_values, + "grid_thws": grid_thws, + } + return BatchFeature(data=inputs, tensor_type="pt") + + hf_model.processor = processor + return hf_model + + def qwen2_5_omni_patch_hf_runner(hf_model: HfRunner) -> HfRunner: """Patches and returns an instance of the HfRunner for Qwen2.5-Omni.""" thinker = hf_model.model.thinker diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py index f5f45a62ca2f..af09cf0580ba 100644 --- a/vllm/entrypoints/chat_utils.py +++ b/vllm/entrypoints/chat_utils.py @@ -527,7 +527,8 @@ def _placeholder_str(self, modality: ModalityStr, if model_type in 
("aya_vision", "chameleon", "deepseek_vl_v2", "internvl_chat", "ovis", "skywork_chat", - "NVLM_D", "h2ovl_chat", "idefics3", "smolvlm"): + "NVLM_D", "h2ovl_chat", "idefics3", "smolvlm", + "ovis2_5"): return "" if model_type in ("mllama", "llama4"): return "<|image|>" diff --git a/vllm/model_executor/models/ovis2_5.py b/vllm/model_executor/models/ovis2_5.py new file mode 100644 index 000000000000..dd110f7c7cf9 --- /dev/null +++ b/vllm/model_executor/models/ovis2_5.py @@ -0,0 +1,566 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +""" PyTorch Ovis model.""" +from collections.abc import Iterable, Mapping +from functools import partial +from typing import Optional, Union + +import torch +import torch.nn as nn +from transformers import BaseImageProcessor, BatchFeature, PretrainedConfig + +from vllm.config import VllmConfig +from vllm.model_executor.layers.linear import ReplicatedLinear +from vllm.model_executor.layers.quantization.base_config import ( + QuantizationConfig) +from vllm.model_executor.models.ovis import (OvisImagePatchInputs, + VisualEmbedding) +from vllm.model_executor.models.siglip2navit import Siglip2NavitModel +from vllm.model_executor.models.utils import (AutoWeightsLoader, flatten_bn, + init_vllm_registered_model, + maybe_prefix) +from vllm.model_executor.sampling_metadata import SamplingMetadata +from vllm.multimodal import MULTIMODAL_REGISTRY +from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig, + MultiModalKwargs) +from vllm.multimodal.parse import ImageSize, MultiModalDataItems +from vllm.multimodal.processing import (BaseMultiModalProcessor, + BaseProcessingInfo, PromptReplacement) +from vllm.multimodal.profiling import BaseDummyInputsBuilder +from vllm.sequence import IntermediateTensors +from vllm.transformers_utils.processors.ovis2_5 import Ovis2_5Processor + +from .interfaces import MultiModalEmbeddings, SupportsMultiModal + +IMAGE_TOKEN = "" +VIDEO_TOKEN = "