1 change: 1 addition & 0 deletions vllm/model_executor/models/internvl.py
@@ -25,9 +25,9 @@
from habana_frameworks.mediapipe.media_types import dtype as dt
from habana_frameworks.mediapipe.media_types import imgtype as it
from habana_frameworks.mediapipe.media_types import readerOutType as ro
from habana_frameworks.mediapipe.operators.reader_nodes.reader_nodes import media_ext_reader_op_impl

Check failure (GitHub Actions / pre-commit): Ruff E501 at vllm/model_executor/models/internvl.py:28:81, line too long (100 > 80)
from habana_frameworks.mediapipe.operators.reader_nodes.reader_nodes import media_ext_reader_op_tensor_info

Check failure (GitHub Actions / pre-commit): Ruff E501 at vllm/model_executor/models/internvl.py:29:81, line too long (107 > 80)
from habana_frameworks.mediapipe.plugins.iterator_pytorch import MediaGenericPytorchIterator

Check failure (GitHub Actions / pre-commit): Ruff E501 at vllm/model_executor/models/internvl.py:30:81, line too long (92 > 80)
import numpy as np
from queue import Queue
import io
@@ -313,14 +313,14 @@

# Handle MediaPipe pipe_manager destructor
from habana_frameworks.mediapipe.backend.cal import pipe_manager, cpp_pipe_manager_list

Check failure (GitHub Actions / pre-commit): Ruff E501 at vllm/model_executor/models/internvl.py:316:81, line too long (87 > 80)
Check failure (GitHub Actions / pre-commit): Ruff E402 at vllm/model_executor/models/internvl.py:316:1, module level import not at top of file
def _patched_close(self):
    """Patched close method that handles None cpp_pipe_manager_list during shutdown"""
    try:
        # Check if cpp_pipe_manager_list exists and is not None

Check failure (GitHub Actions / pre-commit): Ruff E501 at vllm/model_executor/models/internvl.py:320:81, line too long (86 > 80)

        if cpp_pipe_manager_list is not None and self._pm_ in cpp_pipe_manager_list:
            cpp_pipe_manager_list.remove(self._pm_)
    except (TypeError, AttributeError):

Check failure (GitHub Actions / pre-commit): Ruff E501 at vllm/model_executor/models/internvl.py:323:81, line too long (84 > 80)

        # Handle case where cpp_pipe_manager_list is None or not iterable
        pass

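The diff does not show where _patched_close gets installed; below is a minimal monkey-patching sketch, assuming pipe_manager is the class owning the original close method (the attachment target and attribute name are assumptions, not confirmed by this diff):

    # Hypothetical sketch (not shown in this diff): replace the class's
    # close with the shutdown-safe version, so interpreter teardown does
    # not trip over a cpp_pipe_manager_list that has already become None.
    pipe_manager.close = _patched_close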
@@ -1491,6 +1491,7 @@
end_idx = start_idx + i
batch_sliced_pixel_values = \
    pixel_values[start_idx:end_idx, ...]
batch_sliced_pixel_values = batch_sliced_pixel_values.contiguous().clone()
if is_lazy:
    vit_embeds_minibatch = \
        self.vision_model(
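This .contiguous().clone() pattern (also applied in the ovis2_5.py change below) can be illustrated in isolation. A minimal sketch with illustrative shapes, motivated by the cache-miss comment in the hpu_model_runner.py change further down:

    import torch

    # A leading-dimension slice is a view: it shares storage with its
    # parent and carries a nonzero storage offset that differs per
    # minibatch.
    pixel_values = torch.randn(32, 3, 8, 8)
    batch = pixel_values[8:16, ...]
    assert batch.storage_offset() != 0   # still aliases pixel_values

    # .contiguous().clone() materializes the slice into fresh storage
    # with a zero offset, so every minibatch handed to the graph-captured
    # vision model has an identical memory layout.
    batch = batch.contiguous().clone()
    assert batch.is_contiguous() and batch.storage_offset() == 0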
3 changes: 3 additions & 0 deletions vllm/model_executor/models/ovis2_5.py
@@ -548,8 +548,11 @@ def _process_image_input(
        image_patches_flat.to(target_dtype), grid_thws,
        self.vision_buckets)

visual_embeds = visual_embeds.contiguous().clone()
grid_thws = grid_thws.contiguous().clone()
visual_tokens = self.visual_tokenizer(visual_embeds, grid_thws)
visual_embeds = self.vte(visual_tokens)  # 1:1 numeric eq.
indicator_tokens = indicator_tokens.contiguous().clone()
indicator_embeds = self.vte(indicator_tokens)
padded_patches_per_image = [
    grid[1] * grid[2] // (self.config.vit_config.hidden_stride**2)
14 changes: 14 additions & 0 deletions vllm/worker/hpu_model_runner.py
@@ -3960,6 +3960,20 @@ def try_revert_dummy_output_tokens():
with self.profiler.record_event('internal',
                                model_event_name,
                                args=profiler_args):
    # Clone inputs_embeds early to prevent cache miss failure
    if 'InternVLChatModel' in str(type(self.model.model)) or 'Ovis2_5' in str(type(self.model.model)):
        if "inputs_embeds" in execute_model_kwargs:
            orig = execute_model_kwargs["inputs_embeds"]
            execute_model_kwargs["inputs_embeds"] = orig.contiguous().clone()
        if sampling_metadata.selected_token_indices is not None:
            sampling_metadata.selected_token_indices = sampling_metadata.selected_token_indices.clone()
        if 'Ovis2_5' in str(type(self.model.model)):
            if "indicator_tokens" in execute_model_kwargs:
                execute_model_kwargs.pop('indicator_tokens', None)
            if "pixel_values" in execute_model_kwargs:
                execute_model_kwargs.pop('pixel_values', None)
            if "grids" in execute_model_kwargs:
                execute_model_kwargs.pop('grids', None)
    hidden_states = self.model.forward(
        **execute_model_kwargs,
        selected_token_indices=sampling_metadata.
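A side note on the pops above: dict.pop(key, None) already tolerates missing keys, so the in guards are redundant but harmless. A minimal sketch of the same filtering with illustrative dict contents, presumably dropping the raw vision kwargs because their features are already merged into inputs_embeds by this point (that motivation is an inference, not stated in the diff):

    # Illustrative only: strip vision-side kwargs before the
    # HPU-graph-captured language-model forward call.
    execute_model_kwargs = {"inputs_embeds": object(),
                            "indicator_tokens": object(),
                            "pixel_values": object(),
                            "grids": object()}
    for key in ("indicator_tokens", "pixel_values", "grids"):
        execute_model_kwargs.pop(key, None)  # no-op if the key is absent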