Commit 5aafc16

ariG23498 authored and zixi-qi committed
[Bugfix] Decode Tokenized IDs to Strings for hf_processor in llm.chat() with model_impl=transformers (vllm-project#21353)
Signed-off-by: ariG23498 <[email protected]>
Signed-off-by: qizixi <[email protected]>
1 parent 6728377 commit 5aafc16
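
The fix rests on a simple round trip: Hugging Face processors only accept string prompts, but ProcessorMixin exposes a decode() method (delegated to the underlying tokenizer) that turns token IDs back into such a string. Below is a minimal sketch of that round trip, assuming the same LLaVA-OneVision checkpoint used in the new test; the prompt text is an illustrative assumption, not taken from the commit.

# Minimal sketch: token IDs decode back to a string the HF processor accepts.
# The model ID matches the test below; the prompt text is an assumption.
from transformers import AutoProcessor

processor = AutoProcessor.from_pretrained(
    "llava-hf/llava-onevision-qwen2-0.5b-ov-hf")

ids = processor.tokenizer.encode("What is the content of this image?",
                                 add_special_tokens=False)
text = processor.decode(ids)  # ProcessorMixin.decode defers to the tokenizer

assert text == "What is the content of this image?"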

File tree

2 files changed: +45 −0 lines changed
New test file (Lines changed: 40 additions & 0 deletions)
@@ -0,0 +1,40 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest

from vllm.assets.image import ImageAsset
from vllm.config import ModelConfig
from vllm.multimodal import MULTIMODAL_REGISTRY


# yapf: disable
@pytest.mark.parametrize("model_id",
                         ["llava-hf/llava-onevision-qwen2-0.5b-ov-hf"])
def test_multimodal_processor(model_id):
    model_config = ModelConfig(
        model=model_id,
        model_impl="transformers",
    )

    mm_processor = MULTIMODAL_REGISTRY.create_processor(model_config)

    image_pil = ImageAsset('cherry_blossom').pil_image
    mm_data = {"image": image_pil}
    str_prompt = "<|im_start|>user <image>\nWhat is the content of this image?<|im_end|><|im_start|>assistant\n"  # noqa: E501
    str_processed_inputs = mm_processor.apply(
        prompt=str_prompt,
        mm_data=mm_data,
        hf_processor_mm_kwargs={},
    )

    ids_prompt = [
        151644, 872, 220, 151646, 198, 3838, 374, 279, 2213, 315, 419, 2168,
        30, 151645, 151644, 77091, 198
    ]
    ids_processed_inputs = mm_processor.apply(
        prompt=ids_prompt,
        mm_data=mm_data,
        hf_processor_mm_kwargs={},
    )

    assert str_processed_inputs["prompt"] == ids_processed_inputs["prompt"]
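
The test drives the same processor twice, once with the chat-template string and once with the equivalent token IDs, and asserts that both paths produce an identical prompt. The user-facing path that previously hit the bug is llm.chat() with model_impl="transformers", which can hand the multimodal processor token IDs rather than a string. A hedged sketch of that entry point follows; the image URL and message contents are illustrative assumptions, not part of the commit.

# Hedged sketch of the affected entry point. llm.chat() applies the chat
# template and may pass token IDs to the multimodal processor, which this
# commit now decodes back into a string before calling the HF processor.
from vllm import LLM

llm = LLM(model="llava-hf/llava-onevision-qwen2-0.5b-ov-hf",
          model_impl="transformers")

outputs = llm.chat([{
    "role": "user",
    "content": [
        {"type": "image_url",
         "image_url": {"url": "https://example.com/cherry_blossom.jpg"}},
        {"type": "text",
         "text": "What is the content of this image?"},
    ],
}])
print(outputs[0].outputs[0].text)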

vllm/model_executor/models/transformers.py

Lines changed: 5 additions & 0 deletions
@@ -320,6 +320,11 @@ def apply(
         mm_items = self._to_mm_items(mm_data)
         hf_processor = self.info.get_hf_processor(**hf_processor_mm_kwargs)
+        if not isinstance(prompt, str):
+            # The prompt arrived as tokenized IDs, which the hf_processor
+            # does not support, so decode the IDs back into a string.
+            prompt = hf_processor.decode(prompt)

         (prompt_ids, processed_data,
          mm_token_type_ids) = self._apply_hf_processor_text_mm(
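
The decode-then-reprocess approach means the processor always re-tokenizes from a string, so correctness hinges on the tokenizer round-tripping cleanly; that is exactly what the new test pins down by requiring the string path and the ID path to yield the same prompt.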
