
Commit 67d96c2

Use slow tokenizer for open llama models (#168)
1 parent 033f5c7 commit 67d96c2

File tree

1 file changed: +6 -1 lines changed


vllm/engine/tokenizer_utils.py

Lines changed: 6 additions & 1 deletion
@@ -17,7 +17,12 @@ def get_tokenizer(
 ) -> Union[PreTrainedTokenizer, PreTrainedTokenizerFast]:
     """Gets a tokenizer for the given model name via Huggingface."""
     config = AutoConfig.from_pretrained(model_name)
-    if config.model_type == "llama" and getattr(kwargs, "use_fast", True):
+    if "open_llama" in model_name:
+        kwargs["use_fast"] = False
+        logger.info(
+            "OpenLLaMA models do not support the fast tokenizer. "
+            "Using the slow tokenizer instead.")
+    elif config.model_type == "llama" and getattr(kwargs, "use_fast", True):
         # LLaMA fast tokenizer causes protobuf errors in some environments.
         # However, we found that the below LLaMA fast tokenizer works well in
         # most environments.
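
For context, below is a minimal, self-contained sketch of the patched helper. It is an assumption-laden reconstruction, not the commit's full file: the module-level logger and the final AutoTokenizer call (truncated out of the hunk) are assumed, and kwargs.get("use_fast", True) is used where the commit writes getattr(kwargs, "use_fast", True), since getattr on a dict always returns the default.

import logging
from typing import Union

from transformers import (AutoConfig, AutoTokenizer, PreTrainedTokenizer,
                          PreTrainedTokenizerFast)

logger = logging.getLogger(__name__)


def get_tokenizer(
    model_name: str,
    *args,
    **kwargs,
) -> Union[PreTrainedTokenizer, PreTrainedTokenizerFast]:
    """Gets a tokenizer for the given model name via Huggingface."""
    config = AutoConfig.from_pretrained(model_name)
    if "open_llama" in model_name:
        # Force the slow (Python) tokenizer; per the commit, OpenLLaMA
        # models do not work with the fast tokenizer.
        kwargs["use_fast"] = False
        logger.info(
            "OpenLLaMA models do not support the fast tokenizer. "
            "Using the slow tokenizer instead.")
    elif config.model_type == "llama" and kwargs.get("use_fast", True):
        # The commit's getattr(kwargs, "use_fast", True) looks up an
        # attribute on the dict and so always yields True; .get() is the
        # check presumably intended, so the sketch uses it here.
        # LLaMA fast tokenizer causes protobuf errors in some environments;
        # the original file handles that below (elided from the hunk).
        pass
    return AutoTokenizer.from_pretrained(model_name, *args, **kwargs)

An illustrative call (the model name is an example, not from the commit) would be tokenizer = get_tokenizer("openlm-research/open_llama_7b"), which takes the first branch and loads the slow tokenizer.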
