Skip to content

Commit 36bbdbb

Browse files
author
xusenlin
committed
Fix model input length
1 parent 141a5bd commit 36bbdbb

File tree

1 file changed

+3
-2
lines changed

1 file changed

+3
-2
lines changed

api/vllm_routes/utils.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -21,10 +21,11 @@ async def get_gen_prompt(request, model_name):
 
 
 async def get_model_inputs(request, prompt, model_name):
+    max_input_tokens = VLLM_ENGINE.max_model_len - request.max_tokens
     if isinstance(prompt, str):
-        input_ids = VLLM_ENGINE.encode_tokenizer(prompt).input_ids
+        input_ids = VLLM_ENGINE.encode_tokenizer(prompt).input_ids[-max_input_tokens:] # truncate left
     elif isinstance(prompt[0], int):
-        input_ids = prompt
+        input_ids = prompt[-max_input_tokens:] # truncate left
     else:
         if "baichuan-13b" in model_name:
             input_ids = build_baichuan_chat_input(

0 commit comments

Comments
 (0)