We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 141a5bd commit 36bbdbb — Copy full SHA for 36bbdbb
api/vllm_routes/utils.py
@@ -21,10 +21,11 @@ async def get_gen_prompt(request, model_name):
21
22
23
async def get_model_inputs(request, prompt, model_name):
24
+ max_input_tokens = VLLM_ENGINE.max_model_len - request.max_tokens
25
if isinstance(prompt, str):
- input_ids = VLLM_ENGINE.encode_tokenizer(prompt).input_ids
26
+ input_ids = VLLM_ENGINE.encode_tokenizer(prompt).input_ids[-max_input_tokens:] # truncate left
27
elif isinstance(prompt[0], int):
- input_ids = prompt
28
+ input_ids = prompt[-max_input_tokens:] # truncate left
29
else:
30
if "baichuan-13b" in model_name:
31
input_ids = build_baichuan_chat_input(
0 commit comments