Skip to content

Commit 633bd64

Browse files
qthequartermasterman authored and epwalsh committed
[PERF] Use pybase64 to more quickly decode prompt embeddings (vllm-project#22469)
Signed-off-by: Andrew Sansom <[email protected]>
1 parent c754f09 commit 633bd64

File tree

1 file changed

+3
-2
lines changed

1 file changed

+3
-2
lines changed

vllm/entrypoints/openai/serving_engine.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
# SPDX-License-Identifier: Apache-2.0
22
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
33
import asyncio
4-
import base64
54
import io
65
import json
76
import sys
@@ -12,6 +11,7 @@
1211
from typing import (Annotated, Any, Callable, ClassVar, Generic, Optional,
1312
TypeVar, Union, cast, overload)
1413

14+
import pybase64
1515
import torch
1616
from fastapi import Request
1717
from pydantic import BaseModel, ConfigDict, Field
@@ -1008,7 +1008,8 @@ def _load_prompt_embeds(
10081008
) -> list[EmbedsPrompt]:
10091009

10101010
def _load_and_validate_embed(embed: bytes) -> EmbedsPrompt:
1011-
tensor = torch.load(io.BytesIO(base64.b64decode(embed)),
1011+
tensor = torch.load(io.BytesIO(
1012+
pybase64.b64decode(embed, validate=True)),
10121013
weights_only=True)
10131014
assert isinstance(tensor, torch.Tensor) and tensor.dtype in (
10141015
torch.float32,

0 commit comments

Comments (0)