Skip to content

Commit 720b10f

Browse files
[1/N] API Server (Remove Proxy) (#11529)
1 parent b85a977 commit 720b10f

File tree

2 files changed

+17
-7
lines changed

2 files changed

+17
-7
lines changed

vllm/entrypoints/openai/api_server.py

Lines changed: 12 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -585,12 +585,18 @@ async def authentication(request: Request, call_next):
585585
status_code=401)
586586
return await call_next(request)
587587

588-
@app.middleware("http")
589-
async def add_request_id(request: Request, call_next):
590-
request_id = request.headers.get("X-Request-Id") or uuid.uuid4().hex
591-
response = await call_next(request)
592-
response.headers["X-Request-Id"] = request_id
593-
return response
588+
if args.enable_request_id_headers:
589+
logger.warning(
590+
"CAUTION: Enabling X-Request-Id headers in the API Server. "
591+
"This can harm performance at high QPS.")
592+
593+
@app.middleware("http")
594+
async def add_request_id(request: Request, call_next):
595+
request_id = request.headers.get(
596+
"X-Request-Id") or uuid.uuid4().hex
597+
response = await call_next(request)
598+
response.headers["X-Request-Id"] = request_id
599+
return response
594600

595601
for middleware in args.middleware:
596602
module_path, object_name = middleware.rsplit(".", 1)

vllm/entrypoints/openai/cli_args.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -196,7 +196,11 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
196196
action="store_true",
197197
help="If specified, will run the OpenAI frontend server in the same "
198198
"process as the model serving engine.")
199-
199+
parser.add_argument(
200+
"--enable-request-id-headers",
201+
action="store_true",
202+
help="If specified, API server will add X-Request-Id header to "
203+
"responses. Caution: this hurts performance at high QPS.")
200204
parser.add_argument(
201205
"--enable-auto-tool-choice",
202206
action="store_true",

0 commit comments

Comments
 (0)