Skip to content

Commit 1d24ccb

Browse files
authored
[Fix] Better error message when there is OOM during cache initialization (#203)
1 parent 14f0b39 commit 1d24ccb

File tree

2 files changed: +7 additions, −0 deletions

vllm/engine/llm_engine.py

Lines changed: 6 additions & 0 deletions
@@ -127,6 +127,12 @@ def _init_cache(self) -> None:
         # FIXME(woosuk): Change to debug log.
         logger.info(f'# GPU blocks: {num_gpu_blocks}, '
                     f'# CPU blocks: {num_cpu_blocks}')
+
+        if num_gpu_blocks <= 0 or num_cpu_blocks <= 0:
+            raise ValueError("No available memory for the cache blocks. "
+                             "Try increasing `gpu_memory_utilization` when "
+                             "initializing the engine.")
+
         self.cache_config.num_gpu_blocks = num_gpu_blocks
         self.cache_config.num_cpu_blocks = num_cpu_blocks

vllm/outputs.py

Lines changed: 1 addition & 0 deletions
@@ -53,6 +53,7 @@ class RequestOutput:
         prompt: The prompt string of the request.
         prompt_token_ids: The token IDs of the prompt.
         outputs: The output sequences of the request.
+        finished: Whether the whole request is finished.
     """

     def __init__(
         self,

Comments (0)