Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 13 additions & 21 deletions vllm/v1/core/kv_cache_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,11 +220,11 @@ def __init__(self, blocks: list[KVCacheBlock]) -> None:
if i < self.num_free_blocks - 1:
blocks[i].next_free_block = blocks[i + 1]

# Create a fake head and a tail block for the doubly linked list to
# reduce branching in the code
# Create a fake head tail blocks for the doubly linked list to
# reduce branching in the code.
#
# The implementation garenteed that the fake head and tail
# are NEVER got popped, so we could safely assume each real blocks
# The implementation guarantees that the fake head and tail
# are NEVER popped, so we can safely assume each real block
# in the queue has prev and next blocks.
self.fake_free_list_head = KVCacheBlock(block_id=-1)
self.fake_free_list_tail = KVCacheBlock(block_id=-1)
Expand All @@ -246,22 +246,15 @@ def popleft(self) -> KVCacheBlock:
Returns:
The first free block.
"""
if (self.fake_free_list_head.next_free_block
is self.fake_free_list_tail
or self.fake_free_list_head.next_free_block is None):
assert self.num_free_blocks == 0, (
f"num_free_blocks ({self.num_free_blocks}) is out of sync "
"with the free list.")
raise ValueError("No free blocks available")

assert (self.fake_free_list_head.next_free_block
is not self.fake_free_list_tail), "No free blocks available"
assert (self.fake_free_list_head.next_free_block is not None
), "fake_free_list_head.next_free_block should always exist"
Comment on lines +249 to +252
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

Using assert to check for an empty queue is risky because assertions can be disabled in production (e.g., with python -O). This would suppress the error and likely lead to a more obscure crash later on. The original raise ValueError is a more robust way to handle this condition.

Additionally, this change removes the assertion assert self.num_free_blocks == 0, which was a valuable check to ensure the block count is synchronized with the free list's state. It's better to keep this for debugging purposes.

I recommend reverting this part of the change to maintain robustness and the helpful debugging assertion.

        if (self.fake_free_list_head.next_free_block
                is self.fake_free_list_tail
                or self.fake_free_list_head.next_free_block is None):
            assert self.num_free_blocks == 0, (
                f"num_free_blocks ({self.num_free_blocks}) is out of sync "
                "with the free list.")
            raise ValueError("No free blocks available")

first_block: KVCacheBlock = self.fake_free_list_head.next_free_block

if first_block.next_free_block is None:
# This should not happen if the block is from the free list.
# It indicates a bug in the caller's logic.
raise RuntimeError("Invalid block found in popleft() "
"which doesn't have a valid next_free_block")

assert (
first_block.next_free_block
is not None), "next_free_block of real blocks should always exist"
# Connect fake_head and the next block of first_block (i.e. second block
# or fake tail).
self.fake_free_list_head.next_free_block = first_block.next_free_block
Expand Down Expand Up @@ -300,9 +293,8 @@ def append(self, block: KVCacheBlock) -> None:
Args:
block: The block to append.
"""
if self.fake_free_list_tail.prev_free_block is None:
raise RuntimeError(
"prev_free_block of fake_free_list_tail should always exist")
assert (self.fake_free_list_tail.prev_free_block is not None
), "fake_free_list_tail.prev_free_block should always exist"
last_block: KVCacheBlock = self.fake_free_list_tail.prev_free_block

# Connect the new block after the last block.
Expand Down