Skip to content

Commit d77fec2

Browse files
committed
Allow fast prefill for hybrid memory allocator
Signed-off-by: Yong Hoon Shin <[email protected]>
1 parent b6ee234 commit d77fec2

File tree

1 file changed

+0
-6
lines changed

1 file changed

+0
-6
lines changed

vllm/config/__init__.py

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -3756,12 +3756,6 @@ def __post_init__(self):
3756 3756
# local attention.
3757 3757
self.scheduler_config.disable_hybrid_kv_cache_manager = True
3758 3758

3759-
if self.cache_config.kv_sharing_fast_prefill:
3760-
# There is an IMA issue currently when using fast prefill with
3761-
# hybrid kv cache manager (e.g. interleaved sliding window)
3762-
# TODO(sarckk): investigate and fix
3763-
self.scheduler_config.disable_hybrid_kv_cache_manager = True
3764-
3765 3759
def update_sizes_for_sequence_parallelism(self,
3766 3760
possible_sizes: list) -> list:
3767 3761
# remove the sizes that not multiple of tp_size when

0 commit comments

Comments
 (0)