We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent b6ee234, commit d77fec2 (copy full SHA for d77fec2)
vllm/config/__init__.py
@@ -3756,12 +3756,6 @@ def __post_init__(self):
3756
# local attention.
3757
self.scheduler_config.disable_hybrid_kv_cache_manager = True
3758
3759
- if self.cache_config.kv_sharing_fast_prefill:
3760
- # There is an IMA issue currently when using fast prefill with
3761
- # hybrid kv cache manager (e.g. interleaved sliding window)
3762
- # TODO(sarckk): investigate and fix
3763
- self.scheduler_config.disable_hybrid_kv_cache_manager = True
3764
-
3765
def update_sizes_for_sequence_parallelism(self,
3766
possible_sizes: list) -> list:
3767
# remove the sizes that not multiple of tp_size when
0 commit comments