From 82bd90a0906a04c967db8a24b382604000418838 Mon Sep 17 00:00:00 2001
From: Kebe
Date: Tue, 22 Jul 2025 12:50:59 +0800
Subject: [PATCH] [Misc] Remove deprecated args in v0.10

Signed-off-by: Kebe
---
 .../offline_inference/neuron_speculation.py |  1 -
 tests/neuron/2_core/test_mistral.py         |  1 -
 tests/neuron/2_core/test_multi_lora.py      |  2 --
 vllm/engine/arg_utils.py                    | 21 -------------------
 4 files changed, 25 deletions(-)

diff --git a/examples/offline_inference/neuron_speculation.py b/examples/offline_inference/neuron_speculation.py
index 26276cba202b..7fc22caee742 100644
--- a/examples/offline_inference/neuron_speculation.py
+++ b/examples/offline_inference/neuron_speculation.py
@@ -37,7 +37,6 @@ def initialize_llm():
         max_num_seqs=4,
         max_model_len=2048,
         block_size=2048,
-        use_v2_block_manager=True,
         device="neuron",
         tensor_parallel_size=32,
     )
diff --git a/tests/neuron/2_core/test_mistral.py b/tests/neuron/2_core/test_mistral.py
index d02fff943e90..ff59be1725b6 100644
--- a/tests/neuron/2_core/test_mistral.py
+++ b/tests/neuron/2_core/test_mistral.py
@@ -9,7 +9,6 @@ def test_mistral():
         tensor_parallel_size=2,
         max_num_seqs=4,
         max_model_len=128,
-        use_v2_block_manager=True,
         override_neuron_config={
             "sequence_parallel_enabled": False,
             "skip_warmup": True
diff --git a/tests/neuron/2_core/test_multi_lora.py b/tests/neuron/2_core/test_multi_lora.py
index 6b97f47d4db3..52ca9fe7b666 100644
--- a/tests/neuron/2_core/test_multi_lora.py
+++ b/tests/neuron/2_core/test_multi_lora.py
@@ -14,7 +14,6 @@ def test_llama_single_lora():
         tensor_parallel_size=2,
         max_num_seqs=4,
         max_model_len=512,
-        use_v2_block_manager=True,
         override_neuron_config={
             "sequence_parallel_enabled": False,
             "skip_warmup": True,
@@ -57,7 +56,6 @@ def test_llama_multiple_lora():
         tensor_parallel_size=2,
         max_num_seqs=4,
         max_model_len=512,
-        use_v2_block_manager=True,
         override_neuron_config={
             "sequence_parallel_enabled": False,
             "skip_warmup": True,
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 28b1c1c363a7..975743ff2154 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -313,7 +313,6 @@ class EngineArgs:
         CacheConfig.prefix_caching_hash_algo
     disable_sliding_window: bool = ModelConfig.disable_sliding_window
     disable_cascade_attn: bool = ModelConfig.disable_cascade_attn
-    use_v2_block_manager: bool = True
     swap_space: float = CacheConfig.swap_space
     cpu_offload_gb: float = CacheConfig.cpu_offload_gb
     gpu_memory_utilization: float = CacheConfig.gpu_memory_utilization
@@ -364,7 +363,6 @@ class EngineArgs:
     max_prompt_adapter_token: int = \
         PromptAdapterConfig.max_prompt_adapter_token
 
-    device: Device = DeviceConfig.device
     num_scheduler_steps: int = SchedulerConfig.num_scheduler_steps
     multi_step_stream_outputs: bool = SchedulerConfig.multi_step_stream_outputs
     ray_workers_use_nsight: bool = ParallelConfig.ray_workers_use_nsight
@@ -745,16 +743,6 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
             "--max-prompt-adapter-token",
             **prompt_adapter_kwargs["max_prompt_adapter_token"])
 
-        # Device arguments
-        device_kwargs = get_kwargs(DeviceConfig)
-        device_group = parser.add_argument_group(
-            title="DeviceConfig",
-            description=DeviceConfig.__doc__,
-        )
-        device_group.add_argument("--device",
-                                  **device_kwargs["device"],
-                                  deprecated=True)
-
         # Speculative arguments
         speculative_group = parser.add_argument_group(
             title="SpeculativeConfig",
@@ -856,15 +844,6 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
                                 **vllm_kwargs["additional_config"])
 
         # Other arguments
-        parser.add_argument('--use-v2-block-manager',
-                            action='store_true',
-                            default=True,
-                            deprecated=True,
-                            help='[DEPRECATED] block manager v1 has been '
-                            'removed and SelfAttnBlockSpaceManager (i.e. '
-                            'block manager v2) is now the default. '
-                            'Setting this flag to True or False'
-                            ' has no effect on vLLM behavior.')
         parser.add_argument('--disable-log-stats',
                             action='store_true',
                             help='Disable logging statistics.')
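
Migration note (a minimal sketch, not part of the patch): --use-v2-block-manager
was already a no-op, since block manager v1 was removed and v2 became the only
implementation, so callers can simply drop the argument. The snippet below
assumes the public vllm.LLM entrypoint; the model name and limits are
illustrative.

    from vllm import LLM

    # Previously: LLM(..., use_v2_block_manager=True)  # deprecated no-op
    # Now: omit the kwarg entirely; behavior is unchanged.
    llm = LLM(
        model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",  # illustrative model
        max_num_seqs=4,
        max_model_len=2048,
    )

The deprecated --device CLI flag is removed as well; vLLM is expected to pick
the device from the detected platform rather than from a user-supplied flag.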