
Commit db75c11

[Misc] Remove deprecated args in v0.10
Signed-off-by: Kebe <[email protected]>
1 parent e7b2042 commit db75c11

4 files changed: 0 additions & 24 deletions

examples/offline_inference/neuron_speculation.py

Lines changed: 0 additions & 1 deletion
@@ -37,7 +37,6 @@ def initialize_llm():
         max_num_seqs=4,
         max_model_len=2048,
         block_size=2048,
-        use_v2_block_manager=True,
         device="neuron",
         tensor_parallel_size=32,
     )
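For code outside this repository the migration is the same as in the example above: drop the keyword argument. A minimal before/after sketch, with a placeholder model name and illustrative arguments that are not taken from this commit:

from vllm import LLM

# Before this change the flag was accepted but was already a no-op:
#   llm = LLM(model="<your-model>", max_model_len=2048, use_v2_block_manager=True)

# With the field removed from EngineArgs, passing it should now fail as an
# unexpected keyword argument, so simply omit it:
llm = LLM(model="<your-model>", max_model_len=2048)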

tests/neuron/2_core/test_mistral.py

Lines changed: 0 additions & 1 deletion
@@ -9,7 +9,6 @@ def test_mistral():
         tensor_parallel_size=2,
         max_num_seqs=4,
         max_model_len=128,
-        use_v2_block_manager=True,
         override_neuron_config={
             "sequence_parallel_enabled": False,
             "skip_warmup": True

tests/neuron/2_core/test_multi_lora.py

Lines changed: 0 additions & 2 deletions
@@ -14,7 +14,6 @@ def test_llama_single_lora():
         tensor_parallel_size=2,
         max_num_seqs=4,
         max_model_len=512,
-        use_v2_block_manager=True,
         override_neuron_config={
             "sequence_parallel_enabled": False,
             "skip_warmup": True,
@@ -57,7 +56,6 @@ def test_llama_multiple_lora():
         tensor_parallel_size=2,
         max_num_seqs=4,
         max_model_len=512,
-        use_v2_block_manager=True,
         override_neuron_config={
             "sequence_parallel_enabled":
                 False,

vllm/engine/arg_utils.py

Lines changed: 0 additions & 20 deletions
@@ -313,7 +313,6 @@ class EngineArgs:
         CacheConfig.prefix_caching_hash_algo
     disable_sliding_window: bool = ModelConfig.disable_sliding_window
     disable_cascade_attn: bool = ModelConfig.disable_cascade_attn
-    use_v2_block_manager: bool = True
     swap_space: float = CacheConfig.swap_space
     cpu_offload_gb: float = CacheConfig.cpu_offload_gb
     gpu_memory_utilization: float = CacheConfig.gpu_memory_utilization
@@ -745,16 +744,6 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
             "--max-prompt-adapter-token",
             **prompt_adapter_kwargs["max_prompt_adapter_token"])
 
-        # Device arguments
-        device_kwargs = get_kwargs(DeviceConfig)
-        device_group = parser.add_argument_group(
-            title="DeviceConfig",
-            description=DeviceConfig.__doc__,
-        )
-        device_group.add_argument("--device",
-                                  **device_kwargs["device"],
-                                  deprecated=True)
-
         # Speculative arguments
         speculative_group = parser.add_argument_group(
             title="SpeculativeConfig",
@@ -856,15 +845,6 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
             **vllm_kwargs["additional_config"])
 
         # Other arguments
-        parser.add_argument('--use-v2-block-manager',
-                            action='store_true',
-                            default=True,
-                            deprecated=True,
-                            help='[DEPRECATED] block manager v1 has been '
-                            'removed and SelfAttnBlockSpaceManager (i.e. '
-                            'block manager v2) is now the default. '
-                            'Setting this flag to True or False'
-                            ' has no effect on vLLM behavior.')
         parser.add_argument('--disable-log-stats',
                             action='store_true',
                             help='Disable logging statistics.')
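Once the registration above is gone, the parser no longer knows about --use-v2-block-manager (or the deprecated --device group removed in the earlier hunk), so the flag should now be rejected at parse time instead of being silently ignored. A minimal stand-alone illustration using plain argparse rather than vLLM's FlexibleArgumentParser:

import argparse

# Sketch of the post-removal behavior: the deprecated no-op flag is simply
# never registered on the parser.
parser = argparse.ArgumentParser(prog="toy-cli")

try:
    parser.parse_args(["--use-v2-block-manager"])
except SystemExit:
    # argparse reports "unrecognized arguments: --use-v2-block-manager" and
    # exits, so callers must drop the flag from their commands.
    pass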

0 commit comments
