1 file changed: +2 -1 lines changed
@@ -143,6 +143,8 @@ def __init__(
143143 # the backends)
144144 if envs .VLLM_USE_V1 :
145145 self .use_irope = extra_impl_args .pop ("use_irope" , False )
146+ else :
147+ self .use_irope = extra_impl_args .get ("use_irope" , False )
146148
147149 quant_method = quant_config .get_quant_method (
148150 self , prefix = prefix ) if quant_config else None
@@ -177,7 +179,6 @@ def __init__(
177179 kv_sharing_target_layer_name , ** extra_impl_args )
178180 self .backend = backend_name_to_enum (attn_backend .get_name ())
179181 self .dtype = dtype
180- self .use_irope = extra_impl_args .get ("use_irope" , False )
181182
182183 # For cuda-alike (CUDA and ROCM) and cpu platforms, we control how
183184 # torch.compile works by registering the attention as one giant
You can’t perform that action at this time.
0 commit comments