diff --git a/vllm/worker/hpu_model_runner.py b/vllm/worker/hpu_model_runner.py index b0bb02f405ca..593aeeaf7248 100644 --- a/vllm/worker/hpu_model_runner.py +++ b/vllm/worker/hpu_model_runner.py @@ -36,7 +36,8 @@ HabanaMemoryProfiler, HabanaProfilerCounterHelper, format_bytes) -from vllm_hpu_extension.runtime import finalize_config, get_config +from vllm_hpu_extension.runtime import (clear_config, finalize_config, + get_config) import vllm.envs as envs from vllm.attention import AttentionMetadata, get_attn_backend @@ -4129,6 +4130,7 @@ def _make_decode_output( return SamplerOutput(sampler_outputs) def __del__(self): + clear_config() self.shutdown_inc() def _patch_prev_output(self):