openvinotoolkit
diff --git a/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp b/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp
Lines changed: 9 additions & 0 deletions
diff --git a/src/plugins/intel_npu/src/backend/src/zero_dynamic_pipeline.cpp b/src/plugins/intel_npu/src/backend/src/zero_dynamic_pipeline.cpp
Lines changed: 18 additions & 3 deletions
diff --git a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp
Lines changed: 42 additions & 9 deletions
diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp
Lines changed: 3 additions & 3 deletions
diff --git a/src/plugins/intel_npu/src/common/src/igraph.cpp b/src/plugins/intel_npu/src/common/src/igraph.cpp
Lines changed: 7 additions & 3 deletions
diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/dynamic_graph.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/dynamic_graph.hpp
Lines changed: 9 additions & 5 deletions
diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/graph.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/graph.hpp
Lines changed: 9 additions & 5 deletions
diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/dynamic_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/dynamic_graph.cpp
Lines changed: 37 additions & 48 deletions
@@ -43,6 +43,13 @@ class IPipeline {
 protected:
     void enable_profiling();
 
+    struct CommandQueueStateSnapshot {  // Command-queue descriptor paired with the version it was read at.
+        CommandQueueDesc desc;  // Copy of the descriptor returned by the graph.
+        uint64_t version;  // Graph descriptor version observed around that copy.
+    };
+
+    CommandQueueStateSnapshot get_command_queue_state_snapshot();
+
     std::shared_ptr<ZeroInitStructsHolder> _init_structs;
     std::shared_ptr<IGraph> _graph;
     const Config _config;
@@ -60,6 +67,8 @@ class IPipeline {
      */
     size_t _batch_size;
 
+    std::shared_ptr<CommandQueue> _command_queue = nullptr;
+    uint64_t _command_queue_version = 0;
     std::vector<std::unique_ptr<Fence>> _fences;
     std::shared_ptr<EventPool> _event_pool;
     std::vector<std::shared_ptr<Event>> _events;
 
@@ -77,9 +77,8 @@ DynamicPipeline::DynamicPipeline(const std::shared_ptr<ZeroInitStructsHolder>& i
 
     if (_sync_output_with_fences) {
         _fences.reserve(_batch_size);
-
         for (size_t i = 0; i < _batch_size; i++) {
-            _fences.emplace_back(std::make_unique<Fence>(_graph->get_command_queue()));
+            _fences.emplace_back(std::make_unique<Fence>(_command_queue));
         }
     }
 
@@ -181,7 +180,23 @@ void DynamicPipeline::push() {
     auto* dynamicGraph = dynamic_cast<IDynamicGraph*>(_graph.get());
     OPENVINO_ASSERT(dynamicGraph != nullptr, "Failed to cast graph to IDynamicGraph");
 
-    auto commandQueueHandle = _graph->get_command_queue()->handle();
+    const auto command_queue_version = _graph->get_command_queue_desc_version();
+    const bool command_queue_changed = (command_queue_version != _command_queue_version);
+    if (command_queue_changed) {
+        const auto command_queue_state = get_command_queue_state_snapshot();
+        if (command_queue_state.version != _command_queue_version) {
+            _command_queue = CommandQueuePool::getInstance().getCommandQueue(_init_structs, command_queue_state.desc);
+            _command_queue_version = command_queue_state.version;
+
+            if (_sync_output_with_fences) {
+                for (size_t i = 0; i < _fences.size(); i++) {
+                    _fences[i] = std::make_unique<Fence>(_command_queue);
+                }
+            }
+        }
+    }
+
+    auto commandQueueHandle = _command_queue->handle();
     for (size_t i = 0; i < _command_lists.size(); ++i) {
         OV_ITT_TASK_CHAIN(ZERO_PIPELINE_IP_PUSH, itt::domains::LevelZeroBackend, "Pipeline", "push");
 
 
@@ -51,11 +51,17 @@ IPipeline::IPipeline(const std::shared_ptr<ZeroInitStructsHolder>& init_structs,
       _graph(graph),
       _config(config),
       _batch_size(batch_size),
+      _command_queue(nullptr),
+      _command_queue_version(0),
       _extension_version(init_structs->getCommandQueueDdiTable().version()),
       _run_inferences_sequentially(_extension_version < ZE_MAKE_VERSION(1, 1) &&
                                    _config.get<RUN_INFERENCES_SEQUENTIALLY>()),
       _pipeline_unique_id_per_graph(get_graph_unique_id_or_throw(graph)),
       _logger(logName, _config.get<LOG_LEVEL>()) {
+    const auto command_queue_state = get_command_queue_state_snapshot();
+    _command_queue = CommandQueuePool::getInstance().getCommandQueue(_init_structs, command_queue_state.desc);
+    _command_queue_version = command_queue_state.version;
+
     bool perf_count_enabled = _config.has<PERF_COUNT>() && _config.get<PERF_COUNT>();
     std::optional<bool> compiled_with_profiling = _graph->is_profiling_blob();
 
@@ -95,6 +101,18 @@ IPipeline::IPipeline(const std::shared_ptr<ZeroInitStructsHolder>& init_structs,
     }
 };
 
+IPipeline::CommandQueueStateSnapshot IPipeline::get_command_queue_state_snapshot() {
+    // Read the version, copy the descriptor, then read the version again; retry until both
+    // reads agree, so the returned descriptor is paired with a version that did not move meanwhile.
+    for (;;) {
+        const uint64_t observed = _graph->get_command_queue_desc_version();
+        CommandQueueStateSnapshot snapshot{_graph->get_command_queue_desc(), observed};
+        if (observed == _graph->get_command_queue_desc_version()) {
+            return snapshot;
+        }
+    }
+}
+
 std::vector<ov::ProfilingInfo> IPipeline::get_profiling_info() const {
     _logger.debug("get_profiling_info - started");
     if (!_config.has<PERF_COUNT>() || !_config.get<PERF_COUNT>()) {
@@ -159,19 +177,18 @@ Pipeline::Pipeline(const std::shared_ptr<ZeroInitStructsHolder>& init_structs,
         }
     }
 
-    _command_lists.reserve(_batch_size);
-    for (size_t i = 0; i < _batch_size; i++) {
-        _command_lists.emplace_back(std::make_unique<CommandList>(_init_structs));
-    }
-
     if (_sync_output_with_fences) {
         _fences.reserve(_batch_size);
-
         for (size_t i = 0; i < _batch_size; i++) {
-            _fences.emplace_back(std::make_unique<Fence>(_graph->get_command_queue()));
+            _fences.emplace_back(std::make_unique<Fence>(_command_queue));
         }
     }
 
+    _command_lists.reserve(_batch_size);
+    for (size_t i = 0; i < _batch_size; i++) {
+        _command_lists.emplace_back(std::make_unique<CommandList>(_init_structs));
+    }
+
     for (size_t i = 0; i < _batch_size; i++) {
         _logger.debug("Pipeline - set args for command list number: %zu", i);
         size_t io_index = 0;
@@ -283,6 +300,22 @@ void Pipeline::push() {
         _graph->set_last_submitted_id(_pipeline_unique_id_per_graph);
     }
 
+    const auto command_queue_version = _graph->get_command_queue_desc_version();
+    const bool command_queue_changed = (command_queue_version != _command_queue_version);
+    if (command_queue_changed) {
+        const auto command_queue_state = get_command_queue_state_snapshot();
+        if (command_queue_state.version != _command_queue_version) {
+            _command_queue = CommandQueuePool::getInstance().getCommandQueue(_init_structs, command_queue_state.desc);
+            _command_queue_version = command_queue_state.version;
+
+            if (_sync_output_with_fences) {
+                for (size_t i = 0; i < _fences.size(); i++) {
+                    _fences[i] = std::make_unique<Fence>(_command_queue);
+                }
+            }
+        }
+    }
+
     for (size_t i = 0; i < _command_lists.size(); ++i) {
         _command_lists.at(i)->close();
         // Emit a marker for pipeline::push() with the command list handle as the metadata
@@ -292,9 +325,9 @@ void Pipeline::push() {
                                 (uintptr_t)_command_lists.at(i)->handle());
         OV_ITT_TASK_CHAIN(ZERO_PIPELINE_IP_PUSH, itt::domains::LevelZeroBackend, "Pipeline", "push");
         if (_sync_output_with_fences) {
-            _graph->get_command_queue()->executeCommandList(*_command_lists.at(i), *_fences.at(i));
+            _command_queue->executeCommandList(*_command_lists.at(i), *_fences.at(i));
         } else {
-            _graph->get_command_queue()->executeCommandList(*_command_lists.at(i));
+            _command_queue->executeCommandList(*_command_lists.at(i));
         }
     }
 
 
@@ -48,9 +48,9 @@ class IGraph : public std::enable_shared_from_this<IGraph> {
 
     virtual void update_network_name(std::string_view name);
 
-    virtual const std::shared_ptr<CommandQueue>& get_command_queue() const;
-
-    virtual void set_workload_type(const ov::WorkloadType workloadType) const;
+    virtual CommandQueueDesc get_command_queue_desc() const;
+    virtual uint64_t get_command_queue_desc_version() const;
+    virtual void set_workload_type(const ov::WorkloadType workloadType);
 
     std::mutex& get_mutex() {
         return _initialize_mutex;
 
@@ -50,11 +50,15 @@ void IGraph::update_network_name(std::string_view) {
     OPENVINO_THROW("update_network_name not implemented");
 }
 
-const std::shared_ptr<CommandQueue>& IGraph::get_command_queue() const {
-    OPENVINO_THROW("get_command_queue not implemented");
+CommandQueueDesc IGraph::get_command_queue_desc() const {  // Base default: always throws.
+    OPENVINO_THROW("get_command_queue_desc not implemented");  // Graph and DynamicGraph override this.
 }
 
-void IGraph::set_workload_type(const ov::WorkloadType) const {
+uint64_t IGraph::get_command_queue_desc_version() const {  // Base default: always throws.
+    OPENVINO_THROW("get_command_queue_desc_version not implemented");  // Graph and DynamicGraph override this.
+}
+
+void IGraph::set_workload_type(const ov::WorkloadType) {  // Base default: always throws; no longer const.
     OPENVINO_THROW("set_workload_type not implemented");
 }
 
 
@@ -4,6 +4,9 @@
 
 #pragma once
 
+#include <atomic>
+#include <mutex>
+
 #include <ze_graph_ext.h>
 
 #include "intel_npu/common/idynamic_graph.hpp"
@@ -125,9 +128,9 @@ class DynamicGraph final : public IDynamicGraph {
 
     void update_network_name(std::string_view name) override;
 
-    const std::shared_ptr<CommandQueue>& get_command_queue() const override;
-
-    void set_workload_type(const ov::WorkloadType workloadType) const override;
+    CommandQueueDesc get_command_queue_desc() const override;
+    uint64_t get_command_queue_desc_version() const override;
+    void set_workload_type(const ov::WorkloadType workloadType) override;
 
     void set_batch_size(std::size_t batch) override;
 
@@ -170,8 +173,9 @@ class DynamicGraph final : public IDynamicGraph {
      */
     uint64_t _num_of_subgraphs = 1;
 
-    mutable std::mutex _commandQueueMutex;
-    std::shared_ptr<CommandQueue> _commandQueue;
+    mutable std::mutex _commandQueueDescMutex;
+    std::atomic<uint64_t> _commandQueueVersion{0};
+    CommandQueueDesc _commandQueueDesc;
     std::vector<std::shared_ptr<Event>> _lastSubmittedEvent;
 
     std::optional<ov::Tensor> _blob;
 
@@ -6,6 +6,9 @@
 
 #pragma once
 
+#include <atomic>
+#include <mutex>
+
 #include <ze_graph_ext.h>
 
 #include "intel_npu/common/igraph.hpp"
@@ -40,9 +43,9 @@ class Graph : public IGraph {
 
     void update_network_name(std::string_view name) override;
 
-    const std::shared_ptr<CommandQueue>& get_command_queue() const override;
-
-    void set_workload_type(const ov::WorkloadType workloadType) const override;
+    CommandQueueDesc get_command_queue_desc() const override;
+    uint64_t get_command_queue_desc_version() const override;
+    void set_workload_type(const ov::WorkloadType workloadType) override;
 
     void set_last_submitted_event(const std::shared_ptr<Event>& event, size_t indexOfCommandList) override;
     const std::shared_ptr<Event>& get_last_submitted_event(size_t indexOfCommandList) const override;
@@ -72,8 +75,9 @@ class Graph : public IGraph {
     GraphDescriptor _graphDesc;
     NetworkMetadata _metadata;
 
-    mutable std::mutex _commandQueueMutex;
-    std::shared_ptr<CommandQueue> _commandQueue;
+    mutable std::mutex _commandQueueDescMutex;
+    std::atomic<uint64_t> _commandQueueVersion{0};
+    CommandQueueDesc _commandQueueDesc;
     std::vector<std::shared_ptr<Event>> _lastSubmittedEvent;
 
     std::optional<ov::Tensor> _blob;
 
@@ -489,29 +489,19 @@ void DynamicGraph::update_network_name(std::string_view name) {
     _metadata.name = name;
 }
 
-const std::shared_ptr<CommandQueue>& DynamicGraph::get_command_queue() const {
-    return _commandQueue;
+CommandQueueDesc DynamicGraph::get_command_queue_desc() const {  // Returns the descriptor by value.
+    std::lock_guard<std::mutex> lock(_commandQueueDescMutex);  // Same mutex the descriptor writers hold.
+    return _commandQueueDesc;  // The copy is made while the lock is still held.
 }
 
-void DynamicGraph::set_workload_type(const ov::WorkloadType workloadType) const {
-    std::lock_guard<std::mutex> lock(_commandQueueMutex);
-    if (_commandQueue == nullptr) {
-        return;
-    }
-
-    ze_command_queue_workload_type_t zeWorkloadType;
-    switch (workloadType) {
-    case ov::WorkloadType::DEFAULT:
-        zeWorkloadType = ze_command_queue_workload_type_t::ZE_WORKLOAD_TYPE_DEFAULT;
-        break;
-    case ov::WorkloadType::EFFICIENT:
-        zeWorkloadType = ze_command_queue_workload_type_t::ZE_WORKLOAD_TYPE_BACKGROUND;
-        break;
-    default:
-        OPENVINO_THROW("Unknown value for WorkloadType!");
-    }
+uint64_t DynamicGraph::get_command_queue_desc_version() const {  // Returns the current descriptor version.
+    return _commandQueueVersion.load(std::memory_order_acquire);  // Atomic read; the mutex is not taken here.
+}
 
-    _commandQueue->setWorkloadType(zeWorkloadType);
+void DynamicGraph::set_workload_type(const ov::WorkloadType workloadType) {  // Stores the type in the descriptor.
+    std::lock_guard<std::mutex> lock(_commandQueueDescMutex);  // Descriptor write and version bump share one lock.
+    _commandQueueDesc.workload = zeroUtils::toZeQueueWorkloadType(workloadType);  // Only the descriptor changes.
+    _commandQueueVersion.fetch_add(1, std::memory_order_release);  // NOTE(review): bumps even if value is unchanged.
 }
 
 void DynamicGraph::set_argument_value(uint32_t argi, const void* argv) const {
@@ -554,34 +544,37 @@ void DynamicGraph::initialize_impl(const FilteredConfig& config) {
         return;
     }
 
-    if (_commandQueue == nullptr) {
-        _logger.debug("Graph initialize without graph handle");
+    _logger.debug("Graph initialize without graph handle");
 
-        uint32_t commandQueueOptions = 0;
-        if (config.has<TURBO>() && config.get<TURBO>()) {
-            OPENVINO_ASSERT(_zeroInitStruct->getCommandQueueDdiTable().version() >= ZE_MAKE_VERSION(1, 0),
-                            "Turbo is not supported by the current driver");
-            commandQueueOptions = commandQueueOptions | ZE_NPU_COMMAND_QUEUE_OPTION_TURBO;
-        }
-        OPENVINO_ASSERT(!(_zeroInitStruct->getCommandQueueDdiTable().version() < ZE_MAKE_VERSION(1, 1) &&
-                          config.has<RUN_INFERENCES_SEQUENTIALLY>() && config.get<RUN_INFERENCES_SEQUENTIALLY>()),
-                        "Running inferences sequentially is not supported by the current driver");
-
-        {
-            std::lock_guard<std::mutex> lock(_commandQueueMutex);
-            _commandQueue = std::make_shared<CommandQueue>(_zeroInitStruct,
-                                                           zeroUtils::toZeQueuePriority(config.get<MODEL_PRIORITY>()),
-                                                           commandQueueOptions);
-        }
+    uint32_t commandQueueOptions = 0;
+    if (config.has<TURBO>() && config.get<TURBO>()) {
+        OPENVINO_ASSERT(_zeroInitStruct->getCommandQueueDdiTable().version() >= ZE_MAKE_VERSION(1, 0),
+                        "Turbo is not supported by the current driver");
+        commandQueueOptions = commandQueueOptions | ZE_NPU_COMMAND_QUEUE_OPTION_TURBO;
+    }
+    OPENVINO_ASSERT(!(_zeroInitStruct->getCommandQueueDdiTable().version() < ZE_MAKE_VERSION(1, 1) &&
+                      config.has<RUN_INFERENCES_SEQUENTIALLY>() && config.get<RUN_INFERENCES_SEQUENTIALLY>()),
+                    "Running inferences sequentially is not supported by the current driver");
 
-        if (config.has<WORKLOAD_TYPE>()) {
-            set_workload_type(config.get<WORKLOAD_TYPE>());
-        }
+    {
+        std::lock_guard<std::mutex> lock(_commandQueueDescMutex);
+        _commandQueueDesc = CommandQueueDesc{
+            zeroUtils::toZeQueuePriority(config.get<MODEL_PRIORITY>()),
+            zeroUtils::toZeQueueWorkloadType(config.has<WORKLOAD_TYPE>()
+                                                 ? std::optional<ov::WorkloadType>{config.get<WORKLOAD_TYPE>()}
+                                                 : std::nullopt),
+            commandQueueOptions,
+            this};
+        _commandQueueVersion.fetch_add(1, std::memory_order_release);
+    }
 
-        _logger.debug("Graph initialize finish");
+    _logger.debug("Graph initialize finish");
 
-        _batchSize = determine_batch_size();
-    }
+    _batchSize = determine_batch_size();
+
+    OPENVINO_ASSERT(_zeroInitStruct->getCommandQueueDdiTable().version() >= ZE_MAKE_VERSION(1, 1) ||
+                        !config.get<RUN_INFERENCES_SEQUENTIALLY>(),
+                    "Running inferences sequentially is not supported by the current driver");
 
     // To ensure that the initialization of the graph does not exit prematurely due to nullptrs
     _init_completed.store(true, std::memory_order_release);
@@ -668,10 +661,6 @@ DynamicGraph::~DynamicGraph() {
     if (!_lastSubmittedEvent.empty()) {
         _lastSubmittedEvent.clear();
     }
-
-    if (_commandQueue != nullptr) {
-        _commandQueue.reset();
-    }
 }
 
 void DynamicGraph::execute(const std::shared_ptr<ZeroInitStructsHolder>& zeroInitStruct,
Original file line number	Diff line number	Diff line change
`@@ -50,11 +50,15 @@ void IGraph::update_network_name(std::string_view) {`
`50`	`50`	`OPENVINO_THROW("update_network_name not implemented");`
`51`	`51`	`}`
`52`	`52`
`53`		`-const std::shared_ptr<CommandQueue>& IGraph::get_command_queue() const {`
`54`		`- OPENVINO_THROW("get_command_queue not implemented");`
	`53`	`+CommandQueueDesc IGraph::get_command_queue_desc() const {`
	`54`	`+ OPENVINO_THROW("get_command_queue_desc not implemented");`
`55`	`55`	`}`
`56`	`56`
`57`		`-void IGraph::set_workload_type(const ov::WorkloadType) const {`
	`57`	`+uint64_t IGraph::get_command_queue_desc_version() const {`
	`58`	`+ OPENVINO_THROW("get_command_queue_desc_version not implemented");`
	`59`	`+}`
	`60`	`+`
	`61`	`+void IGraph::set_workload_type(const ov::WorkloadType) {`
`58`	`62`	`OPENVINO_THROW("set_workload_type not implemented");`
`59`	`63`	`}`
`60`	`64`