Skip to content

Commit 657d60a

Browse files
[NPU] Disable idle pruning if property is set (#33364)
### Details: - *Disable idle pruning if property is set* ### Tickets: - *CVS-178801* --------- Signed-off-by: Bogdan Pereanu <bogdan.pereanu@intel.com> Signed-off-by: alexandruenache1111 <alexandru.enache@intel.com> Co-authored-by: alexandruenache1111 <alexandru.enache@intel.com>
1 parent 7859e26 commit 657d60a

File tree

19 files changed

+271
-85
lines changed

19 files changed

+271
-85
lines changed

docs/articles_en/openvino-workflow/running-inference/inference-devices-and-modes/npu-device.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@ offer a limited set of supported OpenVINO features.
152152
ov::intel_npu::bypass_umd_caching
153153
ov::intel_npu::defer_weights_load
154154
ov::intel_npu::run_inferences_sequentially
155+
ov::intel_npu::disable_idle_memory_prunning
155156
156157
.. tab-item:: Read-only properties
157158

src/bindings/python/src/pyopenvino/core/properties/properties.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -351,4 +351,5 @@ void regmodule_properties(py::module m) {
351351
wrap_property_RW(m_intel_npu, ov::intel_npu::qdq_optimization, "qdq_optimization");
352352
wrap_property_RW(m_intel_npu, ov::intel_npu::qdq_optimization_aggressive, "qdq_optimization_aggressive");
353353
wrap_property_RW(m_intel_npu, ov::intel_npu::run_inferences_sequentially, "run_inferences_sequentially");
354+
wrap_property_RW(m_intel_npu, ov::intel_npu::disable_idle_memory_prunning, "disable_idle_memory_prunning");
354355
}

src/bindings/python/tests/test_runtime/test_properties.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -496,6 +496,11 @@ def test_properties_ro(ov_property_ro, expected_value):
496496
"NPU_QDQ_OPTIMIZATION_AGGRESSIVE",
497497
((True, True),),
498498
),
499+
(
500+
intel_npu.disable_idle_memory_prunning,
501+
"NPU_DISABLE_IDLE_MEMORY_PRUNING",
502+
((True, True),),
503+
),
499504
(props.enable_weightless, "ENABLE_WEIGHTLESS", ((True, True), (False, False))),
500505
],
501506
)

src/inference/include/openvino/runtime/intel_npu/properties.hpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,5 +148,13 @@ static constexpr ov::Property<bool> defer_weights_load{"NPU_DEFER_WEIGHTS_LOAD"}
148148
*/
149149
static constexpr ov::Property<bool> run_inferences_sequentially{"NPU_RUN_INFERENCES_SEQUENTIALLY"};
150150

151+
/**
152+
* @brief [Only for NPU Plugin]
153+
* Type: boolean, default is false.
154+
* Setting this option to true disables pruning of memory during idle time.
155+
* @ingroup ov_runtime_npu_prop_cpp_api
156+
*/
157+
static constexpr ov::Property<bool> disable_idle_memory_prunning{"NPU_DISABLE_IDLE_MEMORY_PRUNING"};
158+
151159
} // namespace intel_npu
152160
} // namespace ov

src/plugins/intel_npu/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,7 @@ The following properties are supported (may differ based on current system confi
207207
| `ov::intel_npu::bypass_umd_caching`/</br>`NPU_BYPASS_UMD_CACHING` | RW | Bypass the caching of compiled models in UMD. | `YES`/ `NO`| `NO` |
208208
| `ov::intel_npu::defer_weights_load`/</br>`NPU_DEFER_WEIGHTS_LOAD` | RW | Delay loading the weights until inference is created. | `YES`/ `NO`| `NO` |
209209
| `ov::intel_npu::run_inferences_sequentially`/</br>`NPU_RUN_INFERENCES_SEQUENTIALLY` | RW | Run inferences in async mode sequentially in the order in which they are started to optimize host scheduling. | `YES`/ `NO`| `NO` |
210+
| `ov::intel_npu::disable_idle_memory_prunning`/</br>`NPU_DISABLE_IDLE_MEMORY_PRUNING` | RW | Disable pruning of memory during idle time. | `YES` / `NO` | `NO` |
210211
<br>
211212

212213
### Compiled_model properties VS Plugin properties

src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1491,4 +1491,22 @@ struct ENABLE_STRIDES_FOR final : OptionBase<ENABLE_STRIDES_FOR, std::string> {
14911491
}
14921492
};
14931493

1494+
struct DISABLE_IDLE_MEMORY_PRUNING final : OptionBase<DISABLE_IDLE_MEMORY_PRUNING, bool> {
1495+
static std::string_view key() {
1496+
return ov::intel_npu::disable_idle_memory_prunning.name();
1497+
}
1498+
1499+
static bool defaultValue() {
1500+
return false;
1501+
}
1502+
1503+
static OptionMode mode() {
1504+
return OptionMode::RunTime;
1505+
}
1506+
1507+
static bool isPublic() {
1508+
return true;
1509+
}
1510+
};
1511+
14941512
} // namespace intel_npu

src/plugins/intel_npu/src/backend/include/zero_backend.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,11 @@ class ZeroEngineBackend final : public IEngineBackend {
2828

2929
bool isCommandQueueExtSupported() const override;
3030
bool isLUIDExtSupported() const override;
31+
bool isContextExtSupported() const override;
3132

3233
void* getContext() const override;
3334

34-
void updateInfo(const Config& config) override;
35+
void updateInfo(const ov::AnyMap& properties) override;
3536

3637
const std::shared_ptr<ZeroInitStructsHolder> getInitStructs() const override;
3738

src/plugins/intel_npu/src/backend/include/zero_device.hpp

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,8 @@ class ZeroDevice : public IDevice {
3434

3535
std::shared_ptr<SyncInferRequest> createInferRequest(const std::shared_ptr<const ICompiledModel>& compiledModel,
3636
const Config& config) override;
37-
void updateInfo(const Config& config) override {
38-
log.setLevel(config.get<LOG_LEVEL>());
39-
}
37+
38+
void updateInfo(const ov::AnyMap& properties) override;
4039

4140
ZeroDevice& operator=(const ZeroDevice&) = delete;
4241
ZeroDevice(const ZeroDevice&) = delete;
@@ -46,18 +45,15 @@ class ZeroDevice : public IDevice {
4645
private:
4746
const std::shared_ptr<ZeroInitStructsHolder> _initStructs;
4847

49-
ze_device_properties_t device_properties = {};
50-
51-
ze_pci_ext_properties_t pci_properties = {};
52-
53-
ze_device_luid_ext_properties_t device_luid = {};
54-
55-
std::map<ov::element::Type, float> device_gops = {{ov::element::f32, 0.f},
56-
{ov::element::f16, 0.f},
57-
{ov::element::bf16, 0.f},
58-
{ov::element::u8, 0.f},
59-
{ov::element::i8, 0.f}};
48+
ze_device_properties_t _device_properties = {};
49+
ze_pci_ext_properties_t _pci_properties = {};
50+
ze_device_luid_ext_properties_t _device_luid = {};
51+
std::map<ov::element::Type, float> _device_gops = {{ov::element::f32, 0.f},
52+
{ov::element::f16, 0.f},
53+
{ov::element::bf16, 0.f},
54+
{ov::element::u8, 0.f},
55+
{ov::element::i8, 0.f}};
6056

61-
Logger log;
57+
Logger _log;
6258
};
6359
} // namespace intel_npu

src/plugins/intel_npu/src/backend/src/zero_backend.cpp

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@ bool ZeroEngineBackend::isLUIDExtSupported() const {
3737
return _initStruct->isExtensionSupported(std::string(ZE_DEVICE_LUID_EXT_NAME), ZE_MAKE_VERSION(1, 0));
3838
}
3939

40+
bool ZeroEngineBackend::isContextExtSupported() const {
41+
return _initStruct->isExtensionSupported(std::string(ZE_CONTEXT_NPU_EXT_NAME), ZE_MAKE_VERSION(1, 0));
42+
}
43+
4044
const std::shared_ptr<IDevice> ZeroEngineBackend::getDevice() const {
4145
if (_devices.empty()) {
4246
_logger.debug("ZeroEngineBackend - getDevice() returning empty list");
@@ -130,11 +134,22 @@ void* ZeroEngineBackend::getContext() const {
130134
return _initStruct->getContext();
131135
}
132136

133-
void ZeroEngineBackend::updateInfo(const Config& config) {
134-
_logger.setLevel(config.get<LOG_LEVEL>());
137+
void ZeroEngineBackend::updateInfo(const ov::AnyMap& properties) {
138+
if (properties.count(ov::log::level.name()) != 0) {
139+
_logger.setLevel(properties.at(ov::log::level.name()).as<ov::log::Level>());
140+
}
141+
142+
if (properties.count(ov::intel_npu::disable_idle_memory_prunning.name()) != 0) {
143+
if (properties.at(ov::intel_npu::disable_idle_memory_prunning.name()).as<bool>()) {
144+
_initStruct->clearContextOptions(ZE_NPU_CONTEXT_OPTION_IDLE_OPTIMIZATIONS);
145+
} else {
146+
_initStruct->setContextOptions(ZE_NPU_CONTEXT_OPTION_IDLE_OPTIMIZATIONS);
147+
}
148+
}
149+
135150
if (_devices.size() > 0) {
136151
for (auto& dev : _devices) {
137-
dev.second->updateInfo(config);
152+
dev.second->updateInfo(properties);
138153
}
139154
}
140155
}

src/plugins/intel_npu/src/backend/src/zero_device.cpp

Lines changed: 37 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -13,35 +13,35 @@ using namespace intel_npu;
1313

1414
ZeroDevice::ZeroDevice(const std::shared_ptr<ZeroInitStructsHolder>& initStructs)
1515
: _initStructs(initStructs),
16-
log("ZeroDevice", Logger::global().level()) {
17-
log.debug("ZeroDevice::ZeroDevice init");
18-
device_properties.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES;
16+
_log("ZeroDevice", Logger::global().level()) {
17+
_log.debug("ZeroDevice::ZeroDevice init");
18+
_device_properties.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES;
1919

2020
// Get LUID info, if supported
2121
if (_initStructs->isExtensionSupported(std::string(ZE_DEVICE_LUID_EXT_NAME), ZE_MAKE_VERSION(1, 0))) {
22-
device_luid.stype = ZE_STRUCTURE_TYPE_DEVICE_LUID_EXT_PROPERTIES;
23-
device_properties.pNext = &device_luid;
22+
_device_luid.stype = ZE_STRUCTURE_TYPE_DEVICE_LUID_EXT_PROPERTIES;
23+
_device_properties.pNext = &_device_luid;
2424
}
2525
THROW_ON_FAIL_FOR_LEVELZERO("zeDeviceGetProperties",
26-
zeDeviceGetProperties(_initStructs->getDevice(), &device_properties));
26+
zeDeviceGetProperties(_initStructs->getDevice(), &_device_properties));
2727

2828
// Query PCI information
2929
// Older drivers do not have this implemented. Linux driver returns NOT_IMPLEMENTED, while windows driver returns
3030
// zero values. If this is detected, we populate only device with ID from device_properties for backwards
3131
// compatibility. For any other error, we just fall-back to device ID to assure backwards compatibility with even
3232
// older drivers
33-
pci_properties.stype = ZE_STRUCTURE_TYPE_PCI_EXT_PROPERTIES;
34-
ze_result_t retpci = zeDevicePciGetPropertiesExt(_initStructs->getDevice(), &pci_properties);
33+
_pci_properties.stype = ZE_STRUCTURE_TYPE_PCI_EXT_PROPERTIES;
34+
ze_result_t retpci = zeDevicePciGetPropertiesExt(_initStructs->getDevice(), &_pci_properties);
3535
if (ZE_RESULT_SUCCESS == retpci) {
3636
// windows driver specific backwards compatibility
37-
if (pci_properties.address.device == 0) {
38-
log.warning("PCI information not available in driver. Falling back to deviceId");
39-
pci_properties.address.device = device_properties.deviceId;
37+
if (_pci_properties.address.device == 0) {
38+
_log.warning("PCI information not available in driver. Falling back to deviceId");
39+
_pci_properties.address.device = _device_properties.deviceId;
4040
}
4141
} else {
4242
// general backwards compatibility
43-
log.warning("PCI information not available in driver. Falling back to deviceId");
44-
pci_properties.address.device = device_properties.deviceId;
43+
_log.warning("PCI information not available in driver. Falling back to deviceId");
44+
_pci_properties.address.device = _device_properties.deviceId;
4545
}
4646

4747
/// Calculate and store device GOPS with formula: frequency * number of tiles * ops per tile
@@ -53,12 +53,12 @@ ZeroDevice::ZeroDevice(const std::shared_ptr<ZeroInitStructsHolder>& initStructs
5353
gops_support_drv_version = 1715354569; /// Linux driver version which supports Gops calculations
5454
#endif // _WIN32 || __CYGWIN__
5555
if (_initStructs->getDriverVersion() >= gops_support_drv_version) {
56-
float gops = (device_properties.coreClockRate / powf(1000, 3)) * device_properties.numSlices *
57-
device_properties.physicalEUSimdWidth;
58-
device_gops[ov::element::f32] = 0;
59-
device_gops[ov::element::u8] = gops;
60-
device_gops[ov::element::i8] = gops;
61-
device_gops[ov::element::f16] = 0.5f * gops;
56+
float gops = (_device_properties.coreClockRate / powf(1000, 3)) * _device_properties.numSlices *
57+
_device_properties.physicalEUSimdWidth;
58+
_device_gops[ov::element::f32] = 0;
59+
_device_gops[ov::element::u8] = gops;
60+
_device_gops[ov::element::i8] = gops;
61+
_device_gops[ov::element::f16] = 0.5f * gops;
6262
}
6363
}
6464

@@ -69,7 +69,7 @@ std::string ZeroDevice::getName() const {
6969
#define NPU_4000_DEVICE_ID 0x643E
7070

7171
std::string name;
72-
switch (device_properties.deviceId) {
72+
switch (_device_properties.deviceId) {
7373
case NPU_3720_P_DEVICE_ID:
7474
case NPU_3720_S_DEVICE_ID:
7575
name = ov::intel_npu::Platform::NPU3720;
@@ -85,15 +85,15 @@ std::string ZeroDevice::getName() const {
8585
}
8686

8787
std::string ZeroDevice::getFullDeviceName() const {
88-
return device_properties.name;
88+
return _device_properties.name;
8989
}
9090

9191
IDevice::Uuid ZeroDevice::getUuid() const {
9292
Uuid uuid{};
93-
static_assert(sizeof(device_properties.uuid.id) == uuid.uuid.size(),
93+
static_assert(sizeof(_device_properties.uuid.id) == uuid.uuid.size(),
9494
"ze_device_uuid_t::id size doesn't match intel_npu::Uuid::uuid size");
9595

96-
std::copy(std::begin(device_properties.uuid.id), std::end(device_properties.uuid.id), std::begin(uuid.uuid));
96+
std::copy(std::begin(_device_properties.uuid.id), std::end(_device_properties.uuid.id), std::begin(uuid.uuid));
9797

9898
return uuid;
9999
}
@@ -103,17 +103,17 @@ ov::device::LUID ZeroDevice::getLUID() const {
103103
// incompatibility check
104104
static_assert(ZE_MAX_DEVICE_LUID_SIZE_EXT == ov::device::LUID::MAX_LUID_SIZE, "LUID size mismatch");
105105
for (int i = 0; i < ZE_MAX_DEVICE_LUID_SIZE_EXT; i++) {
106-
luidstruct.luid[i] = device_luid.luid.id[i];
106+
luidstruct.luid[i] = _device_luid.luid.id[i];
107107
}
108108
return luidstruct;
109109
}
110110

111111
uint32_t ZeroDevice::getSubDevId() const {
112-
return device_properties.subdeviceId;
112+
return _device_properties.subdeviceId;
113113
}
114114

115115
uint32_t ZeroDevice::getMaxNumSlices() const {
116-
return device_properties.numSlices;
116+
return _device_properties.numSlices;
117117
}
118118

119119
uint64_t ZeroDevice::getAllocMemSize() const {
@@ -155,14 +155,14 @@ uint64_t ZeroDevice::getTotalMemSize() const {
155155
}
156156

157157
ov::device::PCIInfo ZeroDevice::getPciInfo() const {
158-
return ov::device::PCIInfo{pci_properties.address.domain,
159-
pci_properties.address.bus,
160-
pci_properties.address.device,
161-
pci_properties.address.function};
158+
return ov::device::PCIInfo{_pci_properties.address.domain,
159+
_pci_properties.address.bus,
160+
_pci_properties.address.device,
161+
_pci_properties.address.function};
162162
}
163163

164164
std::map<ov::element::Type, float> ZeroDevice::getGops() const {
165-
return device_gops;
165+
return _device_gops;
166166
}
167167

168168
ov::device::Type ZeroDevice::getDeviceType() const {
@@ -174,3 +174,9 @@ std::shared_ptr<SyncInferRequest> ZeroDevice::createInferRequest(
174174
const Config& config) {
175175
return std::make_shared<ZeroInferRequest>(_initStructs, compiledModel, config);
176176
}
177+
178+
void ZeroDevice::updateInfo(const ov::AnyMap& properties) {
179+
if (properties.count(ov::log::level.name()) != 0) {
180+
_log.setLevel(properties.at(ov::log::level.name()).as<ov::log::Level>());
181+
}
182+
}

0 commit comments

Comments
 (0)