Skip to content

Commit 80e83d3

Browse files
[NPUW]Add MOE caching properties to NPU plugin (#34648)
### Details:
Add the following NPUW MOE-related properties to `_cachingProperties`:
- NPUW_MOE_TOKEN_CHUNK_SIZE
- NPUW_MOE_POOL_SIZE
- NPUW_LLM_PREFILL_MOE_HINT
- NPUW_LLM_GENERATE_MOE_HINT

These properties affect model compilation and need to be part of the cache key.

### Tickets:
- *ticket-id*

### AI Assistance:
- *AI assistance used: no / yes*
- *If yes, summarize how AI was used and what human validation was performed (build/tests/manual checks).*

---------

Signed-off-by: intelgaoxiong <xiong.gao@intel.com>
1 parent 2ad8a3b commit 80e83d3

File tree

4 files changed

+11
-1
lines changed

4 files changed

+11
-1
lines changed

src/plugins/intel_npu/src/plugin/include/properties.hpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,8 @@ class Properties final {
182182
ov::intel_npu::npuw::partitioning::dcoff_type.name(),
183183
ov::intel_npu::npuw::partitioning::dcoff_with_scale.name(),
184184
ov::intel_npu::npuw::partitioning::funcall_for_all.name(),
185+
ov::intel_npu::npuw::partitioning::moe_token_chunk_size.name(),
186+
ov::intel_npu::npuw::partitioning::moe_pool_size.name(),
185187
ov::intel_npu::npuw::funcall_async.name(),
186188
ov::intel_npu::npuw::unfold_ireqs.name(),
187189
ov::intel_npu::npuw::fallback_exec.name(),
@@ -211,7 +213,9 @@ class Properties final {
211213
ov::intel_npu::npuw::llm::shared_lm_head_config.name(),
212214
ov::intel_npu::npuw::llm::additional_shared_lm_head_config.name(),
213215
ov::intel_npu::npuw::llm::optimize_fp8.name(),
214-
ov::intel_npu::npuw::eagle::enabled.name()};
216+
ov::intel_npu::npuw::eagle::enabled.name(),
217+
ov::intel_npu::npuw::llm::prefill_moe_hint.name(),
218+
ov::intel_npu::npuw::llm::generate_moe_hint.name()};
215219

216220
const std::vector<ov::PropertyName> _internalSupportedProperties = {ov::internal::caching_properties.name(),
217221
ov::internal::caching_with_mmap.name(),

src/plugins/intel_npu/src/plugin/src/plugin.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,8 @@ void init_config(const IEngineBackend* backend, OptionsDesc& options, FilteredCo
340340
REGISTER_OPTION(NPUW_KOKORO);
341341
REGISTER_OPTION(NPUW_KOKORO_BLOCK_SIZE);
342342
REGISTER_OPTION(NPUW_KOKORO_OVERLAP_SIZE);
343+
REGISTER_OPTION(NPUW_MOE_TOKEN_CHUNK_SIZE);
344+
REGISTER_OPTION(NPUW_MOE_POOL_SIZE);
343345

344346
config.enableRuntimeOptions();
345347

src/plugins/intel_npu/src/plugin/src/properties.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -659,6 +659,8 @@ void Properties::registerPluginProperties() {
659659
TRY_REGISTER_SIMPLE_PROPERTY(ov::intel_npu::npuw::kokoro::enabled, NPUW_KOKORO);
660660
TRY_REGISTER_SIMPLE_PROPERTY(ov::intel_npu::npuw::kokoro::block_size, NPUW_KOKORO_BLOCK_SIZE);
661661
TRY_REGISTER_SIMPLE_PROPERTY(ov::intel_npu::npuw::kokoro::overlap_size, NPUW_KOKORO_OVERLAP_SIZE);
662+
TRY_REGISTER_SIMPLE_PROPERTY(ov::intel_npu::npuw::partitioning::moe_token_chunk_size, NPUW_MOE_TOKEN_CHUNK_SIZE);
663+
TRY_REGISTER_SIMPLE_PROPERTY(ov::intel_npu::npuw::partitioning::moe_pool_size, NPUW_MOE_POOL_SIZE);
662664

663665
// 2. Metrics (static device and environment properties)
664666
// ========

src/plugins/intel_npu/tests/functional/internal/plugin/test_properties.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,8 @@ class PropertiesManagerTests : public ov::test::behavior::OVPluginTestBase,
247247
REGISTER_OPTION(NPUW_KOKORO);
248248
REGISTER_OPTION(NPUW_KOKORO_BLOCK_SIZE);
249249
REGISTER_OPTION(NPUW_KOKORO_OVERLAP_SIZE);
250+
REGISTER_OPTION(NPUW_MOE_TOKEN_CHUNK_SIZE);
251+
REGISTER_OPTION(NPUW_MOE_POOL_SIZE);
250252

251253
npu_config.enableRuntimeOptions();
252254

0 commit comments

Comments
 (0)