Fix whisper decoder config

eshiryae · eshiryae · commit 217d973e248c · 2025-08-11T11:54:31.000+01:00
diff --git a/src/cpp/src/utils.cpp b/src/cpp/src/utils.cpp
@@ -98,13 +98,18 @@ void update_npu_config_whisper(ov::AnyMap& config,
                                const ov::genai::utils::KVDesc& kv_desc) {
     update_config(config, {"NPU_USE_NPUW", "YES"});
     update_config(config, {"NPUW_ONLINE_PIPELINE", "NONE"});
+    update_config(config, {"NPUW_FUNCALL_FOR_ALL", "NO"});
+    update_config(config, {"NPUW_FOLD", "NO"});
     update_config(config, {"NPUW_LLM", "YES"});
 
     update_config(config, {"NPUW_LLM_BATCH_DIM", kv_pos.batch});
     update_config(config, {"NPUW_LLM_SEQ_LEN_DIM", kv_pos.seq_len});
 
     update_config(config, {"NPUW_LLM_MAX_PROMPT_LEN", kv_desc.max_prompt_len});
     update_config(config, {"NPUW_LLM_MIN_RESPONSE_LEN", kv_desc.min_response_len});
+
+    // Use the static prefill hint to disable chunking
+    update_config(config, {"NPUW_LLM_PREFILL_HINT", "STATIC"});
 }
 
 inline bool is_paged_attention_available() {
diff --git a/src/cpp/src/whisper/pipeline.cpp b/src/cpp/src/whisper/pipeline.cpp
@@ -80,7 +80,7 @@ class WhisperPipeline::WhisperPipelineStatefulImpl : public WhisperPipeline::Whi
             auto encoder_model = core.read_model(models_path / "openvino_encoder_model.xml", {}, properties);
             // NB: only batch_size == 1 is supported now for NPU
             reshape_to_static_encoder(encoder_model, m_feature_extractor.feature_size);
-            compiled_model = core.compile_model(encoder_model, "NPU");
+            compiled_model = core.compile_model(encoder_model, "NPU", properties);
         } else {
             compiled_model = core.compile_model(models_path / "openvino_encoder_model.xml", device, properties);
         }

Original file line number	Diff line number	Diff line change
`@@ -80,7 +80,7 @@ class WhisperPipeline::WhisperPipelineStatefulImpl : public WhisperPipeline::Whi`
`80`	`80`	`auto encoder_model = core.read_model(models_path / "openvino_encoder_model.xml", {}, properties);`
`81`	`81`	`// NB: only batch_size == 1 is supported now for NPU`
`82`	`82`	`reshape_to_static_encoder(encoder_model, m_feature_extractor.feature_size);`
`83`		`- compiled_model = core.compile_model(encoder_model, "NPU");`
	`83`	`+ compiled_model = core.compile_model(encoder_model, "NPU", properties);`
`84`	`84`	`} else {`
`85`	`85`	`compiled_model = core.compile_model(models_path / "openvino_encoder_model.xml", device, properties);`
`86`	`86`	`}`