2 files changed: +6 -1 lines changed
lines changed Original file line number Diff line number Diff line change @@ -101,13 +101,18 @@ void update_npu_config_whisper(ov::AnyMap& config,
101101 const ov::genai::utils::KVDesc& kv_desc) {
102102 update_config (config, {" NPU_USE_NPUW" , " YES" });
103103 update_config (config, {" NPUW_ONLINE_PIPELINE" , " NONE" });
104+ update_config (config, {" NPUW_FUNCALL_FOR_ALL" , " NO" });
105+ update_config (config, {" NPUW_FOLD" , " NO" });
104106 update_config (config, {" NPUW_LLM" , " YES" });
105107
106108 update_config (config, {" NPUW_LLM_BATCH_DIM" , kv_pos.batch });
107109 update_config (config, {" NPUW_LLM_SEQ_LEN_DIM" , kv_pos.seq_len });
108110
109111 update_config (config, {" NPUW_LLM_MAX_PROMPT_LEN" , kv_desc.max_prompt_len });
110112 update_config (config, {" NPUW_LLM_MIN_RESPONSE_LEN" , kv_desc.min_response_len });
113+
114+ // To disable chunking
115+ update_config (config, {" NPUW_LLM_PREFILL_HINT" , " STATIC" });
111116}
112117
113118inline bool is_paged_attention_available () {
@@ -77,7 +77,7 @@ class WhisperPipeline::WhisperPipelineStatefulImpl : public WhisperPipeline::Whi
7777 auto encoder_model = core.read_model (models_path / " openvino_encoder_model.xml" , {}, properties);
7878 // NB: only batch_size == 1 is supported now for NPU
7979 reshape_to_static_encoder (encoder_model, m_feature_extractor.feature_size );
80- compiled_model = core.compile_model (encoder_model, " NPU" );
80+ compiled_model = core.compile_model (encoder_model, " NPU" , properties );
8181 } else {
8282 compiled_model = core.compile_model (models_path / " openvino_encoder_model.xml" , device, properties);
8383 }
You can’t perform that action at this time.
0 commit comments