File tree: 2 files changed, +6 -1 lines changed
Original file line number | Diff line number | Diff line change
@@ -98,13 +98,18 @@ void update_npu_config_whisper(ov::AnyMap& config,
9898 const ov::genai::utils::KVDesc& kv_desc) {
9999 update_config (config, {" NPU_USE_NPUW" , " YES" });
100100 update_config (config, {" NPUW_ONLINE_PIPELINE" , " NONE" });
101+ update_config (config, {" NPUW_FUNCALL_FOR_ALL" , " NO" });
102+ update_config (config, {" NPUW_FOLD" , " NO" });
101103 update_config (config, {" NPUW_LLM" , " YES" });
102104
103105 update_config (config, {" NPUW_LLM_BATCH_DIM" , kv_pos.batch });
104106 update_config (config, {" NPUW_LLM_SEQ_LEN_DIM" , kv_pos.seq_len });
105107
106108 update_config (config, {" NPUW_LLM_MAX_PROMPT_LEN" , kv_desc.max_prompt_len });
107109 update_config (config, {" NPUW_LLM_MIN_RESPONSE_LEN" , kv_desc.min_response_len });
110+
111+ // To disable chunking
112+ update_config (config, {" NPUW_LLM_PREFILL_HINT" , " STATIC" });
108113}
109114
110115inline bool is_paged_attention_available () {
Original file line number | Diff line number | Diff line change
@@ -80,7 +80,7 @@ class WhisperPipeline::WhisperPipelineStatefulImpl : public WhisperPipeline::Whi
8080 auto encoder_model = core.read_model (models_path / " openvino_encoder_model.xml" , {}, properties);
8181 // NB: only batch_size == 1 is supported now for NPU
8282 reshape_to_static_encoder (encoder_model, m_feature_extractor.feature_size );
83- compiled_model = core.compile_model (encoder_model, " NPU" );
83+ compiled_model = core.compile_model (encoder_model, " NPU" , properties );
8484 } else {
8585 compiled_model = core.compile_model (models_path / " openvino_encoder_model.xml" , device, properties);
8686 }
You can’t perform that action at this time.
0 commit comments