We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 5d3b52d, commit 64256ee (Copy full SHA for 64256ee)
src/plugins/intel_npu/src/plugin/npuw/llm_infer_request.cpp
@@ -660,7 +660,6 @@ void ov::npuw::LLMInferRequest::infer_chunked_prefill(ov::SoPtr<ov::ITensor> inp
660
auto current_prompts_len = std::min(remaining_prompts, chunk_prompt_len);
661
662
m_llm_profile["1/prefill:3a.prepare_chunk"].record([&]() {
663
-
664
// Handle first chunk with prefix caching: populate attention mask for restored cache
665
if (enable_prefix_caching && cache_context.restore_prefix_cache) {
666
m_prefix_caching_helper->populate_attention_mask_for_restored_cache(attention_mask,
0 commit comments