Skip to content

Commit b1b9bcb

Browse files
Info -> warning: Override performance mode to THROUGHPUT for compilation - review
1 parent 48f74cd commit b1b9bcb

File tree

1 file changed

+25
-10
lines changed

1 file changed

+25
-10
lines changed

src/plugins/intel_npu/src/plugin/src/plugin.cpp

Lines changed: 25 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -674,29 +674,44 @@ std::shared_ptr<ov::ICompiledModel> Plugin::compile_model(const std::shared_ptr<
674674
std::shared_ptr<intel_npu::IGraph> graph;
675675

676676
auto compileWithConfig = [&](auto&& modelToCompile, const auto& config) {
677-
if (!localConfig.get<WEIGHTLESS_BLOB>() && !localConfig.get<ENABLE_WEIGHTLESS>() ) {
677+
if (!localConfig.get<WEIGHTLESS_BLOB>() && !localConfig.get<ENABLE_WEIGHTLESS>()) {
678678
return compiler->compile(modelToCompile, config);
679679
} else {
680680
check_weightless_cache_attribute_occurrence(model);
681681
return compiler->compileWS(std::move(modelToCompile), config);
682682
}
683683
};
684684

685+
const bool performanceHintSetByUser = properties.find(ov::hint::performance_mode.name()) != properties.end();
686+
685687
try {
686688
_logger.debug("performing compile");
687689

688690
// Determine which model to use
689691
auto modelToCompile = successfullyDebatched ? batchedModel : model->clone();
690692

691-
if (successfullyDebatched && localConfig.get<PERFORMANCE_HINT>() == ov::hint::PerformanceMode::LATENCY) {
692-
_logger.warning("Plugin batching is enabled. Overriding performance mode to THROUGHPUT for compilation.");
693-
694-
auto modifiedConfig = localConfig; // Copy only when needed
695-
std::stringstream strStream;
696-
strStream << ov::hint::PerformanceMode::THROUGHPUT;
697-
modifiedConfig.update({{ov::hint::performance_mode.name(), strStream.str()}});
698-
699-
graph = compileWithConfig(std::move(modelToCompile), modifiedConfig);
693+
if (successfullyDebatched) {
694+
if (!performanceHintSetByUser) {
695+
_logger.info("Setting performance mode to THROUGHPUT for batched model compilation.");
696+
697+
auto modifiedConfig = localConfig; // Copy only when needed
698+
std::stringstream strStream;
699+
strStream << ov::hint::PerformanceMode::THROUGHPUT;
700+
modifiedConfig.update({{ov::hint::performance_mode.name(), strStream.str()}});
701+
702+
graph = compileWithConfig(std::move(modelToCompile), modifiedConfig);
703+
} else if (localConfig.get<PERFORMANCE_HINT>() == ov::hint::PerformanceMode::LATENCY) {
704+
_logger.warning("PERFORMANCE_HINT is explicitly set to LATENCY mode, but batch dimension (N) is "
705+
"detected in the model. The NPU Plugin will reshape the model to batch size 1 and "
706+
"process each batch slice separately.");
707+
_logger.warning("For optimal performance with batched models, THROUGHPUT mode is highly recommended, "
708+
"as LATENCY mode prevents parallel batch processing.");
709+
_logger.warning("If batch detection appears incorrect, verify that the input and output layouts are "
710+
"configured properly.");
711+
graph = compileWithConfig(std::move(modelToCompile), localConfig);
712+
} else {
713+
graph = compileWithConfig(std::move(modelToCompile), localConfig);
714+
}
700715
} else {
701716
graph = compileWithConfig(std::move(modelToCompile), localConfig); // No copy
702717
}

0 commit comments

Comments
 (0)