@@ -674,29 +674,44 @@ std::shared_ptr<ov::ICompiledModel> Plugin::compile_model(const std::shared_ptr<
674674 std::shared_ptr<intel_npu::IGraph> graph;
675675
676676 auto compileWithConfig = [&](auto && modelToCompile, const auto & config) {
677- if (!localConfig.get <WEIGHTLESS_BLOB>() && !localConfig.get <ENABLE_WEIGHTLESS>() ) {
677+ if (!localConfig.get <WEIGHTLESS_BLOB>() && !localConfig.get <ENABLE_WEIGHTLESS>()) {
678678 return compiler->compile (modelToCompile, config);
679679 } else {
680680 check_weightless_cache_attribute_occurrence (model);
681681 return compiler->compileWS (std::move (modelToCompile), config);
682682 }
683683 };
684684
685+ const bool performanceHintSetByUser = properties.find (ov::hint::performance_mode.name ()) != properties.end ();
686+
685687 try {
686688 _logger.debug (" performing compile" );
687689
688690 // Determine which model to use
689691 auto modelToCompile = successfullyDebatched ? batchedModel : model->clone ();
690692
691- if (successfullyDebatched && localConfig.get <PERFORMANCE_HINT>() == ov::hint::PerformanceMode::LATENCY) {
692- _logger.warning (" Plugin batching is enabled. Overriding performance mode to THROUGHPUT for compilation." );
693-
694- auto modifiedConfig = localConfig; // Copy only when needed
695- std::stringstream strStream;
696- strStream << ov::hint::PerformanceMode::THROUGHPUT;
697- modifiedConfig.update ({{ov::hint::performance_mode.name (), strStream.str ()}});
698-
699- graph = compileWithConfig (std::move (modelToCompile), modifiedConfig);
693+ if (successfullyDebatched) {
694+ if (!performanceHintSetByUser) {
695+ _logger.info (" Setting performance mode to THROUGHPUT for batched model compilation." );
696+
697+ auto modifiedConfig = localConfig; // Copy only when needed
698+ std::stringstream strStream;
699+ strStream << ov::hint::PerformanceMode::THROUGHPUT;
700+ modifiedConfig.update ({{ov::hint::performance_mode.name (), strStream.str ()}});
701+
702+ graph = compileWithConfig (std::move (modelToCompile), modifiedConfig);
703+ } else if (localConfig.get <PERFORMANCE_HINT>() == ov::hint::PerformanceMode::LATENCY) {
704+ _logger.warning (" PERFORMANCE_HINT is explicitly set to LATENCY mode, but batch dimension (N) is "
705+ " detected in the model. The NPU Plugin will reshape the model to batch size 1 and "
706+ " process each batch slice separately." );
707+ _logger.warning (" For optimal performance with batched models, THROUGHPUT mode is highly recommended, "
708+ " as LATENCY mode prevents parallel batch processing." );
709+ _logger.warning (" If batch detection appears incorrect, verify that the input and output layouts are "
710+ " configured properly." );
711+ graph = compileWithConfig (std::move (modelToCompile), localConfig);
712+ } else {
713+ graph = compileWithConfig (std::move (modelToCompile), localConfig);
714+ }
700715 } else {
701716 graph = compileWithConfig (std::move (modelToCompile), localConfig); // No copy
702717 }
0 commit comments