Skip to content

Commit a7eb69b

Browse files
[NPU] Allow WS with no init schedules to succeed (#34731)
### Details: When weightless compilation is requested, the compiler can — depending on the input model — theoretically find no weights that need to be processed through an init schedule. In this case a regular "blob with weights" is returned instead, and the plugin must be able to work with it successfully.

### Tickets: [EISW-206596](https://jira.devtools.intel.com/browse/EISW-206596)

### AI Assistance: AI assistance used: no
1 parent e0ab225 commit a7eb69b

File tree

2 files changed

+98
-1
lines changed

2 files changed

+98
-1
lines changed

src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,12 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::compileWS(std::shared_ptr<ov::Mod
138138

139139
std::shared_ptr<NetworkDescription> mainNetworkDescription = initMainNetworkDescriptions.back();
140140
initMainNetworkDescriptions.pop_back();
141-
OPENVINO_ASSERT(initMainNetworkDescriptions.size() > 0, "No init schedules have been returned by the compiler");
141+
if (initMainNetworkDescriptions.empty()) {
142+
_logger.warning("NPU compiler did not produce any init schedules. "
143+
"This likely means that the compiled model blob has weights inside even "
144+
"though weightless compilation was requested.");
145+
}
146+
142147
std::vector<std::shared_ptr<NetworkDescription>> initNetworkDescriptions =
143148
std::move(initMainNetworkDescriptions);
144149
tensorMain = std::move(mainNetworkDescription->compiledNetworkTensor);

src/plugins/intel_npu/tests/functional/behavior/weights_separation.hpp

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,40 @@ class WeightsSeparationTests : public ov::test::behavior::OVPluginTestBase,
122122
return model;
123123
}
124124

125+
// This is a special model that has weightless constants that are guaranteed
126+
// to be skipped by weights schedule. This tests cases where compiler
127+
// produces "blob with weights" when "weightless blob" is requested: in
128+
// theory, this may happen, and must not cause any errors.
129+
std::shared_ptr<ov::Model> createTestModelWeightlessWithDummyConstants() {
130+
constexpr auto precision = element::f32;
131+
132+
const auto reshapeWeights =
133+
std::make_shared<op::v0::Constant>(element::i64, Shape{3}, std::vector<int64_t>{1, 2, 3});
134+
135+
const auto input1 = std::make_shared<op::v0::Parameter>(precision, Shape{6});
136+
const auto input2 = std::make_shared<op::v0::Parameter>(precision, Shape{1, 2, 3});
137+
const auto reshapedInput1 = std::make_shared<op::v1::Reshape>(input1, reshapeWeights, /*special_zero=*/false);
138+
auto add = std::make_shared<op::v1::Add>(reshapedInput1, input2);
139+
140+
reshapeWeights->set_friendly_name("weights");
141+
input1->set_friendly_name("input1");
142+
input2->set_friendly_name("input2");
143+
reshapedInput1->set_friendly_name("reshapedInput1");
144+
add->set_friendly_name("add");
145+
146+
// Note: Reshape weights with weightless cache attribute satisfy the
147+
// basic requirement to create weights schedule. However, since this is
148+
// a static reshape, these weights would "disappear" during compilation,
149+
// causing the compiler to put nothing into the weights schedule.
150+
reshapeWeights->get_rt_info()[ov::WeightlessCacheAttribute::get_type_info_static()] =
151+
ov::WeightlessCacheAttribute(reshapeWeights->get_byte_size(), 0, reshapeWeights->get_element_type());
152+
153+
auto model =
154+
std::make_shared<Model>(OutputVector{add}, ParameterVector{input1, input2}, "Dummy weightless model");
155+
ov::util::set_tensors_names(AUTO, *model, {}, {{0, {"add"}}});
156+
return model;
157+
}
158+
125159
/**
126160
* @brief This model was fine-tuned in order to compile fast and yield a light init schedule.
127161
*/
@@ -163,6 +197,9 @@ class WeightsSeparationTests : public ov::test::behavior::OVPluginTestBase,
163197
OV_ASSERT_NO_THROW(utils::compare(expected, output));
164198
}
165199

200+
// This is a "template" of a test used in multiple configurations
201+
void runCorrectInferenceResultIfCannotCompileAsWeightless();
202+
166203
protected:
167204
std::shared_ptr<ov::Core> core = utils::PluginCache::get().core();
168205
ov::AnyMap configuration;
@@ -487,6 +524,41 @@ TEST_P(WeightsSeparationTests, WeightlessBlobIsSmaller) {
487524
ASSERT_TRUE(weightfullBlobStream.str().size() > weightlessBlobStream.str().size());
488525
}
489526

527+
void WeightsSeparationTests::runCorrectInferenceResultIfCannotCompileAsWeightless() {
528+
model = createTestModelWeightlessWithDummyConstants();
529+
530+
model_path = ov::util::path_join({utils::getCurrentWorkingDir(), utils::generateTestFilePrefix()}).string();
531+
ov::serialize(model, model_path + ".xml", model_path + ".bin");
532+
533+
// compilation should succeed
534+
configuration.insert(ov::intel_npu::weightless_blob(true));
535+
OV_ASSERT_NO_THROW(compiled_model = core->compile_model(model, target_device, configuration));
536+
ASSERT_TRUE(compiled_model);
537+
538+
std::stringstream export_stream;
539+
compiled_model.export_model(export_stream);
540+
541+
configuration.insert(ov::weights_path(model_path + ".bin"));
542+
OV_ASSERT_NO_THROW(compiled_model = core->import_model(export_stream, target_device, configuration));
543+
ASSERT_TRUE(compiled_model);
544+
545+
// inference should also succeed
546+
const ov::Tensor input1 =
547+
utils::create_tensor(element::f32, Shape{6}, std::vector<float>{40.0f, 40.0f, 40.0f, 40.0f, 40.0f, 40.0f});
548+
const ov::Tensor input2 =
549+
utils::create_tensor(element::f32, Shape{1, 2, 3}, std::vector<float>{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f});
550+
OV_ASSERT_NO_THROW(inference_request = compiled_model.create_infer_request());
551+
OV_ASSERT_NO_THROW(inference_request.set_tensor("input1", input1));
552+
OV_ASSERT_NO_THROW(inference_request.set_tensor("input2", input2));
553+
OV_ASSERT_NO_THROW(inference_request.infer());
554+
555+
const ov::Tensor expected = utils::create_tensor(element::f32,
556+
Shape{1, 2, 3},
557+
std::vector<float>{40.0f, 41.0f, 42.0f, 43.0f, 44.0f, 45.0f});
558+
const ov::Tensor output = inference_request.get_tensor("add");
559+
OV_ASSERT_NO_THROW(utils::compare(expected, output));
560+
}
561+
490562
using WeightsSeparationOneShotTests = WeightsSeparationTests;
491563

492564
/**
@@ -504,6 +576,17 @@ TEST_P(WeightsSeparationOneShotTests, CorrectInferenceResultNoImportOneShot) {
504576
create_infer_request_and_check_result();
505577
}
506578

579+
/**
580+
* @brief compile -> import the result, ov::weights_path provided -> create inference request -> run one inference and
581+
* check the result
582+
* compilation and inference must work even if the weightless model is not actually weightless
583+
* (compiler may not be able to find any "suitable" weights)
584+
*/
585+
TEST_P(WeightsSeparationOneShotTests, CorrectInferenceResultIfCannotCompileAsWeightlessOneShot) {
586+
configuration.insert(ov::intel_npu::separate_weights_version(ov::intel_npu::WSVersion::ONE_SHOT));
587+
runCorrectInferenceResultIfCannotCompileAsWeightless();
588+
}
589+
507590
using WeightsSeparationIterativeTests = WeightsSeparationTests;
508591

509592
/**
@@ -521,6 +604,15 @@ TEST_P(WeightsSeparationIterativeTests, CorrectInferenceResultNoImportIterative)
521604
create_infer_request_and_check_result();
522605
}
523606

607+
/**
608+
* @brief Compiles a special model in WeightsSeparation ITERATIVE mode to ensure
609+
* compilation succeeds. This is similar to the ONE_SHOT version test.
610+
*/
611+
TEST_P(WeightsSeparationIterativeTests, CorrectInferenceResultIfCannotCompileAsWeightlessIterative) {
612+
configuration.insert(ov::intel_npu::separate_weights_version(ov::intel_npu::WSVersion::ITERATIVE));
613+
runCorrectInferenceResultIfCannotCompileAsWeightless();
614+
}
615+
524616
} // namespace behavior
525617
} // namespace test
526618
} // namespace ov

0 commit comments

Comments
 (0)