@@ -122,6 +122,40 @@ class WeightsSeparationTests : public ov::test::behavior::OVPluginTestBase,
122122 return model;
123123 }
124124
125+ // This is a special model that has weightless constants that are guaranteed
126+ // to be skipped by weights schedule. This tests cases where compiler
127+ // produces "blob with weights" when "weightless blob" is requested: in
128+ // theory, this may happen, and must not cause any errors.
129+ std::shared_ptr<ov::Model> createTestModelWeightlessWithDummyConstants () {
130+ constexpr auto precision = element::f32 ;
131+
132+ const auto reshapeWeights =
133+ std::make_shared<op::v0::Constant>(element::i64 , Shape{3 }, std::vector<int64_t >{1 , 2 , 3 });
134+
135+ const auto input1 = std::make_shared<op::v0::Parameter>(precision, Shape{6 });
136+ const auto input2 = std::make_shared<op::v0::Parameter>(precision, Shape{1 , 2 , 3 });
137+ const auto reshapedInput1 = std::make_shared<op::v1::Reshape>(input1, reshapeWeights, /* special_zero=*/ false );
138+ auto add = std::make_shared<op::v1::Add>(reshapedInput1, input2);
139+
140+ reshapeWeights->set_friendly_name (" weights" );
141+ input1->set_friendly_name (" input1" );
142+ input2->set_friendly_name (" input2" );
143+ reshapedInput1->set_friendly_name (" reshapedInput1" );
144+ add->set_friendly_name (" add" );
145+
146+ // Note: Reshape weights with weightless cache attribute satisfy the
147+ // basic requirement to create weights schedule. However, since this is
148+ // a static reshape, these weights would "disappear" during compilation,
149+ // causing the compiler to put nothing into the weights schedule.
150+ reshapeWeights->get_rt_info ()[ov::WeightlessCacheAttribute::get_type_info_static ()] =
151+ ov::WeightlessCacheAttribute (reshapeWeights->get_byte_size (), 0 , reshapeWeights->get_element_type ());
152+
153+ auto model =
154+ std::make_shared<Model>(OutputVector{add}, ParameterVector{input1, input2}, " Dummy weightless model" );
155+ ov::util::set_tensors_names (AUTO, *model, {}, {{0 , {" add" }}});
156+ return model;
157+ }
158+
125159 /* *
126160 * @brief This model was fine-tuned in order to compile fast and yield a light init schedule.
127161 */
@@ -163,6 +197,9 @@ class WeightsSeparationTests : public ov::test::behavior::OVPluginTestBase,
163197 OV_ASSERT_NO_THROW (utils::compare (expected, output));
164198 }
165199
200+ // Shared test body ("template") reused by several test cases, each of which configures the fixture before calling it
201+ void runCorrectInferenceResultIfCannotCompileAsWeightless ();
202+
166203protected:
167204 std::shared_ptr<ov::Core> core = utils::PluginCache::get().core();
168205 ov::AnyMap configuration;
@@ -487,6 +524,41 @@ TEST_P(WeightsSeparationTests, WeightlessBlobIsSmaller) {
487524 ASSERT_TRUE (weightfullBlobStream.str ().size () > weightlessBlobStream.str ().size ());
488525}
489526
527+ void WeightsSeparationTests::runCorrectInferenceResultIfCannotCompileAsWeightless () {
528+ model = createTestModelWeightlessWithDummyConstants ();
529+
530+ model_path = ov::util::path_join ({utils::getCurrentWorkingDir (), utils::generateTestFilePrefix ()}).string ();
531+ ov::serialize (model, model_path + " .xml" , model_path + " .bin" );
532+
533+ // compilation should succeed
534+ configuration.insert (ov::intel_npu::weightless_blob (true ));
535+ OV_ASSERT_NO_THROW (compiled_model = core->compile_model (model, target_device, configuration));
536+ ASSERT_TRUE (compiled_model);
537+
538+ std::stringstream export_stream;
539+ compiled_model.export_model (export_stream);
540+
541+ configuration.insert (ov::weights_path (model_path + " .bin" ));
542+ OV_ASSERT_NO_THROW (compiled_model = core->import_model (export_stream, target_device, configuration));
543+ ASSERT_TRUE (compiled_model);
544+
545+ // inference should also succeed
546+ const ov::Tensor input1 =
547+ utils::create_tensor (element::f32 , Shape{6 }, std::vector<float >{40 .0f , 40 .0f , 40 .0f , 40 .0f , 40 .0f , 40 .0f });
548+ const ov::Tensor input2 =
549+ utils::create_tensor (element::f32 , Shape{1 , 2 , 3 }, std::vector<float >{0 .0f , 1 .0f , 2 .0f , 3 .0f , 4 .0f , 5 .0f });
550+ OV_ASSERT_NO_THROW (inference_request = compiled_model.create_infer_request ());
551+ OV_ASSERT_NO_THROW (inference_request.set_tensor (" input1" , input1));
552+ OV_ASSERT_NO_THROW (inference_request.set_tensor (" input2" , input2));
553+ OV_ASSERT_NO_THROW (inference_request.infer ());
554+
555+ const ov::Tensor expected = utils::create_tensor (element::f32 ,
556+ Shape{1 , 2 , 3 },
557+ std::vector<float >{40 .0f , 41 .0f , 42 .0f , 43 .0f , 44 .0f , 45 .0f });
558+ const ov::Tensor output = inference_request.get_tensor (" add" );
559+ OV_ASSERT_NO_THROW (utils::compare (expected, output));
560+ }
561+
490562using WeightsSeparationOneShotTests = WeightsSeparationTests;
491563
492564/* *
@@ -504,6 +576,17 @@ TEST_P(WeightsSeparationOneShotTests, CorrectInferenceResultNoImportOneShot) {
504576 create_infer_request_and_check_result ();
505577}
506578
579+ /* *
580+ * @brief compile -> import the result, ov::weights_path provided -> create inference request -> run one inference and
581+ * check the result
582+ * compilation and inference must work even if the weightless model is not actually weightless
583+ * (compiler may not be able to find any "suitable" weights)
584+ */
585+ TEST_P (WeightsSeparationOneShotTests, CorrectInferenceResultIfCannotCompileAsWeightlessOneShot) {
586+ configuration.insert (ov::intel_npu::separate_weights_version (ov::intel_npu::WSVersion::ONE_SHOT));
587+ runCorrectInferenceResultIfCannotCompileAsWeightless ();
588+ }
589+
507590using WeightsSeparationIterativeTests = WeightsSeparationTests;
508591
509592/* *
@@ -521,6 +604,15 @@ TEST_P(WeightsSeparationIterativeTests, CorrectInferenceResultNoImportIterative)
521604 create_infer_request_and_check_result ();
522605}
523606
607+ /* *
608+ * @brief Compiles a special model in WeightsSeparation ITERATIVE mode to ensure
609+ * compilation succeeds. This is similar to the ONE_SHOT version test.
610+ */
611+ TEST_P (WeightsSeparationIterativeTests, CorrectInferenceResultIfCannotCompileAsWeightlessIterative) {
612+ configuration.insert (ov::intel_npu::separate_weights_version (ov::intel_npu::WSVersion::ITERATIVE));
613+ runCorrectInferenceResultIfCannotCompileAsWeightless ();
614+ }
615+
524616} // namespace behavior
525617} // namespace test
526618} // namespace ov
0 commit comments