diff --git a/src/common/transformations/src/transformations/common_optimizations/matmul_const_transposes_extraction.cpp b/src/common/transformations/src/transformations/common_optimizations/matmul_const_transposes_extraction.cpp index 622223384533fc..fe81af9eab0886 100644 --- a/src/common/transformations/src/transformations/common_optimizations/matmul_const_transposes_extraction.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/matmul_const_transposes_extraction.cpp @@ -10,6 +10,7 @@ #include "openvino/op/fake_quantize.hpp" #include "openvino/op/matmul.hpp" #include "openvino/op/transpose.hpp" +#include "openvino/pass/constant_folding.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" ov::pass::MatMulConstTransposesExtraction::MatMulConstTransposesExtraction() { @@ -36,6 +37,10 @@ ov::pass::MatMulConstTransposesExtraction::MatMulConstTransposesExtraction() { std::shared_ptr transpose = std::make_shared( weights, ov::op::v0::Constant::create(element::i32, {transpose_order.size()}, transpose_order)); + if (ov::is_type(weights.get_node_shared_ptr())) { + transpose->get_rt_info()["postponed_constant"] = true; + ov::pass::disable_constant_folding(transpose); + } auto new_matmul = std::make_shared(pattern_value_map.at(data_pattern), transpose, matmul->get_transpose_a(), diff --git a/src/common/transformations/tests/common_optimizations/matmul_const_transposes_extraction.cpp b/src/common/transformations/tests/common_optimizations/matmul_const_transposes_extraction.cpp index 17d8d0341fc375..43fd93f4f58abe 100644 --- a/src/common/transformations/tests/common_optimizations/matmul_const_transposes_extraction.cpp +++ b/src/common/transformations/tests/common_optimizations/matmul_const_transposes_extraction.cpp @@ -30,8 +30,10 @@ TEST_F(TransformationTestsF, MatMulConstTransposesExtractionConstantWeights) { { auto data = std::make_shared(element::f32, Shape{1, 3, 4}); - auto weights = opset8::Constant::create(element::f32, Shape{1, 2, 3}, {1, 3, 5, 2, 4, 6}); - auto matmul = std::make_shared(data, weights, true, true); + auto weights = opset8::Constant::create(element::f32, Shape{1, 3, 2}, {1, 2, 3, 4, 5, 6}); + auto transpose = + std::make_shared(weights, op::v0::Constant::create(element::i32, Shape{3}, {0, 2, 1})); + auto matmul = std::make_shared(data, transpose, true, true); model_ref = std::make_shared(matmul, ParameterVector{data}); } comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); @@ -97,8 +99,10 @@ TEST_F(TransformationTestsF, MatMulConstTransposesExtractionNonUnitDims_transpos } { auto data = std::make_shared(element::f32, Shape{1, 3, 4}); - auto weights = opset8::Constant::create(element::f32, Shape{2, 2, 3}, {1, 3, 5, 2, 4, 6, 1, 3, 5, 2, 4, 6}); - auto matmul = std::make_shared(data, weights, true, true); + auto weights = opset8::Constant::create(element::f32, Shape{2, 3, 2}, {1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6}); + auto transpose = + std::make_shared(weights, op::v0::Constant::create(element::i32, Shape{3}, {0, 2, 1})); + auto matmul = std::make_shared(data, transpose, true, true); model_ref = std::make_shared(matmul, ParameterVector{data}); } comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); @@ -117,8 +121,10 @@ TEST_F(TransformationTestsF, MatMulConstTransposesExtractionNonUnitDims_transpos } { auto data = std::make_shared(element::f32, Shape{1, 4, 3}); - auto weights = opset8::Constant::create(element::f32, Shape{2, 2, 3}, {1, 3, 5, 2, 4, 6, 1, 3, 5, 2, 4, 6}); - auto matmul = std::make_shared(data, weights, false, true); + auto weights = opset8::Constant::create(element::f32, Shape{2, 3, 2}, {1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6}); + auto transpose = + std::make_shared(weights, op::v0::Constant::create(element::i32, Shape{3}, {0, 2, 1})); + auto matmul = std::make_shared(data, transpose, false, true); model_ref = std::make_shared(matmul, ParameterVector{data}); } comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); diff --git a/src/core/src/xml_util/xml_serialize_util.cpp b/src/core/src/xml_util/xml_serialize_util.cpp index 739ef5fb62df7f..028e0eb733f12b 100644 --- a/src/core/src/xml_util/xml_serialize_util.cpp +++ b/src/core/src/xml_util/xml_serialize_util.cpp @@ -25,6 +25,7 @@ #include "openvino/op/util/max_pool_base.hpp" #include "openvino/op/util/op_types.hpp" #include "openvino/op/util/sub_graph_base.hpp" +#include "openvino/pass/constant_folding.hpp" #include "openvino/runtime/string_aligned_buffer.hpp" #include "openvino/xml_util/constant_writer.hpp" #include "transformations/rt_info/disable_fp16_compression.hpp" @@ -52,6 +53,9 @@ class PostponedConstantReplacer { if (node->get_rt_info().count("postponed_constant")) { OPENVINO_ASSERT(node->get_output_size() == 1); ov::OutputVector outputs(1); + if (ov::pass::constant_folding_is_disabled(node)) { + node->get_rt_info().erase(ov::pass::DisableConstantFolding::get_type_info_static()); + } OPENVINO_ASSERT( node->constant_fold(outputs, node->input_values()), "Node with set `postponed_constant` attribute cannot be fold to constant when saving model to IR file");