@@ -83,10 +83,10 @@ ConvertMOEToMOECompressed::ConvertMOEToMOECompressed() {
8383 auto topk_m = any_input ();
8484
8585 auto moe_root = wrap_type<ov::op::internal::MOE>({hidden_states_m, routing_weights_m, topk_m, convert_m_0, convert_m_1, convert_m_2},
86- [](const ov::Output<ov::Node>& output) {
87- auto moe = ov::as_type_ptr<ov::op::internal::MOE>(output.get_node_shared_ptr ());
88- return moe->get_config ().expert_type == ov::op::internal::MOE::Expert_type::GEMM3_SWIGLU;
89- });
86+ [](const ov::Output<ov::Node>& output) {
87+ auto moe = ov::as_type_ptr<ov::op::internal::MOE>(output.get_node_shared_ptr ());
88+ return moe->get_config ().expert_type == ov::op::internal::MOE::Expert_type::GEMM3_SWIGLU;
89+ });
9090
9191 ov::matcher_pass_callback callback = [OV_CAPTURE_CPY_AND_THIS](ov::pass::pattern::Matcher& m) {
9292 const auto & pattern_map = m.get_pattern_value_map ();
@@ -108,7 +108,7 @@ ConvertMOEToMOECompressed::ConvertMOEToMOECompressed() {
108108 auto zp_0 = pattern_map.at (zp_m_0).get_node_shared_ptr ();
109109 auto scale_0_shape = scale_0->get_shape ();
110110 scale_0_shape.pop_back ();
111- auto reshape_const = ov::op::v0::Constant::create (ov::element::i32 , ov::Shape{ scale_0_shape.size () }, scale_0_shape);
111+ auto reshape_const = ov::op::v0::Constant::create (ov::element::i32 , ov::Shape{scale_0_shape.size ()}, scale_0_shape);
112112 auto scale_0_reshape = std::make_shared<ov::op::v1::Reshape>(scale_0, reshape_const, false );
113113 auto zp_0_reshape = std::make_shared<ov::op::v1::Reshape>(zp_0, reshape_const, false );
114114 ov::enable_keep_const_precision (scale_0_reshape);
@@ -117,7 +117,7 @@ ConvertMOEToMOECompressed::ConvertMOEToMOECompressed() {
117117 std::vector<size_t > transpose_order_0 (scale_0_reshape->get_shape ().size ());
118118 std::iota (transpose_order_0.begin (), transpose_order_0.end (), 0 );
119119 std::swap (*(transpose_order_0.end () - 1 ), *(transpose_order_0.end () - 2 ));
120- auto transpose_0_const = ov::op::v0::Constant::create (ov::element::i32 , ov::Shape{ transpose_order_0.size () }, transpose_order_0);
120+ auto transpose_0_const = ov::op::v0::Constant::create (ov::element::i32 , ov::Shape{transpose_order_0.size ()}, transpose_order_0);
121121 auto transpose_0_scale = std::make_shared<ov::op::v1::Transpose>(scale_0_reshape, transpose_0_const);
122122 auto transpose_0_zp = std::make_shared<ov::op::v1::Transpose>(zp_0_reshape, transpose_0_const);
123123 ov::enable_keep_const_precision (transpose_0_scale);
@@ -128,7 +128,7 @@ ConvertMOEToMOECompressed::ConvertMOEToMOECompressed() {
128128 auto zp_1 = pattern_map.at (zp_m_1).get_node_shared_ptr ();
129129 auto scale_1_shape = scale_1->get_shape ();
130130 scale_1_shape.pop_back ();
131- auto reshape_const_1 = ov::op::v0::Constant::create (ov::element::i32 , ov::Shape{ scale_1_shape.size () }, scale_1_shape);
131+ auto reshape_const_1 = ov::op::v0::Constant::create (ov::element::i32 , ov::Shape{scale_1_shape.size ()}, scale_1_shape);
132132 auto scale_1_reshape = std::make_shared<ov::op::v1::Reshape>(scale_1, reshape_const_1, false );
133133 auto zp_1_reshape = std::make_shared<ov::op::v1::Reshape>(zp_1, reshape_const_1, false );
134134 ov::enable_keep_const_precision (scale_1_reshape);
@@ -137,7 +137,7 @@ ConvertMOEToMOECompressed::ConvertMOEToMOECompressed() {
137137 std::vector<size_t > transpose_order_1 (scale_1_reshape->get_shape ().size ());
138138 std::iota (transpose_order_1.begin (), transpose_order_1.end (), 0 );
139139 std::swap (*(transpose_order_1.end () - 1 ), *(transpose_order_1.end () - 2 ));
140- auto transpose_1_const = ov::op::v0::Constant::create (ov::element::i32 , ov::Shape{ transpose_order_1.size () }, transpose_order_1);
140+ auto transpose_1_const = ov::op::v0::Constant::create (ov::element::i32 , ov::Shape{transpose_order_1.size ()}, transpose_order_1);
141141 auto transpose_1_scale = std::make_shared<ov::op::v1::Transpose>(scale_1_reshape, transpose_1_const);
142142 auto transpose_1_zp = std::make_shared<ov::op::v1::Transpose>(zp_1_reshape, transpose_1_const);
143143 ov::enable_keep_const_precision (transpose_1_scale);
@@ -148,7 +148,7 @@ ConvertMOEToMOECompressed::ConvertMOEToMOECompressed() {
148148 auto zp_2 = pattern_map.at (zp_m_2).get_node_shared_ptr ();
149149 auto scale_2_shape = scale_2->get_shape ();
150150 scale_2_shape.pop_back ();
151- auto reshape_const_2 = ov::op::v0::Constant::create (ov::element::i32 , ov::Shape{ scale_2_shape.size () }, scale_2_shape);
151+ auto reshape_const_2 = ov::op::v0::Constant::create (ov::element::i32 , ov::Shape{scale_2_shape.size ()}, scale_2_shape);
152152 auto scale_2_reshape = std::make_shared<ov::op::v1::Reshape>(scale_2, reshape_const_2, false );
153153 auto zp_2_reshape = std::make_shared<ov::op::v1::Reshape>(zp_2, reshape_const_2, false );
154154 ov::enable_keep_const_precision (scale_2_reshape);
@@ -157,7 +157,7 @@ ConvertMOEToMOECompressed::ConvertMOEToMOECompressed() {
157157 std::vector<size_t > transpose_order_2 (scale_2_reshape->get_shape ().size ());
158158 std::iota (transpose_order_2.begin (), transpose_order_2.end (), 0 );
159159 std::swap (*(transpose_order_2.end () - 1 ), *(transpose_order_2.end () - 2 ));
160- auto transpose_2_const = ov::op::v0::Constant::create (ov::element::i32 , ov::Shape{ transpose_order_2.size () }, transpose_order_2);
160+ auto transpose_2_const = ov::op::v0::Constant::create (ov::element::i32 , ov::Shape{transpose_order_2.size ()}, transpose_order_2);
161161 auto transpose_2_scale = std::make_shared<ov::op::v1::Transpose>(scale_2_reshape, transpose_2_const);
162162 auto transpose_2_zp = std::make_shared<ov::op::v1::Transpose>(zp_2_reshape, transpose_2_const);
163163 ov::enable_keep_const_precision (transpose_2_scale);
@@ -217,8 +217,13 @@ ConvertMOEToMOECompressed::ConvertMOEToMOECompressed() {
217217 ov::copy_runtime_info (moe, moe_compressed);
218218 ov::replace_node (moe, moe_compressed);
219219
220- std::cout << " ConvertMOEToMOECompressed is hit : num_expert = " << config.num_expert << " , top_k = " << config.top_k
221- << " , hidden_size = " << config.hidden_size << " , inter_size = " << config.inter_size << " , group_size = " << config.group_size << std::endl;
220+ static bool first_time = true ;
221+ if (first_time) {
222+ first_time = false ;
223+ std::cout << " [ ConvertMOEToMOECompressed ]: num_expert = " << config.num_expert << " , top_k = " << config.top_k
224+ << " , hidden_size = " << config.hidden_size << " , inter_size = " << config.inter_size << " , group_size = " << config.group_size
225+ << std::endl;
226+ }
222227 return true ;
223228 };
224229
0 commit comments