Skip to content

Commit 421ec91

Browse files
committed
minor update
1 parent d67be1a commit 421ec91

File tree

4 files changed

+44
-26
lines changed

4 files changed

+44
-26
lines changed

src/plugins/intel_gpu/src/graph/impls/ocl_v2/moe_opt.hpp

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,11 @@ struct MOEOpt : public ImplementationManager {
6565
if (!one_of(wei_layout.data_type, supported_wei_type)) {
6666
return false;
6767
}
68-
std::cout << "ocl::moe::opt is supported..." << std::endl;
68+
static bool first_time = true;
69+
if (first_time) {
70+
first_time = false;
71+
std::cout << "[ ocl::moe::opt ] validation passed!" << std::endl;
72+
}
6973
return true;
7074
}
7175
};

src/plugins/intel_gpu/src/plugin/transformations/convert_moe_to_compressed.cpp

Lines changed: 17 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -83,10 +83,10 @@ ConvertMOEToMOECompressed::ConvertMOEToMOECompressed() {
8383
auto topk_m = any_input();
8484

8585
auto moe_root = wrap_type<ov::op::internal::MOE>({hidden_states_m, routing_weights_m, topk_m, convert_m_0, convert_m_1, convert_m_2},
86-
[](const ov::Output<ov::Node>& output) {
87-
auto moe = ov::as_type_ptr<ov::op::internal::MOE>(output.get_node_shared_ptr());
88-
return moe->get_config().expert_type == ov::op::internal::MOE::Expert_type::GEMM3_SWIGLU;
89-
});
86+
[](const ov::Output<ov::Node>& output) {
87+
auto moe = ov::as_type_ptr<ov::op::internal::MOE>(output.get_node_shared_ptr());
88+
return moe->get_config().expert_type == ov::op::internal::MOE::Expert_type::GEMM3_SWIGLU;
89+
});
9090

9191
ov::matcher_pass_callback callback = [OV_CAPTURE_CPY_AND_THIS](ov::pass::pattern::Matcher& m) {
9292
const auto& pattern_map = m.get_pattern_value_map();
@@ -108,7 +108,7 @@ ConvertMOEToMOECompressed::ConvertMOEToMOECompressed() {
108108
auto zp_0 = pattern_map.at(zp_m_0).get_node_shared_ptr();
109109
auto scale_0_shape = scale_0->get_shape();
110110
scale_0_shape.pop_back();
111-
auto reshape_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ scale_0_shape.size() }, scale_0_shape);
111+
auto reshape_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{scale_0_shape.size()}, scale_0_shape);
112112
auto scale_0_reshape = std::make_shared<ov::op::v1::Reshape>(scale_0, reshape_const, false);
113113
auto zp_0_reshape = std::make_shared<ov::op::v1::Reshape>(zp_0, reshape_const, false);
114114
ov::enable_keep_const_precision(scale_0_reshape);
@@ -117,7 +117,7 @@ ConvertMOEToMOECompressed::ConvertMOEToMOECompressed() {
117117
std::vector<size_t> transpose_order_0(scale_0_reshape->get_shape().size());
118118
std::iota(transpose_order_0.begin(), transpose_order_0.end(), 0);
119119
std::swap(*(transpose_order_0.end() - 1), *(transpose_order_0.end() - 2));
120-
auto transpose_0_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ transpose_order_0.size() }, transpose_order_0);
120+
auto transpose_0_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{transpose_order_0.size()}, transpose_order_0);
121121
auto transpose_0_scale = std::make_shared<ov::op::v1::Transpose>(scale_0_reshape, transpose_0_const);
122122
auto transpose_0_zp = std::make_shared<ov::op::v1::Transpose>(zp_0_reshape, transpose_0_const);
123123
ov::enable_keep_const_precision(transpose_0_scale);
@@ -128,7 +128,7 @@ ConvertMOEToMOECompressed::ConvertMOEToMOECompressed() {
128128
auto zp_1 = pattern_map.at(zp_m_1).get_node_shared_ptr();
129129
auto scale_1_shape = scale_1->get_shape();
130130
scale_1_shape.pop_back();
131-
auto reshape_const_1 = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ scale_1_shape.size() }, scale_1_shape);
131+
auto reshape_const_1 = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{scale_1_shape.size()}, scale_1_shape);
132132
auto scale_1_reshape = std::make_shared<ov::op::v1::Reshape>(scale_1, reshape_const_1, false);
133133
auto zp_1_reshape = std::make_shared<ov::op::v1::Reshape>(zp_1, reshape_const_1, false);
134134
ov::enable_keep_const_precision(scale_1_reshape);
@@ -137,7 +137,7 @@ ConvertMOEToMOECompressed::ConvertMOEToMOECompressed() {
137137
std::vector<size_t> transpose_order_1(scale_1_reshape->get_shape().size());
138138
std::iota(transpose_order_1.begin(), transpose_order_1.end(), 0);
139139
std::swap(*(transpose_order_1.end() - 1), *(transpose_order_1.end() - 2));
140-
auto transpose_1_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ transpose_order_1.size() }, transpose_order_1);
140+
auto transpose_1_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{transpose_order_1.size()}, transpose_order_1);
141141
auto transpose_1_scale = std::make_shared<ov::op::v1::Transpose>(scale_1_reshape, transpose_1_const);
142142
auto transpose_1_zp = std::make_shared<ov::op::v1::Transpose>(zp_1_reshape, transpose_1_const);
143143
ov::enable_keep_const_precision(transpose_1_scale);
@@ -148,7 +148,7 @@ ConvertMOEToMOECompressed::ConvertMOEToMOECompressed() {
148148
auto zp_2 = pattern_map.at(zp_m_2).get_node_shared_ptr();
149149
auto scale_2_shape = scale_2->get_shape();
150150
scale_2_shape.pop_back();
151-
auto reshape_const_2 = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ scale_2_shape.size() }, scale_2_shape);
151+
auto reshape_const_2 = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{scale_2_shape.size()}, scale_2_shape);
152152
auto scale_2_reshape = std::make_shared<ov::op::v1::Reshape>(scale_2, reshape_const_2, false);
153153
auto zp_2_reshape = std::make_shared<ov::op::v1::Reshape>(zp_2, reshape_const_2, false);
154154
ov::enable_keep_const_precision(scale_2_reshape);
@@ -157,7 +157,7 @@ ConvertMOEToMOECompressed::ConvertMOEToMOECompressed() {
157157
std::vector<size_t> transpose_order_2(scale_2_reshape->get_shape().size());
158158
std::iota(transpose_order_2.begin(), transpose_order_2.end(), 0);
159159
std::swap(*(transpose_order_2.end() - 1), *(transpose_order_2.end() - 2));
160-
auto transpose_2_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ transpose_order_2.size() }, transpose_order_2);
160+
auto transpose_2_const = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{transpose_order_2.size()}, transpose_order_2);
161161
auto transpose_2_scale = std::make_shared<ov::op::v1::Transpose>(scale_2_reshape, transpose_2_const);
162162
auto transpose_2_zp = std::make_shared<ov::op::v1::Transpose>(zp_2_reshape, transpose_2_const);
163163
ov::enable_keep_const_precision(transpose_2_scale);
@@ -217,8 +217,13 @@ ConvertMOEToMOECompressed::ConvertMOEToMOECompressed() {
217217
ov::copy_runtime_info(moe, moe_compressed);
218218
ov::replace_node(moe, moe_compressed);
219219

220-
std::cout << "ConvertMOEToMOECompressed is hit : num_expert = " << config.num_expert << ", top_k = " << config.top_k
221-
<< ", hidden_size = " << config.hidden_size << ", inter_size = " << config.inter_size << ", group_size = " << config.group_size << std::endl;
220+
static bool first_time = true;
221+
if (first_time) {
222+
first_time = false;
223+
std::cout << "[ ConvertMOEToMOECompressed ]: num_expert = " << config.num_expert << ", top_k = " << config.top_k
224+
<< ", hidden_size = " << config.hidden_size << ", inter_size = " << config.inter_size << ", group_size = " << config.group_size
225+
<< std::endl;
226+
}
222227
return true;
223228
};
224229

src/plugins/intel_gpu/src/plugin/transformations/fuse_moe_compressed.cpp

Lines changed: 22 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -20,13 +20,13 @@
2020
#include "openvino/op/multiply.hpp"
2121
#include "openvino/op/reduce_sum.hpp"
2222
#include "openvino/op/reshape.hpp"
23-
#include "openvino/op/topk.hpp"
24-
#include "openvino/op/transpose.hpp"
25-
#include "openvino/op/unsqueeze.hpp"
2623
#include "openvino/op/scatter_elements_update.hpp"
2724
#include "openvino/op/shape_of.hpp"
2825
#include "openvino/op/softmax.hpp"
2926
#include "openvino/op/subtract.hpp"
27+
#include "openvino/op/topk.hpp"
28+
#include "openvino/op/transpose.hpp"
29+
#include "openvino/op/unsqueeze.hpp"
3030
#include "openvino/pass/pattern/op/pattern.hpp"
3131
#include "openvino/pass/pattern/op/wrap_type.hpp"
3232
#include "transformations/rt_info/keep_const_precision.hpp"
@@ -56,8 +56,7 @@ FuseMOECompressed::FuseMOECompressed() {
5656
auto concat_m = wrap_type<ov::op::v0::Concat>({unsqueeze_m, unsqueeze_const_m}, consumers_count(1));
5757
auto concat1_m = wrap_type<ov::op::v0::Concat>({unsqueeze_const_m, unsqueeze_m, any_input()}, consumers_count(1));
5858
auto bc_m = wrap_type<ov::op::v3::Broadcast>({any_input(), concat_m}, consumers_count(1));
59-
auto scatter_m = wrap_type<ov::op::v12::ScatterElementsUpdate>(
60-
{bc_m->output(0), topk_m->output(1), norm_m->output(0), any_input()}, consumers_count(1));
59+
auto scatter_m = wrap_type<ov::op::v12::ScatterElementsUpdate>({bc_m->output(0), topk_m->output(1), norm_m->output(0), any_input()}, consumers_count(1));
6160
auto transpose_m = wrap_type<ov::op::v1::Transpose>({scatter_m, any_input()}, consumers_count(1));
6261
auto reshape_m = wrap_type<ov::op::v1::Reshape>({transpose_m, concat1_m}, consumers_count(1));
6362
auto unsqueeze_moe_m = wrap_type<ov::op::v0::Unsqueeze>({reshape_m, any_input()}, consumers_count(1));
@@ -73,11 +72,18 @@ FuseMOECompressed::FuseMOECompressed() {
7372
auto down_zp_m = any_input();
7473

7574
// moe compressed
76-
auto moe_compressed_m = wrap_type<ov::intel_gpu::op::MOECompressed>(
77-
{hidden_state_m->output(0), unsqueeze_moe_m->output(0), topk_m->output(1),
78-
gate_wei_m->output(0), gate_scale_m->output(0), gate_zp_m->output(0),
79-
up_wei_m->output(0), up_scale_m->output(0), up_zp_m->output(0),
80-
down_wei_m->output(0), down_scale_m->output(0), down_zp_m->output(0)});
75+
auto moe_compressed_m = wrap_type<ov::intel_gpu::op::MOECompressed>({hidden_state_m->output(0),
76+
unsqueeze_moe_m->output(0),
77+
topk_m->output(1),
78+
gate_wei_m->output(0),
79+
gate_scale_m->output(0),
80+
gate_zp_m->output(0),
81+
up_wei_m->output(0),
82+
up_scale_m->output(0),
83+
up_zp_m->output(0),
84+
down_wei_m->output(0),
85+
down_scale_m->output(0),
86+
down_zp_m->output(0)});
8187

8288
ov::matcher_pass_callback callback = [OV_CAPTURE_CPY_AND_THIS](ov::pass::pattern::Matcher& m) {
8389
const auto& pattern_map = m.get_pattern_value_map();
@@ -117,8 +123,12 @@ FuseMOECompressed::FuseMOECompressed() {
117123
ov::copy_runtime_info(moe_compressed, moe_fused_compressed);
118124
ov::replace_node(moe_compressed, moe_fused_compressed);
119125

120-
std::cout << "FuseMOECompressed is hit : num_expert = " << config.num_expert << ", top_k = " << config.top_k << ", hidden_size = " << config.hidden_size
121-
<< ", inter_size = " << config.inter_size << ", group_size = " << config.group_size << std::endl;
126+
static bool first_time = true;
127+
if (first_time) {
128+
first_time = false;
129+
std::cout << "[ FuseMOECompressed ]: num_expert = " << config.num_expert << ", top_k = " << config.top_k << ", hidden_size = " << config.hidden_size
130+
<< ", inter_size = " << config.inter_size << ", group_size = " << config.group_size << std::endl;
131+
}
122132

123133
return true;
124134
};

src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -192,7 +192,6 @@
192192
#include "openvino/op/ceiling.hpp"
193193
#include "openvino/op/clamp.hpp"
194194
#include "openvino/op/matmul.hpp"
195-
#include "openvino/op/moe.hpp"
196195
#include "openvino/op/reverse_sequence.hpp"
197196
#include "openvino/op/roll.hpp"
198197
#include "openvino/op/shuffle_channels.hpp"

0 commit comments

Comments
 (0)