File tree (expand / collapse): 2 files changed, +4 -5 lines changed
src/plugins/intel_gpu/src Expand file tree Collapse file tree 2 files changed +4
-5
lines changed Original file line number Diff line number Diff line change @@ -66,13 +66,12 @@ JitConstants MoEGemmMicroGenerator::get_jit_constants(const kernel_impl_params&
6666 jit.make (" NUM_GROUPS" , scale_shape[2 ]);
6767 else
6868 jit.make (" NUM_GROUPS" , 1 );
69-
69+ size_t expert_stride = weight_shape. size () == 4 ? (weight_shape[ 1 ] * weight_shape[ 2 ] * weight_shape[ 3 ]) : (weight_shape[ 1 ] * weight_shape[ 2 ]);
7070 if (is_u4_i4) {
71- size_t stride = weight_shape.size () == 4 ? (weight_shape[1 ] * weight_shape[2 ] * weight_shape[3 ]) / 2 : (weight_shape[1 ] * weight_shape[2 ]) / 2 ;
72- jit.make (" EXPERT_STRIDE" , stride);
71+ jit.make (" EXPERT_STRIDE" , expert_stride / 2 );
7372 jit.make (" WEIGHT_COMPRESSED_INT4" , 1 );
7473 } else {
75- jit.make (" EXPERT_STRIDE" , (weight_shape[ 1 ] * weight_shape[ 2 ]) );
74+ jit.make (" EXPERT_STRIDE" , expert_stride );
7675 }
7776 if (!cfg.is_weight_symmetric_quantized )
7877 jit.make (" WEIGHT_ZP_DT" , to_ocl_type (data_types::f16 ));
Original file line number Diff line number Diff line change @@ -405,7 +405,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
405405 // Currently moe op is only supported by >= xe2
406406 auto & engine = m_context->get_engine ();
407407 const auto & info = engine.get_device_info ();
408- return (info.arch != cldnn::gpu_arch::xe2) && (info.arch != cldnn::gpu_arch::xe2 );
408+ return (info.arch != cldnn::gpu_arch::xe2) && (info.arch != cldnn::gpu_arch::xe3 );
409409 });
410410 bool is_pa = false ;
411411 for (const auto & op : func->get_ops ()) {
You can’t perform that action at this time.
0 commit comments