@@ -513,7 +513,7 @@ class MOEOptMLPReduce : public KernelGenerator {
513513 }
514514};
515515
516- dnnl::memory convert2dnnl (const memory::ptr& ptr, const std::vector<int64_t >& dim, dnnl::memory::format_tag tag, int offset = 0 ) {
516+ dnnl::memory convert2dnnl (const memory::ptr& ptr, const std::vector<int64_t >& dim, dnnl::memory::format_tag tag, int64_t offset = 0 ) {
517517 OV_ITT_SCOPED_TASK (ov::intel_gpu::itt::domains::intel_gpu_plugin, openvino::itt::handle (" convert2dnnl" ));
518518 return ptr->get_onednn_memory (dnnl::memory::desc (dnnl::memory::dims (dim), convert_data_type (ptr->get_layout ().data_type ), tag), offset);
519519}
@@ -626,19 +626,19 @@ class MOEOptImpl : public PrimitiveImplOCL {
626626 dnnl_weights[2 ].oc = _hidden_size;
627627 for (int i = 0 ; i < 3 ; i++) {
628628 // weight shape: [ic, oc], type: u4
629- size_t wei_offset = j * dnnl_weights[i].ic * dnnl_weights[i].oc / 2 ;
629+ int64_t wei_offset = j * dnnl_weights[i].ic * dnnl_weights[i].oc / 2 ;
630630 dnnl_weights[i].weight =
631631 convert2dnnl (moe_fusion_wei_addr.weight [i], {dnnl_weights[i].ic , dnnl_weights[i].oc }, dnnl::memory::format_tag::ba, wei_offset);
632632
633633 // scale shape: [ic / ic_group_size, oc], type: f16
634- size_t scale_offset = j * dnnl_weights[i].ic * dnnl_weights[i].oc / dnnl_weights[i].ic_group_size * 2 ;
634+ int64_t scale_offset = j * dnnl_weights[i].ic * dnnl_weights[i].oc / dnnl_weights[i].ic_group_size * 2 ;
635635 dnnl_weights[i].scale = convert2dnnl (moe_fusion_wei_addr.scale [i],
636636 {dnnl_weights[i].ic / dnnl_weights[i].ic_group_size , dnnl_weights[i].oc },
637637 dnnl::memory::format_tag::ab,
638638 scale_offset);
639639
640640 // zp shape: [ic / ic_group_size, oc], type: u4
641- size_t zp_offset = j * dnnl_weights[i].ic * dnnl_weights[i].oc / dnnl_weights[i].ic_group_size / 2 ;
641+ int64_t zp_offset = j * dnnl_weights[i].ic * dnnl_weights[i].oc / dnnl_weights[i].ic_group_size / 2 ;
642642 dnnl_weights[i].zp = convert2dnnl (moe_fusion_wei_addr.zp [i],
643643 {dnnl_weights[i].ic / dnnl_weights[i].ic_group_size , dnnl_weights[i].oc },
644644 dnnl::memory::format_tag::ab,
@@ -689,7 +689,7 @@ class MOEOptImpl : public PrimitiveImplOCL {
689689 // scratch.y = down(scratch.gate) * routing_weights
690690 internal_buffers.emplace_back (layout_down_out, true ); // 4: x, scratch.x has same layout with down output
691691 layout routing_layout (ov::PartialShape{batch * max_topk}, data_type, cldnn::format::bfyx);
692- internal_buffers.emplace_back (routing_layout, true ); // 5: routing_weights
692+ internal_buffers.emplace_back (routing_layout, true ); // 5: routing_weights
693693 internal_buffers.emplace_back (layout_gateup_out, true ); // 6: gate, scratch.gate has same layout with up
694694 // expert masks for gpu
695695 layout index_layout (ov::PartialShape{batch}, ov::element::i32 , cldnn::format::bfyx);
0 commit comments