Skip to content

Commit cdbae53

Browse files
sungeunk and e-ddykim authored
[GPU] onednn integration rls-v3.12-pc (#34446)
### Details:
```
commit 3e48e1f324199c61d935478e0787ebbc7e7616b6 (HEAD, origin/rls-v3.12-pc)
Author: Kealan Barbieri <kealan.barbieri@intel.com>
Date:   Mon Mar 2 11:20:01 2026 -0800

    xe: gemm: jit: fixup dp4a sums on xe3p
```
---------
Co-authored-by: Kim, Eddy <eddy.kim@intel.com>
1 parent 36051ef commit cdbae53

File tree

10 files changed

+35
-33
lines changed

10 files changed

+35
-33
lines changed

src/plugins/intel_gpu/include/intel_gpu/runtime/device_info.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@ enum class gpu_arch {
3434
xe_hpc = 6,
3535
xe2 = 7,
3636
xe3 = 8,
37+
xe3p_35_10 = 9,
38+
xe3p_35_11 = 10,
39+
xe3p_unknown = 11,
3740
};
3841

3942
/// @brief Defines version of GFX IP

src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444
#ifndef NOMINMAX
4545
# define NOMINMAX
4646
#endif
47-
#include "gpu/intel/microkernels/fuser.hpp"
47+
#include "gpu/intel/gemm/jit/include/gemmstone/microkernel/fuser.hpp"
4848
#endif
4949

5050
namespace {
@@ -350,8 +350,7 @@ void kernels_cache::build_batch(const batch_program& batch, compiled_kernels& co
350350
std::vector<uint8_t> binary = kernels[0]->get_binary();
351351
kernels.clear();
352352
// Update binary and rebuild kernel
353-
using namespace dnnl::impl::gpu::intel;
354-
micro::fuseMicrokernels(binary, combined_source.c_str());
353+
gemmstone::microkernel::fuse(binary, combined_source.c_str());
355354
_builder->build_kernels(binary.data(), binary.size(), KernelFormat::NATIVE_BIN, "", kernels);
356355
#else // ENABLE_ONEDNN_FOR_GPU
357356
OPENVINO_THROW("[GPU] Can't compile kernel w/ microkernels as onednn is not available");

src/plugins/intel_gpu/src/graph/impls/ocl_v2/moe/moe_3gemm_gen_micro.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,7 @@ void MoE3GemmMicroGenerator::init_microkernels(const kernel_impl_params& params,
223223
GPU_DEBUG_TRACE_DETAIL << "\t weight group size: " << group_size << "\n";
224224

225225
micro::GEMMProblem problem_moe;
226-
micro::GEMMProtocol::Options opts_moe;
226+
micro::GEMMOptions opts_moe;
227227
opts_moe.slmPtr = true;
228228
opts_moe.kParallelLocal = !is_prefill;
229229
enum class MICRO_DIMENSIONALITY { NONE = -1, SCALAR = 0, VECTOR = 1, MATRIX = 2 };

src/plugins/intel_gpu/src/graph/impls/ocl_v2/moe/moe_gemm_gen_micro.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ void MoEGemmMicroGenerator::init_microkernels(const kernel_impl_params& params,
149149
GPU_DEBUG_TRACE_DETAIL << "init_microkernels for " << (is_prefill ? "prefill" : "generate") << " : Seq_len:" << n << " Ofm:" << m << " K:" << k << "\n";
150150

151151
micro::GEMMProblem problem_moe;
152-
micro::GEMMProtocol::Options opts_moe;
152+
micro::GEMMOptions opts_moe;
153153
opts_moe.slmPtr = true;
154154
opts_moe.kParallelLocal = !is_prefill;
155155
enum class MICRO_DIMENSIONALITY { NONE = -1, SCALAR = 0, VECTOR = 1, MATRIX = 2 };

src/plugins/intel_gpu/src/graph/impls/ocl_v2/sdpa/sdpa_gen_micro.cpp

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,8 @@ size_t get_subgroup_size(gpu_arch arch) {
2828
case gpu_arch::xe_hp:
2929
case gpu_arch::xe_hpg:
3030
return 8;
31-
case gpu_arch::xe_hpc:
32-
case gpu_arch::xe2:
33-
case gpu_arch::xe3:
34-
return 16;
3531
default:
36-
return 0;
32+
return 16;
3733
}
3834
}
3935

@@ -1465,8 +1461,7 @@ void SDPAMicroGenerator::init_microkernels(const kernel_impl_params& params,
14651461
is_paged_attention,
14661462
is_prefill);
14671463
break;
1468-
case gpu_arch::xe2:
1469-
case gpu_arch::xe3: {
1464+
default: {
14701465
config = choose_config_xe2(static_cast<int32_t>(k_head_size),
14711466
static_cast<int32_t>(nkeys_v),
14721467
thin_q,
@@ -1476,8 +1471,6 @@ void SDPAMicroGenerator::init_microkernels(const kernel_impl_params& params,
14761471
is_prefill);
14771472
break;
14781473
}
1479-
default:
1480-
break;
14811474
}
14821475

14831476
OPENVINO_ASSERT(config != nullptr);
@@ -1501,7 +1494,7 @@ void SDPAMicroGenerator::init_microkernels(const kernel_impl_params& params,
15011494
problem_kq.A.layout = (is_paged_attention && !is_prefill) ? micro::MatrixLayout::N : micro::MatrixLayout::T;
15021495

15031496
/* Set up microkernel options */
1504-
micro::GEMMProtocol::Options opts_kq;
1497+
micro::GEMMOptions opts_kq;
15051498
opts_kq.localB = true;
15061499
opts_kq.slmPtr = true;
15071500

@@ -1635,7 +1628,7 @@ void SDPAMicroGenerator::init_microkernels(const kernel_impl_params& params,
16351628
}
16361629

16371630
/* Set up microkernel options */
1638-
micro::GEMMProtocol::Options opts_vs;
1631+
micro::GEMMOptions opts_vs;
16391632
opts_vs.localB = true;
16401633
opts_vs.slmPtr = true;
16411634

src/plugins/intel_gpu/src/kernel_selector/kernel_selector_params.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -366,6 +366,9 @@ enum class gpu_arch {
366366
xe_hpc = 6,
367367
xe2 = 7,
368368
xe3 = 8,
369+
xe3p_35_10 = 9,
370+
xe3p_35_11 = 10,
371+
xe3p_unknown = 11,
369372
};
370373

371374

src/plugins/intel_gpu/src/kernel_selector/micro_utils.hpp

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -17,27 +17,28 @@
1717
# define NOMINMAX
1818
#endif
1919

20-
#include "gpu/intel/microkernels/package.hpp"
20+
#include "gpu/intel/gemm/jit/include/gemmstone/microkernel/package.hpp"
2121
#include "gpu/intel/gemm/jit/include/gemmstone/kernel_selector.hpp"
22-
#include "gpu/intel/gemm/jit/include/gemmstone/microkernel_provider.hpp"
23-
#include "gpu/intel/microkernels/shim.hpp"
22+
#include "gpu/intel/gemm/jit/include/gemmstone/microkernel_selector.hpp"
23+
#include "gpu/intel/gemm/jit/include/gemmstone/microkernel/shim.hpp"
2424
#include "common/utils.hpp"
2525

2626
namespace micro {
2727

28-
using Package = dnnl::impl::gpu::intel::micro::Package;
29-
using HWInformation = gemmstone::HWInformation;
28+
using Package = gemmstone::microkernel::Package;
29+
using HWInformation = gemmstone::microkernel::HWInformation;
3030
using GEMMProblem = gemmstone::GEMMProblem;
3131
using ABOffset = gemmstone::ABOffset;
3232
using GEMMStrategy = gemmstone::GEMMStrategy;
33-
using GEMMProtocol = dnnl::impl::gpu::intel::micro::GEMMProtocol;
33+
using GEMMProtocol = gemmstone::microkernel::Protocol;
34+
using GEMMOptions = gemmstone::microkernel::GEMMOptions;
3435
using MatrixLayout = gemmstone::MatrixLayout;
3536
using Type = gemmstone::Type;
3637
using SizeParams = gemmstone::SizeParams;
3738
using StrategyRequirement = gemmstone::StrategyRequirement;
38-
using ShimOptions = dnnl::impl::gpu::intel::micro::ShimOptions;
39-
using HostLanguage = dnnl::impl::gpu::intel::micro::HostLanguage;
40-
using Setting = dnnl::impl::gpu::intel::micro::Setting;
39+
using ShimOptions = gemmstone::microkernel::ShimOptions;
40+
using HostLanguage = gemmstone::microkernel::HostLanguage;
41+
using Setting = gemmstone::microkernel::Package::Setting;
4142

4243
using dnnl::impl::utils::rnd_up_pow2;
4344

@@ -72,18 +73,18 @@ struct MicroKernelPackage {
7273
}
7374
};
7475

75-
inline Package select_gemm_microkernel(GEMMProtocol protocol, HWInformation hw_info, SizeParams sizes, const GEMMProblem &problem,
76+
inline Package select_gemm_microkernel(GEMMOptions &options, HWInformation hw_info, SizeParams sizes, const GEMMProblem &problem,
7677
const std::vector<StrategyRequirement> &reqs = std::vector<StrategyRequirement>(),
7778
void (*strategyAdjuster)(GEMMStrategy &strategy) = nullptr, gemmstone::SelectionObserver *observer = nullptr) {
78-
return gemmstone::selectGEMMMicrokernel(protocol, hw_info, sizes, problem, reqs, strategyAdjuster, observer);
79+
return gemmstone::microkernel::selectGEMM(options, hw_info, sizes, problem, reqs, strategyAdjuster);
7980
}
80-
inline Package select_gemm_microkernel(GEMMProtocol protocol, HWInformation hw_info, SizeParams sizes, const GEMMProblem &problem,
81+
inline Package select_gemm_microkernel(GEMMOptions &options, HWInformation hw_info, SizeParams sizes, const GEMMProblem &problem,
8182
gemmstone::SelectionObserver *observer) {
82-
return gemmstone::selectGEMMMicrokernel(protocol, hw_info, sizes, problem, {}, nullptr, observer);
83+
return gemmstone::microkernel::selectGEMM(options, hw_info, sizes, problem, {}, nullptr);
8384
}
8485

8586
static inline int alignment_for_ld(int ld) {
86-
return gemmstone::alignmentForLD(ld);
87+
return gemmstone::microkernel::alignmentForLD(ld);
8788
}
8889

8990
} // namespace micro

src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,9 @@ gpu_arch convert_ngen_arch(ngen::HW gpu_arch) {
6363
case ngen::HW::XeHPC: return gpu_arch::xe_hpc;
6464
case ngen::HW::Xe2: return gpu_arch::xe2;
6565
case ngen::HW::Xe3: return gpu_arch::xe3;
66+
case ngen::HW::XE3P_35_10: return gpu_arch::xe3p_35_10;
67+
case ngen::HW::XE3P_35_11: return gpu_arch::xe3p_35_11;
68+
case ngen::HW::XE3P_UNKNOWN: return gpu_arch::xe3p_unknown;
6669
case ngen::HW::Gen10:
6770
case ngen::HW::Unknown: return gpu_arch::unknown;
6871
}

src/plugins/intel_gpu/thirdparty/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ if(ENABLE_ONEDNN_FOR_GPU)
1414
set(ONEDNN_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/onednn_gpu_build")
1515
set(ONEDNN_INSTALL_DIR "${CMAKE_CURRENT_BINARY_DIR}/onednn_gpu_install" CACHE PATH "Installation path for oneDNN GPU library")
1616
set(ONEDNN_PREFIX_DIR "${CMAKE_CURRENT_BINARY_DIR}/onednn_gpu_root")
17-
set(ONEDNN_ENABLED_PRIMITIVES "CONCAT;CONVOLUTION;DECONVOLUTION;INNER_PRODUCT;MATMUL;REORDER;POOLING;REDUCTION;SDPA;RNN")
18-
set(ONEDNN_ENABLED_ISA "XELP;XEHP;XEHPG;XEHPC;XE2;XE3")
17+
set(ONEDNN_ENABLED_PRIMITIVES "CONCAT;CONVOLUTION;DECONVOLUTION;GATED_MLP;INNER_PRODUCT;MATMUL;REORDER;POOLING;REDUCTION;SDPA;RNN")
18+
set(ONEDNN_ENABLED_ISA "ALL")
1919
set(DNNL_GPU_LIBRARY_NAME "openvino_onednn_gpu" CACHE STRING "Name of oneDNN library for Intel GPU Plugin")
2020

2121
if(X86_64)

0 commit comments

Comments
 (0)