Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ enum class gpu_arch {
xe_hpc = 6,
xe2 = 7,
xe3 = 8,
xe3p_35_10 = 9,
xe3p_35_11 = 10,
xe3p_unknown = 11,
};

/// @brief Defines version of GFX IP
Expand Down
5 changes: 2 additions & 3 deletions src/plugins/intel_gpu/src/graph/impls/ocl/kernels_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
#ifndef NOMINMAX
# define NOMINMAX
#endif
#include "gpu/intel/microkernels/fuser.hpp"
#include "gpu/intel/gemm/jit/include/gemmstone/microkernel/fuser.hpp"
#endif

namespace {
Expand Down Expand Up @@ -350,8 +350,7 @@ void kernels_cache::build_batch(const batch_program& batch, compiled_kernels& co
std::vector<uint8_t> binary = kernels[0]->get_binary();
kernels.clear();
// Update binary and rebuild kernel
using namespace dnnl::impl::gpu::intel;
micro::fuseMicrokernels(binary, combined_source.c_str());
gemmstone::microkernel::fuse(binary, combined_source.c_str());
_builder->build_kernels(binary.data(), binary.size(), KernelFormat::NATIVE_BIN, "", kernels);
#else // ENABLE_ONEDNN_FOR_GPU
OPENVINO_THROW("[GPU] Can't compile kernel w/ microkernels as onednn is not available");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ void MoE3GemmMicroGenerator::init_microkernels(const kernel_impl_params& params,
GPU_DEBUG_TRACE_DETAIL << "\t weight group size: " << group_size << "\n";

micro::GEMMProblem problem_moe;
micro::GEMMProtocol::Options opts_moe;
micro::GEMMOptions opts_moe;
opts_moe.slmPtr = true;
opts_moe.kParallelLocal = !is_prefill;
enum class MICRO_DIMENSIONALITY { NONE = -1, SCALAR = 0, VECTOR = 1, MATRIX = 2 };
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ void MoEGemmMicroGenerator::init_microkernels(const kernel_impl_params& params,
GPU_DEBUG_TRACE_DETAIL << "init_microkernels for " << (is_prefill ? "prefill" : "generate") << " : Seq_len:" << n << " Ofm:" << m << " K:" << k << "\n";

micro::GEMMProblem problem_moe;
micro::GEMMProtocol::Options opts_moe;
micro::GEMMOptions opts_moe;
opts_moe.slmPtr = true;
opts_moe.kParallelLocal = !is_prefill;
enum class MICRO_DIMENSIONALITY { NONE = -1, SCALAR = 0, VECTOR = 1, MATRIX = 2 };
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,8 @@ size_t get_subgroup_size(gpu_arch arch) {
case gpu_arch::xe_hp:
case gpu_arch::xe_hpg:
return 8;
case gpu_arch::xe_hpc:
case gpu_arch::xe2:
case gpu_arch::xe3:
return 16;
default:
return 0;
return 16;
}
}

Expand Down Expand Up @@ -1465,8 +1461,7 @@ void SDPAMicroGenerator::init_microkernels(const kernel_impl_params& params,
is_paged_attention,
is_prefill);
break;
case gpu_arch::xe2:
case gpu_arch::xe3: {
default: {
config = choose_config_xe2(static_cast<int32_t>(k_head_size),
static_cast<int32_t>(nkeys_v),
thin_q,
Expand All @@ -1476,8 +1471,6 @@ void SDPAMicroGenerator::init_microkernels(const kernel_impl_params& params,
is_prefill);
break;
}
default:
break;
}

OPENVINO_ASSERT(config != nullptr);
Expand All @@ -1501,7 +1494,7 @@ void SDPAMicroGenerator::init_microkernels(const kernel_impl_params& params,
problem_kq.A.layout = (is_paged_attention && !is_prefill) ? micro::MatrixLayout::N : micro::MatrixLayout::T;

/* Set up microkernel options */
micro::GEMMProtocol::Options opts_kq;
micro::GEMMOptions opts_kq;
opts_kq.localB = true;
opts_kq.slmPtr = true;

Expand Down Expand Up @@ -1635,7 +1628,7 @@ void SDPAMicroGenerator::init_microkernels(const kernel_impl_params& params,
}

/* Set up microkernel options */
micro::GEMMProtocol::Options opts_vs;
micro::GEMMOptions opts_vs;
opts_vs.localB = true;
opts_vs.slmPtr = true;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,9 @@ enum class gpu_arch {
xe_hpc = 6,
xe2 = 7,
xe3 = 8,
xe3p_35_10 = 9,
xe3p_35_11 = 10,
xe3p_unknown = 11,
};


Expand Down
29 changes: 15 additions & 14 deletions src/plugins/intel_gpu/src/kernel_selector/micro_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,27 +17,28 @@
# define NOMINMAX
#endif

#include "gpu/intel/microkernels/package.hpp"
#include "gpu/intel/gemm/jit/include/gemmstone/microkernel/package.hpp"
#include "gpu/intel/gemm/jit/include/gemmstone/kernel_selector.hpp"
#include "gpu/intel/gemm/jit/include/gemmstone/microkernel_provider.hpp"
#include "gpu/intel/microkernels/shim.hpp"
#include "gpu/intel/gemm/jit/include/gemmstone/microkernel_selector.hpp"
#include "gpu/intel/gemm/jit/include/gemmstone/microkernel/shim.hpp"
#include "common/utils.hpp"

namespace micro {

using Package = dnnl::impl::gpu::intel::micro::Package;
using HWInformation = gemmstone::HWInformation;
using Package = gemmstone::microkernel::Package;
using HWInformation = gemmstone::microkernel::HWInformation;
using GEMMProblem = gemmstone::GEMMProblem;
using ABOffset = gemmstone::ABOffset;
using GEMMStrategy = gemmstone::GEMMStrategy;
using GEMMProtocol = dnnl::impl::gpu::intel::micro::GEMMProtocol;
using GEMMProtocol = gemmstone::microkernel::Protocol;
using GEMMOptions = gemmstone::microkernel::GEMMOptions;
using MatrixLayout = gemmstone::MatrixLayout;
using Type = gemmstone::Type;
using SizeParams = gemmstone::SizeParams;
using StrategyRequirement = gemmstone::StrategyRequirement;
using ShimOptions = dnnl::impl::gpu::intel::micro::ShimOptions;
using HostLanguage = dnnl::impl::gpu::intel::micro::HostLanguage;
using Setting = dnnl::impl::gpu::intel::micro::Setting;
using ShimOptions = gemmstone::microkernel::ShimOptions;
using HostLanguage = gemmstone::microkernel::HostLanguage;
using Setting = gemmstone::microkernel::Package::Setting;

using dnnl::impl::utils::rnd_up_pow2;

Expand Down Expand Up @@ -72,18 +73,18 @@ struct MicroKernelPackage {
}
};

inline Package select_gemm_microkernel(GEMMProtocol protocol, HWInformation hw_info, SizeParams sizes, const GEMMProblem &problem,
inline Package select_gemm_microkernel(GEMMOptions &options, HWInformation hw_info, SizeParams sizes, const GEMMProblem &problem,
const std::vector<StrategyRequirement> &reqs = std::vector<StrategyRequirement>(),
void (*strategyAdjuster)(GEMMStrategy &strategy) = nullptr, gemmstone::SelectionObserver *observer = nullptr) {
return gemmstone::selectGEMMMicrokernel(protocol, hw_info, sizes, problem, reqs, strategyAdjuster, observer);
return gemmstone::microkernel::selectGEMM(options, hw_info, sizes, problem, reqs, strategyAdjuster);
}
inline Package select_gemm_microkernel(GEMMProtocol protocol, HWInformation hw_info, SizeParams sizes, const GEMMProblem &problem,
inline Package select_gemm_microkernel(GEMMOptions &options, HWInformation hw_info, SizeParams sizes, const GEMMProblem &problem,
gemmstone::SelectionObserver *observer) {
return gemmstone::selectGEMMMicrokernel(protocol, hw_info, sizes, problem, {}, nullptr, observer);
return gemmstone::microkernel::selectGEMM(options, hw_info, sizes, problem, {}, nullptr);
}

static inline int alignment_for_ld(int ld) {
return gemmstone::alignmentForLD(ld);
return gemmstone::microkernel::alignmentForLD(ld);
}

} // namespace micro
Expand Down
3 changes: 3 additions & 0 deletions src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ gpu_arch convert_ngen_arch(ngen::HW gpu_arch) {
case ngen::HW::XeHPC: return gpu_arch::xe_hpc;
case ngen::HW::Xe2: return gpu_arch::xe2;
case ngen::HW::Xe3: return gpu_arch::xe3;
case ngen::HW::XE3P_35_10: return gpu_arch::xe3p_35_10;
case ngen::HW::XE3P_35_11: return gpu_arch::xe3p_35_11;
case ngen::HW::XE3P_UNKNOWN: return gpu_arch::xe3p_unknown;
case ngen::HW::Gen10:
case ngen::HW::Unknown: return gpu_arch::unknown;
}
Expand Down
4 changes: 2 additions & 2 deletions src/plugins/intel_gpu/thirdparty/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ if(ENABLE_ONEDNN_FOR_GPU)
set(ONEDNN_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/onednn_gpu_build")
set(ONEDNN_INSTALL_DIR "${CMAKE_CURRENT_BINARY_DIR}/onednn_gpu_install" CACHE PATH "Installation path for oneDNN GPU library")
set(ONEDNN_PREFIX_DIR "${CMAKE_CURRENT_BINARY_DIR}/onednn_gpu_root")
set(ONEDNN_ENABLED_PRIMITIVES "CONCAT;CONVOLUTION;DECONVOLUTION;INNER_PRODUCT;MATMUL;REORDER;POOLING;REDUCTION;SDPA;RNN")
set(ONEDNN_ENABLED_ISA "XELP;XEHP;XEHPG;XEHPC;XE2;XE3")
set(ONEDNN_ENABLED_PRIMITIVES "CONCAT;CONVOLUTION;DECONVOLUTION;GATED_MLP;INNER_PRODUCT;MATMUL;REORDER;POOLING;REDUCTION;SDPA;RNN")
set(ONEDNN_ENABLED_ISA "ALL")
set(DNNL_GPU_LIBRARY_NAME "openvino_onednn_gpu" CACHE STRING "Name of oneDNN library for Intel GPU Plugin")

if(X86_64)
Expand Down
2 changes: 1 addition & 1 deletion src/plugins/intel_gpu/thirdparty/onednn_gpu
Loading