From 811f610de9d4e8bb82932fb97ba621ab7c83b9bd Mon Sep 17 00:00:00 2001 From: Mirza Halilcevic Date: Wed, 29 Oct 2025 08:43:19 -0500 Subject: [PATCH 1/5] Update gfx1201 GEMM and CONV quick-tune lists. --- .../Rock/Tuning/QuickTuningPerfconfigs.inc | 345 +++++++++--------- .../Dialect/Rock/ParamLookupTableTests.cpp | 6 +- 2 files changed, 172 insertions(+), 179 deletions(-) diff --git a/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc b/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc index 9660c49fb9f3..47fa43bcdce3 100644 --- a/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc +++ b/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc @@ -41,22 +41,6 @@ const InitParamsNonAccel PopulateParams::initParametersGemmGfx1100[PopulateParam }; // END_GEMM_NonAccel_f32_gfx1100_DEFS -// BEGIN_GEMM_NonAccel_f32_gfx1200_DEFS -const InitParamsNonAccel PopulateParams::initParametersGemmGfx1200[PopulateParams::nInitParametersGemmGfx1200] = { - {128,128,128,8,4,4,1,1,2}, - {64,64,32,16,4,2,1,1,2}, - {64,64,32,16,2,4,1,1,2}, - {64,32,64,16,4,2,1,1,2}, - {64,128,64,16,4,4,1,1,2}, - {64,64,32,16,4,4,1,1,2}, - {256,128,128,4,2,4,1,1,2}, - {64,128,64,8,2,4,1,1,2}, - {64,128,64,8,4,4,1,1,2}, - {64,32,32,16,2,2,1,1,2}, - {128,128,64,16,4,2,1,1,2} -}; -// END_GEMM_NonAccel_f32_gfx1200_DEFS - // BEGIN_CONV_NonAccel_f32_gfx1000_DEFS const InitParamsNonAccel PopulateParams::initParametersConvGfx1000[PopulateParams::nInitParametersConvGfx1000] = { {128,128,128,4,2,4,1,1,2}, @@ -116,37 +100,54 @@ const InitParamsNonAccel PopulateParams::initParametersConvGfx1100[PopulateParam }; // END_CONV_NonAccel_f32_gfx1100_DEFS -// BEGIN_CONV_NonAccel_f32_gfx1200_DEFS -const InitParamsNonAccel PopulateParams::initParametersConvGfx1200[PopulateParams::nInitParametersConvGfx1200] = { - {128,128,128,16,4,2,1,1,2}, - {64,32,64,16,2,2,1,1,2}, - {128,128,128,8,2,2,1,1,2}, +// BEGIN_GEMM_NonAccel_f32_gfx1201_DEFS +const InitParamsNonAccel PopulateParams::initParametersGemmGfx1201[PopulateParams::nInitParametersGemmGfx1201] = { + {128,128,128,16,2,2,1,1,2}, + {64,128,64,16,2,2,1,1,2}, {64,64,32,16,2,2,1,1,2}, - {64,32,32,16,2,2,1,1,2}, - {64,64,64,16,2,4,1,1,2}, - {64,64,64,16,4,2,1,1,2}, - {64,128,32,16,2,4,1,1,2}, - {128,128,128,16,2,4,1,1,2}, - {64,32,64,8,2,2,1,1,2}, - {64,128,32,16,2,2,1,1,2}, - {64,32,64,4,2,2,1,1,2}, - {64,32,32,8,2,2,1,1,2}, - {64,64,128,4,2,2,1,1,2}, + {128,128,128,4,4,2,1,1,2}, + {64,64,64,16,4,4,1,1,2}, + {64,64,128,16,4,4,1,1,2}, + {64,64,64,8,2,4,1,1,2}, + {128,128,64,8,2,4,1,1,2}, + {128,32,32,16,2,2,1,1,2}, + {64,32,32,8,2,2,1,1,2} +}; +// END_GEMM_NonAccel_f32_gfx1201_DEFS + +// BEGIN_CONV_NonAccel_f32_gfx1201_DEFS +const InitParamsNonAccel PopulateParams::initParametersConvGfx1201[PopulateParams::nInitParametersConvGfx1201] = { {64,64,128,4,2,4,1,1,2}, - {64,32,32,16,4,2,1,1,2}, - {64,64,128,4,4,2,1,1,2}, - {64,64,64,4,4,2,1,1,2}, - {64,64,32,8,2,4,1,1,2}, - {64,128,64,8,2,4,1,1,2}, - {64,64,128,16,2,4,1,1,2}, - {64,32,128,16,2,2,1,1,2}, + {64,64,128,8,2,4,1,1,2}, + {64,64,64,4,4,4,1,1,2}, + {64,128,64,16,2,2,1,1,2}, + {64,64,128,8,4,4,1,1,2}, + {64,64,128,16,2,2,1,1,2}, {64,64,64,4,2,2,1,1,2}, - {64,128,32,4,2,4,1,1,2}, - {128,32,32,16,2,2,1,1,2}, - {64,32,128,4,4,2,1,1,2}, - {64,32,64,4,4,4,1,1,2} + {64,64,128,16,2,4,1,1,2}, + {64,64,64,16,2,2,1,1,2}, + {64,32,128,4,2,2,1,1,2}, + {128,128,128,16,2,2,1,1,2}, + {128,128,128,16,4,2,1,1,2}, + {128,128,128,8,2,2,1,1,2}, + {64,64,32,8,2,2,1,1,2}, + {64,64,32,16,2,4,1,1,2}, + {128,128,64,8,4,4,1,1,2}, + {64,32,64,8,2,4,1,1,2}, + {64,64,64,16,4,4,1,1,2}, + {128,64,64,8,2,4,1,1,2}, + {128,128,128,4,2,2,1,1,2}, + {128,128,32,16,2,4,1,1,2}, + {128,32,32,16,2,4,1,1,2}, + {64,128,64,8,4,2,1,1,2}, + {64,32,32,8,2,2,1,1,2}, + {256,128,128,8,4,2,1,1,2}, + {128,64,32,16,2,2,1,1,2}, + {128,128,64,16,2,4,1,1,2}, + {256,128,128,16,4,4,1,1,2}, + {256,32,32,16,2,2,1,1,2} }; -// END_CONV_NonAccel_f32_gfx1200_DEFS +// END_CONV_NonAccel_f32_gfx1201_DEFS #endif @@ -162,11 +163,6 @@ static constexpr size_t nInitParametersGemmGfx1100 = 13; static const InitParamsNonAccel initParametersGemmGfx1100[nInitParametersGemmGfx1100]; // END_GEMM_NonAccel_f32_gfx1100_DECS -// BEGIN_GEMM_NonAccel_f32_gfx1200_DECS -static constexpr size_t nInitParametersGemmGfx1200 = 11; -static const InitParamsNonAccel initParametersGemmGfx1200[nInitParametersGemmGfx1200]; -// END_GEMM_NonAccel_f32_gfx1200_DECS - // BEGIN_CONV_NonAccel_f32_gfx1000_DECS static constexpr size_t nInitParametersConvGfx1000 = 24; static const InitParamsNonAccel initParametersConvGfx1000[nInitParametersConvGfx1000]; @@ -177,10 +173,15 @@ static constexpr size_t nInitParametersConvGfx1100 = 25; static const InitParamsNonAccel initParametersConvGfx1100[nInitParametersConvGfx1100]; // END_CONV_NonAccel_f32_gfx1100_DECS -// BEGIN_CONV_NonAccel_f32_gfx1200_DECS -static constexpr size_t nInitParametersConvGfx1200 = 27; -static const InitParamsNonAccel initParametersConvGfx1200[nInitParametersConvGfx1200]; -// END_CONV_NonAccel_f32_gfx1200_DECS +// BEGIN_GEMM_NonAccel_f32_gfx1201_DECS +static constexpr size_t nInitParametersGemmGfx1201 = 10; +static const InitParamsNonAccel initParametersGemmGfx1201[nInitParametersGemmGfx1201]; +// END_GEMM_NonAccel_f32_gfx1201_DECS + +// BEGIN_CONV_NonAccel_f32_gfx1201_DECS +static constexpr size_t nInitParametersConvGfx1201 = 29; +static const InitParamsNonAccel initParametersConvGfx1201[nInitParametersConvGfx1201]; +// END_CONV_NonAccel_f32_gfx1201_DECS #endif @@ -928,37 +929,6 @@ const InitParamsAccel PopulateParamsWmma::initParametersFp16GemmGfx1100[Populate }; // END_GEMM_Wmma_f16_gfx1100_DEFS -// BEGIN_GEMM_Wmma_f16_gfx1200_DEFS -const InitParamsAccel PopulateParamsWmma::initParametersFp16GemmGfx1200[PopulateParamsWmma::nInitParametersFp16GemmGfx1200] = { - {128,128,2,128,32,16,8,1,2,2,true,true}, - {128,128,4,32,64,16,8,1,1,2,true,true}, - {128,256,4,64,64,16,8,1,2,2,true,true}, - {128,64,8,64,32,16,8,1,1,2,true,true}, - {64,256,4,32,64,16,8,1,1,2,true,true}, - {64,64,4,32,32,16,8,1,2,2,true,true}, - {32,128,8,32,32,16,8,1,2,2,true,true}, - {32,32,8,16,16,16,8,1,2,2,true,true}, - {64,128,4,64,32,16,8,1,2,2,true,true}, - {32,64,8,16,64,16,8,1,2,2,true,true}, - {16,32,8,16,16,16,8,1,2,2,true,true}, - {128,128,8,32,32,16,8,1,1,2,true,true}, - {256,128,2,128,32,16,8,1,2,2,true,true}, - {32,64,4,32,16,16,4,1,2,2,true,true}, - {256,128,8,64,32,16,8,1,1,2,true,true}, - {16,16,8,16,16,16,16,1,2,2,true,true}, - {64,64,2,32,32,16,8,1,1,2,true,true}, - {16,64,8,16,32,16,8,1,2,2,true,true}, - {64,32,8,16,32,16,8,1,1,2,true,true}, - {64,256,8,32,64,16,4,1,2,2,true,true}, - {16,32,8,16,32,16,16,1,1,2,true,true}, - {128,256,8,64,32,16,8,1,1,2,true,true}, - {16,16,8,16,16,16,16,1,1,2,true,true}, - {32,64,4,32,32,16,8,1,2,2,true,true}, - {128,128,8,32,64,16,16,1,1,2,true,true}, - {128,256,4,16,64,16,4,1,1,2,true,true} -}; -// END_GEMM_Wmma_f16_gfx1200_DEFS - // BEGIN_CONV_Wmma_f16_gfx1000_DEFS const InitParamsAccel PopulateParamsWmma::initParametersFp16ConvGfx1000[PopulateParamsWmma::nInitParametersFp16ConvGfx1000] = { {128,64,8,32,64,16,8,1,1,2,true,true}, @@ -1022,40 +992,6 @@ const InitParamsAccel PopulateParamsWmma::initParametersFp16ConvGfx1100[Populate }; // END_CONV_Wmma_f16_gfx1100_DEFS -// BEGIN_CONV_Wmma_f16_gfx1200_DEFS -const InitParamsAccel PopulateParamsWmma::initParametersFp16ConvGfx1200[PopulateParamsWmma::nInitParametersFp16ConvGfx1200] = { - {64,64,8,32,32,16,8,1,2,2,true,true}, - {128,64,8,32,64,16,8,1,1,2,true,true}, - {64,256,8,64,32,16,8,1,1,2,true,true}, - {128,128,8,64,64,16,8,1,1,2,true,true}, - {64,128,4,32,64,16,8,1,1,2,true,true}, - {64,256,4,32,64,16,8,1,1,2,true,true}, - {64,32,8,32,16,16,8,1,2,2,true,true}, - {64,32,8,16,32,16,8,1,2,2,true,true}, - {32,64,8,32,16,16,8,1,2,2,true,true}, - {128,64,2,32,64,16,8,1,2,2,true,true}, - {64,128,8,64,32,16,8,1,2,2,true,true}, - {64,128,4,64,32,16,8,1,2,2,true,true}, - {128,256,8,64,32,16,8,1,1,2,true,true}, - {32,16,8,16,16,16,8,1,2,2,true,true}, - {64,256,8,32,32,16,8,1,1,2,true,true}, - {128,128,8,32,16,16,8,1,2,2,true,true}, - {16,16,8,16,16,16,16,1,2,2,true,true}, - {64,128,8,32,32,16,4,1,2,2,true,true}, - {16,32,8,16,16,16,16,1,2,2,true,true}, - {256,128,8,32,32,16,8,1,1,2,true,true}, - {16,64,8,16,16,16,8,1,2,2,true,true}, - {128,64,4,64,64,16,4,1,2,2,true,true}, - {32,128,2,32,64,16,8,1,1,2,true,true}, - {64,64,2,64,64,16,8,1,1,2,true,true}, - {32,256,2,32,64,16,8,1,1,2,true,true}, - {64,64,2,64,64,16,8,1,2,2,true,true}, - {128,256,4,128,64,16,4,1,2,2,true,true}, - {32,128,2,32,64,16,8,1,2,2,true,true}, - {32,256,4,16,128,16,4,1,2,2,true,true} -}; -// END_CONV_Wmma_f16_gfx1200_DEFS - // BEGIN_GEMM_Wmma_fp8_gfx1000_DEFS const InitParamsAccel PopulateParamsWmma::initParametersFp8GemmGfx1000[PopulateParamsWmma::nInitParametersFp8GemmGfx1000] = { {128,128,4,32,64,16,16,1,1,2,true,true}, @@ -1139,20 +1075,6 @@ const InitParamsAccel PopulateParamsWmma::initParametersI8GemmGfx1100[PopulatePa }; // END_GEMM_Wmma_i8_gfx1100_DEFS -// BEGIN_GEMM_Wmma_i8_gfx1200_DEFS -const InitParamsAccel PopulateParamsWmma::initParametersI8GemmGfx1200[PopulateParamsWmma::nInitParametersI8GemmGfx1200] = { - {128,64,8,32,32,16,16,1,1,2,true,true}, - {256,128,4,64,32,16,16,1,2,2,true,true}, - {128,64,4,32,32,16,8,1,1,2,true,true}, - {128,64,2,32,64,16,16,1,2,2,true,true}, - {32,32,8,16,32,16,16,1,2,2,true,true}, - {128,128,2,128,32,16,16,1,1,2,true,true}, - {128,256,4,128,32,16,16,1,2,2,true,true}, - {128,32,8,64,16,16,16,1,1,2,true,true}, - {64,64,8,16,16,16,16,1,2,2,true,true} -}; -// END_GEMM_Wmma_i8_gfx1200_DEFS - // BEGIN_CONV_Wmma_i8_gfx1000_DEFS const InitParamsAccel PopulateParamsWmma::initParametersForwardI8ConvGfx1000[PopulateParamsWmma::nInitParametersForwardI8ConvGfx1000] = { {128,64,8,32,64,16,16,1,1,2,true,true}, @@ -1187,29 +1109,100 @@ const InitParamsAccel PopulateParamsWmma::initParametersForwardI8ConvGfx1100[Pop }; // END_CONV_Wmma_i8_gfx1100_DEFS -// BEGIN_CONV_Wmma_i8_gfx1200_DEFS -const InitParamsAccel PopulateParamsWmma::initParametersForwardI8ConvGfx1200[PopulateParamsWmma::nInitParametersForwardI8ConvGfx1200] = { - {64,32,8,32,16,16,16,1,2,2,true,true}, - {128,64,8,32,32,16,16,1,2,2,true,true}, - {64,64,4,32,32,16,16,1,1,2,true,true}, - {128,64,8,16,32,16,16,1,2,2,true,true}, +// BEGIN_GEMM_Wmma_f16_gfx1201_DEFS +const InitParamsAccel PopulateParamsWmma::initParametersFp16GemmGfx1201[PopulateParamsWmma::nInitParametersFp16GemmGfx1201] = { + {128,64,4,64,32,16,8,1,1,2,true,true}, + {128,128,8,32,128,16,8,1,1,2,true,true}, + {32,64,8,16,32,16,8,1,2,2,true,true}, + {64,128,2,32,64,16,8,1,2,2,true,true}, + {16,32,8,16,16,16,8,1,1,2,true,true}, + {128,256,4,64,64,16,8,1,2,2,true,true}, + {16,16,8,16,16,16,16,1,2,2,true,true}, + {128,128,4,128,32,16,8,1,1,2,true,true}, + {64,64,8,32,32,16,8,1,1,2,true,true}, + {32,64,8,32,16,16,8,1,1,2,true,true}, + {32,128,4,32,32,16,8,1,2,2,true,true}, + {256,64,4,32,64,16,8,1,1,2,true,true}, + {256,128,4,128,32,16,8,1,2,2,true,true}, + {64,64,8,64,32,16,8,1,1,2,true,true} +}; +// END_GEMM_Wmma_f16_gfx1201_DEFS + +// BEGIN_GEMM_Wmma_i8_gfx1201_DEFS +const InitParamsAccel PopulateParamsWmma::initParametersI8GemmGfx1201[PopulateParamsWmma::nInitParametersI8GemmGfx1201] = { {128,64,4,64,32,16,16,1,1,2,true,true}, - {64,128,8,64,32,16,16,1,1,2,true,true}, - {128,64,4,64,32,16,16,1,2,2,true,true}, - {32,16,8,16,16,16,16,1,2,2,true,true}, - {16,32,8,16,32,16,16,1,2,2,true,true}, - {16,16,8,16,16,16,16,1,1,2,true,true}, - {64,256,2,32,64,16,8,1,2,2,true,true}, - {16,128,4,16,128,16,4,1,2,2,true,true}, - {64,128,2,64,128,16,8,1,1,2,true,true}, + {32,32,8,16,16,16,16,1,2,2,true,true}, + {128,128,4,64,64,16,16,1,1,2,true,true}, + {256,64,4,32,64,16,16,1,1,2,true,true}, + {128,32,4,32,32,16,16,1,1,2,true,true}, + {128,256,8,64,64,16,8,1,1,2,true,true}, + {64,32,8,32,16,16,16,1,1,2,true,true}, + {256,128,4,128,32,16,16,1,1,2,true,true}, + {16,128,8,16,32,16,16,1,1,2,true,true}, + {256,128,8,64,64,16,8,1,2,2,true,true}, + {128,256,2,128,64,16,8,1,1,2,true,true}, + {256,256,8,64,32,16,8,1,2,2,true,true}, + {64,64,8,32,32,16,16,1,1,2,true,true} +}; +// END_GEMM_Wmma_i8_gfx1201_DEFS + +// BEGIN_CONV_Wmma_f16_gfx1201_DEFS +const InitParamsAccel PopulateParamsWmma::initParametersFp16ConvGfx1201[PopulateParamsWmma::nInitParametersFp16ConvGfx1201] = { + {128,128,4,128,32,16,8,1,1,2,true,true}, + {128,64,4,32,64,16,8,1,1,2,true,true}, + {128,64,8,32,64,16,8,1,1,2,true,true}, + {64,256,2,64,64,16,8,1,1,2,true,true}, + {128,32,8,32,32,16,8,1,1,2,true,true}, + {64,32,8,32,16,16,8,1,1,2,true,true}, + {128,128,2,128,32,16,8,1,1,2,true,true}, + {128,128,8,32,64,16,8,1,1,2,true,true}, + {256,128,4,64,64,16,8,1,1,2,true,true}, + {64,128,2,64,32,16,8,1,1,2,true,true}, + {256,64,4,128,32,16,8,1,1,2,true,true}, + {128,16,8,32,16,16,8,1,1,2,true,true}, + {256,128,8,128,32,16,8,1,1,2,true,true}, + {32,64,8,32,16,16,8,1,2,2,true,true}, + {64,16,8,16,16,16,8,1,1,2,true,true}, + {16,64,4,16,64,16,8,1,1,2,true,true}, + {32,128,4,32,32,16,8,1,2,2,true,true}, + {128,128,2,32,128,16,8,1,1,2,true,true}, + {64,128,4,16,128,16,8,1,1,2,true,true}, + {16,64,8,16,16,16,8,1,2,2,true,true}, + {256,256,8,128,32,16,8,1,1,2,true,true}, + {32,64,8,16,16,16,8,1,2,2,true,true}, + {128,256,8,64,64,16,8,1,1,2,true,true}, {32,128,2,32,128,16,8,1,1,2,true,true}, - {128,256,4,128,128,16,4,1,2,2,true,true}, - {32,256,4,32,128,16,4,1,1,2,true,true}, - {32,256,4,32,64,16,4,1,2,2,true,true}, - {64,128,2,64,64,16,8,1,1,2,true,true}, - {16,64,4,16,32,16,4,1,1,2,true,true} + {64,128,8,32,32,16,8,1,2,2,true,true}, + {64,16,8,16,16,16,16,1,1,2,true,true}, + {32,128,8,16,32,16,8,1,2,2,true,true}, + {32,64,8,32,32,16,8,1,2,2,true,true}, + {64,64,4,64,16,16,8,1,1,2,true,true}, + {16,32,4,16,16,16,8,1,2,2,true,true}, + {64,128,8,32,16,16,8,1,2,2,true,true}, + {32,16,8,16,16,16,16,1,1,2,true,true}, + {64,64,8,16,16,16,8,1,2,2,true,true}, + {32,32,4,16,16,16,16,1,2,2,true,true}, + {128,32,8,16,16,16,8,1,2,2,true,true}, + {16,16,8,16,16,16,16,1,2,2,true,true} }; -// END_CONV_Wmma_i8_gfx1200_DEFS +// END_CONV_Wmma_f16_gfx1201_DEFS + +// BEGIN_CONV_Wmma_i8_gfx1201_DEFS +const InitParamsAccel PopulateParamsWmma::initParametersForwardI8ConvGfx1201[PopulateParamsWmma::nInitParametersForwardI8ConvGfx1201] = { + {128,64,4,64,32,16,16,1,1,2,true,true}, + {256,64,4,32,64,16,16,1,2,2,true,true}, + {32,64,2,32,32,16,16,1,2,2,true,true}, + {64,16,8,16,16,16,16,1,1,2,true,true}, + {64,16,8,16,16,16,16,1,2,2,true,true}, + {128,16,8,32,16,16,16,1,1,2,true,true}, + {128,128,4,64,64,16,8,1,2,2,true,true}, + {128,128,4,64,64,16,16,1,1,2,true,true}, + {256,32,8,64,32,16,16,1,1,2,true,true}, + {32,256,2,32,128,16,8,1,1,2,true,true}, + {64,128,2,64,32,16,8,1,1,2,true,true}, + {256,128,8,32,128,16,8,1,1,2,true,true} +}; +// END_CONV_Wmma_i8_gfx1201_DEFS #endif @@ -1225,11 +1218,6 @@ static constexpr size_t nInitParametersFp16GemmGfx1100 = 21; static const InitParamsAccel initParametersFp16GemmGfx1100[nInitParametersFp16GemmGfx1100]; // END_GEMM_Wmma_f16_gfx1100_DECS -// BEGIN_GEMM_Wmma_f16_gfx1200_DECS -static constexpr size_t nInitParametersFp16GemmGfx1200 = 26; -static const InitParamsAccel initParametersFp16GemmGfx1200[nInitParametersFp16GemmGfx1200]; -// END_GEMM_Wmma_f16_gfx1200_DECS - // BEGIN_CONV_Wmma_f16_gfx1000_DECS static constexpr size_t nInitParametersFp16ConvGfx1000 = 26; static const InitParamsAccel initParametersFp16ConvGfx1000[nInitParametersFp16ConvGfx1000]; @@ -1240,11 +1228,6 @@ static constexpr size_t nInitParametersFp16ConvGfx1100 = 27; static const InitParamsAccel initParametersFp16ConvGfx1100[nInitParametersFp16ConvGfx1100]; // END_CONV_Wmma_f16_gfx1100_DECS -// BEGIN_CONV_Wmma_f16_gfx1200_DECS -static constexpr size_t nInitParametersFp16ConvGfx1200 = 29; -static const InitParamsAccel initParametersFp16ConvGfx1200[nInitParametersFp16ConvGfx1200]; -// END_CONV_Wmma_f16_gfx1200_DECS - // BEGIN_GEMM_Wmma_fp8_gfx1000_DECS static constexpr size_t nInitParametersFp8GemmGfx1000 = 18; static const InitParamsAccel initParametersFp8GemmGfx1000[nInitParametersFp8GemmGfx1000]; @@ -1265,11 +1248,6 @@ static constexpr size_t nInitParametersI8GemmGfx1100 = 9; static const InitParamsAccel initParametersI8GemmGfx1100[nInitParametersI8GemmGfx1100]; // END_GEMM_Wmma_i8_gfx1100_DECS -// BEGIN_GEMM_Wmma_i8_gfx1200_DECS -static constexpr size_t nInitParametersI8GemmGfx1200 = 9; -static const InitParamsAccel initParametersI8GemmGfx1200[nInitParametersI8GemmGfx1200]; -// END_GEMM_Wmma_i8_gfx1200_DECS - // BEGIN_CONV_Wmma_i8_gfx1000_DECS static constexpr size_t nInitParametersForwardI8ConvGfx1000 = 11; static const InitParamsAccel initParametersForwardI8ConvGfx1000[nInitParametersForwardI8ConvGfx1000]; @@ -1280,10 +1258,25 @@ static constexpr size_t nInitParametersForwardI8ConvGfx1100 = 13; static const InitParamsAccel initParametersForwardI8ConvGfx1100[nInitParametersForwardI8ConvGfx1100]; // END_CONV_Wmma_i8_gfx1100_DECS -// BEGIN_CONV_Wmma_i8_gfx1200_DECS -static constexpr size_t nInitParametersForwardI8ConvGfx1200 = 19; -static const InitParamsAccel initParametersForwardI8ConvGfx1200[nInitParametersForwardI8ConvGfx1200]; -// END_CONV_Wmma_i8_gfx1200_DECS +// BEGIN_GEMM_Wmma_f16_gfx1201_DECS +static constexpr size_t nInitParametersFp16GemmGfx1201 = 14; +static const InitParamsAccel initParametersFp16GemmGfx1201[nInitParametersFp16GemmGfx1201]; +// END_GEMM_Wmma_f16_gfx1201_DECS + +// BEGIN_GEMM_Wmma_i8_gfx1201_DECS +static constexpr size_t nInitParametersI8GemmGfx1201 = 13; +static const InitParamsAccel initParametersI8GemmGfx1201[nInitParametersI8GemmGfx1201]; +// END_GEMM_Wmma_i8_gfx1201_DECS + +// BEGIN_CONV_Wmma_f16_gfx1201_DECS +static constexpr size_t nInitParametersFp16ConvGfx1201 = 36; +static const InitParamsAccel initParametersFp16ConvGfx1201[nInitParametersFp16ConvGfx1201]; +// END_CONV_Wmma_f16_gfx1201_DECS + +// BEGIN_CONV_Wmma_i8_gfx1201_DECS +static constexpr size_t nInitParametersForwardI8ConvGfx1201 = 12; +static const InitParamsAccel initParametersForwardI8ConvGfx1201[nInitParametersForwardI8ConvGfx1201]; +// END_CONV_Wmma_i8_gfx1201_DECS #endif @@ -1291,10 +1284,10 @@ static const InitParamsAccel initParametersForwardI8ConvGfx1200[nInitParametersF {"gfx1000_gemm_f32", {PopulateParams::initParametersGemmGfx1000, PopulateParams::nInitParametersGemmGfx1000}}, {"gfx1100_gemm_f32", {PopulateParams::initParametersGemmGfx1100, PopulateParams::nInitParametersGemmGfx1100}}, -{"gfx1200_gemm_f32", {PopulateParams::initParametersGemmGfx1200, PopulateParams::nInitParametersGemmGfx1200}}, {"gfx1000_conv_f32", {PopulateParams::initParametersConvGfx1000, PopulateParams::nInitParametersConvGfx1000}}, {"gfx1100_conv_f32", {PopulateParams::initParametersConvGfx1100, PopulateParams::nInitParametersConvGfx1100}}, -{"gfx1200_conv_f32", {PopulateParams::initParametersConvGfx1200, PopulateParams::nInitParametersConvGfx1200}}, +{"gfx1201_gemm_f32", {PopulateParams::initParametersGemmGfx1201, PopulateParams::nInitParametersGemmGfx1201}}, +{"gfx1201_conv_f32", {PopulateParams::initParametersConvGfx1201, PopulateParams::nInitParametersConvGfx1201}}, #endif @@ -1329,17 +1322,17 @@ static const InitParamsAccel initParametersForwardI8ConvGfx1200[nInitParametersF {"gfx950_gemm_f4", {PopulateParamsXDL::initParametersF4GemmGfx950, PopulateParamsXDL::nInitParametersF4GemmGfx950}}, {"gfx1000_gemm_f16", {PopulateParamsWmma::initParametersFp16GemmGfx1000, PopulateParamsWmma::nInitParametersFp16GemmGfx1000}}, {"gfx1100_gemm_f16", {PopulateParamsWmma::initParametersFp16GemmGfx1100, PopulateParamsWmma::nInitParametersFp16GemmGfx1100}}, -{"gfx1200_gemm_f16", {PopulateParamsWmma::initParametersFp16GemmGfx1200, PopulateParamsWmma::nInitParametersFp16GemmGfx1200}}, {"gfx1000_conv_f16", {PopulateParamsWmma::initParametersFp16ConvGfx1000, PopulateParamsWmma::nInitParametersFp16ConvGfx1000}}, {"gfx1100_conv_f16", {PopulateParamsWmma::initParametersFp16ConvGfx1100, PopulateParamsWmma::nInitParametersFp16ConvGfx1100}}, -{"gfx1200_conv_f16", {PopulateParamsWmma::initParametersFp16ConvGfx1200, PopulateParamsWmma::nInitParametersFp16ConvGfx1200}}, {"gfx1000_gemm_fp8", {PopulateParamsWmma::initParametersFp8GemmGfx1000, PopulateParamsWmma::nInitParametersFp8GemmGfx1000}}, {"gfx1000_conv_fp8", {PopulateParamsWmma::initParametersForwardFp8ConvGfx1000, PopulateParamsWmma::nInitParametersForwardFp8ConvGfx1000}}, {"gfx1000_gemm_i8", {PopulateParamsWmma::initParametersI8GemmGfx1000, PopulateParamsWmma::nInitParametersI8GemmGfx1000}}, {"gfx1100_gemm_i8", {PopulateParamsWmma::initParametersI8GemmGfx1100, PopulateParamsWmma::nInitParametersI8GemmGfx1100}}, -{"gfx1200_gemm_i8", {PopulateParamsWmma::initParametersI8GemmGfx1200, PopulateParamsWmma::nInitParametersI8GemmGfx1200}}, {"gfx1000_conv_i8", {PopulateParamsWmma::initParametersForwardI8ConvGfx1000, PopulateParamsWmma::nInitParametersForwardI8ConvGfx1000}}, {"gfx1100_conv_i8", {PopulateParamsWmma::initParametersForwardI8ConvGfx1100, PopulateParamsWmma::nInitParametersForwardI8ConvGfx1100}}, -{"gfx1200_conv_i8", {PopulateParamsWmma::initParametersForwardI8ConvGfx1200, PopulateParamsWmma::nInitParametersForwardI8ConvGfx1200}}, +{"gfx1201_gemm_f16", {PopulateParamsWmma::initParametersFp16GemmGfx1201, PopulateParamsWmma::nInitParametersFp16GemmGfx1201}}, +{"gfx1201_gemm_i8", {PopulateParamsWmma::initParametersI8GemmGfx1201, PopulateParamsWmma::nInitParametersI8GemmGfx1201}}, +{"gfx1201_conv_f16", {PopulateParamsWmma::initParametersFp16ConvGfx1201, PopulateParamsWmma::nInitParametersFp16ConvGfx1201}}, +{"gfx1201_conv_i8", {PopulateParamsWmma::initParametersForwardI8ConvGfx1201, PopulateParamsWmma::nInitParametersForwardI8ConvGfx1201}}, #endif diff --git a/mlir/unittests/Dialect/Rock/ParamLookupTableTests.cpp b/mlir/unittests/Dialect/Rock/ParamLookupTableTests.cpp index 6da17c9b0d89..931188bc0d47 100644 --- a/mlir/unittests/Dialect/Rock/ParamLookupTableTests.cpp +++ b/mlir/unittests/Dialect/Rock/ParamLookupTableTests.cpp @@ -25,8 +25,8 @@ TEST(FindFallbackTest, OldestRelative) { } TEST(FindFallbackTest, YoungestRelative) { - // gfx1200 is the youngest available relative for gfx1900 - EXPECT_EQ("gfx1200_conv_f16", ParamLookupTable::findFallback( + // gfx1201 is the youngest available relative for gfx1900 + EXPECT_EQ("gfx1201_conv_f16", ParamLookupTable::findFallback( "gfx1900_conv_f16")); } @@ -63,6 +63,6 @@ TEST(FindFallbackTest, NoRelativesBySuffix) { TEST(FindFallbackTest, AnyGfxForNonAccel) { // Any gfx version is acceptable for non-accelerated operations EXPECT_EQ( - "gfx1200_gemm_f32", + "gfx1201_gemm_f32", ParamLookupTable::findFallback("gfx942_gemm_f32")); } From d154f504adf52a56a425f4aed5c1ed98e3293f8e Mon Sep 17 00:00:00 2001 From: Mirza Halilcevic Date: Tue, 4 Nov 2025 14:36:31 +0000 Subject: [PATCH 2/5] Update gfx1151 GEMM and CONV quick-tune lists. --- .../Rock/Tuning/QuickTuningPerfconfigs.inc | 228 ++++++++++++++++++ 1 file changed, 228 insertions(+) diff --git a/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc b/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc index 47fa43bcdce3..bf31438297ae 100644 --- a/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc +++ b/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc @@ -149,6 +149,64 @@ const InitParamsNonAccel PopulateParams::initParametersConvGfx1201[PopulateParam }; // END_CONV_NonAccel_f32_gfx1201_DEFS +// BEGIN_GEMM_NonAccel_f32_gfx1151_DEFS +const InitParamsNonAccel PopulateParams::initParametersGemmGfx1151[PopulateParams::nInitParametersGemmGfx1151] = { + {64,64,128,16,2,2,1,1,2}, + {128,128,128,16,2,2,1,1,2}, + {64,128,64,16,2,2,1,1,2}, + {64,64,32,16,2,4,1,1,2}, + {64,64,128,8,4,4,1,1,2}, + {64,32,64,16,4,2,1,1,2}, + {128,128,128,4,2,2,1,1,2}, + {64,64,64,4,2,2,1,1,2}, + {128,128,64,8,4,4,1,1,2}, + {64,128,32,16,4,2,1,1,2}, + {64,128,32,4,2,2,1,1,2}, + {128,128,64,8,2,4,1,1,2}, + {256,64,128,16,4,4,1,1,2}, + {128,128,32,16,2,2,1,1,2} +}; +// END_GEMM_NonAccel_f32_gfx1151_DEFS + +// BEGIN_CONV_NonAccel_f32_gfx1151_DEFS +const InitParamsNonAccel PopulateParams::initParametersConvGfx1151[PopulateParams::nInitParametersConvGfx1151] = { + {64,64,128,8,2,2,1,1,2}, + {64,64,128,16,2,4,1,1,2}, + {64,64,128,8,4,4,1,1,2}, + {64,32,128,4,2,4,1,1,2}, + {64,64,128,16,4,2,1,1,2}, + {64,128,64,16,2,2,1,1,2}, + {64,32,128,16,2,4,1,1,2}, + {64,64,64,4,4,4,1,1,2}, + {64,64,64,16,2,2,1,1,2}, + {64,128,64,16,2,4,1,1,2}, + {64,64,64,8,4,4,1,1,2}, + {64,64,64,8,2,2,1,1,2}, + {64,64,128,4,4,2,1,1,2}, + {128,128,128,8,4,4,1,1,2}, + {128,128,128,16,2,2,1,1,2}, + {64,64,64,8,4,2,1,1,2}, + {128,128,64,8,2,2,1,1,2}, + {64,128,64,8,2,2,1,1,2}, + {128,128,128,16,2,4,1,1,2}, + {128,128,64,8,4,4,1,1,2}, + {64,64,32,16,2,4,1,1,2}, + {64,128,64,4,2,2,1,1,2}, + {64,64,32,8,2,4,1,1,2}, + {64,32,64,16,2,4,1,1,2}, + {64,128,64,4,4,4,1,1,2}, + {64,64,32,16,4,4,1,1,2}, + {128,32,32,16,2,4,1,1,2}, + {128,128,32,16,4,4,1,1,2}, + {64,32,32,16,4,2,1,1,2}, + {64,128,32,16,2,2,1,1,2}, + {128,128,128,4,2,2,1,1,2}, + {128,64,32,16,2,4,1,1,2}, + {128,64,64,4,4,4,1,1,2}, + {128,32,32,4,2,2,1,1,2} +}; +// END_CONV_NonAccel_f32_gfx1151_DEFS + #endif #ifdef NonAccel_DECLARATIONS_GEN @@ -183,6 +241,16 @@ static constexpr size_t nInitParametersConvGfx1201 = 29; static const InitParamsNonAccel initParametersConvGfx1201[nInitParametersConvGfx1201]; // END_CONV_NonAccel_f32_gfx1201_DECS +// BEGIN_GEMM_NonAccel_f32_gfx1151_DECS +static constexpr size_t nInitParametersGemmGfx1151 = 14; +static const InitParamsNonAccel initParametersGemmGfx1151[nInitParametersGemmGfx1151]; +// END_GEMM_NonAccel_f32_gfx1151_DECS + +// BEGIN_CONV_NonAccel_f32_gfx1151_DECS +static constexpr size_t nInitParametersConvGfx1151 = 34; +static const InitParamsNonAccel initParametersConvGfx1151[nInitParametersConvGfx1151]; +// END_CONV_NonAccel_f32_gfx1151_DECS + #endif #ifdef XDL_DEFINITIONS_GEN @@ -1204,6 +1272,140 @@ const InitParamsAccel PopulateParamsWmma::initParametersForwardI8ConvGfx1201[Pop }; // END_CONV_Wmma_i8_gfx1201_DEFS +// BEGIN_GEMM_Wmma_f16_gfx1151_DEFS +const InitParamsAccel PopulateParamsWmma::initParametersFp16GemmGfx1151[PopulateParamsWmma::nInitParametersFp16GemmGfx1151] = { + {128,128,8,64,64,8,1,1,2,true,true}, + {128,128,4,64,64,8,1,1,2,true,true}, + {128,64,4,64,32,8,1,1,2,true,true}, + {16,16,8,16,16,8,1,2,2,true,true}, + {64,256,2,64,64,8,1,2,2,true,true}, + {16,32,8,16,16,8,1,1,2,true,true}, + {16,16,8,16,16,16,1,1,2,true,true}, + {16,16,8,16,16,16,1,2,2,true,true}, + {128,256,4,64,64,8,1,1,2,true,true}, + {128,64,2,128,32,8,1,1,2,true,true}, + {64,256,2,32,128,8,1,2,2,true,true}, + {256,64,8,128,32,8,1,1,2,true,true}, + {128,64,8,64,32,8,1,1,2,true,true}, + {32,128,4,32,32,8,1,2,2,true,true}, + {32,16,8,32,16,8,1,2,2,true,true}, + {128,64,8,32,32,8,1,1,2,true,true}, + {32,16,8,16,16,16,1,2,2,true,true}, + {128,256,8,128,32,8,1,1,2,true,true}, + {256,128,2,128,32,8,1,1,2,true,true}, + {64,32,8,64,32,8,1,1,2,true,true}, + {128,64,2,32,64,8,1,1,2,true,true}, + {32,128,4,32,32,4,1,1,2,true,true}, + {64,128,4,32,64,8,1,2,2,true,true}, + {64,256,2,64,64,8,1,1,2,true,true}, + {32,64,8,32,32,8,1,1,2,true,true}, + {256,256,2,128,64,8,1,2,2,true,true}, + {16,256,4,16,128,4,1,1,2,true,true} +}; +// END_GEMM_Wmma_f16_gfx1151_DEFS + +// BEGIN_GEMM_Wmma_i8_gfx1151_DEFS +const InitParamsAccel PopulateParamsWmma::initParametersI8GemmGfx1151[PopulateParamsWmma::nInitParametersI8GemmGfx1151] = { + {16,32,8,16,16,16,1,1,2,true,true}, + {16,16,8,16,16,16,1,2,2,true,true}, + {128,64,4,32,64,16,1,1,2,true,true}, + {128,128,8,64,64,4,1,2,2,true,true}, + {128,64,4,128,16,16,1,2,2,true,true}, + {256,128,4,128,32,16,1,1,2,true,true}, + {128,64,4,32,32,16,1,2,2,true,true}, + {64,32,2,32,32,16,1,2,2,true,true}, + {128,256,2,64,64,16,1,2,2,true,true}, + {64,64,4,32,32,16,1,1,2,true,true}, + {32,64,4,16,16,16,1,2,2,true,true}, + {64,256,4,32,32,16,1,1,2,true,true}, + {64,128,2,64,32,16,1,2,2,true,true}, + {128,256,8,64,64,8,1,1,2,true,true}, + {16,32,8,16,16,4,1,2,2,true,true}, + {256,128,2,128,32,16,1,2,2,true,true}, + {64,16,8,32,16,16,1,2,2,true,true}, + {128,32,8,128,16,4,1,2,2,true,true}, + {32,256,8,32,64,8,1,1,2,true,true} +}; +// END_GEMM_Wmma_i8_gfx1151_DEFS + +// BEGIN_CONV_Wmma_f16_gfx1151_DEFS +const InitParamsAccel PopulateParamsWmma::initParametersFp16ConvGfx1151[PopulateParamsWmma::nInitParametersFp16ConvGfx1151] = { + {128,64,8,64,32,8,1,1,2,true,true}, + {256,128,8,128,32,8,1,1,2,true,true}, + {64,64,2,64,64,8,1,1,2,true,true}, + {128,128,4,128,32,8,1,1,2,true,true}, + {64,256,2,64,64,8,1,2,2,true,true}, + {128,32,4,32,32,8,1,1,2,true,true}, + {32,64,2,32,64,8,1,1,2,true,true}, + {128,128,8,64,32,8,1,1,2,true,true}, + {32,64,2,32,64,8,1,2,2,true,true}, + {256,128,4,128,32,8,1,1,2,true,true}, + {256,32,8,64,32,8,1,1,2,true,true}, + {128,64,2,32,64,8,1,2,2,true,true}, + {128,64,8,32,32,8,1,1,2,true,true}, + {64,128,4,64,32,8,1,1,2,true,true}, + {256,256,8,128,32,8,1,1,2,true,true}, + {32,256,2,32,128,8,1,1,2,true,true}, + {64,32,8,32,16,8,1,1,2,true,true}, + {64,32,8,16,32,8,1,2,2,true,true}, + {64,128,8,32,64,8,1,1,2,true,true}, + {64,128,2,64,32,8,1,2,2,true,true}, + {128,128,2,128,32,16,1,1,2,true,true}, + {16,128,4,16,64,8,1,1,2,true,true}, + {64,256,2,64,64,16,1,1,2,true,true}, + {64,64,2,32,32,8,1,2,2,true,true}, + {128,256,8,128,32,8,1,1,2,true,true}, + {64,16,8,16,16,8,1,1,2,true,true}, + {128,32,2,32,32,8,1,1,2,true,true}, + {64,128,8,64,32,8,1,1,2,true,true}, + {128,16,4,32,16,8,1,1,2,true,true}, + {64,64,2,32,32,8,1,1,2,true,true}, + {32,32,8,16,16,8,1,1,2,true,true}, + {128,128,8,64,64,8,1,1,2,true,true}, + {32,256,4,32,64,8,1,1,2,true,true}, + {64,128,4,32,64,8,1,2,2,true,true}, + {64,128,2,32,128,8,1,2,2,true,true}, + {256,64,2,64,64,8,1,2,2,true,true}, + {128,256,2,128,64,16,1,1,2,true,true}, + {32,64,4,16,32,8,1,2,2,true,true}, + {128,16,8,16,16,8,1,1,2,true,true}, + {16,256,8,16,64,4,1,1,2,true,true}, + {16,16,8,16,16,16,1,1,2,true,true}, + {128,32,8,32,32,8,1,2,2,true,true}, + {64,64,4,64,16,8,1,2,2,true,true}, + {32,64,8,16,16,8,1,2,2,true,true}, + {16,16,8,16,16,8,1,1,2,true,true}, + {16,128,8,16,32,8,1,1,2,true,true}, + {128,64,2,128,64,16,1,1,2,true,true}, + {128,128,4,64,32,4,1,2,2,true,true}, + {64,128,2,64,128,16,1,2,2,true,true}, + {64,128,8,16,32,4,1,2,2,true,true} +}; +// END_CONV_Wmma_f16_gfx1151_DEFS + +// BEGIN_CONV_Wmma_i8_gfx1151_DEFS +const InitParamsAccel PopulateParamsWmma::initParametersForwardI8ConvGfx1151[PopulateParamsWmma::nInitParametersForwardI8ConvGfx1151] = { + {128,32,4,64,16,16,1,2,2,true,true}, + {128,16,8,32,16,16,1,1,2,true,true}, + {64,32,8,16,32,16,1,2,2,true,true}, + {64,16,4,16,16,16,1,1,2,true,true}, + {256,32,4,32,32,16,1,1,2,true,true}, + {256,64,4,32,64,16,1,1,2,true,true}, + {64,64,4,64,64,4,1,1,2,true,true}, + {64,32,4,64,32,16,1,1,2,true,true}, + {32,256,2,32,32,8,1,1,2,true,true}, + {64,256,2,64,128,8,1,2,2,true,true}, + {32,64,4,32,64,4,1,1,2,true,true}, + {64,64,2,32,32,16,1,2,2,true,true}, + {32,128,8,32,32,8,1,1,2,true,true}, + {64,128,4,64,32,16,1,2,2,true,true}, + {128,128,8,64,64,8,1,1,2,true,true}, + {128,128,4,32,128,4,1,2,2,true,true}, + {64,256,8,64,32,8,1,1,2,true,true}, + {128,32,2,64,32,8,1,1,2,true,true} +}; +// END_CONV_Wmma_i8_gfx1151_DEFS + #endif #ifdef Wmma_DECLARATIONS_GEN @@ -1278,6 +1480,26 @@ static constexpr size_t nInitParametersForwardI8ConvGfx1201 = 12; static const InitParamsAccel initParametersForwardI8ConvGfx1201[nInitParametersForwardI8ConvGfx1201]; // END_CONV_Wmma_i8_gfx1201_DECS +// BEGIN_GEMM_Wmma_f16_gfx1151_DECS +static constexpr size_t nInitParametersFp16GemmGfx1151 = 27; +static const InitParamsAccel initParametersFp16GemmGfx1151[nInitParametersFp16GemmGfx1151]; +// END_GEMM_Wmma_f16_gfx1151_DECS + +// BEGIN_GEMM_Wmma_i8_gfx1151_DECS +static constexpr size_t nInitParametersI8GemmGfx1151 = 19; +static const InitParamsAccel initParametersI8GemmGfx1151[nInitParametersI8GemmGfx1151]; +// END_GEMM_Wmma_i8_gfx1151_DECS + +// BEGIN_CONV_Wmma_f16_gfx1151_DECS +static constexpr size_t nInitParametersFp16ConvGfx1151 = 50; +static const InitParamsAccel initParametersFp16ConvGfx1151[nInitParametersFp16ConvGfx1151]; +// END_CONV_Wmma_f16_gfx1151_DECS + +// BEGIN_CONV_Wmma_i8_gfx1151_DECS +static constexpr size_t nInitParametersForwardI8ConvGfx1151 = 18; +static const InitParamsAccel initParametersForwardI8ConvGfx1151[nInitParametersForwardI8ConvGfx1151]; +// END_CONV_Wmma_i8_gfx1151_DECS + #endif #ifdef NonAccel_LOOKUP_TABLE_GEN @@ -1288,6 +1510,8 @@ static const InitParamsAccel initParametersForwardI8ConvGfx1201[nInitParametersF {"gfx1100_conv_f32", {PopulateParams::initParametersConvGfx1100, PopulateParams::nInitParametersConvGfx1100}}, {"gfx1201_gemm_f32", {PopulateParams::initParametersGemmGfx1201, PopulateParams::nInitParametersGemmGfx1201}}, {"gfx1201_conv_f32", {PopulateParams::initParametersConvGfx1201, PopulateParams::nInitParametersConvGfx1201}}, +{"gfx1151_gemm_f32", {PopulateParams::initParametersGemmGfx1151, PopulateParams::nInitParametersGemmGfx1151}}, +{"gfx1151_conv_f32", {PopulateParams::initParametersConvGfx1151, PopulateParams::nInitParametersConvGfx1151}}, #endif @@ -1334,5 +1558,9 @@ static const InitParamsAccel initParametersForwardI8ConvGfx1201[nInitParametersF {"gfx1201_gemm_i8", {PopulateParamsWmma::initParametersI8GemmGfx1201, PopulateParamsWmma::nInitParametersI8GemmGfx1201}}, {"gfx1201_conv_f16", {PopulateParamsWmma::initParametersFp16ConvGfx1201, PopulateParamsWmma::nInitParametersFp16ConvGfx1201}}, {"gfx1201_conv_i8", {PopulateParamsWmma::initParametersForwardI8ConvGfx1201, PopulateParamsWmma::nInitParametersForwardI8ConvGfx1201}}, +{"gfx1151_gemm_f16", {PopulateParamsWmma::initParametersFp16GemmGfx1151, PopulateParamsWmma::nInitParametersFp16GemmGfx1151}}, +{"gfx1151_gemm_i8", {PopulateParamsWmma::initParametersI8GemmGfx1151, PopulateParamsWmma::nInitParametersI8GemmGfx1151}}, +{"gfx1151_conv_f16", {PopulateParamsWmma::initParametersFp16ConvGfx1151, PopulateParamsWmma::nInitParametersFp16ConvGfx1151}}, +{"gfx1151_conv_i8", {PopulateParamsWmma::initParametersForwardI8ConvGfx1151, PopulateParamsWmma::nInitParametersForwardI8ConvGfx1151}}, #endif From 492c06819ef6d720526e1b6bae86507057c2c92f Mon Sep 17 00:00:00 2001 From: Mirza Halilcevic Date: Tue, 4 Nov 2025 14:39:04 +0000 Subject: [PATCH 3/5] Update gfx1150 GEMM and CONV quick-tune lists. --- .../Rock/Tuning/QuickTuningPerfconfigs.inc | 238 ++++++++++++++++++ 1 file changed, 238 insertions(+) diff --git a/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc b/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc index bf31438297ae..ffa6da0236c9 100644 --- a/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc +++ b/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc @@ -207,6 +207,71 @@ const InitParamsNonAccel PopulateParams::initParametersConvGfx1151[PopulateParam }; // END_CONV_NonAccel_f32_gfx1151_DEFS +// BEGIN_GEMM_NonAccel_f32_gfx1150_DEFS +const InitParamsNonAccel PopulateParams::initParametersGemmGfx1150[PopulateParams::nInitParametersGemmGfx1150] = { + {64,64,128,4,2,2,1,1,2}, + {128,128,128,16,4,2,1,1,2}, + {64,64,128,16,2,2,1,1,2}, + {64,128,64,16,2,2,1,1,2}, + {64,64,64,8,2,2,1,1,2}, + {64,128,32,16,2,4,1,1,2}, + {256,128,128,8,4,4,1,1,2}, + {128,32,32,16,2,4,1,1,2}, + {64,32,64,8,4,4,1,1,2}, + {128,64,128,4,2,2,1,1,2}, + {256,64,128,16,4,4,1,1,2}, + {128,64,64,4,4,2,1,1,2}, + {64,32,32,16,4,2,1,1,2} +}; +// END_GEMM_NonAccel_f32_gfx1150_DEFS + +// BEGIN_CONV_NonAccel_f32_gfx1150_DEFS +const InitParamsNonAccel PopulateParams::initParametersConvGfx1150[PopulateParams::nInitParametersConvGfx1150] = { + {64,64,128,16,2,2,1,1,2}, + {64,64,128,8,2,2,1,1,2}, + {64,64,128,4,2,2,1,1,2}, + {64,64,128,4,2,4,1,1,2}, + {64,64,128,16,4,2,1,1,2}, + {64,64,128,8,2,4,1,1,2}, + {64,128,64,16,2,2,1,1,2}, + {64,64,128,16,4,4,1,1,2}, + {64,64,128,4,4,2,1,1,2}, + {128,128,128,16,2,4,1,1,2}, + {128,128,128,16,4,2,1,1,2}, + {64,64,64,4,2,4,1,1,2}, + {64,32,128,4,2,2,1,1,2}, + {64,64,64,8,4,2,1,1,2}, + {64,64,64,16,2,2,1,1,2}, + {128,128,128,4,2,2,1,1,2}, + {64,128,64,16,4,2,1,1,2}, + {128,128,128,4,2,4,1,1,2}, + {64,32,64,4,2,2,1,1,2}, + {64,64,32,16,2,2,1,1,2}, + {64,32,64,16,2,2,1,1,2}, + {128,128,64,8,4,2,1,1,2}, + {64,128,64,4,4,4,1,1,2}, + {256,128,128,8,2,2,1,1,2}, + {64,128,32,16,4,2,1,1,2}, + {64,64,32,8,2,4,1,1,2}, + {128,32,32,16,2,4,1,1,2}, + {128,64,64,16,2,4,1,1,2}, + {128,64,32,16,2,2,1,1,2}, + {64,32,32,8,2,2,1,1,2}, + {64,128,32,8,4,2,1,1,2}, + {256,128,128,4,2,4,1,1,2}, + {128,128,32,16,2,4,1,1,2}, + {256,128,128,16,4,2,1,1,2}, + {64,128,32,8,2,4,1,1,2}, + {128,64,64,16,4,2,1,1,2}, + {256,64,64,16,2,4,1,1,2}, + {128,64,64,8,4,4,1,1,2}, + {256,32,128,8,2,2,1,1,2}, + {256,32,64,8,2,2,1,1,2}, + {256,64,32,16,2,2,1,1,2}, + {128,64,32,4,2,2,1,1,2} +}; +// END_CONV_NonAccel_f32_gfx1150_DEFS + #endif #ifdef NonAccel_DECLARATIONS_GEN @@ -251,6 +316,16 @@ static constexpr size_t nInitParametersConvGfx1151 = 34; static const InitParamsNonAccel initParametersConvGfx1151[nInitParametersConvGfx1151]; // END_CONV_NonAccel_f32_gfx1151_DECS +// BEGIN_GEMM_NonAccel_f32_gfx1150_DECS +static constexpr size_t nInitParametersGemmGfx1150 = 13; +static const InitParamsNonAccel initParametersGemmGfx1150[nInitParametersGemmGfx1150]; +// END_GEMM_NonAccel_f32_gfx1150_DECS + +// BEGIN_CONV_NonAccel_f32_gfx1150_DECS +static constexpr size_t nInitParametersConvGfx1150 = 42; +static const InitParamsNonAccel initParametersConvGfx1150[nInitParametersConvGfx1150]; +// END_CONV_NonAccel_f32_gfx1150_DECS + #endif #ifdef XDL_DEFINITIONS_GEN @@ -1406,6 +1481,143 @@ const InitParamsAccel PopulateParamsWmma::initParametersForwardI8ConvGfx1151[Pop }; // END_CONV_Wmma_i8_gfx1151_DEFS +// BEGIN_GEMM_Wmma_f16_gfx1150_DEFS +const InitParamsAccel PopulateParamsWmma::initParametersFp16GemmGfx1150[PopulateParamsWmma::nInitParametersFp16GemmGfx1150] = { + {64,128,8,32,64,8,1,1,2,true,true}, + {128,256,8,64,64,8,1,1,2,true,true}, + {128,256,4,128,32,8,1,1,2,true,true}, + {64,128,4,32,64,8,1,1,2,true,true}, + {64,64,4,64,64,8,1,1,2,true,true}, + {64,256,8,16,128,8,1,1,2,true,true}, + {16,16,8,16,16,16,1,2,2,true,true}, + {16,16,8,16,16,8,1,2,2,true,true}, + {128,64,8,32,32,8,1,1,2,true,true}, + {128,256,2,128,32,8,1,1,2,true,true}, + {64,64,8,32,64,8,1,1,2,true,true}, + {32,16,8,16,16,16,1,2,2,true,true}, + {32,32,8,16,16,8,1,2,2,true,true}, + {256,128,4,128,64,4,1,1,2,true,true}, + {32,64,4,32,64,16,1,1,2,true,true}, + {256,128,4,64,64,16,1,1,2,true,true}, + {16,16,4,16,16,4,1,2,2,true,true}, + {128,64,4,32,32,4,1,1,2,true,true}, + {64,32,4,16,32,4,1,1,2,true,true}, + {128,128,8,64,32,16,1,1,2,true,true}, + {256,64,8,128,32,8,1,1,2,true,true} +}; +// END_GEMM_Wmma_f16_gfx1150_DEFS + +// BEGIN_GEMM_Wmma_i8_gfx1150_DEFS +const InitParamsAccel PopulateParamsWmma::initParametersI8GemmGfx1150[PopulateParamsWmma::nInitParametersI8GemmGfx1150] = { + {128,256,4,128,32,16,1,1,2,true,true}, + {128,64,2,64,32,16,1,2,2,true,true}, + {16,16,8,16,16,16,1,2,2,true,true}, + {128,64,8,128,16,16,1,1,2,true,true}, + {128,128,4,128,32,16,1,1,2,true,true}, + {32,256,2,32,64,16,1,1,2,true,true}, + {32,64,4,32,16,16,1,2,2,true,true}, + {64,128,2,64,32,16,1,2,2,true,true}, + {256,256,4,128,32,16,1,1,2,true,true}, + {32,64,8,16,64,16,1,1,2,true,true}, + {128,128,8,32,64,16,1,1,2,true,true}, + {64,256,8,16,128,16,1,1,2,true,true}, + {64,64,4,64,64,16,1,1,2,true,true}, + {256,32,4,128,16,16,1,1,2,true,true}, + {16,16,4,16,16,4,1,2,2,true,true}, + {256,64,4,128,16,16,1,1,2,true,true} +}; +// END_GEMM_Wmma_i8_gfx1150_DEFS + +// BEGIN_CONV_Wmma_f16_gfx1150_DEFS +const InitParamsAccel PopulateParamsWmma::initParametersFp16ConvGfx1150[PopulateParamsWmma::nInitParametersFp16ConvGfx1150] = { + {64,128,4,64,32,8,1,1,2,true,true}, + {128,64,8,128,16,8,1,1,2,true,true}, + {128,64,4,64,32,8,1,1,2,true,true}, + {64,256,2,64,64,8,1,1,2,true,true}, + {128,128,4,128,32,8,1,1,2,true,true}, + {128,128,8,16,128,8,1,1,2,true,true}, + {64,256,4,64,32,8,1,1,2,true,true}, + {128,64,8,32,32,8,1,1,2,true,true}, + {32,64,4,32,64,4,1,1,2,true,true}, + {128,256,4,128,32,8,1,1,2,true,true}, + {128,128,2,128,32,16,1,1,2,true,true}, + {256,128,8,128,32,8,1,1,2,true,true}, + {128,64,2,128,32,8,1,1,2,true,true}, + {256,256,8,128,32,8,1,1,2,true,true}, + {64,128,8,64,32,8,1,1,2,true,true}, + {64,128,8,64,16,8,1,1,2,true,true}, + {32,128,4,32,64,8,1,1,2,true,true}, + {128,128,8,32,64,8,1,1,2,true,true}, + {256,128,8,64,32,8,1,1,2,true,true}, + {32,128,2,32,128,8,1,1,2,true,true}, + {64,128,2,64,32,8,1,2,2,true,true}, + {256,64,4,64,64,8,1,1,2,true,true}, + {64,128,4,64,16,8,1,2,2,true,true}, + {32,128,2,32,32,8,1,2,2,true,true}, + {128,64,4,128,32,4,1,1,2,true,true}, + {64,256,4,64,16,8,1,2,2,true,true}, + {256,128,8,64,64,8,1,1,2,true,true}, + {64,32,4,32,32,8,1,1,2,true,true}, + {16,128,4,16,128,8,1,1,2,true,true}, + {32,256,2,32,128,8,1,1,2,true,true}, + {128,64,2,32,32,8,1,1,2,true,true}, + {256,64,8,64,32,8,1,1,2,true,true}, + {32,32,4,32,16,8,1,2,2,true,true}, + {256,64,2,32,64,8,1,1,2,true,true}, + {64,64,8,16,64,8,1,2,2,true,true}, + {64,32,8,32,32,8,1,1,2,true,true}, + {16,32,8,16,32,8,1,1,2,true,true}, + {64,64,8,16,32,8,1,2,2,true,true}, + {16,32,4,16,32,8,1,2,2,true,true}, + {128,256,8,128,32,8,1,1,2,true,true}, + {64,128,8,16,128,8,1,2,2,true,true}, + {32,32,8,16,16,8,1,1,2,true,true}, + {256,32,4,64,32,8,1,1,2,true,true}, + {32,64,8,16,16,8,1,2,2,true,true}, + {256,128,8,32,64,8,1,1,2,true,true}, + {32,128,8,32,32,8,1,2,2,true,true}, + {32,16,4,16,16,8,1,1,2,true,true}, + {32,32,8,16,16,8,1,2,2,true,true}, + {16,64,8,16,16,8,1,1,2,true,true}, + {16,32,4,16,16,4,1,1,2,true,true}, + {256,32,8,32,32,8,1,1,2,true,true}, + {16,128,8,16,64,8,1,1,2,true,true}, + {64,32,8,32,16,16,1,1,2,true,true}, + {16,16,4,16,16,4,1,1,2,true,true}, + {128,32,4,16,32,16,1,1,2,true,true}, + {16,16,8,16,16,16,1,1,2,true,true}, + {128,16,4,32,16,8,1,2,2,true,true}, + {64,16,8,32,16,16,1,1,2,true,true}, + {32,16,8,16,16,16,1,1,2,true,true} +}; +// END_CONV_Wmma_f16_gfx1150_DEFS + +// BEGIN_CONV_Wmma_i8_gfx1150_DEFS +const InitParamsAccel PopulateParamsWmma::initParametersForwardI8ConvGfx1150[PopulateParamsWmma::nInitParametersForwardI8ConvGfx1150] = { + {128,32,4,32,32,16,1,2,2,true,true}, + {16,128,4,16,128,4,1,1,2,true,true}, + {256,32,2,32,32,16,1,1,2,true,true}, + {64,128,4,32,64,8,1,1,2,true,true}, + {256,64,2,64,64,16,1,2,2,true,true}, + {64,64,4,16,64,16,1,1,2,true,true}, + {128,256,2,128,64,8,1,1,2,true,true}, + {128,64,8,128,32,4,1,2,2,true,true}, + {64,64,8,16,64,16,1,1,2,true,true}, + {128,64,4,64,64,4,1,1,2,true,true}, + {128,64,8,64,16,16,1,1,2,true,true}, + {128,128,2,128,32,8,1,2,2,true,true}, + {256,64,8,64,64,8,1,1,2,true,true}, + {256,32,4,64,32,16,1,1,2,true,true}, + {256,32,8,16,32,16,1,1,2,true,true}, + {32,256,4,32,128,4,1,2,2,true,true}, + {64,128,2,32,64,16,1,2,2,true,true}, + {128,16,4,16,16,8,1,1,2,true,true}, + {16,16,8,16,16,8,1,1,2,true,true}, + {256,16,8,16,16,16,1,1,2,true,true}, + {256,32,8,128,32,16,1,1,2,true,true} +}; +// END_CONV_Wmma_i8_gfx1150_DEFS + #endif #ifdef Wmma_DECLARATIONS_GEN @@ -1500,6 +1712,26 @@ static constexpr size_t nInitParametersForwardI8ConvGfx1151 = 18; static const InitParamsAccel initParametersForwardI8ConvGfx1151[nInitParametersForwardI8ConvGfx1151]; // END_CONV_Wmma_i8_gfx1151_DECS +// BEGIN_GEMM_Wmma_f16_gfx1150_DECS +static constexpr size_t nInitParametersFp16GemmGfx1150 = 21; +static const InitParamsAccel initParametersFp16GemmGfx1150[nInitParametersFp16GemmGfx1150]; +// END_GEMM_Wmma_f16_gfx1150_DECS + +// BEGIN_GEMM_Wmma_i8_gfx1150_DECS +static constexpr size_t nInitParametersI8GemmGfx1150 = 16; +static const InitParamsAccel initParametersI8GemmGfx1150[nInitParametersI8GemmGfx1150]; +// END_GEMM_Wmma_i8_gfx1150_DECS + +// BEGIN_CONV_Wmma_f16_gfx1150_DECS +static constexpr size_t nInitParametersFp16ConvGfx1150 = 59; +static const InitParamsAccel initParametersFp16ConvGfx1150[nInitParametersFp16ConvGfx1150]; +// END_CONV_Wmma_f16_gfx1150_DECS + +// BEGIN_CONV_Wmma_i8_gfx1150_DECS +static constexpr size_t nInitParametersForwardI8ConvGfx1150 = 21; +static const InitParamsAccel initParametersForwardI8ConvGfx1150[nInitParametersForwardI8ConvGfx1150]; +// END_CONV_Wmma_i8_gfx1150_DECS + #endif #ifdef NonAccel_LOOKUP_TABLE_GEN @@ -1512,6 +1744,8 @@ static const InitParamsAccel initParametersForwardI8ConvGfx1151[nInitParametersF {"gfx1201_conv_f32", {PopulateParams::initParametersConvGfx1201, PopulateParams::nInitParametersConvGfx1201}}, {"gfx1151_gemm_f32", {PopulateParams::initParametersGemmGfx1151, PopulateParams::nInitParametersGemmGfx1151}}, {"gfx1151_conv_f32", {PopulateParams::initParametersConvGfx1151, PopulateParams::nInitParametersConvGfx1151}}, +{"gfx1150_gemm_f32", {PopulateParams::initParametersGemmGfx1150, PopulateParams::nInitParametersGemmGfx1150}}, +{"gfx1150_conv_f32", {PopulateParams::initParametersConvGfx1150, PopulateParams::nInitParametersConvGfx1150}}, #endif @@ -1562,5 +1796,9 @@ static const InitParamsAccel initParametersForwardI8ConvGfx1151[nInitParametersF {"gfx1151_gemm_i8", {PopulateParamsWmma::initParametersI8GemmGfx1151, PopulateParamsWmma::nInitParametersI8GemmGfx1151}}, {"gfx1151_conv_f16", {PopulateParamsWmma::initParametersFp16ConvGfx1151, PopulateParamsWmma::nInitParametersFp16ConvGfx1151}}, {"gfx1151_conv_i8", {PopulateParamsWmma::initParametersForwardI8ConvGfx1151, PopulateParamsWmma::nInitParametersForwardI8ConvGfx1151}}, +{"gfx1150_gemm_f16", {PopulateParamsWmma::initParametersFp16GemmGfx1150, PopulateParamsWmma::nInitParametersFp16GemmGfx1150}}, +{"gfx1150_gemm_i8", {PopulateParamsWmma::initParametersI8GemmGfx1150, PopulateParamsWmma::nInitParametersI8GemmGfx1150}}, +{"gfx1150_conv_f16", {PopulateParamsWmma::initParametersFp16ConvGfx1150, PopulateParamsWmma::nInitParametersFp16ConvGfx1150}}, +{"gfx1150_conv_i8", {PopulateParamsWmma::initParametersForwardI8ConvGfx1150, PopulateParamsWmma::nInitParametersForwardI8ConvGfx1150}}, #endif From 726f500377d89a7f0d45585f0803e223814ee919 Mon Sep 17 00:00:00 2001 From: Mirza Halilcevic Date: Thu, 27 Nov 2025 18:34:43 +0000 Subject: [PATCH 4/5] Update perf configs to v4. --- .../Rock/Tuning/QuickTuningPerfconfigs.inc | 462 +++++++++--------- 1 file changed, 231 insertions(+), 231 deletions(-) diff --git a/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc b/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc index ffa6da0236c9..8fbd0a9b2441 100644 --- a/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc +++ b/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc @@ -1349,272 +1349,272 @@ const InitParamsAccel PopulateParamsWmma::initParametersForwardI8ConvGfx1201[Pop // BEGIN_GEMM_Wmma_f16_gfx1151_DEFS const InitParamsAccel PopulateParamsWmma::initParametersFp16GemmGfx1151[PopulateParamsWmma::nInitParametersFp16GemmGfx1151] = { - {128,128,8,64,64,8,1,1,2,true,true}, - {128,128,4,64,64,8,1,1,2,true,true}, - {128,64,4,64,32,8,1,1,2,true,true}, - {16,16,8,16,16,8,1,2,2,true,true}, - {64,256,2,64,64,8,1,2,2,true,true}, - {16,32,8,16,16,8,1,1,2,true,true}, - {16,16,8,16,16,16,1,1,2,true,true}, - {16,16,8,16,16,16,1,2,2,true,true}, - {128,256,4,64,64,8,1,1,2,true,true}, - {128,64,2,128,32,8,1,1,2,true,true}, - {64,256,2,32,128,8,1,2,2,true,true}, - {256,64,8,128,32,8,1,1,2,true,true}, - {128,64,8,64,32,8,1,1,2,true,true}, - {32,128,4,32,32,8,1,2,2,true,true}, - {32,16,8,32,16,8,1,2,2,true,true}, - {128,64,8,32,32,8,1,1,2,true,true}, - {32,16,8,16,16,16,1,2,2,true,true}, - {128,256,8,128,32,8,1,1,2,true,true}, - {256,128,2,128,32,8,1,1,2,true,true}, - {64,32,8,64,32,8,1,1,2,true,true}, - {128,64,2,32,64,8,1,1,2,true,true}, - {32,128,4,32,32,4,1,1,2,true,true}, - {64,128,4,32,64,8,1,2,2,true,true}, - {64,256,2,64,64,8,1,1,2,true,true}, - {32,64,8,32,32,8,1,1,2,true,true}, - {256,256,2,128,64,8,1,2,2,true,true}, - {16,256,4,16,128,4,1,1,2,true,true} + {128,128,8,64,64,16,8,1,1,2,true,true}, + {128,128,4,64,64,16,8,1,1,2,true,true}, + {128,64,4,64,32,16,8,1,1,2,true,true}, + {16,16,8,16,16,16,8,1,2,2,true,true}, + {64,256,2,64,64,16,8,1,2,2,true,true}, + {16,32,8,16,16,16,8,1,1,2,true,true}, + {16,16,8,16,16,16,16,1,1,2,true,true}, + {16,16,8,16,16,16,16,1,2,2,true,true}, + {128,256,4,64,64,16,8,1,1,2,true,true}, + {128,64,2,128,32,16,8,1,1,2,true,true}, + {64,256,2,32,128,16,8,1,2,2,true,true}, + {256,64,8,128,32,16,8,1,1,2,true,true}, + {128,64,8,64,32,16,8,1,1,2,true,true}, + {32,128,4,32,32,16,8,1,2,2,true,true}, + {32,16,8,32,16,16,8,1,2,2,true,true}, + {128,64,8,32,32,16,8,1,1,2,true,true}, + {32,16,8,16,16,16,16,1,2,2,true,true}, + {128,256,8,128,32,16,8,1,1,2,true,true}, + {256,128,2,128,32,16,8,1,1,2,true,true}, + {64,32,8,64,32,16,8,1,1,2,true,true}, + {128,64,2,32,64,16,8,1,1,2,true,true}, + {32,128,4,32,32,16,4,1,1,2,true,true}, + {64,128,4,32,64,16,8,1,2,2,true,true}, + {64,256,2,64,64,16,8,1,1,2,true,true}, + {32,64,8,32,32,16,8,1,1,2,true,true}, + {256,256,2,128,64,16,8,1,2,2,true,true}, + {16,256,4,16,128,16,4,1,1,2,true,true} }; // END_GEMM_Wmma_f16_gfx1151_DEFS // BEGIN_GEMM_Wmma_i8_gfx1151_DEFS const InitParamsAccel PopulateParamsWmma::initParametersI8GemmGfx1151[PopulateParamsWmma::nInitParametersI8GemmGfx1151] = { - {16,32,8,16,16,16,1,1,2,true,true}, - {16,16,8,16,16,16,1,2,2,true,true}, - {128,64,4,32,64,16,1,1,2,true,true}, - {128,128,8,64,64,4,1,2,2,true,true}, - {128,64,4,128,16,16,1,2,2,true,true}, - {256,128,4,128,32,16,1,1,2,true,true}, - {128,64,4,32,32,16,1,2,2,true,true}, - {64,32,2,32,32,16,1,2,2,true,true}, - {128,256,2,64,64,16,1,2,2,true,true}, - {64,64,4,32,32,16,1,1,2,true,true}, - {32,64,4,16,16,16,1,2,2,true,true}, - {64,256,4,32,32,16,1,1,2,true,true}, - {64,128,2,64,32,16,1,2,2,true,true}, - {128,256,8,64,64,8,1,1,2,true,true}, - {16,32,8,16,16,4,1,2,2,true,true}, - {256,128,2,128,32,16,1,2,2,true,true}, - {64,16,8,32,16,16,1,2,2,true,true}, - {128,32,8,128,16,4,1,2,2,true,true}, - {32,256,8,32,64,8,1,1,2,true,true} + {16,32,8,16,16,16,16,1,1,2,true,true}, + {16,16,8,16,16,16,16,1,2,2,true,true}, + {128,64,4,32,64,16,16,1,1,2,true,true}, + {128,128,8,64,64,16,4,1,2,2,true,true}, + {128,64,4,128,16,16,16,1,2,2,true,true}, + {256,128,4,128,32,16,16,1,1,2,true,true}, + {128,64,4,32,32,16,16,1,2,2,true,true}, + {64,32,2,32,32,16,16,1,2,2,true,true}, + {128,256,2,64,64,16,16,1,2,2,true,true}, + {64,64,4,32,32,16,16,1,1,2,true,true}, + {32,64,4,16,16,16,16,1,2,2,true,true}, + {64,256,4,32,32,16,16,1,1,2,true,true}, + {64,128,2,64,32,16,16,1,2,2,true,true}, + {128,256,8,64,64,16,8,1,1,2,true,true}, + {16,32,8,16,16,16,4,1,2,2,true,true}, + {256,128,2,128,32,16,16,1,2,2,true,true}, + {64,16,8,32,16,16,16,1,2,2,true,true}, + {128,32,8,128,16,16,4,1,2,2,true,true}, + {32,256,8,32,64,16,8,1,1,2,true,true} }; // END_GEMM_Wmma_i8_gfx1151_DEFS // BEGIN_CONV_Wmma_f16_gfx1151_DEFS const InitParamsAccel PopulateParamsWmma::initParametersFp16ConvGfx1151[PopulateParamsWmma::nInitParametersFp16ConvGfx1151] = { - {128,64,8,64,32,8,1,1,2,true,true}, - {256,128,8,128,32,8,1,1,2,true,true}, - {64,64,2,64,64,8,1,1,2,true,true}, - {128,128,4,128,32,8,1,1,2,true,true}, - {64,256,2,64,64,8,1,2,2,true,true}, - {128,32,4,32,32,8,1,1,2,true,true}, - {32,64,2,32,64,8,1,1,2,true,true}, - {128,128,8,64,32,8,1,1,2,true,true}, - {32,64,2,32,64,8,1,2,2,true,true}, - {256,128,4,128,32,8,1,1,2,true,true}, - {256,32,8,64,32,8,1,1,2,true,true}, - {128,64,2,32,64,8,1,2,2,true,true}, - {128,64,8,32,32,8,1,1,2,true,true}, - {64,128,4,64,32,8,1,1,2,true,true}, - {256,256,8,128,32,8,1,1,2,true,true}, - {32,256,2,32,128,8,1,1,2,true,true}, - {64,32,8,32,16,8,1,1,2,true,true}, - {64,32,8,16,32,8,1,2,2,true,true}, - {64,128,8,32,64,8,1,1,2,true,true}, - {64,128,2,64,32,8,1,2,2,true,true}, - {128,128,2,128,32,16,1,1,2,true,true}, - {16,128,4,16,64,8,1,1,2,true,true}, - {64,256,2,64,64,16,1,1,2,true,true}, - {64,64,2,32,32,8,1,2,2,true,true}, - {128,256,8,128,32,8,1,1,2,true,true}, - {64,16,8,16,16,8,1,1,2,true,true}, - {128,32,2,32,32,8,1,1,2,true,true}, - {64,128,8,64,32,8,1,1,2,true,true}, - {128,16,4,32,16,8,1,1,2,true,true}, - {64,64,2,32,32,8,1,1,2,true,true}, - {32,32,8,16,16,8,1,1,2,true,true}, - {128,128,8,64,64,8,1,1,2,true,true}, - {32,256,4,32,64,8,1,1,2,true,true}, - {64,128,4,32,64,8,1,2,2,true,true}, - {64,128,2,32,128,8,1,2,2,true,true}, - {256,64,2,64,64,8,1,2,2,true,true}, - {128,256,2,128,64,16,1,1,2,true,true}, - {32,64,4,16,32,8,1,2,2,true,true}, - {128,16,8,16,16,8,1,1,2,true,true}, - {16,256,8,16,64,4,1,1,2,true,true}, - {16,16,8,16,16,16,1,1,2,true,true}, - {128,32,8,32,32,8,1,2,2,true,true}, - {64,64,4,64,16,8,1,2,2,true,true}, - {32,64,8,16,16,8,1,2,2,true,true}, - {16,16,8,16,16,8,1,1,2,true,true}, - {16,128,8,16,32,8,1,1,2,true,true}, - {128,64,2,128,64,16,1,1,2,true,true}, - {128,128,4,64,32,4,1,2,2,true,true}, - {64,128,2,64,128,16,1,2,2,true,true}, - {64,128,8,16,32,4,1,2,2,true,true} + {128,64,8,64,32,16,8,1,1,2,true,true}, + {256,128,8,128,32,16,8,1,1,2,true,true}, + {64,64,2,64,64,16,8,1,1,2,true,true}, + {128,128,4,128,32,16,8,1,1,2,true,true}, + {64,256,2,64,64,16,8,1,2,2,true,true}, + {128,32,4,32,32,16,8,1,1,2,true,true}, + {32,64,2,32,64,16,8,1,1,2,true,true}, + {128,128,8,64,32,16,8,1,1,2,true,true}, + {32,64,2,32,64,16,8,1,2,2,true,true}, + {256,128,4,128,32,16,8,1,1,2,true,true}, + {256,32,8,64,32,16,8,1,1,2,true,true}, + {128,64,2,32,64,16,8,1,2,2,true,true}, + {128,64,8,32,32,16,8,1,1,2,true,true}, + {64,128,4,64,32,16,8,1,1,2,true,true}, + {256,256,8,128,32,16,8,1,1,2,true,true}, + {32,256,2,32,128,16,8,1,1,2,true,true}, + {64,32,8,32,16,16,8,1,1,2,true,true}, + {64,32,8,16,32,16,8,1,2,2,true,true}, + {64,128,8,32,64,16,8,1,1,2,true,true}, + {64,128,2,64,32,16,8,1,2,2,true,true}, + {128,128,2,128,32,16,16,1,1,2,true,true}, + {16,128,4,16,64,16,8,1,1,2,true,true}, + {64,256,2,64,64,16,16,1,1,2,true,true}, + {64,64,2,32,32,16,8,1,2,2,true,true}, + {128,256,8,128,32,16,8,1,1,2,true,true}, + {64,16,8,16,16,16,8,1,1,2,true,true}, + {128,32,2,32,32,16,8,1,1,2,true,true}, + {64,128,8,64,32,16,8,1,1,2,true,true}, + {128,16,4,32,16,16,8,1,1,2,true,true}, + {64,64,2,32,32,16,8,1,1,2,true,true}, + {32,32,8,16,16,16,8,1,1,2,true,true}, + {128,128,8,64,64,16,8,1,1,2,true,true}, + {32,256,4,32,64,16,8,1,1,2,true,true}, + {64,128,4,32,64,16,8,1,2,2,true,true}, + {64,128,2,32,128,16,8,1,2,2,true,true}, + {256,64,2,64,64,16,8,1,2,2,true,true}, + {128,256,2,128,64,16,16,1,1,2,true,true}, + {32,64,4,16,32,16,8,1,2,2,true,true}, + {128,16,8,16,16,16,8,1,1,2,true,true}, + {16,256,8,16,64,16,4,1,1,2,true,true}, + {16,16,8,16,16,16,16,1,1,2,true,true}, + {128,32,8,32,32,16,8,1,2,2,true,true}, + {64,64,4,64,16,16,8,1,2,2,true,true}, + {32,64,8,16,16,16,8,1,2,2,true,true}, + {16,16,8,16,16,16,8,1,1,2,true,true}, + {16,128,8,16,32,16,8,1,1,2,true,true}, + {128,64,2,128,64,16,16,1,1,2,true,true}, + {128,128,4,64,32,16,4,1,2,2,true,true}, + {64,128,2,64,128,16,16,1,2,2,true,true}, + {64,128,8,16,32,16,4,1,2,2,true,true} }; // END_CONV_Wmma_f16_gfx1151_DEFS // BEGIN_CONV_Wmma_i8_gfx1151_DEFS const InitParamsAccel PopulateParamsWmma::initParametersForwardI8ConvGfx1151[PopulateParamsWmma::nInitParametersForwardI8ConvGfx1151] = { - {128,32,4,64,16,16,1,2,2,true,true}, - {128,16,8,32,16,16,1,1,2,true,true}, - {64,32,8,16,32,16,1,2,2,true,true}, - {64,16,4,16,16,16,1,1,2,true,true}, - {256,32,4,32,32,16,1,1,2,true,true}, - {256,64,4,32,64,16,1,1,2,true,true}, - {64,64,4,64,64,4,1,1,2,true,true}, - {64,32,4,64,32,16,1,1,2,true,true}, - {32,256,2,32,32,8,1,1,2,true,true}, - {64,256,2,64,128,8,1,2,2,true,true}, - {32,64,4,32,64,4,1,1,2,true,true}, - {64,64,2,32,32,16,1,2,2,true,true}, - {32,128,8,32,32,8,1,1,2,true,true}, - {64,128,4,64,32,16,1,2,2,true,true}, - {128,128,8,64,64,8,1,1,2,true,true}, - {128,128,4,32,128,4,1,2,2,true,true}, - {64,256,8,64,32,8,1,1,2,true,true}, - {128,32,2,64,32,8,1,1,2,true,true} + {128,32,4,64,16,16,16,1,2,2,true,true}, + {128,16,8,32,16,16,16,1,1,2,true,true}, + {64,32,8,16,32,16,16,1,2,2,true,true}, + {64,16,4,16,16,16,16,1,1,2,true,true}, + {256,32,4,32,32,16,16,1,1,2,true,true}, + {256,64,4,32,64,16,16,1,1,2,true,true}, + {64,64,4,64,64,16,4,1,1,2,true,true}, + {64,32,4,64,32,16,16,1,1,2,true,true}, + {32,256,2,32,32,16,8,1,1,2,true,true}, + {64,256,2,64,128,16,8,1,2,2,true,true}, + {32,64,4,32,64,16,4,1,1,2,true,true}, + {64,64,2,32,32,16,16,1,2,2,true,true}, + {32,128,8,32,32,16,8,1,1,2,true,true}, + {64,128,4,64,32,16,16,1,2,2,true,true}, + {128,128,8,64,64,16,8,1,1,2,true,true}, + {128,128,4,32,128,16,4,1,2,2,true,true}, + {64,256,8,64,32,16,8,1,1,2,true,true}, + {128,32,2,64,32,16,8,1,1,2,true,true} }; // END_CONV_Wmma_i8_gfx1151_DEFS // BEGIN_GEMM_Wmma_f16_gfx1150_DEFS const InitParamsAccel PopulateParamsWmma::initParametersFp16GemmGfx1150[PopulateParamsWmma::nInitParametersFp16GemmGfx1150] = { - {64,128,8,32,64,8,1,1,2,true,true}, - {128,256,8,64,64,8,1,1,2,true,true}, - {128,256,4,128,32,8,1,1,2,true,true}, - {64,128,4,32,64,8,1,1,2,true,true}, - {64,64,4,64,64,8,1,1,2,true,true}, - {64,256,8,16,128,8,1,1,2,true,true}, - {16,16,8,16,16,16,1,2,2,true,true}, - {16,16,8,16,16,8,1,2,2,true,true}, - {128,64,8,32,32,8,1,1,2,true,true}, - {128,256,2,128,32,8,1,1,2,true,true}, - {64,64,8,32,64,8,1,1,2,true,true}, - {32,16,8,16,16,16,1,2,2,true,true}, - {32,32,8,16,16,8,1,2,2,true,true}, - {256,128,4,128,64,4,1,1,2,true,true}, - {32,64,4,32,64,16,1,1,2,true,true}, - {256,128,4,64,64,16,1,1,2,true,true}, - {16,16,4,16,16,4,1,2,2,true,true}, - {128,64,4,32,32,4,1,1,2,true,true}, - {64,32,4,16,32,4,1,1,2,true,true}, - {128,128,8,64,32,16,1,1,2,true,true}, - {256,64,8,128,32,8,1,1,2,true,true} + {64,128,8,32,64,16,8,1,1,2,true,true}, + {128,256,8,64,64,16,8,1,1,2,true,true}, + {128,256,4,128,32,16,8,1,1,2,true,true}, + {64,128,4,32,64,16,8,1,1,2,true,true}, + {64,64,4,64,64,16,8,1,1,2,true,true}, + {64,256,8,16,128,16,8,1,1,2,true,true}, + {16,16,8,16,16,16,16,1,2,2,true,true}, + {16,16,8,16,16,16,8,1,2,2,true,true}, + {128,64,8,32,32,16,8,1,1,2,true,true}, + {128,256,2,128,32,16,8,1,1,2,true,true}, + {64,64,8,32,64,16,8,1,1,2,true,true}, + {32,16,8,16,16,16,16,1,2,2,true,true}, + {32,32,8,16,16,16,8,1,2,2,true,true}, + {256,128,4,128,64,16,4,1,1,2,true,true}, + {32,64,4,32,64,16,16,1,1,2,true,true}, + {256,128,4,64,64,16,16,1,1,2,true,true}, + {16,16,4,16,16,16,4,1,2,2,true,true}, + {128,64,4,32,32,16,4,1,1,2,true,true}, + {64,32,4,16,32,16,4,1,1,2,true,true}, + {128,128,8,64,32,16,16,1,1,2,true,true}, + {256,64,8,128,32,16,8,1,1,2,true,true} }; // END_GEMM_Wmma_f16_gfx1150_DEFS // BEGIN_GEMM_Wmma_i8_gfx1150_DEFS const InitParamsAccel PopulateParamsWmma::initParametersI8GemmGfx1150[PopulateParamsWmma::nInitParametersI8GemmGfx1150] = { - {128,256,4,128,32,16,1,1,2,true,true}, - {128,64,2,64,32,16,1,2,2,true,true}, - {16,16,8,16,16,16,1,2,2,true,true}, - {128,64,8,128,16,16,1,1,2,true,true}, - {128,128,4,128,32,16,1,1,2,true,true}, - {32,256,2,32,64,16,1,1,2,true,true}, - {32,64,4,32,16,16,1,2,2,true,true}, - {64,128,2,64,32,16,1,2,2,true,true}, - {256,256,4,128,32,16,1,1,2,true,true}, - {32,64,8,16,64,16,1,1,2,true,true}, - {128,128,8,32,64,16,1,1,2,true,true}, - {64,256,8,16,128,16,1,1,2,true,true}, - {64,64,4,64,64,16,1,1,2,true,true}, - {256,32,4,128,16,16,1,1,2,true,true}, - {16,16,4,16,16,4,1,2,2,true,true}, - {256,64,4,128,16,16,1,1,2,true,true} + {128,256,4,128,32,16,16,1,1,2,true,true}, + {128,64,2,64,32,16,16,1,2,2,true,true}, + {16,16,8,16,16,16,16,1,2,2,true,true}, + {128,64,8,128,16,16,16,1,1,2,true,true}, + {128,128,4,128,32,16,16,1,1,2,true,true}, + {32,256,2,32,64,16,16,1,1,2,true,true}, + {32,64,4,32,16,16,16,1,2,2,true,true}, + {64,128,2,64,32,16,16,1,2,2,true,true}, + {256,256,4,128,32,16,16,1,1,2,true,true}, + {32,64,8,16,64,16,16,1,1,2,true,true}, + {128,128,8,32,64,16,16,1,1,2,true,true}, + {64,256,8,16,128,16,16,1,1,2,true,true}, + {64,64,4,64,64,16,16,1,1,2,true,true}, + {256,32,4,128,16,16,16,1,1,2,true,true}, + {16,16,4,16,16,16,4,1,2,2,true,true}, + {256,64,4,128,16,16,16,1,1,2,true,true} }; // END_GEMM_Wmma_i8_gfx1150_DEFS // BEGIN_CONV_Wmma_f16_gfx1150_DEFS const InitParamsAccel PopulateParamsWmma::initParametersFp16ConvGfx1150[PopulateParamsWmma::nInitParametersFp16ConvGfx1150] = { - {64,128,4,64,32,8,1,1,2,true,true}, - {128,64,8,128,16,8,1,1,2,true,true}, - {128,64,4,64,32,8,1,1,2,true,true}, - {64,256,2,64,64,8,1,1,2,true,true}, - {128,128,4,128,32,8,1,1,2,true,true}, - {128,128,8,16,128,8,1,1,2,true,true}, - {64,256,4,64,32,8,1,1,2,true,true}, - {128,64,8,32,32,8,1,1,2,true,true}, - {32,64,4,32,64,4,1,1,2,true,true}, - {128,256,4,128,32,8,1,1,2,true,true}, - {128,128,2,128,32,16,1,1,2,true,true}, - {256,128,8,128,32,8,1,1,2,true,true}, - {128,64,2,128,32,8,1,1,2,true,true}, - {256,256,8,128,32,8,1,1,2,true,true}, - {64,128,8,64,32,8,1,1,2,true,true}, - {64,128,8,64,16,8,1,1,2,true,true}, - {32,128,4,32,64,8,1,1,2,true,true}, - {128,128,8,32,64,8,1,1,2,true,true}, - {256,128,8,64,32,8,1,1,2,true,true}, - {32,128,2,32,128,8,1,1,2,true,true}, - {64,128,2,64,32,8,1,2,2,true,true}, - {256,64,4,64,64,8,1,1,2,true,true}, - {64,128,4,64,16,8,1,2,2,true,true}, - {32,128,2,32,32,8,1,2,2,true,true}, - {128,64,4,128,32,4,1,1,2,true,true}, - {64,256,4,64,16,8,1,2,2,true,true}, - {256,128,8,64,64,8,1,1,2,true,true}, - {64,32,4,32,32,8,1,1,2,true,true}, - {16,128,4,16,128,8,1,1,2,true,true}, - {32,256,2,32,128,8,1,1,2,true,true}, - {128,64,2,32,32,8,1,1,2,true,true}, - {256,64,8,64,32,8,1,1,2,true,true}, - {32,32,4,32,16,8,1,2,2,true,true}, - {256,64,2,32,64,8,1,1,2,true,true}, - {64,64,8,16,64,8,1,2,2,true,true}, - {64,32,8,32,32,8,1,1,2,true,true}, - {16,32,8,16,32,8,1,1,2,true,true}, - {64,64,8,16,32,8,1,2,2,true,true}, - {16,32,4,16,32,8,1,2,2,true,true}, - {128,256,8,128,32,8,1,1,2,true,true}, - {64,128,8,16,128,8,1,2,2,true,true}, - {32,32,8,16,16,8,1,1,2,true,true}, - {256,32,4,64,32,8,1,1,2,true,true}, - {32,64,8,16,16,8,1,2,2,true,true}, - {256,128,8,32,64,8,1,1,2,true,true}, - {32,128,8,32,32,8,1,2,2,true,true}, - {32,16,4,16,16,8,1,1,2,true,true}, - {32,32,8,16,16,8,1,2,2,true,true}, - {16,64,8,16,16,8,1,1,2,true,true}, - {16,32,4,16,16,4,1,1,2,true,true}, - {256,32,8,32,32,8,1,1,2,true,true}, - {16,128,8,16,64,8,1,1,2,true,true}, - {64,32,8,32,16,16,1,1,2,true,true}, - {16,16,4,16,16,4,1,1,2,true,true}, - {128,32,4,16,32,16,1,1,2,true,true}, - {16,16,8,16,16,16,1,1,2,true,true}, - {128,16,4,32,16,8,1,2,2,true,true}, - {64,16,8,32,16,16,1,1,2,true,true}, - {32,16,8,16,16,16,1,1,2,true,true} + {64,128,4,64,32,16,8,1,1,2,true,true}, + {128,64,8,128,16,16,8,1,1,2,true,true}, + {128,64,4,64,32,16,8,1,1,2,true,true}, + {64,256,2,64,64,16,8,1,1,2,true,true}, + {128,128,4,128,32,16,8,1,1,2,true,true}, + {128,128,8,16,128,16,8,1,1,2,true,true}, + {64,256,4,64,32,16,8,1,1,2,true,true}, + {128,64,8,32,32,16,8,1,1,2,true,true}, + {32,64,4,32,64,16,4,1,1,2,true,true}, + {128,256,4,128,32,16,8,1,1,2,true,true}, + {128,128,2,128,32,16,16,1,1,2,true,true}, + {256,128,8,128,32,16,8,1,1,2,true,true}, + {128,64,2,128,32,16,8,1,1,2,true,true}, + {256,256,8,128,32,16,8,1,1,2,true,true}, + {64,128,8,64,32,16,8,1,1,2,true,true}, + {64,128,8,64,16,16,8,1,1,2,true,true}, + {32,128,4,32,64,16,8,1,1,2,true,true}, + {128,128,8,32,64,16,8,1,1,2,true,true}, + {256,128,8,64,32,16,8,1,1,2,true,true}, + {32,128,2,32,128,16,8,1,1,2,true,true}, + {64,128,2,64,32,16,8,1,2,2,true,true}, + {256,64,4,64,64,16,8,1,1,2,true,true}, + {64,128,4,64,16,16,8,1,2,2,true,true}, + {32,128,2,32,32,16,8,1,2,2,true,true}, + {128,64,4,128,32,16,4,1,1,2,true,true}, + {64,256,4,64,16,16,8,1,2,2,true,true}, + {256,128,8,64,64,16,8,1,1,2,true,true}, + {64,32,4,32,32,16,8,1,1,2,true,true}, + {16,128,4,16,128,16,8,1,1,2,true,true}, + {32,256,2,32,128,16,8,1,1,2,true,true}, + {128,64,2,32,32,16,8,1,1,2,true,true}, + {256,64,8,64,32,16,8,1,1,2,true,true}, + {32,32,4,32,16,16,8,1,2,2,true,true}, + {256,64,2,32,64,16,8,1,1,2,true,true}, + {64,64,8,16,64,16,8,1,2,2,true,true}, + {64,32,8,32,32,16,8,1,1,2,true,true}, + {16,32,8,16,32,16,8,1,1,2,true,true}, + {64,64,8,16,32,16,8,1,2,2,true,true}, + {16,32,4,16,32,16,8,1,2,2,true,true}, + {128,256,8,128,32,16,8,1,1,2,true,true}, + {64,128,8,16,128,16,8,1,2,2,true,true}, + {32,32,8,16,16,16,8,1,1,2,true,true}, + {256,32,4,64,32,16,8,1,1,2,true,true}, + {32,64,8,16,16,16,8,1,2,2,true,true}, + {256,128,8,32,64,16,8,1,1,2,true,true}, + {32,128,8,32,32,16,8,1,2,2,true,true}, + {32,16,4,16,16,16,8,1,1,2,true,true}, + {32,32,8,16,16,16,8,1,2,2,true,true}, + {16,64,8,16,16,16,8,1,1,2,true,true}, + {16,32,4,16,16,16,4,1,1,2,true,true}, + {256,32,8,32,32,16,8,1,1,2,true,true}, + {16,128,8,16,64,16,8,1,1,2,true,true}, + {64,32,8,32,16,16,16,1,1,2,true,true}, + {16,16,4,16,16,16,4,1,1,2,true,true}, + {128,32,4,16,32,16,16,1,1,2,true,true}, + {16,16,8,16,16,16,16,1,1,2,true,true}, + {128,16,4,32,16,16,8,1,2,2,true,true}, + {64,16,8,32,16,16,16,1,1,2,true,true}, + {32,16,8,16,16,16,16,1,1,2,true,true} }; // END_CONV_Wmma_f16_gfx1150_DEFS // BEGIN_CONV_Wmma_i8_gfx1150_DEFS const InitParamsAccel PopulateParamsWmma::initParametersForwardI8ConvGfx1150[PopulateParamsWmma::nInitParametersForwardI8ConvGfx1150] = { - {128,32,4,32,32,16,1,2,2,true,true}, - {16,128,4,16,128,4,1,1,2,true,true}, - {256,32,2,32,32,16,1,1,2,true,true}, - {64,128,4,32,64,8,1,1,2,true,true}, - {256,64,2,64,64,16,1,2,2,true,true}, - {64,64,4,16,64,16,1,1,2,true,true}, - {128,256,2,128,64,8,1,1,2,true,true}, - {128,64,8,128,32,4,1,2,2,true,true}, - {64,64,8,16,64,16,1,1,2,true,true}, - {128,64,4,64,64,4,1,1,2,true,true}, - {128,64,8,64,16,16,1,1,2,true,true}, - {128,128,2,128,32,8,1,2,2,true,true}, - {256,64,8,64,64,8,1,1,2,true,true}, - {256,32,4,64,32,16,1,1,2,true,true}, - {256,32,8,16,32,16,1,1,2,true,true}, - {32,256,4,32,128,4,1,2,2,true,true}, - {64,128,2,32,64,16,1,2,2,true,true}, - {128,16,4,16,16,8,1,1,2,true,true}, - {16,16,8,16,16,8,1,1,2,true,true}, - {256,16,8,16,16,16,1,1,2,true,true}, - {256,32,8,128,32,16,1,1,2,true,true} + {128,32,4,32,32,16,16,1,2,2,true,true}, + {16,128,4,16,128,16,4,1,1,2,true,true}, + {256,32,2,32,32,16,16,1,1,2,true,true}, + {64,128,4,32,64,16,8,1,1,2,true,true}, + {256,64,2,64,64,16,16,1,2,2,true,true}, + {64,64,4,16,64,16,16,1,1,2,true,true}, + {128,256,2,128,64,16,8,1,1,2,true,true}, + {128,64,8,128,32,16,4,1,2,2,true,true}, + {64,64,8,16,64,16,16,1,1,2,true,true}, + {128,64,4,64,64,16,4,1,1,2,true,true}, + {128,64,8,64,16,16,16,1,1,2,true,true}, + {128,128,2,128,32,16,8,1,2,2,true,true}, + {256,64,8,64,64,16,8,1,1,2,true,true}, + {256,32,4,64,32,16,16,1,1,2,true,true}, + {256,32,8,16,32,16,16,1,1,2,true,true}, + {32,256,4,32,128,16,4,1,2,2,true,true}, + {64,128,2,32,64,16,16,1,2,2,true,true}, + {128,16,4,16,16,16,8,1,1,2,true,true}, + {16,16,8,16,16,16,8,1,1,2,true,true}, + {256,16,8,16,16,16,16,1,1,2,true,true}, + {256,32,8,128,32,16,16,1,1,2,true,true} }; // END_CONV_Wmma_i8_gfx1150_DEFS From 992918e9d3979fd28b96fea7fd61f6a62a1537dd Mon Sep 17 00:00:00 2001 From: Mirza Halilcevic Date: Thu, 6 Nov 2025 13:35:13 +0000 Subject: [PATCH 5/5] Fix tests. --- .../Dialect/Rock/affix_tuning_params.mlir | 28 +++++++++---------- .../noTransA-noTransB/broadcasted-k-e2e.mlir | 2 +- .../noTransA-transB/broadcasted-k-e2e.mlir | 2 +- .../transA-noTransB/gemm-k-e2e.mlir | 2 +- .../transA-noTransB/sliced-k-e2e.mlir | 2 +- .../transA-transB/gemm-k-e2e.mlir | 2 +- .../transA-transB/sliced-k-e2e.mlir | 2 +- .../fusion/tosa-to-rock-gemm-reshape-add.mlir | 2 +- mlir/test/fusion/tosa-to-rock-tp-add-tp.mlir | 2 +- mlir/test/fusion/tosa-to-rock-tp-add.mlir | 2 +- mlir/test/mlir-rock-lib/populate_bwd.mlir | 2 +- 11 files changed, 24 insertions(+), 24 deletions(-) diff --git a/mlir/test/Dialect/Rock/affix_tuning_params.mlir b/mlir/test/Dialect/Rock/affix_tuning_params.mlir index e51b7625c7b3..22db85f5ebfd 100644 --- a/mlir/test/Dialect/Rock/affix_tuning_params.mlir +++ b/mlir/test/Dialect/Rock/affix_tuning_params.mlir @@ -10,9 +10,9 @@ // GRID-LABEL: rock_conv func.func @rock_conv(%filter : memref<1x128x8x3x3xf32>, %input : memref<128x1x8x32x32xf32>, %output : memref<128x1x128x30x30xf32>) attributes {arch = "amdgcn-amd-amdhsa:gfx906"} { // CHECK: rock.conv - // CHECK-SAME: params = #rock.general_gemm_params + // CHECK-SAME: params = #rock.general_gemm_params // GRID: rock.gridwise_gemm - // GRID-SAME: gridSize = 900 + // GRID-SAME: gridSize = 1800 rock.conv(%filter, %input, %output) features = none { filter_layout = ["g", "k", "c", "0", "1"], input_layout = ["ni", "gi", "ci", "0i", "1i"], @@ -28,9 +28,9 @@ func.func @rock_conv(%filter : memref<1x128x8x3x3xf32>, %input : memref<128x1x8x // GRID-LABEL: rock_conv_schedulev2 func.func @rock_conv_schedulev2(%filter : memref<1x128x8x3x3xf32>, %input : memref<128x1x8x32x32xf32>, %output : memref<128x1x128x30x30xf32>) attributes {schedule_version = #rock.schedule_version<2>, arch = "amdgcn-amd-amdhsa:gfx906"} { // CHECK: rock.conv - // CHECK-SAME: params = #rock.general_gemm_params + // CHECK-SAME: params = #rock.general_gemm_params // GRID: rock.gridwise_gemm - // GRID-SAME: gridSize = 900 + // GRID-SAME: gridSize = 1800 rock.conv(%filter, %input, %output) features = none { filter_layout = ["g", "k", "c", "0", "1"], input_layout = ["ni", "gi", "ci", "0i", "1i"], @@ -46,9 +46,9 @@ func.func @rock_conv_schedulev2(%filter : memref<1x128x8x3x3xf32>, %input : memr // GRID-LABEL: func.func @rock_conv_f16 func.func @rock_conv_f16(%filter : memref<1x128x8x3x3xf16>, %input : memref<128x1x8x32x32xf16>, %output : memref<128x1x128x30x30xf16>) attributes {arch = "amdgcn-amd-amdhsa:gfx906"} { // CHECK: rock.conv - // CHECK-SAME: params = #rock.general_gemm_params + // CHECK-SAME: params = #rock.general_gemm_params // GRID: rock.gridwise_gemm - // GRID-SAME: gridSize = 900 + // GRID-SAME: gridSize = 1800 rock.conv(%filter, %input, %output) features = none { filter_layout = ["g", "k", "c", "0", "1"], input_layout = ["ni", "gi", "ci", "0i", "1i"], @@ -125,7 +125,7 @@ func.func @rock_conv_bwd_data_f16(%filter: memref<1x1024x1024x1x1xf16>, %input: // GRID-LABEL: func.func @rock_conv_bwd_data_padMN func.func @rock_conv_bwd_data_padMN(%filter : memref<1x64x3x1x1xf32>, %input : memref<11x1x3x15x15xf32>, %output : memref<11x1x64x15x15xf32>) attributes {arch = "amdgcn-amd-amdhsa:gfx906"} { // CHECK: rock.conv_bwd_data - // CHECK-SAME: params = #rock.general_gemm_params + // CHECK-SAME: params = #rock.general_gemm_params // GRID: rock.gridwise_gemm // GRID-SAME: gridSize = 39 rock.conv_bwd_data(%filter, %input, %output) features = none { @@ -145,9 +145,9 @@ func.func @rock_conv_bwd_data_padMN(%filter : memref<1x64x3x1x1xf32>, %input : m // GRID-LABEL: @rock_conv_bwd_data_padMK func.func @rock_conv_bwd_data_padMK(%filter : memref<1x11x3x1x1xf32>, %input : memref<128x1x3x15x15xf32>, %output : memref<128x1x11x15x15xf32>) attributes {arch = "amdgcn-amd-amdhsa:gfx906"} { // CHECK: rock.conv_bwd_data - // CHECK-SAME: params = #rock.general_gemm_params + // CHECK-SAME: params = #rock.general_gemm_params // GRID: rock.gridwise_gemm - // GRID-SAME: gridSize = 450 + // GRID-SAME: gridSize = 225 rock.conv_bwd_data(%filter, %input, %output) features = none { filter_layout = ["g", "k", "c", "0", "1"], input_layout = ["ni", "gi", "ci", "0i", "1i"], @@ -165,7 +165,7 @@ func.func @rock_conv_bwd_data_padMK(%filter : memref<1x11x3x1x1xf32>, %input : m // GRID-LABEL: @rock_conv_bwd_weight func.func @rock_conv_bwd_weight(%filter : memref<1x128x8x3x3xf32>, %input : memref<128x1x8x32x32xf32>, %output : memref<128x1x128x30x30xf32>) attributes {arch = "amdgcn-amd-amdhsa:gfx906", numCU = 64 : i32} { // CHECK: rock.conv_bwd_weight - // CHECK-SAME: params = #rock.general_gemm_params + // CHECK-SAME: params = #rock.general_gemm_params // GRID: rock.gridwise_gemm // GRID-SAME: gridSize = 6 rock.conv_bwd_weight(%filter, %input, %output) features = none { @@ -183,7 +183,7 @@ func.func @rock_conv_bwd_weight(%filter : memref<1x128x8x3x3xf32>, %input : memr // GRID-LABEL: @rock_conv_bwd_weight_f16 func.func @rock_conv_bwd_weight_f16(%filter : memref<1x128x8x3x3xf16>, %input : memref<128x1x8x32x32xf16>, %output : memref<128x1x128x30x30xf16>) attributes {arch = "amdgcn-amd-amdhsa:gfx906", numCU = 64 : i32} { // CHECK: rock.conv_bwd_weight - // CHECK-SAME: params = #rock.general_gemm_params + // CHECK-SAME: params = #rock.general_gemm_params // GRID: rock.gridwise_gemm // GRID-SAME: gridSize = 6 rock.conv_bwd_weight(%filter, %input, %output) features = none { @@ -201,7 +201,7 @@ func.func @rock_conv_bwd_weight_f16(%filter : memref<1x128x8x3x3xf16>, %input : // GRID-LABEL: func.func @rock_conv_bwd_weight_padALL func.func @rock_conv_bwd_weight_padALL(%filter : memref<1x20x8x3x3xf32>, %input : memref<7x1x8x32x32xf32>, %output : memref<7x1x20x30x30xf32>) attributes {arch = "amdgcn-amd-amdhsa:gfx906", numCU = 64 : i32} { // CHECK: rock.conv_bwd_weight - // CHECK-SAME: params = #rock.general_gemm_params + // CHECK-SAME: params = #rock.general_gemm_params // GRID: rock.gridwise_gemm // GRID-SAME: gridSize = 3 rock.conv_bwd_weight(%filter, %input, %output) features = none { @@ -219,7 +219,7 @@ func.func @rock_conv_bwd_weight_padALL(%filter : memref<1x20x8x3x3xf32>, %input // GRID-LABEL: @rock_conv_bwd_weight_padALL_f16 func.func @rock_conv_bwd_weight_padALL_f16(%filter : memref<1x20x8x3x3xf16>, %input : memref<7x1x8x32x32xf16>, %output : memref<7x1x20x30x30xf16>) attributes {arch = "amdgcn-amd-amdhsa:gfx906", numCU = 64 : i32} { // CHECK: rock.conv_bwd_weight - // CHECK-SAME: params = #rock.general_gemm_params + // CHECK-SAME: params = #rock.general_gemm_params // GRID: rock.gridwise_gemm // GRID-SAME: gridSize = 3 rock.conv_bwd_weight(%filter, %input, %output) features = none { @@ -340,7 +340,7 @@ func.func @rock_conv_bwd_data_7x7(%arg0: memref<1x64x3x7x7xf32>, %arg1: memref<2 // GRID-LABEL: @rock_gemm_from_conv func.func @rock_gemm_from_conv(%a : memref<1x72x128xf32>, %b : memref<1x72x115200xf32>, %c : memref<1x128x115200xf32>) attributes {arch = "amdgcn-amd-amdhsa:gfx906", numCU = 64 : i32} { // CHECK: rock.gemm - // CHECK-SAME: params = #rock.general_gemm_params + // CHECK-SAME: params = #rock.general_gemm_params // GRID: rock.gridwise_gemm // GRID-SAME: gridSize = 900 rock.gemm %c = tr %a * %b features = none storeMethod = set diff --git a/mlir/test/fusion/pr-e2e/gemm-layouts/noTransA-noTransB/broadcasted-k-e2e.mlir b/mlir/test/fusion/pr-e2e/gemm-layouts/noTransA-noTransB/broadcasted-k-e2e.mlir index 8ba76962c546..92be2f1994b5 100644 --- a/mlir/test/fusion/pr-e2e/gemm-layouts/noTransA-noTransB/broadcasted-k-e2e.mlir +++ b/mlir/test/fusion/pr-e2e/gemm-layouts/noTransA-noTransB/broadcasted-k-e2e.mlir @@ -7,7 +7,7 @@ // EMITKEY: -t f16 -out_datatype f16 -transA false -transB false -g 2 -m 4096 -n 640 -k 320 // VECTORIZATION: aVectorDim: GemmDimension::MorN -// VECTORIZATION-NEXT: aVectorLen: 2 +// VECTORIZATION-NEXT: aVectorLen: 4 // VECTORIZATION: bVectorDim: GemmDimension::MorN // VECTORIZATION-NEXT: bVectorLen: 2 diff --git a/mlir/test/fusion/pr-e2e/gemm-layouts/noTransA-transB/broadcasted-k-e2e.mlir b/mlir/test/fusion/pr-e2e/gemm-layouts/noTransA-transB/broadcasted-k-e2e.mlir index 778dbac1e10a..853c3779e9af 100644 --- a/mlir/test/fusion/pr-e2e/gemm-layouts/noTransA-transB/broadcasted-k-e2e.mlir +++ b/mlir/test/fusion/pr-e2e/gemm-layouts/noTransA-transB/broadcasted-k-e2e.mlir @@ -7,7 +7,7 @@ // EMITKEY: -t f16 -out_datatype f16 -transA false -transB true -g 2 -m 4096 -n 640 -k 320 // VECTORIZATION: aVectorDim: GemmDimension::MorN -// VECTORIZATION-NEXT: aVectorLen: 2 +// VECTORIZATION-NEXT: aVectorLen: 4 // VECTORIZATION: bVectorDim: GemmDimension::K // VECTORIZATION-NEXT: bVectorLen: 8 diff --git a/mlir/test/fusion/pr-e2e/gemm-layouts/transA-noTransB/gemm-k-e2e.mlir b/mlir/test/fusion/pr-e2e/gemm-layouts/transA-noTransB/gemm-k-e2e.mlir index 6164549e560a..1c1f9e26dc68 100644 --- a/mlir/test/fusion/pr-e2e/gemm-layouts/transA-noTransB/gemm-k-e2e.mlir +++ b/mlir/test/fusion/pr-e2e/gemm-layouts/transA-noTransB/gemm-k-e2e.mlir @@ -6,7 +6,7 @@ // EMITKEY: -t f16 -out_datatype f16 -transA true -transB false -g 2 -m 4096 -n 640 -k 320 // VECTORIZATION: aVectorDim: GemmDimension::MorN -// VECTORIZATION-NEXT: aVectorLen: 2 +// VECTORIZATION-NEXT: aVectorLen: 4 // VECTORIZATION: bVectorDim: GemmDimension::MorN // VECTORIZATION-NEXT: bVectorLen: 2 diff --git a/mlir/test/fusion/pr-e2e/gemm-layouts/transA-noTransB/sliced-k-e2e.mlir b/mlir/test/fusion/pr-e2e/gemm-layouts/transA-noTransB/sliced-k-e2e.mlir index b35867a25f25..9a89a23dfe17 100644 --- a/mlir/test/fusion/pr-e2e/gemm-layouts/transA-noTransB/sliced-k-e2e.mlir +++ b/mlir/test/fusion/pr-e2e/gemm-layouts/transA-noTransB/sliced-k-e2e.mlir @@ -7,7 +7,7 @@ // EMITKEY: -t f16 -out_datatype f16 -transA true -transB false -g 2 -m 4096 -n 640 -k 320 // VECTORIZATION: aVectorDim: GemmDimension::MorN -// VECTORIZATION-NEXT: aVectorLen: 2 +// VECTORIZATION-NEXT: aVectorLen: 4 // VECTORIZATION: bVectorDim: GemmDimension::MorN // VECTORIZATION-NEXT: bVectorLen: 2 diff --git a/mlir/test/fusion/pr-e2e/gemm-layouts/transA-transB/gemm-k-e2e.mlir b/mlir/test/fusion/pr-e2e/gemm-layouts/transA-transB/gemm-k-e2e.mlir index 62dcc2df3117..5753af9c6330 100644 --- a/mlir/test/fusion/pr-e2e/gemm-layouts/transA-transB/gemm-k-e2e.mlir +++ b/mlir/test/fusion/pr-e2e/gemm-layouts/transA-transB/gemm-k-e2e.mlir @@ -7,7 +7,7 @@ // EMITKEY: -t f16 -out_datatype f16 -transA true -transB true -g 2 -m 4096 -n 640 -k 320 // VECTORIZATION: aVectorDim: GemmDimension::MorN -// VECTORIZATION-NEXT: aVectorLen: 2 +// VECTORIZATION-NEXT: aVectorLen: 4 // VECTORIZATION: bVectorDim: GemmDimension::K // VECTORIZATION-NEXT: bVectorLen: 8 diff --git a/mlir/test/fusion/pr-e2e/gemm-layouts/transA-transB/sliced-k-e2e.mlir b/mlir/test/fusion/pr-e2e/gemm-layouts/transA-transB/sliced-k-e2e.mlir index 3f95dfc8caae..b19e6a1ec7df 100644 --- a/mlir/test/fusion/pr-e2e/gemm-layouts/transA-transB/sliced-k-e2e.mlir +++ b/mlir/test/fusion/pr-e2e/gemm-layouts/transA-transB/sliced-k-e2e.mlir @@ -7,7 +7,7 @@ // EMITKEY: -t f16 -out_datatype f16 -transA true -transB true -g 2 -m 4096 -n 640 -k 320 // VECTORIZATION: aVectorDim: GemmDimension::MorN -// VECTORIZATION-NEXT: aVectorLen: 2 +// VECTORIZATION-NEXT: aVectorLen: 4 // VECTORIZATION: bVectorDim: GemmDimension::K // VECTORIZATION-NEXT: bVectorLen: 8 diff --git a/mlir/test/fusion/tosa-to-rock-gemm-reshape-add.mlir b/mlir/test/fusion/tosa-to-rock-gemm-reshape-add.mlir index 1bcb3ca28b9e..dc7d6ae3f8cc 100644 --- a/mlir/test/fusion/tosa-to-rock-gemm-reshape-add.mlir +++ b/mlir/test/fusion/tosa-to-rock-gemm-reshape-add.mlir @@ -7,7 +7,7 @@ // CHECK_LINALG_ALIGN-COUNT-2: rock.threadwise_read_into {{.*}} // CHECK_LINALG_ALIGN: rock.threadwise_read_into {{.*}} -> [[lain:%.*]] : -// CHECK_LINALG_ALIGN: linalg.generic{{.*}} ins({{.*}}, [[lain]] :{{.*}}) outs(%[[outBuf:.*]] : memref<32xf32, #gpu.address_space>) +// CHECK_LINALG_ALIGN: linalg.generic{{.*}} ins({{.*}}, [[lain]] :{{.*}}) outs(%[[outBuf:.*]] : memref<8xf32, #gpu.address_space>) // CHECK_LINALG_ALIGN: rock.threadwise_write_all {{.*}} %[[outBuf]] -> // to test reshape is converted as transform and fused. diff --git a/mlir/test/fusion/tosa-to-rock-tp-add-tp.mlir b/mlir/test/fusion/tosa-to-rock-tp-add-tp.mlir index f7bea8820bf1..bcad807d5d71 100644 --- a/mlir/test/fusion/tosa-to-rock-tp-add-tp.mlir +++ b/mlir/test/fusion/tosa-to-rock-tp-add-tp.mlir @@ -3,7 +3,7 @@ // CHECK-DAG: #[[MAP2:.*]] = #rock.transform_map<{{.*}} by [ ["dim0", "dim2", "dim3", "dim1"] at [0, 2, 3, 1]>] bounds = [256, 28, 28, 64] -> [256, 64, 28, 28]> // CHECK-COUNT-2: rock.threadwise_read_into {{.*}} // CHECK: rock.threadwise_read_into {{.*}} -> [[lain:%.*]] : -// CHECK: linalg.generic{{.*}} ins({{.*}}, [[lain]] :{{.*}}) outs(%[[outBuf:.*]] : memref<32xf32, #gpu.address_space>) +// CHECK: linalg.generic{{.*}} ins({{.*}}, [[lain]] :{{.*}}) outs(%[[outBuf:.*]] : memref<128xf32, #gpu.address_space>) // CHECK: rock.threadwise_write_all {{.*}} %[[outBuf]] -> // to test transpose is converted as transform and fused. diff --git a/mlir/test/fusion/tosa-to-rock-tp-add.mlir b/mlir/test/fusion/tosa-to-rock-tp-add.mlir index d6a1ac707407..71b5eaaaad52 100644 --- a/mlir/test/fusion/tosa-to-rock-tp-add.mlir +++ b/mlir/test/fusion/tosa-to-rock-tp-add.mlir @@ -3,7 +3,7 @@ // CHECK-DAG: #[[MAP2:.*]] = #rock.transform_map<#map{{.*}} by [ ["{{.*}}", "{{.*}}", "{{.*}}", "{{.*}}"] at [0, 2, 3, 1]>] bounds = [256, 28, 28, 64] -> [256, 64, 28, 28]> // CHECK-COUNT-2: rock.threadwise_read_into {{.*}} // CHECK: rock.threadwise_read_into {{.*}} -> [[lain:%.*]] : -// CHECK: linalg.generic{{.*}} ins({{.*}}, [[lain]] :{{.*}}) outs(%[[outBuf:.*]] : memref<32xf32, #gpu.address_space>) +// CHECK: linalg.generic{{.*}} ins({{.*}}, [[lain]] :{{.*}}) outs(%[[outBuf:.*]] : memref<128xf32, #gpu.address_space>) // CHECK: rock.threadwise_write_all {{.*}} %[[outBuf]] -> // to test transpose is converted as transform and fused. diff --git a/mlir/test/mlir-rock-lib/populate_bwd.mlir b/mlir/test/mlir-rock-lib/populate_bwd.mlir index 5ad9273d11b0..00575052e512 100644 --- a/mlir/test/mlir-rock-lib/populate_bwd.mlir +++ b/mlir/test/mlir-rock-lib/populate_bwd.mlir @@ -55,7 +55,7 @@ // ZEROINIT_KERNELCOUNT: Kernel count=1 // ZEROINIT_BIN: ELF -// ZEROINIT_TUNING: globalSize=100352, localSize=128 +// ZEROINIT_TUNING: globalSize=100352, localSize=64 // ZEROINIT_DRIVER: %arg1: memref<{{.*}}xf16> {rock.prefill = 0.000000e+00 : f16} // ZEROINIT_DRIVER-COUNT-3: rock.transform %{{.+}} by // ZEROINIT_DRIVER-NEXT: rock.conv_bwd_data(%{{.+}}, %{{.+}}, %{{.+}}) features = dot {dilations = [1 : index, 1 : index], filter_layout = ["g", "k", "c", "0", "1"], input_layout = ["ni", "gi", "ci", "0i", "1i"], kernelId = 0 : index, output_layout = ["no", "go", "ko", "0o", "1o"], padding = [0 : index, 0 : index, 0 : index, 0 : index], strides = [2 : index, 2 : index], usesV4R1 = true} : memref<1x2048x1024x1x1xf16>, memref<256x1x1024x14x14xf16>, memref<256x1x2048x7x7xf16>