Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
797 changes: 628 additions & 169 deletions mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc

Large diffs are not rendered by default.

28 changes: 14 additions & 14 deletions mlir/test/Dialect/Rock/affix_tuning_params.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@
// GRID-LABEL: rock_conv
func.func @rock_conv(%filter : memref<1x128x8x3x3xf32>, %input : memref<128x1x8x32x32xf32>, %output : memref<128x1x128x30x30xf32>) attributes {arch = "amdgcn-amd-amdhsa:gfx906"} {
// CHECK: rock.conv
// CHECK-SAME: params = #rock.general_gemm_params<blockSize = 128, kPerBlock = 8, mPerBlock = 128, nPerBlock = 128, kPerThread = 1, mPerThread = 2, nPerThread = 2, kpack = 1, splitKFactor = 1, scheduleVersion = 1, outputSwizzle = 2>
// CHECK-SAME: params = #rock.general_gemm_params<blockSize = 64, kPerBlock = 4, mPerBlock = 64, nPerBlock = 128, kPerThread = 1, mPerThread = 2, nPerThread = 4, kpack = 1, splitKFactor = 1, scheduleVersion = 1, outputSwizzle = 2>
// GRID: rock.gridwise_gemm
// GRID-SAME: gridSize = 900
// GRID-SAME: gridSize = 1800
rock.conv(%filter, %input, %output) features = none {
filter_layout = ["g", "k", "c", "0", "1"],
input_layout = ["ni", "gi", "ci", "0i", "1i"],
Expand All @@ -28,9 +28,9 @@ func.func @rock_conv(%filter : memref<1x128x8x3x3xf32>, %input : memref<128x1x8x
// GRID-LABEL: rock_conv_schedulev2
func.func @rock_conv_schedulev2(%filter : memref<1x128x8x3x3xf32>, %input : memref<128x1x8x32x32xf32>, %output : memref<128x1x128x30x30xf32>) attributes {schedule_version = #rock.schedule_version<2>, arch = "amdgcn-amd-amdhsa:gfx906"} {
// CHECK: rock.conv
// CHECK-SAME: params = #rock.general_gemm_params<blockSize = 128, kPerBlock = 8, mPerBlock = 128, nPerBlock = 128, kPerThread = 1, mPerThread = 2, nPerThread = 2, kpack = 1, splitKFactor = 1, scheduleVersion = 2, outputSwizzle = 2>
// CHECK-SAME: params = #rock.general_gemm_params<blockSize = 64, kPerBlock = 4, mPerBlock = 64, nPerBlock = 128, kPerThread = 1, mPerThread = 2, nPerThread = 4, kpack = 1, splitKFactor = 1, scheduleVersion = 2, outputSwizzle = 2>
// GRID: rock.gridwise_gemm
// GRID-SAME: gridSize = 900
// GRID-SAME: gridSize = 1800
rock.conv(%filter, %input, %output) features = none {
filter_layout = ["g", "k", "c", "0", "1"],
input_layout = ["ni", "gi", "ci", "0i", "1i"],
Expand All @@ -46,9 +46,9 @@ func.func @rock_conv_schedulev2(%filter : memref<1x128x8x3x3xf32>, %input : memr
// GRID-LABEL: func.func @rock_conv_f16
func.func @rock_conv_f16(%filter : memref<1x128x8x3x3xf16>, %input : memref<128x1x8x32x32xf16>, %output : memref<128x1x128x30x30xf16>) attributes {arch = "amdgcn-amd-amdhsa:gfx906"} {
// CHECK: rock.conv
// CHECK-SAME: params = #rock.general_gemm_params<blockSize = 128, kPerBlock = 8, mPerBlock = 128, nPerBlock = 128, kPerThread = 1, mPerThread = 2, nPerThread = 2, kpack = 1, splitKFactor = 1, scheduleVersion = 1, outputSwizzle = 2>
// CHECK-SAME: params = #rock.general_gemm_params<blockSize = 64, kPerBlock = 4, mPerBlock = 64, nPerBlock = 128, kPerThread = 1, mPerThread = 2, nPerThread = 4, kpack = 1, splitKFactor = 1, scheduleVersion = 1, outputSwizzle = 2>
// GRID: rock.gridwise_gemm
// GRID-SAME: gridSize = 900
// GRID-SAME: gridSize = 1800
rock.conv(%filter, %input, %output) features = none {
filter_layout = ["g", "k", "c", "0", "1"],
input_layout = ["ni", "gi", "ci", "0i", "1i"],
Expand Down Expand Up @@ -125,7 +125,7 @@ func.func @rock_conv_bwd_data_f16(%filter: memref<1x1024x1024x1x1xf16>, %input:
// GRID-LABEL: func.func @rock_conv_bwd_data_padMN
func.func @rock_conv_bwd_data_padMN(%filter : memref<1x64x3x1x1xf32>, %input : memref<11x1x3x15x15xf32>, %output : memref<11x1x64x15x15xf32>) attributes {arch = "amdgcn-amd-amdhsa:gfx906"} {
// CHECK: rock.conv_bwd_data
// CHECK-SAME: params = #rock.general_gemm_params<blockSize = 64, kPerBlock = 16, mPerBlock = 32, nPerBlock = 64, kPerThread = 1, mPerThread = 2, nPerThread = 2, kpack = 1, splitKFactor = 1, scheduleVersion = 1, outputSwizzle = 2>
// CHECK-SAME: params = #rock.general_gemm_params<blockSize = 64, kPerBlock = 8, mPerBlock = 32, nPerBlock = 64, kPerThread = 1, mPerThread = 2, nPerThread = 4, kpack = 1, splitKFactor = 1, scheduleVersion = 1, outputSwizzle = 2>
// GRID: rock.gridwise_gemm
// GRID-SAME: gridSize = 39
rock.conv_bwd_data(%filter, %input, %output) features = none {
Expand All @@ -145,9 +145,9 @@ func.func @rock_conv_bwd_data_padMN(%filter : memref<1x64x3x1x1xf32>, %input : m
// GRID-LABEL: @rock_conv_bwd_data_padMK
func.func @rock_conv_bwd_data_padMK(%filter : memref<1x11x3x1x1xf32>, %input : memref<128x1x3x15x15xf32>, %output : memref<128x1x11x15x15xf32>) attributes {arch = "amdgcn-amd-amdhsa:gfx906"} {
// CHECK: rock.conv_bwd_data
// CHECK-SAME: params = #rock.general_gemm_params<blockSize = 64, kPerBlock = 4, mPerBlock = 32, nPerBlock = 64, kPerThread = 1, mPerThread = 2, nPerThread = 2, kpack = 1, splitKFactor = 1, scheduleVersion = 1, outputSwizzle = 2>
// CHECK-SAME: params = #rock.general_gemm_params<blockSize = 64, kPerBlock = 4, mPerBlock = 32, nPerBlock = 128, kPerThread = 1, mPerThread = 2, nPerThread = 2, kpack = 1, splitKFactor = 1, scheduleVersion = 1, outputSwizzle = 2>
// GRID: rock.gridwise_gemm
// GRID-SAME: gridSize = 450
// GRID-SAME: gridSize = 225
rock.conv_bwd_data(%filter, %input, %output) features = none {
filter_layout = ["g", "k", "c", "0", "1"],
input_layout = ["ni", "gi", "ci", "0i", "1i"],
Expand All @@ -165,7 +165,7 @@ func.func @rock_conv_bwd_data_padMK(%filter : memref<1x11x3x1x1xf32>, %input : m
// GRID-LABEL: @rock_conv_bwd_weight
func.func @rock_conv_bwd_weight(%filter : memref<1x128x8x3x3xf32>, %input : memref<128x1x8x32x32xf32>, %output : memref<128x1x128x30x30xf32>) attributes {arch = "amdgcn-amd-amdhsa:gfx906", numCU = 64 : i32} {
// CHECK: rock.conv_bwd_weight
// CHECK-SAME: params = #rock.general_gemm_params<blockSize = 64, kPerBlock = 16, mPerBlock = 64, nPerBlock = 32, kPerThread = 1, mPerThread = 2, nPerThread = 2, kpack = 1, splitKFactor = 1, scheduleVersion = 1, outputSwizzle = 2>
// CHECK-SAME: params = #rock.general_gemm_params<blockSize = 64, kPerBlock = 8, mPerBlock = 64, nPerBlock = 32, kPerThread = 1, mPerThread = 2, nPerThread = 2, kpack = 1, splitKFactor = 1, scheduleVersion = 1, outputSwizzle = 2>
// GRID: rock.gridwise_gemm
// GRID-SAME: gridSize = 6
rock.conv_bwd_weight(%filter, %input, %output) features = none {
Expand All @@ -183,7 +183,7 @@ func.func @rock_conv_bwd_weight(%filter : memref<1x128x8x3x3xf32>, %input : memr
// GRID-LABEL: @rock_conv_bwd_weight_f16
func.func @rock_conv_bwd_weight_f16(%filter : memref<1x128x8x3x3xf16>, %input : memref<128x1x8x32x32xf16>, %output : memref<128x1x128x30x30xf16>) attributes {arch = "amdgcn-amd-amdhsa:gfx906", numCU = 64 : i32} {
// CHECK: rock.conv_bwd_weight
// CHECK-SAME: params = #rock.general_gemm_params<blockSize = 64, kPerBlock = 16, mPerBlock = 64, nPerBlock = 32, kPerThread = 1, mPerThread = 2, nPerThread = 2, kpack = 1, splitKFactor = 1, scheduleVersion = 1, outputSwizzle = 2>
// CHECK-SAME: params = #rock.general_gemm_params<blockSize = 64, kPerBlock = 8, mPerBlock = 64, nPerBlock = 32, kPerThread = 1, mPerThread = 2, nPerThread = 2, kpack = 1, splitKFactor = 1, scheduleVersion = 1, outputSwizzle = 2>
// GRID: rock.gridwise_gemm
// GRID-SAME: gridSize = 6
rock.conv_bwd_weight(%filter, %input, %output) features = none {
Expand All @@ -201,7 +201,7 @@ func.func @rock_conv_bwd_weight_f16(%filter : memref<1x128x8x3x3xf16>, %input :
// GRID-LABEL: func.func @rock_conv_bwd_weight_padALL
func.func @rock_conv_bwd_weight_padALL(%filter : memref<1x20x8x3x3xf32>, %input : memref<7x1x8x32x32xf32>, %output : memref<7x1x20x30x30xf32>) attributes {arch = "amdgcn-amd-amdhsa:gfx906", numCU = 64 : i32} {
// CHECK: rock.conv_bwd_weight
// CHECK-SAME: params = #rock.general_gemm_params<blockSize = 64, kPerBlock = 16, mPerBlock = 32, nPerBlock = 32, kPerThread = 1, mPerThread = 2, nPerThread = 2, kpack = 1, splitKFactor = 1, scheduleVersion = 1, outputSwizzle = 2>
// CHECK-SAME: params = #rock.general_gemm_params<blockSize = 128, kPerBlock = 16, mPerBlock = 32, nPerBlock = 32, kPerThread = 1, mPerThread = 2, nPerThread = 4, kpack = 1, splitKFactor = 1, scheduleVersion = 1, outputSwizzle = 2>
// GRID: rock.gridwise_gemm
// GRID-SAME: gridSize = 3
rock.conv_bwd_weight(%filter, %input, %output) features = none {
Expand All @@ -219,7 +219,7 @@ func.func @rock_conv_bwd_weight_padALL(%filter : memref<1x20x8x3x3xf32>, %input
// GRID-LABEL: @rock_conv_bwd_weight_padALL_f16
func.func @rock_conv_bwd_weight_padALL_f16(%filter : memref<1x20x8x3x3xf16>, %input : memref<7x1x8x32x32xf16>, %output : memref<7x1x20x30x30xf16>) attributes {arch = "amdgcn-amd-amdhsa:gfx906", numCU = 64 : i32} {
// CHECK: rock.conv_bwd_weight
// CHECK-SAME: params = #rock.general_gemm_params<blockSize = 64, kPerBlock = 16, mPerBlock = 32, nPerBlock = 32, kPerThread = 1, mPerThread = 2, nPerThread = 2, kpack = 1, splitKFactor = 1, scheduleVersion = 1, outputSwizzle = 2>
// CHECK-SAME: params = #rock.general_gemm_params<blockSize = 128, kPerBlock = 16, mPerBlock = 32, nPerBlock = 32, kPerThread = 1, mPerThread = 2, nPerThread = 4, kpack = 1, splitKFactor = 1, scheduleVersion = 1, outputSwizzle = 2>
// GRID: rock.gridwise_gemm
// GRID-SAME: gridSize = 3
rock.conv_bwd_weight(%filter, %input, %output) features = none {
Expand Down Expand Up @@ -340,7 +340,7 @@ func.func @rock_conv_bwd_data_7x7(%arg0: memref<1x64x3x7x7xf32>, %arg1: memref<2
// GRID-LABEL: @rock_gemm_from_conv
func.func @rock_gemm_from_conv(%a : memref<1x72x128xf32>, %b : memref<1x72x115200xf32>, %c : memref<1x128x115200xf32>) attributes {arch = "amdgcn-amd-amdhsa:gfx906", numCU = 64 : i32} {
// CHECK: rock.gemm
// CHECK-SAME: params = #rock.general_gemm_params<blockSize = 128, kPerBlock = 8, mPerBlock = 128, nPerBlock = 128, kPerThread = 1, mPerThread = 4, nPerThread = 4, kpack = 1, splitKFactor = 1, scheduleVersion = 1, outputSwizzle = 2>
// CHECK-SAME: params = #rock.general_gemm_params<blockSize = 128, kPerBlock = 4, mPerBlock = 128, nPerBlock = 128, kPerThread = 1, mPerThread = 4, nPerThread = 2, kpack = 1, splitKFactor = 1, scheduleVersion = 1, outputSwizzle = 2>
// GRID: rock.gridwise_gemm
// GRID-SAME: gridSize = 900
rock.gemm %c = tr %a * %b features = none storeMethod = set
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
// EMITKEY: -t f16 -out_datatype f16 -transA false -transB false -g 2 -m 4096 -n 640 -k 320

// VECTORIZATION: aVectorDim: GemmDimension::MorN
// VECTORIZATION-NEXT: aVectorLen: 2
// VECTORIZATION-NEXT: aVectorLen: 4
// VECTORIZATION: bVectorDim: GemmDimension::MorN
// VECTORIZATION-NEXT: bVectorLen: 2

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
// EMITKEY: -t f16 -out_datatype f16 -transA false -transB true -g 2 -m 4096 -n 640 -k 320

// VECTORIZATION: aVectorDim: GemmDimension::MorN
// VECTORIZATION-NEXT: aVectorLen: 2
// VECTORIZATION-NEXT: aVectorLen: 4
// VECTORIZATION: bVectorDim: GemmDimension::K
// VECTORIZATION-NEXT: bVectorLen: 8

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
// EMITKEY: -t f16 -out_datatype f16 -transA true -transB false -g 2 -m 4096 -n 640 -k 320

// VECTORIZATION: aVectorDim: GemmDimension::MorN
// VECTORIZATION-NEXT: aVectorLen: 2
// VECTORIZATION-NEXT: aVectorLen: 4
// VECTORIZATION: bVectorDim: GemmDimension::MorN
// VECTORIZATION-NEXT: bVectorLen: 2

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
// EMITKEY: -t f16 -out_datatype f16 -transA true -transB false -g 2 -m 4096 -n 640 -k 320

// VECTORIZATION: aVectorDim: GemmDimension::MorN
// VECTORIZATION-NEXT: aVectorLen: 2
// VECTORIZATION-NEXT: aVectorLen: 4
// VECTORIZATION: bVectorDim: GemmDimension::MorN
// VECTORIZATION-NEXT: bVectorLen: 2

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
// EMITKEY: -t f16 -out_datatype f16 -transA true -transB true -g 2 -m 4096 -n 640 -k 320

// VECTORIZATION: aVectorDim: GemmDimension::MorN
// VECTORIZATION-NEXT: aVectorLen: 2
// VECTORIZATION-NEXT: aVectorLen: 4
// VECTORIZATION: bVectorDim: GemmDimension::K
// VECTORIZATION-NEXT: bVectorLen: 8

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
// EMITKEY: -t f16 -out_datatype f16 -transA true -transB true -g 2 -m 4096 -n 640 -k 320

// VECTORIZATION: aVectorDim: GemmDimension::MorN
// VECTORIZATION-NEXT: aVectorLen: 2
// VECTORIZATION-NEXT: aVectorLen: 4
// VECTORIZATION: bVectorDim: GemmDimension::K
// VECTORIZATION-NEXT: bVectorLen: 8

Expand Down
2 changes: 1 addition & 1 deletion mlir/test/fusion/tosa-to-rock-gemm-reshape-add.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

// CHECK_LINALG_ALIGN-COUNT-2: rock.threadwise_read_into {{.*}}
// CHECK_LINALG_ALIGN: rock.threadwise_read_into {{.*}} -> [[lain:%.*]] :
// CHECK_LINALG_ALIGN: linalg.generic{{.*}} ins({{.*}}, [[lain]] :{{.*}}) outs(%[[outBuf:.*]] : memref<32xf32, #gpu.address_space<private>>)
// CHECK_LINALG_ALIGN: linalg.generic{{.*}} ins({{.*}}, [[lain]] :{{.*}}) outs(%[[outBuf:.*]] : memref<8xf32, #gpu.address_space<private>>)
// CHECK_LINALG_ALIGN: rock.threadwise_write_all {{.*}} %[[outBuf]] ->
// to test reshape is converted as transform and fused.

Expand Down
2 changes: 1 addition & 1 deletion mlir/test/fusion/tosa-to-rock-tp-add-tp.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
// CHECK-DAG: #[[MAP2:.*]] = #rock.transform_map<{{.*}} by [<PassThrough ["dim0", "dim2", "dim3", "dim1"] at [0, 1, 2, 3] -> ["dim0", "dim2", "dim3", "dim1"] at [0, 2, 3, 1]>] bounds = [256, 28, 28, 64] -> [256, 64, 28, 28]>
// CHECK-COUNT-2: rock.threadwise_read_into {{.*}}
// CHECK: rock.threadwise_read_into {{.*}} -> [[lain:%.*]] :
// CHECK: linalg.generic{{.*}} ins({{.*}}, [[lain]] :{{.*}}) outs(%[[outBuf:.*]] : memref<32xf32, #gpu.address_space<private>>)
// CHECK: linalg.generic{{.*}} ins({{.*}}, [[lain]] :{{.*}}) outs(%[[outBuf:.*]] : memref<128xf32, #gpu.address_space<private>>)
// CHECK: rock.threadwise_write_all {{.*}} %[[outBuf]] ->
// to test transpose is converted as transform and fused.

Expand Down
2 changes: 1 addition & 1 deletion mlir/test/fusion/tosa-to-rock-tp-add.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
// CHECK-DAG: #[[MAP2:.*]] = #rock.transform_map<#map{{.*}} by [<PassThrough ["{{.*}}", "{{.*}}", "{{.*}}", "{{.*}}"] at [0, 1, 2, 3] -> ["{{.*}}", "{{.*}}", "{{.*}}", "{{.*}}"] at [0, 2, 3, 1]>] bounds = [256, 28, 28, 64] -> [256, 64, 28, 28]>
// CHECK-COUNT-2: rock.threadwise_read_into {{.*}}
// CHECK: rock.threadwise_read_into {{.*}} -> [[lain:%.*]] :
// CHECK: linalg.generic{{.*}} ins({{.*}}, [[lain]] :{{.*}}) outs(%[[outBuf:.*]] : memref<32xf32, #gpu.address_space<private>>)
// CHECK: linalg.generic{{.*}} ins({{.*}}, [[lain]] :{{.*}}) outs(%[[outBuf:.*]] : memref<128xf32, #gpu.address_space<private>>)
// CHECK: rock.threadwise_write_all {{.*}} %[[outBuf]] ->
// to test transpose is converted as transform and fused.

Expand Down
2 changes: 1 addition & 1 deletion mlir/test/mlir-rock-lib/populate_bwd.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@

// ZEROINIT_KERNELCOUNT: Kernel count=1
// ZEROINIT_BIN: ELF
// ZEROINIT_TUNING: globalSize=100352, localSize=128
// ZEROINIT_TUNING: globalSize=100352, localSize=64
// ZEROINIT_DRIVER: %arg1: memref<{{.*}}xf16> {rock.prefill = 0.000000e+00 : f16}
// ZEROINIT_DRIVER-COUNT-3: rock.transform %{{.+}} by
// ZEROINIT_DRIVER-NEXT: rock.conv_bwd_data(%{{.+}}, %{{.+}}, %{{.+}}) features = dot {dilations = [1 : index, 1 : index], filter_layout = ["g", "k", "c", "0", "1"], input_layout = ["ni", "gi", "ci", "0i", "1i"], kernelId = 0 : index, output_layout = ["no", "go", "ko", "0o", "1o"], padding = [0 : index, 0 : index, 0 : index, 0 : index], strides = [2 : index, 2 : index], usesV4R1 = true} : memref<1x2048x1024x1x1xf16>, memref<256x1x1024x14x14xf16>, memref<256x1x2048x7x7xf16>
6 changes: 3 additions & 3 deletions mlir/unittests/Dialect/Rock/ParamLookupTableTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ TEST(FindFallbackTest, OldestRelative) {
}

TEST(FindFallbackTest, YoungestRelative) {
// gfx1200 is the youngest available relative for gfx1900
EXPECT_EQ("gfx1200_conv_f16", ParamLookupTable<InitParamsAccel>::findFallback(
// gfx1201 is the youngest available relative for gfx1900
EXPECT_EQ("gfx1201_conv_f16", ParamLookupTable<InitParamsAccel>::findFallback(
"gfx1900_conv_f16"));
}

Expand Down Expand Up @@ -63,6 +63,6 @@ TEST(FindFallbackTest, NoRelativesBySuffix) {
TEST(FindFallbackTest, AnyGfxForNonAccel) {
// Any gfx version is acceptable for non-accelerated operations
EXPECT_EQ(
"gfx1200_gemm_f32",
"gfx1201_gemm_f32",
ParamLookupTable<InitParamsNonAccel>::findFallback("gfx942_gemm_f32"));
}