From 851ec2a388be6b822cc850040a162faf8083bbbd Mon Sep 17 00:00:00 2001 From: mshahid Date: Mon, 14 Jul 2025 09:39:06 -0700 Subject: [PATCH 1/5] [mlir][linalg] Add mixed precision folding pattern in transform op. In case of mixed precision inputs, the inputs are generally cast to match the output type, thereby introducing arith.extFOp/extIOp instructions. Folding such a pattern into vector.contract is desirable for HW having mixed precision ISA support. This patch adds folding of the mixed precision pattern into vector.contract optionally, which can be enabled using the attribute 'vectorize_mixed_precision'. --- .../Linalg/TransformOps/LinalgTransformOps.td | 5 ++ .../TransformOps/LinalgTransformOps.cpp | 13 ++- .../Linalg/transform-op-vectorize.mlir | 89 +++++++++++++++++++ 3 files changed, 106 insertions(+), 1 deletion(-) diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td index 61ce23f07faa8..02da2ce30f215 100644 --- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td +++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td @@ -2348,6 +2348,9 @@ def VectorizeChildrenAndApplyPatternsOp : operation that is contained inside the vectorization target. This transformation supports the following attributes: + - `vectorize_mixed_precision`: a `UnitAttr` to activate the vectorization + of ops that have mixed precision types. This enables the folding of + arith.extFOp/arith.extIOp into vector.contract with mixed precision. - `vectorize_padding`: a `UnitAttr` to activate the vectorization of `tensor.pad` ops. Different pipelines may prefer to lower such ops to loops. 
@@ -2368,6 +2371,7 @@ def VectorizeChildrenAndApplyPatternsOp : }]; let arguments = (ins TransformHandleTypeInterface:$target, + UnitAttr:$vectorize_mixed_precision, UnitAttr:$vectorize_padding, UnitAttr:$vectorize_nd_extract, UnitAttr:$flatten_1d_depthwise_conv, @@ -2381,6 +2385,7 @@ def VectorizeChildrenAndApplyPatternsOp : let builders = [ OpBuilder<(ins "Value":$target, + CArg<"bool", "false">:$vectorizeMixedPrecision, CArg<"bool", "false">:$vectorizePadding, CArg<"bool", "false">:$vectorizeNDExtract, CArg<"bool", "false">:$flatten1DDepthwise)> diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp index bdfc8d020e58f..416052dd28500 100644 --- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp +++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp @@ -3783,8 +3783,15 @@ LogicalResult TileUsingForallOp::verify() { void transform::VectorizeChildrenAndApplyPatternsOp::build( OpBuilder &builder, OperationState &result, Value target, - bool vectorizePadding, bool vectorizeExtract, bool flatten1DDepthwiseConv) { + bool vectorizeMixedPrecision, bool vectorizePadding, bool vectorizeExtract, + bool flatten1DDepthwiseConv) { result.addOperands(target); + if (vectorizeMixedPrecision) { + result.addAttribute( + VectorizeChildrenAndApplyPatternsOp::getVectorizeMixedPrecisionAttrName( + result.name), + builder.getUnitAttr()); + } if (vectorizePadding) { result.addAttribute( VectorizeChildrenAndApplyPatternsOp::getVectorizePaddingAttrName( @@ -3875,6 +3882,10 @@ transform::VectorizeChildrenAndApplyPatternsOp::applyToOne( patterns.add(ctx); + if (getVectorizeMixedPrecision()) { + vector::populateFoldArithExtensionPatterns(patterns); + } + if (getVectorizePadding()) { linalg::populatePadOpVectorizationPatterns(patterns); // This creates an alternative path for lowering tensor.pad - by diff --git a/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir 
b/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir index 0d59dbba8940d..96f89653d20ca 100644 --- a/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir +++ b/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir @@ -190,3 +190,92 @@ module attributes {transform.with_named_sequence} { transform.yield } } + +// ----- + +// Mixed Precision vetorization tests. + +// CHECK-LABEL: func @mixed_precision_generic_as_contract +// CHECK-COUNT-3: vector.transfer_read +// CHECK-NOT: arith.extf +// CHECK: vector.contract +// CHECK: vector.transfer_write +func.func @mixed_precision_generic_as_contract(%A: memref<8x16xbf16>, %B: memref<16x32xbf16>, + %C: memref<8x32xf32>) { + linalg.generic { + indexing_maps = [ + affine_map<(m, n, k) -> (m, k)>, + affine_map<(m, n, k) -> (k, n)>, + affine_map<(m, n, k) -> (m, n)> + ], + iterator_types = ["parallel", "parallel", "reduction"] + } + ins(%A, %B : memref<8x16xbf16>, memref<16x32xbf16>) + outs(%C : memref<8x32xf32>) { + ^bb(%in: bf16, %in_0: bf16, %c: f32) : + %a = arith.extf %in : bf16 to f32 + %b = arith.extf %in_0 : bf16 to f32 + %d = arith.mulf %a, %b: f32 + %e = arith.addf %c, %d: f32 + linalg.yield %e : f32 + } + return +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op + %2 = transform.structured.vectorize_children_and_apply_patterns %1 { vectorize_mixed_precision, disable_transfer_permutation_map_lowering_patterns } : (!transform.any_op) -> !transform.any_op + transform.yield + } +} + +// ----- + +// CHECK-LABEL: @mixed_precision_matmul_as_contract +// CHECK-COUNT-3: vector.transfer_read +// CHECK-NOT: arith.extf +// CHECK: vector.contract +// CHECK: vector.transfer_write +func.func 
@mixed_precision_matmul_as_contract(%A: tensor<24x12xbf16>, + %B: tensor<12x25xbf16>, + %C: tensor<24x25xf32>) -> tensor<24x25xf32> { + %0 = linalg.contract + indexing_maps = [affine_map<(m, n, k) -> (m, k)>, + affine_map<(m, n, k) -> (k, n)>, + affine_map<(m, n, k) -> (m, n)>] + ins(%A, %B : tensor<24x12xbf16>, tensor<12x25xbf16>) + outs(%C : tensor<24x25xf32>) -> tensor<24x25xf32> + func.return %0 : tensor<24x25xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.contract"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op + %2 = transform.structured.vectorize_children_and_apply_patterns %1 { vectorize_mixed_precision } : (!transform.any_op) -> !transform.any_op + transform.yield + } +} + +// ----- + +// CHECK-LABEL: @contraction_matmul +// CHECK-COUNT-3: vector.transfer_read +// CHECK-NOT: arith.extf +// CHECK: vector.contract +func.func @contraction_matmul(%A: memref<1584x1584xbf16>, %B: memref<1584x1584xbf16>, %C: memref<1584x1584xf32>) { + linalg.matmul ins(%A, %B: memref<1584x1584xbf16>, memref<1584x1584xbf16>) + outs(%C: memref<1584x1584xf32>) + return +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op + %2 = transform.structured.vectorize_children_and_apply_patterns %1 { vectorize_mixed_precision } : (!transform.any_op) -> !transform.any_op + transform.yield + } +} From 0c11c3990a7310b21cf77d3a8676ec0e3a35f1d3 Mon Sep 17 00:00:00 2001 From: mshahid Date: Tue, 5 Aug 2025 04:33:31 -0700 Subject: [PATCH 
2/5] -Moved the tests to the appropriate place and added a few more tests. -Refactored some code and comments. --- .../Linalg/TransformOps/LinalgTransformOps.td | 7 +- .../TransformOps/LinalgTransformOps.cpp | 13 +- .../Linalg/transform-op-vectorize.mlir | 91 +--------- .../linalg-ops-with-patterns.mlir | 155 ++++++++++++++++++ 4 files changed, 165 insertions(+), 101 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td index 02da2ce30f215..8e9ba50a61416 100644 --- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td +++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td @@ -2348,8 +2348,7 @@ def VectorizeChildrenAndApplyPatternsOp : operation that is contained inside the vectorization target. This transformation supports the following attributes: - - `vectorize_mixed_precision`: a `UnitAttr` to activate the vectorization - of ops that have mixed precision types. This enables the folding of + - `fold_mixed_precision_into_contract`: a `UnitAttr` to enable the folding of arith.extFOp/arith.extIOp into vector.contract with mixed precision. - `vectorize_padding`: a `UnitAttr` to activate the vectorization of `tensor.pad` ops. 
Different pipelines may prefer to lower such ops to @@ -2371,7 +2370,7 @@ def VectorizeChildrenAndApplyPatternsOp : }]; let arguments = (ins TransformHandleTypeInterface:$target, - UnitAttr:$vectorize_mixed_precision, + UnitAttr:$fold_mixed_precision_into_contract, UnitAttr:$vectorize_padding, UnitAttr:$vectorize_nd_extract, UnitAttr:$flatten_1d_depthwise_conv, @@ -2385,7 +2384,7 @@ def VectorizeChildrenAndApplyPatternsOp : let builders = [ OpBuilder<(ins "Value":$target, - CArg<"bool", "false">:$vectorizeMixedPrecision, + CArg<"bool", "false">:$foldMixedPrecisionIntoContract, CArg<"bool", "false">:$vectorizePadding, CArg<"bool", "false">:$vectorizeNDExtract, CArg<"bool", "false">:$flatten1DDepthwise)> diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp index 416052dd28500..13c84a79c227c 100644 --- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp +++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp @@ -3783,13 +3783,13 @@ LogicalResult TileUsingForallOp::verify() { void transform::VectorizeChildrenAndApplyPatternsOp::build( OpBuilder &builder, OperationState &result, Value target, - bool vectorizeMixedPrecision, bool vectorizePadding, bool vectorizeExtract, - bool flatten1DDepthwiseConv) { + bool foldMixedPrecisionIntoContract, bool vectorizePadding, + bool vectorizeExtract, bool flatten1DDepthwiseConv) { result.addOperands(target); - if (vectorizeMixedPrecision) { + if (foldMixedPrecisionIntoContract) { result.addAttribute( - VectorizeChildrenAndApplyPatternsOp::getVectorizeMixedPrecisionAttrName( - result.name), + VectorizeChildrenAndApplyPatternsOp:: + getFoldMixedPrecisionIntoContractAttrName(result.name), builder.getUnitAttr()); } if (vectorizePadding) { @@ -3882,9 +3882,8 @@ transform::VectorizeChildrenAndApplyPatternsOp::applyToOne( patterns.add(ctx); - if (getVectorizeMixedPrecision()) { + if (getFoldMixedPrecisionIntoContract()) 
vector::populateFoldArithExtensionPatterns(patterns); - } if (getVectorizePadding()) { linalg::populatePadOpVectorizationPatterns(patterns); diff --git a/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir b/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir index 96f89653d20ca..e0c5cddfaf30b 100644 --- a/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir +++ b/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir @@ -189,93 +189,4 @@ module attributes {transform.with_named_sequence} { %2 = transform.structured.vectorize_children_and_apply_patterns %0 : (!transform.any_op) -> !transform.any_op transform.yield } -} - -// ----- - -// Mixed Precision vetorization tests. - -// CHECK-LABEL: func @mixed_precision_generic_as_contract -// CHECK-COUNT-3: vector.transfer_read -// CHECK-NOT: arith.extf -// CHECK: vector.contract -// CHECK: vector.transfer_write -func.func @mixed_precision_generic_as_contract(%A: memref<8x16xbf16>, %B: memref<16x32xbf16>, - %C: memref<8x32xf32>) { - linalg.generic { - indexing_maps = [ - affine_map<(m, n, k) -> (m, k)>, - affine_map<(m, n, k) -> (k, n)>, - affine_map<(m, n, k) -> (m, n)> - ], - iterator_types = ["parallel", "parallel", "reduction"] - } - ins(%A, %B : memref<8x16xbf16>, memref<16x32xbf16>) - outs(%C : memref<8x32xf32>) { - ^bb(%in: bf16, %in_0: bf16, %c: f32) : - %a = arith.extf %in : bf16 to f32 - %b = arith.extf %in_0 : bf16 to f32 - %d = arith.mulf %a, %b: f32 - %e = arith.addf %c, %d: f32 - linalg.yield %e : f32 - } - return -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op - %2 = transform.structured.vectorize_children_and_apply_patterns %1 { vectorize_mixed_precision, 
disable_transfer_permutation_map_lowering_patterns } : (!transform.any_op) -> !transform.any_op - transform.yield - } -} - -// ----- - -// CHECK-LABEL: @mixed_precision_matmul_as_contract -// CHECK-COUNT-3: vector.transfer_read -// CHECK-NOT: arith.extf -// CHECK: vector.contract -// CHECK: vector.transfer_write -func.func @mixed_precision_matmul_as_contract(%A: tensor<24x12xbf16>, - %B: tensor<12x25xbf16>, - %C: tensor<24x25xf32>) -> tensor<24x25xf32> { - %0 = linalg.contract - indexing_maps = [affine_map<(m, n, k) -> (m, k)>, - affine_map<(m, n, k) -> (k, n)>, - affine_map<(m, n, k) -> (m, n)>] - ins(%A, %B : tensor<24x12xbf16>, tensor<12x25xbf16>) - outs(%C : tensor<24x25xf32>) -> tensor<24x25xf32> - func.return %0 : tensor<24x25xf32> -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["linalg.contract"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op - %2 = transform.structured.vectorize_children_and_apply_patterns %1 { vectorize_mixed_precision } : (!transform.any_op) -> !transform.any_op - transform.yield - } -} - -// ----- - -// CHECK-LABEL: @contraction_matmul -// CHECK-COUNT-3: vector.transfer_read -// CHECK-NOT: arith.extf -// CHECK: vector.contract -func.func @contraction_matmul(%A: memref<1584x1584xbf16>, %B: memref<1584x1584xbf16>, %C: memref<1584x1584xf32>) { - linalg.matmul ins(%A, %B: memref<1584x1584xbf16>, memref<1584x1584xbf16>) - outs(%C: memref<1584x1584xf32>) - return -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> 
!transform.any_op - %2 = transform.structured.vectorize_children_and_apply_patterns %1 { vectorize_mixed_precision } : (!transform.any_op) -> !transform.any_op - transform.yield - } -} +} \ No newline at end of file diff --git a/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir b/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir index 4eeae4c064519..86a09682ec159 100644 --- a/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir +++ b/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir @@ -1777,3 +1777,158 @@ module attributes {transform.with_named_sequence} { transform.yield } } + +// ----- + +// Mixed precision vectorization tests. + +// CHECK-LABEL: func @float_mixed_precision_generic_as_contract +// CHECK-COUNT-3: vector.transfer_read +// CHECK-NOT: arith.extf +// CHECK: vector.contract +// CHECK: vector.transfer_write +func.func @float_mixed_precision_generic_as_contract(%A: memref<8x16xbf16>, %B: memref<16x32xbf16>, + %C: memref<8x32xf32>) { + linalg.generic { + indexing_maps = [ + affine_map<(m, n, k) -> (m, k)>, + affine_map<(m, n, k) -> (k, n)>, + affine_map<(m, n, k) -> (m, n)> + ], + iterator_types = ["parallel", "parallel", "reduction"] + } + ins(%A, %B : memref<8x16xbf16>, memref<16x32xbf16>) + outs(%C : memref<8x32xf32>) { + ^bb(%in: bf16, %in_0: bf16, %c: f32) : + %a = arith.extf %in : bf16 to f32 + %b = arith.extf %in_0 : bf16 to f32 + %d = arith.mulf %a, %b: f32 + %e = arith.addf %c, %d: f32 + linalg.yield %e : f32 + } + return +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op + %2 = transform.structured.vectorize_children_and_apply_patterns %1 { 
fold_mixed_precision_into_contract, disable_transfer_permutation_map_lowering_patterns } : (!transform.any_op) -> !transform.any_op + transform.yield + } +} + +// ----- + +// CHECK-LABEL: func @integer_mixed_precision_generic_as_contract +// CHECK-COUNT-3: vector.transfer_read +// CHECK-NOT: arith.extsi +// CHECK: vector.contract +// CHECK: vector.transfer_write +func.func @integer_mixed_precision_generic_as_contract(%A: memref<8x16xi8>, %B: memref<16x32xi8>, + %C: memref<8x32xi32>) { + linalg.generic { + indexing_maps = [ + affine_map<(m, n, k) -> (m, k)>, + affine_map<(m, n, k) -> (k, n)>, + affine_map<(m, n, k) -> (m, n)> + ], + iterator_types = ["parallel", "parallel", "reduction"] + } + ins(%A, %B : memref<8x16xi8>, memref<16x32xi8>) + outs(%C : memref<8x32xi32>) { + ^bb(%in: i8, %in_0: i8, %c: i32) : + %a = arith.extsi %in : i8 to i32 + %b = arith.extsi %in_0 : i8 to i32 + %d = arith.muli %a, %b: i32 + %e = arith.addi %c, %d: i32 + linalg.yield %e : i32 + } + return +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op + %2 = transform.structured.vectorize_children_and_apply_patterns %1 { fold_mixed_precision_into_contract, disable_transfer_permutation_map_lowering_patterns } : (!transform.any_op) -> !transform.any_op + transform.yield + } +} + +// ----- + +// CHECK-LABEL: @float_mixed_precision_matmul_as_contract +// CHECK-COUNT-3: vector.transfer_read +// CHECK-NOT: arith.extf +// CHECK: vector.contract +// CHECK: vector.transfer_write +func.func @float_mixed_precision_matmul_as_contract(%A: tensor<24x12xbf16>, + %B: tensor<12x25xbf16>, + %C: tensor<24x25xf32>) -> tensor<24x25xf32> { + %0 = linalg.contract + indexing_maps = [affine_map<(m, n, k) -> (m, 
k)>, + affine_map<(m, n, k) -> (k, n)>, + affine_map<(m, n, k) -> (m, n)>] + ins(%A, %B : tensor<24x12xbf16>, tensor<12x25xbf16>) + outs(%C : tensor<24x25xf32>) -> tensor<24x25xf32> + func.return %0 : tensor<24x25xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.contract"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op + %2 = transform.structured.vectorize_children_and_apply_patterns %1 { fold_mixed_precision_into_contract } : (!transform.any_op) -> !transform.any_op + transform.yield + } +} + +// ----- + +// CHECK-LABEL: @integer_mixed_precision_matmul_as_contract +// CHECK-COUNT-3: vector.transfer_read +// CHECK-NOT: arith.extf +// CHECK: vector.contract +// CHECK: vector.transfer_write +func.func @integer_mixed_precision_matmul_as_contract(%A: tensor<24x12xi8>, + %B: tensor<12x25xi8>, + %C: tensor<24x25xi32>) -> tensor<24x25xi32> { + %0 = linalg.contract + indexing_maps = [affine_map<(m, n, k) -> (m, k)>, + affine_map<(m, n, k) -> (k, n)>, + affine_map<(m, n, k) -> (m, n)>] + ins(%A, %B : tensor<24x12xi8>, tensor<12x25xi8>) + outs(%C : tensor<24x25xi32>) -> tensor<24x25xi32> + func.return %0 : tensor<24x25xi32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.contract"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op + %2 = transform.structured.vectorize_children_and_apply_patterns %1 { fold_mixed_precision_into_contract } : (!transform.any_op) -> !transform.any_op + transform.yield + } +} + +// ----- + +// CHECK-LABEL: @contraction_matmul +// CHECK-COUNT-3: 
vector.transfer_read +// CHECK-NOT: arith.extf +// CHECK: vector.contract +func.func @contraction_matmul(%A: memref<1584x1584xbf16>, %B: memref<1584x1584xbf16>, %C: memref<1584x1584xf32>) { + linalg.matmul ins(%A, %B: memref<1584x1584xbf16>, memref<1584x1584xbf16>) + outs(%C: memref<1584x1584xf32>) + return +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op + %2 = transform.structured.vectorize_children_and_apply_patterns %1 { fold_mixed_precision_into_contract } : (!transform.any_op) -> !transform.any_op + transform.yield + } +} From edcedf43423d6753d7571111f6062edabdbdc499 Mon Sep 17 00:00:00 2001 From: mshahid Date: Wed, 6 Aug 2025 04:36:33 -0700 Subject: [PATCH 3/5] Renamed the related attribute and grouped the same operation tests together. --- .../Linalg/TransformOps/LinalgTransformOps.td | 8 +- .../TransformOps/LinalgTransformOps.cpp | 8 +- .../linalg-ops-with-patterns.mlir | 752 +++++++++--------- 3 files changed, 370 insertions(+), 398 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td index 8e9ba50a61416..885f07176304a 100644 --- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td +++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td @@ -2348,8 +2348,8 @@ def VectorizeChildrenAndApplyPatternsOp : operation that is contained inside the vectorization target. This transformation supports the following attributes: - - `fold_mixed_precision_into_contract`: a `UnitAttr` to enable the folding of - arith.extFOp/arith.extIOp into vector.contract with mixed precision. 
+ - `fold_type_extensions_into_contract`: a `UnitAttr` to enable the folding of + cast operations into vector.contract with mixed precision. - `vectorize_padding`: a `UnitAttr` to activate the vectorization of `tensor.pad` ops. Different pipelines may prefer to lower such ops to loops. @@ -2370,7 +2370,7 @@ def VectorizeChildrenAndApplyPatternsOp : }]; let arguments = (ins TransformHandleTypeInterface:$target, - UnitAttr:$fold_mixed_precision_into_contract, + UnitAttr:$fold_type_extensions_into_contract, UnitAttr:$vectorize_padding, UnitAttr:$vectorize_nd_extract, UnitAttr:$flatten_1d_depthwise_conv, @@ -2384,7 +2384,7 @@ def VectorizeChildrenAndApplyPatternsOp : let builders = [ OpBuilder<(ins "Value":$target, - CArg<"bool", "false">:$foldMixedPrecisionIntoContract, + CArg<"bool", "false">:$foldTypeExtensionsIntoContract, CArg<"bool", "false">:$vectorizePadding, CArg<"bool", "false">:$vectorizeNDExtract, CArg<"bool", "false">:$flatten1DDepthwise)> diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp index 13c84a79c227c..87547436eb474 100644 --- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp +++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp @@ -3783,13 +3783,13 @@ LogicalResult TileUsingForallOp::verify() { void transform::VectorizeChildrenAndApplyPatternsOp::build( OpBuilder &builder, OperationState &result, Value target, - bool foldMixedPrecisionIntoContract, bool vectorizePadding, + bool foldTypeExtensionsIntoContract, bool vectorizePadding, bool vectorizeExtract, bool flatten1DDepthwiseConv) { result.addOperands(target); - if (foldMixedPrecisionIntoContract) { + if (foldTypeExtensionsIntoContract) { result.addAttribute( VectorizeChildrenAndApplyPatternsOp:: - getFoldMixedPrecisionIntoContractAttrName(result.name), + getFoldTypeExtensionsIntoContractAttrName(result.name), builder.getUnitAttr()); } if (vectorizePadding) { @@ -3882,7 +3882,7 @@ 
transform::VectorizeChildrenAndApplyPatternsOp::applyToOne( patterns.add(ctx); - if (getFoldMixedPrecisionIntoContract()) + if (getFoldTypeExtensionsIntoContract()) vector::populateFoldArithExtensionPatterns(patterns); if (getVectorizePadding()) { diff --git a/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir b/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir index 86a09682ec159..c98c024340825 100644 --- a/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir +++ b/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir @@ -61,6 +61,85 @@ module attributes {transform.with_named_sequence} { // ----- +// Mixed precision vectorization tests. + +// CHECK-LABEL: @float_mixed_precision_matmul +// CHECK-COUNT-3: vector.transfer_read +// CHECK-NOT: arith.extf +// CHECK: vector.contract {{.*}} : vector<1584x1584xbf16>, vector<1584x1584xbf16> into vector<1584x1584xf32> +func.func @float_mixed_precision_matmul(%A: memref<1584x1584xbf16>, %B: memref<1584x1584xbf16>, %C: memref<1584x1584xf32>) { + linalg.matmul ins(%A, %B: memref<1584x1584xbf16>, memref<1584x1584xbf16>) + outs(%C: memref<1584x1584xf32>) + return +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op + %2 = transform.structured.vectorize_children_and_apply_patterns %1 { fold_type_extensions_into_contract } : (!transform.any_op) -> !transform.any_op + transform.yield + } +} + +// ----- + +// CHECK-LABEL: func @vectorization_test_2 +func.func @vectorization_test_2(%A: memref<8x16xf32>, %B: memref<16x32xf32>, + %C: memref<8x32xf32>) { + // CHECK: arith.mulf %{{.*}}, %{{.*}} : vector<8x32x16xf32> + // CHECK: vector.multi_reduction , %{{.*}}, {{.*}} 
[2] : vector<8x32x16xf32> to vector<8x32xf32> + linalg.matmul + ins(%A, %B: memref<8x16xf32>, memref<16x32xf32>) + outs(%C: memref<8x32xf32>) + return +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op + %2 = transform.structured.vectorize_children_and_apply_patterns %1 { disable_multi_reduction_to_contract_patterns } : (!transform.any_op) -> !transform.any_op + transform.yield + } +} + +// ----- + +// CHECK-LABEL: func @matmul_tensors +// CHECK-SAME: (%[[ARG0:.*]]: tensor<8x4xf32>, %[[ARG1:.*]]: tensor<4x12xf32>, +// CHECK-SAME: %[[ARG2:.*]]: tensor<8x12xf32>) -> tensor<8x12xf32> +func.func @matmul_tensors( + %arg0: tensor<8x4xf32>, %arg1: tensor<4x12xf32>, %arg2: tensor<8x12xf32>) + -> tensor<8x12xf32> { + // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index + // CHECK-DAG: %[[V0:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]]], {{.*}} : tensor<8x4xf32>, vector<8x12x4xf32> + // CHECK-DAG: %[[V1:.*]] = vector.transfer_read %[[ARG1]][%[[C0]], %[[C0]]], {{.*}} : tensor<4x12xf32>, vector<8x12x4xf32> + // CHECK-DAG: %[[V2:.*]] = vector.transfer_read %[[ARG2]][%[[C0]], %[[C0]]], {{.*}} : tensor<8x12xf32>, vector<8x12xf32> + // + // linalg matmul lowers gets expanded to a 3D reduction, canonicalization later + // convert it to a 2D contract. 
+ // CHECK: %[[MUL:.*]] = arith.mulf %[[V0]], %[[V1]] : vector<8x12x4xf32> + // CHECK: %[[R:.*]] = vector.multi_reduction , %[[MUL]], %[[V2]] [2] : vector<8x12x4xf32> to vector<8x12xf32> + // CHECK: %[[W:.*]] = vector.transfer_write %[[R]], %[[ARG2]][%[[C0]], %[[C0]]] {in_bounds = [true, true]} : vector<8x12xf32>, tensor<8x12xf32> + %0 = linalg.matmul ins(%arg0, %arg1: tensor<8x4xf32>, tensor<4x12xf32>) + outs(%arg2: tensor<8x12xf32>) + -> tensor<8x12xf32> + // CHECK: return %[[W]] : tensor<8x12xf32> + return %0 : tensor<8x12xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op + %2 = transform.structured.vectorize_children_and_apply_patterns %1 { disable_multi_reduction_to_contract_patterns, disable_transfer_permutation_map_lowering_patterns } : (!transform.any_op) -> !transform.any_op + transform.yield + } +} + +// ----- + // CHECK-LABEL: contraction_batch_matmul func.func @contraction_batch_matmul(%A: memref<1584x1584x1584xf32>, %B: memref<1584x1584x1584xf32>, %C: memref<1584x1584x1584xf32>) { // CHECK: arith.mulf %{{.*}}, %{{.*}} : vector<1584x1584x1584x1584xf32> @@ -115,6 +194,267 @@ module attributes {transform.with_named_sequence} { // ----- +// Mixed precision vectorization tests. 
+ +// CHECK-LABEL: @float_mixed_precision_matmul_as_contract +// CHECK-COUNT-3: vector.transfer_read +// CHECK-NOT: arith.extf +// CHECK: vector.contract {{.*}} : vector<24x12xbf16>, vector<12x25xbf16> into vector<24x25xf32> +// CHECK: vector.transfer_write +func.func @float_mixed_precision_matmul_as_contract(%A: tensor<24x12xbf16>, + %B: tensor<12x25xbf16>, + %C: tensor<24x25xf32>) -> tensor<24x25xf32> { + %0 = linalg.contract + indexing_maps = [affine_map<(m, n, k) -> (m, k)>, + affine_map<(m, n, k) -> (k, n)>, + affine_map<(m, n, k) -> (m, n)>] + ins(%A, %B : tensor<24x12xbf16>, tensor<12x25xbf16>) + outs(%C : tensor<24x25xf32>) -> tensor<24x25xf32> + func.return %0 : tensor<24x25xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.contract"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op + %2 = transform.structured.vectorize_children_and_apply_patterns %1 { fold_type_extensions_into_contract } : (!transform.any_op) -> !transform.any_op + transform.yield + } +} + +// ----- + +// CHECK-LABEL: func @test_vectorize_fill +func.func @test_vectorize_fill(%A : memref<8x16xf32>, %arg0 : f32) { + // CHECK: %[[V:.*]] = vector.broadcast {{.*}} : f32 to vector<8x16xf32> + // CHECK: vector.transfer_write %[[V]], {{.*}} : vector<8x16xf32>, memref<8x16xf32> + linalg.fill ins(%arg0 : f32) outs(%A : memref<8x16xf32>) + return +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.fill"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op + %2 = 
transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op + transform.yield + } +} + +// ----- + +// CHECK-LABEL: func @test_vectorize_fill +func.func @test_vectorize_fill_0d(%A : memref, %arg0 : f32) { + // CHECK-SAME: (%[[M:.*]]: memref, %[[val:.*]]: f32) + // CHECK: %[[VEC:.*]] = vector.broadcast %[[val]] : f32 to vector + // CHECK: vector.transfer_write %[[VEC]], %[[M]][] : vector, memref + linalg.fill ins(%arg0 : f32) outs(%A : memref) + return +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.fill"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op + %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op + transform.yield + } +} + +// ----- + +// CHECK-LABEL: func @test_vectorize_copy +func.func @test_vectorize_copy(%A : memref<8x16xf32>, %B : memref<8x16xf32>) { + // CHECK: %[[V:.*]] = vector.transfer_read {{.*}} : memref<8x16xf32>, vector<8x16xf32> + // CHECK: vector.transfer_write %[[V]], {{.*}} : vector<8x16xf32>, memref<8x16xf32> + memref.copy %A, %B : memref<8x16xf32> to memref<8x16xf32> + return +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["memref.copy"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op + %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op + transform.yield + } +} + +// ----- + +// CHECK-LABEL: func @test_vectorize_copy_0d +func.func @test_vectorize_copy_0d(%A : memref, %B : memref) { + // 
CHECK-SAME: (%[[A:.*]]: memref, %[[B:.*]]: memref) + // CHECK: %[[V:.*]] = vector.transfer_read %[[A]][]{{.*}} : memref, vector + // CHECK: %[[val:.*]] = vector.extract %[[V]][] : f32 from vector + // CHECK: %[[VV:.*]] = vector.broadcast %[[val]] : f32 to vector + // CHECK: vector.transfer_write %[[VV]], %[[B]][] : vector, memref + memref.copy %A, %B : memref to memref + return +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["memref.copy"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op + %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op + transform.yield + } +} + +// ----- + +// CHECK-LABEL: func @test_vectorize_copy_complex +// CHECK-NOT: vector< +func.func @test_vectorize_copy_complex(%A : memref<8x16xcomplex>, %B : memref<8x16xcomplex>) { + memref.copy %A, %B : memref<8x16xcomplex> to memref<8x16xcomplex> + return +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["memref.copy"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op + %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op + transform.yield + } +} + +// ----- + +// Input identical as the test in vectorization.mlir. Output is different - +// vector sizes are inferred (rather than user-specified) and hence _no_ +// masking was used. 
+ +func.func @test_vectorize_pack(%arg0: tensor<32x8x16xf32>, %arg1: tensor<4x1x32x16x2xf32>) -> tensor<4x1x32x16x2xf32> { + %pack = linalg.pack %arg0 outer_dims_perm = [1, 2, 0] inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x8x16xf32> -> tensor<4x1x32x16x2xf32> + return %pack : tensor<4x1x32x16x2xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op + %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op + %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op + transform.yield + } +} + +// CHECK-LABEL: func.func @test_vectorize_pack( +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x8x16xf32>, +// CHECK-SAME: %[[VAL_1:.*]]: tensor<4x1x32x16x2xf32>) -> tensor<4x1x32x16x2xf32> { +// CHECK: %[[VAL_2:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_4:.*]] = vector.transfer_read %[[VAL_0]]{{\[}}%[[VAL_3]], %[[VAL_3]], %[[VAL_3]]], %[[VAL_2]] {in_bounds = [true, true, true]} : tensor<32x8x16xf32>, vector<32x8x16xf32> +// CHECK: %[[VAL_5:.*]] = vector.shape_cast %[[VAL_4]] : vector<32x8x16xf32> to vector<32x4x2x1x16xf32> +// CHECK: %[[VAL_6:.*]] = vector.transpose %[[VAL_5]], [1, 3, 0, 4, 2] : vector<32x4x2x1x16xf32> to vector<4x1x32x16x2xf32> +// CHECK: %[[VAL_7:.*]] = tensor.empty() : tensor<4x1x32x16x2xf32> +// CHECK: %[[VAL_8:.*]] = vector.transfer_write %[[VAL_6]], %[[VAL_7]]{{\[}}%[[VAL_3]], %[[VAL_3]], %[[VAL_3]], %[[VAL_3]], %[[VAL_3]]] {in_bounds = [true, true, true, true, true]} : vector<4x1x32x16x2xf32>, tensor<4x1x32x16x2xf32> +// CHECK: return %[[VAL_8]] : tensor<4x1x32x16x2xf32> + +// ----- + +func.func @test_vectorize_padded_pack(%arg0: tensor<32x7x15xf32>, %arg1: 
tensor<32x4x1x16x2xf32>) -> tensor<32x4x1x16x2xf32> { + %pad = arith.constant 0.000000e+00 : f32 + %pack = linalg.pack %arg0 padding_value(%pad : f32) inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x7x15xf32> -> tensor<32x4x1x16x2xf32> + return %pack : tensor<32x4x1x16x2xf32> +} + +// CHECK-LABEL: func.func @test_vectorize_padded_pack( +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x7x15xf32>, +// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x4x1x16x2xf32>) -> tensor<32x4x1x16x2xf32> { +// CHECK: %[[VAL_2:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_4:.*]] = vector.transfer_read %[[VAL_0]]{{\[}}%[[VAL_3]], %[[VAL_3]], %[[VAL_3]]], %[[VAL_2]] {in_bounds = [true, false, false]} : tensor<32x7x15xf32>, vector<32x8x16xf32> +// CHECK: %[[VAL_5:.*]] = vector.shape_cast %[[VAL_4]] : vector<32x8x16xf32> to vector<32x4x2x1x16xf32> +// CHECK: %[[VAL_6:.*]] = vector.transpose %[[VAL_5]], [0, 1, 3, 4, 2] : vector<32x4x2x1x16xf32> to vector<32x4x1x16x2xf32> +// CHECK: %[[VAL_7:.*]] = tensor.empty() : tensor<32x4x1x16x2xf32> +// CHECK: %[[VAL_8:.*]] = vector.transfer_write %[[VAL_6]], %[[VAL_7]]{{\[}}%[[VAL_3]], %[[VAL_3]], %[[VAL_3]], %[[VAL_3]], %[[VAL_3]]] {in_bounds = [true, true, true, true, true]} : vector<32x4x1x16x2xf32>, tensor<32x4x1x16x2xf32> +// CHECK: return %[[VAL_8]] : tensor<32x4x1x16x2xf32> + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op + %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op + %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op + transform.yield + } +} + +// ----- + +func.func @vectorize_map(%arg0: memref<64xf32>, + %arg1: memref<64xf32>, %arg2: memref<64xf32>) { + 
linalg.map ins(%arg0, %arg1 : memref<64xf32>, memref<64xf32>) + outs(%arg2 : memref<64xf32>) + (%in: f32, %in_0: f32) { + %0 = arith.addf %in, %in_0 : f32 + linalg.yield %0 : f32 + } + return +} +// CHECK-LABEL: func @vectorize_map +// CHECK: %[[LHS:.*]] = vector.transfer_read +// CHECK-NEXT: %[[RHS:.*]] = vector.transfer_read +// CHECK-NEXT: arith.addf %[[LHS]], %[[RHS]] : vector<64xf32> + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.map"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op + %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op + transform.yield + } +} + +// ----- + +func.func @vectorize_transpose(%arg0: memref<16x32x64xf32>, + %arg1: memref<32x64x16xf32>) { + linalg.transpose ins(%arg0 : memref<16x32x64xf32>) + outs(%arg1 : memref<32x64x16xf32>) permutation = [1, 2, 0] + return +} +// CHECK-LABEL: func @vectorize_transpose +// CHECK: vector.transpose +// CHECK-SAME: [1, 2, 0] : vector<16x32x64xf32> to vector<32x64x16xf32> + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.transpose"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op + %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op + transform.yield + } +} + +// ----- + +func.func @vectorize_reduce(%arg0: memref<16x32x64xf32>, + %arg1: memref<16x64xf32>) { + linalg.reduce ins(%arg0 : memref<16x32x64xf32>) + outs(%arg1 : memref<16x64xf32>) dimensions = [1] + (%in: f32, %init: f32) { + %0 = 
arith.addf %in, %init : f32 + linalg.yield %0 : f32 + } + return +} +// CHECK-LABEL: func @vectorize_reduce +// CHECK: vector.multi_reduction +// CHECK-SAME: : vector<16x32x64xf32> to vector<16x64xf32> + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.reduce"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op + %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op + transform.yield + } +} + +// ----- + #matmul_trait = { indexing_maps = [ affine_map<(m, n, k) -> (m, k)>, @@ -306,27 +646,6 @@ module attributes {transform.with_named_sequence} { // ----- -// CHECK-LABEL: func @vectorization_test_2 -func.func @vectorization_test_2(%A: memref<8x16xf32>, %B: memref<16x32xf32>, - %C: memref<8x32xf32>) { - // CHECK: arith.mulf %{{.*}}, %{{.*}} : vector<8x32x16xf32> - // CHECK: vector.multi_reduction , %{{.*}}, {{.*}} [2] : vector<8x32x16xf32> to vector<8x32xf32> - linalg.matmul - ins(%A, %B: memref<8x16xf32>, memref<16x32xf32>) - outs(%C: memref<8x32xf32>) - return -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op - %2 = transform.structured.vectorize_children_and_apply_patterns %1 { disable_multi_reduction_to_contract_patterns } : (!transform.any_op) -> !transform.any_op - transform.yield - } -} - -// ----- // CHECK-LABEL: func @test_vectorize_scalar_input func.func @test_vectorize_scalar_input(%A : memref<8x16xf32>, %arg0 : f32) { @@ -401,125 +720,27 @@ func.func 
@vectorize_affine_apply(%arg0: tensor<5xf32>, %arg3: index) -> tensor< return %1 : tensor<5xi32> } -// CHECK-LABEL: func.func @vectorize_affine_apply -// CHECK-SAME: %arg0: tensor<5xf32> -// CHECK-SAME: %[[ARG1:.*]]: index -// CHECK-DAG: %[[CST:.*]] = arith.constant dense<[123, 124, 125, 126, 127]> : vector<5xindex> -// CHECK-DAG: %[[CST_0:.*]] = arith.constant dense<1> : vector<5xindex> -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK: %[[EMPTY:.*]] = tensor.empty() : tensor<5xi32> -// CHECK: %[[BCAST:.*]] = vector.broadcast %[[ARG1]] : index to vector<5xindex> -// CHECK: %[[ADDI_1:.*]] = arith.addi %[[BCAST]], %[[CST]] : vector<5xindex> -// CHECK: %[[ADDI_2:.*]] = arith.addi %[[ADDI_1]], %[[CST_0]] : vector<5xindex> -// CHECK: %[[ADDI_3:.*]] = arith.addi %[[ADDI_1]], %[[ADDI_2]] : vector<5xindex> -// CHECK: %[[ADDI_4:.*]] = arith.addi %[[ADDI_3]], %[[CST]] : vector<5xindex> -// CHECK: %[[CAST:.*]] = arith.index_cast %[[ADDI_4]] : vector<5xindex> to vector<5xi32> -// CHECK: vector.transfer_write %[[CAST]], %[[EMPTY]][%[[C0:.*]]] {in_bounds = [true]} : vector<5xi32>, tensor<5xi32> - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op - %2 = transform.structured.vectorize_children_and_apply_patterns %1 { vectorize_nd_extract } : (!transform.any_op) -> !transform.any_op - transform.yield - } -} - -// ----- - -// CHECK-LABEL: func @test_vectorize_fill -func.func @test_vectorize_fill(%A : memref<8x16xf32>, %arg0 : f32) { - // CHECK: %[[V:.*]] = vector.broadcast {{.*}} : f32 to vector<8x16xf32> - // CHECK: vector.transfer_write %[[V]], {{.*}} : vector<8x16xf32>, memref<8x16xf32> - linalg.fill ins(%arg0 : f32) outs(%A : memref<8x16xf32>) - return -} - -module 
attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["linalg.fill"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op - %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op - transform.yield - } -} - -// ----- - -// CHECK-LABEL: func @test_vectorize_fill -func.func @test_vectorize_fill_0d(%A : memref, %arg0 : f32) { - // CHECK-SAME: (%[[M:.*]]: memref, %[[val:.*]]: f32) - // CHECK: %[[VEC:.*]] = vector.broadcast %[[val]] : f32 to vector - // CHECK: vector.transfer_write %[[VEC]], %[[M]][] : vector, memref - linalg.fill ins(%arg0 : f32) outs(%A : memref) - return -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["linalg.fill"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op - %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op - transform.yield - } -} - -// ----- - -// CHECK-LABEL: func @test_vectorize_copy -func.func @test_vectorize_copy(%A : memref<8x16xf32>, %B : memref<8x16xf32>) { - // CHECK: %[[V:.*]] = vector.transfer_read {{.*}} : memref<8x16xf32>, vector<8x16xf32> - // CHECK: vector.transfer_write %[[V]], {{.*}} : vector<8x16xf32>, memref<8x16xf32> - memref.copy %A, %B : memref<8x16xf32> to memref<8x16xf32> - return -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["memref.copy"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1 = 
transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op - %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op - transform.yield - } -} - -// ----- - -// CHECK-LABEL: func @test_vectorize_copy_0d -func.func @test_vectorize_copy_0d(%A : memref, %B : memref) { - // CHECK-SAME: (%[[A:.*]]: memref, %[[B:.*]]: memref) - // CHECK: %[[V:.*]] = vector.transfer_read %[[A]][]{{.*}} : memref, vector - // CHECK: %[[val:.*]] = vector.extract %[[V]][] : f32 from vector - // CHECK: %[[VV:.*]] = vector.broadcast %[[val]] : f32 to vector - // CHECK: vector.transfer_write %[[VV]], %[[B]][] : vector, memref - memref.copy %A, %B : memref to memref - return -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["memref.copy"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op - %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op - transform.yield - } -} - -// ----- - -// CHECK-LABEL: func @test_vectorize_copy_complex -// CHECK-NOT: vector< -func.func @test_vectorize_copy_complex(%A : memref<8x16xcomplex>, %B : memref<8x16xcomplex>) { - memref.copy %A, %B : memref<8x16xcomplex> to memref<8x16xcomplex> - return -} +// CHECK-LABEL: func.func @vectorize_affine_apply +// CHECK-SAME: %arg0: tensor<5xf32> +// CHECK-SAME: %[[ARG1:.*]]: index +// CHECK-DAG: %[[CST:.*]] = arith.constant dense<[123, 124, 125, 126, 127]> : vector<5xindex> +// CHECK-DAG: %[[CST_0:.*]] = arith.constant dense<1> : vector<5xindex> +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK: %[[EMPTY:.*]] = tensor.empty() : tensor<5xi32> +// CHECK: %[[BCAST:.*]] = vector.broadcast %[[ARG1]] : index to vector<5xindex> +// CHECK: 
%[[ADDI_1:.*]] = arith.addi %[[BCAST]], %[[CST]] : vector<5xindex> +// CHECK: %[[ADDI_2:.*]] = arith.addi %[[ADDI_1]], %[[CST_0]] : vector<5xindex> +// CHECK: %[[ADDI_3:.*]] = arith.addi %[[ADDI_1]], %[[ADDI_2]] : vector<5xindex> +// CHECK: %[[ADDI_4:.*]] = arith.addi %[[ADDI_3]], %[[CST]] : vector<5xindex> +// CHECK: %[[CAST:.*]] = arith.index_cast %[[ADDI_4]] : vector<5xindex> to vector<5xi32> +// CHECK: vector.transfer_write %[[CAST]], %[[EMPTY]][%[[C0:.*]]] {in_bounds = [true]} : vector<5xi32>, tensor<5xi32> module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["memref.copy"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op - %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op - transform.yield + %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op + %2 = transform.structured.vectorize_children_and_apply_patterns %1 { vectorize_nd_extract } : (!transform.any_op) -> !transform.any_op + transform.yield } } @@ -855,40 +1076,6 @@ module attributes {transform.with_named_sequence} { // ----- -// CHECK-LABEL: func @matmul_tensors -// CHECK-SAME: (%[[ARG0:.*]]: tensor<8x4xf32>, %[[ARG1:.*]]: tensor<4x12xf32>, -// CHECK-SAME: %[[ARG2:.*]]: tensor<8x12xf32>) -> tensor<8x12xf32> -func.func @matmul_tensors( - %arg0: tensor<8x4xf32>, %arg1: tensor<4x12xf32>, %arg2: tensor<8x12xf32>) - -> tensor<8x12xf32> { - // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index - // CHECK-DAG: %[[V0:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]]], {{.*}} : tensor<8x4xf32>, vector<8x12x4xf32> - // CHECK-DAG: %[[V1:.*]] = vector.transfer_read 
%[[ARG1]][%[[C0]], %[[C0]]], {{.*}} : tensor<4x12xf32>, vector<8x12x4xf32> - // CHECK-DAG: %[[V2:.*]] = vector.transfer_read %[[ARG2]][%[[C0]], %[[C0]]], {{.*}} : tensor<8x12xf32>, vector<8x12xf32> - // - // linalg matmul lowers gets expanded to a 3D reduction, canonicalization later - // convert it to a 2D contract. - // CHECK: %[[MUL:.*]] = arith.mulf %[[V0]], %[[V1]] : vector<8x12x4xf32> - // CHECK: %[[R:.*]] = vector.multi_reduction , %[[MUL]], %[[V2]] [2] : vector<8x12x4xf32> to vector<8x12xf32> - // CHECK: %[[W:.*]] = vector.transfer_write %[[R]], %[[ARG2]][%[[C0]], %[[C0]]] {in_bounds = [true, true]} : vector<8x12xf32>, tensor<8x12xf32> - %0 = linalg.matmul ins(%arg0, %arg1: tensor<8x4xf32>, tensor<4x12xf32>) - outs(%arg2: tensor<8x12xf32>) - -> tensor<8x12xf32> - // CHECK: return %[[W]] : tensor<8x12xf32> - return %0 : tensor<8x12xf32> -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op - %2 = transform.structured.vectorize_children_and_apply_patterns %1 { disable_multi_reduction_to_contract_patterns, disable_transfer_permutation_map_lowering_patterns } : (!transform.any_op) -> !transform.any_op - transform.yield - } -} - -// ----- - // CHECK-LABEL: func @sum_exp func.func @sum_exp(%input: tensor<4x16x8xf32>, %output: tensor<4x16xf32>) -> tensor<4x16xf32> @@ -914,7 +1101,6 @@ func.func @sum_exp(%input: tensor<4x16x8xf32>, %output: tensor<4x16xf32>) return %0 : tensor<4x16xf32> } - module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { %3 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op @@ -993,7 +1179,6 @@ 
func.func @red_maximumf_2d(%arg0: tensor<4x4xf32>) -> tensor<4xf32> { return %red : tensor<4xf32> } - module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { %3 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op @@ -1428,78 +1613,6 @@ module attributes {transform.with_named_sequence} { // ----- -func.func @vectorize_map(%arg0: memref<64xf32>, - %arg1: memref<64xf32>, %arg2: memref<64xf32>) { - linalg.map ins(%arg0, %arg1 : memref<64xf32>, memref<64xf32>) - outs(%arg2 : memref<64xf32>) - (%in: f32, %in_0: f32) { - %0 = arith.addf %in, %in_0 : f32 - linalg.yield %0 : f32 - } - return -} -// CHECK-LABEL: func @vectorize_map -// CHECK: %[[LHS:.*]] = vector.transfer_read -// CHECK-NEXT: %[[RHS:.*]] = vector.transfer_read -// CHECK-NEXT: arith.addf %[[LHS]], %[[RHS]] : vector<64xf32> - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["linalg.map"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op - %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op - transform.yield - } -} - -// ----- - -func.func @vectorize_transpose(%arg0: memref<16x32x64xf32>, - %arg1: memref<32x64x16xf32>) { - linalg.transpose ins(%arg0 : memref<16x32x64xf32>) - outs(%arg1 : memref<32x64x16xf32>) permutation = [1, 2, 0] - return -} -// CHECK-LABEL: func @vectorize_transpose -// CHECK: vector.transpose -// CHECK-SAME: [1, 2, 0] : vector<16x32x64xf32> to vector<32x64x16xf32> - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match 
ops{["linalg.transpose"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op - %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op - transform.yield - } -} - -// ----- - -func.func @vectorize_reduce(%arg0: memref<16x32x64xf32>, - %arg1: memref<16x64xf32>) { - linalg.reduce ins(%arg0 : memref<16x32x64xf32>) - outs(%arg1 : memref<16x64xf32>) dimensions = [1] - (%in: f32, %init: f32) { - %0 = arith.addf %in, %init : f32 - linalg.yield %0 : f32 - } - return -} -// CHECK-LABEL: func @vectorize_reduce -// CHECK: vector.multi_reduction -// CHECK-SAME: : vector<16x32x64xf32> to vector<16x64xf32> - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["linalg.reduce"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op - %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op - transform.yield - } -} - -// ----- - // This is a regression test. This IR cannot be vectorized, but // structured.vectorize_children_and_apply_patterns should nevertheless succeed. @@ -1715,77 +1828,12 @@ module attributes {transform.with_named_sequence} { // ----- -// Input identical as the test in vectorization.mlir. Output is different - -// vector sizes are inferred (rather than user-specified) and hence _no_ -// masking was used. 
- -func.func @test_vectorize_pack(%arg0: tensor<32x8x16xf32>, %arg1: tensor<4x1x32x16x2xf32>) -> tensor<4x1x32x16x2xf32> { - %pack = linalg.pack %arg0 outer_dims_perm = [1, 2, 0] inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x8x16xf32> -> tensor<4x1x32x16x2xf32> - return %pack : tensor<4x1x32x16x2xf32> -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["linalg.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op - %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op - %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op - transform.yield - } -} - -// CHECK-LABEL: func.func @test_vectorize_pack( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x8x16xf32>, -// CHECK-SAME: %[[VAL_1:.*]]: tensor<4x1x32x16x2xf32>) -> tensor<4x1x32x16x2xf32> { -// CHECK: %[[VAL_2:.*]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index -// CHECK: %[[VAL_4:.*]] = vector.transfer_read %[[VAL_0]]{{\[}}%[[VAL_3]], %[[VAL_3]], %[[VAL_3]]], %[[VAL_2]] {in_bounds = [true, true, true]} : tensor<32x8x16xf32>, vector<32x8x16xf32> -// CHECK: %[[VAL_5:.*]] = vector.shape_cast %[[VAL_4]] : vector<32x8x16xf32> to vector<32x4x2x1x16xf32> -// CHECK: %[[VAL_6:.*]] = vector.transpose %[[VAL_5]], [1, 3, 0, 4, 2] : vector<32x4x2x1x16xf32> to vector<4x1x32x16x2xf32> -// CHECK: %[[VAL_7:.*]] = tensor.empty() : tensor<4x1x32x16x2xf32> -// CHECK: %[[VAL_8:.*]] = vector.transfer_write %[[VAL_6]], %[[VAL_7]]{{\[}}%[[VAL_3]], %[[VAL_3]], %[[VAL_3]], %[[VAL_3]], %[[VAL_3]]] {in_bounds = [true, true, true, true, true]} : vector<4x1x32x16x2xf32>, tensor<4x1x32x16x2xf32> -// CHECK: return %[[VAL_8]] : tensor<4x1x32x16x2xf32> - -// ----- - -// Input identical as the test in vectorization.mlir. 
Output is different - -// vector sizes are inferred (rather than user-specified) and hence _no_ -// masking was used. - -func.func @test_vectorize_padded_pack(%arg0: tensor<32x7x15xf32>, %arg1: tensor<32x4x1x16x2xf32>) -> tensor<32x4x1x16x2xf32> { - %pad = arith.constant 0.000000e+00 : f32 - %pack = linalg.pack %arg0 padding_value(%pad : f32) inner_dims_pos = [2, 1] inner_tiles = [16, 2] into %arg1 : tensor<32x7x15xf32> -> tensor<32x4x1x16x2xf32> - return %pack : tensor<32x4x1x16x2xf32> -} - -// CHECK-LABEL: func.func @test_vectorize_padded_pack( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x7x15xf32>, -// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x4x1x16x2xf32>) -> tensor<32x4x1x16x2xf32> { -// CHECK: %[[VAL_2:.*]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index -// CHECK: %[[VAL_4:.*]] = vector.transfer_read %[[VAL_0]]{{\[}}%[[VAL_3]], %[[VAL_3]], %[[VAL_3]]], %[[VAL_2]] {in_bounds = [true, false, false]} : tensor<32x7x15xf32>, vector<32x8x16xf32> -// CHECK: %[[VAL_5:.*]] = vector.shape_cast %[[VAL_4]] : vector<32x8x16xf32> to vector<32x4x2x1x16xf32> -// CHECK: %[[VAL_6:.*]] = vector.transpose %[[VAL_5]], [0, 1, 3, 4, 2] : vector<32x4x2x1x16xf32> to vector<32x4x1x16x2xf32> -// CHECK: %[[VAL_7:.*]] = tensor.empty() : tensor<32x4x1x16x2xf32> -// CHECK: %[[VAL_8:.*]] = vector.transfer_write %[[VAL_6]], %[[VAL_7]]{{\[}}%[[VAL_3]], %[[VAL_3]], %[[VAL_3]], %[[VAL_3]], %[[VAL_3]]] {in_bounds = [true, true, true, true, true]} : vector<32x4x1x16x2xf32>, tensor<32x4x1x16x2xf32> -// CHECK: return %[[VAL_8]] : tensor<32x4x1x16x2xf32> - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["linalg.pack"]} in %arg0 : (!transform.any_op) -> !transform.any_op - %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op - %2 = transform.structured.vectorize_children_and_apply_patterns 
%1 : (!transform.any_op) -> !transform.any_op - transform.yield - } -} - -// ----- - // Mixed precision vectorization tests. // CHECK-LABEL: func @float_mixed_precision_generic_as_contract // CHECK-COUNT-3: vector.transfer_read // CHECK-NOT: arith.extf -// CHECK: vector.contract +// CHECK: vector.contract {{.*}} : vector<8x32x16xbf16>, vector<8x32x16xbf16> into vector<8x32xf32> // CHECK: vector.transfer_write func.func @float_mixed_precision_generic_as_contract(%A: memref<8x16xbf16>, %B: memref<16x32xbf16>, %C: memref<8x32xf32>) { @@ -1813,7 +1861,7 @@ module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op - %2 = transform.structured.vectorize_children_and_apply_patterns %1 { fold_mixed_precision_into_contract, disable_transfer_permutation_map_lowering_patterns } : (!transform.any_op) -> !transform.any_op + %2 = transform.structured.vectorize_children_and_apply_patterns %1 { fold_type_extensions_into_contract, disable_transfer_permutation_map_lowering_patterns } : (!transform.any_op) -> !transform.any_op transform.yield } } @@ -1823,7 +1871,7 @@ module attributes {transform.with_named_sequence} { // CHECK-LABEL: func @integer_mixed_precision_generic_as_contract // CHECK-COUNT-3: vector.transfer_read // CHECK-NOT: arith.extsi -// CHECK: vector.contract +// CHECK: vector.contract {{.*}} : vector<8x32x16xi8>, vector<8x32x16xi8> into vector<8x32xi32> // CHECK: vector.transfer_write func.func @integer_mixed_precision_generic_as_contract(%A: memref<8x16xi8>, %B: memref<16x32xi8>, %C: memref<8x32xi32>) { @@ -1851,84 +1899,8 @@ module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { %0 = 
transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op - %2 = transform.structured.vectorize_children_and_apply_patterns %1 { fold_mixed_precision_into_contract, disable_transfer_permutation_map_lowering_patterns } : (!transform.any_op) -> !transform.any_op - transform.yield - } -} - -// ----- - -// CHECK-LABEL: @float_mixed_precision_matmul_as_contract -// CHECK-COUNT-3: vector.transfer_read -// CHECK-NOT: arith.extf -// CHECK: vector.contract -// CHECK: vector.transfer_write -func.func @float_mixed_precision_matmul_as_contract(%A: tensor<24x12xbf16>, - %B: tensor<12x25xbf16>, - %C: tensor<24x25xf32>) -> tensor<24x25xf32> { - %0 = linalg.contract - indexing_maps = [affine_map<(m, n, k) -> (m, k)>, - affine_map<(m, n, k) -> (k, n)>, - affine_map<(m, n, k) -> (m, n)>] - ins(%A, %B : tensor<24x12xbf16>, tensor<12x25xbf16>) - outs(%C : tensor<24x25xf32>) -> tensor<24x25xf32> - func.return %0 : tensor<24x25xf32> -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["linalg.contract"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op - %2 = transform.structured.vectorize_children_and_apply_patterns %1 { fold_mixed_precision_into_contract } : (!transform.any_op) -> !transform.any_op - transform.yield - } -} - -// ----- - -// CHECK-LABEL: @integer_mixed_precision_matmul_as_contract -// CHECK-COUNT-3: vector.transfer_read -// CHECK-NOT: arith.extf -// CHECK: vector.contract -// CHECK: vector.transfer_write -func.func @integer_mixed_precision_matmul_as_contract(%A: tensor<24x12xi8>, - %B: tensor<12x25xi8>, - %C: tensor<24x25xi32>) -> tensor<24x25xi32> { - %0 = linalg.contract - indexing_maps = 
[affine_map<(m, n, k) -> (m, k)>, - affine_map<(m, n, k) -> (k, n)>, - affine_map<(m, n, k) -> (m, n)>] - ins(%A, %B : tensor<24x12xi8>, tensor<12x25xi8>) - outs(%C : tensor<24x25xi32>) -> tensor<24x25xi32> - func.return %0 : tensor<24x25xi32> -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["linalg.contract"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op - %2 = transform.structured.vectorize_children_and_apply_patterns %1 { fold_mixed_precision_into_contract } : (!transform.any_op) -> !transform.any_op + %2 = transform.structured.vectorize_children_and_apply_patterns %1 { fold_type_extensions_into_contract, disable_transfer_permutation_map_lowering_patterns } : (!transform.any_op) -> !transform.any_op transform.yield } } -// ----- - -// CHECK-LABEL: @contraction_matmul -// CHECK-COUNT-3: vector.transfer_read -// CHECK-NOT: arith.extf -// CHECK: vector.contract -func.func @contraction_matmul(%A: memref<1584x1584xbf16>, %B: memref<1584x1584xbf16>, %C: memref<1584x1584xf32>) { - linalg.matmul ins(%A, %B: memref<1584x1584xbf16>, memref<1584x1584xbf16>) - outs(%C: memref<1584x1584xf32>) - return -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op - %2 = transform.structured.vectorize_children_and_apply_patterns %1 { fold_mixed_precision_into_contract } : (!transform.any_op) -> !transform.any_op - transform.yield - } -} From fcc14b3351e19aeaf2c2ccc7cd8b9ff14408d3eb Mon Sep 17 00:00:00 2001 From: mshahid Date: Wed, 6 Aug 2025 
06:17:41 -0700 Subject: [PATCH 4/5] Updated doc and removed unnecessary leftover. --- .../mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td | 3 ++- mlir/test/Dialect/Linalg/transform-op-vectorize.mlir | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td index 885f07176304a..a19cce4b919a8 100644 --- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td +++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td @@ -2349,7 +2349,8 @@ def VectorizeChildrenAndApplyPatternsOp : This transformation supports the following attributes: - `fold_type_extensions_into_contract`: a `UnitAttr` to enable the folding of - cast operations into vector.contract with mixed precision. + type extension operations into `vector.contract` to create a mixed precision + operation. - `vectorize_padding`: a `UnitAttr` to activate the vectorization of `tensor.pad` ops. Different pipelines may prefer to lower such ops to loops. diff --git a/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir b/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir index e0c5cddfaf30b..0d59dbba8940d 100644 --- a/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir +++ b/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir @@ -189,4 +189,4 @@ module attributes {transform.with_named_sequence} { %2 = transform.structured.vectorize_children_and_apply_patterns %0 : (!transform.any_op) -> !transform.any_op transform.yield } -} \ No newline at end of file +} From b0ad8628158c6598ed9e5f687e2a3312454469ff Mon Sep 17 00:00:00 2001 From: mshahid Date: Wed, 6 Aug 2025 09:15:33 -0700 Subject: [PATCH 5/5] Removed unwanted comment and attribute from test. 
--- .../linalg-ops-with-patterns.mlir | 24 +++++++------------ 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir b/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir index c98c024340825..25cbceb93c297 100644 --- a/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir +++ b/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir @@ -61,8 +61,6 @@ module attributes {transform.with_named_sequence} { // ----- -// Mixed precision vectorization tests. - // CHECK-LABEL: @float_mixed_precision_matmul // CHECK-COUNT-3: vector.transfer_read // CHECK-NOT: arith.extf @@ -194,12 +192,10 @@ module attributes {transform.with_named_sequence} { // ----- -// Mixed precision vectorization tests. - // CHECK-LABEL: @float_mixed_precision_matmul_as_contract // CHECK-COUNT-3: vector.transfer_read // CHECK-NOT: arith.extf -// CHECK: vector.contract {{.*}} : vector<24x12xbf16>, vector<12x25xbf16> into vector<24x25xf32> +// CHECK: vector.contract {{.*}} : vector<24x12xbf16>, vector<12x25xbf16> into vector<24x25xf32> // CHECK: vector.transfer_write func.func @float_mixed_precision_matmul_as_contract(%A: tensor<24x12xbf16>, %B: tensor<12x25xbf16>, @@ -1828,14 +1824,12 @@ module attributes {transform.with_named_sequence} { // ----- -// Mixed precision vectorization tests. 
- -// CHECK-LABEL: func @float_mixed_precision_generic_as_contract +// CHECK-LABEL: func @float_mixed_precision_matmul_as_generic // CHECK-COUNT-3: vector.transfer_read // CHECK-NOT: arith.extf -// CHECK: vector.contract {{.*}} : vector<8x32x16xbf16>, vector<8x32x16xbf16> into vector<8x32xf32> +// CHECK: vector.contract {{.*}} : vector<8x16xbf16>, vector<16x32xbf16> into vector<8x32xf32> // CHECK: vector.transfer_write -func.func @float_mixed_precision_generic_as_contract(%A: memref<8x16xbf16>, %B: memref<16x32xbf16>, +func.func @float_mixed_precision_matmul_as_generic(%A: memref<8x16xbf16>, %B: memref<16x32xbf16>, %C: memref<8x32xf32>) { linalg.generic { indexing_maps = [ @@ -1861,19 +1855,19 @@ module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op - %2 = transform.structured.vectorize_children_and_apply_patterns %1 { fold_type_extensions_into_contract, disable_transfer_permutation_map_lowering_patterns } : (!transform.any_op) -> !transform.any_op + %2 = transform.structured.vectorize_children_and_apply_patterns %1 { fold_type_extensions_into_contract } : (!transform.any_op) -> !transform.any_op transform.yield } } // ----- -// CHECK-LABEL: func @integer_mixed_precision_generic_as_contract +// CHECK-LABEL: func @integer_mixed_precision_matmul_as_generic // CHECK-COUNT-3: vector.transfer_read // CHECK-NOT: arith.extsi -// CHECK: vector.contract {{.*}} : vector<8x32x16xi8>, vector<8x32x16xi8> into vector<8x32xi32> +// CHECK: vector.contract {{.*}} : vector<8x16xi8>, vector<16x32xi8> into vector<8x32xi32> // CHECK: vector.transfer_write -func.func @integer_mixed_precision_generic_as_contract(%A: memref<8x16xi8>, %B: memref<16x32xi8>, +func.func 
@integer_mixed_precision_matmul_as_generic(%A: memref<8x16xi8>, %B: memref<16x32xi8>, %C: memref<8x32xi32>) { linalg.generic { indexing_maps = [ @@ -1899,7 +1893,7 @@ module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op - %2 = transform.structured.vectorize_children_and_apply_patterns %1 { fold_type_extensions_into_contract, disable_transfer_permutation_map_lowering_patterns } : (!transform.any_op) -> !transform.any_op + %2 = transform.structured.vectorize_children_and_apply_patterns %1 { fold_type_extensions_into_contract } : (!transform.any_op) -> !transform.any_op transform.yield } }