Update folding patterns.

hanhanW · hanhanW · commit cf2adb655a8b · 2025-07-21T14:59:32.000-07:00
Signed-off-by: hanhanW &lt;hanhan0912@gmail.com&gt;
diff --git a/mlir/lib/Dialect/Linalg/Transforms/PackAndUnpackPatterns.cpp b/mlir/lib/Dialect/Linalg/Transforms/PackAndUnpackPatterns.cpp
@@ -195,6 +195,28 @@ struct SimplifyUnPackToCollapseShape : public OpRewritePattern<UnPackOp> {
   }
 };
 
+/// Returns the outer shape in the packed domain before applying the
+/// transposition.
+template <typename OpTy>
+static SmallVector<int64_t>
+getPackedOuterShapeWithoutTransposition(OpTy packOrUnPack) {
+  static_assert(llvm::is_one_of<OpTy, PackOp, UnPackOp>::value,
+                "applies to only pack or unpack operations");
+  RankedTensorType packedType = (std::is_same<OpTy, PackOp>::value)
+                                      ? packOrUnPack.getDestType()
+                                      : packOrUnPack.getSourceType();
+  RankedTensorType unpackedType = (std::is_same<OpTy, PackOp>::value)
+                                      ? packOrUnPack.getSourceType()
+                                      : packOrUnPack.getDestType();
+  SmallVector<int64_t> result(
+      packedType.getShape().take_front(unpackedType.getRank()));
+  if (!packOrUnPack.getOuterDimsPerm().empty()) {
+    applyPermutationToVector(
+        result, invertPermutationVector(packOrUnPack.getOuterDimsPerm()));
+  }
+  return result;
+}
+
 /// Fold a `pad` -> `pack` into `pack` if they have the same padding values and
 /// the pad op has zero low paddings, or if `pack` has no padding values.
 struct FoldPadWithPackOp : public OpRewritePattern<PackOp> {
@@ -221,19 +243,14 @@ struct FoldPadWithPackOp : public OpRewritePattern<PackOp> {
       if (!isEqualConstantIntOrValue(paddingValue, constantPaddingValue))
         return failure();
 
-    RankedTensorType srcType = packOp.getSourceType();
-    RankedTensorType destType = packOp.getDestType();
-    SmallVector<int64_t> outerShapeWithoutTranspose(
-        destType.getShape().take_front(srcType.getRank()));
-    if (!packOp.getOuterDimsPerm().empty()) {
-      applyPermutationToVector(
-          outerShapeWithoutTranspose,
-          invertPermutationVector(packOp.getOuterDimsPerm()));
-    }
+    // Folding is not allowed if it introduces artificial padding.
+    RankedTensorType unpackedType = packOp.getSourceType();
+    SmallVector<int64_t> outerShapeWithoutTranspose =
+        getPackedOuterShapeWithoutTransposition(packOp);
     for (auto [pos, tileSize, high] :
          llvm::zip_equal(packOp.getInnerDimsPos(), packOp.getStaticInnerTiles(),
                          padOp.getMixedHighPad())) {
-      if (srcType.isDynamicDim(pos))
+      if (unpackedType.isDynamicDim(pos))
         return failure();
       if (ShapedType::isDynamic(outerShapeWithoutTranspose[pos]))
         return failure();
@@ -242,9 +259,9 @@ struct FoldPadWithPackOp : public OpRewritePattern<PackOp> {
       std::optional<int64_t> cstHigh = getConstantIntValue(high);
       if (!cstHigh)
         return failure();
-      int64_t paddingSize =
-          outerShapeWithoutTranspose[pos] * tileSize - srcType.getDimSize(pos);
-      // Do not fold the ops if it requires extra padding sizes.
+      int64_t paddingSize = outerShapeWithoutTranspose[pos] * tileSize -
+                            unpackedType.getDimSize(pos);
+      // Do not fold the op if it requires artificial padding.
       if (paddingSize + cstHigh.value() >= tileSize)
         return failure();
     }
@@ -292,6 +309,24 @@ struct FoldUnpackWithExtractSliceOp
           sliceOp, "expects offsets to be 0s and strides to be 1s");
     }
 
+    // Folding is not allowed if any tile is dropped.
+    RankedTensorType unpackedType = sliceOp.getResultType();
+    SmallVector<int64_t> outerShapeWithoutTranspose =
+        getPackedOuterShapeWithoutTransposition(unpackOp);
+    for (auto [pos, tileSize] : llvm::zip_equal(
+             unpackOp.getInnerDimsPos(), unpackOp.getStaticInnerTiles())) {
+      if (unpackedType.isDynamicDim(pos))
+        return failure();
+      if (ShapedType::isDynamic(outerShapeWithoutTranspose[pos]))
+        return failure();
+      if (ShapedType::isDynamic(tileSize))
+        return failure();
+      int64_t paddingSize = outerShapeWithoutTranspose[pos] * tileSize -
+                            unpackedType.getDimSize(pos);
+      if (paddingSize >= tileSize)
+        return failure();
+    }
+
     // Create a new empty output tensor.
     Type elementType = unpackOp.getDestType().getElementType();
     Value output = rewriter.create<tensor::EmptyOp>(
diff --git a/mlir/test/Dialect/Tensor/fold-into-pack-and-unpack.mlir b/mlir/test/Dialect/Tensor/fold-into-pack-and-unpack.mlir
@@ -1,22 +1,32 @@
 // RUN: mlir-opt -split-input-file -test-linalg-transform-patterns=test-fold-into-pack-and-unpack  %s | FileCheck %s
 // RUN: mlir-opt -split-input-file -test-linalg-transform-patterns=test-fold-into-pack-and-unpack-control  %s | FileCheck %s --check-prefix=CONTROL
 
-func.func @fold_unpack_slice(%arg0 : tensor<?x?x8x4xf32>, %arg1 : tensor<?x?xf32>,
+func.func @fold_unpack_slice(%arg0 : tensor<2082x1x8x32xf32>) -> tensor<16649x16xf32> {
+  %empty = tensor.empty() : tensor<16656x16xf32>
+  %0 = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 32] into %empty
+      : tensor<2082x1x8x32xf32> -> tensor<16656x16xf32>
+  %1 = tensor.extract_slice %0[0, 0] [16649, 16] [1, 1] : tensor<16656x16xf32> to tensor<16649x16xf32>
+  return %1 : tensor<16649x16xf32>
+}
+// CHECK-LABEL: func @fold_unpack_slice(
+//  CHECK-SAME:   %[[ARG0:[a-zA-Z0-9]+]]
+//      CHECK:    %[[INIT:.+]] = tensor.empty() : tensor<16649x16xf32>
+//      CHECK:    %[[UNPACK:.+]] = linalg.unpack %[[ARG0]] inner_dims_pos = [0, 1] inner_tiles = [8, 32]
+// CHECK-SAME:        into %[[INIT]]
+//      CHECK:    return %[[UNPACK]]
+
+// -----
+
+func.func @nofold_dynamic_unpack_slice(%arg0 : tensor<?x?x8x4xf32>, %arg1 : tensor<?x?xf32>,
     %arg2 : index, %arg3 : index) -> tensor<?x?xf32> {
   %0 = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 4] into %arg1
       : tensor<?x?x8x4xf32> -> tensor<?x?xf32>
   %1 = tensor.extract_slice %0[0, 0] [%arg2, %arg3] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
   return %1 : tensor<?x?xf32>
 }
-//      CHECK: func @fold_unpack_slice(
-// CHECK-SAME:     %[[ARG0:.+]]: tensor<?x?x8x4xf32>
-// CHECK-SAME:     %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?xf32>
-// CHECK-SAME:     %[[ARG2:[a-zA-Z0-9]+]]: index
-// CHECK-SAME:     %[[ARG3:[a-zA-Z0-9]+]]: index
-//      CHECK:   %[[INIT:.+]] = tensor.empty(%[[ARG2]], %[[ARG3]]) : tensor<?x?xf32>
-//      CHECK:   %[[UNPACK:.+]] = linalg.unpack %[[ARG0]] inner_dims_pos = [0, 1] inner_tiles = [8, 4]
-// CHECK-SAME:       into %[[INIT]]
-//      CHECK:   return %[[UNPACK]]
+// CHECK-LABEL: func @nofold_dynamic_unpack_slice(
+//       CHECK:   linalg.unpack
+//       CHECK:   tensor.extract_slice
 
 // -----