
Commit 9bd51b5

Address HanHan review: Disable pack/unpack canonicalization for memref versions
- Add hasPureTensorSemantics() check in PackOp::canonicalize()
- Add hasPureTensorSemantics() check in UnPackOp::canonicalize()
- Remove memref folding tests from canonicalize.mlir
- Fix memref pack/unpack syntax in roundtrip.mlir (remove result type)
- Apply clang-format to modified code

This prevents complex canonicalization patterns from running on the memref versions of pack/unpack operations, which follow buffer semantics, avoiding control-flow complexity issues.
1 parent d16448a commit 9bd51b5
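As context for the change, a minimal sketch contrasting the two forms (value names such as %src and %dst_m are hypothetical; the tensor example follows the op documentation below, the memref example follows the updated roundtrip.mlir checks): the tensor form yields an SSA result that canonicalization may retype or wrap in a tensor.cast, while the memref form writes into its destination in place and yields no result, so those rewrites no longer apply.

```mlir
// Tensor semantics: %packed is an SSA result, so canonicalization patterns
// may change its type and insert tensor.cast ops around it.
%packed = linalg.pack %src inner_dims_pos = [0, 1] inner_tiles = [8, 32]
    into %dst : tensor<128x256xf32> -> tensor<16x8x8x32xf32>

// Buffer semantics (printed form per the updated roundtrip.mlir checks):
// no SSA result to retype, so the canonicalizers now bail out early.
linalg.pack %src_m inner_dims_pos = [0, 1] inner_tiles = [8, 32]
    into %dst_m : memref<128x256xf32>
```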

File tree: 4 files changed, +65 −86 lines


mlir/include/mlir/Dialect/Linalg/IR/LinalgRelayoutOps.td

Lines changed: 19 additions & 57 deletions
@@ -93,21 +93,17 @@ def Linalg_PackOp : Linalg_RelayoutOp<"pack", [
     tensor of rank `n + k` with a tiled and packed layout (maybe with padding)
     and optionally transposes the tiled source tensor dimensions.
 
+    `inner_dims_pos` (mandatory) specifies `k` source tensor dimensions that are
+    being tiled, where `0 < k <= n`. The order of the dimensions matters:
+    - The tiled dimensions (of size `inner_tiles`) are added to the end of the result
+    tensor in the order in which they appear in `inner_dims_pos`.
+    - `inner_dims_pos[i]` specifies the source tensor dimension tiled by
+    `inner_tiles[i]`.
+
     `inner_tiles` (mandatory) specifies `k` tile sizes. These tile sizes
     correspond to the least significant ("inner") result tensor dimension sizes,
     in the same order. Tile sizes can be static or dynamic.
 
-    `inner_dims_pos` (mandatory) specifies `k` source tensor dimensions that are
-    being tiled, where `0 <= k <= n`.
-    - `inner_dims_pos[i]` specifies the source tensor dimension tiled by
-    `inner_tiles[i]` where `0 <= i < k`. All the values in `inner_dims_pos` are
-    within [0, n).
-    - The tiled dimensions (of size `inner_tiles`) are added to the end of the
-    result tensor in the order in which they appear, i.e.
-    `shape(result)[rank(result) + i] = inner_tiles[i]` for `0 <= i < k`.
-    - The following relationship for the tiled dimensions holds:
-    `shape(result)[inner_dims_pos[i]] = shape(source)[inner_dims_pos[i]] / inner_tiles[i]`.
-
     Example: If `inner_tiles = [16, 32]`, the result tensor has a shape of
     `...x16x32`. If `inner_dims_pos = [0, 1]`, the 0th source dimension is tiled
     by 16 and the 1st source dimension is tiled by 32. Other source dimensions
@@ -120,19 +116,7 @@ def Linalg_PackOp : Linalg_RelayoutOp<"pack", [
     %0 = linalg.pack %source inner_dims_pos = [0, 1] inner_tiles = [8, 32]
         into %dest : tensor<128x256xf32> -> tensor<16x8 x 8x32 xf32>
     //                                           \  /    \  /
-    //                               Outer Dims: 16x8 Inner Dims: 8x32
-
-    // CHW to CHWhw
-    %0 = linalg.pack %source inner_dims_pos = [2, 1] inner_tiles = [4, 2]
-        into %dest : tensor<3x20x24xf32> -> tensor<3x10x6 x 4x2 xf32>
-    //                                          \    /    \ /
-    //                             Outer Dims: 3x10x6 Inner Dims: 4x2
-
-    // HCW to HCWhw
-    %0 = linalg.pack %source inner_dims_pos = [2, 0] inner_tiles = [4, 2]
-        into %dest : tensor<18x3x32xf32> -> tensor<9x3x8 x 4x2 xf32>
-    //                                          \   /    \ /
-    //                              Outer Dims: 9x3x8 Inner Dims: 4x2
+    //                                     outer dims  inner dims
     ```
 
     `outer_dims_perm` (optional) specifies a permutation for the outer
@@ -274,6 +258,13 @@ def Linalg_UnPackOp : Linalg_RelayoutOp<"unpack"> {
     The "unpack" operation converts a source tensor of rank `n` with a tiled and
     packed layout to a result tensor of rank `n - k`.
 
+    `inner_dims_pos` (mandatory) specifies `k` source tensor dimensions with
+    which the last `k` source tensor dimensions are combined, where
+    `0 < k <= n/2`. Each `inner_dims_pos` element must be `>= 0` and `< n - k`.
+    The order of the dimensions in `inner_dims_pos` matters: dimension
+    `inner_dims_pos[i]` is combined with dimension `n - k + i` (assuming that
+    `outer_dims_perm` is not specified).
+
     `inner_tiles` (mandatory) specifies `k` tile sizes. These tile sizes
     correspond to the least significant ("inner") source tensor dimension sizes.
     The behavior of this op is undefined if:
@@ -283,50 +274,21 @@ def Linalg_UnPackOp : Linalg_RelayoutOp<"unpack"> {
     `inner_dims_pos[i]` (assuming that `outer_dims_perm` is not specified)
     evenly.
 
-    `inner_dims_pos` (mandatory) specifies `k` result tensor (i.e. unpacked
-    tensor) dimensions that were tiled with the `inner_tiles` to create the
-    packed source tensor. The source tensor (i.e. packed tensor) dimensions can
-    be unpacked given `inner_dims_pos` as follows.
-    - For `0 <= i < k` the following relationship holds:
-    `shape(result)[inner_dims_pos[i]] <= shape(source)[n-k+i] * shape(source)[inner_dims_pos[i]]`.
-    - For `0 <= j < n-k` and `j` not in `inner_dims_pos` the following relationship holds:
-    `shape(result)[j] = shape(source)[j]`.
-
     `outer_dims_perm` (optional) specifies a permutation for the outer
     dimensions. If specified, it must have `n - k` elements. If specified, this
     permutation is applied before combining any dimensions.
 
-    Note, the unpack operation may drop any padding introduced by the pack
-    operation and hence the following holds
-    `NumElementsOf(source) >= NumElementsOf(result)`.
-
-    Examples:
+    Example:
 
     ```mlir
     // NCnc to NC:
     %0 = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [8, 32]
-        into %dest : tensor<16x8 x 8x32 xf32> -> tensor<128x256xf32>
-    //                       \  /    \  /
-    //           Outer Dims: 16x8 Inner Dims: 8x32
+        into %dest : tensor<16x8x8x32xf32> -> tensor<128x256xf32>
 
     // CK to KCck:
     %0 = linalg.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1]
-                 inner_tiles = [8, 32]
-        into %dest : tensor<8x16 x 8x32 xf32> -> tensor<128x256xf32>
-    //                      \  /    \  /
-    //          Outer Dims: 8x16 Inner Dims: 8x32
-
-    // CHW to CHWhw:
-    %0 = linalg.unpack %source inner_dims_pos = [2, 1] inner_tiles = [4, 2]
-        into %dest : tensor<3x10x6 x 4x2 xf32> -> tensor<3x20x24xf32>
-    //                       \    /    \ /
-    //         Outer Dims: 3x10x6 Inner Dims: 4x2
-
-    // HCW to HCWhw
-    %0 = linalg.unpack %source inner_dims_pos = [2, 0] inner_tiles = [4, 2]
-        into %dest : tensor<9x3x8 x 4x2 xf32> -> tensor<18x3x32xf32>
-    //                      \   /    \ /
-    //          Outer Dims: 9x3x8 Inner Dims: 4x2
+                 inner_tiles = [8, 32] into %dest
+        : tensor<8x16x8x32xf32> -> tensor<128x256xf32>
     ```
   }];

   let arguments = (ins AnyShaped:$source,
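As a sanity check on the new `inner_dims_pos` wording for unpack, the NCnc example above instantiates the rule with `n = 4`, `k = 2`: dimension `inner_dims_pos[0] = 0` combines with dimension `n - k + 0 = 2`, and dimension `1` with dimension `3`. A worked instance, with shapes taken from that example:

```mlir
// n = 4, k = 2: dim 0 (16) combines with dim 2 (8):  16 * 8  = 128
//               dim 1 (8)  combines with dim 3 (32): 8  * 32 = 256
%0 = linalg.unpack %source inner_dims_pos = [0, 1] inner_tiles = [8, 32]
    into %dest : tensor<16x8x8x32xf32> -> tensor<128x256xf32>
```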

mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp

Lines changed: 42 additions & 25 deletions
@@ -4778,7 +4778,8 @@ commonPermutationOfPackAndUnPackOp(OpTy packOrUnPackOp,
 //===----------------------------------------------------------------------===//
 
 void PackOp::getAsmResultNames(function_ref<void(Value, StringRef)> setNameFn) {
-  setNameFn(getResult(), "pack");
+  if (hasPureTensorSemantics() && !getResult().empty())
+    setNameFn(*getResult().begin(), "pack");
 }
 
 void PackOp::build(OpBuilder &builder, OperationState &state, Value source,
@@ -5228,14 +5229,17 @@ LogicalResult PackOp::canonicalize(PackOp packOp, PatternRewriter &rewriter) {
   rewriter.modifyOpInPlace(packOp, [&] {
     packOp.getSourceMutable().assign(source);
     packOp.getDestMutable().assign(dest);
-    packOp.getResult().setType(cast<RankedTensorType>(dest.getType()));
+    if (packOp.hasPureTensorSemantics() && !packOp.getResult().empty())
+      (*packOp.getResult().begin())
+          .setType(cast<RankedTensorType>(dest.getType()));
   });
   // Insert a cast if needed
-  if (needUpdateDestType) {
+  if (needUpdateDestType && packOp.hasPureTensorSemantics()) {
     rewriter.setInsertionPointAfter(packOp);
-    auto castOp =
-        rewriter.create<tensor::CastOp>(loc, originalResultType, packOp);
-    rewriter.replaceAllUsesExcept(packOp, castOp, castOp);
+    auto castOp = rewriter.create<tensor::CastOp>(
+        loc, originalResultType, *packOp.getResult().begin());
+    rewriter.replaceAllUsesExcept(*packOp.getResult().begin(), castOp,
+                                  castOp);
   }
 
   return success();
@@ -5282,18 +5286,21 @@ bool PackOp::isLikePad() {
   return isLikePadUnPad(*this, packedTensorType);
 }
 
-OpFoldResult PackOp::fold(FoldAdaptor adaptor) {
+LogicalResult PackOp::fold(FoldAdaptor adaptor,
+                           SmallVectorImpl<OpFoldResult> &results) {
   if (!hasPureTensorSemantics())
-    return {};
+    return failure();
 
   std::optional<Attribute> paddingValue;
   if (auto pad = adaptor.getPaddingValue())
     paddingValue = pad;
   if (OpFoldResult reshapedSource = reshapeConstantSource(
           llvm::dyn_cast_if_present<DenseElementsAttr>(adaptor.getSource()),
-          cast<TensorType>(getDestType()), paddingValue))
-    return reshapedSource;
-  return {};
+          cast<TensorType>(getDestType()), paddingValue)) {
+    results.push_back(reshapedSource);
+    return success();
+  }
+  return failure();
 }
 
 /// Folds a tensor.cast op into a consuming PackOp op if the
@@ -5340,8 +5347,8 @@ struct FoldTensorCastPackOp : public OpRewritePattern<PackOp> {
     newOp->setDiscardableAttrs(op->getDiscardableAttrDictionary());
 
     // Replace op.
-    Value oldResult = op.getResult();
-    Value newResult = newOp.getResult();
+    Value oldResult = *op.getResult().begin();
+    Value newResult = *newOp.getResult().begin();
     Value replacement = (newResult.getType() != oldResult.getType())
                             ? rewriter.create<tensor::CastOp>(
                                   op->getLoc(), oldResult.getType(), newResult)
@@ -5359,7 +5366,8 @@ struct FoldTensorCastPackOp : public OpRewritePattern<PackOp> {
 
 void UnPackOp::getAsmResultNames(
     function_ref<void(Value, StringRef)> setNameFn) {
-  setNameFn(getResult(), "unpack");
+  if (hasPureTensorSemantics() && !getResult().empty())
+    setNameFn(*getResult().begin(), "unpack");
 }
 
 LogicalResult
@@ -5550,7 +5558,8 @@ LogicalResult UnPackOp::canonicalize(UnPackOp unPackOp,
                             extractSliceUser.getMixedStrides());
     rewriter.modifyOpInPlace(unPackOp, [&]() {
       unPackOp.setDpsInitOperand(0, newDest);
-      unPackOp.getResult().setType(newDest.getType());
+      if (unPackOp.hasPureTensorSemantics() && !unPackOp.getResult().empty())
+        (*unPackOp.getResult().begin()).setType(newDest.getType());
     });
     rewriter.replaceOp(extractSliceUser, unPackOp);
     return success();
@@ -5573,11 +5582,16 @@ LogicalResult UnPackOp::canonicalize(UnPackOp unPackOp,
      dest =
          rewriter.create<tensor::CastOp>(loc, newDestType, unPackOp.getDest());
    }
-    Value newOp = rewriter.create<UnPackOp>(
+    UnPackOp newOp = rewriter.create<UnPackOp>(
        loc, source, dest, unPackOp.getInnerDimsPos(), unPackOp.getMixedTiles(),
        unPackOp.getOuterDimsPerm());
-    rewriter.replaceOpWithNewOp<tensor::CastOp>(
-        unPackOp, unPackOp.getResult().getType(), newOp);
+    if (unPackOp.hasPureTensorSemantics() && !unPackOp.getResult().empty()) {
+      rewriter.replaceOpWithNewOp<tensor::CastOp>(
+          unPackOp, (*unPackOp.getResult().begin()).getType(),
+          *newOp.getResult().begin());
+    } else {
+      rewriter.replaceOp(unPackOp, newOp);
+    }
    return success();
  }
 
@@ -5589,14 +5603,17 @@ bool UnPackOp::isLikeUnPad() {
   return isLikePadUnPad(*this, packedTensorType);
 }
 
-OpFoldResult UnPackOp::fold(FoldAdaptor adaptor) {
+LogicalResult UnPackOp::fold(FoldAdaptor adaptor,
+                             SmallVectorImpl<OpFoldResult> &results) {
   if (!hasPureTensorSemantics())
-    return {};
+    return failure();
   if (OpFoldResult reshapedSource = reshapeConstantSource(
           llvm::dyn_cast_if_present<DenseElementsAttr>(adaptor.getSource()),
-          cast<TensorType>(getResult().getType())))
-    return reshapedSource;
-  return {};
+          cast<TensorType>((*getResult().begin()).getType()))) {
+    results.push_back(reshapedSource);
+    return success();
+  }
+  return failure();
 }
 
 /// Folds a tensor.cast op into a consuming UnPackOp op if the
@@ -5644,8 +5661,8 @@ struct FoldTensorCastUnPackOp : public OpRewritePattern<UnPackOp> {
     newOp->setDiscardableAttrs(op->getDiscardableAttrDictionary());
 
     // Replace op.
-    Value oldResult = op.getResult();
-    Value newResult = newOp.getResult();
+    Value oldResult = *op.getResult().begin();
+    Value newResult = *newOp.getResult().begin();
     Value replacement = (newResult.getType() != oldResult.getType())
                             ? rewriter.create<tensor::CastOp>(
                                   op->getLoc(), oldResult.getType(), newResult)
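The reworked fold hooks preserve the existing tensor-only constant folding through reshapeConstantSource; under buffer semantics they now report failure() instead of returning an empty OpFoldResult. A rough sketch of the kind of IR that remains foldable (hypothetical splat constant and destination):

```mlir
// A pack of a splat constant may still fold to a constant of the
// destination type; the memref form of the op never folds.
%cst = arith.constant dense<1.0> : tensor<128x256xf32>
%packed = linalg.pack %cst inner_dims_pos = [0, 1] inner_tiles = [8, 32]
    into %dest : tensor<128x256xf32> -> tensor<16x8x8x32xf32>
// --> may fold to: arith.constant dense<1.0> : tensor<16x8x8x32xf32>
```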

mlir/test/Dialect/Linalg/canonicalize.mlir

Lines changed: 2 additions & 2 deletions
@@ -512,7 +512,7 @@ func.func @fold_self_copy(%0 : memref<4x16xf32>) {
 // -----
 
 // CHECK-LABEL: func @no_fold_fill_like_memref
-// CHECK-NEXT: linalg.generic
+//  CHECK-NEXT:   linalg.generic
 func.func @no_fold_fill_like_memref(%in_out : memref<4x16xf32>, %fill_val : f32) {
   linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                    affine_map<(d0, d1) -> (d0, d1)>],
@@ -528,7 +528,7 @@ func.func @no_fold_fill_like_memref(%in_out : memref<4x16xf32>, %fill_val : f32)
 // -----
 
 // CHECK-LABEL: func @no_fold_fill_like_tensor
-// CHECK-NEXT: linalg.generic
+//  CHECK-NEXT:   linalg.generic
 func.func @no_fold_fill_like_tensor(%in_out : tensor<4x16xf32>, %fill_val : f32) -> tensor<4x16xf32> {
   %result = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                              affine_map<(d0, d1) -> (d0, d1)>],

mlir/test/Dialect/Linalg/roundtrip.mlir

Lines changed: 2 additions & 2 deletions
@@ -717,7 +717,7 @@ func.func @pack_memref(%source: memref<128x256xf32>, %dest: memref<8x16x8x32xf32
 
 // CHECK-label: func @pack_memref(
 // CHECK: %[[source:[a-zA-z0-9]*]]: memref<128x256xf32>, %[[dest:[a-zA-z0-9]*]]: memref<8x16x8x32xf32>) {
-// CHECK: %pack = linalg.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [8, 32] into %arg1 : memref<128x256xf32> -> memref<8x16x8x32xf32>
+// CHECK: linalg.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [8, 32] into %arg1 : memref<128x256xf32>
 // CHECK: return
 // CHECK: }
 // -----
@@ -730,5 +730,5 @@ func.func @unpack_memref(%source: memref<16x8x8x32xf32>, %dest: memref<128x256xf
 
 // CHECK-label: func @unpack_memref(
 // CHECK: %[[source:[a-zA-z0-9]*]]: memref<16x8x8x32xf32>, %[[dest:[a-zA-z0-9]*]]: memref<128x256xf32>) {
-// CHECK: %unpack = linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 32] into %arg1 : memref<16x8x8x32xf32> -> memref<128x256xf32>
+// CHECK: linalg.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 32] into %arg1 : memref<16x8x8x32xf32>
 // CHECK: return
