Skip to content

Commit addc07a

Browse files
committed
Still ... merge it with FoldMemRefAliasOps pass.
1 parent 97ec2bf commit addc07a

File tree

3 files changed: 15 additions and 27 deletions.

mlir/include/mlir/Dialect/AMDGPU/Transforms/Passes.td

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -70,16 +70,4 @@ def AmdgpuMaskedloadToLoadPass : Pass<"amdgpu-maskedload-to-load"> {
7070
"memref::MemRefDialect"
7171
];
7272
}
73-
74-
def AmdgpuFoldSubviewOpsPass : Pass<"amdgpu-fold-subview-ops"> {
75-
let summary = "Fold subview operations into their parent operations";
76-
let description = [{
77-
This pass identifies `memref.subview` sources of `GatherToLDSOp` and
78-
attempts to fold the source ops, potentially simplifying the overall
79-
operation and improving performance.
80-
}];
81-
let dependentDialects = [
82-
"memref::MemRefDialect"
83-
];
84-
}
8573
#endif // MLIR_DIALECT_AMDGPU_TRANSFORMS_PASSES_TD_

mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
//
1212
//===----------------------------------------------------------------------===//
1313

14+
#include "mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h"
1415
#include "mlir/Dialect/Affine/IR/AffineOps.h"
1516
#include "mlir/Dialect/Affine/ViewLikeInterfaceUtils.h"
1617
#include "mlir/Dialect/Arith/Utils/Utils.h"
@@ -20,7 +21,6 @@
2021
#include "mlir/Dialect/MemRef/Transforms/Transforms.h"
2122
#include "mlir/Dialect/MemRef/Utils/MemRefUtils.h"
2223
#include "mlir/Dialect/NVGPU/IR/NVGPUDialect.h"
23-
#include "mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h"
2424
#include "mlir/Dialect/Vector/IR/VectorOps.h"
2525
#include "mlir/IR/AffineMap.h"
2626
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
@@ -733,11 +733,11 @@ LogicalResult NVGPUAsyncCopyOpSubViewOpFolder::matchAndRewrite(
733733
return success();
734734
}
735735

736-
struct FoldSubviewIntoGatherToLDSOp
737-
: public OpRewritePattern<amdgpu::GatherToLDSOp> {
738-
using OpRewritePattern<amdgpu::GatherToLDSOp>::OpRewritePattern;
739-
LogicalResult
740-
matchAndRewrite(amdgpu::GatherToLDSOp op, PatternRewriter &rewriter) const override {
736+
struct FoldSubviewIntoAMDGPUGatherToLDSOp final
737+
: OpRewritePattern<amdgpu::GatherToLDSOp> {
738+
using OpRewritePattern::OpRewritePattern;
739+
LogicalResult matchAndRewrite(amdgpu::GatherToLDSOp op,
740+
PatternRewriter &rewriter) const override {
741741
Location loc = op.getLoc();
742742

743743
// Check if the source is a subview operation:
@@ -747,15 +747,15 @@ struct FoldSubviewIntoGatherToLDSOp
747747
loc, "GatherToLDSOp can only be folded if the source is a SubviewOp");
748748

749749
SmallVector<Value> sourceIndices;
750-
mlir::affine::resolveIndicesIntoOpWithOffsetsAndStrides(
751-
rewriter, loc, subviewOp.getMixedOffsets(), subviewOp.getMixedStrides(),
752-
subviewOp.getDroppedDims(), op.getSrcIndices(), sourceIndices);
750+
mlir::affine::resolveIndicesIntoOpWithOffsetsAndStrides(
751+
rewriter, loc, subviewOp.getMixedOffsets(), subviewOp.getMixedStrides(),
752+
subviewOp.getDroppedDims(), op.getSrcIndices(), sourceIndices);
753753

754-
rewriter.replaceOpWithNewOp<admgpu::GatherToLDSOp>(
755-
op, subviewOp.getSource(), sourceIndices, op.getDst(), op.getDstIndices(),
756-
op.getTransferType());
754+
rewriter.replaceOpWithNewOp<amdgpu::GatherToLDSOp>(
755+
op, subviewOp.getSource(), sourceIndices, op.getDst(),
756+
op.getDstIndices(), op.getTransferType());
757757

758-
return success();
758+
return success();
759759
}
760760
};
761761

@@ -790,7 +790,7 @@ void memref::populateFoldMemRefAliasOpPatterns(RewritePatternSet &patterns) {
790790
StoreOpOfCollapseShapeOpFolder<vector::StoreOp>,
791791
StoreOpOfCollapseShapeOpFolder<vector::MaskedStoreOp>,
792792
SubViewOfSubViewFolder, NVGPUAsyncCopyOpSubViewOpFolder,
793-
FoldSubviewIntoGatherToLDSOp>(patterns.getContext());
793+
FoldSubviewIntoAMDGPUGatherToLDSOp>(patterns.getContext());
794794
}
795795

796796
//===----------------------------------------------------------------------===//

mlir/test/Dialect/AMDGPU/amdgpu-fold-subviews.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: mlir-opt -amdgpu-fold-subview-ops -split-input-file %s | FileCheck %s
1+
// RUN: mlir-opt --fold-memref-alias-ops --split-input-file %s | FileCheck %s
22

33
#gpu_lds_addrspace = 3
44

0 commit comments

Comments
 (0)