Revert "[AMDGPU] fold memref.subview/expand_shape/collapse_shape
in…
#150256
Conversation
This reverts commit dbc63f1. Having build deps issue.
@llvm/pr-subscribers-mlir @llvm/pr-subscribers-mlir-amdgpu

Author: Alan Li (lialan)

Changes: This reverts commit dbc63f1. Having build deps issue.

Patch is 21.44 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/150256.diff

8 Files Affected:
diff --git a/mlir/include/mlir/Dialect/AMDGPU/Transforms/Passes.h b/mlir/include/mlir/Dialect/AMDGPU/Transforms/Passes.h
index 58b9c74b2f8e0..cc2f543e79f69 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/AMDGPU/Transforms/Passes.h
@@ -22,9 +22,8 @@ class ConversionTarget;
namespace amdgpu {
#define GEN_PASS_DECL_AMDGPUEMULATEATOMICSPASS
-#define GEN_PASS_DECL_AMDGPUFOLDMEMREFOPSPASS
-#define GEN_PASS_DECL_AMDGPUMASKEDLOADTOLOADPASS
#define GEN_PASS_DECL_AMDGPURESOLVESTRIDEDMETADATAPASS
+#define GEN_PASS_DECL_AMDGPUMASKEDLOADTOLOADPASS
#define GEN_PASS_REGISTRATION
#include "mlir/Dialect/AMDGPU/Transforms/Passes.h.inc"
@@ -39,9 +38,6 @@ void populateAmdgpuResolveStridedMetadataPatterns(RewritePatternSet &patterns,
void populateAmdgpuMaskedloadToLoadPatterns(RewritePatternSet &patterns,
PatternBenefit benefit = 1);
-void populateAmdgpuFoldMemRefOpsPatterns(RewritePatternSet &patterns,
- PatternBenefit benefit = 1);
-
} // namespace amdgpu
} // namespace mlir
diff --git a/mlir/include/mlir/Dialect/AMDGPU/Transforms/Passes.td b/mlir/include/mlir/Dialect/AMDGPU/Transforms/Passes.td
index 8664f971cabde..8d0e6829ab0cc 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/AMDGPU/Transforms/Passes.td
@@ -70,16 +70,4 @@ def AmdgpuMaskedloadToLoadPass : Pass<"amdgpu-maskedload-to-load"> {
"memref::MemRefDialect"
];
}
-
-def AmdgpuFoldMemRefOpsPass : Pass<"amdgpu-fold-memrefs-ops"> {
- let summary = "Fold memref operations into their parent operations";
- let description = [{
- This pass identifies memref operations (subview, expand_shape, collapse_shape)
- that are sources of `GatherToLDSOp` and attempts to fold the source ops,
- potentially simplifying the overall operation and improving performance.
- }];
- let dependentDialects = [
- "memref::MemRefDialect"
- ];
-}
#endif // MLIR_DIALECT_AMDGPU_TRANSFORMS_PASSES_TD_
diff --git a/mlir/include/mlir/Dialect/MemRef/Utils/MemRefUtils.h b/mlir/include/mlir/Dialect/MemRef/Utils/MemRefUtils.h
index dd3b3dea6ef26..34ad279a07a8b 100644
--- a/mlir/include/mlir/Dialect/MemRef/Utils/MemRefUtils.h
+++ b/mlir/include/mlir/Dialect/MemRef/Utils/MemRefUtils.h
@@ -116,43 +116,6 @@ inline bool isSameViewOrTrivialAlias(MemrefValue a, MemrefValue b) {
/// the source memref (i.e. implements ViewLikeOpInterface).
MemrefValue skipViewLikeOps(MemrefValue source);
-/// Given the 'indices' of a load/store operation where the memref is a result
-/// of a expand_shape op, returns the indices w.r.t to the source memref of the
-/// expand_shape op. For example
-///
-/// %0 = ... : memref<12x42xf32>
-/// %1 = memref.expand_shape %0 [[0, 1], [2]]
-/// : memref<12x42xf32> into memref<2x6x42xf32>
-/// %2 = load %1[%i1, %i2, %i3] : memref<2x6x42xf32
-///
-/// could be folded into
-///
-/// %2 = load %0[6 * i1 + i2, %i3] :
-/// memref<12x42xf32>
-LogicalResult resolveSourceIndicesExpandShape(
- Location loc, PatternRewriter &rewriter,
- memref::ExpandShapeOp expandShapeOp, ValueRange indices,
- SmallVectorImpl<Value> &sourceIndices, bool startsInbounds);
-
-/// Given the 'indices' of a load/store operation where the memref is a result
-/// of a collapse_shape op, returns the indices w.r.t to the source memref of
-/// the collapse_shape op. For example
-///
-/// %0 = ... : memref<2x6x42xf32>
-/// %1 = memref.collapse_shape %0 [[0, 1], [2]]
-/// : memref<2x6x42xf32> into memref<12x42xf32>
-/// %2 = load %1[%i1, %i2] : memref<12x42xf32>
-///
-/// could be folded into
-///
-/// %2 = load %0[%i1 / 6, %i1 % 6, %i2] :
-/// memref<2x6x42xf32>
-LogicalResult
-resolveSourceIndicesCollapseShape(Location loc, PatternRewriter &rewriter,
- memref::CollapseShapeOp collapseShapeOp,
- ValueRange indices,
- SmallVectorImpl<Value> &sourceIndices);
-
} // namespace memref
} // namespace mlir
diff --git a/mlir/lib/Dialect/AMDGPU/Transforms/CMakeLists.txt b/mlir/lib/Dialect/AMDGPU/Transforms/CMakeLists.txt
index 3b0c072ed1217..17bbe54ea6c0c 100644
--- a/mlir/lib/Dialect/AMDGPU/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/AMDGPU/Transforms/CMakeLists.txt
@@ -1,8 +1,7 @@
add_mlir_dialect_library(MLIRAMDGPUTransforms
EmulateAtomics.cpp
- FoldMemRefsOps.cpp
- MaskedloadToLoad.cpp
ResolveStridedMetadata.cpp
+ MaskedloadToLoad.cpp
ADDITIONAL_HEADER_DIRS
{$MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/AMDGPU/Transforms
diff --git a/mlir/lib/Dialect/AMDGPU/Transforms/FoldMemRefsOps.cpp b/mlir/lib/Dialect/AMDGPU/Transforms/FoldMemRefsOps.cpp
deleted file mode 100644
index a3fdc7ee385ed..0000000000000
--- a/mlir/lib/Dialect/AMDGPU/Transforms/FoldMemRefsOps.cpp
+++ /dev/null
@@ -1,97 +0,0 @@
-//===- FoldSubviewOps.cpp - AMDGPU fold subview ops -----------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "mlir/Dialect/AMDGPU/Transforms/Passes.h"
-
-#include "mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h"
-#include "mlir/Dialect/Affine/ViewLikeInterfaceUtils.h"
-#include "mlir/Dialect/MemRef/IR/MemRef.h"
-#include "mlir/Dialect/MemRef/Utils/MemRefUtils.h"
-#include "mlir/Transforms/WalkPatternRewriteDriver.h"
-#include "llvm/ADT/TypeSwitch.h"
-
-namespace mlir::amdgpu {
-#define GEN_PASS_DEF_AMDGPUFOLDMEMREFOPSPASS
-#include "mlir/Dialect/AMDGPU/Transforms/Passes.h.inc"
-
-struct AmdgpuFoldMemRefOpsPass final
- : amdgpu::impl::AmdgpuFoldMemRefOpsPassBase<AmdgpuFoldMemRefOpsPass> {
- void runOnOperation() override {
- RewritePatternSet patterns(&getContext());
- populateAmdgpuFoldMemRefOpsPatterns(patterns);
- walkAndApplyPatterns(getOperation(), std::move(patterns));
- }
-};
-
-struct FoldMemRefOpsIntoGatherToLDSOp final : OpRewritePattern<GatherToLDSOp> {
- using OpRewritePattern::OpRewritePattern;
- LogicalResult matchAndRewrite(GatherToLDSOp op,
- PatternRewriter &rewriter) const override {
- Location loc = op.getLoc();
-
- Value memrefSource;
- SmallVector<Value> sourceIndices;
- auto foldResult =
- llvm::TypeSwitch<Operation *, LogicalResult>(
- op.getSrc().getDefiningOp())
- .Case<memref::SubViewOp>([&](memref::SubViewOp subviewOp) {
- // If the source is a SubViewOp, we can directly rewrite the
- // GatherToLDSOp.
- mlir::affine::resolveIndicesIntoOpWithOffsetsAndStrides(
- rewriter, loc, subviewOp.getMixedOffsets(),
- subviewOp.getMixedStrides(), subviewOp.getDroppedDims(),
- op.getSrcIndices(), sourceIndices);
- memrefSource = subviewOp.getSource();
- return success();
- })
- .Case<memref::ExpandShapeOp>(
- [&](memref::ExpandShapeOp expandShapeOp) {
- if (failed(mlir::memref::resolveSourceIndicesExpandShape(
- loc, rewriter, expandShapeOp, op.getSrcIndices(),
- sourceIndices, false))) {
- return failure();
- }
- memrefSource = expandShapeOp.getViewSource();
- return success();
- })
- .Case<memref::CollapseShapeOp>(
- [&](memref::CollapseShapeOp collapseShapeOp) {
- if (failed(mlir::memref::resolveSourceIndicesCollapseShape(
- loc, rewriter, collapseShapeOp, op.getSrcIndices(),
- sourceIndices))) {
- return failure();
- }
- memrefSource = collapseShapeOp.getViewSource();
- return success();
- })
- .Default([&](Operation *op) {
- // If the source is not a SubViewOp, ExpandShapeOp, or
- // CollapseShapeOp, we cannot fold the GatherToLDSOp.
- return rewriter.notifyMatchFailure(
- op,
- "source producer is not one of SubViewOp, ExpandShapeOp, or "
- "CollapseShapeOp");
- });
-
- if (failed(foldResult)) {
- return failure();
- }
-
- rewriter.replaceOpWithNewOp<GatherToLDSOp>(op, memrefSource, sourceIndices,
- op.getDst(), op.getDstIndices(),
- op.getTransferType());
-
- return success();
- }
-};
-
-void populateAmdgpuFoldMemRefOpsPatterns(RewritePatternSet &patterns,
- PatternBenefit benefit) {
- patterns.add<FoldMemRefOpsIntoGatherToLDSOp>(patterns.getContext(), benefit);
-}
-} // namespace mlir::amdgpu
diff --git a/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp b/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp
index 24da447ad7685..89be188af9129 100644
--- a/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp
+++ b/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp
@@ -44,6 +44,97 @@ using namespace mlir;
// Utility functions
//===----------------------------------------------------------------------===//
+/// Given the 'indices' of a load/store operation where the memref is a result
+/// of a expand_shape op, returns the indices w.r.t to the source memref of the
+/// expand_shape op. For example
+///
+/// %0 = ... : memref<12x42xf32>
+/// %1 = memref.expand_shape %0 [[0, 1], [2]]
+/// : memref<12x42xf32> into memref<2x6x42xf32>
+/// %2 = load %1[%i1, %i2, %i3] : memref<2x6x42xf32
+///
+/// could be folded into
+///
+/// %2 = load %0[6 * i1 + i2, %i3] :
+/// memref<12x42xf32>
+static LogicalResult resolveSourceIndicesExpandShape(
+ Location loc, PatternRewriter &rewriter,
+ memref::ExpandShapeOp expandShapeOp, ValueRange indices,
+ SmallVectorImpl<Value> &sourceIndices, bool startsInbounds) {
+ SmallVector<OpFoldResult> destShape = expandShapeOp.getMixedOutputShape();
+
+ // Traverse all reassociation groups to determine the appropriate indices
+ // corresponding to each one of them post op folding.
+ for (ArrayRef<int64_t> group : expandShapeOp.getReassociationIndices()) {
+ assert(!group.empty() && "association indices groups cannot be empty");
+ int64_t groupSize = group.size();
+ if (groupSize == 1) {
+ sourceIndices.push_back(indices[group[0]]);
+ continue;
+ }
+ SmallVector<OpFoldResult> groupBasis =
+ llvm::map_to_vector(group, [&](int64_t d) { return destShape[d]; });
+ SmallVector<Value> groupIndices =
+ llvm::map_to_vector(group, [&](int64_t d) { return indices[d]; });
+ Value collapsedIndex = rewriter.create<affine::AffineLinearizeIndexOp>(
+ loc, groupIndices, groupBasis, /*disjoint=*/startsInbounds);
+ sourceIndices.push_back(collapsedIndex);
+ }
+ return success();
+}
+
+/// Given the 'indices' of a load/store operation where the memref is a result
+/// of a collapse_shape op, returns the indices w.r.t to the source memref of
+/// the collapse_shape op. For example
+///
+/// %0 = ... : memref<2x6x42xf32>
+/// %1 = memref.collapse_shape %0 [[0, 1], [2]]
+/// : memref<2x6x42xf32> into memref<12x42xf32>
+/// %2 = load %1[%i1, %i2] : memref<12x42xf32>
+///
+/// could be folded into
+///
+/// %2 = load %0[%i1 / 6, %i1 % 6, %i2] :
+/// memref<2x6x42xf32>
+static LogicalResult
+resolveSourceIndicesCollapseShape(Location loc, PatternRewriter &rewriter,
+ memref::CollapseShapeOp collapseShapeOp,
+ ValueRange indices,
+ SmallVectorImpl<Value> &sourceIndices) {
+ // Note: collapse_shape requires a strided memref, we can do this.
+ auto metadata = rewriter.create<memref::ExtractStridedMetadataOp>(
+ loc, collapseShapeOp.getSrc());
+ SmallVector<OpFoldResult> sourceSizes = metadata.getConstifiedMixedSizes();
+ for (auto [index, group] :
+ llvm::zip(indices, collapseShapeOp.getReassociationIndices())) {
+ assert(!group.empty() && "association indices groups cannot be empty");
+ int64_t groupSize = group.size();
+
+ if (groupSize == 1) {
+ sourceIndices.push_back(index);
+ continue;
+ }
+
+ SmallVector<OpFoldResult> basis =
+ llvm::map_to_vector(group, [&](int64_t d) { return sourceSizes[d]; });
+ auto delinearize = rewriter.create<affine::AffineDelinearizeIndexOp>(
+ loc, index, basis, /*hasOuterBound=*/true);
+ llvm::append_range(sourceIndices, delinearize.getResults());
+ }
+ if (collapseShapeOp.getReassociationIndices().empty()) {
+ auto zeroAffineMap = rewriter.getConstantAffineMap(0);
+ int64_t srcRank =
+ cast<MemRefType>(collapseShapeOp.getViewSource().getType()).getRank();
+ OpFoldResult ofr = affine::makeComposedFoldedAffineApply(
+ rewriter, loc, zeroAffineMap, ArrayRef<OpFoldResult>{});
+ for (int64_t i = 0; i < srcRank; i++) {
+ sourceIndices.push_back(
+ getValueOrCreateConstantIndexOp(rewriter, loc, ofr));
+ }
+ }
+ return success();
+}
+
/// Helpers to access the memref operand for each op.
template <typename LoadOrStoreOpTy>
static Value getMemRefOperand(LoadOrStoreOpTy op) {
diff --git a/mlir/lib/Dialect/MemRef/Utils/MemRefUtils.cpp b/mlir/lib/Dialect/MemRef/Utils/MemRefUtils.cpp
index 97fe3cb5b4705..a50b4cfc74708 100644
--- a/mlir/lib/Dialect/MemRef/Utils/MemRefUtils.cpp
+++ b/mlir/lib/Dialect/MemRef/Utils/MemRefUtils.cpp
@@ -12,7 +12,6 @@
#include "mlir/Dialect/MemRef/Utils/MemRefUtils.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
-#include "mlir/Dialect/Arith/Utils/Utils.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Interfaces/ViewLikeInterface.h"
#include "llvm/ADT/STLExtras.h"
@@ -218,70 +217,5 @@ MemrefValue skipViewLikeOps(MemrefValue source) {
return source;
}
-LogicalResult resolveSourceIndicesExpandShape(
- Location loc, PatternRewriter &rewriter,
- memref::ExpandShapeOp expandShapeOp, ValueRange indices,
- SmallVectorImpl<Value> &sourceIndices, bool startsInbounds) {
- SmallVector<OpFoldResult> destShape = expandShapeOp.getMixedOutputShape();
-
- // Traverse all reassociation groups to determine the appropriate indices
- // corresponding to each one of them post op folding.
- for (ArrayRef<int64_t> group : expandShapeOp.getReassociationIndices()) {
- assert(!group.empty() && "association indices groups cannot be empty");
- int64_t groupSize = group.size();
- if (groupSize == 1) {
- sourceIndices.push_back(indices[group[0]]);
- continue;
- }
- SmallVector<OpFoldResult> groupBasis =
- llvm::map_to_vector(group, [&](int64_t d) { return destShape[d]; });
- SmallVector<Value> groupIndices =
- llvm::map_to_vector(group, [&](int64_t d) { return indices[d]; });
- Value collapsedIndex = rewriter.create<affine::AffineLinearizeIndexOp>(
- loc, groupIndices, groupBasis, /*disjoint=*/startsInbounds);
- sourceIndices.push_back(collapsedIndex);
- }
- return success();
-}
-
-LogicalResult
-resolveSourceIndicesCollapseShape(Location loc, PatternRewriter &rewriter,
- memref::CollapseShapeOp collapseShapeOp,
- ValueRange indices,
- SmallVectorImpl<Value> &sourceIndices) {
- // Note: collapse_shape requires a strided memref, we can do this.
- auto metadata = rewriter.create<memref::ExtractStridedMetadataOp>(
- loc, collapseShapeOp.getSrc());
- SmallVector<OpFoldResult> sourceSizes = metadata.getConstifiedMixedSizes();
- for (auto [index, group] :
- llvm::zip(indices, collapseShapeOp.getReassociationIndices())) {
- assert(!group.empty() && "association indices groups cannot be empty");
- int64_t groupSize = group.size();
-
- if (groupSize == 1) {
- sourceIndices.push_back(index);
- continue;
- }
-
- SmallVector<OpFoldResult> basis =
- llvm::map_to_vector(group, [&](int64_t d) { return sourceSizes[d]; });
- auto delinearize = rewriter.create<affine::AffineDelinearizeIndexOp>(
- loc, index, basis, /*hasOuterBound=*/true);
- llvm::append_range(sourceIndices, delinearize.getResults());
- }
- if (collapseShapeOp.getReassociationIndices().empty()) {
- auto zeroAffineMap = rewriter.getConstantAffineMap(0);
- int64_t srcRank =
- cast<MemRefType>(collapseShapeOp.getViewSource().getType()).getRank();
- OpFoldResult ofr = affine::makeComposedFoldedAffineApply(
- rewriter, loc, zeroAffineMap, ArrayRef<OpFoldResult>{});
- for (int64_t i = 0; i < srcRank; i++) {
- sourceIndices.push_back(
- getValueOrCreateConstantIndexOp(rewriter, loc, ofr));
- }
- }
- return success();
-}
-
} // namespace memref
} // namespace mlir
diff --git a/mlir/test/Dialect/AMDGPU/amdgpu-fold-memrefs.mlir b/mlir/test/Dialect/AMDGPU/amdgpu-fold-memrefs.mlir
deleted file mode 100644
index 57afa127c9da8..0000000000000
--- a/mlir/test/Dialect/AMDGPU/amdgpu-fold-memrefs.mlir
+++ /dev/null
@@ -1,94 +0,0 @@
-// RUN: mlir-opt --amdgpu-fold-memrefs-ops --split-input-file %s | FileCheck %s
-
-#gpu_lds_addrspace = 3
-
-// CHECK: func @test_subview_folding
-// CHECK-SAME: %[[ARG0:.*]]: index, %[[ARG1:.*]]: index
-func.func @test_subview_folding(%offset_i: index, %offset_j: index) {
- // CHECK: %[[LOCAL:.*]] = memref.alloc() : memref<64x64xf16, 3>
- // CHECK: %[[MEM:.*]] = memref.alloc() : memref<64x128xf16>
- // CHECK: %[[C0:.*]] = arith.constant 0 : index
- // CHECK: amdgpu.gather_to_lds %[[MEM]][%[[ARG0]], %[[ARG1]]], %[[LOCAL]][%[[C0]], %[[C0]]]
- // CHECK-SAME: vector<8xf16>, memref<64x128xf16>, memref<64x64xf16, 3>
-
- %alloc = memref.alloc() : memref<64x64xf16, #gpu_lds_addrspace>
- %mem = memref.alloc() : memref<64x128xf16>
- %subview = memref.subview %mem[0, 0][32, 64][1, 1] : memref<64x128xf16> to memref<32x64xf16, strided<[128, 1]>>
- %c0 = arith.constant 0 : index
- amdgpu.gather_to_lds %subview[%offset_i, %offset_j], %alloc[%c0, %c0]
- : vector<8xf16>, memref<32x64xf16, strided<[128, 1]>>, memref<64x64xf16, #gpu_lds_addrspace>
- func.return
-}
-
-// -----
-
-#gpu_lds_addrspace = 3
-
-// CHECK: #[[MAP:.*]] = affine_map<()[s0] -> (s0 + 32)>
-// CHECK: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 64)>
-
-// CHECK: func @subview_folding_offset
-// CHECK-SAME: %[[ARG0:.*]]: index, %[[ARG1:.*]]: index
-func.func @subview_folding_offset(%offset_i: index, %offset_j: index) {
- // CHECK: %[[LOCAL:.*]] = memref.alloc() : memref<64x64xf16, 3>
- // CHECK: %[[MEM:.*]] = memref.alloc() : memref<64x128xf16>
- // CHECK: %[[C0:.*]] = arith.constant 0 : index
- // CHECK: %[[IDX0:.*]] = affine.apply #[[MAP]]()[%[[ARG0]]]
- // CHECK: %[[IDX1:.*]] = affine.apply #[[MAP1]]()[%[[ARG1]]]
- // CHECK: amdgpu.gather_to_lds %[[MEM]][%[[IDX0]], %[[IDX1]]], %[[LOCAL]][%[[C0]], %[[C0]]]
- // CHECK-SAME: vector<8xf16>, memref<64x128xf16>, memref<64x64xf16, 3>
-
- %alloc = memref.alloc() : memref<64x64xf16, #gpu_lds_addrspace>
- %mem = memref.alloc() : memref<64x128xf16>
- %subview = memref.subview %mem[32, 64][32, 64][1, 1] : memref<64x128xf16> to memref<32x64xf16, strided<[128, 1], offset: 4160>>
- %c0 = arith.constant 0 : index
- amdgpu.gather_to_lds %subview[%offset_i, %offset_j], %alloc[%c0, %c0]
- : vector<8xf16>, memref<32x64xf16, strided<[128, 1], offset: 4160>>, memref<64x64xf16, #gpu_lds_addrspace>
- func.return
-}
-
-// -----
-
-#gpu_lds_addrspace = 3
-
-// CHECK: func @test_expand_shape
-// CHECK-SAME: %[[ARG0:.*]]: index, %[[ARG1:.*]]: index
-func.func @test_expand_shape(%offset_i: index, %offset_j: index) {
- // CHECK: %[[LOCAL:.*]] = memref.alloc() : memref<64x64xf16, 3>
- // CHECK: %[[MEM:.*]] = memref.alloc() : memref<8192xf16>
- // CHECK: %[[C0:.*]] = arith.constant 0 : index
- // CHECK: %[[IDX:.*]] = affine.linearize_index [%[[ARG0]], %[[ARG1]]] by (64, 128) : index
- // CHECK: amdgpu.gather_to_lds %[[MEM]][%[[IDX]]], %[[LOCAL]][%[[C0]], %[[C0]]]
- // CHECK-SAME: vector<8xf16>, memref<8192xf16>, memref<64x64xf16, 3>
-
- %alloc = memref.al...
[truncated]
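For context, the reverted pattern rewrote `amdgpu.gather_to_lds` ops whose source memref was produced by `memref.subview`, `memref.expand_shape`, or `memref.collapse_shape` so that they address the underlying memref directly with adjusted indices. A minimal before/after sketch, with all names, shapes, and offsets taken from the reverted `@test_subview_folding` and `@test_expand_shape` tests above (the snippets are fragments from inside a function, so `%offset_i`/`%offset_j` are its index arguments):

Before folding, the gather reads through a zero-offset subview:

  %alloc = memref.alloc() : memref<64x64xf16, 3>
  %mem = memref.alloc() : memref<64x128xf16>
  %subview = memref.subview %mem[0, 0][32, 64][1, 1] : memref<64x128xf16> to memref<32x64xf16, strided<[128, 1]>>
  %c0 = arith.constant 0 : index
  amdgpu.gather_to_lds %subview[%offset_i, %offset_j], %alloc[%c0, %c0]
    : vector<8xf16>, memref<32x64xf16, strided<[128, 1]>>, memref<64x64xf16, 3>

After folding, the subview is bypassed and, because its offsets are zero, the indices are unchanged:

  amdgpu.gather_to_lds %mem[%offset_i, %offset_j], %alloc[%c0, %c0]
    : vector<8xf16>, memref<64x128xf16>, memref<64x64xf16, 3>

For an `expand_shape` source (here `%mem` is a flat `memref<8192xf16>` expanded to `64x128`), the folded form instead linearizes the indices with `affine.linearize_index`:

  %idx = affine.linearize_index [%offset_i, %offset_j] by (64, 128) : index
  amdgpu.gather_to_lds %mem[%idx], %alloc[%c0, %c0]
    : vector<8xf16>, memref<8192xf16>, memref<64x64xf16, 3>

In the reverted tests this rewrite was exercised with `mlir-opt --amdgpu-fold-memrefs-ops`.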
@llvm/pr-subscribers-mlir-gpu

Author: Alan Li (lialan)

Changes: This reverts commit dbc63f1. Having build deps issue.

Patch is 21.44 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/150256.diff
You don't need a PR for the revert. Simply revert it locally and push it directly.
We also have a label for this: |
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/153/builds/39111

Here is the relevant piece of the build log for reference:
Revert "[AMDGPU] fold memref.subview/expand_shape/collapse_shape into `amdgpu.gather_to_lds` (llvm#149851)" (llvm#150256)

This reverts commit dbc63f1.
Having build deps issue.