Skip to content

Commit 70925b7

Browse files
committed
[VPlan] More CSE progress; a DenseMap crash is observed
1 parent 194367e commit 70925b7

15 files changed

+188
-208
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -897,6 +897,11 @@ struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags {
897897
return R && classof(R);
898898
}
899899

900+
static inline bool classof(const VPSingleDefRecipe *U) {
901+
auto *R = dyn_cast<VPRecipeBase>(U);
902+
return R && classof(R);
903+
}
904+
900905
void execute(VPTransformState &State) override = 0;
901906
};
902907

@@ -3569,6 +3574,12 @@ struct CastInfo<VPPhiAccessors, VPRecipeBase *>
35693574
template <>
35703575
struct CastInfo<VPPhiAccessors, const VPRecipeBase *>
35713576
: CastInfoVPPhiAccessors<const VPRecipeBase *> {};
3577+
template <>
3578+
struct CastInfo<VPPhiAccessors, VPSingleDefRecipe *>
3579+
: CastInfoVPPhiAccessors<VPRecipeBase *> {};
3580+
template <>
3581+
struct CastInfo<VPPhiAccessors, const VPSingleDefRecipe *>
3582+
: CastInfoVPPhiAccessors<const VPRecipeBase *> {};
35723583

35733584
/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
35743585
/// holds a sequence of zero or more VPRecipe's each representing a sequence of

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 28 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -1756,55 +1756,47 @@ void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) {
17561756
/// Hash the underlying data of a VPSingleDefRecipe pointer, instead of hashing
17571757
/// the pointer itself.
17581758
namespace {
1759-
static bool isIdentical(const VPValue *L, const VPValue *R) {
1760-
if (L->getVPValueID() != R->getVPValueID() ||
1761-
L->hasDefiningRecipe() != R->hasDefiningRecipe() ||
1762-
vputils::isSingleScalar(L) != vputils::isSingleScalar(R))
1763-
return false;
1764-
if (L->hasDefiningRecipe()) {
1765-
const VPRecipeBase *DefL = L->getDefiningRecipe();
1766-
const VPRecipeBase *DefR = R->getDefiningRecipe();
1767-
return vputils::getOpcode(*DefL) == vputils::getOpcode(*DefR) &&
1768-
equal(DefL->operands(), DefR->operands());
1769-
}
1770-
return L == R;
1771-
}
1772-
1773-
static hash_code hash_value(const VPValue &V) {
1774-
if (V.hasDefiningRecipe()) {
1775-
const VPRecipeBase *Def = V.getDefiningRecipe();
1776-
return hash_combine(V.getVPValueID(), vputils::getOpcode(*Def),
1777-
vputils::isSingleScalar(&V),
1759+
struct CSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
1760+
static unsigned getHashValue(const VPSingleDefRecipe *Def) {
1761+
return hash_combine(Def->getVPDefID(), vputils::getOpcode(*Def),
1762+
vputils::isSingleScalar(Def),
17781763
hash_combine_range(Def->operands()));
17791764
}
1780-
return hash_value(&V);
1781-
}
17821765

1783-
struct CSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
1784-
static unsigned getHashValue(const VPSingleDefRecipe *R) {
1785-
return hash_value(*R);
1786-
}
1766+
static bool isEqual(const VPSingleDefRecipe *L, const VPSingleDefRecipe *R) {
1767+
if (L == getEmptyKey() || R == getEmptyKey() || L == getTombstoneKey() ||
1768+
R == getTombstoneKey())
1769+
return L == R;
1770+
1771+
if (L == R)
1772+
return true;
17871773

1788-
static bool isEqual(const VPSingleDefRecipe *LHS,
1789-
const VPSingleDefRecipe *RHS) {
1790-
if (LHS == getEmptyKey() || RHS == getEmptyKey() ||
1791-
LHS == getTombstoneKey() || RHS == getTombstoneKey())
1792-
return LHS == RHS;
1793-
return isIdentical(LHS, RHS);
1774+
return L->getVPDefID() == R->getVPDefID() &&
1775+
vputils::getOpcode(*L) == vputils::getOpcode(*R) &&
1776+
vputils::isSingleScalar(L) == vputils::isSingleScalar(R) &&
1777+
equal(L->operands(), R->operands());
17941778
}
17951779
};
17961780
} // end anonymous namespace
17971781

17981782
/// Perform a common-subexpression-elimination of VPSingleDefRecipes on the \p
17991783
/// Plan.
18001784
void VPlanTransforms::cse(VPlan &Plan, Type &CanonicalIVTy) {
1801-
DenseMap<VPSingleDefRecipe *, VPSingleDefRecipe *, CSEDenseMapInfo> CSEMap;
1802-
VPTypeAnalysis TypeInfo(&CanonicalIVTy);
1785+
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
1786+
if (!LoopRegion)
1787+
return;
1788+
auto VPBBsOutsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
1789+
vp_depth_first_shallow(Plan.getEntry()));
1790+
auto VPBBsInsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
1791+
vp_depth_first_shallow(LoopRegion->getEntry()));
1792+
18031793
// There is existing logic to sink instructions into replicate regions, and
18041794
// we'd be undoing that work if we went through replicate regions. Hence,
18051795
// don't CSE in replicate regions.
1806-
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
1807-
vp_depth_first_shallow(Plan.getEntry()))) {
1796+
DenseMap<VPSingleDefRecipe *, VPSingleDefRecipe *, CSEDenseMapInfo> CSEMap;
1797+
VPTypeAnalysis TypeInfo(&CanonicalIVTy);
1798+
for (VPBasicBlock *VPBB :
1799+
concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion)) {
18081800
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
18091801
auto *Def = dyn_cast<VPSingleDefRecipe>(&R);
18101802
if (!Def)
@@ -1813,7 +1805,7 @@ void VPlanTransforms::cse(VPlan &Plan, Type &CanonicalIVTy) {
18131805
if (TypeInfo.inferScalarType(Def) != TypeInfo.inferScalarType(V))
18141806
continue;
18151807
// Drop poison-generating flags when reusing a value.
1816-
if (auto *RFlags = dyn_cast<VPRecipeWithIRFlags>(&R))
1808+
if (auto *RFlags = dyn_cast<VPRecipeWithIRFlags>(V))
18171809
RFlags->dropPoisonGeneratingFlags();
18181810
Def->replaceAllUsesWith(V);
18191811
Def->eraseFromParent();

llvm/lib/Transforms/Vectorize/VPlanUtils.h

Lines changed: 11 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -38,12 +38,19 @@ VPValue *getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr,
3838
/// SCEV expression could be constructed.
3939
const SCEV *getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE);
4040

41-
/// Get any instruction opcode data embedded in recipe \p R.
42-
inline std::optional<unsigned> getOpcode(const VPRecipeBase &R) {
43-
return TypeSwitch<const VPRecipeBase *, std::optional<unsigned>>(&R)
41+
/// Get any instruction opcode data embedded in recipe \p R. Returns an optional
42+
/// pair, where the first element indicates whether it is an intrinsic ID.
43+
inline std::optional<std::pair<bool, unsigned>>
44+
getOpcode(const VPRecipeBase &R) {
45+
return TypeSwitch<const VPRecipeBase *,
46+
std::optional<std::pair<bool, unsigned>>>(&R)
4447
.Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
4548
VPWidenSelectRecipe, VPHistogramRecipe, VPPartialReductionRecipe,
46-
VPReplicateRecipe>([](auto *I) { return I->getOpcode(); })
49+
VPReplicateRecipe>(
50+
[](auto *I) { return std::make_pair(false, I->getOpcode()); })
51+
.Case<VPWidenIntrinsicRecipe>([](auto *I) {
52+
return std::make_pair(true, I->getVectorIntrinsicID());
53+
})
4754
.Default([](auto *) { return std::nullopt; });
4855
}
4956

llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll

Lines changed: 12 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -84,8 +84,11 @@ define void @vscale_mul_8(ptr noalias noundef readonly captures(none) %a, ptr n
8484
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP22]], align 4
8585
; CHECK-NEXT: [[TMP17:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
8686
; CHECK-NEXT: [[TMP14:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
87+
; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
88+
; CHECK-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 4
89+
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[TMP16]]
8790
; CHECK-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[B]], align 4
88-
; CHECK-NEXT: store <vscale x 4 x float> [[TMP14]], ptr [[TMP22]], align 4
91+
; CHECK-NEXT: store <vscale x 4 x float> [[TMP14]], ptr [[TMP18]], align 4
8992
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[MUL1]], [[N_VEC]]
9093
; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY:.*]]
9194
; CHECK: [[FOR_COND_CLEANUP]]:
@@ -222,16 +225,13 @@ define void @vscale_mul_31(ptr noalias noundef readonly captures(none) %a, ptr n
222225
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDEX]]
223226
; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
224227
; CHECK-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], 4
225-
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP15]]
228+
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr float, ptr [[TMP12]], i64 [[TMP15]]
226229
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, ptr [[TMP12]], align 4
227230
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP16]], align 4
228231
; CHECK-NEXT: [[TMP17:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
229232
; CHECK-NEXT: [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
230-
; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
231-
; CHECK-NEXT: [[TMP20:%.*]] = mul nuw i64 [[TMP19]], 4
232-
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP20]]
233233
; CHECK-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[TMP12]], align 4
234-
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP21]], align 4
234+
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP16]], align 4
235235
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
236236
; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
237237
; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
@@ -305,16 +305,13 @@ define void @vscale_mul_64(ptr noalias noundef readonly captures(none) %a, ptr n
305305
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDEX]]
306306
; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
307307
; CHECK-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], 4
308-
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP15]]
308+
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr float, ptr [[TMP12]], i64 [[TMP15]]
309309
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, ptr [[TMP12]], align 4
310310
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP16]], align 4
311311
; CHECK-NEXT: [[TMP17:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
312312
; CHECK-NEXT: [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
313-
; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
314-
; CHECK-NEXT: [[TMP20:%.*]] = mul nuw i64 [[TMP19]], 4
315-
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP20]]
316313
; CHECK-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[TMP12]], align 4
317-
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP21]], align 4
314+
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP16]], align 4
318315
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
319316
; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
320317
; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -390,16 +387,13 @@ define void @trip_count_with_overflow(ptr noalias noundef readonly captures(none
390387
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDEX]]
391388
; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
392389
; CHECK-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 4
393-
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP16]]
390+
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr float, ptr [[TMP13]], i64 [[TMP16]]
394391
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, ptr [[TMP13]], align 4
395392
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP17]], align 4
396393
; CHECK-NEXT: [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
397394
; CHECK-NEXT: [[TMP19:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
398-
; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
399-
; CHECK-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 4
400-
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP21]]
401395
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP13]], align 4
402-
; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP22]], align 4
396+
; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP17]], align 4
403397
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
404398
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
405399
; CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
@@ -471,16 +465,13 @@ define void @trip_count_too_big_for_element_count(ptr noalias noundef readonly c
471465
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDEX]]
472466
; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
473467
; CHECK-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 4
474-
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP16]]
468+
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr float, ptr [[TMP13]], i64 [[TMP16]]
475469
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, ptr [[TMP13]], align 4
476470
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP17]], align 4
477471
; CHECK-NEXT: [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
478472
; CHECK-NEXT: [[TMP19:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
479-
; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
480-
; CHECK-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 4
481-
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP21]]
482473
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP13]], align 4
483-
; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP22]], align 4
474+
; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP17]], align 4
484475
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
485476
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
486477
; CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]

llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -148,8 +148,7 @@ define void @widen_2ptrs_phi_unrolled(ptr noalias nocapture %dst, ptr noalias no
148148
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
149149
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 2
150150
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[OFFSET_IDX]]
151-
; CHECK-NEXT: [[OFFSET_IDX4:%.*]] = shl i64 [[INDEX]], 2
152-
; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX4]]
151+
; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX]]
153152
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
154153
; CHECK-NEXT: [[DOTIDX:%.*]] = shl nuw nsw i64 [[TMP7]], 4
155154
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 [[DOTIDX]]

llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-derived-ivs.ll

Lines changed: 4 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -165,8 +165,7 @@ define void @derived_pointer_ivs(ptr noalias %a, ptr noalias %b, ptr %end) {
165165
; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
166166
; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 16
167167
; VF2-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]]
168-
; VF2-NEXT: [[OFFSET_IDX6:%.*]] = mul i64 [[INDEX]], 16
169-
; VF2-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[B]], i64 [[OFFSET_IDX6]]
168+
; VF2-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[B]], i64 [[OFFSET_IDX]]
170169
; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[NEXT_GEP]], align 8
171170
; VF2-NEXT: store <2 x double> [[WIDE_LOAD]], ptr [[NEXT_GEP7]], align 8
172171
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1
@@ -216,10 +215,8 @@ define void @derived_pointer_ivs(ptr noalias %a, ptr noalias %b, ptr %end) {
216215
; VF2IC2-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 16
217216
; VF2IC2-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]]
218217
; VF2IC2-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP13]]
219-
; VF2IC2-NEXT: [[OFFSET_IDX7:%.*]] = mul i64 [[INDEX]], 16
220-
; VF2IC2-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX7]], 16
221-
; VF2IC2-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[B]], i64 [[OFFSET_IDX7]]
222-
; VF2IC2-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP14]]
218+
; VF2IC2-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[B]], i64 [[OFFSET_IDX]]
219+
; VF2IC2-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP13]]
223220
; VF2IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[NEXT_GEP]], align 8
224221
; VF2IC2-NEXT: [[WIDE_LOAD10:%.*]] = load <2 x double>, ptr [[NEXT_GEP6]], align 8
225222
; VF2IC2-NEXT: store <2 x double> [[WIDE_LOAD]], ptr [[NEXT_GEP8]], align 8
@@ -269,8 +266,7 @@ define void @derived_pointer_ivs(ptr noalias %a, ptr noalias %b, ptr %end) {
269266
; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
270267
; VF4-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 16
271268
; VF4-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]]
272-
; VF4-NEXT: [[OFFSET_IDX6:%.*]] = mul i64 [[INDEX]], 16
273-
; VF4-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[B]], i64 [[OFFSET_IDX6]]
269+
; VF4-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[B]], i64 [[OFFSET_IDX]]
274270
; VF4-NEXT: [[WIDE_VEC:%.*]] = load <8 x double>, ptr [[NEXT_GEP]], align 8
275271
; VF4-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x double> [[WIDE_VEC]], <8 x double> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
276272
; VF4-NEXT: [[STRIDED_VEC8:%.*]] = shufflevector <8 x double> [[WIDE_VEC]], <8 x double> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>

llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops.ll

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -972,8 +972,7 @@ define void @test_2xi64_sub_of_wide_loads(ptr noalias %data, ptr noalias %A, ptr
972972
; VF4-NEXT: [[TMP5:%.*]] = sub <4 x i64> [[WIDE_LOAD]], [[WIDE_LOAD1]]
973973
; VF4-NEXT: [[TMP6:%.*]] = shl nsw i64 [[TMP0]], 1
974974
; VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP6]]
975-
; VF4-NEXT: [[TMP8:%.*]] = sub <4 x i64> [[WIDE_LOAD]], [[WIDE_LOAD1]]
976-
; VF4-NEXT: [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
975+
; VF4-NEXT: [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
977976
; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
978977
; VF4-NEXT: store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 8
979978
; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 4

llvm/test/Transforms/LoopVectorize/dead_instructions.ll

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -148,13 +148,13 @@ define void @dead_load_and_vector_pointer(ptr %a, ptr %b) {
148148
; CHECK: [[VECTOR_BODY]]:
149149
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
150150
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[INDEX]]
151-
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 2
151+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[TMP2]], i32 2
152152
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 8, !alias.scope [[META6:![0-9]+]], !noalias [[META9:![0-9]+]]
153-
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP5]], align 8, !alias.scope [[META6]], !noalias [[META9]]
153+
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP1]], align 8, !alias.scope [[META6]], !noalias [[META9]]
154154
; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i32> [[WIDE_LOAD]], splat (i32 1)
155155
; CHECK-NEXT: [[TMP7:%.*]] = add <2 x i32> [[WIDE_LOAD2]], splat (i32 1)
156156
; CHECK-NEXT: store <2 x i32> [[TMP6]], ptr [[TMP2]], align 4, !alias.scope [[META6]], !noalias [[META9]]
157-
; CHECK-NEXT: store <2 x i32> [[TMP7]], ptr [[TMP5]], align 4, !alias.scope [[META6]], !noalias [[META9]]
157+
; CHECK-NEXT: store <2 x i32> [[TMP7]], ptr [[TMP1]], align 4, !alias.scope [[META6]], !noalias [[META9]]
158158
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
159159
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128
160160
; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]

0 commit comments

Comments (0)