Skip to content

Commit b0f2110

Browse files
committed
[VPlan] Remove VPVectorPointer for part 0 after unrolling.
VPVectorPointer for part 0 is just the pointer operand. Simplify it after unrolling. This removes a large number of redundant GEPs with index 0.
1 parent 6201761 commit b0f2110

File tree

289 files changed

+3797
-6032
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

289 files changed

+3797
-6032
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1835,6 +1835,8 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
18351835
getGEPNoWrapFlags(), getDebugLoc());
18361836
}
18371837

1838+
bool isPart0() { return getUnrollPart(*this) == 0; }
1839+
18381840
/// Return the cost of this VPHeaderPHIRecipe.
18391841
InstructionCost computeCost(ElementCount VF,
18401842
VPCostContext &Ctx) const override {

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1015,6 +1015,14 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
10151015
if (Op->isLiveIn())
10161016
PredPHI->replaceAllUsesWith(Op);
10171017
}
1018+
if (auto *VecPtr= dyn_cast<VPVectorPointerRecipe>(&R)) {
1019+
if (VecPtr->getParent()->getPlan()->isUnrolled() && VecPtr->isPart0()) {
1020+
VecPtr->replaceAllUsesWith(VecPtr->getOperand(0));
1021+
return;
1022+
}
1023+
}
1024+
1025+
10181026

10191027
VPValue *A;
10201028
if (match(Def, m_Trunc(m_ZExtOrSExt(m_VPValue(A))))) {

llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,7 @@ define void @test_blend_feeding_replicated_store_1(i64 %N, ptr noalias %src, ptr
2222
; CHECK: [[VECTOR_BODY]]:
2323
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE30:.*]] ]
2424
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]]
25-
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0
26-
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, ptr [[TMP5]], align 4
25+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, ptr [[TMP4]], align 4
2726
; CHECK-NEXT: [[TMP6:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD]], zeroinitializer
2827
; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i1> zeroinitializer, <16 x i1> zeroinitializer
2928
; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i1> [[TMP6]], splat (i1 true)
@@ -213,8 +212,7 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
213212
; CHECK: [[VECTOR_BODY]]:
214213
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE30:.*]] ]
215214
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[IV]]
216-
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[GEP_SRC]], i32 0
217-
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
215+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[GEP_SRC]], align 1
218216
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], zeroinitializer
219217
; CHECK-NEXT: [[TMP4:%.*]] = xor <16 x i1> [[TMP3]], splat (i1 true)
220218
; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[TMP4]], <16 x i1> [[TMP5]], <16 x i1> zeroinitializer

llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,18 +14,16 @@ define void @fshl_operand_first_order_recurrence(ptr %dst, ptr noalias %src) {
1414
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1515
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i64> [ <i64 poison, i64 0>, %[[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], %[[VECTOR_BODY]] ]
1616
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[INDEX]]
17-
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
1817
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
19-
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
18+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8
2019
; CHECK-NEXT: [[WIDE_LOAD1]] = load <2 x i64>, ptr [[TMP5]], align 8
2120
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i64> [[VECTOR_RECUR]], <2 x i64> [[WIDE_LOAD]], <2 x i32> <i32 1, i32 2>
2221
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[WIDE_LOAD]], <2 x i64> [[WIDE_LOAD1]], <2 x i32> <i32 1, i32 2>
2322
; CHECK-NEXT: [[TMP8:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> splat (i64 1), <2 x i64> [[TMP6]], <2 x i64> splat (i64 1))
2423
; CHECK-NEXT: [[TMP9:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> splat (i64 1), <2 x i64> [[TMP7]], <2 x i64> splat (i64 1))
2524
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[INDEX]]
26-
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 0
2725
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 2
28-
; CHECK-NEXT: store <2 x i64> [[TMP8]], ptr [[TMP12]], align 8
26+
; CHECK-NEXT: store <2 x i64> [[TMP8]], ptr [[TMP10]], align 8
2927
; CHECK-NEXT: store <2 x i64> [[TMP9]], ptr [[TMP13]], align 8
3028
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
3129
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
@@ -79,11 +77,9 @@ define void @powi_call(ptr %P) {
7977
; CHECK: [[VECTOR_PH]]:
8078
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
8179
; CHECK: [[VECTOR_BODY]]:
82-
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[P]], i32 0
83-
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP2]], align 8
80+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[P]], align 8
8481
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> [[WIDE_LOAD]], i32 3)
85-
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[P]], i32 0
86-
; CHECK-NEXT: store <2 x double> [[TMP3]], ptr [[TMP4]], align 8
82+
; CHECK-NEXT: store <2 x double> [[TMP3]], ptr [[P]], align 8
8783
; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
8884
; CHECK: [[MIDDLE_BLOCK]]:
8985
; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]

llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,7 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
3333
; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw <vscale x 8 x i64> [[VEC_IND]], splat (i64 3)
3434
; CHECK-NEXT: [[TMP11:%.*]] = lshr <vscale x 8 x i64> [[BROADCAST_SPLAT]], [[TMP10]]
3535
; CHECK-NEXT: [[TMP14:%.*]] = trunc <vscale x 8 x i64> [[TMP11]] to <vscale x 8 x i8>
36-
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
37-
; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP14]], ptr [[TMP17]], i32 1, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]])
36+
; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP14]], ptr [[NEXT_GEP]], i32 1, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]])
3837
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
3938
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 8)
4039
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
@@ -117,8 +116,7 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
117116
; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw <vscale x 8 x i64> [[VEC_IND]], splat (i64 3)
118117
; CHECK-NEXT: [[TMP11:%.*]] = lshr <vscale x 8 x i64> [[BROADCAST_SPLAT]], [[TMP10]]
119118
; CHECK-NEXT: [[TMP14:%.*]] = trunc <vscale x 8 x i64> [[TMP11]] to <vscale x 8 x i8>
120-
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
121-
; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP14]], ptr [[TMP17]], i32 1, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]])
119+
; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP14]], ptr [[NEXT_GEP]], i32 1, <vscale x 8 x i1> [[ACTIVE_LANE_MASK]])
122120
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
123121
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 [[WIDE_TRIP_COUNT]])
124122
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]

llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -82,9 +82,8 @@ define void @loop_dependent_cond(ptr %src, ptr noalias %dst, i64 %N) {
8282
; DEFAULT: [[VECTOR_BODY]]:
8383
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7:.*]] ]
8484
; DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]]
85-
; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr double, ptr [[TMP3]], i32 0
8685
; DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr double, ptr [[TMP3]], i32 2
87-
; DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP5]], align 8
86+
; DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP3]], align 8
8887
; DEFAULT-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP6]], align 8
8988
; DEFAULT-NEXT: [[TMP7:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[WIDE_LOAD]])
9089
; DEFAULT-NEXT: [[TMP8:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[WIDE_LOAD1]])
@@ -341,9 +340,8 @@ define void @latch_branch_cost(ptr %dst) {
341340
; DEFAULT: [[VECTOR_BODY]]:
342341
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
343342
; DEFAULT-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
344-
; DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[TMP2]], i32 0
345343
; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP2]], i32 16
346-
; DEFAULT-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP6]], align 1
344+
; DEFAULT-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP2]], align 1
347345
; DEFAULT-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP5]], align 1
348346
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
349347
; DEFAULT-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
@@ -358,8 +356,7 @@ define void @latch_branch_cost(ptr %dst) {
358356
; DEFAULT: [[VEC_EPILOG_VECTOR_BODY]]:
359357
; DEFAULT-NEXT: [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
360358
; DEFAULT-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX1]]
361-
; DEFAULT-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP8]], i32 0
362-
; DEFAULT-NEXT: store <4 x i8> zeroinitializer, ptr [[TMP9]], align 1
359+
; DEFAULT-NEXT: store <4 x i8> zeroinitializer, ptr [[TMP8]], align 1
363360
; DEFAULT-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 4
364361
; DEFAULT-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 100
365362
; DEFAULT-NEXT: br i1 [[TMP10]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
@@ -575,8 +572,7 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt
575572
; DEFAULT-NEXT: store i32 [[TMP22]], ptr [[E]], align 4, !alias.scope [[META14]], !noalias [[META16]]
576573
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE37]]
577574
; DEFAULT: [[PRED_STORE_CONTINUE37]]:
578-
; DEFAULT-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP16]], i32 0
579-
; DEFAULT-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> zeroinitializer, ptr [[TMP17]], i32 4, <4 x i1> [[TMP8]]), !alias.scope [[META18:![0-9]+]], !noalias [[META19:![0-9]+]]
575+
; DEFAULT-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> zeroinitializer, ptr [[TMP16]], i32 4, <4 x i1> [[TMP8]]), !alias.scope [[META18:![0-9]+]], !noalias [[META19:![0-9]+]]
580576
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
581577
; DEFAULT-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
582578
; DEFAULT-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
@@ -674,8 +670,7 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 {
674670
; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
675671
; DEFAULT-NEXT: [[TMP2:%.*]] = or <8 x i16> [[BROADCAST_SPLAT]], splat (i16 1)
676672
; DEFAULT-NEXT: [[TMP3:%.*]] = uitofp <8 x i16> [[TMP2]] to <8 x double>
677-
; DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr double, ptr [[NEXT_GEP]], i32 0
678-
; DEFAULT-NEXT: store <8 x double> [[TMP3]], ptr [[TMP4]], align 8
673+
; DEFAULT-NEXT: store <8 x double> [[TMP3]], ptr [[NEXT_GEP]], align 8
679674
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
680675
; DEFAULT-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
681676
; DEFAULT-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
@@ -730,8 +725,7 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 {
730725
; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i16> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
731726
; PRED-NEXT: [[TMP13:%.*]] = or <vscale x 2 x i16> [[BROADCAST_SPLAT]], splat (i16 1)
732727
; PRED-NEXT: [[TMP14:%.*]] = uitofp <vscale x 2 x i16> [[TMP13]] to <vscale x 2 x double>
733-
; PRED-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[NEXT_GEP]], i32 0
734-
; PRED-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP14]], ptr [[TMP15]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
728+
; PRED-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP14]], ptr [[NEXT_GEP]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
735729
; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP5]]
736730
; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP10]])
737731
; PRED-NEXT: [[TMP16:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)

llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,10 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) {
3838
; CHECK-NEXT: [[TMP30:%.*]] = add i32 [[TMP28]], [[TMP26]]
3939
; CHECK-NEXT: [[TMP32:%.*]] = sext i32 [[TMP30]] to i64
4040
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP32]]
41-
; CHECK-NEXT: [[TMP36:%.*]] = getelementptr double, ptr [[TMP34]], i32 0
4241
; CHECK-NEXT: [[TMP37:%.*]] = call i64 @llvm.vscale.i64()
4342
; CHECK-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], 2
4443
; CHECK-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP34]], i64 [[TMP38]]
45-
; CHECK-NEXT: store <vscale x 2 x double> zeroinitializer, ptr [[TMP36]], align 8
44+
; CHECK-NEXT: store <vscale x 2 x double> zeroinitializer, ptr [[TMP34]], align 8
4645
; CHECK-NEXT: store <vscale x 2 x double> zeroinitializer, ptr [[TMP39]], align 8
4746
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
4847
; CHECK-NEXT: [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -149,8 +148,7 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
149148
; CHECK-NEXT: [[TMP32:%.*]] = add i32 [[TMP31]], [[TMP30]]
150149
; CHECK-NEXT: [[TMP33:%.*]] = sext i32 [[TMP32]] to i64
151150
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP33]]
152-
; CHECK-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[TMP34]], i32 0
153-
; CHECK-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> zeroinitializer, ptr [[TMP35]], i32 8, <vscale x 2 x i1> [[TMP23]])
151+
; CHECK-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> zeroinitializer, ptr [[TMP34]], i32 8, <vscale x 2 x i1> [[TMP23]])
154152
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]]
155153
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP14]])
156154
; CHECK-NEXT: [[TMP36:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
@@ -275,8 +273,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
275273
; CHECK-NEXT: [[TMP36:%.*]] = shl i64 [[TMP35]], 32
276274
; CHECK-NEXT: [[TMP37:%.*]] = ashr i64 [[TMP36]], 32
277275
; CHECK-NEXT: [[TMP38:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP37]]
278-
; CHECK-NEXT: [[TMP39:%.*]] = getelementptr i64, ptr [[TMP38]], i32 0
279-
; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP23]], ptr [[TMP39]], i32 4, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
276+
; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP23]], ptr [[TMP38]], i32 4, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
280277
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]]
281278
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP14]])
282279
; CHECK-NEXT: [[TMP47:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)

llvm/test/Transforms/LoopVectorize/AArch64/drop-poison-generating-flags.ll

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,13 @@ define void @check_widen_intrinsic_with_nnan(ptr noalias %dst.0, ptr noalias %ds
1515
; CHECK: [[VECTOR_BODY]]:
1616
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE6:.*]] ]
1717
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, ptr [[SRC_1]], i64 [[INDEX]]
18-
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 0
19-
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP2]], align 8
18+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP1]], align 8
2019
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x double> @llvm.fabs.v4f64(<4 x double> [[WIDE_LOAD]])
2120
; CHECK-NEXT: [[TMP4:%.*]] = fcmp olt <4 x double> [[TMP3]], splat (double 1.000000e+00)
2221
; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
2322
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], -1
2423
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr double, ptr [[DST_0]], i64 [[TMP6]]
25-
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr double, ptr [[TMP7]], i32 0
26-
; CHECK-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> zeroinitializer, ptr [[TMP8]], i32 8, <4 x i1> [[TMP5]])
24+
; CHECK-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> zeroinitializer, ptr [[TMP7]], i32 8, <4 x i1> [[TMP5]])
2725
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0
2826
; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
2927
; CHECK: [[PRED_LOAD_IF]]:
@@ -58,16 +56,14 @@ define void @check_widen_intrinsic_with_nnan(ptr noalias %dst.0, ptr noalias %ds
5856
; CHECK-NEXT: [[TMP24:%.*]] = phi <4 x double> [ [[TMP20]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP23]], %[[PRED_LOAD_IF5]] ]
5957
; CHECK-NEXT: [[TMP25:%.*]] = add i64 [[INDEX]], -1
6058
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr double, ptr [[DST_0]], i64 [[TMP25]]
61-
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr double, ptr [[TMP26]], i32 0
62-
; CHECK-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> zeroinitializer, ptr [[TMP27]], i32 8, <4 x i1> [[TMP4]])
59+
; CHECK-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> zeroinitializer, ptr [[TMP26]], i32 8, <4 x i1> [[TMP4]])
6360
; CHECK-NEXT: [[TMP28:%.*]] = fcmp oeq <4 x double> [[TMP24]], zeroinitializer
6461
; CHECK-NEXT: [[TMP29:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP28]], <4 x i1> zeroinitializer
6562
; CHECK-NEXT: [[TMP30:%.*]] = or <4 x i1> [[TMP5]], [[TMP29]]
6663
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[TMP29]], i32 0
6764
; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP31]], i64 [[TMP25]], i64 [[TMP6]]
6865
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i32, ptr [[DST_1]], i64 [[PREDPHI]]
69-
; CHECK-NEXT: [[TMP33:%.*]] = getelementptr i32, ptr [[TMP32]], i32 0
70-
; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> splat (i32 10), ptr [[TMP33]], i32 4, <4 x i1> [[TMP30]])
66+
; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> splat (i32 10), ptr [[TMP32]], i32 4, <4 x i1> [[TMP30]])
7167
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
7268
; CHECK-NEXT: [[TMP34:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
7369
; CHECK-NEXT: br i1 [[TMP34]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]

llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,10 @@ define void @f1(ptr %A) #0 {
2323
; CHECK: vector.body:
2424
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2525
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
26-
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
27-
; CHECK-NEXT: store <vscale x 4 x i32> splat (i32 1), ptr [[TMP7]], align 4
26+
; CHECK-NEXT: store <vscale x 4 x i32> splat (i32 1), ptr [[TMP6]], align 4
2827
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
29-
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
30-
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
28+
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
29+
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
3130
; CHECK: middle.block:
3231
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
3332
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]

0 commit comments

Comments
 (0)