Skip to content

Commit 522b476

Browse files
[InstCombine] Match intrinsic recurrences when known to be hoisted
For value-accumulating recurrences of the form:

```
%umax.acc = phi i8 [ %umax, %backedge ], [ %a, %entry ]
%umax = call i8 @llvm.umax.i8(i8 %umax.acc, i8 %b)
```

the binary intrinsic may be simplified into the same intrinsic applied to the init value and the other operand, provided the latter is loop-invariant:

```
%umax = call i8 @llvm.umax.i8(i8 %a, i8 %b)
```

Fixes: #145875.
1 parent 2ead2f9 commit 522b476

File tree

3 files changed

+55
-25
lines changed

3 files changed

+55
-25
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1532,6 +1532,41 @@ static Instruction *foldBitOrderCrossLogicOp(Value *V,
15321532
return nullptr;
15331533
}
15341534

1535+
/// Try to replace a min/max-style binary intrinsic that feeds itself through a
/// PHI with a single, non-recurrent call on the PHI's initial value.
///
/// For a recurrence such as
///   %acc = phi [ %res, %backedge ], [ %init, %entry ]
///   %res = call @llvm.umax(%acc, %other)
/// where %other dominates the PHI, the result is rewritten as
///   call @llvm.umax(%init, %other)
/// The handled intrinsics are all min/max operations, for which re-applying
/// the operation with the same second operand does not change the result.
///
/// \param IC  InstCombine instance providing the dominator tree and builder.
/// \param II  Candidate intrinsic call.
/// \returns the replacement value, or nullptr when \p II is not one of the
///          handled intrinsics or does not form such a recurrence.
static Value *foldBinaryIntrinsicRecurrence(InstCombinerImpl &IC,
                                            IntrinsicInst *II) {
  // Filter on the intrinsic ID first: it is a cheap switch, whereas the
  // recurrence match and the dominance query below are comparatively costly
  // and would otherwise run for every intrinsic call visited.
  const Intrinsic::ID IID = II->getIntrinsicID();
  switch (IID) {
  case Intrinsic::maxnum:
  case Intrinsic::minnum:
  case Intrinsic::maximum:
  case Intrinsic::minimum:
  case Intrinsic::maximumnum:
  case Intrinsic::minimumnum:
  case Intrinsic::smax:
  case Intrinsic::smin:
  case Intrinsic::umax:
  case Intrinsic::umin:
    break;
  default:
    return nullptr;
  }

  PHINode *PN;
  Value *Init, *OtherOp;

  // A binary intrinsic recurrence with loop-invariant operands is equivalent to
  // `call @llvm.binary.intrinsic(Init, OtherOp)`. OtherOp must dominate the PHI
  // for it to be usable as a loop-invariant operand here.
  if (!matchSimpleBinaryIntrinsicRecurrence(II, PN, Init, OtherOp) ||
      !IC.getDominatorTree().dominates(OtherOp, PN))
    return nullptr;

  // Hand II to the builder as the fast-math-flag source rather than copying
  // flags onto the returned value afterwards. The builder may fold the call to
  // an already existing value (e.g. when Init == OtherOp); stamping II's flags
  // onto such a value would alter an unrelated instruction.
  return IC.Builder.CreateBinaryIntrinsic(IID, Init, OtherOp,
                                          /*FMFSource=*/II);
}
1569+
15351570
static Value *simplifyReductionOperand(Value *Arg, bool CanReorderLanes) {
15361571
if (!CanReorderLanes)
15371572
return nullptr;
@@ -3906,6 +3941,14 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
39063941
if (Value *Reverse = foldReversedIntrinsicOperands(II))
39073942
return replaceInstUsesWith(*II, Reverse);
39083943

3944+
// Attempt to simplify value-accumulating recurrences of kind:
3945+
// %umax.acc = phi i8 [ %umax, %backedge ], [ %a, %entry ]
3946+
// %umax = call i8 @llvm.umax.i8(i8 %umax.acc, i8 %b)
3947+
// And let the binary intrinsic be hoisted, when the operands are known to be
3948+
// loop-invariant.
3949+
if (Value *Res = foldBinaryIntrinsicRecurrence(*this, II))
3950+
return replaceInstUsesWith(*II, Res);
3951+
39093952
// Some intrinsics (like experimental_gc_statepoint) can be used in invoke
39103953
// context, so it is handled in visitCallBase and we should trigger it.
39113954
return visitCallBase(*II);

llvm/test/Transforms/InstCombine/known-phi-recurse.ll

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -261,14 +261,11 @@ define i8 @knownbits_umax_select_test() {
261261
; CHECK-NEXT: entry:
262262
; CHECK-NEXT: br label [[LOOP:%.*]]
263263
; CHECK: loop:
264-
; CHECK-NEXT: [[INDVAR:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[CONTAIN:%.*]], [[LOOP]] ]
265264
; CHECK-NEXT: [[COND0:%.*]] = call i1 @cond()
266-
; CHECK-NEXT: [[CONTAIN]] = call i8 @llvm.umax.i8(i8 [[INDVAR]], i8 1)
267265
; CHECK-NEXT: [[COND1:%.*]] = call i1 @cond()
268266
; CHECK-NEXT: br i1 [[COND1]], label [[EXIT:%.*]], label [[LOOP]]
269267
; CHECK: exit:
270-
; CHECK-NEXT: [[BOOL:%.*]] = and i8 [[CONTAIN]], 1
271-
; CHECK-NEXT: ret i8 [[BOOL]]
268+
; CHECK-NEXT: ret i8 1
272269
;
273270
entry:
274271
br label %loop

llvm/test/Transforms/InstCombine/recurrence-binary-intrinsic.ll

Lines changed: 11 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,11 @@ define i8 @simple_recurrence_intrinsic_smax(i8 %n, i8 %a, i8 %b) {
88
; CHECK-NEXT: br label %[[LOOP:.*]]
99
; CHECK: [[LOOP]]:
1010
; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
11-
; CHECK-NEXT: [[SMAX_ACC:%.*]] = phi i8 [ [[SMAX:%.*]], %[[LOOP]] ], [ [[A]], %[[ENTRY]] ]
12-
; CHECK-NEXT: [[SMAX]] = call i8 @llvm.smax.i8(i8 [[SMAX_ACC]], i8 [[B]])
1311
; CHECK-NEXT: [[IV_NEXT]] = add nuw i8 [[IV]], 1
1412
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[N]]
1513
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
1614
; CHECK: [[EXIT]]:
15+
; CHECK-NEXT: [[SMAX:%.*]] = call i8 @llvm.smax.i8(i8 [[A]], i8 [[B]])
1716
; CHECK-NEXT: ret i8 [[SMAX]]
1817
;
1918
entry:
@@ -38,12 +37,11 @@ define i8 @simple_recurrence_intrinsic_smin(i8 %n, i8 %a, i8 %b) {
3837
; CHECK-NEXT: br label %[[LOOP:.*]]
3938
; CHECK: [[LOOP]]:
4039
; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
41-
; CHECK-NEXT: [[SMIN_ACC:%.*]] = phi i8 [ [[SMIN:%.*]], %[[LOOP]] ], [ [[A]], %[[ENTRY]] ]
42-
; CHECK-NEXT: [[SMIN]] = call i8 @llvm.smin.i8(i8 [[SMIN_ACC]], i8 [[B]])
4340
; CHECK-NEXT: [[IV_NEXT]] = add nuw i8 [[IV]], 1
4441
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[N]]
4542
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
4643
; CHECK: [[EXIT]]:
44+
; CHECK-NEXT: [[SMIN:%.*]] = call i8 @llvm.smin.i8(i8 [[A]], i8 [[B]])
4745
; CHECK-NEXT: ret i8 [[SMIN]]
4846
;
4947
entry:
@@ -68,12 +66,11 @@ define i8 @simple_recurrence_intrinsic_umax(i8 %n, i8 %a, i8 %b) {
6866
; CHECK-NEXT: br label %[[LOOP:.*]]
6967
; CHECK: [[LOOP]]:
7068
; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
71-
; CHECK-NEXT: [[UMAX_ACC:%.*]] = phi i8 [ [[UMAX:%.*]], %[[LOOP]] ], [ [[A]], %[[ENTRY]] ]
72-
; CHECK-NEXT: [[UMAX]] = call i8 @llvm.umax.i8(i8 [[UMAX_ACC]], i8 [[B]])
7369
; CHECK-NEXT: [[IV_NEXT]] = add nuw i8 [[IV]], 1
7470
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[N]]
7571
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
7672
; CHECK: [[EXIT]]:
73+
; CHECK-NEXT: [[UMAX:%.*]] = call i8 @llvm.umax.i8(i8 [[A]], i8 [[B]])
7774
; CHECK-NEXT: ret i8 [[UMAX]]
7875
;
7976
entry:
@@ -98,12 +95,11 @@ define i8 @simple_recurrence_intrinsic_umin(i8 %n, i8 %a, i8 %b) {
9895
; CHECK-NEXT: br label %[[LOOP:.*]]
9996
; CHECK: [[LOOP]]:
10097
; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
101-
; CHECK-NEXT: [[UMIN_ACC:%.*]] = phi i8 [ [[UMIN:%.*]], %[[LOOP]] ], [ [[A]], %[[ENTRY]] ]
102-
; CHECK-NEXT: [[UMIN]] = call i8 @llvm.umin.i8(i8 [[UMIN_ACC]], i8 [[B]])
10398
; CHECK-NEXT: [[IV_NEXT]] = add nuw i8 [[IV]], 1
10499
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[N]]
105100
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
106101
; CHECK: [[EXIT]]:
102+
; CHECK-NEXT: [[UMIN:%.*]] = call i8 @llvm.umin.i8(i8 [[A]], i8 [[B]])
107103
; CHECK-NEXT: ret i8 [[UMIN]]
108104
;
109105
entry:
@@ -128,12 +124,11 @@ define float @simple_recurrence_intrinsic_maxnum(i32 %n, float %a, float %b) {
128124
; CHECK-NEXT: br label %[[LOOP:.*]]
129125
; CHECK: [[LOOP]]:
130126
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
131-
; CHECK-NEXT: [[FMAX_ACC:%.*]] = phi float [ [[FMAX:%.*]], %[[LOOP]] ], [ [[A]], %[[ENTRY]] ]
132-
; CHECK-NEXT: [[FMAX]] = call float @llvm.maxnum.f32(float [[FMAX_ACC]], float [[B]])
133127
; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 1
134128
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IV_NEXT]], [[N]]
135129
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
136130
; CHECK: [[EXIT]]:
131+
; CHECK-NEXT: [[FMAX:%.*]] = call float @llvm.maxnum.f32(float [[A]], float [[B]])
137132
; CHECK-NEXT: ret float [[FMAX]]
138133
;
139134
entry:
@@ -157,12 +152,11 @@ define float @simple_recurrence_intrinsic_minnum(i32 %n, float %a, float %b) {
157152
; CHECK-NEXT: br label %[[LOOP:.*]]
158153
; CHECK: [[LOOP]]:
159154
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
160-
; CHECK-NEXT: [[FMIN_ACC:%.*]] = phi float [ [[FMIN:%.*]], %[[LOOP]] ], [ [[A]], %[[ENTRY]] ]
161-
; CHECK-NEXT: [[FMIN]] = call float @llvm.minnum.f32(float [[FMIN_ACC]], float [[B]])
162155
; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 1
163156
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IV_NEXT]], [[N]]
164157
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
165158
; CHECK: [[EXIT]]:
159+
; CHECK-NEXT: [[FMIN:%.*]] = call float @llvm.minnum.f32(float [[A]], float [[B]])
166160
; CHECK-NEXT: ret float [[FMIN]]
167161
;
168162
entry:
@@ -186,12 +180,11 @@ define float @simple_recurrence_intrinsic_maximum(i32 %n, float %a, float %b) {
186180
; CHECK-NEXT: br label %[[LOOP:.*]]
187181
; CHECK: [[LOOP]]:
188182
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
189-
; CHECK-NEXT: [[FMAX_ACC:%.*]] = phi float [ [[FMAX:%.*]], %[[LOOP]] ], [ [[A]], %[[ENTRY]] ]
190-
; CHECK-NEXT: [[FMAX]] = call nnan float @llvm.maximum.f32(float [[FMAX_ACC]], float [[B]])
191183
; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 1
192184
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IV_NEXT]], [[N]]
193185
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
194186
; CHECK: [[EXIT]]:
187+
; CHECK-NEXT: [[FMAX:%.*]] = call nnan float @llvm.maximum.f32(float [[A]], float [[B]])
195188
; CHECK-NEXT: ret float [[FMAX]]
196189
;
197190
entry:
@@ -215,12 +208,11 @@ define float @simple_recurrence_intrinsic_minimum(i32 %n, float %a, float %b) {
215208
; CHECK-NEXT: br label %[[LOOP:.*]]
216209
; CHECK: [[LOOP]]:
217210
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
218-
; CHECK-NEXT: [[FMIN_ACC:%.*]] = phi float [ [[FMIN:%.*]], %[[LOOP]] ], [ [[A]], %[[ENTRY]] ]
219-
; CHECK-NEXT: [[FMIN]] = call nnan float @llvm.minimum.f32(float [[FMIN_ACC]], float [[B]])
220211
; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 1
221212
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IV_NEXT]], [[N]]
222213
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
223214
; CHECK: [[EXIT]]:
215+
; CHECK-NEXT: [[FMIN:%.*]] = call nnan float @llvm.minimum.f32(float [[A]], float [[B]])
224216
; CHECK-NEXT: ret float [[FMIN]]
225217
;
226218
entry:
@@ -244,12 +236,11 @@ define float @simple_recurrence_intrinsic_maximumnum(i32 %n, float %a, float %b)
244236
; CHECK-NEXT: br label %[[LOOP:.*]]
245237
; CHECK: [[LOOP]]:
246238
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
247-
; CHECK-NEXT: [[FMAX_ACC:%.*]] = phi float [ [[FMAX:%.*]], %[[LOOP]] ], [ [[A]], %[[ENTRY]] ]
248-
; CHECK-NEXT: [[FMAX]] = call nnan float @llvm.maximumnum.f32(float [[FMAX_ACC]], float [[B]])
249239
; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 1
250240
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IV_NEXT]], [[N]]
251241
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
252242
; CHECK: [[EXIT]]:
243+
; CHECK-NEXT: [[FMAX:%.*]] = call nnan float @llvm.maximumnum.f32(float [[A]], float [[B]])
253244
; CHECK-NEXT: ret float [[FMAX]]
254245
;
255246
entry:
@@ -273,12 +264,11 @@ define float @simple_recurrence_intrinsic_minimumnum(i32 %n, float %a, float %b)
273264
; CHECK-NEXT: br label %[[LOOP:.*]]
274265
; CHECK: [[LOOP]]:
275266
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
276-
; CHECK-NEXT: [[FMIN_ACC:%.*]] = phi float [ [[FMIN:%.*]], %[[LOOP]] ], [ [[A]], %[[ENTRY]] ]
277-
; CHECK-NEXT: [[FMIN]] = call nnan float @llvm.minimumnum.f32(float [[FMIN_ACC]], float [[B]])
278267
; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 1
279268
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IV_NEXT]], [[N]]
280269
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
281270
; CHECK: [[EXIT]]:
271+
; CHECK-NEXT: [[FMIN:%.*]] = call nnan float @llvm.minimumnum.f32(float [[A]], float [[B]])
282272
; CHECK-NEXT: ret float [[FMIN]]
283273
;
284274
entry:
@@ -304,7 +294,7 @@ define i8 @simple_recurrence_intrinsic_multiuse_phi(i8 %n, i8 %a, i8 %b) {
304294
; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
305295
; CHECK-NEXT: [[UMAX_ACC:%.*]] = phi i8 [ [[UMAX:%.*]], %[[LOOP]] ], [ [[A]], %[[ENTRY]] ]
306296
; CHECK-NEXT: call void @use(i8 [[UMAX_ACC]])
307-
; CHECK-NEXT: [[UMAX]] = call i8 @llvm.umax.i8(i8 [[UMAX_ACC]], i8 [[B]])
297+
; CHECK-NEXT: [[UMAX]] = call i8 @llvm.umax.i8(i8 [[A]], i8 [[B]])
308298
; CHECK-NEXT: [[IV_NEXT]] = add nuw i8 [[IV]], 1
309299
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[N]]
310300
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]

0 commit comments

Comments
 (0)