Skip to content

Commit 42ddcf9

Browse files
[InstCombine] Match intrinsic recurrences when known to be hoisted
For value-accumulating recurrences of the kind:
```
%umax.acc = phi i8 [ %umax, %backedge ], [ %a, %entry ]
%umax = call i8 @llvm.umax.i8(i8 %umax.acc, i8 %b)
```
the binary intrinsic may be simplified into a single intrinsic call on the init value and the other operand, provided the latter is loop-invariant:
```
%umax = call i8 @llvm.umax.i8(i8 %a, i8 %b)
```
Fixes: #145875.
1 parent dddafce commit 42ddcf9

File tree

2 files changed

+50
-4
lines changed

2 files changed

+50
-4
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1532,6 +1532,46 @@ static Instruction *foldBitOrderCrossLogicOp(Value *V,
15321532
return nullptr;
15331533
}
15341534

1535+
/// Try to collapse a min/max intrinsic recurrence into a single call.
///
/// The pattern handled is an intrinsic call \p II that takes a phi as one
/// operand and is itself fed back into that phi:
///   %acc = phi [ %res, %backedge ], [ %init, %entry ]
///   %res = call @llvm.<minmax>(%acc, %other)
/// When %other is a non-constant value that dominates the phi, and the phi has
/// no user other than \p II, the pair is replaced by
///   call @llvm.<minmax>(%init, %other)
/// emitted at the first insertion point of the phi's parent block.
///
/// \returns true if the rewrite was performed, in which case both \p II and
/// the phi have been erased; false if the IR was left untouched.
static bool foldBinaryIntrinsicRecurrence(InstCombinerImpl &IC,
                                          IntrinsicInst *II) {
  PHINode *RecurPhi;
  Value *StartVal, *Invariant;

  // Bail out unless this is a simple recurrence whose non-phi operand is a
  // non-constant value available before the phi, and the phi feeds only II.
  if (!matchSimpleBinaryIntrinsicRecurrence(II, RecurPhi, StartVal, Invariant))
    return false;
  if (isa<Constant>(Invariant))
    return false;
  if (!RecurPhi->hasOneUse())
    return false;
  if (!IC.getDominatorTree().dominates(Invariant, RecurPhi))
    return false;

  // Only the min/max family of intrinsics is folded here.
  const auto IID = II->getIntrinsicID();
  const bool IsMinMaxLike =
      IID == Intrinsic::maxnum || IID == Intrinsic::minnum ||
      IID == Intrinsic::maximum || IID == Intrinsic::minimum ||
      IID == Intrinsic::maximumnum || IID == Intrinsic::minimumnum ||
      IID == Intrinsic::smax || IID == Intrinsic::smin ||
      IID == Intrinsic::umax || IID == Intrinsic::umin;
  if (!IsMinMaxLike)
    return false;

  // Emit the replacement call at the top of the phi's block.
  BasicBlock *PhiBlock = RecurPhi->getParent();
  IC.Builder.SetInsertPoint(&*PhiBlock->getFirstInsertionPt());
  Value *NewCall = IC.Builder.CreateBinaryIntrinsic(IID, StartVal, Invariant);
  auto *Folded = cast<IntrinsicInst>(NewCall);

  // Carry over fast-math flags for the floating-point variants.
  if (isa<FPMathOperator>(II))
    Folded->copyFastMathFlags(II);
  Folded->takeName(II);

  // Redirect every user of the old call to the new one, then drop the old
  // call and the phi, which is left without users once the call is gone.
  Instruction *Replaced = IC.replaceInstUsesWith(*II, Folded);
  IC.eraseInstFromFunction(*Replaced);
  IC.eraseInstFromFunction(*RecurPhi);
  return true;
}
1574+
15351575
static Value *simplifyReductionOperand(Value *Arg, bool CanReorderLanes) {
15361576
if (!CanReorderLanes)
15371577
return nullptr;
@@ -3906,6 +3946,14 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
39063946
if (Value *Reverse = foldReversedIntrinsicOperands(II))
39073947
return replaceInstUsesWith(*II, Reverse);
39083948

3949+
// Attempt to simplify value-accumulating recurrences of kind:
3950+
// %umax.acc = phi i8 [ %umax, %backedge ], [ %a, %entry ]
3951+
// %umax = call i8 @llvm.umax.i8(i8 %umax.acc, i8 %b)
3952+
// And let the binary intrinsic be hoisted, when the operands are known to be
3953+
// loop-invariant.
3954+
if (foldBinaryIntrinsicRecurrence(*this, II))
3955+
return nullptr;
3956+
39093957
// Some intrinsics (like experimental_gc_statepoint) can be used in invoke
39103958
// context, so it is handled in visitCallBase and we should trigger it.
39113959
return visitCallBase(*II);

llvm/test/Transforms/InstCombine/recurrence-binary-intrinsic.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,11 @@ define i8 @simple_recurrence_intrinsic(i8 %n, i8 %a, i8 %b) {
88
; CHECK-NEXT: br label %[[LOOP:.*]]
99
; CHECK: [[LOOP]]:
1010
; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
11-
; CHECK-NEXT: [[UMAX_ACC:%.*]] = phi i8 [ [[UMAX:%.*]], %[[LOOP]] ], [ [[A]], %[[ENTRY]] ]
12-
; CHECK-NEXT: [[UMAX]] = call i8 @llvm.umax.i8(i8 [[UMAX_ACC]], i8 [[B]])
1311
; CHECK-NEXT: [[IV_NEXT]] = add nuw i8 [[IV]], 1
1412
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[N]]
1513
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
1614
; CHECK: [[EXIT]]:
15+
; CHECK-NEXT: [[UMAX:%.*]] = call i8 @llvm.umax.i8(i8 [[A]], i8 [[B]])
1716
; CHECK-NEXT: ret i8 [[UMAX]]
1817
;
1918
entry:
@@ -38,12 +37,11 @@ define float @simple_recurrence_intrinsic_2(i32 %n, float %a, float %b) {
3837
; CHECK-NEXT: br label %[[LOOP:.*]]
3938
; CHECK: [[LOOP]]:
4039
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
41-
; CHECK-NEXT: [[FMAX_ACC:%.*]] = phi float [ [[FMAX:%.*]], %[[LOOP]] ], [ [[A]], %[[ENTRY]] ]
42-
; CHECK-NEXT: [[FMAX]] = call nnan float @llvm.maximum.f32(float [[FMAX_ACC]], float [[B]])
4340
; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 1
4441
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IV_NEXT]], [[N]]
4542
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
4643
; CHECK: [[EXIT]]:
44+
; CHECK-NEXT: [[FMAX:%.*]] = call nnan float @llvm.maximum.f32(float [[A]], float [[B]])
4745
; CHECK-NEXT: ret float [[FMAX]]
4846
;
4947
entry:

0 commit comments

Comments
 (0)