Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions llvm/include/llvm/Analysis/IVDescriptors.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ enum class RecurKind {
FMul, ///< Product of floats.
FMin, ///< FP min implemented in terms of select(cmp()).
FMax, ///< FP max implemented in terms of select(cmp()).
FMinNum, ///< FP min with llvm.minnum semantics including NaNs.
FMaxNum, ///< FP max with llvm.maxnum semantics including NaNs.
FMinimum, ///< FP min with llvm.minimum semantics
FMaximum, ///< FP max with llvm.maximum semantics
FMinimumNum, ///< FP min with llvm.minimumnum semantics
Expand Down Expand Up @@ -250,6 +252,7 @@ class RecurrenceDescriptor {
/// Returns true if the recurrence kind is a floating-point min/max kind.
static bool isFPMinMaxRecurrenceKind(RecurKind Kind) {
return Kind == RecurKind::FMin || Kind == RecurKind::FMax ||
Kind == RecurKind::FMinNum || Kind == RecurKind::FMaxNum ||
Kind == RecurKind::FMinimum || Kind == RecurKind::FMaximum ||
Kind == RecurKind::FMinimumNum || Kind == RecurKind::FMaximumNum;
}
Expand Down
26 changes: 23 additions & 3 deletions llvm/lib/Analysis/IVDescriptors.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -941,10 +941,30 @@ RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr(
m_Intrinsic<Intrinsic::minimumnum>(m_Value(), m_Value())) ||
match(I, m_Intrinsic<Intrinsic::maximumnum>(m_Value(), m_Value()));
};
if (isIntMinMaxRecurrenceKind(Kind) ||
(HasRequiredFMF() && isFPMinMaxRecurrenceKind(Kind)))
if (isIntMinMaxRecurrenceKind(Kind))
return isMinMaxPattern(I, Kind, Prev);
else if (isFMulAddIntrinsic(I))
if (isFPMinMaxRecurrenceKind(Kind)) {
InstDesc Res = isMinMaxPattern(I, Kind, Prev);
if (!Res.isRecurrence())
return InstDesc(false, I);
if (HasRequiredFMF())
return Res;
// We may be able to vectorize FMax/FMin reductions using maxnum/minnum
// intrinsics with extra checks ensuring the vector loop handles only
// non-NaN inputs.
if (match(I, m_Intrinsic<Intrinsic::maxnum>(m_Value(), m_Value()))) {
assert(Kind == RecurKind::FMax &&
"unexpected recurrence kind for maxnum");
return InstDesc(I, RecurKind::FMaxNum);
}
if (match(I, m_Intrinsic<Intrinsic::minnum>(m_Value(), m_Value()))) {
assert(Kind == RecurKind::FMin &&
"unexpected recurrence kind for minnum");
return InstDesc(I, RecurKind::FMinNum);
}
return InstDesc(false, I);
}
if (isFMulAddIntrinsic(I))
return InstDesc(Kind == RecurKind::FMulAdd, I,
I->hasAllowReassoc() ? nullptr : I);
return InstDesc(false, I);
Expand Down
10 changes: 8 additions & 2 deletions llvm/lib/Transforms/Utils/LoopUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -938,8 +938,10 @@ constexpr Intrinsic::ID llvm::getReductionIntrinsicID(RecurKind RK) {
case RecurKind::UMin:
return Intrinsic::vector_reduce_umin;
case RecurKind::FMax:
case RecurKind::FMaxNum:
return Intrinsic::vector_reduce_fmax;
case RecurKind::FMin:
case RecurKind::FMinNum:
return Intrinsic::vector_reduce_fmin;
case RecurKind::FMaximum:
return Intrinsic::vector_reduce_fmaximum;
Expand Down Expand Up @@ -1037,8 +1039,10 @@ Intrinsic::ID llvm::getMinMaxReductionIntrinsicOp(RecurKind RK) {
case RecurKind::SMax:
return Intrinsic::smax;
case RecurKind::FMin:
case RecurKind::FMinNum:
return Intrinsic::minnum;
case RecurKind::FMax:
case RecurKind::FMaxNum:
return Intrinsic::maxnum;
case RecurKind::FMinimum:
return Intrinsic::minimum;
Expand Down Expand Up @@ -1096,9 +1100,9 @@ Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
Value *Right) {
Type *Ty = Left->getType();
if (Ty->isIntOrIntVectorTy() ||
(RK == RecurKind::FMinimum || RK == RecurKind::FMaximum ||
(RK == RecurKind::FMinNum || RK == RecurKind::FMaxNum ||
RK == RecurKind::FMinimum || RK == RecurKind::FMaximum ||
RK == RecurKind::FMinimumNum || RK == RecurKind::FMaximumNum)) {
// TODO: Add float minnum/maxnum support when FMF nnan is set.
Intrinsic::ID Id = getMinMaxReductionIntrinsicOp(RK);
return Builder.CreateIntrinsic(Ty, Id, {Left, Right}, nullptr,
"rdx.minmax");
Expand Down Expand Up @@ -1308,6 +1312,8 @@ Value *llvm::createSimpleReduction(IRBuilderBase &Builder, Value *Src,
case RecurKind::UMin:
case RecurKind::FMax:
case RecurKind::FMin:
case RecurKind::FMinNum:
case RecurKind::FMaxNum:
case RecurKind::FMinimum:
case RecurKind::FMaximum:
case RecurKind::FMinimumNum:
Expand Down
12 changes: 11 additions & 1 deletion llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,6 @@ class VPBuilder {

/// Create a new ICmp VPInstruction with predicate \p Pred and operands \p A
/// and \p B.
/// TODO: add createFCmp when needed.
VPInstruction *createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B,
DebugLoc DL = DebugLoc::getUnknown(),
const Twine &Name = "") {
Expand All @@ -240,6 +239,17 @@ class VPBuilder {
new VPInstruction(Instruction::ICmp, {A, B}, Pred, DL, Name));
}

/// Create a new FCmp VPInstruction with predicate \p Pred and operands \p A
/// and \p B.
VPInstruction *createFCmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B,
DebugLoc DL = DebugLoc::getUnknown(),
const Twine &Name = "") {
assert(Pred >= CmpInst::FIRST_FCMP_PREDICATE &&
Pred <= CmpInst::LAST_FCMP_PREDICATE && "invalid predicate");
return tryInsertInstruction(
new VPInstruction(Instruction::FCmp, {A, B}, Pred, DL, Name));
}

VPInstruction *createPtrAdd(VPValue *Ptr, VPValue *Offset,
DebugLoc DL = DebugLoc::getUnknown(),
const Twine &Name = "") {
Expand Down
18 changes: 14 additions & 4 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4345,10 +4345,14 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {

bool LoopVectorizationPlanner::isCandidateForEpilogueVectorization(
ElementCount VF) const {
// Cross iteration phis such as reductions need special handling and are
// currently unsupported.
if (any_of(OrigLoop->getHeader()->phis(),
[&](PHINode &Phi) { return Legal->isFixedOrderRecurrence(&Phi); }))
// Cross iteration phis such as fixed-order recurrences and FMaxNum/FMinNum
// reductions need special handling and are currently unsupported.
if (any_of(OrigLoop->getHeader()->phis(), [&](PHINode &Phi) {
if (!Legal->isReductionVariable(&Phi))
return Legal->isFixedOrderRecurrence(&Phi);
RecurKind RK = Legal->getRecurrenceDescriptor(&Phi).getRecurrenceKind();
return RK == RecurKind::FMinNum || RK == RecurKind::FMaxNum;
}))
return false;

// Phis with uses outside of the loop require special handling and are
Expand Down Expand Up @@ -8817,6 +8821,12 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// Adjust the recipes for any inloop reductions.
adjustRecipesForReductions(Plan, RecipeBuilder, Range.Start);

// Apply mandatory transformation to handle FP maxnum/minnum reduction with
// NaNs if possible, bail out otherwise.
if (!VPlanTransforms::runPass(
VPlanTransforms::handleMaxMinNumReductionsWithoutFastMath, *Plan))
return nullptr;

// Transform recipes to abstract recipes if it is legal and beneficial and
// clamp the range for better cost estimation.
// TODO: Enable following transform when the EVL-version of extended-reduction
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23196,6 +23196,8 @@ class HorizontalReduction {
case RecurKind::FindFirstIVUMin:
case RecurKind::FindLastIVSMax:
case RecurKind::FindLastIVUMax:
case RecurKind::FMaxNum:
case RecurKind::FMinNum:
case RecurKind::FMaximumNum:
case RecurKind::FMinimumNum:
case RecurKind::None:
Expand Down Expand Up @@ -23333,6 +23335,8 @@ class HorizontalReduction {
case RecurKind::FindFirstIVUMin:
case RecurKind::FindLastIVSMax:
case RecurKind::FindLastIVUMax:
case RecurKind::FMaxNum:
case RecurKind::FMinNum:
case RecurKind::FMaximumNum:
case RecurKind::FMinimumNum:
case RecurKind::None:
Expand Down Expand Up @@ -23435,6 +23439,8 @@ class HorizontalReduction {
case RecurKind::FindFirstIVUMin:
case RecurKind::FindLastIVSMax:
case RecurKind::FindLastIVUMax:
case RecurKind::FMaxNum:
case RecurKind::FMinNum:
case RecurKind::FMaximumNum:
case RecurKind::FMinimumNum:
case RecurKind::None:
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
return ResTy;
}
case Instruction::ICmp:
case Instruction::FCmp:
case VPInstruction::ActiveLaneMask:
assert(inferScalarType(R->getOperand(0)) ==
inferScalarType(R->getOperand(1)) &&
Expand Down
160 changes: 160 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -628,3 +628,163 @@ void VPlanTransforms::attachCheckBlock(VPlan &Plan, Value *Cond,
Term->addMetadata(LLVMContext::MD_prof, BranchWeights);
}
}

bool VPlanTransforms::handleMaxMinNumReductionsWithoutFastMath(VPlan &Plan) {
auto GetMinMaxCompareValue = [](VPReductionPHIRecipe *RedPhiR) -> VPValue * {
auto *MinMaxR = dyn_cast<VPRecipeWithIRFlags>(
RedPhiR->getBackedgeValue()->getDefiningRecipe());
if (!MinMaxR)
return nullptr;

auto *RepR = dyn_cast<VPReplicateRecipe>(MinMaxR);
if (!isa<VPWidenIntrinsicRecipe>(MinMaxR) &&
!(RepR && isa<IntrinsicInst>(RepR->getUnderlyingInstr())))
return nullptr;

#ifndef NDEBUG
Intrinsic::ID RdxIntrinsicId =
RedPhiR->getRecurrenceKind() == RecurKind::FMaxNum ? Intrinsic::maxnum
: Intrinsic::minnum;
assert((isa<VPWidenIntrinsicRecipe>(MinMaxR) &&
cast<VPWidenIntrinsicRecipe>(MinMaxR)->getVectorIntrinsicID() ==
RdxIntrinsicId) ||
(RepR &&
cast<IntrinsicInst>(RepR->getUnderlyingInstr())->getIntrinsicID() ==
RdxIntrinsicId) &&
"Intrinsic did not match recurrence kind");
#endif

if (MinMaxR->getOperand(0) == RedPhiR)
return MinMaxR->getOperand(1);

assert(MinMaxR->getOperand(1) == RedPhiR &&
"Reduction phi operand expected");
return MinMaxR->getOperand(0);
};

VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
VPReductionPHIRecipe *RedPhiR = nullptr;
bool HasUnsupportedPhi = false;
for (auto &R : LoopRegion->getEntryBasicBlock()->phis()) {
if (isa<VPCanonicalIVPHIRecipe, VPWidenIntOrFpInductionRecipe>(&R))
continue;
auto *Cur = dyn_cast<VPReductionPHIRecipe>(&R);
if (!Cur) {
// TODO: Also support fixed-order recurrence phis.
HasUnsupportedPhi = true;
continue;
}
// For now, only a single reduction is supported.
// TODO: Support multiple MaxNum/MinNum reductions and other reductions.
if (RedPhiR)
return false;
if (Cur->getRecurrenceKind() != RecurKind::FMaxNum &&
Cur->getRecurrenceKind() != RecurKind::FMinNum) {
HasUnsupportedPhi = true;
continue;
}
RedPhiR = Cur;
}

if (!RedPhiR)
return true;

// We won't be able to resume execution in the scalar tail, if there are
// unsupported header phis or there is no scalar tail at all, due to
// tail-folding.
if (HasUnsupportedPhi || !Plan.hasScalarTail())
return false;

VPValue *MinMaxOp = GetMinMaxCompareValue(RedPhiR);
if (!MinMaxOp)
return false;

RecurKind RedPhiRK = RedPhiR->getRecurrenceKind();
assert((RedPhiRK == RecurKind::FMaxNum || RedPhiRK == RecurKind::FMinNum) &&
"unsupported reduction");

/// Check if the vector loop of \p Plan can early exit and restart
/// execution of last vector iteration in the scalar loop. This requires all
/// recipes up to early exit point be side-effect free as they are
/// re-executed. Currently we check that the loop is free of any recipe that
/// may write to memory. Expected to operate on an early VPlan w/o nested
/// regions.
for (VPBlockBase *VPB : vp_depth_first_shallow(
Plan.getVectorLoopRegion()->getEntryBasicBlock())) {
auto *VPBB = cast<VPBasicBlock>(VPB);
for (auto &R : *VPBB) {
if (R.mayWriteToMemory() &&
!match(&R, m_BranchOnCount(m_VPValue(), m_VPValue())))
return false;
}
}

VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock();
VPBuilder Builder(LatchVPBB->getTerminator());
auto *LatchExitingBranch = cast<VPInstruction>(LatchVPBB->getTerminator());
assert(LatchExitingBranch->getOpcode() == VPInstruction::BranchOnCount &&
"Unexpected terminator");
auto *IsLatchExitTaken =
Builder.createICmp(CmpInst::ICMP_EQ, LatchExitingBranch->getOperand(0),
LatchExitingBranch->getOperand(1));

VPValue *IsNaN = Builder.createFCmp(CmpInst::FCMP_UNO, MinMaxOp, MinMaxOp);
VPValue *AnyNaN = Builder.createNaryOp(VPInstruction::AnyOf, {IsNaN});
auto *AnyExitTaken =
Builder.createNaryOp(Instruction::Or, {AnyNaN, IsLatchExitTaken});
Builder.createNaryOp(VPInstruction::BranchOnCond, AnyExitTaken);
LatchExitingBranch->eraseFromParent();

// If we exit early due to NaNs, compute the final reduction result based on
// the reduction phi at the beginning of the last vector iteration.
auto *RdxResult = find_singleton<VPSingleDefRecipe>(
RedPhiR->users(), [](VPUser *U, bool) -> VPSingleDefRecipe * {
auto *VPI = dyn_cast<VPInstruction>(U);
if (VPI && VPI->getOpcode() == VPInstruction::ComputeReductionResult)
return VPI;
return nullptr;
});

auto *MiddleVPBB = Plan.getMiddleBlock();
Builder.setInsertPoint(MiddleVPBB, MiddleVPBB->begin());
auto *NewSel =
Builder.createSelect(AnyNaN, RedPhiR, RdxResult->getOperand(1));
RdxResult->setOperand(1, NewSel);

auto *ScalarPH = Plan.getScalarPreheader();
// Update resume phis for inductions in the scalar preheader. If AnyNaN is
// true, the resume from the start of the last vector iteration via the
// canonical IV, otherwise from the original value.
for (auto &R : ScalarPH->phis()) {
auto *ResumeR = cast<VPPhi>(&R);
VPValue *VecV = ResumeR->getOperand(0);
if (VecV == RdxResult)
continue;
if (auto *DerivedIV = dyn_cast<VPDerivedIVRecipe>(VecV)) {
if (DerivedIV->getNumUsers() == 1 &&
DerivedIV->getOperand(1) == &Plan.getVectorTripCount()) {
auto *NewSel = Builder.createSelect(AnyNaN, Plan.getCanonicalIV(),
&Plan.getVectorTripCount());
DerivedIV->moveAfter(&*Builder.getInsertPoint());
DerivedIV->setOperand(1, NewSel);
continue;
}
}
// Bail out and abandon the current, partially modified, VPlan if we
// encounter resume phi that cannot be updated yet.
if (VecV != &Plan.getVectorTripCount()) {
LLVM_DEBUG(dbgs() << "Found resume phi we cannot update for VPlan with "
"FMaxNum/FMinNum reduction.\n");
return false;
}
auto *NewSel = Builder.createSelect(AnyNaN, Plan.getCanonicalIV(), VecV);
ResumeR->setOperand(0, NewSel);
}

auto *MiddleTerm = MiddleVPBB->getTerminator();
Builder.setInsertPoint(MiddleTerm);
VPValue *MiddleCond = MiddleTerm->getOperand(0);
VPValue *NewCond = Builder.createAnd(MiddleCond, Builder.createNot(AnyNaN));
MiddleTerm->setOperand(0, NewCond);
return true;
}
Loading
Loading