-
Notifications
You must be signed in to change notification settings - Fork 14.5k
release/21.x: [LV] Vectorize maxnum/minnum w/o fast-math flags. (#148239) #149736
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: release/21.x
Are you sure you want to change the base?
Conversation
Update LV to vectorize maxnum/minnum reductions without fast-math flags, by adding an extra check in the loop if any inputs to maxnum/minnum are NaN, due to maxnum/minnum behavior w.r.t. signaling NaNs. Signed-zeros are already handled consistently by maxnum/minnum. If any input is NaN: * exit the vector loop, * compute the reduction result up to the vector iteration that contained NaN inputs, and * resume in the scalar loop. New recurrence kinds are added for reductions using maxnum/minnum without fast-math flags. PR: llvm#148239 (cherry picked from commit 004c67e)
@ayalz What do you think about merging this PR to the release branch? |
@llvm/pr-subscribers-vectorizers @llvm/pr-subscribers-llvm-analysis Author: None (llvmbot) Changes: Backport 004c67e Requested by: @fhahn Patch is 66.35 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/149736.diff 16 Files Affected:
diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h
index b985292ccee40..1dc73205a0ebb 100644
--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -47,6 +47,8 @@ enum class RecurKind {
FMul, ///< Product of floats.
FMin, ///< FP min implemented in terms of select(cmp()).
FMax, ///< FP max implemented in terms of select(cmp()).
+ FMinNum, ///< FP min with llvm.minnum semantics including NaNs.
+ FMaxNum, ///< FP max with llvm.maxnum semantics including NaNs.
FMinimum, ///< FP min with llvm.minimum semantics
FMaximum, ///< FP max with llvm.maximum semantics
FMinimumNum, ///< FP min with llvm.minimumnum semantics
@@ -250,6 +252,7 @@ class RecurrenceDescriptor {
/// Returns true if the recurrence kind is a floating-point min/max kind.
static bool isFPMinMaxRecurrenceKind(RecurKind Kind) {
return Kind == RecurKind::FMin || Kind == RecurKind::FMax ||
+ Kind == RecurKind::FMinNum || Kind == RecurKind::FMaxNum ||
Kind == RecurKind::FMinimum || Kind == RecurKind::FMaximum ||
Kind == RecurKind::FMinimumNum || Kind == RecurKind::FMaximumNum;
}
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index 39f74beca082f..8be5de3bf356f 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -941,10 +941,30 @@ RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr(
m_Intrinsic<Intrinsic::minimumnum>(m_Value(), m_Value())) ||
match(I, m_Intrinsic<Intrinsic::maximumnum>(m_Value(), m_Value()));
};
- if (isIntMinMaxRecurrenceKind(Kind) ||
- (HasRequiredFMF() && isFPMinMaxRecurrenceKind(Kind)))
+ if (isIntMinMaxRecurrenceKind(Kind))
return isMinMaxPattern(I, Kind, Prev);
- else if (isFMulAddIntrinsic(I))
+ if (isFPMinMaxRecurrenceKind(Kind)) {
+ InstDesc Res = isMinMaxPattern(I, Kind, Prev);
+ if (!Res.isRecurrence())
+ return InstDesc(false, I);
+ if (HasRequiredFMF())
+ return Res;
+ // We may be able to vectorize FMax/FMin reductions using maxnum/minnum
+ // intrinsics with extra checks ensuring the vector loop handles only
+ // non-NaN inputs.
+ if (match(I, m_Intrinsic<Intrinsic::maxnum>(m_Value(), m_Value()))) {
+ assert(Kind == RecurKind::FMax &&
+ "unexpected recurrence kind for maxnum");
+ return InstDesc(I, RecurKind::FMaxNum);
+ }
+ if (match(I, m_Intrinsic<Intrinsic::minnum>(m_Value(), m_Value()))) {
+ assert(Kind == RecurKind::FMin &&
+ "unexpected recurrence kind for minnum");
+ return InstDesc(I, RecurKind::FMinNum);
+ }
+ return InstDesc(false, I);
+ }
+ if (isFMulAddIntrinsic(I))
return InstDesc(Kind == RecurKind::FMulAdd, I,
I->hasAllowReassoc() ? nullptr : I);
return InstDesc(false, I);
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 200d1fb854155..e7623aaff105d 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -938,8 +938,10 @@ constexpr Intrinsic::ID llvm::getReductionIntrinsicID(RecurKind RK) {
case RecurKind::UMin:
return Intrinsic::vector_reduce_umin;
case RecurKind::FMax:
+ case RecurKind::FMaxNum:
return Intrinsic::vector_reduce_fmax;
case RecurKind::FMin:
+ case RecurKind::FMinNum:
return Intrinsic::vector_reduce_fmin;
case RecurKind::FMaximum:
return Intrinsic::vector_reduce_fmaximum;
@@ -1037,8 +1039,10 @@ Intrinsic::ID llvm::getMinMaxReductionIntrinsicOp(RecurKind RK) {
case RecurKind::SMax:
return Intrinsic::smax;
case RecurKind::FMin:
+ case RecurKind::FMinNum:
return Intrinsic::minnum;
case RecurKind::FMax:
+ case RecurKind::FMaxNum:
return Intrinsic::maxnum;
case RecurKind::FMinimum:
return Intrinsic::minimum;
@@ -1096,9 +1100,9 @@ Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
Value *Right) {
Type *Ty = Left->getType();
if (Ty->isIntOrIntVectorTy() ||
- (RK == RecurKind::FMinimum || RK == RecurKind::FMaximum ||
+ (RK == RecurKind::FMinNum || RK == RecurKind::FMaxNum ||
+ RK == RecurKind::FMinimum || RK == RecurKind::FMaximum ||
RK == RecurKind::FMinimumNum || RK == RecurKind::FMaximumNum)) {
- // TODO: Add float minnum/maxnum support when FMF nnan is set.
Intrinsic::ID Id = getMinMaxReductionIntrinsicOp(RK);
return Builder.CreateIntrinsic(Ty, Id, {Left, Right}, nullptr,
"rdx.minmax");
@@ -1308,6 +1312,8 @@ Value *llvm::createSimpleReduction(IRBuilderBase &Builder, Value *Src,
case RecurKind::UMin:
case RecurKind::FMax:
case RecurKind::FMin:
+ case RecurKind::FMinNum:
+ case RecurKind::FMaxNum:
case RecurKind::FMinimum:
case RecurKind::FMaximum:
case RecurKind::FMinimumNum:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 11853859484e3..f57ce0c3ccb4d 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -230,7 +230,6 @@ class VPBuilder {
/// Create a new ICmp VPInstruction with predicate \p Pred and operands \p A
/// and \p B.
- /// TODO: add createFCmp when needed.
VPInstruction *createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B,
DebugLoc DL = DebugLoc::getUnknown(),
const Twine &Name = "") {
@@ -240,6 +239,17 @@ class VPBuilder {
new VPInstruction(Instruction::ICmp, {A, B}, Pred, DL, Name));
}
+ /// Create a new FCmp VPInstruction with predicate \p Pred and operands \p A
+ /// and \p B.
+ VPInstruction *createFCmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B,
+ DebugLoc DL = DebugLoc::getUnknown(),
+ const Twine &Name = "") {
+ assert(Pred >= CmpInst::FIRST_FCMP_PREDICATE &&
+ Pred <= CmpInst::LAST_FCMP_PREDICATE && "invalid predicate");
+ return tryInsertInstruction(
+ new VPInstruction(Instruction::FCmp, {A, B}, Pred, DL, Name));
+ }
+
VPInstruction *createPtrAdd(VPValue *Ptr, VPValue *Offset,
DebugLoc DL = DebugLoc::getUnknown(),
const Twine &Name = "") {
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 06db89a89bc38..74f59a2f7f136 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4345,10 +4345,14 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
bool LoopVectorizationPlanner::isCandidateForEpilogueVectorization(
ElementCount VF) const {
- // Cross iteration phis such as reductions need special handling and are
- // currently unsupported.
- if (any_of(OrigLoop->getHeader()->phis(),
- [&](PHINode &Phi) { return Legal->isFixedOrderRecurrence(&Phi); }))
+ // Cross iteration phis such as fixed-order recurrences and FMaxNum/FMinNum
+ // reductions need special handling and are currently unsupported.
+ if (any_of(OrigLoop->getHeader()->phis(), [&](PHINode &Phi) {
+ if (!Legal->isReductionVariable(&Phi))
+ return Legal->isFixedOrderRecurrence(&Phi);
+ RecurKind RK = Legal->getRecurrenceDescriptor(&Phi).getRecurrenceKind();
+ return RK == RecurKind::FMinNum || RK == RecurKind::FMaxNum;
+ }))
return false;
// Phis with uses outside of the loop require special handling and are
@@ -8817,6 +8821,12 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// Adjust the recipes for any inloop reductions.
adjustRecipesForReductions(Plan, RecipeBuilder, Range.Start);
+ // Apply mandatory transformation to handle FP maxnum/minnum reduction with
+ // NaNs if possible, bail out otherwise.
+ if (!VPlanTransforms::runPass(
+ VPlanTransforms::handleMaxMinNumReductionsWithoutFastMath, *Plan))
+ return nullptr;
+
// Transform recipes to abstract recipes if it is legal and beneficial and
// clamp the range for better cost estimation.
// TODO: Enable following transform when the EVL-version of extended-reduction
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 31aec77db63c1..f6610ea5b333f 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -23196,6 +23196,8 @@ class HorizontalReduction {
case RecurKind::FindFirstIVUMin:
case RecurKind::FindLastIVSMax:
case RecurKind::FindLastIVUMax:
+ case RecurKind::FMaxNum:
+ case RecurKind::FMinNum:
case RecurKind::FMaximumNum:
case RecurKind::FMinimumNum:
case RecurKind::None:
@@ -23333,6 +23335,8 @@ class HorizontalReduction {
case RecurKind::FindFirstIVUMin:
case RecurKind::FindLastIVSMax:
case RecurKind::FindLastIVUMax:
+ case RecurKind::FMaxNum:
+ case RecurKind::FMinNum:
case RecurKind::FMaximumNum:
case RecurKind::FMinimumNum:
case RecurKind::None:
@@ -23435,6 +23439,8 @@ class HorizontalReduction {
case RecurKind::FindFirstIVUMin:
case RecurKind::FindLastIVSMax:
case RecurKind::FindLastIVUMax:
+ case RecurKind::FMaxNum:
+ case RecurKind::FMinNum:
case RecurKind::FMaximumNum:
case RecurKind::FMinimumNum:
case RecurKind::None:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index b27a7ffeed208..66657b98b094b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -84,6 +84,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
return ResTy;
}
case Instruction::ICmp:
+ case Instruction::FCmp:
case VPInstruction::ActiveLaneMask:
assert(inferScalarType(R->getOperand(0)) ==
inferScalarType(R->getOperand(1)) &&
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 52eecb000d0c2..c71d70935b449 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -628,3 +628,163 @@ void VPlanTransforms::attachCheckBlock(VPlan &Plan, Value *Cond,
Term->addMetadata(LLVMContext::MD_prof, BranchWeights);
}
}
+
+bool VPlanTransforms::handleMaxMinNumReductionsWithoutFastMath(VPlan &Plan) {
+ auto GetMinMaxCompareValue = [](VPReductionPHIRecipe *RedPhiR) -> VPValue * {
+ auto *MinMaxR = dyn_cast<VPRecipeWithIRFlags>(
+ RedPhiR->getBackedgeValue()->getDefiningRecipe());
+ if (!MinMaxR)
+ return nullptr;
+
+ auto *RepR = dyn_cast<VPReplicateRecipe>(MinMaxR);
+ if (!isa<VPWidenIntrinsicRecipe>(MinMaxR) &&
+ !(RepR && isa<IntrinsicInst>(RepR->getUnderlyingInstr())))
+ return nullptr;
+
+#ifndef NDEBUG
+ Intrinsic::ID RdxIntrinsicId =
+ RedPhiR->getRecurrenceKind() == RecurKind::FMaxNum ? Intrinsic::maxnum
+ : Intrinsic::minnum;
+ assert((isa<VPWidenIntrinsicRecipe>(MinMaxR) &&
+ cast<VPWidenIntrinsicRecipe>(MinMaxR)->getVectorIntrinsicID() ==
+ RdxIntrinsicId) ||
+ (RepR &&
+ cast<IntrinsicInst>(RepR->getUnderlyingInstr())->getIntrinsicID() ==
+ RdxIntrinsicId) &&
+ "Intrinsic did not match recurrence kind");
+#endif
+
+ if (MinMaxR->getOperand(0) == RedPhiR)
+ return MinMaxR->getOperand(1);
+
+ assert(MinMaxR->getOperand(1) == RedPhiR &&
+ "Reduction phi operand expected");
+ return MinMaxR->getOperand(0);
+ };
+
+ VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+ VPReductionPHIRecipe *RedPhiR = nullptr;
+ bool HasUnsupportedPhi = false;
+ for (auto &R : LoopRegion->getEntryBasicBlock()->phis()) {
+ if (isa<VPCanonicalIVPHIRecipe, VPWidenIntOrFpInductionRecipe>(&R))
+ continue;
+ auto *Cur = dyn_cast<VPReductionPHIRecipe>(&R);
+ if (!Cur) {
+ // TODO: Also support fixed-order recurrence phis.
+ HasUnsupportedPhi = true;
+ continue;
+ }
+ // For now, only a single reduction is supported.
+ // TODO: Support multiple MaxNum/MinNum reductions and other reductions.
+ if (RedPhiR)
+ return false;
+ if (Cur->getRecurrenceKind() != RecurKind::FMaxNum &&
+ Cur->getRecurrenceKind() != RecurKind::FMinNum) {
+ HasUnsupportedPhi = true;
+ continue;
+ }
+ RedPhiR = Cur;
+ }
+
+ if (!RedPhiR)
+ return true;
+
+ // We won't be able to resume execution in the scalar tail, if there are
+ // unsupported header phis or there is no scalar tail at all, due to
+ // tail-folding.
+ if (HasUnsupportedPhi || !Plan.hasScalarTail())
+ return false;
+
+ VPValue *MinMaxOp = GetMinMaxCompareValue(RedPhiR);
+ if (!MinMaxOp)
+ return false;
+
+ RecurKind RedPhiRK = RedPhiR->getRecurrenceKind();
+ assert((RedPhiRK == RecurKind::FMaxNum || RedPhiRK == RecurKind::FMinNum) &&
+ "unsupported reduction");
+
+ /// Check if the vector loop of \p Plan can early exit and restart
+ /// execution of last vector iteration in the scalar loop. This requires all
+ /// recipes up to early exit point be side-effect free as they are
+ /// re-executed. Currently we check that the loop is free of any recipe that
+ /// may write to memory. Expected to operate on an early VPlan w/o nested
+ /// regions.
+ for (VPBlockBase *VPB : vp_depth_first_shallow(
+ Plan.getVectorLoopRegion()->getEntryBasicBlock())) {
+ auto *VPBB = cast<VPBasicBlock>(VPB);
+ for (auto &R : *VPBB) {
+ if (R.mayWriteToMemory() &&
+ !match(&R, m_BranchOnCount(m_VPValue(), m_VPValue())))
+ return false;
+ }
+ }
+
+ VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock();
+ VPBuilder Builder(LatchVPBB->getTerminator());
+ auto *LatchExitingBranch = cast<VPInstruction>(LatchVPBB->getTerminator());
+ assert(LatchExitingBranch->getOpcode() == VPInstruction::BranchOnCount &&
+ "Unexpected terminator");
+ auto *IsLatchExitTaken =
+ Builder.createICmp(CmpInst::ICMP_EQ, LatchExitingBranch->getOperand(0),
+ LatchExitingBranch->getOperand(1));
+
+ VPValue *IsNaN = Builder.createFCmp(CmpInst::FCMP_UNO, MinMaxOp, MinMaxOp);
+ VPValue *AnyNaN = Builder.createNaryOp(VPInstruction::AnyOf, {IsNaN});
+ auto *AnyExitTaken =
+ Builder.createNaryOp(Instruction::Or, {AnyNaN, IsLatchExitTaken});
+ Builder.createNaryOp(VPInstruction::BranchOnCond, AnyExitTaken);
+ LatchExitingBranch->eraseFromParent();
+
+ // If we exit early due to NaNs, compute the final reduction result based on
+ // the reduction phi at the beginning of the last vector iteration.
+ auto *RdxResult = find_singleton<VPSingleDefRecipe>(
+ RedPhiR->users(), [](VPUser *U, bool) -> VPSingleDefRecipe * {
+ auto *VPI = dyn_cast<VPInstruction>(U);
+ if (VPI && VPI->getOpcode() == VPInstruction::ComputeReductionResult)
+ return VPI;
+ return nullptr;
+ });
+
+ auto *MiddleVPBB = Plan.getMiddleBlock();
+ Builder.setInsertPoint(MiddleVPBB, MiddleVPBB->begin());
+ auto *NewSel =
+ Builder.createSelect(AnyNaN, RedPhiR, RdxResult->getOperand(1));
+ RdxResult->setOperand(1, NewSel);
+
+ auto *ScalarPH = Plan.getScalarPreheader();
+ // Update resume phis for inductions in the scalar preheader. If AnyNaN is
+ // true, then resume from the start of the last vector iteration via the
+ // canonical IV, otherwise from the original value.
+ for (auto &R : ScalarPH->phis()) {
+ auto *ResumeR = cast<VPPhi>(&R);
+ VPValue *VecV = ResumeR->getOperand(0);
+ if (VecV == RdxResult)
+ continue;
+ if (auto *DerivedIV = dyn_cast<VPDerivedIVRecipe>(VecV)) {
+ if (DerivedIV->getNumUsers() == 1 &&
+ DerivedIV->getOperand(1) == &Plan.getVectorTripCount()) {
+ auto *NewSel = Builder.createSelect(AnyNaN, Plan.getCanonicalIV(),
+ &Plan.getVectorTripCount());
+ DerivedIV->moveAfter(&*Builder.getInsertPoint());
+ DerivedIV->setOperand(1, NewSel);
+ continue;
+ }
+ }
+ // Bail out and abandon the current, partially modified, VPlan if we
+ // encounter resume phi that cannot be updated yet.
+ if (VecV != &Plan.getVectorTripCount()) {
+ LLVM_DEBUG(dbgs() << "Found resume phi we cannot update for VPlan with "
+ "FMaxNum/FMinNum reduction.\n");
+ return false;
+ }
+ auto *NewSel = Builder.createSelect(AnyNaN, Plan.getCanonicalIV(), VecV);
+ ResumeR->setOperand(0, NewSel);
+ }
+
+ auto *MiddleTerm = MiddleVPBB->getTerminator();
+ Builder.setInsertPoint(MiddleTerm);
+ VPValue *MiddleCond = MiddleTerm->getOperand(0);
+ VPValue *NewCond = Builder.createAnd(MiddleCond, Builder.createNot(AnyNaN));
+ MiddleTerm->setOperand(0, NewCond);
+ return true;
+}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 1664bcc3881aa..57b713d3dfcb9 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -587,6 +587,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
Value *Op = State.get(getOperand(0), vputils::onlyFirstLaneUsed(this));
return Builder.CreateFreeze(Op, Name);
}
+ case Instruction::FCmp:
case Instruction::ICmp: {
bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
Value *A = State.get(getOperand(0), OnlyFirstLaneUsed);
@@ -860,7 +861,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
Value *Res = State.get(getOperand(0));
for (VPValue *Op : drop_begin(operands()))
Res = Builder.CreateOr(Res, State.get(Op));
- return Builder.CreateOrReduce(Res);
+ return State.VF.isScalar() ? Res : Builder.CreateOrReduce(Res);
}
case VPInstruction::FirstActiveLane: {
if (getNumOperands() == 1) {
@@ -1033,6 +1034,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
switch (getOpcode()) {
case Instruction::ExtractElement:
case Instruction::Freeze:
+ case Instruction::FCmp:
case Instruction::ICmp:
case Instruction::Select:
case VPInstruction::AnyOf:
@@ -1068,6 +1070,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
return Op == getOperand(1);
case Instruction::PHI:
return true;
+ case Instruction::FCmp:
case Instruction::ICmp:
case Instruction::Select:
case Instruction::Or:
@@ -1100,6 +1103,7 @@ bool VPInstruction::onlyFirstPartUsed(const VPValue *Op) const {
switch (getOpcode()) {
default:
return false;
+ case Instruction::FCmp:
case Instruction::ICmp:
case Instruction::Select:
return vputils::onlyFirstPartUsed(this);
@@ -1786,7 +1790,7 @@ bool VPIRFlags::flagsValidForOpcode(unsigned Opcode) const {
return Opcode == Instruction::ZExt;
break;
case OperationType::Cmp:
- return Opcode == Instruction::ICmp;
+ return Opcode == Instruction::FCmp || Opcode == Instruction::ICmp;
case OperationType::Other:
return true;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 870b1bb68b79a..4d1752fe57565 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -99,6 +99,12 @@ struct VPlanTransforms {
/// not valid.
static bool adjustFixedOrderRecurrences(VPlan &Plan, VPBuilder &Builder);
+ /// Check if \p Plan contains any FMaxNum or FMinNum reductions. If they do,
+ /// try to update the vector loop to exit early if any input is NaN and resume
+ /// executing in the scalar loop to handle the NaNs there. Return false if
+ /// this attempt was unsuccessful.
+ static bool handleMaxMinNumReductionsWithoutFastMath(VPlan &Plan);
+
/// Clear NSW/NUW flags from reduction instructions if necessary.
static void cl...
[truncated]
|
Sure, fine w/ me, looks profitable and stable enough. Nit: name could be shortened from |
Clarify name as suggested in #149736, as only FMaxNum and FMinNum are handled.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: name could be shortened from
handleMaxMinNumReductionsWithoutFastMath()
to handleMaxMinNumReductions()
Ah yes thanks! done on main in 3813567 and also adjusted in this PR.
…. (NFC) Clarify name as suggested in llvm/llvm-project#149736, as only FMaxNum and FMinNum are handled.
Can this be squashed for the release branch? We ideally want to have just one commit per fix. |
@tru is there a way to update this PR here or do I need to create a new PR? |
Backport 004c67e
Requested by: @fhahn