Skip to content

Commit de2d6f2

Browse files
committed
!fixup address latest comments, thanks
1 parent 42d1194 commit de2d6f2

15 files changed

+212
-182
lines changed

llvm/include/llvm/Analysis/IVDescriptors.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,8 @@ enum class RecurKind {
4747
FMul, ///< Product of floats.
4848
FMin, ///< FP min implemented in terms of select(cmp()).
4949
FMax, ///< FP max implemented in terms of select(cmp()).
50-
FMinNumNoFMFs, ///< FP min with llvm.minnum semantics and no fast-math flags.
51-
FMaxNumNoFMFs, ///< FP max with llvm.maxnumsemantics and no fast-math flags.
50+
FMinNum, ///< FP min with llvm.minnum semantics including NaNs.
51+
FMaxNum, ///< FP max with llvm.maxnum semantics including NaNs.
5252
FMinimum, ///< FP min with llvm.minimum semantics
5353
FMaximum, ///< FP max with llvm.maximum semantics
5454
FMinimumNum, ///< FP min with llvm.minimumnum semantics

llvm/lib/Analysis/IVDescriptors.cpp

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -944,23 +944,24 @@ RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr(
944944
if (isIntMinMaxRecurrenceKind(Kind))
945945
return isMinMaxPattern(I, Kind, Prev);
946946
if (isFPMinMaxRecurrenceKind(Kind)) {
947+
InstDesc Res = isMinMaxPattern(I, Kind, Prev);
947948
if (HasRequiredFMF())
948-
return isMinMaxPattern(I, Kind, Prev);
949+
return Res;
950+
951+
if (!Res.isRecurrence())
952+
return InstDesc(false, I);
953+
949954
// We may be able to vectorize FMax/FMin reductions using maxnum/minnum
950-
// intrinsics with extra checks ensuring the inputs are not NaN.
951-
auto *StartV = dyn_cast<ConstantFP>(
952-
OrigPhi->getIncomingValueForBlock(L->getLoopPredecessor()));
953-
if (StartV && !StartV->getValue().isNaN() &&
954-
isMinMaxPattern(I, Kind, Prev).isRecurrence()) {
955-
if (((Kind == RecurKind::FMax &&
956-
match(I, m_Intrinsic<Intrinsic::maxnum>(m_Value(), m_Value()))) ||
957-
Kind == RecurKind::FMaxNumNoFMFs))
958-
return InstDesc(I, RecurKind::FMaxNumNoFMFs);
959-
if (((Kind == RecurKind::FMin &&
960-
match(I, m_Intrinsic<Intrinsic::minnum>(m_Value(), m_Value()))) ||
961-
Kind == RecurKind::FMinNumNoFMFs))
962-
return InstDesc(I, RecurKind::FMinNumNoFMFs);
963-
}
955+
// intrinsics with extra checks ensuring the vector loop handles only
956+
// non-NaN inputs.
957+
if (Kind == RecurKind::FMax &&
958+
match(I, m_Intrinsic<Intrinsic::maxnum>(m_Value(), m_Value())))
959+
return InstDesc(I, RecurKind::FMaxNum);
960+
if (Kind == RecurKind::FMin &&
961+
match(I, m_Intrinsic<Intrinsic::minnum>(m_Value(), m_Value())))
962+
return InstDesc(I, RecurKind::FMinNum);
963+
964+
return InstDesc(false, I);
964965
} else if (isFMulAddIntrinsic(I))
965966
return InstDesc(Kind == RecurKind::FMulAdd, I,
966967
I->hasAllowReassoc() ? nullptr : I);

llvm/lib/Transforms/Utils/LoopUtils.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -938,10 +938,10 @@ constexpr Intrinsic::ID llvm::getReductionIntrinsicID(RecurKind RK) {
938938
case RecurKind::UMin:
939939
return Intrinsic::vector_reduce_umin;
940940
case RecurKind::FMax:
941-
case RecurKind::FMaxNumNoFMFs:
941+
case RecurKind::FMaxNum:
942942
return Intrinsic::vector_reduce_fmax;
943943
case RecurKind::FMin:
944-
case RecurKind::FMinNumNoFMFs:
944+
case RecurKind::FMinNum:
945945
return Intrinsic::vector_reduce_fmin;
946946
case RecurKind::FMaximum:
947947
return Intrinsic::vector_reduce_fmaximum;

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,6 @@ class VPBuilder {
230230

231231
/// Create a new ICmp VPInstruction with predicate \p Pred and operands \p A
232232
/// and \p B.
233-
/// TODO: add createFCmp when needed.
234233
VPInstruction *createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B,
235234
DebugLoc DL = DebugLoc::getUnknown(),
236235
const Twine &Name = "") {
@@ -240,6 +239,17 @@ class VPBuilder {
240239
new VPInstruction(Instruction::ICmp, {A, B}, Pred, DL, Name));
241240
}
242241

242+
/// Create a new FCmp VPInstruction with predicate \p Pred and operands \p A
243+
/// and \p B.
244+
VPInstruction *createFCmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B,
245+
DebugLoc DL = DebugLoc::getUnknown(),
246+
const Twine &Name = "") {
247+
assert(Pred >= CmpInst::FIRST_FCMP_PREDICATE &&
248+
Pred <= CmpInst::LAST_FCMP_PREDICATE && "invalid predicate");
249+
return tryInsertInstruction(
250+
new VPInstruction(Instruction::FCmp, {A, B}, Pred, DL, Name));
251+
}
252+
243253
VPInstruction *createPtrAdd(VPValue *Ptr, VPValue *Offset,
244254
DebugLoc DL = DebugLoc::getUnknown(),
245255
const Twine &Name = "") {

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4345,14 +4345,13 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
43454345

43464346
bool LoopVectorizationPlanner::isCandidateForEpilogueVectorization(
43474347
ElementCount VF) const {
4348-
// Cross iteration phis such as reductions need special handling and are
4349-
// currently unsupported.
4348+
// Cross iteration phis such as first-order recurrences and FMaxNum/FMinNum
4349+
// reductions need special handling and are currently unsupported.
43504350
if (any_of(OrigLoop->getHeader()->phis(), [&](PHINode &Phi) {
43514351
if (Legal->isReductionVariable(&Phi)) {
43524352
RecurKind RK =
43534353
Legal->getRecurrenceDescriptor(&Phi).getRecurrenceKind();
4354-
return RK == RecurKind::FMinNumNoFMFs ||
4355-
RK == RecurKind::FMaxNumNoFMFs;
4354+
return RK == RecurKind::FMinNum || RK == RecurKind::FMaxNum;
43564355
}
43574356
return Legal->isFixedOrderRecurrence(&Phi);
43584357
}))
@@ -8776,6 +8775,9 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
87768775

87778776
// Adjust the recipes for any inloop reductions.
87788777
adjustRecipesForReductions(Plan, RecipeBuilder, Range.Start);
8778+
8779+
// Apply mandatory transformation to handle FP maxnum/minnum reduction with
8780+
// NaNs if possible, bail out otherwise.
87798781
if (!VPlanTransforms::runPass(
87808782
VPlanTransforms::handleMaxMinNumReductionsWithoutFastMath, *Plan))
87818783
return nullptr;

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23193,8 +23193,8 @@ class HorizontalReduction {
2319323193
case RecurKind::FindFirstIVUMin:
2319423194
case RecurKind::FindLastIVSMax:
2319523195
case RecurKind::FindLastIVUMax:
23196-
case RecurKind::FMaxNumNoFMFs:
23197-
case RecurKind::FMinNumNoFMFs:
23196+
case RecurKind::FMaxNum:
23197+
case RecurKind::FMinNum:
2319823198
case RecurKind::FMaximumNum:
2319923199
case RecurKind::FMinimumNum:
2320023200
case RecurKind::None:
@@ -23332,8 +23332,8 @@ class HorizontalReduction {
2333223332
case RecurKind::FindFirstIVUMin:
2333323333
case RecurKind::FindLastIVSMax:
2333423334
case RecurKind::FindLastIVUMax:
23335-
case RecurKind::FMaxNumNoFMFs:
23336-
case RecurKind::FMinNumNoFMFs:
23335+
case RecurKind::FMaxNum:
23336+
case RecurKind::FMinNum:
2333723337
case RecurKind::FMaximumNum:
2333823338
case RecurKind::FMinimumNum:
2333923339
case RecurKind::None:
@@ -23436,8 +23436,8 @@ class HorizontalReduction {
2343623436
case RecurKind::FindFirstIVUMin:
2343723437
case RecurKind::FindLastIVSMax:
2343823438
case RecurKind::FindLastIVUMax:
23439-
case RecurKind::FMaxNumNoFMFs:
23440-
case RecurKind::FMinNumNoFMFs:
23439+
case RecurKind::FMaxNum:
23440+
case RecurKind::FMinNum:
2344123441
case RecurKind::FMaximumNum:
2344223442
case RecurKind::FMinimumNum:
2344323443
case RecurKind::None:

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1356,16 +1356,16 @@ class LLVM_ABI_FOR_TEST VPWidenRecipe : public VPRecipeWithIRFlags,
13561356
unsigned Opcode;
13571357

13581358
public:
1359-
VPWidenRecipe(Instruction &I, ArrayRef<VPValue *> Operands)
1360-
: VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I), VPIRMetadata(I),
1361-
Opcode(I.getOpcode()) {}
1362-
13631359
VPWidenRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
13641360
const VPIRFlags &Flags, const VPIRMetadata &Metadata,
13651361
DebugLoc DL)
13661362
: VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, Flags, DL),
13671363
VPIRMetadata(Metadata), Opcode(Opcode) {}
13681364

1365+
VPWidenRecipe(Instruction &I, ArrayRef<VPValue *> Operands)
1366+
: VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I), VPIRMetadata(I),
1367+
Opcode(I.getOpcode()) {}
1368+
13691369
~VPWidenRecipe() override = default;
13701370

13711371
VPWidenRecipe *clone() override {

llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp

Lines changed: 72 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -653,102 +653,85 @@ void VPlanTransforms::attachCheckBlock(VPlan &Plan, Value *Cond,
653653
}
654654
}
655655

656-
static VPValue *getMinMaxCompareValue(VPSingleDefRecipe *MinMaxOp,
657-
VPReductionPHIRecipe *RedPhi) {
658-
auto *RepR = dyn_cast<VPReplicateRecipe>(MinMaxOp);
659-
if (!isa<VPWidenIntrinsicRecipe>(MinMaxOp) &&
660-
!(RepR && (isa<IntrinsicInst>(RepR->getUnderlyingInstr()))))
661-
return nullptr;
662-
663-
if (MinMaxOp->getOperand(0) == RedPhi)
664-
return MinMaxOp->getOperand(1);
665-
return MinMaxOp->getOperand(0);
666-
}
667-
668-
/// Returns true if there VPlan is read-only and execution can be resumed at the
669-
/// beginning of the last vector iteration in the scalar loop
670-
static bool canResumeInScalarLoopFromVectorLoop(VPlan &Plan) {
671-
for (VPBlockBase *VPB : vp_depth_first_shallow(
672-
Plan.getVectorLoopRegion()->getEntryBasicBlock())) {
673-
auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
674-
if (!VPBB)
675-
return false;
676-
for (auto &R : *VPBB) {
677-
if (match(&R, m_BranchOnCount(m_VPValue(), m_VPValue())))
678-
continue;
679-
if (R.mayWriteToMemory())
680-
return false;
681-
}
682-
}
683-
return true;
684-
}
685-
686656
bool VPlanTransforms::handleMaxMinNumReductionsWithoutFastMath(VPlan &Plan) {
687657
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
688-
VPValue *AnyNaN = nullptr;
689658
VPReductionPHIRecipe *RedPhiR = nullptr;
690-
VPRecipeWithIRFlags *MinMaxOp = nullptr;
659+
VPValue *MinMaxOp = nullptr;
691660
bool HasUnsupportedPhi = false;
661+
662+
auto GetMinMaxCompareValue = [](VPSingleDefRecipe *MinMaxOp,
663+
VPReductionPHIRecipe *RedPhi) -> VPValue * {
664+
auto *RepR = dyn_cast<VPReplicateRecipe>(MinMaxOp);
665+
if (!isa<VPWidenIntrinsicRecipe>(MinMaxOp) &&
666+
!(RepR && (isa<IntrinsicInst>(RepR->getUnderlyingInstr()))))
667+
return nullptr;
668+
669+
if (MinMaxOp->getOperand(0) == RedPhi)
670+
return MinMaxOp->getOperand(1);
671+
assert(MinMaxOp->getOperand(1) == RedPhi &&
672+
"Reduction phi operand expected");
673+
return MinMaxOp->getOperand(0);
674+
};
675+
692676
for (auto &R : LoopRegion->getEntryBasicBlock()->phis()) {
677+
// TODO: Also support first-order recurrence phis.
693678
HasUnsupportedPhi |=
694679
!isa<VPCanonicalIVPHIRecipe, VPWidenIntOrFpInductionRecipe,
695680
VPReductionPHIRecipe>(&R);
696681
auto *Cur = dyn_cast<VPReductionPHIRecipe>(&R);
697682
if (!Cur)
698683
continue;
684+
// For now, only a single reduction is supported.
685+
// TODO: Support multiple MaxNum/MinNum reductions and other reductions.
699686
if (RedPhiR)
700687
return false;
701-
if (Cur->getRecurrenceKind() != RecurKind::FMaxNumNoFMFs &&
702-
Cur->getRecurrenceKind() != RecurKind::FMinNumNoFMFs)
688+
if (Cur->getRecurrenceKind() != RecurKind::FMaxNum &&
689+
Cur->getRecurrenceKind() != RecurKind::FMinNum)
703690
continue;
704691

705692
RedPhiR = Cur;
706-
MinMaxOp = dyn_cast<VPRecipeWithIRFlags>(
693+
auto *MinMaxR = dyn_cast<VPRecipeWithIRFlags>(
707694
RedPhiR->getBackedgeValue()->getDefiningRecipe());
708-
if (!MinMaxOp)
695+
if (!MinMaxR)
709696
return false;
710-
VPValue *In = getMinMaxCompareValue(MinMaxOp, RedPhiR);
711-
if (!In)
697+
MinMaxOp = GetMinMaxCompareValue(MinMaxR, RedPhiR);
698+
if (!MinMaxOp)
712699
return false;
713-
714-
auto *IsNaN =
715-
new VPInstruction(Instruction::FCmp, {In, In}, {CmpInst::FCMP_UNO}, {});
716-
IsNaN->insertBefore(MinMaxOp);
717-
AnyNaN = new VPInstruction(VPInstruction::AnyOf, {IsNaN});
718-
AnyNaN->getDefiningRecipe()->insertAfter(IsNaN);
719700
}
720701

721-
if (!AnyNaN)
702+
if (!RedPhiR)
722703
return true;
723704

724-
if (HasUnsupportedPhi || !canResumeInScalarLoopFromVectorLoop(Plan))
705+
if (HasUnsupportedPhi || !Plan.hasScalarTail())
725706
return false;
726707

708+
/// Check if the vector loop of \p Plan can early exit and restart
709+
/// execution of last vector iteration in the scalar loop. This requires all
710+
/// recipes up to the early exit point to be side-effect free as they are
711+
/// re-executed. Currently we check that the loop is free of any recipe that
712+
/// may write to memory. Expected to operate on an early VPlan w/o nested
713+
/// regions.
714+
for (VPBlockBase *VPB : vp_depth_first_shallow(
715+
Plan.getVectorLoopRegion()->getEntryBasicBlock())) {
716+
auto *VPBB = cast<VPBasicBlock>(VPB);
717+
for (auto &R : *VPBB) {
718+
if (match(&R, m_BranchOnCount(m_VPValue(), m_VPValue())))
719+
continue;
720+
if (R.mayWriteToMemory())
721+
return false;
722+
}
723+
}
724+
727725
auto *MiddleVPBB = Plan.getMiddleBlock();
728726
auto *RdxResult = dyn_cast<VPInstruction>(&MiddleVPBB->front());
729727
if (!RdxResult ||
730728
RdxResult->getOpcode() != VPInstruction::ComputeReductionResult ||
731729
RdxResult->getOperand(0) != RedPhiR)
732730
return false;
733731

734-
auto *ScalarPH = Plan.getScalarPreheader();
735-
// Update the resume phis in the scalar preheader. They all must either resume
736-
// from the reduction result or the canonical induction. Bail out if there are
737-
// other resume phis.
738-
for (auto &R : ScalarPH->phis()) {
739-
auto *ResumeR = cast<VPPhi>(&R);
740-
VPValue *VecV = ResumeR->getOperand(0);
741-
VPValue *BypassV = ResumeR->getOperand(ResumeR->getNumOperands() - 1);
742-
if (VecV != RdxResult && VecV != &Plan.getVectorTripCount())
743-
return false;
744-
ResumeR->setOperand(
745-
1, VecV == &Plan.getVectorTripCount() ? Plan.getCanonicalIV() : VecV);
746-
ResumeR->addOperand(BypassV);
747-
}
748-
749732
// Create a new reduction phi recipe with either FMin/FMax, replacing
750-
// FMinNumNoFMFs/FMaxNumNoFMFs.
751-
RecurKind NewRK = RedPhiR->getRecurrenceKind() != RecurKind::FMinNumNoFMFs
733+
// FMinNum/FMaxNum.
734+
RecurKind NewRK = RedPhiR->getRecurrenceKind() == RecurKind::FMinNum
752735
? RecurKind::FMin
753736
: RecurKind::FMax;
754737
auto *NewRedPhiR = new VPReductionPHIRecipe(
@@ -769,23 +752,40 @@ bool VPlanTransforms::handleMaxMinNumReductionsWithoutFastMath(VPlan &Plan) {
769752
auto *IsLatchExitTaken =
770753
Builder.createICmp(CmpInst::ICMP_EQ, LatchExitingBranch->getOperand(0),
771754
LatchExitingBranch->getOperand(1));
755+
756+
VPValue *IsNaN = Builder.createFCmp(CmpInst::FCMP_UNO, MinMaxOp, MinMaxOp);
757+
VPValue *AnyNaN = Builder.createNaryOp(VPInstruction::AnyOf, {IsNaN});
772758
auto *AnyExitTaken =
773759
Builder.createNaryOp(Instruction::Or, {AnyNaN, IsLatchExitTaken});
774760
Builder.createNaryOp(VPInstruction::BranchOnCond, AnyExitTaken);
775761
LatchExitingBranch->eraseFromParent();
776762

777-
// Split the middle block and introduce a new block, branching to the scalar
778-
// preheader to resume iteration in the scalar loop if any NaNs have been
779-
// encountered.
780-
MiddleVPBB->splitAt(std::prev(MiddleVPBB->end()));
763+
// If we exit early due to NaNs, compute the final reduction result based on
764+
// the reduction phi at the beginning of the last vector iteration.
781765
Builder.setInsertPoint(MiddleVPBB, MiddleVPBB->begin());
782766
auto *NewSel =
783767
Builder.createSelect(AnyNaN, NewRedPhiR, RdxResult->getOperand(1));
784768
RdxResult->setOperand(1, NewSel);
785-
Builder.setInsertPoint(MiddleVPBB);
786-
Builder.createNaryOp(VPInstruction::BranchOnCond, AnyNaN);
787-
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
788-
MiddleVPBB->swapSuccessors();
789-
std::swap(ScalarPH->getPredecessors()[1], ScalarPH->getPredecessors().back());
769+
770+
auto *ScalarPH = Plan.getScalarPreheader();
771+
// Update the resume phis for inductions in the scalar preheader. If AnyNaN is
772+
// true, they resume from the start of the last vector iteration via the
773+
// canonical IV, otherwise from the original value.
774+
for (auto &R : ScalarPH->phis()) {
775+
auto *ResumeR = cast<VPPhi>(&R);
776+
VPValue *VecV = ResumeR->getOperand(0);
777+
if (VecV == RdxResult)
778+
continue;
779+
if (VecV != &Plan.getVectorTripCount())
780+
return false;
781+
auto *NewSel = Builder.createSelect(AnyNaN, Plan.getCanonicalIV(), VecV);
782+
ResumeR->setOperand(0, NewSel);
783+
}
784+
785+
auto *MiddleTerm = MiddleVPBB->getTerminator();
786+
Builder.setInsertPoint(MiddleTerm);
787+
VPValue *MiddleCond = MiddleTerm->getOperand(0);
788+
VPValue *NewCond = Builder.createAnd(MiddleCond, Builder.createNot(AnyNaN));
789+
MiddleTerm->setOperand(0, NewCond);
790790
return true;
791791
}

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ struct VPlanTransforms {
106106
/// Check if \p Plan contains any FMaxNum or FMinNum reductions.
107107
/// If they do, try to update the vector loop to exit early if any input is
108108
/// NaN and resume executing in the scalar loop to handle the NaNs there.
109+
/// Return false if this attempt was unsuccessful.
109110
static bool handleMaxMinNumReductionsWithoutFastMath(VPlan &Plan);
110111

111112
/// Clear NSW/NUW flags from reduction instructions if necessary.

0 commit comments

Comments
 (0)