Skip to content

Commit 1142181

Browse files
committed
[LV] Vectorize select min/max index.
Add support for vectorizing loops that select the index of the minimum or maximum element. The patch implements vectorizing those patterns by combining Min/Max and FindLastIV reductions. It extends matching Min/Max reductions to allow in-loop users that are FindLastIV reductions. It records a flag indicating that the Min/Max reduction is used by another reduction. When creating reduction recipes, we process any reduction that has other reduction users. The reduction using the min/max reduction needs adjusting to compute the correct result: (1) find the last IV for which the condition based on the min/max reduction is true, (2) compare the partial min/max reduction result to its final value, and (3) select the lanes of the partial FindLastIV reductions which correspond to the lanes matching the min/max reduction result.
1 parent f79d6b3 commit 1142181

File tree

11 files changed

+1433
-241
lines changed

11 files changed

+1433
-241
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -798,6 +798,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
798798
// For each block in the loop.
799799
for (BasicBlock *BB : TheLoop->blocks()) {
800800
// Scan the instructions in the block and look for hazards.
801+
PHINode *UnclassifiedPhi = nullptr;
801802
for (Instruction &I : *BB) {
802803
if (auto *Phi = dyn_cast<PHINode>(&I)) {
803804
Type *PhiTy = Phi->getType();
@@ -887,12 +888,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
887888
addInductionPhi(Phi, ID, AllowedExit);
888889
continue;
889890
}
890-
891-
reportVectorizationFailure("Found an unidentified PHI",
892-
"value that could not be identified as "
893-
"reduction is used outside the loop",
894-
"NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
895-
return false;
891+
UnclassifiedPhi = Phi;
896892
} // end of PHI handling
897893

898894
// We handle calls that:
@@ -1043,6 +1039,19 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
10431039
return false;
10441040
}
10451041
} // next instr.
1042+
if (UnclassifiedPhi && none_of(BB->phis(), [this](PHINode &P) {
1043+
auto I = Reductions.find(&P);
1044+
return I != Reductions.end() &&
1045+
RecurrenceDescriptor::isFindLastIVRecurrenceKind(
1046+
I->second.getRecurrenceKind());
1047+
})) {
1048+
reportVectorizationFailure("Found an unidentified PHI",
1049+
"value that could not be identified as "
1050+
"reduction is used outside the loop",
1051+
"NonReductionValueUsedOutsideLoop", ORE,
1052+
TheLoop, UnclassifiedPhi);
1053+
return false;
1054+
}
10461055
}
10471056

10481057
if (!PrimaryInduction) {

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7199,6 +7199,9 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
71997199
Value *StartV = getStartValueFromReductionResult(EpiRedResult);
72007200
Value *SentinelV = EpiRedResult->getOperand(2)->getLiveInIRValue();
72017201
using namespace llvm::PatternMatch;
7202+
MainResumeValue = cast<VPInstruction>(EpiRedHeaderPhi->getStartValue())
7203+
->getOperand(0)
7204+
->getUnderlyingValue();
72027205
Value *Cmp, *OrigResumeV, *CmpOp;
72037206
[[maybe_unused]] bool IsExpectedPattern =
72047207
match(MainResumeValue,
@@ -7209,7 +7212,11 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
72097212
((CmpOp == StartV && isGuaranteedNotToBeUndefOrPoison(CmpOp))));
72107213
assert(IsExpectedPattern && "Unexpected reduction resume pattern");
72117214
MainResumeValue = OrigResumeV;
7215+
} else {
7216+
if (auto *VPI = dyn_cast<VPInstruction>(EpiRedHeaderPhi->getStartValue()))
7217+
MainResumeValue = VPI->getOperand(0)->getUnderlyingValue();
72127218
}
7219+
72137220
PHINode *MainResumePhi = cast<PHINode>(MainResumeValue);
72147221

72157222
// When fixing reductions in the epilogue loop we should already have
@@ -8222,9 +8229,6 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
82228229
return Recipe;
82238230

82248231
VPHeaderPHIRecipe *PhiRecipe = nullptr;
8225-
assert((Legal->isReductionVariable(Phi) ||
8226-
Legal->isFixedOrderRecurrence(Phi)) &&
8227-
"can only widen reductions and fixed-order recurrences here");
82288232
VPValue *StartV = Operands[0];
82298233
if (Legal->isReductionVariable(Phi)) {
82308234
const RecurrenceDescriptor &RdxDesc = Legal->getRecurrenceDescriptor(Phi);
@@ -8237,12 +8241,17 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
82378241
PhiRecipe = new VPReductionPHIRecipe(
82388242
Phi, RdxDesc.getRecurrenceKind(), *StartV, CM.isInLoopReduction(Phi),
82398243
CM.useOrderedReductions(RdxDesc), ScaleFactor);
8240-
} else {
8244+
} else if (Legal->isFixedOrderRecurrence(Phi)) {
82418245
// TODO: Currently fixed-order recurrences are modeled as chains of
82428246
// first-order recurrences. If there are no users of the intermediate
82438247
// recurrences in the chain, the fixed order recurrence should be modeled
82448248
// directly, enabling more efficient codegen.
82458249
PhiRecipe = new VPFirstOrderRecurrencePHIRecipe(Phi, *StartV);
8250+
} else {
8251+
// Failed to identify phi as reduction or fixed-order recurrence. Keep the
8252+
// original VPWidenPHIRecipe for now, to be legalized later if possible.
8253+
setRecipe(Phi, R);
8254+
return nullptr;
82468255
}
82478256
// Add backedge value.
82488257
PhiRecipe->addOperand(Operands[1]);
@@ -8427,7 +8436,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
84278436
// TODO: Extract final value from induction recipe initially, optimize to
84288437
// pre-computed end value together in optimizeInductionExitUsers.
84298438
auto *VectorPhiR =
8430-
cast<VPHeaderPHIRecipe>(Builder.getRecipe(&ScalarPhiIRI->getIRPhi()));
8439+
cast<VPSingleDefRecipe>(Builder.getRecipe(&ScalarPhiIRI->getIRPhi()));
84318440
if (auto *WideIVR = dyn_cast<VPWidenInductionRecipe>(VectorPhiR)) {
84328441
if (VPInstruction *ResumePhi = addResumePhiRecipeForInduction(
84338442
WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
@@ -8449,7 +8458,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
84498458
// which for FORs is a vector whose last element needs to be extracted. The
84508459
// start value provides the value if the loop is bypassed.
84518460
bool IsFOR = isa<VPFirstOrderRecurrencePHIRecipe>(VectorPhiR);
8452-
auto *ResumeFromVectorLoop = VectorPhiR->getBackedgeValue();
8461+
auto *ResumeFromVectorLoop = VectorPhiR->getOperand(1);
84538462
assert(VectorRegion->getSingleSuccessor() == Plan.getMiddleBlock() &&
84548463
"Cannot handle loops with uncountable early exits");
84558464
if (IsFOR)
@@ -8458,7 +8467,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
84588467
"vector.recur.extract");
84598468
StringRef Name = IsFOR ? "scalar.recur.init" : "bc.merge.rdx";
84608469
auto *ResumePhiR = ScalarPHBuilder.createScalarPhi(
8461-
{ResumeFromVectorLoop, VectorPhiR->getStartValue()}, {}, Name);
8470+
{ResumeFromVectorLoop, VectorPhiR->getOperand(0)}, {}, Name);
84628471
ScalarPhiIRI->addOperand(ResumePhiR);
84638472
}
84648473
}
@@ -8729,6 +8738,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
87298738
VPRecipeBase *Recipe =
87308739
RecipeBuilder.tryToCreateWidenRecipe(SingleDef, Range);
87318740
if (!Recipe) {
8741+
if (isa<VPWidenPHIRecipe>(SingleDef))
8742+
continue;
87328743
SmallVector<VPValue *, 4> Operands(R.operands());
87338744
Recipe = RecipeBuilder.handleReplication(Instr, Operands, Range);
87348745
}
@@ -8791,6 +8802,10 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
87918802
// Adjust the recipes for any inloop reductions.
87928803
adjustRecipesForReductions(Plan, RecipeBuilder, Range.Start);
87938804

8805+
// Try to convert remaining VPWidenPHIRecipes to reduction recipes.
8806+
if (!VPlanTransforms::runPass(VPlanTransforms::legalizeUnclassifiedPhis,
8807+
*Plan))
8808+
return nullptr;
87948809
// Apply mandatory transformation to handle FP maxnum/minnum reduction with
87958810
// NaNs if possible, bail out otherwise.
87968811
if (!VPlanTransforms::runPass(
@@ -9263,6 +9278,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
92639278
PhiR->setOperand(0, StartV);
92649279
}
92659280
}
9281+
92669282
for (VPRecipeBase *R : ToDelete)
92679283
R->eraseFromParent();
92689284

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1886,7 +1886,8 @@ class LLVM_ABI_FOR_TEST VPHeaderPHIRecipe : public VPSingleDefRecipe,
18861886
~VPHeaderPHIRecipe() override = default;
18871887

18881888
/// Method to support type inquiry through isa, cast, and dyn_cast.
1889-
static inline bool classof(const VPRecipeBase *B) {
1889+
static inline bool classof(const VPUser *U) {
1890+
auto *B = cast<VPRecipeBase>(U);
18901891
return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
18911892
B->getVPDefID() <= VPDef::VPLastHeaderPHISC;
18921893
}
@@ -1895,6 +1896,10 @@ class LLVM_ABI_FOR_TEST VPHeaderPHIRecipe : public VPSingleDefRecipe,
18951896
return B && B->getVPDefID() >= VPRecipeBase::VPFirstHeaderPHISC &&
18961897
B->getVPDefID() <= VPRecipeBase::VPLastHeaderPHISC;
18971898
}
1899+
static inline bool classof(const VPSingleDefRecipe *B) {
1900+
return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
1901+
B->getVPDefID() <= VPDef::VPLastHeaderPHISC;
1902+
}
18981903

18991904
/// Generate the phi nodes.
19001905
void execute(VPTransformState &State) override = 0;
@@ -1956,7 +1961,7 @@ class VPWidenInductionRecipe : public VPHeaderPHIRecipe {
19561961
return R && classof(R);
19571962
}
19581963

1959-
static inline bool classof(const VPHeaderPHIRecipe *R) {
1964+
static inline bool classof(const VPSingleDefRecipe *R) {
19601965
return classof(static_cast<const VPRecipeBase *>(R));
19611966
}
19621967

llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -813,3 +813,148 @@ bool VPlanTransforms::handleMaxMinNumReductionsWithoutFastMath(VPlan &Plan) {
813813
MiddleTerm->setOperand(0, NewCond);
814814
return true;
815815
}
816+
817+
bool VPlanTransforms::legalizeUnclassifiedPhis(VPlan &Plan) {
818+
using namespace VPlanPatternMatch;
819+
for (auto &PhiR : make_early_inc_range(
820+
Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis())) {
821+
if (!isa<VPWidenPHIRecipe>(&PhiR))
822+
continue;
823+
824+
// Check if PhiR is a min/max reduction that has a user inside the loop
825+
// outside the min/max reduction chain. The other user must be the compare
826+
// of a FindLastIV reduction chain.
827+
auto *MinMaxPhiR = cast<VPWidenPHIRecipe>(&PhiR);
828+
auto *MinMaxOp = dyn_cast_or_null<VPSingleDefRecipe>(
829+
MinMaxPhiR->getOperand(1)->getDefiningRecipe());
830+
if (!MinMaxOp)
831+
return false;
832+
833+
// The incoming value must be a min/max intrinsic.
834+
// TODO: Also handle the select variant.
835+
Intrinsic::ID ID = Intrinsic::not_intrinsic;
836+
if (auto *WideInt = dyn_cast<VPWidenIntrinsicRecipe>(MinMaxOp))
837+
ID = WideInt->getVectorIntrinsicID();
838+
else {
839+
auto *RepR = dyn_cast<VPReplicateRecipe>(MinMaxOp);
840+
if (!RepR || !isa<IntrinsicInst>(RepR->getUnderlyingInstr()))
841+
return false;
842+
ID = cast<IntrinsicInst>(RepR->getUnderlyingInstr())->getIntrinsicID();
843+
}
844+
RecurKind RdxKind = RecurKind::None;
845+
switch (ID) {
846+
case Intrinsic::umax:
847+
RdxKind = RecurKind::UMax;
848+
break;
849+
case Intrinsic::umin:
850+
RdxKind = RecurKind::UMin;
851+
break;
852+
case Intrinsic::smax:
853+
RdxKind = RecurKind::SMax;
854+
break;
855+
case Intrinsic::smin:
856+
RdxKind = RecurKind::SMin;
857+
break;
858+
default:
859+
return false;
860+
}
861+
862+
// The min/max intrinsic must use the phi and itself must only be used by
863+
// the phi and a resume-phi in the scalar preheader.
864+
if (MinMaxOp->getOperand(0) != MinMaxPhiR &&
865+
MinMaxOp->getOperand(1) != MinMaxPhiR)
866+
return false;
867+
if (MinMaxPhiR->getNumUsers() != 2 ||
868+
any_of(MinMaxOp->users(), [MinMaxPhiR, &Plan](VPUser *U) {
869+
auto *Phi = dyn_cast<VPPhi>(U);
870+
return MinMaxPhiR != U &&
871+
(!Phi || Phi->getParent() != Plan.getScalarPreheader());
872+
}))
873+
return false;
874+
875+
// One user of MinMaxPhiR is MinMaxOp; the other user must be a compare
876+
// that's part of a FindLastIV chain.
877+
auto MinMaxUsers = to_vector(MinMaxPhiR->users());
878+
auto *Cmp = dyn_cast<VPRecipeWithIRFlags>(
879+
MinMaxUsers[0] == MinMaxOp ? MinMaxUsers[1] : MinMaxUsers[0]);
880+
VPValue *CmpOpA;
881+
VPValue *CmpOpB;
882+
if (!Cmp || Cmp->getNumUsers() != 1 ||
883+
!match(Cmp, m_Binary<Instruction::ICmp>(m_VPValue(CmpOpA),
884+
m_VPValue(CmpOpB))))
885+
return false;
886+
887+
// Normalize the predicate so MinMaxPhiR is on the right side.
888+
CmpInst::Predicate Pred = Cmp->getPredicate();
889+
if (CmpOpA == MinMaxPhiR)
890+
Pred = CmpInst::getSwappedPredicate(Pred);
891+
892+
// Determine if the predicate is not strict.
893+
bool IsNonStrictPred = ICmpInst::isLE(Pred) || ICmpInst::isGE(Pred);
894+
// Account for a mismatch between RdxKind and the predicate.
895+
switch (RdxKind) {
896+
case RecurKind::UMin:
897+
case RecurKind::SMin:
898+
IsNonStrictPred |= ICmpInst::isGT(Pred);
899+
break;
900+
case RecurKind::UMax:
901+
case RecurKind::SMax:
902+
IsNonStrictPred |= ICmpInst::isLT(Pred);
903+
break;
904+
default:
905+
llvm_unreachable("unsupported kind");
906+
}
907+
908+
// TODO: Strict predicates need to find the first IV value for which the
909+
// predicate holds, not the last.
910+
if (Pred == CmpInst::ICMP_NE || !IsNonStrictPred)
911+
return false;
912+
913+
// Cmp must be used by the select of a FindLastIV chain.
914+
VPValue *Sel = dyn_cast<VPSingleDefRecipe>(*Cmp->user_begin());
915+
VPValue *IVOp, *FindIV;
916+
if (!Sel ||
917+
!match(Sel,
918+
m_Select(m_Specific(Cmp), m_VPValue(IVOp), m_VPValue(FindIV))) ||
919+
Sel->getNumUsers() != 2 || !isa<VPWidenIntOrFpInductionRecipe>(IVOp))
920+
return false;
921+
auto *FindIVPhiR = dyn_cast<VPReductionPHIRecipe>(FindIV);
922+
if (!FindIVPhiR || !RecurrenceDescriptor::isFindLastIVRecurrenceKind(
923+
FindIVPhiR->getRecurrenceKind()))
924+
return false;
925+
926+
assert(!FindIVPhiR->isInLoop() && !FindIVPhiR->isOrdered() &&
927+
"cannot handle inloop/ordered reductions yet");
928+
929+
auto NewPhiR = new VPReductionPHIRecipe(
930+
cast<PHINode>(MinMaxPhiR->getUnderlyingInstr()), RdxKind,
931+
*MinMaxPhiR->getOperand(0), false, false, 1);
932+
NewPhiR->insertBefore(MinMaxPhiR);
933+
MinMaxPhiR->replaceAllUsesWith(NewPhiR);
934+
NewPhiR->addOperand(MinMaxPhiR->getOperand(1));
935+
MinMaxPhiR->eraseFromParent();
936+
937+
// The reduction using MinMaxPhiR needs adjusting to compute the correct
938+
// result:
939+
// 1. We need to find the last IV for which the condition based on the
940+
// min/max recurrence is true,
941+
// 2. Compare the partial min/max reduction result to its final value and,
942+
// 3. Select the lanes of the partial FindLastIV reductions which
943+
// correspond to the lanes matching the min/max reduction result.
944+
VPInstruction *FindIVResult = cast<VPInstruction>(
945+
*(Sel->user_begin() + (*Sel->user_begin() == FindIVPhiR ? 1 : 0)));
946+
VPBuilder B(FindIVResult);
947+
VPInstruction *MinMaxResult =
948+
B.createNaryOp(VPInstruction::ComputeReductionResult,
949+
{NewPhiR, NewPhiR->getBackedgeValue()}, VPIRFlags(), {});
950+
NewPhiR->getBackedgeValue()->replaceUsesWithIf(
951+
MinMaxResult, [](VPUser &U, unsigned) { return isa<VPPhi>(&U); });
952+
auto *FinalMinMaxCmp = B.createICmp(
953+
CmpInst::ICMP_EQ, MinMaxResult->getOperand(1), MinMaxResult);
954+
auto *FinalIVSelect =
955+
B.createSelect(FinalMinMaxCmp, FindIVResult->getOperand(3),
956+
FindIVResult->getOperand(2));
957+
FindIVResult->setOperand(3, FinalIVSelect);
958+
}
959+
return true;
960+
}

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,11 @@ struct VPlanTransforms {
9393
GetIntOrFpInductionDescriptor,
9494
ScalarEvolution &SE, const TargetLibraryInfo &TLI);
9595

96+
/// Try to legalize unclassified phis by converting VPWidenPHIRecipes to
97+
/// min-max reductions used by FindLastIV reductions if possible. Returns
98+
/// false if the VPlan contains VPWidenPHIRecipes that cannot be legalized.
99+
static bool legalizeUnclassifiedPhis(VPlan &Plan);
100+
96101
/// Try to have all users of fixed-order recurrences appear after the recipe
97102
/// defining their previous value, by either sinking users or hoisting recipes
98103
/// defining their previous value (and its operands). Then introduce

0 commit comments

Comments
 (0)