Skip to content
Merged
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -506,7 +506,7 @@ class LoopVectorizationPlanner {
// instructions leading from the loop exit instr to the phi need to be
// converted to reductions, with one operand being vector and the other being
// the scalar reduction chain. For other reductions, a select is introduced
// between the phi and live-out recipes when folding the tail.
// between the phi and users outside the vector region when folding the tail.
void adjustRecipesForReductions(VPlanPtr &Plan,
VPRecipeBuilder &RecipeBuilder,
ElementCount MinVF);
Expand Down
131 changes: 60 additions & 71 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -498,7 +498,7 @@ class InnerLoopVectorizer {
virtual std::pair<BasicBlock *, Value *>
createVectorizedLoopSkeleton(const SCEV2ValueTy &ExpandedSCEVs);

/// Fix the vectorized code, taking care of header phi's, live-outs, and more.
/// Fix the vectorized code, taking care of header phi's, and more.
void fixVectorizedLoop(VPTransformState &State);

// Return true if any runtime check is added.
Expand Down Expand Up @@ -2713,7 +2713,8 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton(
| |
(opt) v <-- edge from middle to exit iff epilogue is not required.
| [ ] \
| [ ]_| <-- old scalar loop to handle remainder (scalar epilogue).
| [ ]_| <-- old scalar loop to handle remainder (scalar epilogue, header
| | wrapped in VPIRBasicBlock).
\ |
\ v
>[ ] <-- exit block(s). (wrapped in VPIRBasicBlock)
Expand Down Expand Up @@ -2956,7 +2957,7 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
// and there is nothing to fix from vector loop; phis should have incoming
// from scalar loop only.
} else {
// TODO: Check VPLiveOuts to see if IV users need fixing instead of checking
// TODO: Check in VPlan to see if IV users need fixing instead of checking
// the cost model.

// If we inserted an edge from the middle block to the unique exit block,
Expand All @@ -2970,10 +2971,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
IVEndValues[Entry.first], LoopMiddleBlock, State);
}

// Fix live-out phis not already fixed earlier.
for (const auto &KV : Plan.getLiveOuts())
KV.second->fixPhi(Plan, State);

for (Instruction *PI : PredicatedInstructions)
sinkScalarOperands(&*PI);

Expand Down Expand Up @@ -8790,6 +8787,41 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
{CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL);
}

/// Add a ResumePhi recipe to the scalar preheader for each phi in the scalar
/// header that corresponds to a first-order recurrence or a reduction, and
/// append it as an operand of the VPIRInstruction wrapping that original phi.
/// Header phis mapped to any other kind of header-phi recipe are left alone.
static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
  auto *ScalarPreheader = Plan.getScalarPreheader();
  auto *MiddleBlock =
      cast<VPBasicBlock>(ScalarPreheader->getSinglePredecessor());
  VPBuilder PreheaderBuilder(ScalarPreheader);
  VPBuilder MiddleBlockBuilder(MiddleBlock, MiddleBlock->getFirstNonPhi());
  auto *IVTy = Plan.getCanonicalIV()->getScalarType();
  // Offset 1 from the end, i.e. the last element of the vector.
  VPValue *LastElementOffset = Plan.getOrAddLiveIn(ConstantInt::get(IVTy, 1));

  for (VPRecipeBase &HeaderRecipe : *Plan.getScalarHeader()) {
    auto *WrappedIRI = cast<VPIRInstruction>(&HeaderRecipe);
    auto *OrigPhi = dyn_cast<PHINode>(&WrappedIRI->getInstruction());
    // Stop at the first wrapped instruction that is not a phi.
    if (!OrigPhi)
      break;

    auto *HeaderPhiR = cast<VPHeaderPHIRecipe>(Builder.getRecipe(OrigPhi));
    const bool IsRecurrence = isa<VPFirstOrderRecurrencePHIRecipe>(HeaderPhiR);
    if (!IsRecurrence && !isa<VPReductionPHIRecipe>(HeaderPhiR))
      continue;

    // When the vector loop was executed, the scalar loop resumes from the
    // backedge value. For a first-order recurrence that value is a vector, so
    // its last element is extracted in the middle block first.
    VPValue *FromVectorLoop = HeaderPhiR->getBackedgeValue();
    StringRef ResumeName = "bc.merge.rdx";
    if (IsRecurrence) {
      FromVectorLoop = MiddleBlockBuilder.createNaryOp(
          VPInstruction::ExtractFromEnd, {FromVectorLoop, LastElementOffset},
          {}, "vector.recur.extract");
      ResumeName = "scalar.recur.init";
    }

    // When the vector loop was bypassed, the scalar loop resumes from the
    // start value instead.
    auto *ResumePhi = PreheaderBuilder.createNaryOp(
        VPInstruction::ResumePhi,
        {FromVectorLoop, HeaderPhiR->getStartValue()}, {}, ResumeName);
    WrappedIRI->addOperand(ResumePhi);
  }
}

// Collect VPIRInstructions for phis in the original exit block that are modeled
// in VPlan and add the exiting VPValue as operand. Some exiting values are not
// modeled explicitly yet and won't be included. Those are un-truncated
Expand Down Expand Up @@ -8819,8 +8851,7 @@ static SetVector<VPIRInstruction *> collectUsersInExitBlock(
VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue);
// Exit values for inductions are computed and updated outside of VPlan and
// independent of induction recipes.
// TODO: Compute induction exit values in VPlan, use VPLiveOuts to update
// live-outs.
// TODO: Compute induction exit values in VPlan.
if ((isa<VPWidenIntOrFpInductionRecipe>(V) &&
!cast<VPWidenIntOrFpInductionRecipe>(V)->getTruncInst()) ||
isa<VPWidenPointerInductionRecipe>(V) ||
Expand Down Expand Up @@ -8853,7 +8884,8 @@ addUsersInExitBlock(VPlan &Plan,
// modeling the corresponding LCSSA phis.
for (VPIRInstruction *ExitIRI : ExitUsersToFix) {
VPValue *V = ExitIRI->getOperand(0);
// Pass live-in values used by exit phis directly through to the live-out.
// Pass live-in values used by exit phis directly through to their users in
// the exit block.
if (V->isLiveIn())
continue;

Expand All @@ -8865,39 +8897,17 @@ addUsersInExitBlock(VPlan &Plan,
}
}

/// Handle live-outs for first order reductions, both in the scalar preheader
/// and the original exit block:
/// 1. Feed a resume value for every FOR from the vector loop to the scalar
/// loop, if middle block branches to scalar preheader, by introducing
/// ExtractFromEnd and ResumePhi recipes in each, respectively, and a
/// VPLiveOut which uses the latter and corresponds to the scalar header.
/// 2. Feed the penultimate value of recurrences to their LCSSA phi users in
/// the original exit block using a VPLiveOut.
static void addLiveOutsForFirstOrderRecurrences(
/// Handle users of first-order recurrences in the original exit block. The
/// penultimate value of each recurrence is fed to its LCSSA phi users in the
/// original exit block through the VPIRInstruction wrapping the
/// LCSSA phi.
static void addExitUsersForFirstOrderRecurrences(
VPlan &Plan, SetVector<VPIRInstruction *> &ExitUsersToFix) {
VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion();

// Start by finding out if middle block branches to scalar preheader, which is
// not a VPIRBasicBlock, unlike Exit block - the other possible successor of
// middle block.
// TODO: Should be replaced by
// Plan->getScalarLoopRegion()->getSinglePredecessor() in the future once the
// scalar region is modeled as well.
auto *ScalarPHVPBB = Plan.getScalarPreheader();
auto *MiddleVPBB = cast<VPBasicBlock>(VectorRegion->getSingleSuccessor());
VPBasicBlock *ScalarPHVPBB = nullptr;
if (MiddleVPBB->getNumSuccessors() == 2) {
// Order is strict: first is the exit block, second is the scalar preheader.
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
} else if (ExitUsersToFix.empty()) {
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor());
} else {
llvm_unreachable("unsupported CFG in VPlan");
}

VPBuilder ScalarPHBuilder(ScalarPHVPBB);
VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
VPValue *OneVPV = Plan.getOrAddLiveIn(
ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1));
VPValue *TwoVPV = Plan.getOrAddLiveIn(
ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 2));

Expand Down Expand Up @@ -8973,26 +8983,16 @@ static void addLiveOutsForFirstOrderRecurrences(
// lo = lcssa.phi [s1, scalar.body],
// [vector.recur.extract.for.phi, middle.block]
//
// Extract the resume value and create a new VPLiveOut for it.
auto *Resume = MiddleBuilder.createNaryOp(VPInstruction::ExtractFromEnd,
{FOR->getBackedgeValue(), OneVPV},
{}, "vector.recur.extract");
auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp(
VPInstruction::ResumePhi, {Resume, FOR->getStartValue()}, {},
"scalar.recur.init");
auto *FORPhi = cast<PHINode>(FOR->getUnderlyingInstr());
Plan.addLiveOut(FORPhi, ResumePhiRecipe);

// Now update VPIRInstructions modeling LCSSA phis in the exit block.
// Extract the penultimate value of the recurrence and use it as operand for
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(independent): ok to remove ExitIRI during traversal of ExitUsersToFix below? Better make_early_inc_range?
OTOH, is more than one LCSSA phi expected to have FOR as its operand? If not better break as soon as it is found and handled.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes I thought the same thing, will adjust separately.

// the VPIRInstruction modeling the phi.
for (VPIRInstruction *ExitIRI : ExitUsersToFix) {
if (ExitIRI->getOperand(0) != FOR)
continue;
VPValue *Ext = MiddleBuilder.createNaryOp(
VPValue *PenultimateElement = MiddleBuilder.createNaryOp(
VPInstruction::ExtractFromEnd, {FOR->getBackedgeValue(), TwoVPV}, {},
"vector.recur.extract.for.phi");
ExitIRI->setOperand(0, Ext);
ExitIRI->setOperand(0, PenultimateElement);
ExitUsersToFix.remove(ExitIRI);
}
}
Expand Down Expand Up @@ -9166,11 +9166,11 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
"VPBasicBlock");
RecipeBuilder.fixHeaderPhis();

addScalarResumePhis(RecipeBuilder, *Plan);
SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlock(
OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars());
addLiveOutsForFirstOrderRecurrences(*Plan, ExitUsersToFix);
addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix);
addUsersInExitBlock(*Plan, ExitUsersToFix);

// ---------------------------------------------------------------------------
// Transform initial VPlan: Apply previously taken decisions, in order, to
// bring the VPlan to its final state.
Expand All @@ -9192,9 +9192,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
// Replace VPValues for known constant strides guaranteed by predicate scalar
// evolution.
auto CanUseVersionedStride = [&Plan](VPUser &U, unsigned) {
auto *R = dyn_cast<VPRecipeBase>(&U);
if (!R)
return false;
auto *R = cast<VPRecipeBase>(&U);
return R->getParent()->getParent() ||
R->getParent() ==
Plan->getVectorLoopRegion()->getSinglePredecessor();
Expand Down Expand Up @@ -9291,7 +9289,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
// instructions leading from the loop exit instr to the phi need to be converted
// to reductions, with one operand being vector and the other being the scalar
// reduction chain. For other reductions, a select is introduced between the phi
// and live-out recipes when folding the tail.
// and users outside the vector region when folding the tail.
//
// A ComputeReductionResult recipe is added to the middle block, also for
// in-loop reductions which compute their result in-loop, because generating
Expand Down Expand Up @@ -9325,8 +9323,10 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
for (VPUser *U : Cur->users()) {
auto *UserRecipe = cast<VPSingleDefRecipe>(U);
if (!UserRecipe->getParent()->getEnclosingLoopRegion()) {
assert(UserRecipe->getParent() == MiddleVPBB &&
"U must be either in the loop region or the middle block.");
assert((UserRecipe->getParent() == MiddleVPBB ||
UserRecipe->getParent() == Plan->getScalarPreheader()) &&
"U must be either in the loop region, the middle block or the "
"scalar preheader.");
continue;
}
Worklist.insert(UserRecipe);
Expand Down Expand Up @@ -9440,8 +9440,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(

const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
// If tail is folded by masking, introduce selects between the phi
// and the live-out instruction of each reduction, at the beginning of the
// dedicated latch block.
// and the users outside the vector region of each reduction, at the
// beginning of the dedicated latch block.
auto *OrigExitingVPV = PhiR->getBackedgeValue();
auto *NewExitingVPV = PhiR->getBackedgeValue();
if (!PhiR->isInLoop() && CM.foldTailByMasking()) {
Expand Down Expand Up @@ -9513,17 +9513,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
});
FinalReductionResult->insertBefore(*MiddleVPBB, IP);

// Order is strict: if there are multiple successors, the first is the exit
// block, second is the scalar preheader.
VPBasicBlock *ScalarPHVPBB =
cast<VPBasicBlock>(MiddleVPBB->getSuccessors().back());
VPBuilder ScalarPHBuilder(ScalarPHVPBB);
auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp(
VPInstruction::ResumePhi, {FinalReductionResult, PhiR->getStartValue()},
{}, "bc.merge.rdx");
auto *RedPhi = cast<PHINode>(PhiR->getUnderlyingInstr());
Plan->addLiveOut(RedPhi, ResumePhiRecipe);

// Adjust AnyOf reductions; replace the reduction phi for the selected value
// with a boolean reduction phi node to check if the condition is true in
// any iteration. The final value is selected by the final
Expand Down
Loading
Loading