-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[VPlan] Dispatch to multiple exit blocks via middle blocks. #112138
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
245b56a
47258de
9265fb1
3831acb
e64888a
64db0ee
3259e66
0f8aedf
9212f96
5cb0851
e849195
7b98d34
c53eca6
43a8ef7
e26af8e
06c3d39
552bd91
2042a43
00dea4a
7b8866d
4d5608f
b9ee739
43d5590
cba7dce
95f4276
c3d3b39
a875249
65d0288
8d04383
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -43,6 +43,10 @@ AllowStridedPointerIVs("lv-strided-pointer-ivs", cl::init(false), cl::Hidden, | |
cl::desc("Enable recognition of non-constant strided " | ||
"pointer induction variables.")); | ||
|
||
// Opt-in switch for vectorizing loops with early exits. It defaults to false | ||
// and is consulted by LoopVectorizationLegality::canVectorizeEarlyExit(). | ||
static cl::opt<bool> | ||
EnableEarlyExitVectorization("enable-early-exit-vectorization", | ||
cl::init(false), cl::Hidden, | ||
cl::desc("Enable vectorization of loops with early exits.")); | ||
|
||
namespace llvm { | ||
cl::opt<bool> | ||
HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden, | ||
|
@@ -1378,6 +1382,10 @@ bool LoopVectorizationLegality::isFixedOrderRecurrence( | |
} | ||
|
||
// Returns true if \p BB must be predicated. When canVectorizeEarlyExit() | ||
// holds, every block other than the loop header is reported as needing | ||
// predication; otherwise the answer comes from LoopAccessInfo. | ||
bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) const { | ||
// When vectorizing early exits, create predicates for all blocks, except the | ||
// header. | ||
if (canVectorizeEarlyExit() && BB != TheLoop->getHeader()) | ||
return true; | ||
// Fall back to the LoopAccessInfo query for this block, loop and dominator | ||
// tree. | ||
return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT); | ||
} | ||
|
||
|
@@ -1514,6 +1522,27 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() { | |
return true; | ||
} | ||
|
||
// Returns true if the loop may be vectorized as an early-exit loop. All of | ||
// the following must hold: the EnableEarlyExitVectorization flag is set, the | ||
// loop does not have exactly one exiting block, the loop's vectorization | ||
// hints carry a force setting other than FK_Undefined, and | ||
// LoopVectorizeHints::allowVectorization() accepts the loop. | ||
bool LoopVectorizationLegality::canVectorizeEarlyExit() const { | ||
|
||
// Currently only allow vectorizing loops with early exits, if early-exit | ||
// vectorization is explicitly enabled and the loop has metadata to force | ||
// vectorization. | ||
if (!EnableEarlyExitVectorization) | ||
return false; | ||
|
||
// A loop with exactly one exiting block is rejected here. | ||
// NOTE(review): a loop with zero exiting blocks passes this check - confirm | ||
// that is intended. | ||
SmallVector<BasicBlock *> Exiting; | ||
TheLoop->getExitingBlocks(Exiting); | ||
if (Exiting.size() == 1) | ||
return false; | ||
|
||
// Bail out when the hints' force setting is FK_Undefined. | ||
LoopVectorizeHints Hints(TheLoop, true, *ORE); | ||
if (Hints.getForce() == LoopVectorizeHints::FK_Undefined) | ||
return false; | ||
|
||
// Final decision is delegated to the hints, queried for the function that | ||
// contains the loop header. | ||
Function *Fn = TheLoop->getHeader()->getParent(); | ||
return Hints.allowVectorization(Fn, TheLoop, | ||
true /*VectorizeOnlyWhenForced*/); | ||
} | ||
|
||
// Helper function to canVectorizeLoopNestCFG. | ||
bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp, | ||
bool UseVPlanNativePath) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1363,9 +1363,11 @@ class LoopVectorizationCostModel { | |
// If we might exit from anywhere but the latch, must run the exiting | ||
// iteration in scalar form. | ||
if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) { | ||
LLVM_DEBUG( | ||
dbgs() << "LV: Loop requires scalar epilogue: multiple exits\n"); | ||
return true; | ||
if (!Legal->canVectorizeEarlyExit()) { | ||
|
||
LLVM_DEBUG( | ||
dbgs() << "LV: Loop requires scalar epilogue: multiple exits\n"); | ||
return true; | ||
} | ||
} | ||
if (IsVectorizing && InterleaveInfo.requiresScalarEpilogue()) { | ||
LLVM_DEBUG(dbgs() << "LV: Loop requires scalar epilogue: " | ||
|
@@ -2575,7 +2577,8 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) { | |
LoopVectorPreHeader = OrigLoop->getLoopPreheader(); | ||
assert(LoopVectorPreHeader && "Invalid loop structure"); | ||
LoopExitBlock = OrigLoop->getUniqueExitBlock(); // may be nullptr | ||
assert((LoopExitBlock || Cost->requiresScalarEpilogue(VF.isVector())) && | ||
assert((LoopExitBlock || Cost->requiresScalarEpilogue(VF.isVector()) || | ||
|
||
Legal->canVectorizeEarlyExit()) && | ||
"multiple exit loop without required epilogue?"); | ||
|
||
LoopMiddleBlock = | ||
|
@@ -2758,8 +2761,6 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi, | |
// value (the value that feeds into the phi from the loop latch). | ||
// We allow both, but they, obviously, have different values. | ||
|
||
assert(OrigLoop->getUniqueExitBlock() && "Expected a single exit block"); | ||
|
||
DenseMap<Value *, Value *> MissingVals; | ||
|
||
// An external user of the last iteration's value should see the value that | ||
|
@@ -2819,6 +2820,9 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi, | |
if (PHI->getBasicBlockIndex(MiddleBlock) == -1) | ||
PHI->addIncoming(I.second, MiddleBlock); | ||
} | ||
|
||
assert((MissingVals.empty() || OrigLoop->getUniqueExitBlock()) && | ||
|
||
"Expected a single exit block"); | ||
} | ||
|
||
namespace { | ||
|
@@ -3599,7 +3603,8 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) { | |
TheLoop->getExitingBlocks(Exiting); | ||
for (BasicBlock *E : Exiting) { | ||
auto *Cmp = dyn_cast<Instruction>(E->getTerminator()->getOperand(0)); | ||
if (Cmp && TheLoop->contains(Cmp) && Cmp->hasOneUse()) | ||
if (Cmp && TheLoop->contains(Cmp) && Cmp->hasOneUse() && | ||
(TheLoop->getLoopLatch() == E || !Legal->canVectorizeEarlyExit())) | ||
AddToWorklistIfAllowed(Cmp); | ||
} | ||
|
||
|
@@ -7692,12 +7697,15 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan( | |
BestVPlan.execute(&State); | ||
|
||
// 2.5 Collect reduction resume values. | ||
auto *ExitVPBB = | ||
cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion()->getSingleSuccessor()); | ||
for (VPRecipeBase &R : *ExitVPBB) { | ||
createAndCollectMergePhiForReduction( | ||
dyn_cast<VPInstruction>(&R), State, OrigLoop, | ||
State.CFG.VPBB2IRBB[ExitVPBB], ExpandedSCEVs); | ||
VPBasicBlock *ExitVPBB = nullptr; | ||
if (BestVPlan.getVectorLoopRegion()->getSingleSuccessor()) { | ||
|
||
ExitVPBB = cast<VPBasicBlock>( | ||
BestVPlan.getVectorLoopRegion()->getSingleSuccessor()); | ||
for (VPRecipeBase &R : *ExitVPBB) { | ||
createAndCollectMergePhiForReduction( | ||
dyn_cast<VPInstruction>(&R), State, OrigLoop, | ||
State.CFG.VPBB2IRBB[ExitVPBB], ExpandedSCEVs); | ||
} | ||
} | ||
|
||
// 2.6. Maintain Loop Hints | ||
|
@@ -7723,6 +7731,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan( | |
LoopVectorizeHints Hints(L, true, *ORE); | ||
Hints.setAlreadyVectorized(); | ||
} | ||
|
||
|
||
TargetTransformInfo::UnrollingPreferences UP; | ||
TTI.getUnrollingPreferences(L, *PSE.getSE(), UP, ORE); | ||
if (!UP.UnrollVectorizedLoop || CanonicalIVStartValue) | ||
|
@@ -7735,15 +7744,17 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan( | |
ILV.printDebugTracesAtEnd(); | ||
|
||
// 4. Adjust branch weight of the branch in the middle block. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. ... "if the latter exists"? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Updated, thanks! |
||
auto *MiddleTerm = | ||
cast<BranchInst>(State.CFG.VPBB2IRBB[ExitVPBB]->getTerminator()); | ||
if (MiddleTerm->isConditional() && | ||
hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) { | ||
// Assume that `Count % VectorTripCount` is equally distributed. | ||
unsigned TripCount = BestVPlan.getUF() * State.VF.getKnownMinValue(); | ||
assert(TripCount > 0 && "trip count should not be zero"); | ||
const uint32_t Weights[] = {1, TripCount - 1}; | ||
setBranchWeights(*MiddleTerm, Weights, /*IsExpected=*/false); | ||
if (ExitVPBB) { | ||
auto *MiddleTerm = | ||
cast<BranchInst>(State.CFG.VPBB2IRBB[ExitVPBB]->getTerminator()); | ||
if (MiddleTerm->isConditional() && | ||
hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) { | ||
// Assume that `Count % VectorTripCount` is equally distributed. | ||
unsigned TripCount = BestVPlan.getUF() * State.VF.getKnownMinValue(); | ||
assert(TripCount > 0 && "trip count should not be zero"); | ||
const uint32_t Weights[] = {1, TripCount - 1}; | ||
setBranchWeights(*MiddleTerm, Weights, /*IsExpected=*/false); | ||
} | ||
} | ||
|
||
return State.ExpandedSCEVs; | ||
|
@@ -8128,7 +8139,7 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) { | |
// If source is an exiting block, we know the exit edge is dynamically dead | ||
// in the vector loop, and thus we don't need to restrict the mask. Avoid | ||
// adding uses of an otherwise potentially dead instruction. | ||
|
||
if (OrigLoop->isLoopExiting(Src)) | ||
if (!Legal->canVectorizeEarlyExit() && OrigLoop->isLoopExiting(Src)) | ||
return EdgeMaskCache[Edge] = SrcMask; | ||
|
||
VPValue *EdgeMask = getVPValueOrAddLiveIn(BI->getCondition()); | ||
|
@@ -8778,6 +8789,8 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW, | |
static SetVector<VPIRInstruction *> collectUsersInExitBlock( | ||
Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan, | ||
const MapVector<PHINode *, InductionDescriptor> &Inductions) { | ||
if (!Plan.getVectorLoopRegion()->getSingleSuccessor()) | ||
|
||
return {}; | ||
auto *MiddleVPBB = | ||
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor()); | ||
// No edge from the middle block to the unique exit block has been inserted | ||
|
@@ -8863,6 +8876,8 @@ static void addLiveOutsForFirstOrderRecurrences( | |
// TODO: Should be replaced by | ||
// Plan->getScalarLoopRegion()->getSinglePredecessor() in the future once the | ||
// scalar region is modeled as well. | ||
if (!VectorRegion->getSingleSuccessor()) | ||
|
||
return; | ||
auto *MiddleVPBB = cast<VPBasicBlock>(VectorRegion->getSingleSuccessor()); | ||
VPBasicBlock *ScalarPHVPBB = nullptr; | ||
if (MiddleVPBB->getNumSuccessors() == 2) { | ||
|
@@ -9146,10 +9161,15 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { | |
"VPBasicBlock"); | ||
RecipeBuilder.fixHeaderPhis(); | ||
|
||
SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlock( | ||
OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars()); | ||
addLiveOutsForFirstOrderRecurrences(*Plan, ExitUsersToFix); | ||
addUsersInExitBlock(*Plan, ExitUsersToFix); | ||
if (Legal->canVectorizeEarlyExit()) { | ||
VPlanTransforms::convertToMultiCond(*Plan, *PSE.getSE(), OrigLoop, | ||
RecipeBuilder); | ||
} else { | ||
SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlock( | ||
OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars()); | ||
addLiveOutsForFirstOrderRecurrences(*Plan, ExitUsersToFix); | ||
addUsersInExitBlock(*Plan, ExitUsersToFix); | ||
} | ||
|
||
// --------------------------------------------------------------------------- | ||
// Transform initial VPlan: Apply previously taken decisions, in order, to | ||
|
@@ -9277,8 +9297,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( | |
using namespace VPlanPatternMatch; | ||
VPRegionBlock *VectorLoopRegion = Plan->getVectorLoopRegion(); | ||
VPBasicBlock *Header = VectorLoopRegion->getEntryBasicBlock(); | ||
VPBasicBlock *MiddleVPBB = | ||
cast<VPBasicBlock>(VectorLoopRegion->getSingleSuccessor()); | ||
for (VPRecipeBase &R : Header->phis()) { | ||
auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R); | ||
if (!PhiR || !PhiR->isInLoop() || (MinVF.isScalar() && !PhiR->isOrdered())) | ||
|
@@ -9297,8 +9315,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( | |
for (VPUser *U : Cur->users()) { | ||
auto *UserRecipe = cast<VPSingleDefRecipe>(U); | ||
if (!UserRecipe->getParent()->getEnclosingLoopRegion()) { | ||
assert(UserRecipe->getParent() == MiddleVPBB && | ||
"U must be either in the loop region or the middle block."); | ||
continue; | ||
} | ||
Worklist.insert(UserRecipe); | ||
|
@@ -9403,6 +9419,10 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( | |
} | ||
VPBasicBlock *LatchVPBB = VectorLoopRegion->getExitingBasicBlock(); | ||
Builder.setInsertPoint(&*LatchVPBB->begin()); | ||
if (!VectorLoopRegion->getSingleSuccessor()) | ||
|
||
return; | ||
VPBasicBlock *MiddleVPBB = | ||
cast<VPBasicBlock>(VectorLoopRegion->getSingleSuccessor()); | ||
VPBasicBlock::iterator IP = MiddleVPBB->getFirstNonPhi(); | ||
for (VPRecipeBase &R : | ||
Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -474,6 +474,14 @@ void VPIRBasicBlock::execute(VPTransformState *State) { | |
// backedges. A backward successor is set when the branch is created. | ||
const auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors(); | ||
unsigned idx = PredVPSuccessors.front() == this ? 0 : 1; | ||
if (TermBr->getSuccessor(idx) && | ||
|
||
PredVPBlock == getPlan()->getVectorLoopRegion() && | ||
PredVPBlock->getNumSuccessors()) { | ||
// Update PredBB and TermBr for BranchOnMultiCond in predecessor. | ||
PredBB = TermBr->getSuccessor(1); | ||
TermBr = cast<BranchInst>(PredBB->getTerminator()); | ||
idx = 0; | ||
} | ||
assert(!TermBr->getSuccessor(idx) && | ||
"Trying to reset an existing successor block."); | ||
TermBr->setSuccessor(idx, IRBB); | ||
|
@@ -908,8 +916,8 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy, | |
VPBasicBlock *MiddleVPBB = new VPBasicBlock("middle.block"); | ||
VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion); | ||
|
||
VPBasicBlock *ScalarPH = new VPBasicBlock("scalar.ph"); | ||
if (!RequiresScalarEpilogueCheck) { | ||
VPBasicBlock *ScalarPH = new VPBasicBlock("scalar.ph"); | ||
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH); | ||
return Plan; | ||
} | ||
|
@@ -923,10 +931,14 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy, | |
// we unconditionally branch to the scalar preheader. Do nothing. | ||
// 3) Otherwise, construct a runtime check. | ||
BasicBlock *IRExitBlock = TheLoop->getUniqueExitBlock(); | ||
|
||
auto *VPExitBlock = VPIRBasicBlock::fromBasicBlock(IRExitBlock); | ||
// The connection order corresponds to the operands of the conditional branch. | ||
VPBlockUtils::insertBlockAfter(VPExitBlock, MiddleVPBB); | ||
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH); | ||
if (IRExitBlock) { | ||
auto *VPExitBlock = VPIRBasicBlock::fromBasicBlock(IRExitBlock); | ||
// The connection order corresponds to the operands of the conditional | ||
// branch. | ||
VPBlockUtils::insertBlockAfter(VPExitBlock, MiddleVPBB); | ||
VPBasicBlock *ScalarPH = new VPBasicBlock("scalar.ph"); | ||
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH); | ||
} | ||
|
||
auto *ScalarLatchTerm = TheLoop->getLoopLatch()->getTerminator(); | ||
// Here we use the same DebugLoc as the scalar loop latch terminator instead | ||
|
@@ -1031,7 +1043,9 @@ void VPlan::execute(VPTransformState *State) { | |
// VPlan execution rather than earlier during VPlan construction. | ||
BasicBlock *MiddleBB = State->CFG.ExitBB; | ||
VPBasicBlock *MiddleVPBB = | ||
cast<VPBasicBlock>(getVectorLoopRegion()->getSingleSuccessor()); | ||
getVectorLoopRegion()->getNumSuccessors() == 1 | ||
|
||
? cast<VPBasicBlock>(getVectorLoopRegion()->getSuccessors()[0]) | ||
: cast<VPBasicBlock>(getVectorLoopRegion()->getSuccessors()[1]); | ||
// Find the VPBB for the scalar preheader, relying on the current structure | ||
// when creating the middle block and its successors: if there's a single | ||
// predecessor, it must be the scalar preheader. Otherwise, the second | ||
|
@@ -1044,6 +1058,10 @@ void VPlan::execute(VPTransformState *State) { | |
MiddleSuccs.size() == 1 ? MiddleSuccs[0] : MiddleSuccs[1]); | ||
assert(!isa<VPIRBasicBlock>(ScalarPhVPBB) && | ||
"scalar preheader cannot be wrapped already"); | ||
if (ScalarPhVPBB->getNumSuccessors() != 0) { | ||
ScalarPhVPBB = cast<VPBasicBlock>(ScalarPhVPBB->getSuccessors()[1]); | ||
MiddleVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]); | ||
} | ||
replaceVPBBWithIRVPBB(ScalarPhVPBB, ScalarPh); | ||
replaceVPBBWithIRVPBB(MiddleVPBB, MiddleBB); | ||
|
||
|
@@ -1056,12 +1074,19 @@ void VPlan::execute(VPTransformState *State) { | |
State->CFG.DTU.applyUpdates({{DominatorTree::Delete, MiddleBB, ScalarPh}}); | ||
|
||
// Generate code in the loop pre-header and body. | ||
for (VPBlockBase *Block : vp_depth_first_shallow(Entry)) | ||
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT( | ||
Entry); | ||
|
||
for (VPBlockBase *Block : RPOT) | ||
Block->execute(State); | ||
|
||
VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitingBasicBlock(); | ||
BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB]; | ||
|
||
if (!getVectorLoopRegion()->getSingleSuccessor()) | ||
|
||
VectorLatchBB = | ||
cast<BranchInst>(VectorLatchBB->getTerminator())->getSuccessor(1); | ||
|
||
// Fix the latch value of canonical, reduction and first-order recurrences | ||
// phis in the vector loop. | ||
VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock(); | ||
|
@@ -1088,7 +1113,10 @@ void VPlan::execute(VPTransformState *State) { | |
// Move the last step to the end of the latch block. This ensures | ||
// consistent placement of all induction updates. | ||
Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1)); | ||
Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode()); | ||
if (VectorLatchBB->getTerminator() == &*VectorLatchBB->getFirstNonPHI()) | ||
Inc->moveBefore(VectorLatchBB->getTerminator()); | ||
else | ||
Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode()); | ||
|
||
|
||
// Use the steps for the last part as backedge value for the induction. | ||
if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R)) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1274,6 +1274,7 @@ class VPInstruction : public VPRecipeWithIRFlags, | |
// operand). Only generates scalar values (either for the first lane only or | ||
// for all lanes, depending on its uses). | ||
PtrAdd, | ||
AnyOf, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Perhaps worth adding a simple comment here? Something along the lines of:
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Added, thanks. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Some explanation of how AnyOf relates (or should relate) to ComputeReductionResult? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think AnyOf also needs to be added to the switch statement in VPRecipeBase::mayWriteToMemory, returning false? |
||
}; | ||
|
||
private: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need a description here, such as what I had in https://github.com/llvm/llvm-project/pull/88385/files for the same flag? Or is the idea to try to not expose this too much at this stage?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated to move it to LoopVectorize.cpp, thanks. Originally this was only used in combination with the new helper introduced to LVL, but that changed after using the existing checks.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm, the flag still seems to be in the old place. Perhaps the patch hasn't updated correctly?