-
Notifications
You must be signed in to change notification settings - Fork 14.5k
[AMDGPU][FixIrreducible][UnifyLoopExits] Support callbr with inline-asm #149308
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
First batch of changes to add support for basic inline-asm callbr for the AMDGPU backend.
@llvm/pr-subscribers-llvm-support Author: Robert Imschweiler (ro-i) ChangesFirst batch of changes to add support for basic inline-asm callbr for the AMDGPU backend. Patch is 132.46 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/149308.diff 16 Files Affected:
diff --git a/llvm/include/llvm/Support/GenericLoopInfoImpl.h b/llvm/include/llvm/Support/GenericLoopInfoImpl.h
index 6fc508b0e0cca..8b7927357d57d 100644
--- a/llvm/include/llvm/Support/GenericLoopInfoImpl.h
+++ b/llvm/include/llvm/Support/GenericLoopInfoImpl.h
@@ -355,7 +355,7 @@ void LoopBase<BlockT, LoopT>::verifyLoop() const {
if (BB == getHeader()) {
assert(!OutsideLoopPreds.empty() && "Loop is unreachable!");
} else if (!OutsideLoopPreds.empty()) {
- // A non-header loop shouldn't be reachable from outside the loop,
+ // A non-header loop block shouldn't be reachable from outside the loop,
// though it is permitted if the predecessor is not itself actually
// reachable.
BlockT *EntryBB = &BB->getParent()->front();
diff --git a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
index 979f3b3eb72ff..fc7b313eb552a 100644
--- a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -607,10 +607,17 @@ LLVM_ABI bool SplitIndirectBrCriticalEdges(Function &F,
// successors
LLVM_ABI void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder);
-// Check whether the function only has simple terminator:
+template <typename... TermInst>
+LLVM_ABI bool hasOnlyGivenTerminators(const Function &F);
+
+// Check whether the function only has blocks with simple terminators:
// br/brcond/unreachable/ret
LLVM_ABI bool hasOnlySimpleTerminator(const Function &F);
+// Check whether the function only has blocks with simple terminators
+// (br/brcond/unreachable/ret) or callbr.
+LLVM_ABI bool hasOnlySimpleTerminatorOrCallBr(const Function &F);
+
} // end namespace llvm
#endif // LLVM_TRANSFORMS_UTILS_BASICBLOCKUTILS_H
diff --git a/llvm/include/llvm/Transforms/Utils/ControlFlowUtils.h b/llvm/include/llvm/Transforms/Utils/ControlFlowUtils.h
index 810fef29f4010..e55efbc907d42 100644
--- a/llvm/include/llvm/Transforms/Utils/ControlFlowUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/ControlFlowUtils.h
@@ -15,10 +15,13 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/CycleInfo.h"
namespace llvm {
class BasicBlock;
+class CallBrInst;
+class LoopInfo;
class DomTreeUpdater;
/// Given a set of branch descriptors [BB, Succ0, Succ1], create a "hub" such
@@ -104,7 +107,8 @@ struct ControlFlowHub {
: BB(BB), Succ0(Succ0), Succ1(Succ1) {}
};
- void addBranch(BasicBlock *BB, BasicBlock *Succ0, BasicBlock *Succ1) {
+ void addBranch(BasicBlock *BB, BasicBlock *Succ0,
+ BasicBlock *Succ1 = nullptr) {
assert(BB);
assert(Succ0 || Succ1);
Branches.emplace_back(BB, Succ0, Succ1);
@@ -118,6 +122,35 @@ struct ControlFlowHub {
std::optional<unsigned> MaxControlFlowBooleans = std::nullopt);
SmallVector<BranchDescriptor> Branches;
+
+ /**
+ * \brief Create a new intermediate target block for a callbr edge.
+ *
+ * This function creates a new basic block (the "target block") that sits
+ * between a callbr instruction and one of its successors. The callbr's
+ * successor is rewired to this new block, and the new block unconditionally
+ * branches to the original successor. This is useful for normalizing control
+ * flow, e.g., when transforming irreducible loops.
+ *
+ * \param CallBr The callbr instruction whose edge is to be split.
+ * \param Succ The original successor basic block to be reached.
+ * \param SuccIdx The index of the successor in the callbr instruction.
+ * \param AttachToCallBr If true, the new block is associated with the
+ * callbr's parent for loop/cycle info. If false, the new block is associated
+ * with the callbr's successor for loop/cycle info. \param CI Optional
+ * CycleInfo for updating cycle membership. \param DTU Optional
+ * DomTreeUpdater for updating the dominator tree. \param LI Optional LoopInfo
+ * for updating loop membership.
+ *
+ * \returns The newly created intermediate target block.
+ *
+ * \note This function updates PHI nodes, dominator tree, loop info, and cycle
+ * info as needed.
+ */
+ static BasicBlock *
+ createCallBrTarget(CallBrInst *CallBr, BasicBlock *Succ, unsigned SuccIdx,
+ bool AttachToCallBr = true, CycleInfo *CI = nullptr,
+ DomTreeUpdater *DTU = nullptr, LoopInfo *LI = nullptr);
};
} // end namespace llvm
diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index c8255742c41ba..6103d07212fc2 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -1766,12 +1766,21 @@ void llvm::InvertBranch(BranchInst *PBI, IRBuilderBase &Builder) {
PBI->swapSuccessors();
}
-bool llvm::hasOnlySimpleTerminator(const Function &F) {
+template <typename... TermInst>
+bool llvm::hasOnlyGivenTerminators(const Function &F) {
for (auto &BB : F) {
auto *Term = BB.getTerminator();
- if (!(isa<ReturnInst>(Term) || isa<UnreachableInst>(Term) ||
- isa<BranchInst>(Term)))
+ if (!(isa<TermInst>(Term) || ...))
return false;
}
return true;
}
+
+bool llvm::hasOnlySimpleTerminator(const Function &F) {
+ return hasOnlyGivenTerminators<ReturnInst, UnreachableInst, BranchInst>(F);
+}
+
+bool llvm::hasOnlySimpleTerminatorOrCallBr(const Function &F) {
+ return hasOnlyGivenTerminators<ReturnInst, UnreachableInst, BranchInst,
+ CallBrInst>(F);
+}
\ No newline at end of file
diff --git a/llvm/lib/Transforms/Utils/ControlFlowUtils.cpp b/llvm/lib/Transforms/Utils/ControlFlowUtils.cpp
index 4b0065d0030cd..f7197a68813dd 100644
--- a/llvm/lib/Transforms/Utils/ControlFlowUtils.cpp
+++ b/llvm/lib/Transforms/Utils/ControlFlowUtils.cpp
@@ -14,6 +14,7 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/ValueHandle.h"
@@ -282,7 +283,9 @@ std::pair<BasicBlock *, bool> ControlFlowHub::finalize(
for (auto [BB, Succ0, Succ1] : Branches) {
#ifndef NDEBUG
- assert(Incoming.insert(BB).second && "Duplicate entry for incoming block.");
+ assert(
+ (Incoming.insert(BB).second || isa<CallBrInst>(BB->getTerminator())) &&
+ "Duplicate entry for incoming block.");
#endif
if (Succ0)
Outgoing.insert(Succ0);
@@ -342,3 +345,55 @@ std::pair<BasicBlock *, bool> ControlFlowHub::finalize(
return {FirstGuardBlock, true};
}
+
+BasicBlock *ControlFlowHub::createCallBrTarget(
+ CallBrInst *CallBr, BasicBlock *Succ, unsigned SuccIdx, bool AttachToCallBr,
+ CycleInfo *CI, DomTreeUpdater *DTU, LoopInfo *LI) {
+ BasicBlock *CallBrBlock = CallBr->getParent();
+ BasicBlock *CallBrTarget =
+ BasicBlock::Create(CallBrBlock->getContext(),
+ CallBrBlock->getName() + ".target." + Succ->getName(),
+ CallBrBlock->getParent());
+ // Rewire control flow from callbr to the new target block.
+ Succ->replacePhiUsesWith(CallBrBlock, CallBrTarget);
+ CallBr->setSuccessor(SuccIdx, CallBrTarget);
+ // Jump from the new target block to the original successor.
+ BranchInst::Create(Succ, CallBrTarget);
+ if (LI) {
+ if (Loop *L = LI->getLoopFor(AttachToCallBr ? CallBrBlock : Succ); L) {
+ bool AddToLoop = true;
+ if (AttachToCallBr) {
+ // Check if the loops are disjoint. In that case, we do not add the
+ // intermediate target to any loop.
+ if (auto *LL = LI->getLoopFor(Succ);
+ LL && !L->contains(LL) && !LL->contains(L))
+ AddToLoop = false;
+ }
+ if (AddToLoop)
+ L->addBasicBlockToLoop(CallBrTarget, *LI);
+ }
+ }
+ if (CI) {
+ if (auto *C = CI->getCycle(AttachToCallBr ? CallBrBlock : Succ); C) {
+ bool AddToCycle = true;
+ if (AttachToCallBr) {
+ // Check if the cycles are disjoint. In that case, we do not add the
+ // intermediate target to any cycle.
+ if (auto *CC = CI->getCycle(Succ); CC) {
+ auto *CommonC = CI->getSmallestCommonCycle(C, CC);
+ if (CommonC != C && CommonC != CC)
+ AddToCycle = false;
+ }
+ }
+ if (AddToCycle)
+ CI->addBlockToCycle(CallBrTarget, C);
+ }
+ }
+ if (DTU) {
+ DTU->applyUpdates({{DominatorTree::Insert, CallBrBlock, CallBrTarget}});
+ if (DTU->getDomTree().dominates(CallBrBlock, Succ))
+ DTU->applyUpdates({{DominatorTree::Delete, CallBrBlock, Succ},
+ {DominatorTree::Insert, CallBrTarget, Succ}});
+ }
+ return CallBrTarget;
+}
\ No newline at end of file
diff --git a/llvm/lib/Transforms/Utils/FixIrreducible.cpp b/llvm/lib/Transforms/Utils/FixIrreducible.cpp
index 45e1d12c2bfff..ade23f942352d 100644
--- a/llvm/lib/Transforms/Utils/FixIrreducible.cpp
+++ b/llvm/lib/Transforms/Utils/FixIrreducible.cpp
@@ -79,6 +79,53 @@
// Limitation: The pass cannot handle switch statements and indirect
// branches. Both must be lowered to plain branches first.
//
+// CallBr support: CallBr is handled as a more general branch instruction which
+// can have multiple successors. The pass redirects the edges to intermediate
+// target blocks that unconditionally branch to the original callbr target
+// blocks. This allows the control flow hub to know to which of the original
+// target blocks to jump to.
+// Example input CFG:
+// Entry (callbr)
+// / \
+// v v
+// H ----> B
+// ^ /|
+// `----' |
+// v
+// Exit
+//
+// becomes:
+// Entry (callbr)
+// / \
+// v v
+// target.H target.B
+// | |
+// v v
+// H ----> B
+// ^ /|
+// `----' |
+// v
+// Exit
+//
+// Note
+// OUTPUT CFG: Converted to a natural loop with a new header N.
+//
+// Entry (callbr)
+// / \
+// v v
+// target.H target.B
+// \ /
+// \ /
+// v v
+// N <---.
+// / \ \
+// / \ |
+// v v /
+// H --> B --'
+// |
+// v
+// Exit
+//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/FixIrreducible.h"
@@ -231,6 +278,7 @@ static bool fixIrreducible(Cycle &C, CycleInfo &CI, DominatorTree &DT,
return false;
LLVM_DEBUG(dbgs() << "Processing cycle:\n" << CI.print(&C) << "\n";);
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
ControlFlowHub CHub;
SetVector<BasicBlock *> Predecessors;
@@ -242,18 +290,33 @@ static bool fixIrreducible(Cycle &C, CycleInfo &CI, DominatorTree &DT,
}
for (BasicBlock *P : Predecessors) {
- auto *Branch = cast<BranchInst>(P->getTerminator());
- // Exactly one of the two successors is the header.
- BasicBlock *Succ0 = Branch->getSuccessor(0) == Header ? Header : nullptr;
- BasicBlock *Succ1 = Succ0 ? nullptr : Header;
- if (!Succ0)
- assert(Branch->getSuccessor(1) == Header);
- assert(Succ0 || Succ1);
- CHub.addBranch(P, Succ0, Succ1);
-
- LLVM_DEBUG(dbgs() << "Added internal branch: " << P->getName() << " -> "
- << (Succ0 ? Succ0->getName() : "") << " "
- << (Succ1 ? Succ1->getName() : "") << "\n");
+ if (BranchInst *Branch = dyn_cast<BranchInst>(P->getTerminator()); Branch) {
+ // Exactly one of the two successors is the header.
+ BasicBlock *Succ0 = Branch->getSuccessor(0) == Header ? Header : nullptr;
+ BasicBlock *Succ1 = Succ0 ? nullptr : Header;
+ if (!Succ0)
+ assert(Branch->getSuccessor(1) == Header);
+ assert(Succ0 || Succ1);
+ CHub.addBranch(P, Succ0, Succ1);
+
+ LLVM_DEBUG(dbgs() << "Added internal branch: " << P->getName() << " -> "
+ << (Succ0 ? Succ0->getName() : "") << " "
+ << (Succ1 ? Succ1->getName() : "") << "\n");
+ } else if (CallBrInst *CallBr = dyn_cast<CallBrInst>(P->getTerminator());
+ CallBr) {
+ for (unsigned I = 0; I < CallBr->getNumSuccessors(); ++I) {
+ BasicBlock *Succ = CallBr->getSuccessor(I);
+ if (Succ != Header)
+ continue;
+ BasicBlock *NewSucc = llvm::ControlFlowHub::createCallBrTarget(
+ CallBr, Succ, I, false, &CI, &DTU, LI);
+ CHub.addBranch(NewSucc, Succ);
+ LLVM_DEBUG(dbgs() << "Added internal branch: " << NewSucc->getName()
+ << " -> " << Succ->getName() << "\n");
+ }
+ } else {
+ llvm_unreachable("Unsupported block terminator.");
+ }
}
// Redirect external incoming edges. This includes the edges on the header.
@@ -266,17 +329,32 @@ static bool fixIrreducible(Cycle &C, CycleInfo &CI, DominatorTree &DT,
}
for (BasicBlock *P : Predecessors) {
- auto *Branch = cast<BranchInst>(P->getTerminator());
- BasicBlock *Succ0 = Branch->getSuccessor(0);
- Succ0 = C.contains(Succ0) ? Succ0 : nullptr;
- BasicBlock *Succ1 =
- Branch->isUnconditional() ? nullptr : Branch->getSuccessor(1);
- Succ1 = Succ1 && C.contains(Succ1) ? Succ1 : nullptr;
- CHub.addBranch(P, Succ0, Succ1);
-
- LLVM_DEBUG(dbgs() << "Added external branch: " << P->getName() << " -> "
- << (Succ0 ? Succ0->getName() : "") << " "
- << (Succ1 ? Succ1->getName() : "") << "\n");
+ if (BranchInst *Branch = dyn_cast<BranchInst>(P->getTerminator()); Branch) {
+ BasicBlock *Succ0 = Branch->getSuccessor(0);
+ Succ0 = C.contains(Succ0) ? Succ0 : nullptr;
+ BasicBlock *Succ1 =
+ Branch->isUnconditional() ? nullptr : Branch->getSuccessor(1);
+ Succ1 = Succ1 && C.contains(Succ1) ? Succ1 : nullptr;
+ CHub.addBranch(P, Succ0, Succ1);
+
+ LLVM_DEBUG(dbgs() << "Added external branch: " << P->getName() << " -> "
+ << (Succ0 ? Succ0->getName() : "") << " "
+ << (Succ1 ? Succ1->getName() : "") << "\n");
+ } else if (CallBrInst *CallBr = dyn_cast<CallBrInst>(P->getTerminator());
+ CallBr) {
+ for (unsigned I = 0; I < CallBr->getNumSuccessors(); ++I) {
+ BasicBlock *Succ = CallBr->getSuccessor(I);
+ if (!C.contains(Succ))
+ continue;
+ BasicBlock *NewSucc = llvm::ControlFlowHub::createCallBrTarget(
+ CallBr, Succ, I, true, &CI, &DTU, LI);
+ CHub.addBranch(NewSucc, Succ);
+ LLVM_DEBUG(dbgs() << "Added external branch: " << NewSucc->getName()
+ << " -> " << Succ->getName() << "\n");
+ }
+ } else {
+ llvm_unreachable("Unsupported block terminator.");
+ }
}
// Redirect all the backedges through a "hub" consisting of a series
@@ -292,7 +370,6 @@ static bool fixIrreducible(Cycle &C, CycleInfo &CI, DominatorTree &DT,
SetVector<BasicBlock *> Entries;
Entries.insert(C.entry_rbegin(), C.entry_rend());
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
CHub.finalize(&DTU, GuardBlocks, "irr");
#if defined(EXPENSIVE_CHECKS)
assert(DT.verify(DominatorTree::VerificationLevel::Full));
@@ -325,7 +402,7 @@ static bool FixIrreducibleImpl(Function &F, CycleInfo &CI, DominatorTree &DT,
LLVM_DEBUG(dbgs() << "===== Fix irreducible control-flow in function: "
<< F.getName() << "\n");
- assert(hasOnlySimpleTerminator(F) && "Unsupported block terminator.");
+ assert(hasOnlySimpleTerminatorOrCallBr(F) && "Unsupported block terminator.");
bool Changed = false;
for (Cycle *TopCycle : CI.toplevel_cycles()) {
diff --git a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
index 9f338dbc78cff..51e5aaa5225e1 100644
--- a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
+++ b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
@@ -12,7 +12,11 @@
//
// Limitation: This assumes that all terminators in the CFG are direct branches
// (the "br" instruction). The presence of any other control flow
-// such as indirectbr, switch or callbr will cause an assert.
+// such as indirectbr ot switch will cause an assert.
+// The callbr terminator is supported by creating intermediate
+// target blocks that unconditionally branch to the original target
+// blocks. These intermediate target blocks can then be redirected
+// through the ControlFlowHub as usual.
//
//===----------------------------------------------------------------------===//
@@ -150,25 +154,53 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {
SmallVector<BasicBlock *, 8> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ SmallVector<BasicBlock *, 8> CallBrTargetBlocks;
// Redirect exiting edges through a control flow hub.
ControlFlowHub CHub;
- for (auto *BB : ExitingBlocks) {
- auto *Branch = cast<BranchInst>(BB->getTerminator());
- BasicBlock *Succ0 = Branch->getSuccessor(0);
- Succ0 = L->contains(Succ0) ? nullptr : Succ0;
-
- BasicBlock *Succ1 =
- Branch->isUnconditional() ? nullptr : Branch->getSuccessor(1);
- Succ1 = L->contains(Succ1) ? nullptr : Succ1;
- CHub.addBranch(BB, Succ0, Succ1);
-
- LLVM_DEBUG(dbgs() << "Added exiting branch: " << BB->getName() << " -> {"
- << (Succ0 ? Succ0->getName() : "<none>") << ", "
- << (Succ1 ? Succ1->getName() : "<none>") << "}\n");
+
+ for (unsigned I = 0; I < ExitingBlocks.size(); ++I) {
+ BasicBlock *BB = ExitingBlocks[I];
+ if (BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator());
+ Branch) {
+ BasicBlock *Succ0 = Branch->getSuccessor(0);
+ Succ0 = L->contains(Succ0) ? nullptr : Succ0;
+
+ BasicBlock *Succ1 =
+ Branch->isUnconditional() ? nullptr : Branch->getSuccessor(1);
+ Succ1 = L->contains(Succ1) ? nullptr : Succ1;
+ CHub.addBranch(BB, Succ0, Succ1);
+
+ LLVM_DEBUG(dbgs() << "Added exiting branch: " << BB->getName() << " -> {"
+ << (Succ0 ? Succ0->getName() : "<none>") << ", "
+ << (Succ1 ? Succ1->getName() : "<none>") << "}\n");
+ } else if (CallBrInst *CallBr = dyn_cast<CallBrInst>(BB->getTerminator());
+ CallBr) {
+ for (unsigned J = 0; J < CallBr->getNumSuccessors(); ++J) {
+ BasicBlock *Succ = CallBr->getSuccessor(J);
+ if (L->contains(Succ))
+ continue;
+ BasicBlock *NewSucc = ControlFlowHub::createCallBrTarget(
+ CallBr, Succ, J, false, nullptr, &DTU, &LI);
+ // ExitingBlocks is later used to restore SSA, so we need to make sure
+ // that the blocks used for phi nodes in the guard blocks match the
+ // predecessors of the guard blocks, which, in the case of callbr, are
+ // the new intermediate target blocks instead of the callbr blocks
+ // themselves.
+ ExitingBlocks[I] = NewSucc;
+ CHub.addBranch(NewSucc, Succ);
+ LLVM_DEBUG(dbgs() << "Added exiting branch: " << NewSucc->getName()
+ << " -> " << Succ->getName() << "\n");
+ // Also add the new target block to the list of exiting blocks that
+ // should later be added to the parent loops.
+ CallBrTargetBlocks.push_back(NewSucc);
+ }
+ } else {
+ llvm_unreachable("Unsupported block terminator.");
+ }
}
SmallVector<BasicBlock *, 8> GuardBlocks;
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
Ba...
[truncated]
|
@llvm/pr-subscribers-llvm-transforms Author: Robert Imschweiler (ro-i) ChangesFirst batch of changes to add support for basic inline-asm callbr for the AMDGPU backend. Patch is 132.46 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/149308.diff 16 Files Affected:
diff --git a/llvm/include/llvm/Support/GenericLoopInfoImpl.h b/llvm/include/llvm/Support/GenericLoopInfoImpl.h
index 6fc508b0e0cca..8b7927357d57d 100644
--- a/llvm/include/llvm/Support/GenericLoopInfoImpl.h
+++ b/llvm/include/llvm/Support/GenericLoopInfoImpl.h
@@ -355,7 +355,7 @@ void LoopBase<BlockT, LoopT>::verifyLoop() const {
if (BB == getHeader()) {
assert(!OutsideLoopPreds.empty() && "Loop is unreachable!");
} else if (!OutsideLoopPreds.empty()) {
- // A non-header loop shouldn't be reachable from outside the loop,
+ // A non-header loop block shouldn't be reachable from outside the loop,
// though it is permitted if the predecessor is not itself actually
// reachable.
BlockT *EntryBB = &BB->getParent()->front();
diff --git a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
index 979f3b3eb72ff..fc7b313eb552a 100644
--- a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -607,10 +607,17 @@ LLVM_ABI bool SplitIndirectBrCriticalEdges(Function &F,
// successors
LLVM_ABI void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder);
-// Check whether the function only has simple terminator:
+template <typename... TermInst>
+LLVM_ABI bool hasOnlyGivenTerminators(const Function &F);
+
+// Check whether the function only has blocks with simple terminators:
// br/brcond/unreachable/ret
LLVM_ABI bool hasOnlySimpleTerminator(const Function &F);
+// Check whether the function only has blocks with simple terminators
+// (br/brcond/unreachable/ret) or callbr.
+LLVM_ABI bool hasOnlySimpleTerminatorOrCallBr(const Function &F);
+
} // end namespace llvm
#endif // LLVM_TRANSFORMS_UTILS_BASICBLOCKUTILS_H
diff --git a/llvm/include/llvm/Transforms/Utils/ControlFlowUtils.h b/llvm/include/llvm/Transforms/Utils/ControlFlowUtils.h
index 810fef29f4010..e55efbc907d42 100644
--- a/llvm/include/llvm/Transforms/Utils/ControlFlowUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/ControlFlowUtils.h
@@ -15,10 +15,13 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/CycleInfo.h"
namespace llvm {
class BasicBlock;
+class CallBrInst;
+class LoopInfo;
class DomTreeUpdater;
/// Given a set of branch descriptors [BB, Succ0, Succ1], create a "hub" such
@@ -104,7 +107,8 @@ struct ControlFlowHub {
: BB(BB), Succ0(Succ0), Succ1(Succ1) {}
};
- void addBranch(BasicBlock *BB, BasicBlock *Succ0, BasicBlock *Succ1) {
+ void addBranch(BasicBlock *BB, BasicBlock *Succ0,
+ BasicBlock *Succ1 = nullptr) {
assert(BB);
assert(Succ0 || Succ1);
Branches.emplace_back(BB, Succ0, Succ1);
@@ -118,6 +122,35 @@ struct ControlFlowHub {
std::optional<unsigned> MaxControlFlowBooleans = std::nullopt);
SmallVector<BranchDescriptor> Branches;
+
+ /**
+ * \brief Create a new intermediate target block for a callbr edge.
+ *
+ * This function creates a new basic block (the "target block") that sits
+ * between a callbr instruction and one of its successors. The callbr's
+ * successor is rewired to this new block, and the new block unconditionally
+ * branches to the original successor. This is useful for normalizing control
+ * flow, e.g., when transforming irreducible loops.
+ *
+ * \param CallBr The callbr instruction whose edge is to be split.
+ * \param Succ The original successor basic block to be reached.
+ * \param SuccIdx The index of the successor in the callbr instruction.
+ * \param AttachToCallBr If true, the new block is associated with the
+ * callbr's parent for loop/cycle info. If false, the new block is associated
+ * with the callbr's successor for loop/cycle info. \param CI Optional
+ * CycleInfo for updating cycle membership. \param DTU Optional
+ * DomTreeUpdater for updating the dominator tree. \param LI Optional LoopInfo
+ * for updating loop membership.
+ *
+ * \returns The newly created intermediate target block.
+ *
+ * \note This function updates PHI nodes, dominator tree, loop info, and cycle
+ * info as needed.
+ */
+ static BasicBlock *
+ createCallBrTarget(CallBrInst *CallBr, BasicBlock *Succ, unsigned SuccIdx,
+ bool AttachToCallBr = true, CycleInfo *CI = nullptr,
+ DomTreeUpdater *DTU = nullptr, LoopInfo *LI = nullptr);
};
} // end namespace llvm
diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index c8255742c41ba..6103d07212fc2 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -1766,12 +1766,21 @@ void llvm::InvertBranch(BranchInst *PBI, IRBuilderBase &Builder) {
PBI->swapSuccessors();
}
-bool llvm::hasOnlySimpleTerminator(const Function &F) {
+template <typename... TermInst>
+bool llvm::hasOnlyGivenTerminators(const Function &F) {
for (auto &BB : F) {
auto *Term = BB.getTerminator();
- if (!(isa<ReturnInst>(Term) || isa<UnreachableInst>(Term) ||
- isa<BranchInst>(Term)))
+ if (!(isa<TermInst>(Term) || ...))
return false;
}
return true;
}
+
+bool llvm::hasOnlySimpleTerminator(const Function &F) {
+ return hasOnlyGivenTerminators<ReturnInst, UnreachableInst, BranchInst>(F);
+}
+
+bool llvm::hasOnlySimpleTerminatorOrCallBr(const Function &F) {
+ return hasOnlyGivenTerminators<ReturnInst, UnreachableInst, BranchInst,
+ CallBrInst>(F);
+}
\ No newline at end of file
diff --git a/llvm/lib/Transforms/Utils/ControlFlowUtils.cpp b/llvm/lib/Transforms/Utils/ControlFlowUtils.cpp
index 4b0065d0030cd..f7197a68813dd 100644
--- a/llvm/lib/Transforms/Utils/ControlFlowUtils.cpp
+++ b/llvm/lib/Transforms/Utils/ControlFlowUtils.cpp
@@ -14,6 +14,7 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/ValueHandle.h"
@@ -282,7 +283,9 @@ std::pair<BasicBlock *, bool> ControlFlowHub::finalize(
for (auto [BB, Succ0, Succ1] : Branches) {
#ifndef NDEBUG
- assert(Incoming.insert(BB).second && "Duplicate entry for incoming block.");
+ assert(
+ (Incoming.insert(BB).second || isa<CallBrInst>(BB->getTerminator())) &&
+ "Duplicate entry for incoming block.");
#endif
if (Succ0)
Outgoing.insert(Succ0);
@@ -342,3 +345,55 @@ std::pair<BasicBlock *, bool> ControlFlowHub::finalize(
return {FirstGuardBlock, true};
}
+
+BasicBlock *ControlFlowHub::createCallBrTarget(
+ CallBrInst *CallBr, BasicBlock *Succ, unsigned SuccIdx, bool AttachToCallBr,
+ CycleInfo *CI, DomTreeUpdater *DTU, LoopInfo *LI) {
+ BasicBlock *CallBrBlock = CallBr->getParent();
+ BasicBlock *CallBrTarget =
+ BasicBlock::Create(CallBrBlock->getContext(),
+ CallBrBlock->getName() + ".target." + Succ->getName(),
+ CallBrBlock->getParent());
+ // Rewire control flow from callbr to the new target block.
+ Succ->replacePhiUsesWith(CallBrBlock, CallBrTarget);
+ CallBr->setSuccessor(SuccIdx, CallBrTarget);
+ // Jump from the new target block to the original successor.
+ BranchInst::Create(Succ, CallBrTarget);
+ if (LI) {
+ if (Loop *L = LI->getLoopFor(AttachToCallBr ? CallBrBlock : Succ); L) {
+ bool AddToLoop = true;
+ if (AttachToCallBr) {
+ // Check if the loops are disjoint. In that case, we do not add the
+ // intermediate target to any loop.
+ if (auto *LL = LI->getLoopFor(Succ);
+ LL && !L->contains(LL) && !LL->contains(L))
+ AddToLoop = false;
+ }
+ if (AddToLoop)
+ L->addBasicBlockToLoop(CallBrTarget, *LI);
+ }
+ }
+ if (CI) {
+ if (auto *C = CI->getCycle(AttachToCallBr ? CallBrBlock : Succ); C) {
+ bool AddToCycle = true;
+ if (AttachToCallBr) {
+ // Check if the cycles are disjoint. In that case, we do not add the
+ // intermediate target to any cycle.
+ if (auto *CC = CI->getCycle(Succ); CC) {
+ auto *CommonC = CI->getSmallestCommonCycle(C, CC);
+ if (CommonC != C && CommonC != CC)
+ AddToCycle = false;
+ }
+ }
+ if (AddToCycle)
+ CI->addBlockToCycle(CallBrTarget, C);
+ }
+ }
+ if (DTU) {
+ DTU->applyUpdates({{DominatorTree::Insert, CallBrBlock, CallBrTarget}});
+ if (DTU->getDomTree().dominates(CallBrBlock, Succ))
+ DTU->applyUpdates({{DominatorTree::Delete, CallBrBlock, Succ},
+ {DominatorTree::Insert, CallBrTarget, Succ}});
+ }
+ return CallBrTarget;
+}
\ No newline at end of file
diff --git a/llvm/lib/Transforms/Utils/FixIrreducible.cpp b/llvm/lib/Transforms/Utils/FixIrreducible.cpp
index 45e1d12c2bfff..ade23f942352d 100644
--- a/llvm/lib/Transforms/Utils/FixIrreducible.cpp
+++ b/llvm/lib/Transforms/Utils/FixIrreducible.cpp
@@ -79,6 +79,53 @@
// Limitation: The pass cannot handle switch statements and indirect
// branches. Both must be lowered to plain branches first.
//
+// CallBr support: CallBr is handled as a more general branch instruction which
+// can have multiple successors. The pass redirects the edges to intermediate
+// target blocks that unconditionally branch to the original callbr target
+// blocks. This allows the control flow hub to know to which of the original
+// target blocks to jump to.
+// Example input CFG:
+// Entry (callbr)
+// / \
+// v v
+// H ----> B
+// ^ /|
+// `----' |
+// v
+// Exit
+//
+// becomes:
+// Entry (callbr)
+// / \
+// v v
+// target.H target.B
+// | |
+// v v
+// H ----> B
+// ^ /|
+// `----' |
+// v
+// Exit
+//
+// Note
+// OUTPUT CFG: Converted to a natural loop with a new header N.
+//
+// Entry (callbr)
+// / \
+// v v
+// target.H target.B
+// \ /
+// \ /
+// v v
+// N <---.
+// / \ \
+// / \ |
+// v v /
+// H --> B --'
+// |
+// v
+// Exit
+//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/FixIrreducible.h"
@@ -231,6 +278,7 @@ static bool fixIrreducible(Cycle &C, CycleInfo &CI, DominatorTree &DT,
return false;
LLVM_DEBUG(dbgs() << "Processing cycle:\n" << CI.print(&C) << "\n";);
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
ControlFlowHub CHub;
SetVector<BasicBlock *> Predecessors;
@@ -242,18 +290,33 @@ static bool fixIrreducible(Cycle &C, CycleInfo &CI, DominatorTree &DT,
}
for (BasicBlock *P : Predecessors) {
- auto *Branch = cast<BranchInst>(P->getTerminator());
- // Exactly one of the two successors is the header.
- BasicBlock *Succ0 = Branch->getSuccessor(0) == Header ? Header : nullptr;
- BasicBlock *Succ1 = Succ0 ? nullptr : Header;
- if (!Succ0)
- assert(Branch->getSuccessor(1) == Header);
- assert(Succ0 || Succ1);
- CHub.addBranch(P, Succ0, Succ1);
-
- LLVM_DEBUG(dbgs() << "Added internal branch: " << P->getName() << " -> "
- << (Succ0 ? Succ0->getName() : "") << " "
- << (Succ1 ? Succ1->getName() : "") << "\n");
+ if (BranchInst *Branch = dyn_cast<BranchInst>(P->getTerminator()); Branch) {
+ // Exactly one of the two successors is the header.
+ BasicBlock *Succ0 = Branch->getSuccessor(0) == Header ? Header : nullptr;
+ BasicBlock *Succ1 = Succ0 ? nullptr : Header;
+ if (!Succ0)
+ assert(Branch->getSuccessor(1) == Header);
+ assert(Succ0 || Succ1);
+ CHub.addBranch(P, Succ0, Succ1);
+
+ LLVM_DEBUG(dbgs() << "Added internal branch: " << P->getName() << " -> "
+ << (Succ0 ? Succ0->getName() : "") << " "
+ << (Succ1 ? Succ1->getName() : "") << "\n");
+ } else if (CallBrInst *CallBr = dyn_cast<CallBrInst>(P->getTerminator());
+ CallBr) {
+ for (unsigned I = 0; I < CallBr->getNumSuccessors(); ++I) {
+ BasicBlock *Succ = CallBr->getSuccessor(I);
+ if (Succ != Header)
+ continue;
+ BasicBlock *NewSucc = llvm::ControlFlowHub::createCallBrTarget(
+ CallBr, Succ, I, false, &CI, &DTU, LI);
+ CHub.addBranch(NewSucc, Succ);
+ LLVM_DEBUG(dbgs() << "Added internal branch: " << NewSucc->getName()
+ << " -> " << Succ->getName() << "\n");
+ }
+ } else {
+ llvm_unreachable("Unsupported block terminator.");
+ }
}
// Redirect external incoming edges. This includes the edges on the header.
@@ -266,17 +329,32 @@ static bool fixIrreducible(Cycle &C, CycleInfo &CI, DominatorTree &DT,
}
for (BasicBlock *P : Predecessors) {
- auto *Branch = cast<BranchInst>(P->getTerminator());
- BasicBlock *Succ0 = Branch->getSuccessor(0);
- Succ0 = C.contains(Succ0) ? Succ0 : nullptr;
- BasicBlock *Succ1 =
- Branch->isUnconditional() ? nullptr : Branch->getSuccessor(1);
- Succ1 = Succ1 && C.contains(Succ1) ? Succ1 : nullptr;
- CHub.addBranch(P, Succ0, Succ1);
-
- LLVM_DEBUG(dbgs() << "Added external branch: " << P->getName() << " -> "
- << (Succ0 ? Succ0->getName() : "") << " "
- << (Succ1 ? Succ1->getName() : "") << "\n");
+ if (BranchInst *Branch = dyn_cast<BranchInst>(P->getTerminator()); Branch) {
+ BasicBlock *Succ0 = Branch->getSuccessor(0);
+ Succ0 = C.contains(Succ0) ? Succ0 : nullptr;
+ BasicBlock *Succ1 =
+ Branch->isUnconditional() ? nullptr : Branch->getSuccessor(1);
+ Succ1 = Succ1 && C.contains(Succ1) ? Succ1 : nullptr;
+ CHub.addBranch(P, Succ0, Succ1);
+
+ LLVM_DEBUG(dbgs() << "Added external branch: " << P->getName() << " -> "
+ << (Succ0 ? Succ0->getName() : "") << " "
+ << (Succ1 ? Succ1->getName() : "") << "\n");
+ } else if (CallBrInst *CallBr = dyn_cast<CallBrInst>(P->getTerminator());
+ CallBr) {
+ for (unsigned I = 0; I < CallBr->getNumSuccessors(); ++I) {
+ BasicBlock *Succ = CallBr->getSuccessor(I);
+ if (!C.contains(Succ))
+ continue;
+ BasicBlock *NewSucc = llvm::ControlFlowHub::createCallBrTarget(
+ CallBr, Succ, I, true, &CI, &DTU, LI);
+ CHub.addBranch(NewSucc, Succ);
+ LLVM_DEBUG(dbgs() << "Added external branch: " << NewSucc->getName()
+ << " -> " << Succ->getName() << "\n");
+ }
+ } else {
+ llvm_unreachable("Unsupported block terminator.");
+ }
}
// Redirect all the backedges through a "hub" consisting of a series
@@ -292,7 +370,6 @@ static bool fixIrreducible(Cycle &C, CycleInfo &CI, DominatorTree &DT,
SetVector<BasicBlock *> Entries;
Entries.insert(C.entry_rbegin(), C.entry_rend());
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
CHub.finalize(&DTU, GuardBlocks, "irr");
#if defined(EXPENSIVE_CHECKS)
assert(DT.verify(DominatorTree::VerificationLevel::Full));
@@ -325,7 +402,7 @@ static bool FixIrreducibleImpl(Function &F, CycleInfo &CI, DominatorTree &DT,
LLVM_DEBUG(dbgs() << "===== Fix irreducible control-flow in function: "
<< F.getName() << "\n");
- assert(hasOnlySimpleTerminator(F) && "Unsupported block terminator.");
+ assert(hasOnlySimpleTerminatorOrCallBr(F) && "Unsupported block terminator.");
bool Changed = false;
for (Cycle *TopCycle : CI.toplevel_cycles()) {
diff --git a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
index 9f338dbc78cff..51e5aaa5225e1 100644
--- a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
+++ b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
@@ -12,7 +12,11 @@
//
// Limitation: This assumes that all terminators in the CFG are direct branches
// (the "br" instruction). The presence of any other control flow
-// such as indirectbr, switch or callbr will cause an assert.
+// such as indirectbr ot switch will cause an assert.
+// The callbr terminator is supported by creating intermediate
+// target blocks that unconditionally branch to the original target
+// blocks. These intermediate target blocks can then be redirected
+// through the ControlFlowHub as usual.
//
//===----------------------------------------------------------------------===//
@@ -150,25 +154,53 @@ static bool unifyLoopExits(DominatorTree &DT, LoopInfo &LI, Loop *L) {
SmallVector<BasicBlock *, 8> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ SmallVector<BasicBlock *, 8> CallBrTargetBlocks;
// Redirect exiting edges through a control flow hub.
ControlFlowHub CHub;
- for (auto *BB : ExitingBlocks) {
- auto *Branch = cast<BranchInst>(BB->getTerminator());
- BasicBlock *Succ0 = Branch->getSuccessor(0);
- Succ0 = L->contains(Succ0) ? nullptr : Succ0;
-
- BasicBlock *Succ1 =
- Branch->isUnconditional() ? nullptr : Branch->getSuccessor(1);
- Succ1 = L->contains(Succ1) ? nullptr : Succ1;
- CHub.addBranch(BB, Succ0, Succ1);
-
- LLVM_DEBUG(dbgs() << "Added exiting branch: " << BB->getName() << " -> {"
- << (Succ0 ? Succ0->getName() : "<none>") << ", "
- << (Succ1 ? Succ1->getName() : "<none>") << "}\n");
+
+ for (unsigned I = 0; I < ExitingBlocks.size(); ++I) {
+ BasicBlock *BB = ExitingBlocks[I];
+ if (BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator());
+ Branch) {
+ BasicBlock *Succ0 = Branch->getSuccessor(0);
+ Succ0 = L->contains(Succ0) ? nullptr : Succ0;
+
+ BasicBlock *Succ1 =
+ Branch->isUnconditional() ? nullptr : Branch->getSuccessor(1);
+ Succ1 = L->contains(Succ1) ? nullptr : Succ1;
+ CHub.addBranch(BB, Succ0, Succ1);
+
+ LLVM_DEBUG(dbgs() << "Added exiting branch: " << BB->getName() << " -> {"
+ << (Succ0 ? Succ0->getName() : "<none>") << ", "
+ << (Succ1 ? Succ1->getName() : "<none>") << "}\n");
+ } else if (CallBrInst *CallBr = dyn_cast<CallBrInst>(BB->getTerminator());
+ CallBr) {
+ for (unsigned J = 0; J < CallBr->getNumSuccessors(); ++J) {
+ BasicBlock *Succ = CallBr->getSuccessor(J);
+ if (L->contains(Succ))
+ continue;
+ BasicBlock *NewSucc = ControlFlowHub::createCallBrTarget(
+ CallBr, Succ, J, false, nullptr, &DTU, &LI);
+ // ExitingBlocks is later used to restore SSA, so we need to make sure
+ // that the blocks used for phi nodes in the guard blocks match the
+ // predecessors of the guard blocks, which, in the case of callbr, are
+ // the new intermediate target blocks instead of the callbr blocks
+ // themselves.
+ ExitingBlocks[I] = NewSucc;
+ CHub.addBranch(NewSucc, Succ);
+ LLVM_DEBUG(dbgs() << "Added exiting branch: " << NewSucc->getName()
+ << " -> " << Succ->getName() << "\n");
+ // Also add the new target block to the list of exiting blocks that
+ // should later be added to the parent loops.
+ CallBrTargetBlocks.push_back(NewSucc);
+ }
+ } else {
+ llvm_unreachable("Unsupported block terminator.");
+ }
}
SmallVector<BasicBlock *, 8> GuardBlocks;
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
Ba...
[truncated]
|
@@ -325,7 +402,7 @@ static bool FixIrreducibleImpl(Function &F, CycleInfo &CI, DominatorTree &DT, | |||
LLVM_DEBUG(dbgs() << "===== Fix irreducible control-flow in function: " | |||
<< F.getName() << "\n"); | |||
|
|||
assert(hasOnlySimpleTerminator(F) && "Unsupported block terminator."); | |||
assert(hasOnlySimpleTerminatorOrCallBr(F) && "Unsupported block terminator."); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this assert really necessary, or will it hit one of the later asserts when a particular block is processed?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Only if those blocks are processed while dealing with a loop/cycle.
However, they wouldn't actually be a problem if used completely unrelated to any loop/cycle.
Ig that it's more for documenting the restriction of the pass inside the code as well.
if (!ParentLoop->contains(C)) | ||
ParentLoop->addBasicBlockToLoop(C, LI); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This seems like a double map lookup pattern, is there a way to just add without prechecking if it's already in the loop?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think that I could alternatively do something like:
for (auto *C : CallBrTargetBlocks) {
if (LI-getLoopFor(C->getSingleSuccessor()) != ParentLoop)
ParentLoop->addBasicBlockToLoop(C, LI);
}
However, I'm not sure whether that would be 100% correct. What if ParentLoop
is a parent loop of LI-getLoopFor(C->getSingleSuccessor())
--- we would have to check for that as well.
Co-authored-by: Matt Arsenault <[email protected]>
First batch of changes to add support for basic inline-asm callbr for the AMDGPU backend.