Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 54 additions & 35 deletions llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,14 +181,52 @@ BasicBlock *AMDGPUUnifyDivergentExitNodesImpl::unifyReturnBlockSet(
return NewRetBlock;
}

static BasicBlock *
createDummyReturnBlock(Function &F,
SmallVector<BasicBlock *, 4> &ReturningBlocks) {
BasicBlock *DummyReturnBB =
BasicBlock::Create(F.getContext(), "DummyReturnBlock", &F);
Type *RetTy = F.getReturnType();
Value *RetVal = RetTy->isVoidTy() ? nullptr : PoisonValue::get(RetTy);
ReturnInst::Create(F.getContext(), RetVal, DummyReturnBB);
ReturningBlocks.push_back(DummyReturnBB);
return DummyReturnBB;
}

/// Handle conditional branch instructions (-> 2 targets) and callbr
/// instructions with N targets.
static void handleNBranch(Function &F, BasicBlock *BB, Instruction *BI,
BasicBlock *DummyReturnBB,
std::vector<DominatorTree::UpdateType> &Updates) {
SmallVector<BasicBlock *, 2> Successors(successors(BB));

// Create a new transition block to hold the conditional branch.
BasicBlock *TransitionBB = BB->splitBasicBlock(BI, "TransitionBlock");

Updates.reserve(Updates.size() + 2 * Successors.size() + 2);

// 'Successors' become successors of TransitionBB instead of BB,
// and TransitionBB becomes a single successor of BB.
Updates.emplace_back(DominatorTree::Insert, BB, TransitionBB);
for (BasicBlock *Successor : Successors) {
Updates.emplace_back(DominatorTree::Insert, TransitionBB, Successor);
Updates.emplace_back(DominatorTree::Delete, BB, Successor);
}

// Create a branch that will always branch to the transition block and
// references DummyReturnBB.
BB->getTerminator()->eraseFromParent();
BranchInst::Create(TransitionBB, DummyReturnBB,
ConstantInt::getTrue(F.getContext()), BB);
Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB);
}

bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT,
const PostDominatorTree &PDT,
const UniformityInfo &UA) {
assert(hasOnlySimpleTerminator(F) && "Unsupported block terminator.");

if (PDT.root_size() == 0 ||
(PDT.root_size() == 1 &&
!isa<BranchInst>(PDT.getRoot()->getTerminator())))
!isa<BranchInst, CallBrInst>(PDT.getRoot()->getTerminator())))
return false;

// Loop over all of the blocks in a function, tracking all of the blocks that
Expand Down Expand Up @@ -222,46 +260,27 @@ bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT,
if (HasDivergentExitBlock)
UnreachableBlocks.push_back(BB);
} else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {

ConstantInt *BoolTrue = ConstantInt::getTrue(F.getContext());
if (DummyReturnBB == nullptr) {
DummyReturnBB = BasicBlock::Create(F.getContext(),
"DummyReturnBlock", &F);
Type *RetTy = F.getReturnType();
Value *RetVal = RetTy->isVoidTy() ? nullptr : PoisonValue::get(RetTy);
ReturnInst::Create(F.getContext(), RetVal, DummyReturnBB);
ReturningBlocks.push_back(DummyReturnBB);
}
if (!DummyReturnBB)
DummyReturnBB = createDummyReturnBlock(F, ReturningBlocks);

if (BI->isUnconditional()) {
BasicBlock *LoopHeaderBB = BI->getSuccessor(0);
BI->eraseFromParent(); // Delete the unconditional branch.
// Add a new conditional branch with a dummy edge to the return block.
BranchInst::Create(LoopHeaderBB, DummyReturnBB, BoolTrue, BB);
Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB);
} else { // Conditional branch.
SmallVector<BasicBlock *, 2> Successors(successors(BB));

// Create a new transition block to hold the conditional branch.
BasicBlock *TransitionBB = BB->splitBasicBlock(BI, "TransitionBlock");

Updates.reserve(Updates.size() + 2 * Successors.size() + 2);

// 'Successors' become successors of TransitionBB instead of BB,
// and TransitionBB becomes a single successor of BB.
Updates.emplace_back(DominatorTree::Insert, BB, TransitionBB);
for (BasicBlock *Successor : Successors) {
Updates.emplace_back(DominatorTree::Insert, TransitionBB, Successor);
Updates.emplace_back(DominatorTree::Delete, BB, Successor);
}

// Create a branch that will always branch to the transition block and
// references DummyReturnBB.
BB->getTerminator()->eraseFromParent();
BranchInst::Create(TransitionBB, DummyReturnBB, BoolTrue, BB);
BranchInst::Create(LoopHeaderBB, DummyReturnBB,
ConstantInt::getTrue(F.getContext()), BB);
Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB);
} else {
handleNBranch(F, BB, BI, DummyReturnBB, Updates);
}
Changed = true;
} else if (CallBrInst *CBI = dyn_cast<CallBrInst>(BB->getTerminator())) {
if (!DummyReturnBB)
DummyReturnBB = createDummyReturnBlock(F, ReturningBlocks);

handleNBranch(F, BB, CBI, DummyReturnBB, Updates);
} else {
llvm_unreachable("unsupported block terminator");
}
}

Expand Down
20 changes: 12 additions & 8 deletions llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -554,11 +554,10 @@ void StructurizeCFG::analyzeLoops(RegionNode *N) {
} else {
// Test for successors as back edge
BasicBlock *BB = N->getNodeAs<BasicBlock>();
BranchInst *Term = cast<BranchInst>(BB->getTerminator());

for (BasicBlock *Succ : Term->successors())
if (Visited.count(Succ))
Loops[Succ] = BB;
if (BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator()))
for (BasicBlock *Succ : Term->successors())
if (Visited.count(Succ))
Loops[Succ] = BB;
}
}

Expand Down Expand Up @@ -590,7 +589,7 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {

for (BasicBlock *P : predecessors(BB)) {
// Ignore it if it's a branch from outside into our region entry
if (!ParentRegion->contains(P))
if (!ParentRegion->contains(P) || !dyn_cast<BranchInst>(P->getTerminator()))
continue;

Region *R = RI->getRegionFor(P);
Expand Down Expand Up @@ -1397,13 +1396,18 @@ bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) {
/// Run the transformation for each region found
bool StructurizeCFG::run(Region *R, DominatorTree *DT,
const TargetTransformInfo *TTI) {
if (R->isTopLevelRegion())
// CallBr and its corresponding direct target blocks are for now ignored by
// this pass. This is not a limitation for the currently intended uses cases
// of callbr in the AMDGPU backend (cf.
// https://discourse.llvm.org/t/rfc-add-callbr-intrinsic-support/86087).
// Parent and child regions are not affected by this (current) restriction.
// See `llvm/test/Transforms/StructurizeCFG/callbr.ll` for details.
if (R->isTopLevelRegion() || isa<CallBrInst>(R->getEntry()->getTerminator()))
return false;

this->DT = DT;
this->TTI = TTI;
Func = R->getEntry()->getParent();
assert(hasOnlySimpleTerminator(*Func) && "Unsupported block terminator.");

ParentRegion = R;

Expand Down
54 changes: 54 additions & 0 deletions llvm/test/CodeGen/AMDGPU/callbr.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck %s

define void @callbr_inline_asm(ptr %src, ptr %dst1, ptr %dst2, i32 %c) {
; CHECK-LABEL: callbr_inline_asm:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_load_dword v0, v[0:1]
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: v_cmp_gt_i32 vcc v6, 42; s_cbranch_vccnz .LBB0_2
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: ; %bb.1: ; %fallthrough
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_store_dword v[2:3], v0
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
; CHECK-NEXT: .LBB0_2: ; Inline asm indirect target
; CHECK-NEXT: ; %indirect
; CHECK-NEXT: ; Label of block must be emitted
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_store_dword v[4:5], v0
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
%a = load i32, ptr %src, align 4
callbr void asm "v_cmp_gt_i32 vcc $0, 42; s_cbranch_vccnz ${1:l}", "r,!i"(i32 %c) to label %fallthrough [label %indirect]
fallthrough:
store i32 %a, ptr %dst1, align 4
br label %ret
indirect:
store i32 %a, ptr %dst2, align 4
br label %ret
ret:
ret void
}

define void @callbr_self_loop(i1 %c) {
; CHECK-LABEL: callbr_self_loop:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: .LBB1_1: ; %callbr
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: s_branch .LBB1_1
; CHECK-NEXT: .LBB1_2: ; Inline asm indirect target
; CHECK-NEXT: ; %callbr.target.ret
; CHECK-NEXT: ; Label of block must be emitted
; CHECK-NEXT: s_setpc_b64 s[30:31]
br label %callbr
callbr:
callbr void asm "", "!i"() to label %callbr [label %ret]
ret:
ret void
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

declare void @foo(ptr)
declare i1 @bar(ptr)
declare i32 @bar32(ptr)

define void @musttail_call_without_return_value(ptr %p) {
; CHECK-LABEL: define void @musttail_call_without_return_value(
Expand All @@ -28,6 +29,31 @@ bb.1:
ret void
}

define void @musttail_call_without_return_value_callbr(ptr %p) {
; CHECK-LABEL: define void @musttail_call_without_return_value_callbr(
; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[P]], align 1
; CHECK-NEXT: callbr void asm "", "r,!i"(i32 [[LOAD]])
; CHECK-NEXT: to label %[[BB_0:.*]] [label %bb.1]
; CHECK: [[BB_0]]:
; CHECK-NEXT: musttail call void @foo(ptr [[P]])
; CHECK-NEXT: ret void
; CHECK: [[BB_1:.*:]]
; CHECK-NEXT: ret void
;
entry:
%load = load i32, ptr %p, align 1
callbr void asm "", "r,!i"(i32 %load) to label %bb.0 [label %bb.1]

bb.0:
musttail call void @foo(ptr %p)
ret void

bb.1:
ret void
}

define i1 @musttail_call_with_return_value(ptr %p) {
; CHECK-LABEL: define i1 @musttail_call_with_return_value(
; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0]] {
Expand All @@ -51,3 +77,28 @@ bb.0:
bb.1:
ret i1 %load
}

define i32 @musttail_call_with_return_value_callbr(ptr %p) {
; CHECK-LABEL: define i32 @musttail_call_with_return_value_callbr(
; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[P]], align 1
; CHECK-NEXT: callbr void asm "", "r,!i"(i32 [[LOAD]])
; CHECK-NEXT: to label %[[BB_0:.*]] [label %bb.1]
; CHECK: [[BB_0]]:
; CHECK-NEXT: [[RET:%.*]] = musttail call i32 @bar32(ptr [[P]])
; CHECK-NEXT: ret i32 [[RET]]
; CHECK: [[BB_1:.*:]]
; CHECK-NEXT: ret i32 [[LOAD]]
;
entry:
%load = load i32, ptr %p, align 1
callbr void asm "", "r,!i"(i32 %load) to label %bb.0 [label %bb.1]

bb.0:
%ret = musttail call i32 @bar32(ptr %p)
ret i32 %ret

bb.1:
ret i32 %load
}
Loading