Skip to content

[DirectX] Undo SimplifyCFG by reversing select into if else basic blocks #153858

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 90 additions & 0 deletions llvm/lib/Target/DirectX/DXILLegalizePass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,11 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsDirectX.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <functional>

Expand Down Expand Up @@ -622,6 +625,92 @@ legalizeScalarLoadStoreOnArrays(Instruction &I,
return true;
}

// Note: Legalization to undo SimplifyCFG. Ideally, SimplifyCFG's
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TODO: fix comment grammar, make shorter.

// TargetTransformInfo would ignore our resource intrinsics, but
// it doesn't. This Works for a single select; multiple selects on
// raw buffer loads won’t be legalized. The hasOneUser for ExtractValueInst
// of dx_resource_load_rawbuffer is enforced in DXILOpLowering,
// so checking it here is fine.
static bool
legalizeBuffLoadSelectCalls(Instruction &I,
SmallVectorImpl<Instruction *> &ToRemove,
DenseMap<Value *, Value *> &) {
// Check if this is a dx_resource_load_rawbuffer intrinsic
auto *II = dyn_cast<IntrinsicInst>(&I);
if (!II || II->getIntrinsicID() != Intrinsic::dx_resource_load_rawbuffer)
return false;

// Check if the first argument is a select instruction
Value *Arg0 = II->getArgOperand(0);
auto *Sel = dyn_cast<SelectInst>(Arg0);
if (!Sel)
return false;

if (!II->hasOneUser())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this check necessary?

Copy link
Member Author

@farzonl farzonl Aug 16, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

replaceResRetUses requires that ExtractValueInst be the only use. If it has more than this then it isn't the pattern we are trying to legalize.

assert(OldResult->hasOneUse() &&
isa<ExtractValueInst>(*OldResult->user_begin()) &&

return false;
ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(*II->user_begin());
if (!EVI->hasOneUser())
return false;
auto *StoreII = dyn_cast<IntrinsicInst>(*EVI->user_begin());
if (!StoreII ||
StoreII->getIntrinsicID() != Intrinsic::dx_resource_store_rawbuffer)
return false;

BasicBlock *BB = II->getParent();
Function *F = BB->getParent();
IRBuilder<> Builder(StoreII);

// Create new basic blocks
BasicBlock *ThenBB = BasicBlock::Create(F->getContext(), "rawbuf.if", F);
BasicBlock *ElseBB = BasicBlock::Create(F->getContext(), "rawbuf.else", F);
BasicBlock *ContinueBB =
BB->splitBasicBlock(std::next(II->getIterator()), "rawbuf.continue");

// Remove the unconditional branch
BB->getTerminator()->eraseFromParent();

// Create a conditional branch based on the select condition
Builder.SetInsertPoint(BB);
Builder.CreateCondBr(Sel->getCondition(), ThenBB, ElseBB);

// Create the true path
Builder.SetInsertPoint(ThenBB);
Instruction *TrueResourceLoad = II->clone();
Instruction *TrueExtract = EVI->clone();
Instruction *TrueResourceStore = StoreII->clone();
TrueResourceLoad->setOperand(0, Sel->getTrueValue());
TrueExtract->setOperand(0, TrueResourceLoad);
TrueResourceStore->setOperand(3, TrueExtract);
Builder.Insert(TrueResourceLoad);
Builder.Insert(TrueExtract);
Builder.Insert(TrueResourceStore);
Builder.CreateBr(ContinueBB);

// Create the false path
Builder.SetInsertPoint(ElseBB);
Instruction *FalseResourceLoad = II->clone();
Instruction *FalseExtract = EVI->clone();
Instruction *FalseResourceStore = StoreII->clone();
FalseResourceLoad->setOperand(0, Sel->getFalseValue());
FalseExtract->setOperand(0, FalseResourceLoad);
FalseResourceStore->setOperand(3, FalseExtract);
Builder.Insert(FalseResourceLoad);
Builder.Insert(FalseExtract);
Builder.Insert(FalseResourceStore);
Builder.CreateBr(ContinueBB);

// Set up the merge block
Builder.SetInsertPoint(ContinueBB);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand that we want to set the insert point at the continue BB. But the position of the continue BB is before that of the If and Then BB's. Why, was this deliberate?
I'd expect the Continue BB to be positioned after the if and then BBs.
I had also thought that this wouldn't be possible since labels need to be declared ahead of time, but the select instruction references the if / then labels without forward declaration, so it should be possible to put the continue block after the then / else blocks, no?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

actually we don't need to do Builder.SetInsertPoint(ContinueBB); because we aren't adding any instructions to ContinueBB. so will delete this.

The if and else basic blocks can be anywhere we want them to be because we aren't doing any fall through cases. Each basic block ends with Builder.CreateBr(ContinueBB); What you are likely thinking of is maybe removing the unconditional branch in the else case and letting that fall through to the ContinueBB basic block. I suppose that could be an optimization, but it isn't required for correctness.

Not sure what you are talking about with forward declares. We do define the If\Else\Continue basic blocks before we do anything else.


// Mark the instructions for removal
ToRemove.push_back(Sel);
ToRemove.push_back(II);
ToRemove.push_back(EVI);
ToRemove.push_back(StoreII);

return true;
}

namespace {
class DXILLegalizationPipeline {

Expand Down Expand Up @@ -671,6 +760,7 @@ class DXILLegalizationPipeline {
LegalizationPipeline[Stage2].push_back(
downcastI64toI32InsertExtractElements);
LegalizationPipeline[Stage2].push_back(legalizeScalarLoadStoreOnArrays);
LegalizationPipeline[Stage2].push_back(legalizeBuffLoadSelectCalls);
}
};

Expand Down
40 changes: 40 additions & 0 deletions llvm/test/CodeGen/DirectX/issue-152348.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -passes='dxil-legalize' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s

@default_int= global i32 0
@.str = internal unnamed_addr constant [2 x i8] c"a\00", align 1

define void @CSMain() local_unnamed_addr {
; CHECK-LABEL: define void @CSMain() local_unnamed_addr {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = tail call target("dx.RawBuffer", half, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_f16_1_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str)
; CHECK-NEXT: [[TMP1:%.*]] = tail call target("dx.RawBuffer", half, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_f16_1_0t(i32 1, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str)
; CHECK-NEXT: [[TMP2:%.*]] = tail call target("dx.RawBuffer", half, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_f16_1_0t(i32 2, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str)
; CHECK-NEXT: [[INT:%.*]] = load i32, ptr @default_int, align 4
; CHECK-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[INT]], 0
; CHECK-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[RAWBUF_IF:.*]], label %[[RAWBUF_ELSE:.*]]
; CHECK: [[RAWBUF_CONTINUE:.*]]:
; CHECK-NEXT: ret void
; CHECK: [[RAWBUF_IF]]:
; CHECK-NEXT: [[TMP3:%.*]] = call { half, i1 } @llvm.dx.resource.load.rawbuffer.f16.tdx.RawBuffer_f16_1_0t(target("dx.RawBuffer", half, 1, 0) [[TMP0]], i32 0, i32 0)
; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { half, i1 } [[TMP3]], 0
; CHECK-NEXT: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_f16_1_0t.f16(target("dx.RawBuffer", half, 1, 0) [[TMP2]], i32 0, i32 0, half [[TMP4]])
; CHECK-NEXT: br label %[[RAWBUF_CONTINUE]]
; CHECK: [[RAWBUF_ELSE]]:
; CHECK-NEXT: [[TMP5:%.*]] = call { half, i1 } @llvm.dx.resource.load.rawbuffer.f16.tdx.RawBuffer_f16_1_0t(target("dx.RawBuffer", half, 1, 0) [[TMP1]], i32 0, i32 0)
; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { half, i1 } [[TMP5]], 0
; CHECK-NEXT: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_f16_1_0t.f16(target("dx.RawBuffer", half, 1, 0) [[TMP2]], i32 0, i32 0, half [[TMP6]])
; CHECK-NEXT: br label %[[RAWBUF_CONTINUE]]
;
entry:
%0 = tail call target("dx.RawBuffer", half, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_f16_1_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str)
%1 = tail call target("dx.RawBuffer", half, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_f16_1_0t(i32 1, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str)
%2 = tail call target("dx.RawBuffer", half, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_f16_1_0t(i32 2, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str)
%int = load i32, ptr @default_int
%tobool.not.i = icmp eq i32 %int, 0
%.rawbuffer = select i1 %tobool.not.i, target("dx.RawBuffer", half, 1, 0) %0, target("dx.RawBuffer", half, 1, 0) %1
%4 = call { half, i1 } @llvm.dx.resource.load.rawbuffer.f16.tdx.RawBuffer_f16_1_0t(target("dx.RawBuffer", half, 1, 0) %.rawbuffer, i32 0, i32 0)
%5 = extractvalue { half, i1 } %4, 0
call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_f16_1_0t.f16(target("dx.RawBuffer", half, 1, 0) %2, i32 0, i32 0, half %5)
ret void
}