From 001b09897fa9146464fe9cbbe10005291d5ced29 Mon Sep 17 00:00:00 2001 From: Farzon Lotfi Date: Fri, 15 Aug 2025 15:31:55 -0400 Subject: [PATCH] [DirectX] Undo SimplifyCFG by reversing select into if else basic blocks fixes #152348 SimplifyCFG collapses raw buffer store from a if\else load into a select. This change duplicates the instructions from the load to the store back into seperate blocks. --- llvm/lib/Target/DirectX/DXILLegalizePass.cpp | 90 ++++++++++++++++++++ llvm/test/CodeGen/DirectX/issue-152348.ll | 40 +++++++++ 2 files changed, 130 insertions(+) create mode 100644 llvm/test/CodeGen/DirectX/issue-152348.ll diff --git a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp index 3427968d199f6..8391ec2d332be 100644 --- a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp +++ b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp @@ -15,8 +15,11 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/IntrinsicsDirectX.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include @@ -622,6 +625,92 @@ legalizeScalarLoadStoreOnArrays(Instruction &I, return true; } +// Note: Legalization to undo SimplifyCFG. Ideally, SimplifyCFG's +// TargetTransformInfo would ignore our resource intrinsics, but +// it doesn't. This Works for a single select; multiple selects on +// raw buffer loads won’t be legalized. The hasOneUser for ExtractValueInst +// of dx_resource_load_rawbuffer is enforced in DXILOpLowering, +// so checking it here is fine. +static bool +legalizeBuffLoadSelectCalls(Instruction &I, + SmallVectorImpl &ToRemove, + DenseMap &) { + // Check if this is a dx_resource_load_rawbuffer intrinsic + auto *II = dyn_cast(&I); + if (!II || II->getIntrinsicID() != Intrinsic::dx_resource_load_rawbuffer) + return false; + + // Check if the first argument is a select instruction + Value *Arg0 = II->getArgOperand(0); + auto *Sel = dyn_cast(Arg0); + if (!Sel) + return false; + + if (!II->hasOneUser()) + return false; + ExtractValueInst *EVI = dyn_cast(*II->user_begin()); + if (!EVI->hasOneUser()) + return false; + auto *StoreII = dyn_cast(*EVI->user_begin()); + if (!StoreII || + StoreII->getIntrinsicID() != Intrinsic::dx_resource_store_rawbuffer) + return false; + + BasicBlock *BB = II->getParent(); + Function *F = BB->getParent(); + IRBuilder<> Builder(StoreII); + + // Create new basic blocks + BasicBlock *ThenBB = BasicBlock::Create(F->getContext(), "rawbuf.if", F); + BasicBlock *ElseBB = BasicBlock::Create(F->getContext(), "rawbuf.else", F); + BasicBlock *ContinueBB = + BB->splitBasicBlock(std::next(II->getIterator()), "rawbuf.continue"); + + // Remove the unconditional branch + BB->getTerminator()->eraseFromParent(); + + // Create a conditional branch based on the select condition + Builder.SetInsertPoint(BB); + Builder.CreateCondBr(Sel->getCondition(), ThenBB, ElseBB); + + // Create the true path + Builder.SetInsertPoint(ThenBB); + Instruction *TrueResourceLoad = II->clone(); + Instruction *TrueExtract = EVI->clone(); + Instruction *TrueResourceStore = StoreII->clone(); + TrueResourceLoad->setOperand(0, Sel->getTrueValue()); + TrueExtract->setOperand(0, TrueResourceLoad); + TrueResourceStore->setOperand(3, TrueExtract); + Builder.Insert(TrueResourceLoad); + Builder.Insert(TrueExtract); + Builder.Insert(TrueResourceStore); + Builder.CreateBr(ContinueBB); + + // Create the false path + Builder.SetInsertPoint(ElseBB); + Instruction *FalseResourceLoad = II->clone(); + Instruction *FalseExtract = EVI->clone(); + Instruction *FalseResourceStore = StoreII->clone(); + FalseResourceLoad->setOperand(0, Sel->getFalseValue()); + FalseExtract->setOperand(0, FalseResourceLoad); + FalseResourceStore->setOperand(3, FalseExtract); + Builder.Insert(FalseResourceLoad); + Builder.Insert(FalseExtract); + Builder.Insert(FalseResourceStore); + Builder.CreateBr(ContinueBB); + + // Set up the merge block + Builder.SetInsertPoint(ContinueBB); + + // Mark the instructions for removal + ToRemove.push_back(Sel); + ToRemove.push_back(II); + ToRemove.push_back(EVI); + ToRemove.push_back(StoreII); + + return true; +} + namespace { class DXILLegalizationPipeline { @@ -671,6 +760,7 @@ class DXILLegalizationPipeline { LegalizationPipeline[Stage2].push_back( downcastI64toI32InsertExtractElements); LegalizationPipeline[Stage2].push_back(legalizeScalarLoadStoreOnArrays); + LegalizationPipeline[Stage2].push_back(legalizeBuffLoadSelectCalls); } }; diff --git a/llvm/test/CodeGen/DirectX/issue-152348.ll b/llvm/test/CodeGen/DirectX/issue-152348.ll new file mode 100644 index 0000000000000..92e7b7bfa727e --- /dev/null +++ b/llvm/test/CodeGen/DirectX/issue-152348.ll @@ -0,0 +1,40 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -passes='dxil-legalize' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s + +@default_int= global i32 0 +@.str = internal unnamed_addr constant [2 x i8] c"a\00", align 1 + +define void @CSMain() local_unnamed_addr { +; CHECK-LABEL: define void @CSMain() local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = tail call target("dx.RawBuffer", half, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_f16_1_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str) +; CHECK-NEXT: [[TMP1:%.*]] = tail call target("dx.RawBuffer", half, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_f16_1_0t(i32 1, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str) +; CHECK-NEXT: [[TMP2:%.*]] = tail call target("dx.RawBuffer", half, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_f16_1_0t(i32 2, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str) +; CHECK-NEXT: [[INT:%.*]] = load i32, ptr @default_int, align 4 +; CHECK-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[INT]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT_I]], label %[[RAWBUF_IF:.*]], label %[[RAWBUF_ELSE:.*]] +; CHECK: [[RAWBUF_CONTINUE:.*]]: +; CHECK-NEXT: ret void +; CHECK: [[RAWBUF_IF]]: +; CHECK-NEXT: [[TMP3:%.*]] = call { half, i1 } @llvm.dx.resource.load.rawbuffer.f16.tdx.RawBuffer_f16_1_0t(target("dx.RawBuffer", half, 1, 0) [[TMP0]], i32 0, i32 0) +; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { half, i1 } [[TMP3]], 0 +; CHECK-NEXT: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_f16_1_0t.f16(target("dx.RawBuffer", half, 1, 0) [[TMP2]], i32 0, i32 0, half [[TMP4]]) +; CHECK-NEXT: br label %[[RAWBUF_CONTINUE]] +; CHECK: [[RAWBUF_ELSE]]: +; CHECK-NEXT: [[TMP5:%.*]] = call { half, i1 } @llvm.dx.resource.load.rawbuffer.f16.tdx.RawBuffer_f16_1_0t(target("dx.RawBuffer", half, 1, 0) [[TMP1]], i32 0, i32 0) +; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { half, i1 } [[TMP5]], 0 +; CHECK-NEXT: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_f16_1_0t.f16(target("dx.RawBuffer", half, 1, 0) [[TMP2]], i32 0, i32 0, half [[TMP6]]) +; CHECK-NEXT: br label %[[RAWBUF_CONTINUE]] +; +entry: + %0 = tail call target("dx.RawBuffer", half, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_f16_1_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str) + %1 = tail call target("dx.RawBuffer", half, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_f16_1_0t(i32 1, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str) + %2 = tail call target("dx.RawBuffer", half, 1, 0) @llvm.dx.resource.handlefromimplicitbinding.tdx.RawBuffer_f16_1_0t(i32 2, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str) + %int = load i32, ptr @default_int + %tobool.not.i = icmp eq i32 %int, 0 + %.rawbuffer = select i1 %tobool.not.i, target("dx.RawBuffer", half, 1, 0) %0, target("dx.RawBuffer", half, 1, 0) %1 + %4 = call { half, i1 } @llvm.dx.resource.load.rawbuffer.f16.tdx.RawBuffer_f16_1_0t(target("dx.RawBuffer", half, 1, 0) %.rawbuffer, i32 0, i32 0) + %5 = extractvalue { half, i1 } %4, 0 + call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_f16_1_0t.f16(target("dx.RawBuffer", half, 1, 0) %2, i32 0, i32 0, half %5) + ret void +}