Skip to content

Commit 11f52ec

Browse files
authored
[NVPTX] Mark callseq insts as reading and writing memory (#151376)
In order to prevent the st.param and ld.param instructions which store parameters and load return values from being sunk or hoisted out of a call sequence, mark the callseq start and end nodes as reading and writing memory. Fixes #151329
1 parent 729b0d1 commit 11f52ec

File tree

2 files changed

+85
-32
lines changed

2 files changed

+85
-32
lines changed

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 38 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1774,6 +1774,18 @@ def : Pat<(declare_array_param externalsym:$a, imm:$align, imm:$size),
17741774
def : Pat<(declare_scalar_param externalsym:$a, imm:$size),
17751775
(DECLARE_PARAM_scalar (to_texternsym $a), imm:$size)>;
17761776

1777+
// Call prototype wrapper, this is a dummy instruction that just prints its
1778+
// operand, which is a string defining the prototype.
1779+
def SDTCallPrototype : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
1780+
def CallPrototype :
1781+
SDNode<"NVPTXISD::CallPrototype", SDTCallPrototype,
1782+
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
1783+
def ProtoIdent : Operand<i32> { let PrintMethod = "printProtoIdent"; }
1784+
def CALL_PROTOTYPE :
1785+
NVPTXInst<(outs), (ins ProtoIdent:$ident),
1786+
"$ident", [(CallPrototype (i32 texternalsym:$ident))]>;
1787+
1788+
17771789
foreach t = [I32RT, I64RT] in {
17781790
defvar inst_name = "MOV" # t.Size # "_PARAM";
17791791
def inst_name : BasicNVPTXInst<(outs t.RC:$dst), (ins t.RC:$src), "mov.b" # t.Size>;
@@ -1793,6 +1805,32 @@ defm ProxyRegB16 : ProxyRegInst<"b16", B16>;
17931805
defm ProxyRegB32 : ProxyRegInst<"b32", B32>;
17941806
defm ProxyRegB64 : ProxyRegInst<"b64", B64>;
17951807

1808+
1809+
// Callseq start and end
1810+
1811+
// Note: these nodes are marked as SDNPMayStore and SDNPMayLoad because
1812+
// they define the scope in which the declared params may be used. Therefore
1813+
// we add these flags to ensure ld.param and st.param are not sunk or hoisted
1814+
// out of that scope.
1815+
1816+
def callseq_start : SDNode<"ISD::CALLSEQ_START",
1817+
SDCallSeqStart<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>,
1818+
[SDNPHasChain, SDNPOutGlue,
1819+
SDNPSideEffect, SDNPMayStore, SDNPMayLoad]>;
1820+
def callseq_end : SDNode<"ISD::CALLSEQ_END",
1821+
SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>,
1822+
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
1823+
SDNPSideEffect, SDNPMayStore, SDNPMayLoad]>;
1824+
1825+
def Callseq_Start :
1826+
NVPTXInst<(outs), (ins i32imm:$amt1, i32imm:$amt2),
1827+
"\\{ // callseq $amt1, $amt2",
1828+
[(callseq_start timm:$amt1, timm:$amt2)]>;
1829+
def Callseq_End :
1830+
NVPTXInst<(outs), (ins i32imm:$amt1, i32imm:$amt2),
1831+
"\\} // callseq $amt1",
1832+
[(callseq_end timm:$amt1, timm:$amt2)]>;
1833+
17961834
//
17971835
// Load / Store Handling
17981836
//
@@ -2336,26 +2374,6 @@ def : Pat<(brcond i32:$a, bb:$target),
23362374
def : Pat<(brcond (i1 (setne i1:$a, -1)), bb:$target),
23372375
(CBranchOther $a, bb:$target)>;
23382376

2339-
// Call
2340-
def SDT_NVPTXCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>,
2341-
SDTCisVT<1, i32>]>;
2342-
def SDT_NVPTXCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
2343-
2344-
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_NVPTXCallSeqStart,
2345-
[SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
2346-
def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_NVPTXCallSeqEnd,
2347-
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
2348-
SDNPSideEffect]>;
2349-
2350-
def Callseq_Start :
2351-
NVPTXInst<(outs), (ins i32imm:$amt1, i32imm:$amt2),
2352-
"\\{ // callseq $amt1, $amt2",
2353-
[(callseq_start timm:$amt1, timm:$amt2)]>;
2354-
def Callseq_End :
2355-
NVPTXInst<(outs), (ins i32imm:$amt1, i32imm:$amt2),
2356-
"\\} // callseq $amt1",
2357-
[(callseq_end timm:$amt1, timm:$amt2)]>;
2358-
23592377
// trap instruction
23602378
def trapinst : BasicNVPTXInst<(outs), (ins), "trap", [(trap)]>, Requires<[noPTXASUnreachableBug]>;
23612379
// Emit an `exit` as well to convey to ptxas that `trap` exits the CFG.
@@ -2364,18 +2382,6 @@ def trapexitinst : NVPTXInst<(outs), (ins), "trap; exit;", [(trap)]>, Requires<[
23642382
// brkpt instruction
23652383
def debugtrapinst : BasicNVPTXInst<(outs), (ins), "brkpt", [(debugtrap)]>;
23662384

2367-
// Call prototype wrapper
2368-
def SDTCallPrototype : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
2369-
def CallPrototype :
2370-
SDNode<"NVPTXISD::CallPrototype", SDTCallPrototype,
2371-
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
2372-
def ProtoIdent : Operand<i32> {
2373-
let PrintMethod = "printProtoIdent";
2374-
}
2375-
def CALL_PROTOTYPE :
2376-
NVPTXInst<(outs), (ins ProtoIdent:$ident),
2377-
"$ident", [(CallPrototype (i32 texternalsym:$ident))]>;
2378-
23792385
def SDTDynAllocaOp :
23802386
SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisInt<1>, SDTCisVT<2, i32>]>;
23812387

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -verify-machineinstrs | FileCheck %s
3+
; RUN: %if ptxas %{ llc < %s | %ptxas-verify %}
4+
5+
target triple = "nvptx64-nvidia-cuda"
6+
7+
declare ptr @bar(i64)
8+
declare i64 @baz()
9+
10+
define ptr @foo(i1 %cond) {
11+
; CHECK-LABEL: foo(
12+
; CHECK: {
13+
; CHECK-NEXT: .reg .pred %p<2>;
14+
; CHECK-NEXT: .reg .b16 %rs<3>;
15+
; CHECK-NEXT: .reg .b64 %rd<3>;
16+
; CHECK-EMPTY:
17+
; CHECK-NEXT: // %bb.0: // %entry
18+
; CHECK-NEXT: ld.param.b8 %rs1, [foo_param_0];
19+
; CHECK-NEXT: and.b16 %rs2, %rs1, 1;
20+
; CHECK-NEXT: setp.ne.b16 %p1, %rs2, 0;
21+
; CHECK-NEXT: { // callseq 0, 0
22+
; CHECK-NEXT: .param .b64 retval0;
23+
; CHECK-NEXT: call.uni (retval0), baz, ();
24+
; CHECK-NEXT: ld.param.b64 %rd2, [retval0];
25+
; CHECK-NEXT: } // callseq 0
26+
; CHECK-NEXT: @%p1 bra $L__BB0_2;
27+
; CHECK-NEXT: // %bb.1: // %bb
28+
; CHECK-NEXT: { // callseq 1, 0
29+
; CHECK-NEXT: .param .b64 param0;
30+
; CHECK-NEXT: .param .b64 retval0;
31+
; CHECK-NEXT: st.param.b64 [param0], %rd2;
32+
; CHECK-NEXT: call.uni (retval0), bar, (param0);
33+
; CHECK-NEXT: } // callseq 1
34+
; CHECK-NEXT: $L__BB0_2: // %common.ret
35+
; CHECK-NEXT: st.param.b64 [func_retval0], 0;
36+
; CHECK-NEXT: ret;
37+
entry:
38+
%call = call i64 @baz()
39+
br i1 %cond, label %common.ret, label %bb
40+
41+
bb:
42+
%tmp = call ptr @bar(i64 %call)
43+
br label %common.ret
44+
45+
common.ret:
46+
ret ptr null
47+
}

0 commit comments

Comments
 (0)