Skip to content

Commit 6d8e53d

Browse files
authored
[AMDGPU] Support nv memory instructions modifier on gfx1250 (#149582)
1 parent 1b8a136 commit 6d8e53d

13 files changed

+177
-3
lines changed

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5280,6 +5280,15 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
52805280

52815281
unsigned CPol = Inst.getOperand(CPolPos).getImm();
52825282

5283+
if (!isGFX1250()) {
5284+
if (CPol & CPol::NV) {
5285+
SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
5286+
StringRef CStr(S.getPointer());
5287+
S = SMLoc::getFromPointer(&CStr.data()[CStr.find("nv")]);
5288+
Error(S, "nv is not supported on this GPU");
5289+
}
5290+
}
5291+
52835292
if (isGFX12Plus())
52845293
return validateTHAndScopeBits(Inst, Operands, CPol);
52855294

@@ -6916,6 +6925,7 @@ ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
69166925
int64_t CPolVal = 0;
69176926
ParseStatus ResTH = ParseStatus::NoMatch;
69186927
ParseStatus ResScope = ParseStatus::NoMatch;
6928+
ParseStatus ResNV = ParseStatus::NoMatch;
69196929

69206930
for (;;) {
69216931
if (ResTH.isNoMatch()) {
@@ -6940,10 +6950,24 @@ ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
69406950
}
69416951
}
69426952

6953+
// NV bit exists on GFX12+, but only has an effect starting from GFX1250.
6954+
// Allow parsing on all GFX12 and fail on validation for better
6955+
// diagnostics.
6956+
if (ResNV.isNoMatch()) {
6957+
if (trySkipId("nv")) {
6958+
ResNV = ParseStatus::Success;
6959+
CPolVal |= CPol::NV;
6960+
continue;
6961+
} else if (trySkipId("no", "nv")) {
6962+
ResNV = ParseStatus::Success;
6963+
continue;
6964+
}
6965+
}
6966+
69436967
break;
69446968
}
69456969

6946-
if (ResTH.isNoMatch() && ResScope.isNoMatch())
6970+
if (ResTH.isNoMatch() && ResScope.isNoMatch() && ResNV.isNoMatch())
69476971
return ParseStatus::NoMatch;
69486972

69496973
Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,

llvm/lib/Target/AMDGPU/BUFInstructions.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2451,6 +2451,7 @@ class VBUFFER_Real <bits<8> op, BUF_Pseudo ps, string real_name> :
24512451
let Inst{62} = ps.offen;
24522452
let Inst{63} = ps.idxen;
24532453

2454+
let Inst{7} = cpol{5}; // nv
24542455
let Inst{54-53} = cpol{2-1}; // th{2-1}
24552456
let Inst{52} = !if(ps.IsAtomicRet, 1, cpol{0}); // th{0}
24562457
let Inst{51-50} = cpol{4-3}; // scope

llvm/lib/Target/AMDGPU/FLATInstructions.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ class VFLAT_Real <bits<8> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> :
183183

184184
bits<7> saddr;
185185
bits<8> vdst;
186-
bits<6> cpol;
186+
bits<12> cpol;
187187
bits<8> vdata; // vsrc
188188
bits<8> vaddr;
189189
bits<24> offset;
@@ -193,6 +193,7 @@ class VFLAT_Real <bits<8> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> :
193193
let Inst{31-26} = 0x3b;
194194
let Inst{39-32} = !if(ps.has_vdst, vdst, ?);
195195
let Inst{49} = ps.sve;
196+
let Inst{7} = cpol{5}; // nv
196197
let Inst{54-53} = cpol{2-1}; // th{2-1}
197198
let Inst{52} = !if(ps.IsAtomicRet, 1, cpol{0}); // th{0}
198199
let Inst{51-50} = cpol{4-3}; // scope

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,9 @@ void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo,
160160
printTH(MI, TH, Scope, O);
161161
printScope(Scope, O);
162162

163+
if (Imm & CPol::NV)
164+
O << " nv";
165+
163166
return;
164167
}
165168

llvm/lib/Target/AMDGPU/SIDefines.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -398,6 +398,8 @@ enum CPol {
398398
SCOPE_DEV = 2 << 3,
399399
SCOPE_SYS = 3 << 3,
400400

401+
NV = 1 << 5, // Non-volatile bit
402+
401403
SWZ = 1 << 6, // Swizzle bit
402404

403405
ALL = TH | SCOPE,

llvm/lib/Target/AMDGPU/SIInstrFormats.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,7 @@ def CPolBit {
317317
int SLC = 1;
318318
int DLC = 2;
319319
int SCC = 4;
320+
int NV = 5;
320321
}
321322

322323
class VOPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVOPDst">;

llvm/lib/Target/AMDGPU/SMInstructions.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ class SM_Real <SM_Pseudo ps, string opName = ps.Mnemonic>
8787
bits<7> sdst;
8888
bits<32> offset;
8989
bits<8> soffset;
90-
bits<5> cpol;
90+
bits<12> cpol;
9191
}
9292

9393
class OffsetMode<bit hasOffset, bit hasSOffset, string variant,
@@ -1485,6 +1485,7 @@ class SMEM_Real_Load_gfx12<bits<6> op, string ps, string opName, OffsetMode offs
14851485
RegisterClass BaseClass = !cast<SM_Load_Pseudo>(ps # offsets.Variant).BaseClass;
14861486
let InOperandList = !con((ins BaseClass:$sbase), offsets.Ins, (ins CPol:$cpol));
14871487

1488+
let Inst{20} = cpol{CPolBit.NV}; // non-volatile
14881489
let Inst{22-21} = cpol{4-3}; // scope
14891490
let Inst{24-23} = cpol{1-0}; // th - only lower 2 bits are supported
14901491
}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
2+
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s
3+
4+
s_load_b32 s4, s[2:3], 10 nv
5+
// GFX1250: s_load_b32 s4, s[2:3], 0xa nv ; encoding: [0x01,0x01,0x10,0xf4,0x0a,0x00,0x00,0xf8]
6+
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
7+
// GFX12-ERR-NEXT:{{^}}s_load_b32 s4, s[2:3], 10 nv
8+
// GFX12-ERR-NEXT:{{^}} ^
9+
10+
s_buffer_load_i8 s5, s[4:7], s0 nv
11+
// GFX1250: s_buffer_load_i8 s5, s[4:7], s0 offset:0x0 nv ; encoding: [0x42,0x01,0x13,0xf4,0x00,0x00,0x00,0x00]
12+
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
13+
// GFX12-ERR-NEXT:{{^}}s_buffer_load_i8 s5, s[4:7], s0 nv
14+
// GFX12-ERR-NEXT:{{^}} ^
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
2+
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s
3+
4+
buffer_load_b32 v5, v1, s[8:11], s3 offen offset:4095 nv
5+
// GFX1250: buffer_load_b32 v5, v1, s[8:11], s3 offen offset:4095 nv ; encoding: [0x83,0x00,0x05,0xc4,0x05,0x10,0x80,0x40,0x01,0xff,0x0f,0x00]
6+
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
7+
// GFX12-ERR-NEXT:{{^}}buffer_load_b32 v5, v1, s[8:11], s3 offen offset:4095 nv
8+
// GFX12-ERR-NEXT:{{^}} ^
9+
10+
buffer_store_b128 v[2:5], v0, s[12:15], s4 idxen offset:4095 nv
11+
// GFX1250: buffer_store_b128 v[2:5], v0, s[12:15], s4 idxen offset:4095 nv ; encoding: [0x84,0x40,0x07,0xc4,0x02,0x18,0x80,0x80,0x00,0xff,0x0f,0x00]
12+
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
13+
// GFX12-ERR-NEXT:{{^}}buffer_store_b128 v[2:5], v0, s[12:15], s4 idxen offset:4095 nv
14+
// GFX12-ERR-NEXT:{{^}} ^
15+
16+
buffer_atomic_and_b32 v5, v1, s[8:11], s3 offen offset:4095 nv
17+
// GFX1250: buffer_atomic_and_b32 v5, v1, s[8:11], s3 offen offset:4095 nv ; encoding: [0x83,0x00,0x0f,0xc4,0x05,0x10,0x80,0x40,0x01,0xff,0x0f,0x00]
18+
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
19+
// GFX12-ERR-NEXT:{{^}}buffer_atomic_and_b32 v5, v1, s[8:11], s3 offen offset:4095 nv
20+
// GFX12-ERR-NEXT:{{^}} ^

llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,66 @@
11
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
22
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s
33

4+
global_load_b32 v0, v[2:3], off nv
5+
// GFX1250: global_load_b32 v0, v[2:3], off nv ; encoding: [0xfc,0x00,0x05,0xee,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
6+
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
7+
// GFX12-ERR-NEXT:{{^}}global_load_b32 v0, v[2:3], off nv
8+
// GFX12-ERR-NEXT:{{^}} ^
9+
10+
global_store_b32 v[2:3], v0, off nv
11+
// GFX1250: global_store_b32 v[2:3], v0, off nv ; encoding: [0xfc,0x80,0x06,0xee,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
12+
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
13+
// GFX12-ERR-NEXT:{{^}}global_store_b32 v[2:3], v0, off nv
14+
// GFX12-ERR-NEXT:{{^}} ^
15+
16+
global_atomic_add v[2:3], v2, off nv
17+
// GFX1250: global_atomic_add_u32 v[2:3], v2, off nv ; encoding: [0xfc,0x40,0x0d,0xee,0x00,0x00,0x00,0x01,0x02,0x00,0x00,0x00]
18+
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
19+
// GFX12-ERR-NEXT:{{^}}global_atomic_add v[2:3], v2, off nv
20+
// GFX12-ERR-NEXT:{{^}} ^
21+
22+
global_load_addtid_b32 v5, s[2:3] nv
23+
// GFX1250: global_load_addtid_b32 v5, s[2:3] nv ; encoding: [0x82,0x00,0x0a,0xee,0x05,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
24+
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
25+
// GFX12-ERR-NEXT:{{^}}global_load_addtid_b32 v5, s[2:3] nv
26+
// GFX12-ERR-NEXT:{{^}} ^
27+
28+
scratch_load_b32 v0, v2, off nv
29+
// GFX1250: scratch_load_b32 v0, v2, off nv ; encoding: [0xfc,0x00,0x05,0xed,0x00,0x00,0x02,0x00,0x02,0x00,0x00,0x00]
30+
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
31+
// GFX12-ERR-NEXT:{{^}}scratch_load_b32 v0, v2, off nv
32+
// GFX12-ERR-NEXT:{{^}} ^
33+
34+
scratch_store_b32 v2, v0, off nv
35+
// GFX1250: scratch_store_b32 v2, v0, off nv ; encoding: [0xfc,0x80,0x06,0xed,0x00,0x00,0x02,0x00,0x02,0x00,0x00,0x00]
36+
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
37+
// GFX12-ERR-NEXT:{{^}}scratch_store_b32 v2, v0, off nv
38+
// GFX12-ERR-NEXT:{{^}} ^
39+
40+
flat_load_b32 v0, v[2:3] nv
41+
// GFX1250: flat_load_b32 v0, v[2:3] nv ; encoding: [0xfc,0x00,0x05,0xec,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
42+
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
43+
// GFX12-ERR-NEXT:{{^}}flat_load_b32 v0, v[2:3] nv
44+
// GFX12-ERR-NEXT:{{^}} ^
45+
46+
flat_store_b32 v[2:3], v0 nv
47+
// GFX1250: flat_store_b32 v[2:3], v0 nv ; encoding: [0xfc,0x80,0x06,0xec,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
48+
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
49+
// GFX12-ERR-NEXT:{{^}}flat_store_b32 v[2:3], v0 nv
50+
// GFX12-ERR-NEXT:{{^}} ^
51+
52+
flat_atomic_add v[2:3], v2 nv
53+
// GFX1250: flat_atomic_add_u32 v[2:3], v2 nv ; encoding: [0xfc,0x40,0x0d,0xec,0x00,0x00,0x00,0x01,0x02,0x00,0x00,0x00]
54+
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
55+
// GFX12-ERR-NEXT:{{^}}flat_atomic_add v[2:3], v2 nv
56+
// GFX12-ERR-NEXT:{{^}} ^
57+
58+
scratch_load_b32 v5, v2, off nv
59+
// GFX1250: scratch_load_b32 v5, v2, off nv ; encoding: [0xfc,0x00,0x05,0xed,0x05,0x00,0x02,0x00,0x02,0x00,0x00,0x00]
60+
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: nv is not supported on this GPU
61+
// GFX12-ERR-NEXT:{{^}}scratch_load_b32 v5, v2, off nv
62+
// GFX12-ERR-NEXT:{{^}} ^
63+
464
tensor_save s[0:1]
565
// GFX1250: tensor_save s[0:1] ; encoding: [0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
666
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

0 commit comments

Comments
 (0)