Skip to content

[AMDGPU] wip: MIR pretty printing for S_WAITCNT_FENCE_soft #150391

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Conversation

ssahasra
Copy link
Collaborator

@ssahasra ssahasra commented Jul 24, 2025

@llvmbot
Copy link
Member

llvmbot commented Jul 24, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Sameer Sahasrabuddhe (ssahasra)

Changes

Patch is 34.95 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/150391.diff

7 Files Affected:

  • (modified) llvm/lib/CodeGen/MIRParser/MIParser.cpp (+10-15)
  • (modified) llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp (+161)
  • (modified) llvm/lib/Target/AMDGPU/SIDefines.h (+6-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll (+36-36)
  • (added) llvm/test/CodeGen/AMDGPU/fence-parameters.mir (+29)
  • (modified) llvm/test/CodeGen/AMDGPU/insert-waitcnts-fence-soft.mir (+9-9)
  • (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-local.mir (+12-12)
diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index 3a364d5ff0d20..c8ad286a87a35 100644
--- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -1850,28 +1850,25 @@ bool MIParser::parseImmediateOperand(MachineOperand &Dest) {
   return false;
 }
 
+// The target mnemonic is an expression of the form:
+//
+//     Dot(IntegerLiteral|Identifier|Dot)+
+//
+// We could be stricter like not terminating in a dot, but that's not important
+// where this is being used.
 bool MIParser::parseTargetImmMnemonic(const unsigned OpCode,
                                       const unsigned OpIdx,
                                       MachineOperand &Dest,
                                       const MIRFormatter &MF) {
   assert(Token.is(MIToken::dot));
   auto Loc = Token.location(); // record start position
-  size_t Len = 1;              // for "."
-  lex();
-
-  // Handle the case that mnemonic starts with number.
-  if (Token.is(MIToken::IntegerLiteral)) {
+  size_t Len = 0;
+  while (Token.is(MIToken::IntegerLiteral) || Token.is(MIToken::dot) ||
+         Token.is(MIToken::Identifier)) {
     Len += Token.range().size();
     lex();
   }
-
-  StringRef Src;
-  if (Token.is(MIToken::comma))
-    Src = StringRef(Loc, Len);
-  else {
-    assert(Token.is(MIToken::Identifier));
-    Src = StringRef(Loc, Len + Token.stringValue().size());
-  }
+  StringRef Src(Loc, Len);
   int64_t Val;
   if (MF.parseImmMnemonic(OpCode, OpIdx, Src, Val,
                           [this](StringRef::iterator Loc, const Twine &Msg)
@@ -1879,8 +1876,6 @@ bool MIParser::parseTargetImmMnemonic(const unsigned OpCode,
     return true;
 
   Dest = MachineOperand::CreateImm(Val);
-  if (!Token.is(MIToken::comma))
-    lex();
   return false;
 }
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp
index 75e3d8c426e73..f318d6ffc1bae 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp
@@ -12,10 +12,135 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPUMIRFormatter.h"
+#include "SIDefines.h"
 #include "SIMachineFunctionInfo.h"
 
 using namespace llvm;
 
+bool parseAtomicOrdering(StringRef Src, unsigned &Order) {
+  Src.consume_front(".");
+  for (unsigned I = 0; I <= (unsigned)AtomicOrdering::LAST; ++I) {
+    if (Src == toIRString((AtomicOrdering)I)) {
+      Order = I;
+      return true;
+    }
+  }
+  Order = ~0u;
+  return false;
+}
+
+static const char *fmtScope(unsigned Scope) {
+  static const char *Names[] = {"none",      "singlethread", "wavefront",
+                                "workgroup", "agent",        "system"};
+  return Names[Scope];
+}
+
+bool parseAtomicScope(StringRef Src, unsigned &Scope) {
+  Src.consume_front(".");
+  for (unsigned I = 0;
+       I != (unsigned)AMDGPU::SIAtomicScope::NUM_SI_ATOMIC_SCOPES; ++I) {
+    if (Src == fmtScope(I)) {
+      Scope = I;
+      return true;
+    }
+  }
+  Scope = ~0u;
+  return false;
+}
+
+static const char *fmtAddrSpace(unsigned Space) {
+  static const char *Names[] = {"none",    "global", "lds",
+                                "scratch", "gds",    "other"};
+  return Names[Space];
+}
+
+bool parseOneAddrSpace(StringRef Src, unsigned &AddrSpace) {
+  if (Src == "none") {
+    AddrSpace = (unsigned)AMDGPU::SIAtomicAddrSpace::NONE;
+    return true;
+  }
+  if (Src == "flat") {
+    AddrSpace = (unsigned)AMDGPU::SIAtomicAddrSpace::FLAT;
+    return true;
+  }
+  if (Src == "atomic") {
+    AddrSpace = (unsigned)AMDGPU::SIAtomicAddrSpace::ATOMIC;
+    return true;
+  }
+  if (Src == "all") {
+    AddrSpace = (unsigned)AMDGPU::SIAtomicAddrSpace::ALL;
+    return true;
+  }
+  for (unsigned I = 1, A = 1; A <= (unsigned)AMDGPU::SIAtomicAddrSpace::LAST;
+       A <<= 1, ++I) {
+    if (Src == fmtAddrSpace(I)) {
+      AddrSpace = A;
+      return true;
+    }
+  }
+  AddrSpace = ~0u;
+  return false;
+}
+
+bool parseAddrSpace(StringRef Src, unsigned &AddrSpace) {
+  Src = Src.trim();
+  Src.consume_front(".");
+  while (!Src.empty()) {
+    auto [First, Rest] = Src.split('.');
+    unsigned OneSpace;
+    if (!parseOneAddrSpace(First, OneSpace))
+      return false;
+    AddrSpace |= OneSpace;
+    Src = Rest;
+  }
+  return true;
+}
+
+static void fmtAddrSpace(raw_ostream &OS, int64_t Imm) {
+  OS << '.';
+  if (Imm == (unsigned)AMDGPU::SIAtomicAddrSpace::NONE) {
+    OS << "none";
+    return;
+  }
+  if (Imm == (unsigned)AMDGPU::SIAtomicAddrSpace::FLAT) {
+    OS << "flat";
+    return;
+  }
+  if (Imm == (unsigned)AMDGPU::SIAtomicAddrSpace::ATOMIC) {
+    OS << "atomic";
+    return;
+  }
+  if (Imm == (unsigned)AMDGPU::SIAtomicAddrSpace::ALL) {
+    OS << "all";
+    return;
+  }
+
+  ListSeparator LS{"."};
+  auto AddrSpace = (AMDGPU::SIAtomicAddrSpace)Imm;
+  const auto LAST = (unsigned)AMDGPU::SIAtomicAddrSpace::LAST;
+
+  for (unsigned A = 1, I = 1; A <= LAST; A <<= 1, ++I) {
+    if (any(AddrSpace & (AMDGPU::SIAtomicAddrSpace)A))
+      OS << LS << StringRef(fmtAddrSpace(I));
+  }
+}
+
+static void printFenceOperand(raw_ostream &OS, const MachineInstr &MI,
+                              std::optional<unsigned int> OpIdx, int64_t Imm) {
+#define GET_IDX(Name)                                                          \
+  AMDGPU::getNamedOperandIdx(AMDGPU::S_WAITCNT_FENCE_soft, AMDGPU::OpName::Name)
+  if (OpIdx == GET_IDX(Ordering)) {
+    assert(Imm <= (unsigned)AtomicOrdering::LAST);
+    OS << '.' << StringRef(toIRString((AtomicOrdering)Imm));
+  } else if (OpIdx == GET_IDX(Scope)) {
+    assert(Imm < (unsigned)AMDGPU::SIAtomicScope::NUM_SI_ATOMIC_SCOPES);
+    OS << '.' << StringRef(fmtScope(Imm));
+  } else if (OpIdx == GET_IDX(AddrSpace)) {
+    fmtAddrSpace(OS, Imm);
+  }
+#undef GET_IDX
+}
+
 void AMDGPUMIRFormatter::printImm(raw_ostream &OS, const MachineInstr &MI,
                       std::optional<unsigned int> OpIdx, int64_t Imm) const {
 
@@ -24,12 +149,46 @@ void AMDGPUMIRFormatter::printImm(raw_ostream &OS, const MachineInstr &MI,
     assert(OpIdx == 0);
     printSDelayAluImm(Imm, OS);
     break;
+  case AMDGPU::S_WAITCNT_FENCE_soft:
+    printFenceOperand(OS, MI, OpIdx, Imm);
+    break;
   default:
     MIRFormatter::printImm(OS, MI, OpIdx, Imm);
     break;
   }
 }
 
+static bool
+parseFenceParameter(const unsigned int OpIdx, int64_t &Imm,
+                    llvm::StringRef &Src,
+                    llvm::MIRFormatter::ErrorCallbackType &ErrorCallback) {
+#define GET_IDX(Name)                                                          \
+  AMDGPU::getNamedOperandIdx(AMDGPU::S_WAITCNT_FENCE_soft, AMDGPU::OpName::Name)
+  if (OpIdx == (unsigned)GET_IDX(Ordering)) {
+    unsigned Order = 0;
+    if (!parseAtomicOrdering(Src, Order))
+      return ErrorCallback(Src.begin(), "Expected atomic ordering");
+    Imm = Order;
+    return false;
+  }
+  if (OpIdx == (unsigned)GET_IDX(Scope)) {
+    unsigned Scope = 0;
+    if (!parseAtomicScope(Src, Scope))
+      return ErrorCallback(Src.begin(), "Expected atomic scope");
+    Imm = Scope;
+    return false;
+  }
+  if (OpIdx == (unsigned)GET_IDX(AddrSpace)) {
+    unsigned AddrSpace = 0;
+    if (!parseAddrSpace(Src, AddrSpace))
+      return ErrorCallback(Src.begin(), "Expected address space");
+    Imm = AddrSpace;
+    return false;
+  }
+  return true;
+#undef GET_IDX
+}
+
 /// Implement target specific parsing of immediate mnemonics. The mnemonic is
 /// a string with a leading dot.
 bool AMDGPUMIRFormatter::parseImmMnemonic(const unsigned OpCode,
@@ -41,6 +200,8 @@ bool AMDGPUMIRFormatter::parseImmMnemonic(const unsigned OpCode,
   switch (OpCode) {
   case AMDGPU::S_DELAY_ALU:
     return parseSDelayAluImmMnemonic(OpIdx, Imm, Src, ErrorCallback);
+  case AMDGPU::S_WAITCNT_FENCE_soft:
+    return parseFenceParameter(OpIdx, Imm, Src, ErrorCallback);
   default:
     break;
   }
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 9d30951cac1a3..d7c2aff1d3411 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -421,13 +421,16 @@ enum CPol {
 } // namespace CPol
 
 /// The atomic synchronization scopes supported by the AMDGPU target.
+//
+// Note: Update the strings in AMDGPUMIRFormatter.cpp to match this enum.
 enum class SIAtomicScope {
   NONE,
   SINGLETHREAD,
   WAVEFRONT,
   WORKGROUP,
   AGENT,
-  SYSTEM
+  SYSTEM,
+  NUM_SI_ATOMIC_SCOPES
 };
 
 /// The distinct address spaces supported by the AMDGPU target for
@@ -439,6 +442,7 @@ enum class SIAtomicAddrSpace {
   SCRATCH = 1u << 2,
   GDS = 1u << 3,
   OTHER = 1u << 4,
+  LAST = OTHER,
 
   /// The address spaces that can be accessed by a FLAT instruction.
   FLAT = GLOBAL | LDS | SCRATCH,
@@ -449,7 +453,7 @@ enum class SIAtomicAddrSpace {
   /// All address spaces.
   ALL = GLOBAL | LDS | SCRATCH | GDS | OTHER,
 
-  LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ ALL)
+  LLVM_MARK_AS_BITMASK_ENUM(/* Highest bit defined = */ LAST)
 };
 
 namespace SendMsg { // Encoding of SIMM16 used in s_sendmsg* insns.
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll
index 1f01c64de546c..6a14c2c9aae7f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll
@@ -536,36 +536,36 @@ entry:
 define amdgpu_kernel void @workgroup_one_as_release() #0 {
   ; GFX6-LABEL: name: workgroup_one_as_release
   ; GFX6: bb.0.entry:
-  ; GFX6-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX6-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX6-NEXT:   S_ENDPGM 0
   ;
   ; GFX8-LABEL: name: workgroup_one_as_release
   ; GFX8: bb.0.entry:
-  ; GFX8-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX8-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX8-NEXT:   S_ENDPGM 0
   ;
   ; GFX10WGP-LABEL: name: workgroup_one_as_release
   ; GFX10WGP: bb.0.entry:
   ; GFX10WGP-NEXT:   S_WAITCNT_soft 16240
-  ; GFX10WGP-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX10WGP-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX10WGP-NEXT:   S_WAITCNT_VSCNT_soft undef $sgpr_null, 0
   ; GFX10WGP-NEXT:   S_ENDPGM 0
   ;
   ; GFX10CU-LABEL: name: workgroup_one_as_release
   ; GFX10CU: bb.0.entry:
-  ; GFX10CU-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX10CU-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX10CU-NEXT:   S_ENDPGM 0
   ;
   ; GFX11WGP-LABEL: name: workgroup_one_as_release
   ; GFX11WGP: bb.0.entry:
   ; GFX11WGP-NEXT:   S_WAITCNT_soft 1015
-  ; GFX11WGP-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX11WGP-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX11WGP-NEXT:   S_WAITCNT_VSCNT_soft undef $sgpr_null, 0
   ; GFX11WGP-NEXT:   S_ENDPGM 0
   ;
   ; GFX11CU-LABEL: name: workgroup_one_as_release
   ; GFX11CU: bb.0.entry:
-  ; GFX11CU-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX11CU-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX11CU-NEXT:   S_ENDPGM 0
 entry:
   fence syncscope("workgroup-one-as") release
@@ -575,38 +575,38 @@ entry:
 define amdgpu_kernel void @workgroup_one_as_acq_rel() #0 {
   ; GFX6-LABEL: name: workgroup_one_as_acq_rel
   ; GFX6: bb.0.entry:
-  ; GFX6-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX6-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX6-NEXT:   S_ENDPGM 0
   ;
   ; GFX8-LABEL: name: workgroup_one_as_acq_rel
   ; GFX8: bb.0.entry:
-  ; GFX8-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX8-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX8-NEXT:   S_ENDPGM 0
   ;
   ; GFX10WGP-LABEL: name: workgroup_one_as_acq_rel
   ; GFX10WGP: bb.0.entry:
   ; GFX10WGP-NEXT:   S_WAITCNT_soft 16240
-  ; GFX10WGP-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX10WGP-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX10WGP-NEXT:   S_WAITCNT_VSCNT_soft undef $sgpr_null, 0
   ; GFX10WGP-NEXT:   BUFFER_GL0_INV implicit $exec
   ; GFX10WGP-NEXT:   S_ENDPGM 0
   ;
   ; GFX10CU-LABEL: name: workgroup_one_as_acq_rel
   ; GFX10CU: bb.0.entry:
-  ; GFX10CU-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX10CU-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX10CU-NEXT:   S_ENDPGM 0
   ;
   ; GFX11WGP-LABEL: name: workgroup_one_as_acq_rel
   ; GFX11WGP: bb.0.entry:
   ; GFX11WGP-NEXT:   S_WAITCNT_soft 1015
-  ; GFX11WGP-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX11WGP-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX11WGP-NEXT:   S_WAITCNT_VSCNT_soft undef $sgpr_null, 0
   ; GFX11WGP-NEXT:   BUFFER_GL0_INV implicit $exec
   ; GFX11WGP-NEXT:   S_ENDPGM 0
   ;
   ; GFX11CU-LABEL: name: workgroup_one_as_acq_rel
   ; GFX11CU: bb.0.entry:
-  ; GFX11CU-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX11CU-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX11CU-NEXT:   S_ENDPGM 0
 entry:
   fence syncscope("workgroup-one-as") acq_rel
@@ -616,38 +616,38 @@ entry:
 define amdgpu_kernel void @workgroup_one_as_seq_cst() #0 {
   ; GFX6-LABEL: name: workgroup_one_as_seq_cst
   ; GFX6: bb.0.entry:
-  ; GFX6-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX6-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX6-NEXT:   S_ENDPGM 0
   ;
   ; GFX8-LABEL: name: workgroup_one_as_seq_cst
   ; GFX8: bb.0.entry:
-  ; GFX8-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX8-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX8-NEXT:   S_ENDPGM 0
   ;
   ; GFX10WGP-LABEL: name: workgroup_one_as_seq_cst
   ; GFX10WGP: bb.0.entry:
   ; GFX10WGP-NEXT:   S_WAITCNT_soft 16240
-  ; GFX10WGP-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX10WGP-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX10WGP-NEXT:   S_WAITCNT_VSCNT_soft undef $sgpr_null, 0
   ; GFX10WGP-NEXT:   BUFFER_GL0_INV implicit $exec
   ; GFX10WGP-NEXT:   S_ENDPGM 0
   ;
   ; GFX10CU-LABEL: name: workgroup_one_as_seq_cst
   ; GFX10CU: bb.0.entry:
-  ; GFX10CU-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX10CU-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX10CU-NEXT:   S_ENDPGM 0
   ;
   ; GFX11WGP-LABEL: name: workgroup_one_as_seq_cst
   ; GFX11WGP: bb.0.entry:
   ; GFX11WGP-NEXT:   S_WAITCNT_soft 1015
-  ; GFX11WGP-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX11WGP-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX11WGP-NEXT:   S_WAITCNT_VSCNT_soft undef $sgpr_null, 0
   ; GFX11WGP-NEXT:   BUFFER_GL0_INV implicit $exec
   ; GFX11WGP-NEXT:   S_ENDPGM 0
   ;
   ; GFX11CU-LABEL: name: workgroup_one_as_seq_cst
   ; GFX11CU: bb.0.entry:
-  ; GFX11CU-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX11CU-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX11CU-NEXT:   S_ENDPGM 0
 entry:
   fence syncscope("workgroup-one-as") seq_cst
@@ -1301,39 +1301,39 @@ define amdgpu_kernel void @workgroup_release() #0 {
   ; GFX6-LABEL: name: workgroup_release
   ; GFX6: bb.0.entry:
   ; GFX6-NEXT:   S_WAITCNT_soft 127
-  ; GFX6-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX6-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX6-NEXT:   S_ENDPGM 0
   ;
   ; GFX8-LABEL: name: workgroup_release
   ; GFX8: bb.0.entry:
   ; GFX8-NEXT:   S_WAITCNT_soft 127
-  ; GFX8-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX8-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX8-NEXT:   S_ENDPGM 0
   ;
   ; GFX10WGP-LABEL: name: workgroup_release
   ; GFX10WGP: bb.0.entry:
   ; GFX10WGP-NEXT:   S_WAITCNT_soft 112
-  ; GFX10WGP-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX10WGP-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX10WGP-NEXT:   S_WAITCNT_VSCNT_soft undef $sgpr_null, 0
   ; GFX10WGP-NEXT:   S_ENDPGM 0
   ;
   ; GFX10CU-LABEL: name: workgroup_release
   ; GFX10CU: bb.0.entry:
   ; GFX10CU-NEXT:   S_WAITCNT_soft 49279
-  ; GFX10CU-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX10CU-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX10CU-NEXT:   S_ENDPGM 0
   ;
   ; GFX11WGP-LABEL: name: workgroup_release
   ; GFX11WGP: bb.0.entry:
   ; GFX11WGP-NEXT:   S_WAITCNT_soft 7
-  ; GFX11WGP-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX11WGP-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX11WGP-NEXT:   S_WAITCNT_VSCNT_soft undef $sgpr_null, 0
   ; GFX11WGP-NEXT:   S_ENDPGM 0
   ;
   ; GFX11CU-LABEL: name: workgroup_release
   ; GFX11CU: bb.0.entry:
   ; GFX11CU-NEXT:   S_WAITCNT_soft 64519
-  ; GFX11CU-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX11CU-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX11CU-NEXT:   S_ENDPGM 0
 entry:
   fence syncscope("workgroup") release
@@ -1344,19 +1344,19 @@ define amdgpu_kernel void @workgroup_acq_rel() #0 {
   ; GFX6-LABEL: name: workgroup_acq_rel
   ; GFX6: bb.0.entry:
   ; GFX6-NEXT:   S_WAITCNT_soft 127
-  ; GFX6-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX6-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX6-NEXT:   S_ENDPGM 0
   ;
   ; GFX8-LABEL: name: workgroup_acq_rel
   ; GFX8: bb.0.entry:
   ; GFX8-NEXT:   S_WAITCNT_soft 127
-  ; GFX8-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX8-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX8-NEXT:   S_ENDPGM 0
   ;
   ; GFX10WGP-LABEL: name: workgroup_acq_rel
   ; GFX10WGP: bb.0.entry:
   ; GFX10WGP-NEXT:   S_WAITCNT_soft 112
-  ; GFX10WGP-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX10WGP-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX10WGP-NEXT:   S_WAITCNT_VSCNT_soft undef $sgpr_null, 0
   ; GFX10WGP-NEXT:   BUFFER_GL0_INV implicit $exec
   ; GFX10WGP-NEXT:   S_ENDPGM 0
@@ -1364,13 +1364,13 @@ define amdgpu_kernel void @workgroup_acq_rel() #0 {
   ; GFX10CU-LABEL: name: workgroup_acq_rel
   ; GFX10CU: bb.0.entry:
   ; GFX10CU-NEXT:   S_WAITCNT_soft 49279
-  ; GFX10CU-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX10CU-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX10CU-NEXT:   S_ENDPGM 0
   ;
   ; GFX11WGP-LABEL: name: workgroup_acq_rel
   ; GFX11WGP: bb.0.entry:
   ; GFX11WGP-NEXT:   S_WAITCNT_soft 7
-  ; GFX11WGP-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX11WGP-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX11WGP-NEXT:   S_WAITCNT_VSCNT_soft undef $sgpr_null, 0
   ; GFX11WGP-NEXT:   BUFFER_GL0_INV implicit $exec
   ; GFX11WGP-NEXT:   S_ENDPGM 0
@@ -1378,7 +1378,7 @@ define amdgpu_kernel void @workgroup_acq_rel() #0 {
   ; GFX11CU-LABEL: name: workgroup_acq_rel
   ; GFX11CU: bb.0.entry:
   ; GFX11CU-NEXT:   S_WAITCNT_soft 64519
-  ; GFX11CU-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX11CU-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX11CU-NEXT:   S_ENDPGM 0
 entry:
   fence syncscope("workgroup") acq_rel
@@ -1389,19 +1389,19 @@ define amdgpu_kernel void @workgroup_seq_cst() #0 {
   ; GFX6-LABEL: name: workgroup_seq_cst
   ; GFX6: bb.0.entry:
   ; GFX6-NEXT:   S_WAITCNT_soft 127
-  ; GFX6-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX6-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX6-NEXT:   S_ENDPGM 0
   ;
   ; GFX8-LABEL: name: workgroup_seq_cst
   ; GFX8: bb.0.entry:
   ; GFX8-NEXT:   S_WAITCNT_soft 127
-  ; GFX8-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX8-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX8-NEXT:   S_ENDPGM 0
   ;
   ; GFX10WGP-LABEL: name: workgroup_seq_cst
   ; GFX10WGP: bb.0.entry:
   ; GFX10WGP-NEXT:   S_WAITCNT_soft 112
-  ; GFX10WGP-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX10WGP-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX10WGP-NEXT:   S_WAITCNT_VSCNT_soft undef $sgpr_null, 0
   ; GFX10WGP-NEXT:   BUFFER_GL0_INV implicit $exec
   ; GFX10WGP-NEXT:   S_ENDPGM 0
@@ -1409,13 +1409,13 @@ define amdgpu_kernel void @workgroup_seq_cst() #0 {
   ; GFX10CU-LABEL: name: workgroup_seq_cst
   ; GFX10CU: bb.0.entry:
   ; GFX10CU-NEXT:   S_WAITCNT_soft 49279
-  ; GFX10CU-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX10CU-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX10CU-NEXT:   S_ENDPGM 0
   ;
   ; GFX11WGP-LABEL: name: workgroup_seq_cst
   ; GFX11WGP: bb.0.entry:
   ; GFX11WGP-NEXT:   S_WAITCNT_soft 7
-  ; GFX11WGP-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX11WGP-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX11WGP-NEXT:   S_WAITCN...
[truncated]

@llvmbot
Copy link
Member

llvmbot commented Jul 24, 2025

@llvm/pr-subscribers-llvm-globalisel

Author: Sameer Sahasrabuddhe (ssahasra)

Changes

Patch is 34.95 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/150391.diff

7 Files Affected:

  • (modified) llvm/lib/CodeGen/MIRParser/MIParser.cpp (+10-15)
  • (modified) llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp (+161)
  • (modified) llvm/lib/Target/AMDGPU/SIDefines.h (+6-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll (+36-36)
  • (added) llvm/test/CodeGen/AMDGPU/fence-parameters.mir (+29)
  • (modified) llvm/test/CodeGen/AMDGPU/insert-waitcnts-fence-soft.mir (+9-9)
  • (modified) llvm/test/CodeGen/AMDGPU/memory-legalizer-local.mir (+12-12)
diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index 3a364d5ff0d20..c8ad286a87a35 100644
--- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -1850,28 +1850,25 @@ bool MIParser::parseImmediateOperand(MachineOperand &Dest) {
   return false;
 }
 
+// The target mnemonic is an expression of the form:
+//
+//     Dot(IntegerLiteral|Identifier|Dot)+
+//
+// We could be stricter like not terminating in a dot, but that's not important
+// where this is being used.
 bool MIParser::parseTargetImmMnemonic(const unsigned OpCode,
                                       const unsigned OpIdx,
                                       MachineOperand &Dest,
                                       const MIRFormatter &MF) {
   assert(Token.is(MIToken::dot));
   auto Loc = Token.location(); // record start position
-  size_t Len = 1;              // for "."
-  lex();
-
-  // Handle the case that mnemonic starts with number.
-  if (Token.is(MIToken::IntegerLiteral)) {
+  size_t Len = 0;
+  while (Token.is(MIToken::IntegerLiteral) || Token.is(MIToken::dot) ||
+         Token.is(MIToken::Identifier)) {
     Len += Token.range().size();
     lex();
   }
-
-  StringRef Src;
-  if (Token.is(MIToken::comma))
-    Src = StringRef(Loc, Len);
-  else {
-    assert(Token.is(MIToken::Identifier));
-    Src = StringRef(Loc, Len + Token.stringValue().size());
-  }
+  StringRef Src(Loc, Len);
   int64_t Val;
   if (MF.parseImmMnemonic(OpCode, OpIdx, Src, Val,
                           [this](StringRef::iterator Loc, const Twine &Msg)
@@ -1879,8 +1876,6 @@ bool MIParser::parseTargetImmMnemonic(const unsigned OpCode,
     return true;
 
   Dest = MachineOperand::CreateImm(Val);
-  if (!Token.is(MIToken::comma))
-    lex();
   return false;
 }
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp
index 75e3d8c426e73..f318d6ffc1bae 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp
@@ -12,10 +12,135 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPUMIRFormatter.h"
+#include "SIDefines.h"
 #include "SIMachineFunctionInfo.h"
 
 using namespace llvm;
 
+bool parseAtomicOrdering(StringRef Src, unsigned &Order) {
+  Src.consume_front(".");
+  for (unsigned I = 0; I <= (unsigned)AtomicOrdering::LAST; ++I) {
+    if (Src == toIRString((AtomicOrdering)I)) {
+      Order = I;
+      return true;
+    }
+  }
+  Order = ~0u;
+  return false;
+}
+
+static const char *fmtScope(unsigned Scope) {
+  static const char *Names[] = {"none",      "singlethread", "wavefront",
+                                "workgroup", "agent",        "system"};
+  return Names[Scope];
+}
+
+bool parseAtomicScope(StringRef Src, unsigned &Scope) {
+  Src.consume_front(".");
+  for (unsigned I = 0;
+       I != (unsigned)AMDGPU::SIAtomicScope::NUM_SI_ATOMIC_SCOPES; ++I) {
+    if (Src == fmtScope(I)) {
+      Scope = I;
+      return true;
+    }
+  }
+  Scope = ~0u;
+  return false;
+}
+
+static const char *fmtAddrSpace(unsigned Space) {
+  static const char *Names[] = {"none",    "global", "lds",
+                                "scratch", "gds",    "other"};
+  return Names[Space];
+}
+
+bool parseOneAddrSpace(StringRef Src, unsigned &AddrSpace) {
+  if (Src == "none") {
+    AddrSpace = (unsigned)AMDGPU::SIAtomicAddrSpace::NONE;
+    return true;
+  }
+  if (Src == "flat") {
+    AddrSpace = (unsigned)AMDGPU::SIAtomicAddrSpace::FLAT;
+    return true;
+  }
+  if (Src == "atomic") {
+    AddrSpace = (unsigned)AMDGPU::SIAtomicAddrSpace::ATOMIC;
+    return true;
+  }
+  if (Src == "all") {
+    AddrSpace = (unsigned)AMDGPU::SIAtomicAddrSpace::ALL;
+    return true;
+  }
+  for (unsigned I = 1, A = 1; A <= (unsigned)AMDGPU::SIAtomicAddrSpace::LAST;
+       A <<= 1, ++I) {
+    if (Src == fmtAddrSpace(I)) {
+      AddrSpace = A;
+      return true;
+    }
+  }
+  AddrSpace = ~0u;
+  return false;
+}
+
+bool parseAddrSpace(StringRef Src, unsigned &AddrSpace) {
+  Src = Src.trim();
+  Src.consume_front(".");
+  while (!Src.empty()) {
+    auto [First, Rest] = Src.split('.');
+    unsigned OneSpace;
+    if (!parseOneAddrSpace(First, OneSpace))
+      return false;
+    AddrSpace |= OneSpace;
+    Src = Rest;
+  }
+  return true;
+}
+
+static void fmtAddrSpace(raw_ostream &OS, int64_t Imm) {
+  OS << '.';
+  if (Imm == (unsigned)AMDGPU::SIAtomicAddrSpace::NONE) {
+    OS << "none";
+    return;
+  }
+  if (Imm == (unsigned)AMDGPU::SIAtomicAddrSpace::FLAT) {
+    OS << "flat";
+    return;
+  }
+  if (Imm == (unsigned)AMDGPU::SIAtomicAddrSpace::ATOMIC) {
+    OS << "atomic";
+    return;
+  }
+  if (Imm == (unsigned)AMDGPU::SIAtomicAddrSpace::ALL) {
+    OS << "all";
+    return;
+  }
+
+  ListSeparator LS{"."};
+  auto AddrSpace = (AMDGPU::SIAtomicAddrSpace)Imm;
+  const auto LAST = (unsigned)AMDGPU::SIAtomicAddrSpace::LAST;
+
+  for (unsigned A = 1, I = 1; A <= LAST; A <<= 1, ++I) {
+    if (any(AddrSpace & (AMDGPU::SIAtomicAddrSpace)A))
+      OS << LS << StringRef(fmtAddrSpace(I));
+  }
+}
+
+static void printFenceOperand(raw_ostream &OS, const MachineInstr &MI,
+                              std::optional<unsigned int> OpIdx, int64_t Imm) {
+#define GET_IDX(Name)                                                          \
+  AMDGPU::getNamedOperandIdx(AMDGPU::S_WAITCNT_FENCE_soft, AMDGPU::OpName::Name)
+  if (OpIdx == GET_IDX(Ordering)) {
+    assert(Imm <= (unsigned)AtomicOrdering::LAST);
+    OS << '.' << StringRef(toIRString((AtomicOrdering)Imm));
+  } else if (OpIdx == GET_IDX(Scope)) {
+    assert(Imm < (unsigned)AMDGPU::SIAtomicScope::NUM_SI_ATOMIC_SCOPES);
+    OS << '.' << StringRef(fmtScope(Imm));
+  } else if (OpIdx == GET_IDX(AddrSpace)) {
+    fmtAddrSpace(OS, Imm);
+  }
+#undef GET_IDX
+}
+
 void AMDGPUMIRFormatter::printImm(raw_ostream &OS, const MachineInstr &MI,
                       std::optional<unsigned int> OpIdx, int64_t Imm) const {
 
@@ -24,12 +149,46 @@ void AMDGPUMIRFormatter::printImm(raw_ostream &OS, const MachineInstr &MI,
     assert(OpIdx == 0);
     printSDelayAluImm(Imm, OS);
     break;
+  case AMDGPU::S_WAITCNT_FENCE_soft:
+    printFenceOperand(OS, MI, OpIdx, Imm);
+    break;
   default:
     MIRFormatter::printImm(OS, MI, OpIdx, Imm);
     break;
   }
 }
 
+static bool
+parseFenceParameter(const unsigned int OpIdx, int64_t &Imm,
+                    llvm::StringRef &Src,
+                    llvm::MIRFormatter::ErrorCallbackType &ErrorCallback) {
+#define GET_IDX(Name)                                                          \
+  AMDGPU::getNamedOperandIdx(AMDGPU::S_WAITCNT_FENCE_soft, AMDGPU::OpName::Name)
+  if (OpIdx == (unsigned)GET_IDX(Ordering)) {
+    unsigned Order = 0;
+    if (!parseAtomicOrdering(Src, Order))
+      return ErrorCallback(Src.begin(), "Expected atomic ordering");
+    Imm = Order;
+    return false;
+  }
+  if (OpIdx == (unsigned)GET_IDX(Scope)) {
+    unsigned Scope = 0;
+    if (!parseAtomicScope(Src, Scope))
+      return ErrorCallback(Src.begin(), "Expected atomic scope");
+    Imm = Scope;
+    return false;
+  }
+  if (OpIdx == (unsigned)GET_IDX(AddrSpace)) {
+    unsigned AddrSpace = 0;
+    if (!parseAddrSpace(Src, AddrSpace))
+      return ErrorCallback(Src.begin(), "Expected address space");
+    Imm = AddrSpace;
+    return false;
+  }
+  return true;
+#undef GET_IDX
+}
+
 /// Implement target specific parsing of immediate mnemonics. The mnemonic is
 /// a string with a leading dot.
 bool AMDGPUMIRFormatter::parseImmMnemonic(const unsigned OpCode,
@@ -41,6 +200,8 @@ bool AMDGPUMIRFormatter::parseImmMnemonic(const unsigned OpCode,
   switch (OpCode) {
   case AMDGPU::S_DELAY_ALU:
     return parseSDelayAluImmMnemonic(OpIdx, Imm, Src, ErrorCallback);
+  case AMDGPU::S_WAITCNT_FENCE_soft:
+    return parseFenceParameter(OpIdx, Imm, Src, ErrorCallback);
   default:
     break;
   }
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 9d30951cac1a3..d7c2aff1d3411 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -421,13 +421,16 @@ enum CPol {
 } // namespace CPol
 
 /// The atomic synchronization scopes supported by the AMDGPU target.
+//
+// Note: Update the strings in AMDGPUMIRFormatter.cpp to match this enum.
 enum class SIAtomicScope {
   NONE,
   SINGLETHREAD,
   WAVEFRONT,
   WORKGROUP,
   AGENT,
-  SYSTEM
+  SYSTEM,
+  NUM_SI_ATOMIC_SCOPES
 };
 
 /// The distinct address spaces supported by the AMDGPU target for
@@ -439,6 +442,7 @@ enum class SIAtomicAddrSpace {
   SCRATCH = 1u << 2,
   GDS = 1u << 3,
   OTHER = 1u << 4,
+  LAST = OTHER,
 
   /// The address spaces that can be accessed by a FLAT instruction.
   FLAT = GLOBAL | LDS | SCRATCH,
@@ -449,7 +453,7 @@ enum class SIAtomicAddrSpace {
   /// All address spaces.
   ALL = GLOBAL | LDS | SCRATCH | GDS | OTHER,
 
-  LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ ALL)
+  LLVM_MARK_AS_BITMASK_ENUM(/* Highest bit defined = */ LAST)
 };
 
 namespace SendMsg { // Encoding of SIMM16 used in s_sendmsg* insns.
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll
index 1f01c64de546c..6a14c2c9aae7f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll
@@ -536,36 +536,36 @@ entry:
 define amdgpu_kernel void @workgroup_one_as_release() #0 {
   ; GFX6-LABEL: name: workgroup_one_as_release
   ; GFX6: bb.0.entry:
-  ; GFX6-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX6-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX6-NEXT:   S_ENDPGM 0
   ;
   ; GFX8-LABEL: name: workgroup_one_as_release
   ; GFX8: bb.0.entry:
-  ; GFX8-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX8-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX8-NEXT:   S_ENDPGM 0
   ;
   ; GFX10WGP-LABEL: name: workgroup_one_as_release
   ; GFX10WGP: bb.0.entry:
   ; GFX10WGP-NEXT:   S_WAITCNT_soft 16240
-  ; GFX10WGP-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX10WGP-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX10WGP-NEXT:   S_WAITCNT_VSCNT_soft undef $sgpr_null, 0
   ; GFX10WGP-NEXT:   S_ENDPGM 0
   ;
   ; GFX10CU-LABEL: name: workgroup_one_as_release
   ; GFX10CU: bb.0.entry:
-  ; GFX10CU-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX10CU-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX10CU-NEXT:   S_ENDPGM 0
   ;
   ; GFX11WGP-LABEL: name: workgroup_one_as_release
   ; GFX11WGP: bb.0.entry:
   ; GFX11WGP-NEXT:   S_WAITCNT_soft 1015
-  ; GFX11WGP-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX11WGP-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX11WGP-NEXT:   S_WAITCNT_VSCNT_soft undef $sgpr_null, 0
   ; GFX11WGP-NEXT:   S_ENDPGM 0
   ;
   ; GFX11CU-LABEL: name: workgroup_one_as_release
   ; GFX11CU: bb.0.entry:
-  ; GFX11CU-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX11CU-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX11CU-NEXT:   S_ENDPGM 0
 entry:
   fence syncscope("workgroup-one-as") release
@@ -575,38 +575,38 @@ entry:
 define amdgpu_kernel void @workgroup_one_as_acq_rel() #0 {
   ; GFX6-LABEL: name: workgroup_one_as_acq_rel
   ; GFX6: bb.0.entry:
-  ; GFX6-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX6-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX6-NEXT:   S_ENDPGM 0
   ;
   ; GFX8-LABEL: name: workgroup_one_as_acq_rel
   ; GFX8: bb.0.entry:
-  ; GFX8-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX8-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX8-NEXT:   S_ENDPGM 0
   ;
   ; GFX10WGP-LABEL: name: workgroup_one_as_acq_rel
   ; GFX10WGP: bb.0.entry:
   ; GFX10WGP-NEXT:   S_WAITCNT_soft 16240
-  ; GFX10WGP-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX10WGP-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX10WGP-NEXT:   S_WAITCNT_VSCNT_soft undef $sgpr_null, 0
   ; GFX10WGP-NEXT:   BUFFER_GL0_INV implicit $exec
   ; GFX10WGP-NEXT:   S_ENDPGM 0
   ;
   ; GFX10CU-LABEL: name: workgroup_one_as_acq_rel
   ; GFX10CU: bb.0.entry:
-  ; GFX10CU-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX10CU-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX10CU-NEXT:   S_ENDPGM 0
   ;
   ; GFX11WGP-LABEL: name: workgroup_one_as_acq_rel
   ; GFX11WGP: bb.0.entry:
   ; GFX11WGP-NEXT:   S_WAITCNT_soft 1015
-  ; GFX11WGP-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX11WGP-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX11WGP-NEXT:   S_WAITCNT_VSCNT_soft undef $sgpr_null, 0
   ; GFX11WGP-NEXT:   BUFFER_GL0_INV implicit $exec
   ; GFX11WGP-NEXT:   S_ENDPGM 0
   ;
   ; GFX11CU-LABEL: name: workgroup_one_as_acq_rel
   ; GFX11CU: bb.0.entry:
-  ; GFX11CU-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX11CU-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX11CU-NEXT:   S_ENDPGM 0
 entry:
   fence syncscope("workgroup-one-as") acq_rel
@@ -616,38 +616,38 @@ entry:
 define amdgpu_kernel void @workgroup_one_as_seq_cst() #0 {
   ; GFX6-LABEL: name: workgroup_one_as_seq_cst
   ; GFX6: bb.0.entry:
-  ; GFX6-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX6-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX6-NEXT:   S_ENDPGM 0
   ;
   ; GFX8-LABEL: name: workgroup_one_as_seq_cst
   ; GFX8: bb.0.entry:
-  ; GFX8-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX8-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX8-NEXT:   S_ENDPGM 0
   ;
   ; GFX10WGP-LABEL: name: workgroup_one_as_seq_cst
   ; GFX10WGP: bb.0.entry:
   ; GFX10WGP-NEXT:   S_WAITCNT_soft 16240
-  ; GFX10WGP-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX10WGP-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX10WGP-NEXT:   S_WAITCNT_VSCNT_soft undef $sgpr_null, 0
   ; GFX10WGP-NEXT:   BUFFER_GL0_INV implicit $exec
   ; GFX10WGP-NEXT:   S_ENDPGM 0
   ;
   ; GFX10CU-LABEL: name: workgroup_one_as_seq_cst
   ; GFX10CU: bb.0.entry:
-  ; GFX10CU-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX10CU-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX10CU-NEXT:   S_ENDPGM 0
   ;
   ; GFX11WGP-LABEL: name: workgroup_one_as_seq_cst
   ; GFX11WGP: bb.0.entry:
   ; GFX11WGP-NEXT:   S_WAITCNT_soft 1015
-  ; GFX11WGP-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX11WGP-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX11WGP-NEXT:   S_WAITCNT_VSCNT_soft undef $sgpr_null, 0
   ; GFX11WGP-NEXT:   BUFFER_GL0_INV implicit $exec
   ; GFX11WGP-NEXT:   S_ENDPGM 0
   ;
   ; GFX11CU-LABEL: name: workgroup_one_as_seq_cst
   ; GFX11CU: bb.0.entry:
-  ; GFX11CU-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX11CU-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX11CU-NEXT:   S_ENDPGM 0
 entry:
   fence syncscope("workgroup-one-as") seq_cst
@@ -1301,39 +1301,39 @@ define amdgpu_kernel void @workgroup_release() #0 {
   ; GFX6-LABEL: name: workgroup_release
   ; GFX6: bb.0.entry:
   ; GFX6-NEXT:   S_WAITCNT_soft 127
-  ; GFX6-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX6-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX6-NEXT:   S_ENDPGM 0
   ;
   ; GFX8-LABEL: name: workgroup_release
   ; GFX8: bb.0.entry:
   ; GFX8-NEXT:   S_WAITCNT_soft 127
-  ; GFX8-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX8-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX8-NEXT:   S_ENDPGM 0
   ;
   ; GFX10WGP-LABEL: name: workgroup_release
   ; GFX10WGP: bb.0.entry:
   ; GFX10WGP-NEXT:   S_WAITCNT_soft 112
-  ; GFX10WGP-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX10WGP-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX10WGP-NEXT:   S_WAITCNT_VSCNT_soft undef $sgpr_null, 0
   ; GFX10WGP-NEXT:   S_ENDPGM 0
   ;
   ; GFX10CU-LABEL: name: workgroup_release
   ; GFX10CU: bb.0.entry:
   ; GFX10CU-NEXT:   S_WAITCNT_soft 49279
-  ; GFX10CU-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX10CU-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX10CU-NEXT:   S_ENDPGM 0
   ;
   ; GFX11WGP-LABEL: name: workgroup_release
   ; GFX11WGP: bb.0.entry:
   ; GFX11WGP-NEXT:   S_WAITCNT_soft 7
-  ; GFX11WGP-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX11WGP-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX11WGP-NEXT:   S_WAITCNT_VSCNT_soft undef $sgpr_null, 0
   ; GFX11WGP-NEXT:   S_ENDPGM 0
   ;
   ; GFX11CU-LABEL: name: workgroup_release
   ; GFX11CU: bb.0.entry:
   ; GFX11CU-NEXT:   S_WAITCNT_soft 64519
-  ; GFX11CU-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX11CU-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX11CU-NEXT:   S_ENDPGM 0
 entry:
   fence syncscope("workgroup") release
@@ -1344,19 +1344,19 @@ define amdgpu_kernel void @workgroup_acq_rel() #0 {
   ; GFX6-LABEL: name: workgroup_acq_rel
   ; GFX6: bb.0.entry:
   ; GFX6-NEXT:   S_WAITCNT_soft 127
-  ; GFX6-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX6-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX6-NEXT:   S_ENDPGM 0
   ;
   ; GFX8-LABEL: name: workgroup_acq_rel
   ; GFX8: bb.0.entry:
   ; GFX8-NEXT:   S_WAITCNT_soft 127
-  ; GFX8-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX8-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX8-NEXT:   S_ENDPGM 0
   ;
   ; GFX10WGP-LABEL: name: workgroup_acq_rel
   ; GFX10WGP: bb.0.entry:
   ; GFX10WGP-NEXT:   S_WAITCNT_soft 112
-  ; GFX10WGP-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX10WGP-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX10WGP-NEXT:   S_WAITCNT_VSCNT_soft undef $sgpr_null, 0
   ; GFX10WGP-NEXT:   BUFFER_GL0_INV implicit $exec
   ; GFX10WGP-NEXT:   S_ENDPGM 0
@@ -1364,13 +1364,13 @@ define amdgpu_kernel void @workgroup_acq_rel() #0 {
   ; GFX10CU-LABEL: name: workgroup_acq_rel
   ; GFX10CU: bb.0.entry:
   ; GFX10CU-NEXT:   S_WAITCNT_soft 49279
-  ; GFX10CU-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX10CU-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX10CU-NEXT:   S_ENDPGM 0
   ;
   ; GFX11WGP-LABEL: name: workgroup_acq_rel
   ; GFX11WGP: bb.0.entry:
   ; GFX11WGP-NEXT:   S_WAITCNT_soft 7
-  ; GFX11WGP-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX11WGP-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX11WGP-NEXT:   S_WAITCNT_VSCNT_soft undef $sgpr_null, 0
   ; GFX11WGP-NEXT:   BUFFER_GL0_INV implicit $exec
   ; GFX11WGP-NEXT:   S_ENDPGM 0
@@ -1378,7 +1378,7 @@ define amdgpu_kernel void @workgroup_acq_rel() #0 {
   ; GFX11CU-LABEL: name: workgroup_acq_rel
   ; GFX11CU: bb.0.entry:
   ; GFX11CU-NEXT:   S_WAITCNT_soft 64519
-  ; GFX11CU-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX11CU-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX11CU-NEXT:   S_ENDPGM 0
 entry:
   fence syncscope("workgroup") acq_rel
@@ -1389,19 +1389,19 @@ define amdgpu_kernel void @workgroup_seq_cst() #0 {
   ; GFX6-LABEL: name: workgroup_seq_cst
   ; GFX6: bb.0.entry:
   ; GFX6-NEXT:   S_WAITCNT_soft 127
-  ; GFX6-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX6-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX6-NEXT:   S_ENDPGM 0
   ;
   ; GFX8-LABEL: name: workgroup_seq_cst
   ; GFX8: bb.0.entry:
   ; GFX8-NEXT:   S_WAITCNT_soft 127
-  ; GFX8-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX8-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX8-NEXT:   S_ENDPGM 0
   ;
   ; GFX10WGP-LABEL: name: workgroup_seq_cst
   ; GFX10WGP: bb.0.entry:
   ; GFX10WGP-NEXT:   S_WAITCNT_soft 112
-  ; GFX10WGP-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX10WGP-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX10WGP-NEXT:   S_WAITCNT_VSCNT_soft undef $sgpr_null, 0
   ; GFX10WGP-NEXT:   BUFFER_GL0_INV implicit $exec
   ; GFX10WGP-NEXT:   S_ENDPGM 0
@@ -1409,13 +1409,13 @@ define amdgpu_kernel void @workgroup_seq_cst() #0 {
   ; GFX10CU-LABEL: name: workgroup_seq_cst
   ; GFX10CU: bb.0.entry:
   ; GFX10CU-NEXT:   S_WAITCNT_soft 49279
-  ; GFX10CU-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX10CU-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX10CU-NEXT:   S_ENDPGM 0
   ;
   ; GFX11WGP-LABEL: name: workgroup_seq_cst
   ; GFX11WGP: bb.0.entry:
   ; GFX11WGP-NEXT:   S_WAITCNT_soft 7
-  ; GFX11WGP-NEXT:   S_WAITCNT_FENCE_soft 5, 3, 15
+  ; GFX11WGP-NEXT:   S_WAITCNT_FENCE_soft .release, .workgroup, .atomic
   ; GFX11WGP-NEXT:   S_WAITCN...
[truncated]

Comment on lines +22 to +23
for (unsigned I = 0; I <= (unsigned)AtomicOrdering::LAST; ++I) {
if (Src == toIRString((AtomicOrdering)I)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we make AtomicOrdering work with enum_seq?

}

bool parseOneAddrSpace(StringRef Src, unsigned &AddrSpace) {
if (Src == "none") {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

StringSwitch?

@@ -0,0 +1,29 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

MIR print/parse tests belong in test/CodeGen/MIR

AMDGPU::getNamedOperandIdx(AMDGPU::S_WAITCNT_FENCE_soft, AMDGPU::OpName::Name)
if (OpIdx == GET_IDX(Ordering)) {
assert(Imm <= (unsigned)AtomicOrdering::LAST);
OS << '.' << StringRef(toIRString((AtomicOrdering)Imm));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't toIRString return a StringRef in the first place?

@ssahasra ssahasra closed this Jul 30, 2025
@ssahasra ssahasra deleted the users/ssahasra/waitcnt-pretty-print branch July 30, 2025 06:09
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants