llvm · jrbyrnes · Jul 23, 2025 · Jul 28, 2025 · Jul 28, 2025 · Jul 29, 2025
diff --git a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -447,11 +447,7 @@ void GCNIterativeScheduler::sortRegionsByPressure(unsigned TargetOcc) {
 // BestSchedules aren't deleted on fail.
 unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
   // TODO: assert Regions are sorted descending by pressure
-  const auto &ST = MF.getSubtarget<GCNSubtarget>();
-  const unsigned DynamicVGPRBlockSize =
-      MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
-  const auto Occ =
-      Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize);
+  const auto Occ = Regions.front()->MaxPressure.getOccupancy(MF);
   LLVM_DEBUG(dbgs() << "Trying to improve occupancy, target = " << TargetOcc
                     << ", current = " << Occ << '\n');
 
@@ -460,7 +456,7 @@ unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
     // Always build the DAG to add mutations
     BuildDAG DAG(*R, *this);
 
-    if (R->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize) >= NewOcc)
+    if (R->MaxPressure.getOccupancy(MF) >= NewOcc)
       continue;
 
     LLVM_DEBUG(printRegion(dbgs(), R->Begin, R->End, LIS, 3);
@@ -471,7 +467,7 @@ unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
     LLVM_DEBUG(dbgs() << "Occupancy improvement attempt:\n";
                printSchedRP(dbgs(), R->MaxPressure, MaxRP));
 
-    NewOcc = std::min(NewOcc, MaxRP.getOccupancy(ST, DynamicVGPRBlockSize));
+    NewOcc = std::min(NewOcc, MaxRP.getOccupancy(MF));
     if (NewOcc <= Occ)
       break;
 
@@ -488,15 +484,12 @@ unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
 }
 
 void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
-  bool TryMaximizeOccupancy) {
-  const auto &ST = MF.getSubtarget<GCNSubtarget>();
+    bool TryMaximizeOccupancy) {
   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
   auto TgtOcc = MFI->getMinAllowedOccupancy();
-  unsigned DynamicVGPRBlockSize = MFI->getDynamicVGPRBlockSize();
 
   sortRegionsByPressure(TgtOcc);
-  auto Occ =
-      Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize);
+  auto Occ = Regions.front()->MaxPressure.getOccupancy(MF);
 
   bool IsReentry = false;
   if (TryMaximizeOccupancy && Occ < TgtOcc) {
@@ -527,21 +520,19 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
       const auto RP = getRegionPressure(*R);
       LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP));
 
-      if (RP.getOccupancy(ST, DynamicVGPRBlockSize) < TgtOcc) {
+      if (RP.getOccupancy(MF) < TgtOcc) {
         LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc);
-        if (R->BestSchedule.get() && R->BestSchedule->MaxPressure.getOccupancy(
-                                         ST, DynamicVGPRBlockSize) >= TgtOcc) {
+        if (R->BestSchedule.get() &&
+            R->BestSchedule->MaxPressure.getOccupancy(MF) >= TgtOcc) {
           LLVM_DEBUG(dbgs() << ", scheduling minimal register\n");
           scheduleBest(*R);
         } else {
           LLVM_DEBUG(dbgs() << ", restoring\n");
           Ovr.restoreOrder();
-          assert(R->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize) >=
-                 TgtOcc);
+          assert(R->MaxPressure.getOccupancy(MF) >= TgtOcc);
         }
       }
-      FinalOccupancy =
-          std::min(FinalOccupancy, RP.getOccupancy(ST, DynamicVGPRBlockSize));
+      FinalOccupancy = std::min(FinalOccupancy, RP.getOccupancy(MF));
     }
   }
   MFI->limitOccupancy(FinalOccupancy);
@@ -582,16 +573,12 @@ void GCNIterativeScheduler::scheduleMinReg(bool force) {
 ///////////////////////////////////////////////////////////////////////////////
 // ILP scheduler port
 
-void GCNIterativeScheduler::scheduleILP(
-  bool TryMaximizeOccupancy) {
-  const auto &ST = MF.getSubtarget<GCNSubtarget>();
+void GCNIterativeScheduler::scheduleILP(bool TryMaximizeOccupancy) {
   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
   auto TgtOcc = MFI->getMinAllowedOccupancy();
-  unsigned DynamicVGPRBlockSize = MFI->getDynamicVGPRBlockSize();
 
   sortRegionsByPressure(TgtOcc);
-  auto Occ =
-      Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize);
+  auto Occ = Regions.front()->MaxPressure.getOccupancy(MF);
 
   bool IsReentry = false;
   if (TryMaximizeOccupancy && Occ < TgtOcc) {
@@ -612,18 +599,17 @@ void GCNIterativeScheduler::scheduleILP(
     const auto RP = getSchedulePressure(*R, ILPSchedule);
     LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP));
 
-    if (RP.getOccupancy(ST, DynamicVGPRBlockSize) < TgtOcc) {
+    if (RP.getOccupancy(MF) < TgtOcc) {
       LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc);
-      if (R->BestSchedule.get() && R->BestSchedule->MaxPressure.getOccupancy(
-                                       ST, DynamicVGPRBlockSize) >= TgtOcc) {
+      if (R->BestSchedule.get() &&
+          R->BestSchedule->MaxPressure.getOccupancy(MF) >= TgtOcc) {
         LLVM_DEBUG(dbgs() << ", scheduling minimal register\n");
         scheduleBest(*R);
       }
     } else {
       scheduleRegion(*R, ILPSchedule, RP);
       LLVM_DEBUG(printSchedResult(dbgs(), R, RP));
-      FinalOccupancy =
-          std::min(FinalOccupancy, RP.getOccupancy(ST, DynamicVGPRBlockSize));
+      FinalOccupancy = std::min(FinalOccupancy, RP.getOccupancy(MF));
     }
   }
   MFI->limitOccupancy(FinalOccupancy);

diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -99,20 +99,22 @@ void GCNRegPressure::inc(unsigned Reg,
 bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
                           unsigned MaxOccupancy) const {
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  unsigned ArchVGPRThreshold = ST.getMaxNumVectorRegs(MF.getFunction()).first;
   unsigned DynamicVGPRBlockSize =
       MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
 
   const auto SGPROcc = std::min(MaxOccupancy,
                                 ST.getOccupancyWithNumSGPRs(getSGPRNum()));
   const auto VGPROcc = std::min(
-      MaxOccupancy, ST.getOccupancyWithNumVGPRs(getVGPRNum(ST.hasGFX90AInsts()),
-                                                DynamicVGPRBlockSize));
+      MaxOccupancy, ST.getOccupancyWithNumVGPRs(
+                        getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold),
+                        DynamicVGPRBlockSize));
   const auto OtherSGPROcc = std::min(MaxOccupancy,
                                 ST.getOccupancyWithNumSGPRs(O.getSGPRNum()));
-  const auto OtherVGPROcc =
-      std::min(MaxOccupancy,
-               ST.getOccupancyWithNumVGPRs(O.getVGPRNum(ST.hasGFX90AInsts()),
-                                           DynamicVGPRBlockSize));
+  const auto OtherVGPROcc = std::min(
+      MaxOccupancy, ST.getOccupancyWithNumVGPRs(
+                        O.getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold),
+                        DynamicVGPRBlockSize));
 
   const auto Occ = std::min(SGPROcc, VGPROcc);
   const auto OtherOcc = std::min(OtherSGPROcc, OtherVGPROcc);
@@ -135,35 +137,39 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
   unsigned OtherVGPRForSGPRSpills =
       (OtherExcessSGPR + (WaveSize - 1)) / WaveSize;
 
-  unsigned MaxArchVGPRs = ST.getAddressableNumArchVGPRs();
-
   // Unified excess pressure conditions, accounting for VGPRs used for SGPR
   // spills
-  unsigned ExcessVGPR =
-      std::max(static_cast<int>(getVGPRNum(ST.hasGFX90AInsts()) +
-                                VGPRForSGPRSpills - MaxVGPRs),
-               0);
-  unsigned OtherExcessVGPR =
-      std::max(static_cast<int>(O.getVGPRNum(ST.hasGFX90AInsts()) +
-                                OtherVGPRForSGPRSpills - MaxVGPRs),
-               0);
+  unsigned ExcessVGPR = std::max(
+      static_cast<int>(getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold) +
+                       VGPRForSGPRSpills - MaxVGPRs),
+      0);
+  unsigned OtherExcessVGPR = std::max(
+      static_cast<int>(O.getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold) +
+                       OtherVGPRForSGPRSpills - MaxVGPRs),
+      0);
   // Arch VGPR excess pressure conditions, accounting for VGPRs used for SGPR
   // spills
-  unsigned ExcessArchVGPR = std::max(
-      static_cast<int>(getVGPRNum(false) + VGPRForSGPRSpills - MaxArchVGPRs),
-      0);
+  unsigned AddressableArchVGPRs = ST.getAddressableNumArchVGPRs();
+  unsigned ExcessArchVGPR =
+      std::max(static_cast<int>(getVGPRNum(false, ArchVGPRThreshold) +
+                                VGPRForSGPRSpills - AddressableArchVGPRs),
+               0);
   unsigned OtherExcessArchVGPR =
-      std::max(static_cast<int>(O.getVGPRNum(false) + OtherVGPRForSGPRSpills -
-                                MaxArchVGPRs),
+      std::max(static_cast<int>(O.getVGPRNum(false, ArchVGPRThreshold) +
+                                OtherVGPRForSGPRSpills - AddressableArchVGPRs),
                0);
   // AGPR excess pressure conditions
-  unsigned ExcessAGPR = std::max(
-      static_cast<int>(ST.hasGFX90AInsts() ? (getAGPRNum() - MaxArchVGPRs)
-                                           : (getAGPRNum() - MaxVGPRs)),
-      0);
+  unsigned ExcessAGPR =
+      std::max(static_cast<int>(
+                   ST.hasGFX90AInsts()
+                       ? (getAGPRNum(ArchVGPRThreshold) - AddressableArchVGPRs)
+                       : (getAGPRNum(ArchVGPRThreshold) - MaxVGPRs)),
+               0);
   unsigned OtherExcessAGPR = std::max(
-      static_cast<int>(ST.hasGFX90AInsts() ? (O.getAGPRNum() - MaxArchVGPRs)
-                                           : (O.getAGPRNum() - MaxVGPRs)),
+      static_cast<int>(
+          ST.hasGFX90AInsts()
+              ? (O.getAGPRNum(ArchVGPRThreshold) - AddressableArchVGPRs)
+              : (O.getAGPRNum(ArchVGPRThreshold) - MaxVGPRs)),
       0);
 
   bool ExcessRP = ExcessSGPR || ExcessVGPR || ExcessArchVGPR || ExcessAGPR;
@@ -184,14 +190,21 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
       return VGPRDiff > 0;
     if (SGPRDiff != 0) {
       unsigned PureExcessVGPR =
-          std::max(static_cast<int>(getVGPRNum(ST.hasGFX90AInsts()) - MaxVGPRs),
+          std::max(static_cast<int>(
+                       getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold) -
+                       MaxVGPRs),
                    0) +
-          std::max(static_cast<int>(getVGPRNum(false) - MaxArchVGPRs), 0);
+          std::max(static_cast<int>(getVGPRNum(false, ArchVGPRThreshold) -
+                                    AddressableArchVGPRs),
+                   0);
       unsigned OtherPureExcessVGPR =
-          std::max(
-              static_cast<int>(O.getVGPRNum(ST.hasGFX90AInsts()) - MaxVGPRs),
-              0) +
-          std::max(static_cast<int>(O.getVGPRNum(false) - MaxArchVGPRs), 0);
+          std::max(static_cast<int>(
+                       O.getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold) -
+                       MaxVGPRs),
+                   0) +
+          std::max(static_cast<int>(O.getVGPRNum(false, ArchVGPRThreshold) -
+                                    AddressableArchVGPRs),
+                   0);
 
       // If we have a special case where there is a tie in excess VGPR, but one
       // of the pressures has VGPR usage from SGPR spills, prefer the pressure
@@ -221,38 +234,45 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
       if (SW != OtherSW)
         return SW < OtherSW;
     } else {
-      auto VW = getVGPRTuplesWeight();
-      auto OtherVW = O.getVGPRTuplesWeight();
+      auto VW = getVGPRTuplesWeight(ArchVGPRThreshold);
+      auto OtherVW = O.getVGPRTuplesWeight(ArchVGPRThreshold);
       if (VW != OtherVW)
         return VW < OtherVW;
     }
   }
 
   // Give final precedence to lower general RP.
-  return SGPRImportant ? (getSGPRNum() < O.getSGPRNum()):
-                         (getVGPRNum(ST.hasGFX90AInsts()) <
-                          O.getVGPRNum(ST.hasGFX90AInsts()));
+  return SGPRImportant ? (getSGPRNum() < O.getSGPRNum())
+                       : (getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold) <
+                          O.getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold));
 }
 
 Printable llvm::print(const GCNRegPressure &RP, const GCNSubtarget *ST,
-                      unsigned DynamicVGPRBlockSize) {
-  return Printable([&RP, ST, DynamicVGPRBlockSize](raw_ostream &OS) {
-    OS << "VGPRs: " << RP.getArchVGPRNum() << ' '
-       << "AGPRs: " << RP.getAGPRNum();
-    if (ST)
-      OS << "(O"
-         << ST->getOccupancyWithNumVGPRs(RP.getVGPRNum(ST->hasGFX90AInsts()),
-                                         DynamicVGPRBlockSize)
-         << ')';
-    OS << ", SGPRs: " << RP.getSGPRNum();
-    if (ST)
-      OS << "(O" << ST->getOccupancyWithNumSGPRs(RP.getSGPRNum()) << ')';
-    OS << ", LVGPR WT: " << RP.getVGPRTuplesWeight()
-       << ", LSGPR WT: " << RP.getSGPRTuplesWeight();
-    if (ST)
-      OS << " -> Occ: " << RP.getOccupancy(*ST, DynamicVGPRBlockSize);
-    OS << '\n';
-  });
+                      unsigned DynamicVGPRBlockSize,
+                      const MachineFunction *MF) {
+  // With a null ST or MF the threshold stays at max and total Occ is skipped.
+  unsigned ArchVGPRThreshold = std::numeric_limits<unsigned int>::max();
+  if (ST && MF)
+    ArchVGPRThreshold = ST->getMaxNumVectorRegs(MF->getFunction()).first;
+  return Printable(
+      [&RP, ST, DynamicVGPRBlockSize, ArchVGPRThreshold, MF](raw_ostream &OS) {
+        OS << "VGPRs: " << RP.getArchVGPRNum(ArchVGPRThreshold) << ' '
+           << "AGPRs: " << RP.getAGPRNum(ArchVGPRThreshold);
+        if (ST)
+          OS << "(O"
+             << ST->getOccupancyWithNumVGPRs(
+                    RP.getVGPRNum(ST->hasGFX90AInsts(), ArchVGPRThreshold),
+                    DynamicVGPRBlockSize)
+             << ')';
+        OS << ", SGPRs: " << RP.getSGPRNum();
+        if (ST)
+          OS << "(O" << ST->getOccupancyWithNumSGPRs(RP.getSGPRNum()) << ')';
+        OS << ", LVGPR WT: " << RP.getVGPRTuplesWeight(ArchVGPRThreshold)
+           << ", LSGPR WT: " << RP.getSGPRTuplesWeight();
+        if (ST && MF) // getOccupancy() needs MF, which may be null.
+          OS << " -> Occ: " << RP.getOccupancy(*MF);
+        OS << '\n';
+      });
 }
 
 static LaneBitmask getDefRegMask(const MachineOperand &MO,
@@ -398,8 +418,9 @@ void GCNRPTarget::setRegLimits(unsigned NumSGPRs, unsigned NumVGPRs,
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   unsigned DynamicVGPRBlockSize =
       MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
+  AddressableNumArchVGPRs = ST.getAddressableNumArchVGPRs();
   MaxSGPRs = std::min(ST.getAddressableNumSGPRs(), NumSGPRs);
-  MaxVGPRs = std::min(ST.getAddressableNumArchVGPRs(), NumVGPRs);
+  MaxVGPRs = std::min(AddressableNumArchVGPRs, NumVGPRs);
   MaxUnifiedVGPRs =
       ST.hasGFX90AInsts()
           ? std::min(ST.getAddressableNumVGPRs(DynamicVGPRBlockSize), NumVGPRs)
@@ -414,15 +435,21 @@ bool GCNRPTarget::isSaveBeneficial(Register Reg,
 
   if (SRI->isSGPRClass(RC))
     return RP.getSGPRNum() > MaxSGPRs;
-  unsigned NumVGPRs =
-      SRI->isAGPRClass(RC) ? RP.getAGPRNum() : RP.getArchVGPRNum();
+
+  bool ShouldUseAGPR =
+      SRI->isAGPRClass(RC) ||
+      (SRI->isVectorSuperClass(RC) &&
+       RP.getArchVGPRNum(AddressableNumArchVGPRs) >= AddressableNumArchVGPRs);
+  unsigned NumVGPRs = ShouldUseAGPR
+                          ? RP.getAGPRNum(AddressableNumArchVGPRs)
+                          : RP.getArchVGPRNum(AddressableNumArchVGPRs);
   return isVGPRBankSaveBeneficial(NumVGPRs);
 }
 
 bool GCNRPTarget::satisfied() const {
   if (RP.getSGPRNum() > MaxSGPRs)
     return false;
-  if (RP.getVGPRNum(false) > MaxVGPRs &&
+  if (RP.getVGPRNum(false, AddressableNumArchVGPRs) > MaxVGPRs &&
       (!CombineVGPRSavings || !satisifiesVGPRBanksTarget()))
     return false;
   return satisfiesUnifiedTarget();
@@ -876,10 +903,13 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
 
   OS << "---\nname: " << MF.getName() << "\nbody:             |\n";
 
-  auto printRP = [](const GCNRegPressure &RP) {
-    return Printable([&RP](raw_ostream &OS) {
+  auto printRP = [&MF](const GCNRegPressure &RP) {
+    return Printable([&RP, &MF](raw_ostream &OS) {
       OS << format(PFX "  %-5d", RP.getSGPRNum())
-         << format(" %-5d", RP.getVGPRNum(false));
+         << format(" %-5d", RP.getVGPRNum(false, MF.getSubtarget<GCNSubtarget>()
+                                                     .getMaxNumVectorRegs(
+                                                         MF.getFunction())
+                                                     .first));
     });
   };