Skip to content

[AMDGPU] More accurately account for AVGPR pressure #150711

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 16 additions & 30 deletions llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -447,11 +447,7 @@ void GCNIterativeScheduler::sortRegionsByPressure(unsigned TargetOcc) {
// BestSchedules aren't deleted on fail.
unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
// TODO: assert Regions are sorted descending by pressure
const auto &ST = MF.getSubtarget<GCNSubtarget>();
const unsigned DynamicVGPRBlockSize =
MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
const auto Occ =
Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize);
const auto Occ = Regions.front()->MaxPressure.getOccupancy(MF);
LLVM_DEBUG(dbgs() << "Trying to improve occupancy, target = " << TargetOcc
<< ", current = " << Occ << '\n');

Expand All @@ -460,7 +456,7 @@ unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
// Always build the DAG to add mutations
BuildDAG DAG(*R, *this);

if (R->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize) >= NewOcc)
if (R->MaxPressure.getOccupancy(MF) >= NewOcc)
continue;

LLVM_DEBUG(printRegion(dbgs(), R->Begin, R->End, LIS, 3);
Expand All @@ -471,7 +467,7 @@ unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
LLVM_DEBUG(dbgs() << "Occupancy improvement attempt:\n";
printSchedRP(dbgs(), R->MaxPressure, MaxRP));

NewOcc = std::min(NewOcc, MaxRP.getOccupancy(ST, DynamicVGPRBlockSize));
NewOcc = std::min(NewOcc, MaxRP.getOccupancy(MF));
if (NewOcc <= Occ)
break;

Expand All @@ -488,15 +484,12 @@ unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
}

void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
bool TryMaximizeOccupancy) {
const auto &ST = MF.getSubtarget<GCNSubtarget>();
bool TryMaximizeOccupancy) {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
auto TgtOcc = MFI->getMinAllowedOccupancy();
unsigned DynamicVGPRBlockSize = MFI->getDynamicVGPRBlockSize();

sortRegionsByPressure(TgtOcc);
auto Occ =
Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize);
auto Occ = Regions.front()->MaxPressure.getOccupancy(MF);

bool IsReentry = false;
if (TryMaximizeOccupancy && Occ < TgtOcc) {
Expand Down Expand Up @@ -527,21 +520,19 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
const auto RP = getRegionPressure(*R);
LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP));

if (RP.getOccupancy(ST, DynamicVGPRBlockSize) < TgtOcc) {
if (RP.getOccupancy(MF) < TgtOcc) {
LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc);
if (R->BestSchedule.get() && R->BestSchedule->MaxPressure.getOccupancy(
ST, DynamicVGPRBlockSize) >= TgtOcc) {
if (R->BestSchedule.get() &&
R->BestSchedule->MaxPressure.getOccupancy(MF) >= TgtOcc) {
LLVM_DEBUG(dbgs() << ", scheduling minimal register\n");
scheduleBest(*R);
} else {
LLVM_DEBUG(dbgs() << ", restoring\n");
Ovr.restoreOrder();
assert(R->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize) >=
TgtOcc);
assert(R->MaxPressure.getOccupancy(MF) >= TgtOcc);
}
}
FinalOccupancy =
std::min(FinalOccupancy, RP.getOccupancy(ST, DynamicVGPRBlockSize));
FinalOccupancy = std::min(FinalOccupancy, RP.getOccupancy(MF));
}
}
MFI->limitOccupancy(FinalOccupancy);
Expand Down Expand Up @@ -582,16 +573,12 @@ void GCNIterativeScheduler::scheduleMinReg(bool force) {
///////////////////////////////////////////////////////////////////////////////
// ILP scheduler port

void GCNIterativeScheduler::scheduleILP(
bool TryMaximizeOccupancy) {
const auto &ST = MF.getSubtarget<GCNSubtarget>();
void GCNIterativeScheduler::scheduleILP(bool TryMaximizeOccupancy) {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
auto TgtOcc = MFI->getMinAllowedOccupancy();
unsigned DynamicVGPRBlockSize = MFI->getDynamicVGPRBlockSize();

sortRegionsByPressure(TgtOcc);
auto Occ =
Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize);
auto Occ = Regions.front()->MaxPressure.getOccupancy(MF);

bool IsReentry = false;
if (TryMaximizeOccupancy && Occ < TgtOcc) {
Expand All @@ -612,18 +599,17 @@ void GCNIterativeScheduler::scheduleILP(
const auto RP = getSchedulePressure(*R, ILPSchedule);
LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP));

if (RP.getOccupancy(ST, DynamicVGPRBlockSize) < TgtOcc) {
if (RP.getOccupancy(MF) < TgtOcc) {
LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc);
if (R->BestSchedule.get() && R->BestSchedule->MaxPressure.getOccupancy(
ST, DynamicVGPRBlockSize) >= TgtOcc) {
if (R->BestSchedule.get() &&
R->BestSchedule->MaxPressure.getOccupancy(MF) >= TgtOcc) {
LLVM_DEBUG(dbgs() << ", scheduling minimal register\n");
scheduleBest(*R);
}
} else {
scheduleRegion(*R, ILPSchedule, RP);
LLVM_DEBUG(printSchedResult(dbgs(), R, RP));
FinalOccupancy =
std::min(FinalOccupancy, RP.getOccupancy(ST, DynamicVGPRBlockSize));
FinalOccupancy = std::min(FinalOccupancy, RP.getOccupancy(MF));
}
}
MFI->limitOccupancy(FinalOccupancy);
Expand Down
156 changes: 93 additions & 63 deletions llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,20 +99,22 @@ void GCNRegPressure::inc(unsigned Reg,
bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
unsigned MaxOccupancy) const {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
unsigned ArchVGPRThreshold = ST.getMaxNumVectorRegs(MF.getFunction()).first;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it make sense to make this a class member, so that we don't have to pass it on every call to getVGPRNum/getAGPRNum? It always seems to have the value ST.getMaxNumVectorRegs(MF.getFunction()).first, which should not change throughout the MF's lifetime.

unsigned DynamicVGPRBlockSize =
MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();

const auto SGPROcc = std::min(MaxOccupancy,
ST.getOccupancyWithNumSGPRs(getSGPRNum()));
const auto VGPROcc = std::min(
MaxOccupancy, ST.getOccupancyWithNumVGPRs(getVGPRNum(ST.hasGFX90AInsts()),
DynamicVGPRBlockSize));
MaxOccupancy, ST.getOccupancyWithNumVGPRs(
getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold),
DynamicVGPRBlockSize));
const auto OtherSGPROcc = std::min(MaxOccupancy,
ST.getOccupancyWithNumSGPRs(O.getSGPRNum()));
const auto OtherVGPROcc =
std::min(MaxOccupancy,
ST.getOccupancyWithNumVGPRs(O.getVGPRNum(ST.hasGFX90AInsts()),
DynamicVGPRBlockSize));
const auto OtherVGPROcc = std::min(
MaxOccupancy, ST.getOccupancyWithNumVGPRs(
O.getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold),
DynamicVGPRBlockSize));

const auto Occ = std::min(SGPROcc, VGPROcc);
const auto OtherOcc = std::min(OtherSGPROcc, OtherVGPROcc);
Expand All @@ -135,35 +137,39 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
unsigned OtherVGPRForSGPRSpills =
(OtherExcessSGPR + (WaveSize - 1)) / WaveSize;

unsigned MaxArchVGPRs = ST.getAddressableNumArchVGPRs();

// Unified excess pressure conditions, accounting for VGPRs used for SGPR
// spills
unsigned ExcessVGPR =
std::max(static_cast<int>(getVGPRNum(ST.hasGFX90AInsts()) +
VGPRForSGPRSpills - MaxVGPRs),
0);
unsigned OtherExcessVGPR =
std::max(static_cast<int>(O.getVGPRNum(ST.hasGFX90AInsts()) +
OtherVGPRForSGPRSpills - MaxVGPRs),
0);
unsigned ExcessVGPR = std::max(
static_cast<int>(getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold) +
VGPRForSGPRSpills - MaxVGPRs),
0);
unsigned OtherExcessVGPR = std::max(
static_cast<int>(O.getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold) +
OtherVGPRForSGPRSpills - MaxVGPRs),
0);
// Arch VGPR excess pressure conditions, accounting for VGPRs used for SGPR
// spills
unsigned ExcessArchVGPR = std::max(
static_cast<int>(getVGPRNum(false) + VGPRForSGPRSpills - MaxArchVGPRs),
0);
unsigned AddressableArchVGPRs = ST.getAddressableNumArchVGPRs();
unsigned ExcessArchVGPR =
std::max(static_cast<int>(getVGPRNum(false, ArchVGPRThreshold) +
VGPRForSGPRSpills - AddressableArchVGPRs),
0);
unsigned OtherExcessArchVGPR =
std::max(static_cast<int>(O.getVGPRNum(false) + OtherVGPRForSGPRSpills -
MaxArchVGPRs),
std::max(static_cast<int>(O.getVGPRNum(false, ArchVGPRThreshold) +
OtherVGPRForSGPRSpills - AddressableArchVGPRs),
0);
// AGPR excess pressure conditions
unsigned ExcessAGPR = std::max(
static_cast<int>(ST.hasGFX90AInsts() ? (getAGPRNum() - MaxArchVGPRs)
: (getAGPRNum() - MaxVGPRs)),
0);
unsigned ExcessAGPR =
std::max(static_cast<int>(
ST.hasGFX90AInsts()
? (getAGPRNum(ArchVGPRThreshold) - AddressableArchVGPRs)
: (getAGPRNum(ArchVGPRThreshold) - MaxVGPRs)),
0);
unsigned OtherExcessAGPR = std::max(
static_cast<int>(ST.hasGFX90AInsts() ? (O.getAGPRNum() - MaxArchVGPRs)
: (O.getAGPRNum() - MaxVGPRs)),
static_cast<int>(
ST.hasGFX90AInsts()
? (O.getAGPRNum(ArchVGPRThreshold) - AddressableArchVGPRs)
: (O.getAGPRNum(ArchVGPRThreshold) - MaxVGPRs)),
0);

bool ExcessRP = ExcessSGPR || ExcessVGPR || ExcessArchVGPR || ExcessAGPR;
Expand All @@ -184,14 +190,21 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
return VGPRDiff > 0;
if (SGPRDiff != 0) {
unsigned PureExcessVGPR =
std::max(static_cast<int>(getVGPRNum(ST.hasGFX90AInsts()) - MaxVGPRs),
std::max(static_cast<int>(
getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold) -
MaxVGPRs),
0) +
std::max(static_cast<int>(getVGPRNum(false) - MaxArchVGPRs), 0);
std::max(static_cast<int>(getVGPRNum(false, ArchVGPRThreshold) -
AddressableArchVGPRs),
0);
unsigned OtherPureExcessVGPR =
std::max(
static_cast<int>(O.getVGPRNum(ST.hasGFX90AInsts()) - MaxVGPRs),
0) +
std::max(static_cast<int>(O.getVGPRNum(false) - MaxArchVGPRs), 0);
std::max(static_cast<int>(
O.getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold) -
MaxVGPRs),
0) +
std::max(static_cast<int>(O.getVGPRNum(false, ArchVGPRThreshold) -
AddressableArchVGPRs),
0);

// If we have a special case where there is a tie in excess VGPR, but one
// of the pressures has VGPR usage from SGPR spills, prefer the pressure
Expand Down Expand Up @@ -221,38 +234,45 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
if (SW != OtherSW)
return SW < OtherSW;
} else {
auto VW = getVGPRTuplesWeight();
auto OtherVW = O.getVGPRTuplesWeight();
auto VW = getVGPRTuplesWeight(ArchVGPRThreshold);
auto OtherVW = O.getVGPRTuplesWeight(ArchVGPRThreshold);
if (VW != OtherVW)
return VW < OtherVW;
}
}

// Give final precedence to lower general RP.
return SGPRImportant ? (getSGPRNum() < O.getSGPRNum()):
(getVGPRNum(ST.hasGFX90AInsts()) <
O.getVGPRNum(ST.hasGFX90AInsts()));
return SGPRImportant ? (getSGPRNum() < O.getSGPRNum())
: (getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold) <
O.getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold));
}

Printable llvm::print(const GCNRegPressure &RP, const GCNSubtarget *ST,
unsigned DynamicVGPRBlockSize) {
return Printable([&RP, ST, DynamicVGPRBlockSize](raw_ostream &OS) {
OS << "VGPRs: " << RP.getArchVGPRNum() << ' '
<< "AGPRs: " << RP.getAGPRNum();
if (ST)
OS << "(O"
<< ST->getOccupancyWithNumVGPRs(RP.getVGPRNum(ST->hasGFX90AInsts()),
DynamicVGPRBlockSize)
<< ')';
OS << ", SGPRs: " << RP.getSGPRNum();
if (ST)
OS << "(O" << ST->getOccupancyWithNumSGPRs(RP.getSGPRNum()) << ')';
OS << ", LVGPR WT: " << RP.getVGPRTuplesWeight()
<< ", LSGPR WT: " << RP.getSGPRTuplesWeight();
if (ST)
OS << " -> Occ: " << RP.getOccupancy(*ST, DynamicVGPRBlockSize);
OS << '\n';
});
unsigned DynamicVGPRBlockSize,
const MachineFunction *MF) {
unsigned ArchVGPRThreshold = std::numeric_limits<unsigned int>::max();
if (ST && MF)
ArchVGPRThreshold = ST->getMaxNumVectorRegs(MF->getFunction()).first;

return Printable(
[&RP, ST, DynamicVGPRBlockSize, ArchVGPRThreshold, MF](raw_ostream &OS) {
OS << "VGPRs: " << RP.getArchVGPRNum(ArchVGPRThreshold) << ' '
<< "AGPRs: " << RP.getAGPRNum(ArchVGPRThreshold);
if (ST)
OS << "(O"
<< ST->getOccupancyWithNumVGPRs(
RP.getVGPRNum(ST->hasGFX90AInsts(), ArchVGPRThreshold),
DynamicVGPRBlockSize)
<< ')';
OS << ", SGPRs: " << RP.getSGPRNum();
if (ST)
OS << "(O" << ST->getOccupancyWithNumSGPRs(RP.getSGPRNum()) << ')';
OS << ", LVGPR WT: " << RP.getVGPRTuplesWeight(ArchVGPRThreshold)
<< ", LSGPR WT: " << RP.getSGPRTuplesWeight();
if (ST)
OS << " -> Occ: " << RP.getOccupancy(*MF);
OS << '\n';
});
}

static LaneBitmask getDefRegMask(const MachineOperand &MO,
Expand Down Expand Up @@ -398,8 +418,9 @@ void GCNRPTarget::setRegLimits(unsigned NumSGPRs, unsigned NumVGPRs,
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
unsigned DynamicVGPRBlockSize =
MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
AddressableNumArchVGPRs = ST.getAddressableNumArchVGPRs();
MaxSGPRs = std::min(ST.getAddressableNumSGPRs(), NumSGPRs);
MaxVGPRs = std::min(ST.getAddressableNumArchVGPRs(), NumVGPRs);
MaxVGPRs = std::min(AddressableNumArchVGPRs, NumVGPRs);
MaxUnifiedVGPRs =
ST.hasGFX90AInsts()
? std::min(ST.getAddressableNumVGPRs(DynamicVGPRBlockSize), NumVGPRs)
Expand All @@ -414,15 +435,21 @@ bool GCNRPTarget::isSaveBeneficial(Register Reg,

if (SRI->isSGPRClass(RC))
return RP.getSGPRNum() > MaxSGPRs;
unsigned NumVGPRs =
SRI->isAGPRClass(RC) ? RP.getAGPRNum() : RP.getArchVGPRNum();

bool ShouldUseAGPR =
SRI->isAGPRClass(RC) ||
(SRI->isVectorSuperClass(RC) &&
RP.getArchVGPRNum(AddressableNumArchVGPRs) >= AddressableNumArchVGPRs);
unsigned NumVGPRs = ShouldUseAGPR
? RP.getAGPRNum(AddressableNumArchVGPRs)
: RP.getArchVGPRNum(AddressableNumArchVGPRs);
return isVGPRBankSaveBeneficial(NumVGPRs);
}

bool GCNRPTarget::satisfied() const {
if (RP.getSGPRNum() > MaxSGPRs)
return false;
if (RP.getVGPRNum(false) > MaxVGPRs &&
if (RP.getVGPRNum(false, AddressableNumArchVGPRs) > MaxVGPRs &&
(!CombineVGPRSavings || !satisifiesVGPRBanksTarget()))
return false;
return satisfiesUnifiedTarget();
Expand Down Expand Up @@ -876,10 +903,13 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {

OS << "---\nname: " << MF.getName() << "\nbody: |\n";

auto printRP = [](const GCNRegPressure &RP) {
return Printable([&RP](raw_ostream &OS) {
auto printRP = [&MF](const GCNRegPressure &RP) {
return Printable([&RP, &MF](raw_ostream &OS) {
OS << format(PFX " %-5d", RP.getSGPRNum())
<< format(" %-5d", RP.getVGPRNum(false));
<< format(" %-5d", RP.getVGPRNum(false, MF.getSubtarget<GCNSubtarget>()
.getMaxNumVectorRegs(
MF.getFunction())
.first));
});
};

Expand Down
Loading