Skip to content

Commit 9295849

Browse files
committed
[AMDGPU] More accurately account for AVGPR pressure
Change-Id: I6f129c2723b52a391a96178e390f60535164ac9b
1 parent 83dfdd8 commit 9295849

File tree

7 files changed

+726
-169
lines changed

7 files changed

+726
-169
lines changed

llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp

Lines changed: 16 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -447,11 +447,7 @@ void GCNIterativeScheduler::sortRegionsByPressure(unsigned TargetOcc) {
447447
// BestSchedules aren't deleted on fail.
448448
unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
449449
// TODO: assert Regions are sorted descending by pressure
450-
const auto &ST = MF.getSubtarget<GCNSubtarget>();
451-
const unsigned DynamicVGPRBlockSize =
452-
MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
453-
const auto Occ =
454-
Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize);
450+
const auto Occ = Regions.front()->MaxPressure.getOccupancy(MF);
455451
LLVM_DEBUG(dbgs() << "Trying to improve occupancy, target = " << TargetOcc
456452
<< ", current = " << Occ << '\n');
457453

@@ -460,7 +456,7 @@ unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
460456
// Always build the DAG to add mutations
461457
BuildDAG DAG(*R, *this);
462458

463-
if (R->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize) >= NewOcc)
459+
if (R->MaxPressure.getOccupancy(MF) >= NewOcc)
464460
continue;
465461

466462
LLVM_DEBUG(printRegion(dbgs(), R->Begin, R->End, LIS, 3);
@@ -471,7 +467,7 @@ unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
471467
LLVM_DEBUG(dbgs() << "Occupancy improvement attempt:\n";
472468
printSchedRP(dbgs(), R->MaxPressure, MaxRP));
473469

474-
NewOcc = std::min(NewOcc, MaxRP.getOccupancy(ST, DynamicVGPRBlockSize));
470+
NewOcc = std::min(NewOcc, MaxRP.getOccupancy(MF));
475471
if (NewOcc <= Occ)
476472
break;
477473

@@ -488,15 +484,12 @@ unsigned GCNIterativeScheduler::tryMaximizeOccupancy(unsigned TargetOcc) {
488484
}
489485

490486
void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
491-
bool TryMaximizeOccupancy) {
492-
const auto &ST = MF.getSubtarget<GCNSubtarget>();
487+
bool TryMaximizeOccupancy) {
493488
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
494489
auto TgtOcc = MFI->getMinAllowedOccupancy();
495-
unsigned DynamicVGPRBlockSize = MFI->getDynamicVGPRBlockSize();
496490

497491
sortRegionsByPressure(TgtOcc);
498-
auto Occ =
499-
Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize);
492+
auto Occ = Regions.front()->MaxPressure.getOccupancy(MF);
500493

501494
bool IsReentry = false;
502495
if (TryMaximizeOccupancy && Occ < TgtOcc) {
@@ -527,21 +520,19 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
527520
const auto RP = getRegionPressure(*R);
528521
LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP));
529522

530-
if (RP.getOccupancy(ST, DynamicVGPRBlockSize) < TgtOcc) {
523+
if (RP.getOccupancy(MF) < TgtOcc) {
531524
LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc);
532-
if (R->BestSchedule.get() && R->BestSchedule->MaxPressure.getOccupancy(
533-
ST, DynamicVGPRBlockSize) >= TgtOcc) {
525+
if (R->BestSchedule.get() &&
526+
R->BestSchedule->MaxPressure.getOccupancy(MF) >= TgtOcc) {
534527
LLVM_DEBUG(dbgs() << ", scheduling minimal register\n");
535528
scheduleBest(*R);
536529
} else {
537530
LLVM_DEBUG(dbgs() << ", restoring\n");
538531
Ovr.restoreOrder();
539-
assert(R->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize) >=
540-
TgtOcc);
532+
assert(R->MaxPressure.getOccupancy(MF) >= TgtOcc);
541533
}
542534
}
543-
FinalOccupancy =
544-
std::min(FinalOccupancy, RP.getOccupancy(ST, DynamicVGPRBlockSize));
535+
FinalOccupancy = std::min(FinalOccupancy, RP.getOccupancy(MF));
545536
}
546537
}
547538
MFI->limitOccupancy(FinalOccupancy);
@@ -582,16 +573,12 @@ void GCNIterativeScheduler::scheduleMinReg(bool force) {
582573
///////////////////////////////////////////////////////////////////////////////
583574
// ILP scheduler port
584575

585-
void GCNIterativeScheduler::scheduleILP(
586-
bool TryMaximizeOccupancy) {
587-
const auto &ST = MF.getSubtarget<GCNSubtarget>();
576+
void GCNIterativeScheduler::scheduleILP(bool TryMaximizeOccupancy) {
588577
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
589578
auto TgtOcc = MFI->getMinAllowedOccupancy();
590-
unsigned DynamicVGPRBlockSize = MFI->getDynamicVGPRBlockSize();
591579

592580
sortRegionsByPressure(TgtOcc);
593-
auto Occ =
594-
Regions.front()->MaxPressure.getOccupancy(ST, DynamicVGPRBlockSize);
581+
auto Occ = Regions.front()->MaxPressure.getOccupancy(MF);
595582

596583
bool IsReentry = false;
597584
if (TryMaximizeOccupancy && Occ < TgtOcc) {
@@ -612,18 +599,17 @@ void GCNIterativeScheduler::scheduleILP(
612599
const auto RP = getSchedulePressure(*R, ILPSchedule);
613600
LLVM_DEBUG(printSchedRP(dbgs(), R->MaxPressure, RP));
614601

615-
if (RP.getOccupancy(ST, DynamicVGPRBlockSize) < TgtOcc) {
602+
if (RP.getOccupancy(MF) < TgtOcc) {
616603
LLVM_DEBUG(dbgs() << "Didn't fit into target occupancy O" << TgtOcc);
617-
if (R->BestSchedule.get() && R->BestSchedule->MaxPressure.getOccupancy(
618-
ST, DynamicVGPRBlockSize) >= TgtOcc) {
604+
if (R->BestSchedule.get() &&
605+
R->BestSchedule->MaxPressure.getOccupancy(MF) >= TgtOcc) {
619606
LLVM_DEBUG(dbgs() << ", scheduling minimal register\n");
620607
scheduleBest(*R);
621608
}
622609
} else {
623610
scheduleRegion(*R, ILPSchedule, RP);
624611
LLVM_DEBUG(printSchedResult(dbgs(), R, RP));
625-
FinalOccupancy =
626-
std::min(FinalOccupancy, RP.getOccupancy(ST, DynamicVGPRBlockSize));
612+
FinalOccupancy = std::min(FinalOccupancy, RP.getOccupancy(MF));
627613
}
628614
}
629615
MFI->limitOccupancy(FinalOccupancy);

llvm/lib/Target/AMDGPU/GCNRegPressure.cpp

Lines changed: 93 additions & 63 deletions
Original file line number | Diff line number | Diff line change
@@ -99,20 +99,22 @@ void GCNRegPressure::inc(unsigned Reg,
9999
bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
100100
unsigned MaxOccupancy) const {
101101
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
102+
unsigned ArchVGPRThreshold = ST.getMaxNumVectorRegs(MF.getFunction()).first;
102103
unsigned DynamicVGPRBlockSize =
103104
MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
104105

105106
const auto SGPROcc = std::min(MaxOccupancy,
106107
ST.getOccupancyWithNumSGPRs(getSGPRNum()));
107108
const auto VGPROcc = std::min(
108-
MaxOccupancy, ST.getOccupancyWithNumVGPRs(getVGPRNum(ST.hasGFX90AInsts()),
109-
DynamicVGPRBlockSize));
109+
MaxOccupancy, ST.getOccupancyWithNumVGPRs(
110+
getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold),
111+
DynamicVGPRBlockSize));
110112
const auto OtherSGPROcc = std::min(MaxOccupancy,
111113
ST.getOccupancyWithNumSGPRs(O.getSGPRNum()));
112-
const auto OtherVGPROcc =
113-
std::min(MaxOccupancy,
114-
ST.getOccupancyWithNumVGPRs(O.getVGPRNum(ST.hasGFX90AInsts()),
115-
DynamicVGPRBlockSize));
114+
const auto OtherVGPROcc = std::min(
115+
MaxOccupancy, ST.getOccupancyWithNumVGPRs(
116+
O.getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold),
117+
DynamicVGPRBlockSize));
116118

117119
const auto Occ = std::min(SGPROcc, VGPROcc);
118120
const auto OtherOcc = std::min(OtherSGPROcc, OtherVGPROcc);
@@ -135,35 +137,39 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
135137
unsigned OtherVGPRForSGPRSpills =
136138
(OtherExcessSGPR + (WaveSize - 1)) / WaveSize;
137139

138-
unsigned MaxArchVGPRs = ST.getAddressableNumArchVGPRs();
139-
140140
// Unified excess pressure conditions, accounting for VGPRs used for SGPR
141141
// spills
142-
unsigned ExcessVGPR =
143-
std::max(static_cast<int>(getVGPRNum(ST.hasGFX90AInsts()) +
144-
VGPRForSGPRSpills - MaxVGPRs),
145-
0);
146-
unsigned OtherExcessVGPR =
147-
std::max(static_cast<int>(O.getVGPRNum(ST.hasGFX90AInsts()) +
148-
OtherVGPRForSGPRSpills - MaxVGPRs),
149-
0);
142+
unsigned ExcessVGPR = std::max(
143+
static_cast<int>(getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold) +
144+
VGPRForSGPRSpills - MaxVGPRs),
145+
0);
146+
unsigned OtherExcessVGPR = std::max(
147+
static_cast<int>(O.getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold) +
148+
OtherVGPRForSGPRSpills - MaxVGPRs),
149+
0);
150150
// Arch VGPR excess pressure conditions, accounting for VGPRs used for SGPR
151151
// spills
152-
unsigned ExcessArchVGPR = std::max(
153-
static_cast<int>(getVGPRNum(false) + VGPRForSGPRSpills - MaxArchVGPRs),
154-
0);
152+
unsigned AddressableArchVGPRs = ST.getAddressableNumArchVGPRs();
153+
unsigned ExcessArchVGPR =
154+
std::max(static_cast<int>(getVGPRNum(false, ArchVGPRThreshold) +
155+
VGPRForSGPRSpills - AddressableArchVGPRs),
156+
0);
155157
unsigned OtherExcessArchVGPR =
156-
std::max(static_cast<int>(O.getVGPRNum(false) + OtherVGPRForSGPRSpills -
157-
MaxArchVGPRs),
158+
std::max(static_cast<int>(O.getVGPRNum(false, ArchVGPRThreshold) +
159+
OtherVGPRForSGPRSpills - AddressableArchVGPRs),
158160
0);
159161
// AGPR excess pressure conditions
160-
unsigned ExcessAGPR = std::max(
161-
static_cast<int>(ST.hasGFX90AInsts() ? (getAGPRNum() - MaxArchVGPRs)
162-
: (getAGPRNum() - MaxVGPRs)),
163-
0);
162+
unsigned ExcessAGPR =
163+
std::max(static_cast<int>(
164+
ST.hasGFX90AInsts()
165+
? (getAGPRNum(ArchVGPRThreshold) - AddressableArchVGPRs)
166+
: (getAGPRNum(ArchVGPRThreshold) - MaxVGPRs)),
167+
0);
164168
unsigned OtherExcessAGPR = std::max(
165-
static_cast<int>(ST.hasGFX90AInsts() ? (O.getAGPRNum() - MaxArchVGPRs)
166-
: (O.getAGPRNum() - MaxVGPRs)),
169+
static_cast<int>(
170+
ST.hasGFX90AInsts()
171+
? (O.getAGPRNum(ArchVGPRThreshold) - AddressableArchVGPRs)
172+
: (O.getAGPRNum(ArchVGPRThreshold) - MaxVGPRs)),
167173
0);
168174

169175
bool ExcessRP = ExcessSGPR || ExcessVGPR || ExcessArchVGPR || ExcessAGPR;
@@ -184,14 +190,21 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
184190
return VGPRDiff > 0;
185191
if (SGPRDiff != 0) {
186192
unsigned PureExcessVGPR =
187-
std::max(static_cast<int>(getVGPRNum(ST.hasGFX90AInsts()) - MaxVGPRs),
193+
std::max(static_cast<int>(
194+
getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold) -
195+
MaxVGPRs),
188196
0) +
189-
std::max(static_cast<int>(getVGPRNum(false) - MaxArchVGPRs), 0);
197+
std::max(static_cast<int>(getVGPRNum(false, ArchVGPRThreshold) -
198+
AddressableArchVGPRs),
199+
0);
190200
unsigned OtherPureExcessVGPR =
191-
std::max(
192-
static_cast<int>(O.getVGPRNum(ST.hasGFX90AInsts()) - MaxVGPRs),
193-
0) +
194-
std::max(static_cast<int>(O.getVGPRNum(false) - MaxArchVGPRs), 0);
201+
std::max(static_cast<int>(
202+
O.getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold) -
203+
MaxVGPRs),
204+
0) +
205+
std::max(static_cast<int>(O.getVGPRNum(false, ArchVGPRThreshold) -
206+
AddressableArchVGPRs),
207+
0);
195208

196209
// If we have a special case where there is a tie in excess VGPR, but one
197210
// of the pressures has VGPR usage from SGPR spills, prefer the pressure
@@ -221,38 +234,45 @@ bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
221234
if (SW != OtherSW)
222235
return SW < OtherSW;
223236
} else {
224-
auto VW = getVGPRTuplesWeight();
225-
auto OtherVW = O.getVGPRTuplesWeight();
237+
auto VW = getVGPRTuplesWeight(ArchVGPRThreshold);
238+
auto OtherVW = O.getVGPRTuplesWeight(ArchVGPRThreshold);
226239
if (VW != OtherVW)
227240
return VW < OtherVW;
228241
}
229242
}
230243

231244
// Give final precedence to lower general RP.
232-
return SGPRImportant ? (getSGPRNum() < O.getSGPRNum()):
233-
(getVGPRNum(ST.hasGFX90AInsts()) <
234-
O.getVGPRNum(ST.hasGFX90AInsts()));
245+
return SGPRImportant ? (getSGPRNum() < O.getSGPRNum())
246+
: (getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold) <
247+
O.getVGPRNum(ST.hasGFX90AInsts(), ArchVGPRThreshold));
235248
}
236249

237250
Printable llvm::print(const GCNRegPressure &RP, const GCNSubtarget *ST,
238-
unsigned DynamicVGPRBlockSize) {
239-
return Printable([&RP, ST, DynamicVGPRBlockSize](raw_ostream &OS) {
240-
OS << "VGPRs: " << RP.getArchVGPRNum() << ' '
241-
<< "AGPRs: " << RP.getAGPRNum();
242-
if (ST)
243-
OS << "(O"
244-
<< ST->getOccupancyWithNumVGPRs(RP.getVGPRNum(ST->hasGFX90AInsts()),
245-
DynamicVGPRBlockSize)
246-
<< ')';
247-
OS << ", SGPRs: " << RP.getSGPRNum();
248-
if (ST)
249-
OS << "(O" << ST->getOccupancyWithNumSGPRs(RP.getSGPRNum()) << ')';
250-
OS << ", LVGPR WT: " << RP.getVGPRTuplesWeight()
251-
<< ", LSGPR WT: " << RP.getSGPRTuplesWeight();
252-
if (ST)
253-
OS << " -> Occ: " << RP.getOccupancy(*ST, DynamicVGPRBlockSize);
254-
OS << '\n';
255-
});
251+
unsigned DynamicVGPRBlockSize,
252+
const MachineFunction *MF) {
253+
unsigned ArchVGPRThreshold = std::numeric_limits<unsigned int>::max();
254+
if (ST && MF)
255+
ArchVGPRThreshold = ST->getMaxNumVectorRegs(MF->getFunction()).first;
256+
257+
return Printable(
258+
[&RP, ST, DynamicVGPRBlockSize, ArchVGPRThreshold, MF](raw_ostream &OS) {
259+
OS << "VGPRs: " << RP.getArchVGPRNum(ArchVGPRThreshold) << ' '
260+
<< "AGPRs: " << RP.getAGPRNum(ArchVGPRThreshold);
261+
if (ST)
262+
OS << "(O"
263+
<< ST->getOccupancyWithNumVGPRs(
264+
RP.getVGPRNum(ST->hasGFX90AInsts(), ArchVGPRThreshold),
265+
DynamicVGPRBlockSize)
266+
<< ')';
267+
OS << ", SGPRs: " << RP.getSGPRNum();
268+
if (ST)
269+
OS << "(O" << ST->getOccupancyWithNumSGPRs(RP.getSGPRNum()) << ')';
270+
OS << ", LVGPR WT: " << RP.getVGPRTuplesWeight(ArchVGPRThreshold)
271+
<< ", LSGPR WT: " << RP.getSGPRTuplesWeight();
272+
if (ST)
273+
OS << " -> Occ: " << RP.getOccupancy(*MF);
274+
OS << '\n';
275+
});
256276
}
257277

258278
static LaneBitmask getDefRegMask(const MachineOperand &MO,
@@ -398,8 +418,9 @@ void GCNRPTarget::setRegLimits(unsigned NumSGPRs, unsigned NumVGPRs,
398418
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
399419
unsigned DynamicVGPRBlockSize =
400420
MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize();
421+
AddressableNumArchVGPRs = ST.getAddressableNumArchVGPRs();
401422
MaxSGPRs = std::min(ST.getAddressableNumSGPRs(), NumSGPRs);
402-
MaxVGPRs = std::min(ST.getAddressableNumArchVGPRs(), NumVGPRs);
423+
MaxVGPRs = std::min(AddressableNumArchVGPRs, NumVGPRs);
403424
MaxUnifiedVGPRs =
404425
ST.hasGFX90AInsts()
405426
? std::min(ST.getAddressableNumVGPRs(DynamicVGPRBlockSize), NumVGPRs)
@@ -414,15 +435,21 @@ bool GCNRPTarget::isSaveBeneficial(Register Reg,
414435

415436
if (SRI->isSGPRClass(RC))
416437
return RP.getSGPRNum() > MaxSGPRs;
417-
unsigned NumVGPRs =
418-
SRI->isAGPRClass(RC) ? RP.getAGPRNum() : RP.getArchVGPRNum();
438+
439+
bool ShouldUseAGPR =
440+
SRI->isAGPRClass(RC) ||
441+
(SRI->isVectorSuperClass(RC) &&
442+
RP.getArchVGPRNum(AddressableNumArchVGPRs) >= AddressableNumArchVGPRs);
443+
unsigned NumVGPRs = ShouldUseAGPR
444+
? RP.getAGPRNum(AddressableNumArchVGPRs)
445+
: RP.getArchVGPRNum(AddressableNumArchVGPRs);
419446
return isVGPRBankSaveBeneficial(NumVGPRs);
420447
}
421448

422449
bool GCNRPTarget::satisfied() const {
423450
if (RP.getSGPRNum() > MaxSGPRs)
424451
return false;
425-
if (RP.getVGPRNum(false) > MaxVGPRs &&
452+
if (RP.getVGPRNum(false, AddressableNumArchVGPRs) > MaxVGPRs &&
426453
(!CombineVGPRSavings || !satisifiesVGPRBanksTarget()))
427454
return false;
428455
return satisfiesUnifiedTarget();
@@ -876,10 +903,13 @@ bool GCNRegPressurePrinter::runOnMachineFunction(MachineFunction &MF) {
876903

877904
OS << "---\nname: " << MF.getName() << "\nbody: |\n";
878905

879-
auto printRP = [](const GCNRegPressure &RP) {
880-
return Printable([&RP](raw_ostream &OS) {
906+
auto printRP = [&MF](const GCNRegPressure &RP) {
907+
return Printable([&RP, &MF](raw_ostream &OS) {
881908
OS << format(PFX " %-5d", RP.getSGPRNum())
882-
<< format(" %-5d", RP.getVGPRNum(false));
909+
<< format(" %-5d", RP.getVGPRNum(false, MF.getSubtarget<GCNSubtarget>()
910+
.getMaxNumVectorRegs(
911+
MF.getFunction())
912+
.first));
883913
});
884914
};
885915

0 commit comments

Comments
 (0)