Skip to content

Commit e3dd5ac

Browse files
committed
[AMDGPU] Constrain AV->VReg if we do not exceed RP thresholds
Change-Id: I17cb012504946fa9dca88b32548f922e2ce4b7a9
1 parent c75b24e commit e3dd5ac

File tree

5 files changed

+329
-33
lines changed

5 files changed

+329
-33
lines changed

llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -528,6 +528,7 @@ GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
528528
const MachineSchedContext *C, bool IsLegacyScheduler)
529529
: GCNSchedStrategy(C) {
530530
SchedStages.push_back(GCNSchedStageID::OccInitialSchedule);
531+
SchedStages.push_back(GCNSchedStageID::AVGPRRewriteSchedule);
531532
SchedStages.push_back(GCNSchedStageID::UnclusteredHighRPReschedule);
532533
SchedStages.push_back(GCNSchedStageID::ClusteredLowOccupancyReschedule);
533534
SchedStages.push_back(GCNSchedStageID::PreRARematerialize);
@@ -778,6 +779,8 @@ GCNScheduleDAGMILive::createSchedStage(GCNSchedStageID SchedStageID) {
778779
switch (SchedStageID) {
779780
case GCNSchedStageID::OccInitialSchedule:
780781
return std::make_unique<OccInitialScheduleStage>(SchedStageID, *this);
782+
case GCNSchedStageID::AVGPRRewriteSchedule:
783+
return std::make_unique<AVGPRRewriteScheduleStage>(SchedStageID, *this);
781784
case GCNSchedStageID::UnclusteredHighRPReschedule:
782785
return std::make_unique<UnclusteredHighRPStage>(SchedStageID, *this);
783786
case GCNSchedStageID::ClusteredLowOccupancyReschedule:
@@ -941,10 +944,14 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
941944
Pressure.resize(Regions.size());
942945
RegionsWithHighRP.resize(Regions.size());
943946
RegionsWithExcessRP.resize(Regions.size());
947+
RegionsWithAVRegs.resize(Regions.size());
948+
RegionsWithExcessVGPRRP.resize(Regions.size());
944949
RegionsWithMinOcc.resize(Regions.size());
945950
RegionsWithIGLPInstrs.resize(Regions.size());
946951
RegionsWithHighRP.reset();
947952
RegionsWithExcessRP.reset();
953+
RegionsWithAVRegs.reset();
954+
RegionsWithExcessVGPRRP.reset();
948955
RegionsWithMinOcc.reset();
949956
RegionsWithIGLPInstrs.reset();
950957

@@ -1003,6 +1010,9 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const GCNSchedStageID &StageID) {
10031010
case GCNSchedStageID::OccInitialSchedule:
10041011
OS << "Max Occupancy Initial Schedule";
10051012
break;
1013+
case GCNSchedStageID::AVGPRRewriteSchedule:
1014+
OS << "AVGPR Rewriting Reschedule";
1015+
break;
10061016
case GCNSchedStageID::UnclusteredHighRPReschedule:
10071017
OS << "Unclustered High Register Pressure Reschedule";
10081018
break;
@@ -1036,6 +1046,78 @@ bool GCNSchedStage::initGCNSchedStage() {
10361046
return true;
10371047
}
10381048

1049+
/// Try to narrow the register class of \p Reg to \p NewRC.
///
/// Every non-debug operand referencing \p Reg is queried through
/// MachineInstr::getRegClassConstraintEffect so the candidate class is
/// restricted to something all users accept. The rewrite is abandoned when an
/// operand leaves no legal class, or when \p Reg is copied to/from a register
/// for which SIRegisterInfo::isVGPR is false.
///
/// \param Reg   Virtual register whose class should be constrained.
/// \param NewRC Requested class; a null pointer or the current class of
///              \p Reg is a no-op.
/// \returns true if a class was installed on \p Reg via setRegClass, false if
///          \p Reg was left untouched.
bool AVGPRRewriteScheduleStage::reconstrainRegClass(
    Register Reg, const TargetRegisterClass *NewRC) const {
  // Stop early if there is nothing to do.
  if (!NewRC || NewRC == DAG.MRI.getRegClass(Reg))
    return false;

  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo *TRI = DAG.MRI.getTargetRegisterInfo();
  const SIRegisterInfo *SRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();

  // Accumulate constraints from all uses and defs.
  const TargetRegisterClass *ConstrainRC = NewRC;
  for (MachineOperand &MO : DAG.MRI.reg_nodbg_operands(Reg)) {
    // Apply the effect of the given operand to the candidate class.
    MachineInstr *MI = MO.getParent();
    const unsigned OpNo = &MO - &MI->getOperand(0);
    ConstrainRC = MI->getRegClassConstraintEffect(OpNo, ConstrainRC, TII, TRI);
    if (!ConstrainRC)
      return false;

    // Only the explicit dst/src operands (indices 0 and 1) of a COPY have a
    // partner on the other side. For any higher index, e.g. an implicit
    // operand, the unsigned expression `1 - OpNo` would wrap around and index
    // far out of range, so skip the partner check for those.
    if (!MI->isCopy() || OpNo > 1)
      continue;

    const MachineOperand &OtherOp = MI->getOperand(1 - OpNo);
    if (!OtherOp.isReg())
      continue;

    // Give up on this register if the other end of the copy is not a VGPR.
    if (!SRI->isVGPR(DAG.MRI, OtherOp.getReg()))
      return false;
  }

  DAG.MRI.setRegClass(Reg, ConstrainRC);
  return true;
}
1081+
1082+
bool AVGPRRewriteScheduleStage::initGCNSchedStage() {
1083+
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1084+
1085+
// The main benefit of AVReg usage is that the register can be assigned to
1086+
// either VGPR or AGPR. However, for the unified RF case, we should only be
1087+
// using AGPR if strictly necessary. That is, if the required number of VGPRs
1088+
// exceeds the addressable limit. Otherwise, we should be stricly using VGPRs
1089+
// to minimize cross RC copies. Thus, if we are underc this limit, we should
1090+
// constrain AVReg- > VReg.
1091+
// TODO: AVReg constraining for non unified case.
1092+
if (!ST.hasGFX90AInsts() || DAG.RegionsWithAVRegs.empty() ||
1093+
DAG.RegionsWithExcessVGPRRP.any())
1094+
return false;
1095+
1096+
const SIRegisterInfo *SRI = ST.getRegisterInfo();
1097+
1098+
for (unsigned I = 0, E = DAG.MRI.getNumVirtRegs(); I != E; ++I) {
1099+
Register Reg = Register::index2VirtReg(I);
1100+
if (!DAG.LIS->hasInterval(Reg))
1101+
continue;
1102+
const TargetRegisterClass *RC = DAG.MRI.getRegClass(Reg);
1103+
if (!SRI->isVectorSuperClass(RC))
1104+
continue;
1105+
1106+
reconstrainRegClass(Reg, SRI->getEquivalentVGPRClass(RC));
1107+
}
1108+
1109+
// TODO -- opposite case, inflate to AV when we have AVGPR + VGPR RP greater
1110+
// than addressable limit.
1111+
1112+
// TODO - after we separate out AVGPR pressure from the e.g. getVGPRNum
1113+
// pressure queries, we may need to update the cached RP.
1114+
1115+
// TODO - there is a benefit to rescheduling with the constraints, as the
1116+
// generic trackers do not track AVGPR pressure. But we should teach the
1117+
// default trackers about AVGPR rather than doing rescheduling here.
1118+
return false;
1119+
}
1120+
10391121
bool UnclusteredHighRPStage::initGCNSchedStage() {
10401122
if (DisableUnclusterHighRP)
10411123
return false;
@@ -1278,6 +1360,9 @@ void GCNSchedStage::checkScheduling() {
12781360
LLVM_DEBUG(dbgs() << "Pressure after scheduling: " << print(PressureAfter));
12791361
LLVM_DEBUG(dbgs() << "Region: " << RegionIdx << ".\n");
12801362

1363+
if (PressureAfter.getAVGPRNum())
1364+
DAG.RegionsWithAVRegs[RegionIdx] = true;
1365+
12811366
unsigned DynamicVGPRBlockSize = DAG.MFI.getDynamicVGPRBlockSize();
12821367

12831368
if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
@@ -1331,6 +1416,9 @@ void GCNSchedStage::checkScheduling() {
13311416
unsigned MaxArchVGPRs = std::min(MaxVGPRs, ST.getAddressableNumArchVGPRs());
13321417
unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
13331418

1419+
if (PressureAfter.getArchVGPRNum() > ST.getAddressableNumArchVGPRs())
1420+
DAG.RegionsWithExcessVGPRRP[RegionIdx] = true;
1421+
13341422
if (PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) > MaxVGPRs ||
13351423
PressureAfter.getArchVGPRNum() > MaxArchVGPRs ||
13361424
PressureAfter.getAGPRNum() > MaxArchVGPRs ||

llvm/lib/Target/AMDGPU/GCNSchedStrategy.h

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,12 @@ class GCNSchedStage;
2828

2929
enum class GCNSchedStageID : unsigned {
3030
OccInitialSchedule = 0,
31-
UnclusteredHighRPReschedule = 1,
32-
ClusteredLowOccupancyReschedule = 2,
33-
PreRARematerialize = 3,
34-
ILPInitialSchedule = 4,
35-
MemoryClauseInitialSchedule = 5
31+
AVGPRRewriteSchedule = 1,
32+
UnclusteredHighRPReschedule = 2,
33+
ClusteredLowOccupancyReschedule = 3,
34+
PreRARematerialize = 4,
35+
ILPInitialSchedule = 5,
36+
MemoryClauseInitialSchedule = 6
3637
};
3738

3839
#ifndef NDEBUG
@@ -224,6 +225,7 @@ using RegionBoundaries =
224225
class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
225226
friend class GCNSchedStage;
226227
friend class OccInitialScheduleStage;
228+
friend class AVGPRRewriteScheduleStage;
227229
friend class UnclusteredHighRPStage;
228230
friend class ClusteredLowOccStage;
229231
friend class PreRARematStage;
@@ -250,9 +252,15 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
250252
// limit. Register pressure in these regions usually will result in spilling.
251253
BitVector RegionsWithExcessRP;
252254

255+
// Regions whose ArchVGPR RP exceeds the addressable ArchVGPR limit.
256+
BitVector RegionsWithExcessVGPRRP;
257+
253258
// Regions that have the same occupancy as the latest MinOccupancy
254259
BitVector RegionsWithMinOcc;
255260

261+
// Regions which use the AV RC.
262+
BitVector RegionsWithAVRegs;
263+
256264
// Regions that have IGLP instructions (SCHED_GROUP_BARRIER or IGLP_OPT).
257265
BitVector RegionsWithIGLPInstrs;
258266

@@ -401,6 +409,18 @@ class OccInitialScheduleStage : public GCNSchedStage {
401409
: GCNSchedStage(StageID, DAG) {}
402410
};
403411

412+
class AVGPRRewriteScheduleStage : public GCNSchedStage {
413+
private:
414+
bool reconstrainRegClass(Register Reg,
415+
const TargetRegisterClass *NewRC) const;
416+
417+
public:
418+
bool initGCNSchedStage() override;
419+
420+
AVGPRRewriteScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
421+
: GCNSchedStage(StageID, DAG) {}
422+
};
423+
404424
class UnclusteredHighRPStage : public GCNSchedStage {
405425
private:
406426
// Save the initial occupancy before starting this stage.

0 commit comments

Comments
 (0)