@@ -528,6 +528,7 @@ GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
528
528
const MachineSchedContext *C, bool IsLegacyScheduler)
529
529
: GCNSchedStrategy(C) {
530
530
SchedStages.push_back (GCNSchedStageID::OccInitialSchedule);
531
+ SchedStages.push_back (GCNSchedStageID::AVGPRRewriteSchedule);
531
532
SchedStages.push_back (GCNSchedStageID::UnclusteredHighRPReschedule);
532
533
SchedStages.push_back (GCNSchedStageID::ClusteredLowOccupancyReschedule);
533
534
SchedStages.push_back (GCNSchedStageID::PreRARematerialize);
@@ -778,6 +779,8 @@ GCNScheduleDAGMILive::createSchedStage(GCNSchedStageID SchedStageID) {
778
779
switch (SchedStageID) {
779
780
case GCNSchedStageID::OccInitialSchedule:
780
781
return std::make_unique<OccInitialScheduleStage>(SchedStageID, *this );
782
+ case GCNSchedStageID::AVGPRRewriteSchedule:
783
+ return std::make_unique<AVGPRRewriteScheduleStage>(SchedStageID, *this );
781
784
case GCNSchedStageID::UnclusteredHighRPReschedule:
782
785
return std::make_unique<UnclusteredHighRPStage>(SchedStageID, *this );
783
786
case GCNSchedStageID::ClusteredLowOccupancyReschedule:
@@ -941,10 +944,14 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
941
944
Pressure.resize (Regions.size ());
942
945
RegionsWithHighRP.resize (Regions.size ());
943
946
RegionsWithExcessRP.resize (Regions.size ());
947
+ RegionsWithAVRegs.resize (Regions.size ());
948
+ RegionsWithExcessVGPRRP.resize (Regions.size ());
944
949
RegionsWithMinOcc.resize (Regions.size ());
945
950
RegionsWithIGLPInstrs.resize (Regions.size ());
946
951
RegionsWithHighRP.reset ();
947
952
RegionsWithExcessRP.reset ();
953
+ RegionsWithAVRegs.reset ();
954
+ RegionsWithExcessVGPRRP.reset ();
948
955
RegionsWithMinOcc.reset ();
949
956
RegionsWithIGLPInstrs.reset ();
950
957
@@ -1003,6 +1010,9 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const GCNSchedStageID &StageID) {
1003
1010
case GCNSchedStageID::OccInitialSchedule:
1004
1011
OS << " Max Occupancy Initial Schedule" ;
1005
1012
break ;
1013
+ case GCNSchedStageID::AVGPRRewriteSchedule:
1014
+ OS << " AVGPR Rewriting Reschedule" ;
1015
+ break ;
1006
1016
case GCNSchedStageID::UnclusteredHighRPReschedule:
1007
1017
OS << " Unclustered High Register Pressure Reschedule" ;
1008
1018
break ;
@@ -1036,6 +1046,78 @@ bool GCNSchedStage::initGCNSchedStage() {
1036
1046
return true ;
1037
1047
}
1038
1048
1049
+ bool AVGPRRewriteScheduleStage::reconstrainRegClass (
1050
+ Register Reg, const TargetRegisterClass *NewRC) const {
1051
+ const TargetInstrInfo *TII = MF.getSubtarget ().getInstrInfo ();
1052
+ const TargetRegisterClass *OldRC = DAG.MRI .getRegClass (Reg);
1053
+ const TargetRegisterInfo *TRI = DAG.MRI .getTargetRegisterInfo ();
1054
+ const TargetRegisterClass *ConstrainRC = NewRC;
1055
+ const SIRegisterInfo *SRI = MF.getSubtarget <GCNSubtarget>().getRegisterInfo ();
1056
+
1057
+ // Stop early if there is nothing to do.
1058
+ if (!NewRC || NewRC == OldRC)
1059
+ return false ;
1060
+
1061
+ // Accumulate constraints from all uses.
1062
+ for (MachineOperand &MO : DAG.MRI .reg_nodbg_operands (Reg)) {
1063
+ // Apply the effect of the given operand to NewRC.
1064
+ MachineInstr *MI = MO.getParent ();
1065
+ unsigned OpNo = &MO - &MI->getOperand (0 );
1066
+ ConstrainRC = MI->getRegClassConstraintEffect (OpNo, ConstrainRC, TII, TRI);
1067
+ if (!ConstrainRC)
1068
+ return false ;
1069
+ if (MI->isCopy ()) {
1070
+ MachineOperand &OtherOp = MI->getOperand (1 - OpNo);
1071
+ if (!OtherOp.isReg ())
1072
+ continue ;
1073
+
1074
+ if (!SRI->isVGPR (DAG.MRI , OtherOp.getReg ()))
1075
+ return false ;
1076
+ }
1077
+ }
1078
+ DAG.MRI .setRegClass (Reg, ConstrainRC);
1079
+ return true ;
1080
+ }
1081
+
1082
+ bool AVGPRRewriteScheduleStage::initGCNSchedStage () {
1083
+ const GCNSubtarget &ST = MF.getSubtarget <GCNSubtarget>();
1084
+
1085
+ // The main benefit of AVReg usage is that the register can be assigned to
1086
+ // either VGPR or AGPR. However, for the unified RF case, we should only be
1087
+ // using AGPR if strictly necessary. That is, if the required number of VGPRs
1088
+ // exceeds the addressable limit. Otherwise, we should be stricly using VGPRs
1089
+ // to minimize cross RC copies. Thus, if we are underc this limit, we should
1090
+ // constrain AVReg- > VReg.
1091
+ // TODO: AVReg constraining for non unified case.
1092
+ if (!ST.hasGFX90AInsts () || DAG.RegionsWithAVRegs .empty () ||
1093
+ DAG.RegionsWithExcessVGPRRP .any ())
1094
+ return false ;
1095
+
1096
+ const SIRegisterInfo *SRI = ST.getRegisterInfo ();
1097
+
1098
+ for (unsigned I = 0 , E = DAG.MRI .getNumVirtRegs (); I != E; ++I) {
1099
+ Register Reg = Register::index2VirtReg (I);
1100
+ if (!DAG.LIS ->hasInterval (Reg))
1101
+ continue ;
1102
+ const TargetRegisterClass *RC = DAG.MRI .getRegClass (Reg);
1103
+ if (!SRI->isVectorSuperClass (RC))
1104
+ continue ;
1105
+
1106
+ reconstrainRegClass (Reg, SRI->getEquivalentVGPRClass (RC));
1107
+ }
1108
+
1109
+ // TODO -- opposite case, inflate to AV when we have AVGPR + VGPR RP greater
1110
+ // than addressable limit.
1111
+
1112
+ // TODO - after we separate out AVGPR pressure from the e.g. getVGPRNum
1113
+ // pressure queries, we may need to update the cached RP.
1114
+
1115
+ // TODO - there is a benefit to rescheduling with the constraints, as the
1116
+ // generic trackers do not track AVGPR pressure. But we should teach the
1117
+ // default trackers about AVGPR rather than doing rescheduling here.
1118
+ return false ;
1119
+ }
1120
+
1039
1121
bool UnclusteredHighRPStage::initGCNSchedStage () {
1040
1122
if (DisableUnclusterHighRP)
1041
1123
return false ;
@@ -1278,6 +1360,9 @@ void GCNSchedStage::checkScheduling() {
1278
1360
LLVM_DEBUG (dbgs () << " Pressure after scheduling: " << print (PressureAfter));
1279
1361
LLVM_DEBUG (dbgs () << " Region: " << RegionIdx << " .\n " );
1280
1362
1363
+ if (PressureAfter.getAVGPRNum ())
1364
+ DAG.RegionsWithAVRegs [RegionIdx] = true ;
1365
+
1281
1366
unsigned DynamicVGPRBlockSize = DAG.MFI .getDynamicVGPRBlockSize ();
1282
1367
1283
1368
if (PressureAfter.getSGPRNum () <= S.SGPRCriticalLimit &&
@@ -1331,6 +1416,9 @@ void GCNSchedStage::checkScheduling() {
1331
1416
unsigned MaxArchVGPRs = std::min (MaxVGPRs, ST.getAddressableNumArchVGPRs ());
1332
1417
unsigned MaxSGPRs = ST.getMaxNumSGPRs (MF);
1333
1418
1419
+ if (PressureAfter.getArchVGPRNum () > ST.getAddressableNumArchVGPRs ())
1420
+ DAG.RegionsWithExcessVGPRRP [RegionIdx] = true ;
1421
+
1334
1422
if (PressureAfter.getVGPRNum (ST.hasGFX90AInsts ()) > MaxVGPRs ||
1335
1423
PressureAfter.getArchVGPRNum () > MaxArchVGPRs ||
1336
1424
PressureAfter.getAGPRNum () > MaxArchVGPRs ||
0 commit comments