-
Notifications
You must be signed in to change notification settings - Fork 14.7k
[MachineScheduler] Make cluster check more efficient #150884
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-powerpc @llvm/pr-subscribers-backend-amdgpu Author: Ruiling, Song (ruiling) ChangesFull diff: https://github.com/llvm/llvm-project/pull/150884.diff 5 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h
index efda7eb8ffc8d..5a2aee2fa7643 100644
--- a/llvm/include/llvm/CodeGen/MachineScheduler.h
+++ b/llvm/include/llvm/CodeGen/MachineScheduler.h
@@ -1303,8 +1303,8 @@ class LLVM_ABI GenericScheduler : public GenericSchedulerBase {
SchedBoundary Top;
SchedBoundary Bot;
- ClusterInfo *TopCluster;
- ClusterInfo *BotCluster;
+ unsigned TopClusterID;
+ unsigned BotClusterID;
/// Candidate last picked from Top boundary.
SchedCandidate TopCand;
@@ -1346,8 +1346,8 @@ class LLVM_ABI PostGenericScheduler : public GenericSchedulerBase {
/// Candidate last picked from Bot boundary.
SchedCandidate BotCand;
- ClusterInfo *TopCluster;
- ClusterInfo *BotCluster;
+ unsigned TopClusterID;
+ unsigned BotClusterID;
public:
PostGenericScheduler(const MachineSchedContext *C)
diff --git a/llvm/include/llvm/CodeGen/ScheduleDAG.h b/llvm/include/llvm/CodeGen/ScheduleDAG.h
index 3a0a31b1930b0..122b7be96b46a 100644
--- a/llvm/include/llvm/CodeGen/ScheduleDAG.h
+++ b/llvm/include/llvm/CodeGen/ScheduleDAG.h
@@ -240,6 +240,11 @@ class TargetRegisterInfo;
typedef SmallSet<SUnit *, 8> ClusterInfo;
constexpr unsigned InvalidClusterId = ~0u;
+ /// Return whether the input cluster ID's are the same and valid.
+ inline bool isTheSameCluster(unsigned A, unsigned B) {
+ return A != InvalidClusterId && A == B;
+ }
+
/// Scheduling unit. This is a node in the scheduling DAG.
class SUnit {
private:
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index 9d5c39ce7ae76..454eb4b32b338 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -3676,8 +3676,8 @@ void GenericScheduler::initialize(ScheduleDAGMI *dag) {
TopCand.SU = nullptr;
BotCand.SU = nullptr;
- TopCluster = nullptr;
- BotCluster = nullptr;
+ TopClusterID = InvalidClusterId;
+ BotClusterID = InvalidClusterId;
}
/// Initialize the per-region scheduling policy.
@@ -3988,10 +3988,14 @@ bool GenericScheduler::tryCandidate(SchedCandidate &Cand,
// This is a best effort to set things up for a post-RA pass. Optimizations
// like generating loads of multiple registers should ideally be done within
// the scheduler pass by combining the loads during DAG postprocessing.
- const ClusterInfo *CandCluster = Cand.AtTop ? TopCluster : BotCluster;
- const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster;
- if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU),
- CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand,
+ unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
+ unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;
+ bool CandIsClusterSucc =
+ isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
+ bool TryCandIsClusterSucc =
+ isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
+
+ if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
Cluster))
return TryCand.Reason != NoCand;
@@ -4251,8 +4255,9 @@ void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {
void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
if (IsTopNode) {
SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
- TopCluster = DAG->getCluster(SU->ParentClusterIdx);
- LLVM_DEBUG(if (TopCluster) {
+ TopClusterID = SU->ParentClusterIdx;
+ LLVM_DEBUG(if (TopClusterID != InvalidClusterId) {
+ ClusterInfo *TopCluster = DAG->getCluster(TopClusterID);
dbgs() << " Top Cluster: ";
for (auto *N : *TopCluster)
dbgs() << N->NodeNum << '\t';
@@ -4263,8 +4268,9 @@ void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
reschedulePhysReg(SU, true);
} else {
SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
- BotCluster = DAG->getCluster(SU->ParentClusterIdx);
- LLVM_DEBUG(if (BotCluster) {
+ BotClusterID = SU->ParentClusterIdx;
+ LLVM_DEBUG(if (BotClusterID != InvalidClusterId) {
+ ClusterInfo *BotCluster = DAG->getCluster(BotClusterID);
dbgs() << " Bot Cluster: ";
for (auto *N : *BotCluster)
dbgs() << N->NodeNum << '\t';
@@ -4306,8 +4312,8 @@ void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
if (!Bot.HazardRec) {
Bot.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
}
- TopCluster = nullptr;
- BotCluster = nullptr;
+ TopClusterID = InvalidClusterId;
+ BotClusterID = InvalidClusterId;
}
void PostGenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
@@ -4373,10 +4379,14 @@ bool PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
return TryCand.Reason != NoCand;
// Keep clustered nodes together.
- const ClusterInfo *CandCluster = Cand.AtTop ? TopCluster : BotCluster;
- const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster;
- if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU),
- CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand,
+ unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
+ unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;
+ bool CandIsClusterSucc =
+ isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
+ bool TryCandIsClusterSucc =
+ isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
+
+ if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
Cluster))
return TryCand.Reason != NoCand;
// Avoid critical resource consumption and balance the schedule.
@@ -4575,11 +4585,11 @@ SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
if (IsTopNode) {
SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
- TopCluster = DAG->getCluster(SU->ParentClusterIdx);
+ TopClusterID = SU->ParentClusterIdx;
Top.bumpNode(SU);
} else {
SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
- BotCluster = DAG->getCluster(SU->ParentClusterIdx);
+ BotClusterID = SU->ParentClusterIdx;
Bot.bumpNode(SU);
}
}
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index a6553083d722b..b7622d143dabd 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -592,10 +592,13 @@ bool GCNMaxILPSchedStrategy::tryCandidate(SchedCandidate &Cand,
// This is a best effort to set things up for a post-RA pass. Optimizations
// like generating loads of multiple registers should ideally be done within
// the scheduler pass by combining the loads during DAG postprocessing.
- const ClusterInfo *CandCluster = Cand.AtTop ? TopCluster : BotCluster;
- const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster;
- if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU),
- CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand,
+ unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
+ unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;
+ bool CandIsClusterSucc =
+ isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
+ bool TryCandIsClusterSucc =
+ isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
+ if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
Cluster))
return TryCand.Reason != NoCand;
@@ -666,10 +669,13 @@ bool GCNMaxMemoryClauseSchedStrategy::tryCandidate(SchedCandidate &Cand,
// MaxMemoryClause-specific: We prioritize clustered instructions as we would
// get more benefit from clausing these memory instructions.
- const ClusterInfo *CandCluster = Cand.AtTop ? TopCluster : BotCluster;
- const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster;
- if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU),
- CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand,
+ unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
+ unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;
+ bool CandIsClusterSucc =
+ isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
+ bool TryCandIsClusterSucc =
+ isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
+ if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
Cluster))
return TryCand.Reason != NoCand;
diff --git a/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp b/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp
index 5eb1f0128643d..b7e2263424863 100644
--- a/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp
@@ -100,10 +100,14 @@ bool PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand,
// This is a best effort to set things up for a post-RA pass. Optimizations
// like generating loads of multiple registers should ideally be done within
// the scheduler pass by combining the loads during DAG postprocessing.
- const ClusterInfo *CandCluster = Cand.AtTop ? TopCluster : BotCluster;
- const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster;
- if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU),
- CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand,
+ unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
+ unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;
+ bool CandIsClusterSucc =
+ isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
+ bool TryCandIsClusterSucc =
+ isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
+
+ if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
Cluster))
return TryCand.Reason != NoCand;
@@ -189,10 +193,14 @@ bool PPCPostRASchedStrategy::tryCandidate(SchedCandidate &Cand,
return TryCand.Reason != NoCand;
// Keep clustered nodes together.
- const ClusterInfo *CandCluster = Cand.AtTop ? TopCluster : BotCluster;
- const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster;
- if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU),
- CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand,
+ unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
+ unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;
+ bool CandIsClusterSucc =
+ isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
+ bool TryCandIsClusterSucc =
+ isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
+
+ if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
Cluster))
return TryCand.Reason != NoCand;
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you confirm the compile-time gain?
I don't see the compile-time issue on my side. I am not sure whether @mysterymath can help on this as he observed the issue. But I think the change would help reduce compile time as it stops calling |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
It seems this wasn't the root cause of our regression, since IIRC went away in the meantime. |
No description provided.