@@ -49,6 +49,14 @@ using namespace llvm;
4949// This might be compatible with a future extension of the DEBUG rigging
// Function-like wrapper: forwards x to DEBUG_WITH_TYPE under the dedicated
// "aie-waw-reg-rewrite:2" debug type. The parameter list must follow the macro
// name without a space, otherwise the macro becomes object-like.
#define DEBUG_DETAIL(x) DEBUG_WITH_TYPE("aie-waw-reg-rewrite:2", x)
5151
/// Strategy used when rewriting physical register assignments.
enum class RewriteMode {
  /// Basic rewriting.
  Basic,
  /// Pick the strategy automatically, based on the loop class.
  Automatic,
  /// Latency-aware rewriting.
  LatencyAware,
  /// SWP-aware rewriting using the default bias.
  SWPAware,
  /// SWP-aware rewriting using an automatically chosen bias.
  SWPAwareAutoBias,
};
59+
5260static cl::opt<bool > AggressiveReAlloc (
5361 " aie-aggressive-realloc" , cl::Hidden, cl::init(false ),
5462 cl::desc(" Aggressively de-allocate live-through registers to favor "
@@ -66,16 +74,21 @@ static cl::opt<unsigned>
6674 " considered for WAW rewriting" ),
6775 cl::init(3 ));
6876
69- static cl::opt<bool >
70- LatencyAware (" aie-realloc-latencyaware" , cl::Hidden, cl::init(true ),
71- cl::desc(" Enable latency-aware allocation strategy" ));
72-
73- static cl::opt<bool >
74- SWPAware (" aie-realloc-swp-aware" , cl::Hidden, cl::init(false ),
75- cl::desc(" Use assignment order based on interleaved swp stages" ));
77+ static cl::opt<RewriteMode> RegRewriteMode (
78+ " aie-reg-rewrite-mode" , cl::Hidden, cl::init(RewriteMode::Automatic),
79+ cl::desc(" Set the rewriting mode" ),
80+ cl::values(clEnumValN(RewriteMode::Basic, " basic" , " Basic" ),
81+ clEnumValN(RewriteMode::Automatic, " auto" ,
82+ " Automatic selection based on loop class" ),
83+ clEnumValN(RewriteMode::LatencyAware, " latencyaware" ,
84+ " Latency aware" ),
85+ clEnumValN(RewriteMode::SWPAware, " swpaware" ,
86+ " SWP aware with default bias" ),
87+ clEnumValN(RewriteMode::SWPAwareAutoBias, " swpaware-auto" ,
88+ " SWP aware with automatic bias" )));
7689
7790static cl::opt<int > MinIIBias (" aie-realloc-ii-bias" , cl::Hidden, cl::init(0 ),
78- cl::desc(" MinII bias for swp-aware " ));
91+ cl::desc(" Set default MinII bias for swpaware " ));
7992
8093namespace {
8194
@@ -89,6 +102,44 @@ using OriginalAllocation =
89102// Record success for a whole register class.
90103using RegClassSuccess = std::map<const TargetRegisterClass *, bool >;
91104
105+ RewriteMode selectMode (RewriteMode Mode, int LoopClass) {
106+ if (Mode != RewriteMode::Automatic) {
107+ return Mode;
108+ }
109+ switch (LoopClass) {
110+ case 14 :
111+ case 29 :
112+ case 47 :
113+ return RewriteMode::SWPAwareAutoBias;
114+ default :
115+ return RewriteMode::LatencyAware;
116+ }
117+ }
118+
119+ bool runSWPAware (RewriteMode Mode, int LoopClass, int &Bias) {
120+ Bias = MinIIBias;
121+ switch (Mode) {
122+ case RewriteMode::SWPAwareAutoBias:
123+ break ;
124+ case RewriteMode::SWPAware:
125+ return true ;
126+ default :
127+ return false ;
128+ }
129+
130+ switch (LoopClass) {
131+ case 14 :
132+ Bias = -1 ;
133+ return true ;
134+ case 18 :
135+ case 29 :
136+ Bias = 1 ;
137+ return true ;
138+ default :
139+ return true ;
140+ }
141+ }
142+
92143// /
93144// / This pass rewrites physical register assignments in critical parts of the
94145// / code (like loops) to break WAW and WAR dependencies.
@@ -141,7 +192,8 @@ class AIEWawRegRewriter : public MachineFunctionPass {
141192 const std::map<const TargetRegisterClass *, bool > &RegClasses);
142193
143194 // / Sort the candidates to mimic interleaving the pipeline stages
144- void sortSWPAware (OriginalAllocation &Candidates, MachineBasicBlock &MBB);
195+ void sortSWPAware (OriginalAllocation &Candidates, MachineBasicBlock &MBB,
196+ const llvm::AIE::SlotStatistics &Statistics, int LoopClass);
145197
146198 // / Pre-allocate all virtual registers in Candidates. The sole purpose of
147199 // / this is to prime the LRURegisters, so that the end of the loop is
@@ -383,21 +435,17 @@ RoundRobin AIEWawRegRewriter::computeLRURegisters(
383435 return LRURegisters;
384436}
385437
386- void AIEWawRegRewriter::sortSWPAware (OriginalAllocation &Candidates,
387- MachineBasicBlock &MBB) {
388-
438+ void AIEWawRegRewriter::sortSWPAware (
439+ OriginalAllocation &Candidates, MachineBasicBlock &MBB,
440+ const llvm::AIE::SlotStatistics &Statistics, int Bias) {
389441 // We estimate the length of the schedule based on latencies and the
390442 // minimum II based on slots. We then estimate the modulo cycle of each
391443 // instruction based on its depth and apply LRU in the order of the modulo
392444 // cycle.
393445 // Note that both the depth and the II are underestimations since we don't
394446 // account for them interfering. Hence the modulo cycle estimate won't be
395447 // too far off.
396- AIE::SlotStatistics Statistics = AIE::computeSlotStatistics (MBB, TII);
397- DEBUG_DETAIL (dbgs () << " Stats=" ; Statistics.dumpShort (); dbgs () << " \n " );
398- DEBUG_DETAIL (dbgs () << " LoopClass=" << llvm::AIE::classifyLoop (Statistics)
399- << " \n " );
400- const int MinII = std::max (Statistics.getMinII () + MinIIBias, 1 );
448+ const int MinII = std::max (Statistics.getMinII () + Bias, 1 );
401449
402450 MachineSchedContext Context;
403451 Context.MF = MF;
@@ -456,7 +504,18 @@ bool AIEWawRegRewriter::renameMBBPhysRegs(const MachineBasicBlock *MBB) {
456504 IndexedMap<const MachineInstr *, VirtReg2IndexFunctor> LastVRegDef =
457505 getLastVRegDef (*MBB);
458506
459- std::set<MCRegister> HighLatencyRegs = getHighOutputLatencyRegs (MBB);
507+ auto &NonConstMBB = *(const_cast <MachineBasicBlock *>(MBB));
508+ AIE::SlotStatistics Statistics = AIE::computeSlotStatistics (NonConstMBB, TII);
509+ const int LoopClass = llvm::AIE::classifyLoop (Statistics);
510+ LLVM_DEBUG (dbgs () << " Stats=" ; Statistics.dumpShort (); dbgs () << " \n " );
511+ LLVM_DEBUG (dbgs () << " LoopClass=" << LoopClass << " \n " );
512+
513+ RewriteMode Mode = selectMode (RegRewriteMode, LoopClass);
514+
515+ std::set<MCRegister> HighLatencyRegs;
516+ if (Mode == RewriteMode::LatencyAware) {
517+ HighLatencyRegs = getHighOutputLatencyRegs (MBB);
518+ }
460519
461520 OriginalAllocation Candidates;
462521
@@ -533,9 +592,9 @@ bool AIEWawRegRewriter::renameMBBPhysRegs(const MachineBasicBlock *MBB) {
533592 }
534593 }
535594
536- if (SWPAware) {
537- auto &NCMBB = *( const_cast <MachineBasicBlock *>(MBB));
538- sortSWPAware (Candidates, NCMBB );
595+ int Bias = MinIIBias;
596+ if ( runSWPAware (Mode, LoopClass, Bias)) {
597+ sortSWPAware (Candidates, NonConstMBB, Statistics, Bias );
539598 }
540599
541600 // Least-Recently-Used list of physical registers for assignments to VRegs.
@@ -730,10 +789,6 @@ AIEWawRegRewriter::getLastVRegDef(const MachineBasicBlock &MBB) const {
730789
731790std::set<MCRegister> AIEWawRegRewriter::getHighOutputLatencyRegs (
732791 const MachineBasicBlock *MBB) const {
733-
734- if (!LatencyAware)
735- return {};
736-
737792 auto *ItinData = MF->getSubtarget ().getInstrItineraryData ();
738793 std::set<MCRegister> HighLatRegisters;
739794 for (const MachineInstr &MI : *MBB) {
0 commit comments