-
Notifications
You must be signed in to change notification settings - Fork 29
SWP-aware WAWRegRewriter. #699
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
4fd558d
388686c
d4a0620
e286289
2045da6
a5f9660
87e33e2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -195,6 +195,7 @@ bool updateSuccLatency(SDep &SuccEdge, SUnit &PredSU, int Latency) { | |
| // The initial graph will have ordering edges induced by hasSideEffects of the | ||
| // locks/DONE. | ||
| class LockDelays : public ScheduleDAGMutation { | ||
| bool ExactLatencies = true; | ||
| void apply(ScheduleDAGInstrs *DAG) override { | ||
| const auto *TII = static_cast<const AIEBaseInstrInfo *>(DAG->TII); | ||
| const int CoreStallCycle = TII->getCoreStallCycleAfterLock(); | ||
|
|
@@ -218,22 +219,31 @@ class LockDelays : public ScheduleDAGMutation { | |
| continue; | ||
| } | ||
| // Ensure memory operation happens before the core stalls | ||
| int Delay = *TII->getLastMemoryCycle(LdSt->getDesc().SchedClass) - | ||
| CoreStallCycle + 1; | ||
| auto OptLastMemCycle = | ||
| TII->getLastMemoryCycle(LdSt->getDesc().SchedClass); | ||
| assert(!ExactLatencies || OptLastMemCycle); | ||
| const int LastMemCycle = OptLastMemCycle.value_or(7); | ||
| const int Delay = LastMemCycle - CoreStallCycle + 1; | ||
| updatePredLatency(PredEdge, SU, Delay); | ||
| } | ||
| for (auto &SuccEdge : SU.Succs) { | ||
| MachineInstr *LdSt = SuccEdge.getSUnit()->getInstr(); | ||
| if (SuccEdge.getKind() != SDep::Order || !LdSt->mayLoadOrStore()) { | ||
| continue; | ||
| } | ||
| auto OptFirstMemCycle = | ||
| TII->getFirstMemoryCycle(LdSt->getDesc().SchedClass); | ||
| assert(!ExactLatencies || OptFirstMemCycle); | ||
| const int FirstMemCycle = OptFirstMemCycle.value_or(4); | ||
| // Ensure memory operation happens after the core resumes | ||
| int Delay = CoreResumeCycle - | ||
| *TII->getFirstMemoryCycle(LdSt->getDesc().SchedClass) + 1; | ||
| const int Delay = CoreResumeCycle - FirstMemCycle + 1; | ||
| updateSuccLatency(SuccEdge, SU, Delay); | ||
| } | ||
| } | ||
| }; | ||
| } | ||
|
|
||
| public: | ||
| LockDelays(bool ExactLatencies) : ExactLatencies(ExactLatencies) {}; | ||
| }; | ||
|
|
||
| #undef DEBUG_TYPE | ||
|
|
@@ -649,6 +659,7 @@ class PropagateIncomingLatencies : public ScheduleDAGMutation { | |
| /// fix the latencies to preserve the ordering. | ||
| /// E.g. in AIE2: VST.SRS stores in E7, while VLDA reads in E5. | ||
| class MemoryEdges : public ScheduleDAGMutation { | ||
| bool ExactLatencies = true; | ||
| void apply(ScheduleDAGInstrs *DAG) override { | ||
| const auto *TII = static_cast<const AIEBaseInstrInfo *>(DAG->TII); | ||
| // Run over all instructions that may load or store, and correct the | ||
|
|
@@ -677,16 +688,22 @@ class MemoryEdges : public ScheduleDAGMutation { | |
| // Get the correct latency from the Sched model. | ||
| std::optional<int> MemLat = TII->getMemoryLatency( | ||
| SrcMI.getDesc().getSchedClass(), MI.getDesc().getSchedClass()); | ||
| if (!MemLat.has_value()) { | ||
| int Latency = 1; | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. CHECK: If we don't need exact latencies and we don't have them, we use a very optimistic value instead.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not very optimistic. For a RAW ST -> LD pair, the latency is actually 1, since the LOAD reads memory late. It's also the default that was assumed before Gaetan had it defined by a target hook. |
||
| if (MemLat.has_value()) { | ||
| Latency = *MemLat; | ||
| } else if (ExactLatencies) { | ||
| LLVM_DEBUG(llvm::dbgs() | ||
| << "Error: no memory latency info for dependency\n from: " | ||
| << SrcMI << " to: " << MI); | ||
| report_fatal_error("Missing memory latency info."); | ||
| } | ||
| updatePredLatency(PredEdge, SU, *MemLat); | ||
| updatePredLatency(PredEdge, SU, Latency); | ||
| } | ||
| } | ||
| }; | ||
| } | ||
|
|
||
| public: | ||
| MemoryEdges(bool ExactLatencies) : ExactLatencies(ExactLatencies) {}; | ||
| }; | ||
|
|
||
| void dumpDependencies(ScheduleDAGInstrs *DAG, SDep::Kind depType, | ||
|
|
@@ -865,12 +882,12 @@ class WAWStickyRegistersEdges : public ScheduleDAGMutation { | |
| std::vector<std::unique_ptr<ScheduleDAGMutation>> | ||
| AIEBaseSubtarget::getPostRAMutationsImpl(const Triple &TT) { | ||
| std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations; | ||
| Mutations.emplace_back(std::make_unique<LockDelays>()); | ||
| Mutations.emplace_back(std::make_unique<LockDelays>(true)); | ||
| if (!TT.isAIE1()) { | ||
| if (EnableWAWStickyRegisters) | ||
| Mutations.emplace_back(std::make_unique<WAWStickyRegistersEdges>()); | ||
| Mutations.emplace_back(std::make_unique<RegionEndEdges>()); | ||
| Mutations.emplace_back(std::make_unique<MemoryEdges>()); | ||
| Mutations.emplace_back(std::make_unique<MemoryEdges>(true)); | ||
| Mutations.emplace_back(std::make_unique<MachineSchedWAWEdges>()); | ||
| Mutations.emplace_back(std::make_unique<BiasDepth>()); | ||
| Mutations.emplace_back(std::make_unique<EmitFixedSUnits>()); | ||
|
|
@@ -880,12 +897,12 @@ AIEBaseSubtarget::getPostRAMutationsImpl(const Triple &TT) { | |
|
|
||
| // List the Mutations that apply to the interblock DAG construction. | ||
| std::vector<std::unique_ptr<ScheduleDAGMutation>> | ||
| AIEBaseSubtarget::getInterBlockMutationsImpl(const Triple &TT) { | ||
| AIEBaseSubtarget::getDDGMutationsImpl(const Triple &TT, bool ExactLatencies) { | ||
| std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations; | ||
| Mutations.emplace_back(std::make_unique<LockDelays>()); | ||
| Mutations.emplace_back(std::make_unique<LockDelays>(ExactLatencies)); | ||
| if (!TT.isAIE1()) { | ||
| Mutations.emplace_back(std::make_unique<RegionEndEdges>()); | ||
| Mutations.emplace_back(std::make_unique<MemoryEdges>()); | ||
| Mutations.emplace_back(std::make_unique<MemoryEdges>(ExactLatencies)); | ||
| Mutations.emplace_back(std::make_unique<MachineSchedWAWEdges>()); | ||
| } | ||
| return Mutations; | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If we do this:
We don't need any change in the current instantiation (getPostRAMutationsImpl) of the mutators.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess that's true, but I have a bad feeling about default parameters in constructors: they restrict future constructor overloads, which could become ambiguous.