Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 30 additions & 13 deletions llvm/lib/Target/AIE/AIEBaseSubtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@ bool updateSuccLatency(SDep &SuccEdge, SUnit &PredSU, int Latency) {
// The initial graph will have ordering edges induced by hasSideEffects of the
// locks/DONE.
class LockDelays : public ScheduleDAGMutation {
bool ExactLatencies = true;
Copy link
Collaborator

@andcarminati andcarminati Nov 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we do this:

 class LockDelays : public ScheduleDAGMutation {
-  bool ExactLatencies = true;
+  bool ExactLatencies;
   void apply(ScheduleDAGInstrs *DAG) override {
     const auto *TII = static_cast<const AIEBaseInstrInfo *>(DAG->TII);
     const int CoreStallCycle = TII->getCoreStallCycleAfterLock();
@@ -243,7 +243,7 @@ class LockDelays : public ScheduleDAGMutation {
   }
 
 public:
-  LockDelays(bool ExactLatencies) : ExactLatencies(ExactLatencies) {};
+  LockDelays(bool ExactLatencies = true) : ExactLatencies(ExactLatencies) {};
 };

We don't need any change in the current instantiation (getPostRAMutationsImpl) of the mutators.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess that's true. But I have a bad feeling about default parameters in constructors: they restrict future constructor overloads, which could become ambiguous.

void apply(ScheduleDAGInstrs *DAG) override {
const auto *TII = static_cast<const AIEBaseInstrInfo *>(DAG->TII);
const int CoreStallCycle = TII->getCoreStallCycleAfterLock();
Expand All @@ -218,22 +219,31 @@ class LockDelays : public ScheduleDAGMutation {
continue;
}
// Ensure memory operation happens before the core stalls
int Delay = *TII->getLastMemoryCycle(LdSt->getDesc().SchedClass) -
CoreStallCycle + 1;
auto OptLastMemCycle =
TII->getLastMemoryCycle(LdSt->getDesc().SchedClass);
assert(!ExactLatencies || OptLastMemCycle);
const int LastMemCycle = OptLastMemCycle.value_or(7);
const int Delay = LastMemCycle - CoreStallCycle + 1;
updatePredLatency(PredEdge, SU, Delay);
}
for (auto &SuccEdge : SU.Succs) {
MachineInstr *LdSt = SuccEdge.getSUnit()->getInstr();
if (SuccEdge.getKind() != SDep::Order || !LdSt->mayLoadOrStore()) {
continue;
}
auto OptFirstMemCycle =
TII->getFirstMemoryCycle(LdSt->getDesc().SchedClass);
assert(!ExactLatencies || OptFirstMemCycle);
const int FirstMemCycle = OptFirstMemCycle.value_or(4);
// Ensure memory operation happens after the core resumes
int Delay = CoreResumeCycle -
*TII->getFirstMemoryCycle(LdSt->getDesc().SchedClass) + 1;
const int Delay = CoreResumeCycle - FirstMemCycle + 1;
updateSuccLatency(SuccEdge, SU, Delay);
}
}
};
}

public:
LockDelays(bool ExactLatencies) : ExactLatencies(ExactLatencies) {};
};

#undef DEBUG_TYPE
Expand Down Expand Up @@ -649,6 +659,7 @@ class PropagateIncomingLatencies : public ScheduleDAGMutation {
/// fix the latencies to preserve the ordering.
/// E.g. in AIE2: VST.SRS stores in E7, while VLDA reads in E5.
class MemoryEdges : public ScheduleDAGMutation {
bool ExactLatencies = true;
void apply(ScheduleDAGInstrs *DAG) override {
const auto *TII = static_cast<const AIEBaseInstrInfo *>(DAG->TII);
// Run over all instructions that may load or store, and correct the
Expand Down Expand Up @@ -677,16 +688,22 @@ class MemoryEdges : public ScheduleDAGMutation {
// Get the correct latency from the Sched model.
std::optional<int> MemLat = TII->getMemoryLatency(
SrcMI.getDesc().getSchedClass(), MI.getDesc().getSchedClass());
if (!MemLat.has_value()) {
int Latency = 1;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

CHECK: if we don't need exact latencies, and we don't have them, we use a very optimistic value instead.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not very optimistic. For a RAW ST -> LD pair, the latency is actually 1, since the LOAD reads memory late. It's also the default that was assumed before Gaetan made it defined by a target hook.

if (MemLat.has_value()) {
Latency = *MemLat;
} else if (ExactLatencies) {
LLVM_DEBUG(llvm::dbgs()
<< "Error: no memory latency info for dependency\n from: "
<< SrcMI << " to: " << MI);
report_fatal_error("Missing memory latency info.");
}
updatePredLatency(PredEdge, SU, *MemLat);
updatePredLatency(PredEdge, SU, Latency);
}
}
};
}

public:
MemoryEdges(bool ExactLatencies) : ExactLatencies(ExactLatencies) {};
};

void dumpDependencies(ScheduleDAGInstrs *DAG, SDep::Kind depType,
Expand Down Expand Up @@ -865,12 +882,12 @@ class WAWStickyRegistersEdges : public ScheduleDAGMutation {
std::vector<std::unique_ptr<ScheduleDAGMutation>>
AIEBaseSubtarget::getPostRAMutationsImpl(const Triple &TT) {
std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
Mutations.emplace_back(std::make_unique<LockDelays>());
Mutations.emplace_back(std::make_unique<LockDelays>(true));
if (!TT.isAIE1()) {
if (EnableWAWStickyRegisters)
Mutations.emplace_back(std::make_unique<WAWStickyRegistersEdges>());
Mutations.emplace_back(std::make_unique<RegionEndEdges>());
Mutations.emplace_back(std::make_unique<MemoryEdges>());
Mutations.emplace_back(std::make_unique<MemoryEdges>(true));
Mutations.emplace_back(std::make_unique<MachineSchedWAWEdges>());
Mutations.emplace_back(std::make_unique<BiasDepth>());
Mutations.emplace_back(std::make_unique<EmitFixedSUnits>());
Expand All @@ -880,12 +897,12 @@ AIEBaseSubtarget::getPostRAMutationsImpl(const Triple &TT) {

// List the Mutations that apply to the interblock DAG construction.
std::vector<std::unique_ptr<ScheduleDAGMutation>>
AIEBaseSubtarget::getInterBlockMutationsImpl(const Triple &TT) {
AIEBaseSubtarget::getDDGMutationsImpl(const Triple &TT, bool ExactLatencies) {
std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
Mutations.emplace_back(std::make_unique<LockDelays>());
Mutations.emplace_back(std::make_unique<LockDelays>(ExactLatencies));
if (!TT.isAIE1()) {
Mutations.emplace_back(std::make_unique<RegionEndEdges>());
Mutations.emplace_back(std::make_unique<MemoryEdges>());
Mutations.emplace_back(std::make_unique<MemoryEdges>(ExactLatencies));
Mutations.emplace_back(std::make_unique<MachineSchedWAWEdges>());
}
return Mutations;
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AIE/AIEBaseSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates
// (c) Copyright 2023-2025 Advanced Micro Devices, Inc. or its affiliates
//
//===----------------------------------------------------------------------===//
//
Expand Down Expand Up @@ -77,7 +77,7 @@ class AIEBaseSubtarget {

/// Required DAG mutations for InterBlock dependence analysis
static std::vector<std::unique_ptr<ScheduleDAGMutation>>
getInterBlockMutationsImpl(const Triple &TT);
getDDGMutationsImpl(const Triple &TT, bool ExactLatencies);

/// Required DAG mutations during Pre-RA scheduling.
static std::vector<std::unique_ptr<ScheduleDAGMutation>>
Expand Down
5 changes: 3 additions & 2 deletions llvm/lib/Target/AIE/AIEDataDependenceHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,15 @@ static cl::opt<bool>
cl::desc("Allow memory dependences in DataDependenceHelper "));

DataDependenceHelper::DataDependenceHelper(const MachineSchedContext &Context,
bool AddMutators)
bool AddMutators,
bool ExactLatencies)
: ScheduleDAGInstrs(*Context.MF, Context.MLI), Context(Context) {
if (!AddMutators)
return;

auto &Subtarget = Context.MF->getSubtarget();
auto TT = Subtarget.getTargetTriple();
for (auto &M : AIEBaseSubtarget::getInterBlockMutationsImpl(TT)) {
for (auto &M : AIEBaseSubtarget::getDDGMutationsImpl(TT, ExactLatencies)) {
Mutations.emplace_back(std::move(M));
}
}
Expand Down
7 changes: 2 additions & 5 deletions llvm/lib/Target/AIE/AIEDataDependenceHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,8 @@ class DataDependenceHelper : public ScheduleDAGInstrs {
bool mayAlias(SUnit *SUa, SUnit *SUb, bool TBAA) override;

public:
DataDependenceHelper(const MachineSchedContext &Context,
bool AddMutators = true);

// After adding the nodes, create the edges, using the order in which the
// nodes were added.
DataDependenceHelper(const MachineSchedContext &Context, bool AddMutators,
bool ExactLatencies);
void buildEdges();

// Compute the maximum depth of all nodes. The depth is the earliest cycle
Expand Down
13 changes: 2 additions & 11 deletions llvm/lib/Target/AIE/AIEInterBlockScheduling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
// (c) Copyright 2024-2025 Advanced Micro Devices, Inc. or its affiliates
//
//===----------------------------------------------------------------------===//
// Implementations of the classes used to support inter-block scheduling
Expand Down Expand Up @@ -1140,16 +1140,7 @@ void BlockState::classify() {
// This can only be done if we have an epilogue and the epilogue is not itself
// a loop.
auto IsLoop = [](const MachineBasicBlock *MBB) {
int NumLoopEdges = 0;
int NumExitEdges = 0;
for (auto *S : MBB->successors()) {
if (S == MBB) {
NumLoopEdges++;
} else {
NumExitEdges++;
}
}
return NumLoopEdges == 1 && NumExitEdges == 1;
return AIELoopUtils::isSingleMBBLoop(MBB);
};

// We generalize slightly; we require the epilogue to be a dedicated exit of
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AIE/AIEInterBlockScheduling.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ class InterBlockEdges {
IndexMap SuccMap;

public:
InterBlockEdges(const MachineSchedContext &Context) : DDG(Context) {}
InterBlockEdges(const MachineSchedContext &Context)
: DDG(Context, true, true) {}

/// Add a Node to the DAG.
void addNode(MachineInstr *);
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AIE/AIELoopClass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ static const KernelFeatures Kernels[] = {
{43, {{900, 300, 0, 2460, 0, 0, 3360}, {30, 30, 0, 30, 30}}},
{44, {{900, 300, 0, 2520, 0, 0, 3360}, {30, 30, 0, 30, 30}}},
{45, {{900, 300, 0, 2520, 0, 0, 3360}, {45, 45, 0, 45, 45}}},
{46, {{0, 0, 0, 0, 2160, 0, 120, 1080}, {0, 420, 420}}},
{47, {{0, 0, 0, 0, 360, 0, 240, 360}, {0, 60, 60}}},
};

std::vector<int> getLoopClassScores(const SlotStatistics &Stats) {
Expand Down
21 changes: 20 additions & 1 deletion llvm/lib/Target/AIE/AIEPostPipeliner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -547,6 +547,8 @@ int PostPipeliner::computeMinScheduleLength() const {
return MinLength;
}

namespace {

void dumpGraph(const ScheduleInfo &Info, ScheduleDAGInstrs *DAG) {
dbgs() << "digraph {\n";

Expand Down Expand Up @@ -604,17 +606,31 @@ void dumpGraph(const ScheduleInfo &Info, ScheduleDAGInstrs *DAG) {
dbgs() << "}\n";
}

/// Pick a single display character for an instruction's slot usage.
///
/// Scans the first ten entries of \p Slots in index order and returns the
/// letter paired with the first entry that is greater than zero. Returns '*'
/// when none of those entries is positive.
char slotLetter(const SlotCounts &Slots) {
  // Slots are sorted by name in tablegen.
  // alu, lda, ldb, lng, mov, nop, st, vec
  // NOTE(review): the list above names eight slots while the letter table
  // holds ten characters — confirm the index-to-letter mapping.
  static constexpr char Letters[] = "XABLMNSVW9";
  constexpr int NumLetters = sizeof(Letters) - 1; // exclude the terminator

  int Idx = 0;
  while (Idx < NumLetters && !(Slots[Idx] > 0)) {
    ++Idx;
  }
  return Idx < NumLetters ? Letters[Idx] : '*';
}

void dumpSchedule(const ScheduleInfo &Info, int MinLength, int II,
std::function<bool(int I, int K)> Select) {
for (int K = 0; K < Info.NInstr; K++) {
char S = slotLetter(Info[K].Slots);
std::string Head = "SU" + std::to_string(K);
dbgs() << Head;
for (int I = Head.length() - 6; I < MinLength; I++) {
if (I >= 0 && I % II == 0) {
dbgs() << "|";
}
if (Select(I, K)) {
dbgs() << "*";
dbgs() << S;
} else {
dbgs() << " ";
}
Expand All @@ -640,6 +656,7 @@ void dumpCycles(const ScheduleInfo &Info, int II) {
dumpSchedule(Info, FullStageLength, II,
[&](int I, int K) { return I == Info[K].Cycle; });
}
} // namespace

int PostPipeliner::mostUrgent(PostPipelinerStrategy &Strategy) {
assert(FirstUnscheduled <= LastUnscheduled);
Expand Down Expand Up @@ -831,6 +848,7 @@ bool PostPipeliner::scheduleWithStrategy(PostPipelinerStrategy &S) {
return true;
}

namespace {
int getMinOutputLat(ArrayRef<SDep> Edges) {
int Min = std::numeric_limits<int>::max();
for (const SDep &Dep : Edges) {
Expand All @@ -840,6 +858,7 @@ int getMinOutputLat(ArrayRef<SDep> Edges) {
}
return Min;
}
} // namespace

class DefaultStrategy : public PostPipelinerStrategy {
public:
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Target/AIE/AIEPtrModOptimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,9 @@ bool AIEPtrModOptimizer::runOnMachineFunction(MachineFunction &MF) {

// To build the edges in the DAG, the reserved Registers have to be frozen
MRI.freezeReservedRegs();
AIE::DataDependenceHelper DAG(Context, /*AddMutators=*/false);
const bool AddMutators = false;
const bool ExactLatencies = false;
AIE::DataDependenceHelper DAG(Context, AddMutators, ExactLatencies);

// Fixme: these combiners should be provided by tablegen
std::vector<const AIE::GenericCombiner *> Combiners;
Expand Down
Loading