Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions llvm/lib/Target/AIE/AIE2InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1650,3 +1650,15 @@ unsigned AIE2InstrInfo::getBasicVectorBitSize() const { return 512; }
unsigned AIE2InstrInfo::getMaxVectorBitSize() const { return 1024; }

unsigned AIE2InstrInfo::getMaxSupportedLdStIncSize() const { return 512; }

AIEBaseInstrInfo::ImmediateRangeBounds
AIE2InstrInfo::getLoadStorePostIncImmediateRange(LLT MemType) const {
Copy link
Collaborator

@F-Stuckmann F-Stuckmann May 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will open up a PR, where i have the Immediate Range checks for all the MemorySizes.
You are missing the vector Sizes checks and the checks for different Loads, such as G_ZEXT_LOAD, which are pitiful small.

if (MemType.getSizeInBits() == 8)
return {7, -8};
else if (MemType.getSizeInBits() == 16)
return {7, -8};
else if (MemType.getSizeInBits() <= 32)
return {252, -256};
else
llvm_unreachable("Unsupported");
}
3 changes: 3 additions & 0 deletions llvm/lib/Target/AIE/AIE2InstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@ class AIE2InstrInfo : public AIE2GenInstrInfo {
unsigned getMaxVectorBitSize() const override;
unsigned getMaxSupportedLdStIncSize() const override;

ImmediateRangeBounds
getLoadStorePostIncImmediateRange(LLT MemType) const override;

virtual unsigned
getNumReservedDelaySlots(const MachineInstr &MI) const override;

Expand Down
10 changes: 10 additions & 0 deletions llvm/lib/Target/AIE/AIEBaseInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -638,6 +638,16 @@ struct AIEBaseInstrInfo : public TargetInstrInfo {
llvm_unreachable("Target didn't implement getMaxSupportedLdStIncSize!");
}

struct ImmediateRangeBounds {
int64_t ImmediateRangeMax;
int64_t ImmediateRangeMin;
};
virtual ImmediateRangeBounds
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we simply use std::pair?

getLoadStorePostIncImmediateRange(LLT MemType) const {
llvm_unreachable(
"Target didn't implement getLoadStorePostIncImmediateRange!");
}

/// Abstract operations to help the decoding of complex operations.
struct AbstractOp {
enum class OperationType : unsigned {
Expand Down
133 changes: 128 additions & 5 deletions llvm/lib/Target/AIE/AIEClusterBaseAddress.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates
// (c) Copyright 2023-2025 Advanced Micro Devices, Inc. or its affiliates
//
//===----------------------------------------------------------------------===//
//
Expand Down Expand Up @@ -45,12 +45,15 @@
//===----------------------------------------------------------------------===//

#include "AIE.h"
#include "AIEBaseInstrInfo.h"
#include "Utils/AIELoopUtils.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/InitializePasses.h"
Expand All @@ -72,6 +75,11 @@ static cl::opt<bool> EnableChainsForVectorLdSt(
"aie-chain-addr-vec-ldst", cl::Hidden, cl::init(true),
cl::desc("Enable ptradd chaining for vector loads and stores."));

cl::opt<int> AddressChainCostLimit(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: can we make this static?

"aie-chain-cost-limit",
cl::desc("Maximum allowed cost for pointer add chains"), cl::init(-1),
cl::Hidden);

namespace {

/// Try and re-order PTR_ADD instructions to maximise the size of constant
Expand Down Expand Up @@ -163,6 +171,8 @@ class AIEClusterBaseAddress : public MachineFunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineModuleInfoWrapperPass>();
AU.addRequired<GISelCSEAnalysisWrapperPass>();
AU.addRequired<MachineLoopInfo>();
AU.addPreserved<MachineLoopInfo>();
AU.addRequired<TargetPassConfig>();
AU.setPreservesAll();
}
Expand Down Expand Up @@ -223,10 +233,123 @@ class AIEClusterBaseAddress : public MachineFunctionPass {
if (Instrs.size() <= 1)
return true;

// If the base reg is used in any of the successive MBBs, then we don't
// want to chain the corresponding ptr adds, since this would introduce a
// COPY and increase reg pressure.
return isRegUsedInSuccessiveMBBs(&MBB, PtrReg);
// If the base reg is used in any of the successive MBBs, would introduce a
// COPY and increase reg pressure. We only skip chaining in this case if it
// is considered unprofitable.
if (isRegUsedInSuccessiveMBBs(&MBB, PtrReg) &&
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we move isRegUsedInSuccessiveMBBs inside profitability check?

!isChainingProfitable(PtrReg, Instrs, MBB))
return true;

return false;
}

// Decide heuristically if chaining will be profitable
bool isChainingProfitable(Register PtrReg,
const SmallVector<MachineInstr *, 8> &Instrs,
MachineBasicBlock &MBB) {
const TargetSubtargetInfo &ST = MBB.getParent()->getSubtarget();
const AIEBaseInstrInfo *TII =
static_cast<const AIEBaseInstrInfo *>(ST.getInstrInfo());
using OffsetType = std::variant<int64_t, std::string>;
assert(Instrs.size() > 1);

bool InLoop = true;
MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we use AIELoopUtils::isSingleMBBLoop, then no need to include MachineLoopInfo

MachineLoop *ToLoop = MLI.getLoopFor(&MBB);
if (!ToLoop)
InLoop = false;

unsigned ChainedCost = 0;
unsigned ChainedCostLimit = Instrs.size() / 2; // Experimental threshold

if (AddressChainCostLimit > -1) {
ChainedCostLimit = AddressChainCostLimit;
}

if (isRegUsedInSuccessiveMBBs(&MBB, PtrReg)) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We are checking isRegUsedInSuccessiveMBBs already right?

if (InLoop)
return false; // A copy in a loop is costly
ChainedCost += 1; // Add cost of resulting copy
}

int64_t ImmediateRangeMax = 0;
int64_t ImmediateRangeMin = 0;
bool ImmediateRangeSet = false;
int64_t AccumulatedOffset = 0;
int64_t NewOffset;
SmallVector<OffsetType, 8> Offsets;

for (unsigned I = 0; I < Instrs.size() - 1; I++) {
MachineInstr *MI = Instrs[I];
MachineInstr *MINext = Instrs[I + 1];

const Register PtrReg = MI->getOperand(0).getReg();
for (const MachineInstr &UseMI : MRI->use_instructions(PtrReg)) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you really need to iterate all the uses here, as I understand, you just need just one use store/load to determine the offset range.

if (ImmediateRangeSet)
continue; // Check first use only
if (!UseMI.mayLoadOrStore())
continue;
const LLT MemType = getLoadStoreType(UseMI);
// Immediate ranges for vectors are sufficient so we
// assume chaining is always profitable.
if (MemType.isVector()) {
return true;
} else {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: remove else, it will reduce the indentation.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you can assert MemType.getSizeInBits() <= 32 and remove the next if as well.

if (MemType.getSizeInBits() <= 32) {
ImmediateRangeMax = TII->getLoadStorePostIncImmediateRange(MemType)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: one query to TII is enough.

.ImmediateRangeMax;
ImmediateRangeMin = TII->getLoadStorePostIncImmediateRange(MemType)
.ImmediateRangeMin;
ImmediateRangeSet = true;
} else {
llvm_unreachable(
"unreachable: Unsupported immediate range of scalar size ");
}
}
}

// If the immediate range is not set, the pointers aren't used by any
// loads and stores, so we return.
if (!ImmediateRangeSet) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this dead code? This covered by:

llvm_unreachable(
  "unreachable: Unsupported immediate range of scalar size ");

Should we reuse that else to just return false?

assert(ImmediateRangeMin == 0 && ImmediateRangeMax == 0);
return false;
}

auto OffsetMI =
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: OffsetValue may be.

getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), *MRI);
auto OffsetMINext = getIConstantVRegValWithLookThrough(
MINext->getOperand(2).getReg(), *MRI);

if (shouldBreakChain(MI, MINext, OffsetMI, OffsetMINext)) {
ChainedCost++;
AccumulatedOffset = 0;
Offsets.push_back("Break");
continue;
}

const int64_t CurrOffset = OffsetMI->Value.getSExtValue();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we could include some description of the idea (also as a future reference).

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

check: does getIConstantVRegValWithLookThrough guaranty a valid value?

const int64_t NextOffset = OffsetMINext->Value.getSExtValue();

assert(I == 0 || !Offsets.empty());
AccumulatedOffset +=
(I == 0 || (std::holds_alternative<std::string>(Offsets.back()) &&
std::get<std::string>(Offsets.back()) == "Break"))
? CurrOffset
: NewOffset;
Offsets.push_back(
(I == 0 || (std::holds_alternative<std::string>(Offsets.back()) &&
std::get<std::string>(Offsets.back()) == "Break"))
? CurrOffset
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like CurrOffset is an integer value.

: OffsetType(NewOffset));

NewOffset = NextOffset - AccumulatedOffset;

if (NewOffset < ImmediateRangeMin || NewOffset > ImmediateRangeMax) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

check: are we supposed to check AccumulatedOffset with immediate range right?

ChainedCost += 1; // Immediate materialization cost
}
}

return ChainedCostLimit > ChainedCost;
}

// Build a chain (or set of chains) of G_PTR_ADDs. We consider as
Expand Down
12 changes: 12 additions & 0 deletions llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1812,6 +1812,18 @@ unsigned AIE2PInstrInfo::getMaxVectorBitSize() const { return 2048; }

unsigned AIE2PInstrInfo::getMaxSupportedLdStIncSize() const { return 2048; }

AIEBaseInstrInfo::ImmediateRangeBounds
AIE2PInstrInfo::getLoadStorePostIncImmediateRange(LLT MemType) const {
if (MemType.getSizeInBits() == 8)
return {7, -8};
else if (MemType.getSizeInBits() == 16)
return {14, -16};
else if (MemType.getSizeInBits() <= 32)
return {28, -32};
else
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we have memory type for s128 as well right?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we gracefully exit, rather than asserting?

llvm_unreachable("Unsupported");
}

using AbstractOp = AIEBaseInstrInfo::AbstractOp;

std::optional<const AbstractOp>
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ class AIE2PInstrInfo : public AIE2PGenInstrInfo {
unsigned getMaxVectorBitSize() const override;
unsigned getMaxSupportedLdStIncSize() const override;

ImmediateRangeBounds
getLoadStorePostIncImmediateRange(LLT MemType) const override;

virtual unsigned
getNumReservedDelaySlots(const MachineInstr &MI) const override;

Expand Down
Loading
Loading