-
Notifications
You must be signed in to change notification settings - Fork 28
[AIE2P] Allow more address chaining and add a profitability check heuristic #451
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: aie-public
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -638,6 +638,16 @@ struct AIEBaseInstrInfo : public TargetInstrInfo { | |
llvm_unreachable("Target didn't implement getMaxSupportedLdStIncSize!"); | ||
} | ||
|
||
struct ImmediateRangeBounds { | ||
int64_t ImmediateRangeMax; | ||
int64_t ImmediateRangeMin; | ||
}; | ||
virtual ImmediateRangeBounds | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we simply use std::pair? |
||
getLoadStorePostIncImmediateRange(LLT MemType) const { | ||
llvm_unreachable( | ||
"Target didn't implement getLoadStorePostIncImmediateRange!"); | ||
} | ||
|
||
/// Abstract operations to help the decoding of complex operations. | ||
struct AbstractOp { | ||
enum class OperationType : unsigned { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,7 +4,7 @@ | |
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
// (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates | ||
// (c) Copyright 2023-2025 Advanced Micro Devices, Inc. or its affiliates | ||
// | ||
//===----------------------------------------------------------------------===// | ||
// | ||
|
@@ -45,12 +45,15 @@ | |
//===----------------------------------------------------------------------===// | ||
|
||
#include "AIE.h" | ||
#include "AIEBaseInstrInfo.h" | ||
#include "Utils/AIELoopUtils.h" | ||
#include "llvm/CodeGen/GlobalISel/CSEInfo.h" | ||
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h" | ||
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" | ||
#include "llvm/CodeGen/MachineFunction.h" | ||
#include "llvm/CodeGen/MachineFunctionPass.h" | ||
#include "llvm/CodeGen/MachineInstrBuilder.h" | ||
#include "llvm/CodeGen/MachineLoopInfo.h" | ||
#include "llvm/CodeGen/MachineModuleInfo.h" | ||
#include "llvm/CodeGen/TargetPassConfig.h" | ||
#include "llvm/InitializePasses.h" | ||
|
@@ -72,6 +75,11 @@ static cl::opt<bool> EnableChainsForVectorLdSt( | |
"aie-chain-addr-vec-ldst", cl::Hidden, cl::init(true), | ||
cl::desc("Enable ptradd chaining for vector loads and stores.")); | ||
|
||
cl::opt<int> AddressChainCostLimit( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: can we make this static? |
||
"aie-chain-cost-limit", | ||
cl::desc("Maximum allowed cost for pointer add chains"), cl::init(-1), | ||
cl::Hidden); | ||
|
||
namespace { | ||
|
||
/// Try and re-order PTR_ADD instructions to maximise the size of constant | ||
|
@@ -163,6 +171,8 @@ class AIEClusterBaseAddress : public MachineFunctionPass { | |
void getAnalysisUsage(AnalysisUsage &AU) const override { | ||
AU.addRequired<MachineModuleInfoWrapperPass>(); | ||
AU.addRequired<GISelCSEAnalysisWrapperPass>(); | ||
AU.addRequired<MachineLoopInfo>(); | ||
AU.addPreserved<MachineLoopInfo>(); | ||
AU.addRequired<TargetPassConfig>(); | ||
AU.setPreservesAll(); | ||
} | ||
|
@@ -223,10 +233,123 @@ class AIEClusterBaseAddress : public MachineFunctionPass { | |
if (Instrs.size() <= 1) | ||
return true; | ||
|
||
// If the base reg is used in any of the successive MBBs, then we don't | ||
// want to chain the corresponding ptr adds, since this would introduce a | ||
// COPY and increase reg pressure. | ||
return isRegUsedInSuccessiveMBBs(&MBB, PtrReg); | ||
// If the base reg is used in any of the successive MBBs, would introduce a | ||
// COPY and increase reg pressure. We only skip chaining in this case if it | ||
// is considered unprofitable. | ||
if (isRegUsedInSuccessiveMBBs(&MBB, PtrReg) && | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we move |
||
!isChainingProfitable(PtrReg, Instrs, MBB)) | ||
return true; | ||
|
||
return false; | ||
} | ||
|
||
// Decide heuristically if chaining will be profitable | ||
bool isChainingProfitable(Register PtrReg, | ||
const SmallVector<MachineInstr *, 8> &Instrs, | ||
MachineBasicBlock &MBB) { | ||
const TargetSubtargetInfo &ST = MBB.getParent()->getSubtarget(); | ||
const AIEBaseInstrInfo *TII = | ||
static_cast<const AIEBaseInstrInfo *>(ST.getInstrInfo()); | ||
using OffsetType = std::variant<int64_t, std::string>; | ||
assert(Instrs.size() > 1); | ||
|
||
bool InLoop = true; | ||
MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we use |
||
MachineLoop *ToLoop = MLI.getLoopFor(&MBB); | ||
if (!ToLoop) | ||
InLoop = false; | ||
|
||
unsigned ChainedCost = 0; | ||
unsigned ChainedCostLimit = Instrs.size() / 2; // Experimental threshold | ||
|
||
if (AddressChainCostLimit > -1) { | ||
ChainedCostLimit = AddressChainCostLimit; | ||
} | ||
|
||
if (isRegUsedInSuccessiveMBBs(&MBB, PtrReg)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We are checking |
||
if (InLoop) | ||
return false; // A copy in a loop is costly | ||
ChainedCost += 1; // Add cost of resulting copy | ||
} | ||
|
||
int64_t ImmediateRangeMax = 0; | ||
int64_t ImmediateRangeMin = 0; | ||
bool ImmediateRangeSet = false; | ||
int64_t AccumulatedOffset = 0; | ||
int64_t NewOffset; | ||
SmallVector<OffsetType, 8> Offsets; | ||
|
||
for (unsigned I = 0; I < Instrs.size() - 1; I++) { | ||
MachineInstr *MI = Instrs[I]; | ||
MachineInstr *MINext = Instrs[I + 1]; | ||
|
||
const Register PtrReg = MI->getOperand(0).getReg(); | ||
for (const MachineInstr &UseMI : MRI->use_instructions(PtrReg)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you really need to iterate all the uses here, as I understand, you just need just one use store/load to determine the offset range. |
||
if (ImmediateRangeSet) | ||
continue; // Check first use only | ||
if (!UseMI.mayLoadOrStore()) | ||
continue; | ||
const LLT MemType = getLoadStoreType(UseMI); | ||
// Immediate ranges for vectors are sufficient so we | ||
// assume chaining is always profitable. | ||
if (MemType.isVector()) { | ||
return true; | ||
} else { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: remove else, it will reduce the indentation. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you can assert |
||
if (MemType.getSizeInBits() <= 32) { | ||
ImmediateRangeMax = TII->getLoadStorePostIncImmediateRange(MemType) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: one query to TII is enough. |
||
.ImmediateRangeMax; | ||
ImmediateRangeMin = TII->getLoadStorePostIncImmediateRange(MemType) | ||
.ImmediateRangeMin; | ||
ImmediateRangeSet = true; | ||
} else { | ||
llvm_unreachable( | ||
"unreachable: Unsupported immediate range of scalar size "); | ||
} | ||
} | ||
} | ||
|
||
// If the immediate range is not set, the pointers aren't used by any | ||
// loads and stores, so we return. | ||
if (!ImmediateRangeSet) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this dead code? This covered by:
Should we reuse that else to just return false? |
||
assert(ImmediateRangeMin == 0 && ImmediateRangeMax == 0); | ||
return false; | ||
} | ||
|
||
auto OffsetMI = | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: OffsetValue may be. |
||
getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), *MRI); | ||
auto OffsetMINext = getIConstantVRegValWithLookThrough( | ||
MINext->getOperand(2).getReg(), *MRI); | ||
|
||
if (shouldBreakChain(MI, MINext, OffsetMI, OffsetMINext)) { | ||
ChainedCost++; | ||
AccumulatedOffset = 0; | ||
Offsets.push_back("Break"); | ||
continue; | ||
} | ||
|
||
const int64_t CurrOffset = OffsetMI->Value.getSExtValue(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe we could include some description of the idea (also as a future reference). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. check: does |
||
const int64_t NextOffset = OffsetMINext->Value.getSExtValue(); | ||
|
||
assert(I == 0 || !Offsets.empty()); | ||
AccumulatedOffset += | ||
(I == 0 || (std::holds_alternative<std::string>(Offsets.back()) && | ||
std::get<std::string>(Offsets.back()) == "Break")) | ||
? CurrOffset | ||
: NewOffset; | ||
Offsets.push_back( | ||
(I == 0 || (std::holds_alternative<std::string>(Offsets.back()) && | ||
std::get<std::string>(Offsets.back()) == "Break")) | ||
? CurrOffset | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks like |
||
: OffsetType(NewOffset)); | ||
|
||
NewOffset = NextOffset - AccumulatedOffset; | ||
|
||
if (NewOffset < ImmediateRangeMin || NewOffset > ImmediateRangeMax) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. check: are we supposed to check |
||
ChainedCost += 1; // Immediate materialization cost | ||
} | ||
} | ||
|
||
return ChainedCostLimit > ChainedCost; | ||
} | ||
|
||
// Build a chain (or set of chains) of G_PTR_ADDs. We consider as | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1812,6 +1812,18 @@ unsigned AIE2PInstrInfo::getMaxVectorBitSize() const { return 2048; } | |
|
||
unsigned AIE2PInstrInfo::getMaxSupportedLdStIncSize() const { return 2048; } | ||
|
||
AIEBaseInstrInfo::ImmediateRangeBounds | ||
AIE2PInstrInfo::getLoadStorePostIncImmediateRange(LLT MemType) const { | ||
if (MemType.getSizeInBits() == 8) | ||
return {7, -8}; | ||
else if (MemType.getSizeInBits() == 16) | ||
return {14, -16}; | ||
else if (MemType.getSizeInBits() <= 32) | ||
return {28, -32}; | ||
else | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we have memory type for s128 as well right? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we gracefully exit, rather than asserting? |
||
llvm_unreachable("Unsupported"); | ||
} | ||
|
||
using AbstractOp = AIEBaseInstrInfo::AbstractOp; | ||
|
||
std::optional<const AbstractOp> | ||
|
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I will open up a PR, where i have the Immediate Range checks for all the MemorySizes.
You are missing the vector Sizes checks and the checks for different Loads, such as G_ZEXT_LOAD, which are pitiful small.