Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
dc5b7c1
[LV] Add initial legality checks for loops with non-dereferenceable l…
arcbbb Aug 7, 2025
78f05df
Fix braces
arcbbb Aug 8, 2025
f5315f8
Limit to single unbound access. Add tests
arcbbb Aug 9, 2025
d3246bd
clang-formatted
arcbbb Aug 9, 2025
eb316ee
trivial fix: use lower case
arcbbb Aug 9, 2025
faa6112
trivial fix
arcbbb Aug 9, 2025
b206c9f
Address comment and refine TTI to check data type
arcbbb Aug 12, 2025
72383dd
Query subtarget feature for misaligned access support
arcbbb Aug 21, 2025
0c1d007
Add align 1 case
arcbbb Aug 22, 2025
e22c486
Rename isLegalSpeculativeLoad to isLegalFaultOnlyFirstLoad
arcbbb Aug 27, 2025
2a37216
Rename isReadOnlyLoopWithSafeOrSpeculativeLoads to
arcbbb Aug 27, 2025
abb0120
Rename SpeculativeLoads to FaultOnlyFirstLoads
arcbbb Aug 27, 2025
6b920f4
Refine comments with ff loads
arcbbb Aug 27, 2025
22dee5b
Update unittest to check the returned instructions
arcbbb Aug 27, 2025
e88d5d4
Refine report messages
arcbbb Aug 27, 2025
12e6dc9
Remove TTI isLegalFaultOnlyFirstLoad
arcbbb Aug 28, 2025
3284452
Update tests
arcbbb Aug 28, 2025
10156ca
Refine description
arcbbb Sep 2, 2025
16b5376
Rename isLoopSafeWithLoadOnlyFaults to isReadOnlyLoop
arcbbb Sep 2, 2025
357ff34
Refine error message
arcbbb Sep 2, 2025
ead7a78
Refine LoadTest
arcbbb Sep 2, 2025
9e98d12
clang-format
arcbbb Sep 2, 2025
f18dae7
Update tests
arcbbb Sep 2, 2025
9dc5a7a
Rename IsLoadOnlyFaultingLoop to IsReadOnlyLoop
arcbbb Sep 3, 2025
380e064
Assert the size of NonDerefLoads
arcbbb Sep 3, 2025
93db706
pass NonDereferenceableAndAlignedLoads as reference
arcbbb Sep 3, 2025
43976f7
Rename FaultOnlyFirstLoads to PotentiallyFaultingLoads
arcbbb Sep 3, 2025
51a8329
Update report message in LoopVectorize
arcbbb Sep 3, 2025
7a99472
Update descriptions
arcbbb Sep 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions llvm/include/llvm/Analysis/Loads.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,11 @@ LLVM_ABI bool isDereferenceableAndAlignedInLoop(
SmallVectorImpl<const SCEVPredicate *> *Predicates = nullptr);

/// Return true if the loop \p L cannot fault on any iteration and only
/// contains read-only memory accesses.
LLVM_ABI bool isDereferenceableReadOnlyLoop(
/// contains read-only memory accesses. Also collect loads that are not
/// guaranteed to be dereferenceable.
LLVM_ABI bool isReadOnlyLoopWithSafeOrSpeculativeLoads(
Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
SmallVectorImpl<LoadInst *> *SpeculativeLoads,
SmallVectorImpl<const SCEVPredicate *> *Predicates = nullptr);

/// Return true if we know that executing a load from this value cannot trap.
Expand Down
3 changes: 3 additions & 0 deletions llvm/include/llvm/Analysis/TargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -843,6 +843,9 @@ class TargetTransformInfo {
/// Return true if the target supports strided load.
LLVM_ABI bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const;

/// Return true if the target supports speculative load.
LLVM_ABI bool isLegalSpeculativeLoad(Type *DataType, Align Alignment) const;

/// Return true if the target supports interleaved access for the given vector
/// type \p VTy, interleave factor \p Factor, alignment \p Alignment and
/// address space \p AddrSpace.
Expand Down
4 changes: 4 additions & 0 deletions llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,10 @@ class TargetTransformInfoImplBase {
return false;
}

/// Default answer for the speculative-load legality query: not legal.
/// Neither \p DataType nor \p Alignment is inspected here; the function is
/// virtual so that a target implementation can provide its own answer.
virtual bool isLegalSpeculativeLoad(Type *DataType, Align Alignment) const {
return false;
}

virtual bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor,
Align Alignment,
unsigned AddrSpace) const {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,11 @@ class LoopVectorizationLegality {
/// Returns a list of all known histogram operations in the loop.
bool hasHistograms() const { return !Histograms.empty(); }

/// Returns the set of loads that may fault and therefore need to be
/// speculative. The result is a read-only reference to the SpeculativeLoads
/// member; it is empty when no such load was recorded.
const SmallPtrSetImpl<const Instruction *> &getSpeculativeLoads() const {
return SpeculativeLoads;
}

PredicatedScalarEvolution *getPredicatedScalarEvolution() const {
return &PSE;
}
Expand Down Expand Up @@ -630,6 +635,9 @@ class LoopVectorizationLegality {
/// may work on the same memory location.
SmallVector<HistogramInfo, 1> Histograms;

/// Hold all loads that need to be speculative.
SmallPtrSet<const Instruction *, 4> SpeculativeLoads;

/// BFI and PSI are used to check for profile guided size optimizations.
BlockFrequencyInfo *BFI;
ProfileSummaryInfo *PSI;
Expand Down
9 changes: 6 additions & 3 deletions llvm/lib/Analysis/Loads.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -856,16 +856,19 @@ bool llvm::canReplacePointersIfEqual(const Value *From, const Value *To,
return isPointerAlwaysReplaceable(From, To, DL);
}

bool llvm::isDereferenceableReadOnlyLoop(
bool llvm::isReadOnlyLoopWithSafeOrSpeculativeLoads(
Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
SmallVectorImpl<LoadInst *> *SpeculativeLoads,
SmallVectorImpl<const SCEVPredicate *> *Predicates) {
for (BasicBlock *BB : L->blocks()) {
for (Instruction &I : *BB) {
if (auto *LI = dyn_cast<LoadInst>(&I)) {
if (!isDereferenceableAndAlignedInLoop(LI, L, *SE, *DT, AC, Predicates))
return false;
} else if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow())
SpeculativeLoads->push_back(LI);
} else if (I.mayReadFromMemory() || I.mayWriteToMemory() ||
I.mayThrow()) {
return false;
}
}
}
return true;
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Analysis/TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -531,6 +531,11 @@ bool TargetTransformInfo::isLegalStridedLoadStore(Type *DataType,
return TTIImpl->isLegalStridedLoadStore(DataType, Alignment);
}

/// Return true if the target supports a speculative load of \p DataType with
/// alignment \p Alignment. The query is forwarded unchanged to the
/// target-specific implementation held in TTIImpl.
bool TargetTransformInfo::isLegalSpeculativeLoad(Type *DataType,
Align Alignment) const {
return TTIImpl->isLegalSpeculativeLoad(DataType, Alignment);
}

bool TargetTransformInfo::isLegalInterleavedAccessType(
VectorType *VTy, unsigned Factor, Align Alignment,
unsigned AddrSpace) const {
Expand Down
16 changes: 16 additions & 0 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24405,6 +24405,22 @@ bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
return true;
}

/// Return true if a speculative load of \p DataType with alignment
/// \p Alignment is legal on this subtarget. All of the following must hold:
///   * the subtarget has vector instructions,
///   * the scalar element type of \p DataType is a legal RVV element type,
///   * either unaligned vector memory accesses are enabled, or \p Alignment
///     is at least the store size of the scalar element type.
bool RISCVTargetLowering::isLegalSpeculativeLoad(EVT DataType,
                                                 Align Alignment) const {
  // Without vector instructions there is nothing to lower the load to.
  if (!Subtarget.hasVInstructions())
    return false;

  // Legality is decided on the element type, not on the full vector type.
  const EVT EltVT = DataType.getScalarType();
  if (!isLegalElementTypeForRVV(EltVT))
    return false;

  // With unaligned vector memory support any alignment is acceptable.
  if (Subtarget.enableUnalignedVectorMem())
    return true;

  // Otherwise the access must be at least element-aligned.
  return !(Alignment < EltVT.getStoreSize());
}

MachineInstr *
RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
MachineBasicBlock::instr_iterator &MBBI,
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/RISCV/RISCVISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,10 @@ class RISCVTargetLowering : public TargetLowering {
/// alignment is legal.
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const;

/// Return true if a speculative load of the given result type and
/// alignment is legal.
bool isLegalSpeculativeLoad(EVT DataType, Align Alignment) const;

unsigned getMaxSupportedInterleaveFactor() const override { return 8; }

bool fallBackToDAGISel(const Instruction &Inst) const override;
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,11 @@ class RISCVTTIImpl final : public BasicTTIImplBase<RISCVTTIImpl> {
return TLI->isLegalStridedLoadStore(DataTypeVT, Alignment);
}

/// Return true if a speculative load of \p DataType with alignment
/// \p Alignment is legal. The IR type is converted to its EVT via the target
/// lowering and the decision is delegated to
/// TLI->isLegalSpeculativeLoad.
bool isLegalSpeculativeLoad(Type *DataType, Align Alignment) const override {
  return TLI->isLegalSpeculativeLoad(TLI->getValueType(DL, DataType),
                                     Alignment);
}

bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor,
Align Alignment,
unsigned AddrSpace) const override {
Expand Down
29 changes: 26 additions & 3 deletions llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1760,16 +1760,39 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
assert(LatchBB->getUniquePredecessor() == SingleUncountableExitingBlock &&
"Expected latch predecessor to be the early exiting block");

// TODO: Handle loops that may fault.
Predicates.clear();
if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC,
&Predicates)) {
SmallVector<LoadInst *, 4> NonDerefLoads;
if (!isReadOnlyLoopWithSafeOrSpeculativeLoads(TheLoop, PSE.getSE(), DT, AC,
&NonDerefLoads, &Predicates)) {
reportVectorizationFailure(
"Loop may fault",
"Cannot vectorize potentially faulting early exit loop",
"PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
return false;
}
// Check non-dereferenceable loads if any.
for (LoadInst *LI : NonDerefLoads) {
// Only support unit-stride access for now.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a question about potential AArch64 support: SVE has first-faulting gathers, IIUC, which could be used for strided accesses. I presume we don't need to check the alignment there? From quickly scanning the docs, it looks like an unaligned access on a non-first element will be handled the same as any other non-first fault.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, SVE's first-faulting gathers have the same alignment rules as normal gathers, so no extra checks are required.

int Stride = isConsecutivePtr(LI->getType(), LI->getPointerOperand());
if (Stride != 1) {
reportVectorizationFailure("Loop contains strided unbound access",
"Cannot vectorize early exit loop with "
"speculative strided load",
"SpeculativeNonUnitStrideLoadEarlyExitLoop",
ORE, TheLoop);
return false;
}
if (!TTI->isLegalSpeculativeLoad(LI->getType(), LI->getAlign())) {
reportVectorizationFailure("Loop may fault",
"Cannot vectorize early exit loop with "
"illegal speculative load",
"IllegalSpeculativeLoadEarlyExitLoop", ORE,
TheLoop);
return false;
}
SpeculativeLoads.insert(LI);
LLVM_DEBUG(dbgs() << "LV: Found speculative load: " << *LI << "\n");
}

[[maybe_unused]] const SCEV *SymbolicMaxBTC =
PSE.getSymbolicMaxBackedgeTakenCount();
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10041,6 +10041,13 @@ bool LoopVectorizePass::processLoop(Loop *L) {
return false;
}

if (!LVL.getSpeculativeLoads().empty()) {
reportVectorizationFailure("Auto-vectorization of loops with speculative "
"load is not supported",
"SpeculativeLoadsNotSupported", ORE, L);
return false;
}

// Entrance to the VPlan-native vectorization path. Outer loops are processed
// here. They may require CFG and instruction level transformations before
// even evaluating whether vectorization is profitable. Since we cannot modify
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
; REQUIRES: asserts
; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize -disable-output 2>&1 -mtriple=riscv64 -mattr=+v -S %s | FileCheck %s

; Early-exit search loop over i128 elements: every iteration loads one i128
; through %first.addr and leaves the loop either when the loaded value equals
; %value or when the advanced pointer reaches %last. The expected diagnostic
; for this function is "Loop may fault".
define ptr @unsupported_data_type(ptr %first, ptr %last, i128 %value) {
; CHECK-LABEL: LV: Checking a loop in 'unsupported_data_type'
; CHECK: LV: Not vectorizing: Loop may fault.
entry:
%cond = icmp eq ptr %first, %last
br i1 %cond, label %return, label %for.body

for.body:
%first.addr = phi ptr [ %first, %entry ], [ %first.next, %for.inc ]
%1 = load i128, ptr %first.addr, align 4
%cond2 = icmp eq i128 %1, %value
br i1 %cond2, label %for.end, label %for.inc

for.inc:
; Advance by exactly one i128 element per iteration.
%first.next = getelementptr inbounds i128, ptr %first.addr, i64 1
%cond3 = icmp eq ptr %first.next, %last
br i1 %cond3, label %for.end, label %for.body

for.end:
%retval.ph = phi ptr [ %first.addr, %for.body ], [ %last, %for.inc ]
br label %return

return:
%retval = phi ptr [ %first, %entry ], [ %retval.ph, %for.end ]
ret ptr %retval
}

; Early-exit search loop over i32 elements in which the pointer advances by
; two elements per iteration, so the load is not unit-stride. The expected
; diagnostic for this function is "Loop contains strided unbound access".
define ptr @unbound_strided_access(ptr %first, ptr %last, i32 %value) {
; CHECK-LABEL: LV: Checking a loop in 'unbound_strided_access'
; CHECK: LV: Not vectorizing: Loop contains strided unbound access.
entry:
%cond = icmp eq ptr %first, %last
br i1 %cond, label %return, label %for.body

for.body:
%first.addr = phi ptr [ %first, %entry ], [ %first.next, %for.inc ]
%1 = load i32, ptr %first.addr, align 4
%cond2 = icmp eq i32 %1, %value
br i1 %cond2, label %for.end, label %for.inc

for.inc:
; Step of two i32 elements: this is what makes the access strided.
%first.next = getelementptr inbounds i32, ptr %first.addr, i64 2
%cond3 = icmp eq ptr %first.next, %last
br i1 %cond3, label %for.end, label %for.body

for.end:
%retval.ph = phi ptr [ %first.addr, %for.body ], [ %last, %for.inc ]
br label %return

return:
%retval = phi ptr [ %first, %entry ], [ %retval.ph, %for.end ]
ret ptr %retval
}

; Early-exit search loop with a single unit-stride i32 load (the pointer
; advances by one element per iteration). The expected output first reports
; that the loop can be vectorized and then, on the next line, that
; auto-vectorization of loops with speculative load is not supported.
define ptr @single_unbound_access(ptr %first, ptr %last, i32 %value) {
; CHECK-LABEL: LV: Checking a loop in 'single_unbound_access'
; CHECK: LV: We can vectorize this loop!
; CHECK-NEXT: LV: Not vectorizing: Auto-vectorization of loops with speculative load is not supported.
entry:
%cond = icmp eq ptr %first, %last
br i1 %cond, label %return, label %for.body

for.body:
%first.addr = phi ptr [ %first, %entry ], [ %first.next, %for.inc ]
%1 = load i32, ptr %first.addr, align 4
%cond2 = icmp eq i32 %1, %value
br i1 %cond2, label %for.end, label %for.inc

for.inc:
; Unit stride: advance by exactly one i32 element.
%first.next = getelementptr inbounds i32, ptr %first.addr, i64 1
%cond3 = icmp eq ptr %first.next, %last
br i1 %cond3, label %for.end, label %for.body

for.end:
%retval.ph = phi ptr [ %first.addr, %for.body ], [ %last, %for.inc ]
br label %return

return:
%retval = phi ptr [ %first, %entry ], [ %retval.ph, %for.end ]
ret ptr %retval
}
5 changes: 4 additions & 1 deletion llvm/unittests/Analysis/LoadsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,10 @@ loop.end:
assert(Header->getName() == "loop");
Loop *L = LI.getLoopFor(Header);

return isDereferenceableReadOnlyLoop(L, &SE, &DT, &AC);
SmallVector<LoadInst *, 4> NonDerefLoads;
return isReadOnlyLoopWithSafeOrSpeculativeLoads(L, &SE, &DT, &AC,
&NonDerefLoads) &&
NonDerefLoads.empty();
};

ASSERT_TRUE(IsDerefReadOnlyLoop(F1));
Expand Down