Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
dc5b7c1
[LV] Add initial legality checks for loops with non-dereferenceable l…
arcbbb Aug 7, 2025
78f05df
Fix braces
arcbbb Aug 8, 2025
f5315f8
Limit to single unbound access. Add tests
arcbbb Aug 9, 2025
d3246bd
clang-formatted
arcbbb Aug 9, 2025
eb316ee
trivial fix: use lower case
arcbbb Aug 9, 2025
faa6112
trivial fix
arcbbb Aug 9, 2025
b206c9f
Address comment and refine TTI to check data type
arcbbb Aug 12, 2025
72383dd
Query subtarget feature for misaligned access support
arcbbb Aug 21, 2025
0c1d007
Add align 1 case
arcbbb Aug 22, 2025
e22c486
Rename isLegalSpeculativeLoad to isLegalFaultOnlyFirstLoad
arcbbb Aug 27, 2025
2a37216
Rename isReadOnlyLoopWithSafeOrSpeculativeLoads to
arcbbb Aug 27, 2025
abb0120
Rename SpeculativeLoads to FaultOnlyFirstLoads
arcbbb Aug 27, 2025
6b920f4
Refine comments with ff loads
arcbbb Aug 27, 2025
22dee5b
Update unittest to check the returned instructions
arcbbb Aug 27, 2025
e88d5d4
Refine report messages
arcbbb Aug 27, 2025
12e6dc9
Remove TTI isLegalFaultOnlyFirstLoad
arcbbb Aug 28, 2025
3284452
Update tests
arcbbb Aug 28, 2025
10156ca
Refine description
arcbbb Sep 2, 2025
16b5376
Rename isLoopSafeWithLoadOnlyFaults to isReadOnlyLoop
arcbbb Sep 2, 2025
357ff34
Refine error message
arcbbb Sep 2, 2025
ead7a78
Refine LoadTest
arcbbb Sep 2, 2025
9e98d12
clang-format
arcbbb Sep 2, 2025
f18dae7
Update tests
arcbbb Sep 2, 2025
9dc5a7a
Rename IsLoadOnlyFaultingLoop to IsReadOnlyLoop
arcbbb Sep 3, 2025
380e064
Assert the size of NonDerefLoads
arcbbb Sep 3, 2025
93db706
pass NonDereferenceableAndAlignedLoads as reference
arcbbb Sep 3, 2025
43976f7
Rename FaultOnlyFirstLoads to PotentiallyFaultingLoads
arcbbb Sep 3, 2025
51a8329
Update report message in LoopVectorize
arcbbb Sep 3, 2025
7a99472
Update descriptions
arcbbb Sep 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions llvm/include/llvm/Analysis/Loads.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,13 @@ LLVM_ABI bool isDereferenceableAndAlignedInLoop(
AssumptionCache *AC = nullptr,
SmallVectorImpl<const SCEVPredicate *> *Predicates = nullptr);

/// Return true if the loop \p L cannot fault on any iteration and only
/// contains read-only memory accesses.
LLVM_ABI bool isDereferenceableReadOnlyLoop(
Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
SmallVectorImpl<const SCEVPredicate *> *Predicates = nullptr);
/// Returns true if loop \p L is read-only: it contains no non-load instruction
/// that may read from memory, write to memory, or throw.
///
/// Loads that cannot be shown to be dereferenceable and aligned in the loop
/// (and so may fault) do not make the result false. They are appended to
/// \p NonDereferenceableAndAlignedLoads so the caller can decide how to handle
/// them. The vector is only appended to, never cleared, and it may already
/// contain entries when false is returned.
///
/// \p Predicates, if non-null, is forwarded to the per-load
/// dereferenceable-and-aligned check.
LLVM_ABI bool
isReadOnlyLoop(Loop *L, ScalarEvolution *SE, DominatorTree *DT,
AssumptionCache *AC,
SmallVectorImpl<LoadInst *> &NonDereferenceableAndAlignedLoads,
SmallVectorImpl<const SCEVPredicate *> *Predicates = nullptr);

/// Return true if we know that executing a load from this value cannot trap.
///
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,12 @@ class LoopVectorizationLegality {
/// Returns true if the list of known histogram operations in the loop is
/// non-empty.
bool hasHistograms() const { return !Histograms.empty(); }

/// Returns the set of loads recorded as potentially faulting. The result is a
/// reference to the PotentiallyFaultingLoads member, so it must not outlive
/// this object.
const SmallPtrSetImpl<const Instruction *> &
getPotentiallyFaultingLoads() const {
return PotentiallyFaultingLoads;
}

PredicatedScalarEvolution *getPredicatedScalarEvolution() const {
return &PSE;
}
Expand Down Expand Up @@ -630,6 +636,9 @@ class LoopVectorizationLegality {
/// may work on the same memory location.
SmallVector<HistogramInfo, 1> Histograms;

/// Hold potentially faulting loads.
SmallPtrSet<const Instruction *, 4> PotentiallyFaultingLoads;

/// BFI and PSI are used to check for profile guided size optimizations.
BlockFrequencyInfo *BFI;
ProfileSummaryInfo *PSI;
Expand Down
9 changes: 6 additions & 3 deletions llvm/lib/Analysis/Loads.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -856,16 +856,19 @@ bool llvm::canReplacePointersIfEqual(const Value *From, const Value *To,
return isPointerAlwaysReplaceable(From, To, DL);
}

bool llvm::isDereferenceableReadOnlyLoop(
bool llvm::isReadOnlyLoop(
Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
SmallVectorImpl<LoadInst *> &NonDereferenceableAndAlignedLoads,
SmallVectorImpl<const SCEVPredicate *> *Predicates) {
for (BasicBlock *BB : L->blocks()) {
for (Instruction &I : *BB) {
if (auto *LI = dyn_cast<LoadInst>(&I)) {
if (!isDereferenceableAndAlignedInLoop(LI, L, *SE, *DT, AC, Predicates))
return false;
} else if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow())
NonDereferenceableAndAlignedLoads.push_back(LI);
} else if (I.mayReadFromMemory() || I.mayWriteToMemory() ||
I.mayThrow()) {
return false;
}
}
}
return true;
Expand Down
29 changes: 22 additions & 7 deletions llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1760,16 +1760,31 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
assert(LatchBB->getUniquePredecessor() == SingleUncountableExitingBlock &&
"Expected latch predecessor to be the early exiting block");

// TODO: Handle loops that may fault.
Predicates.clear();
if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC,
&Predicates)) {
reportVectorizationFailure(
"Loop may fault",
"Cannot vectorize potentially faulting early exit loop",
"PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
SmallVector<LoadInst *, 4> NonDerefLoads;
if (!isReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC, NonDerefLoads,
&Predicates)) {
reportVectorizationFailure("Loop may fault",
"Cannot vectorize non-read-only early exit loop",
"NonReadOnlyEarlyExitLoop", ORE, TheLoop);
return false;
}
// Check non-dereferenceable loads if any.
for (LoadInst *LI : NonDerefLoads) {
// Only support unit-stride access for now.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a question about potential AArch64 support: SVE has first-fault gathers, IIUC, which could be used for strided accesses. I presume we don't need to check for alignment there? From quickly scanning the docs, it looks like a fault from an unaligned access on a non-first element will be handled the same as any other non-first-element fault.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, SVE's first-faulting gathers have the same alignment rules as normal gathers, so no extra checks are required.

int Stride = isConsecutivePtr(LI->getType(), LI->getPointerOperand());
if (Stride != 1) {
reportVectorizationFailure(
"Loop contains potentially faulting strided load",
"Cannot vectorize early exit loop with "
"strided fault-only-first load",
"EarlyExitLoopWithStridedFaultOnlyFirstLoad", ORE, TheLoop);
return false;
}
PotentiallyFaultingLoads.insert(LI);
LLVM_DEBUG(dbgs() << "LV: Found potentially faulting load: " << *LI
<< "\n");
}

[[maybe_unused]] const SCEV *SymbolicMaxBTC =
PSE.getSymbolicMaxBackedgeTakenCount();
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10041,6 +10041,13 @@ bool LoopVectorizePass::processLoop(Loop *L) {
return false;
}

if (!LVL.getPotentiallyFaultingLoads().empty()) {
reportVectorizationFailure("Auto-vectorization of loops with potentially "
"faulting load is not supported",
"PotentiallyFaultingLoadsNotSupported", ORE, L);
return false;
}

// Entrance to the VPlan-native vectorization path. Outer loops are processed
// here. They may require CFG and instruction level transformations before
// even evaluating whether vectorization is profitable. Since we cannot modify
Expand Down
32 changes: 29 additions & 3 deletions llvm/test/Transforms/LoopVectorize/early_exit_legality.ll
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ loop.end:

define i64 @same_exit_block_pre_inc_use1_too_small_allocas() {
; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1_too_small_allocas'
; CHECK: LV: Not vectorizing: Loop may fault.
; CHECK: LV: Not vectorizing: Auto-vectorization of loops with potentially faulting load is not supported.
entry:
%p1 = alloca [42 x i8]
%p2 = alloca [42 x i8]
Expand Down Expand Up @@ -238,7 +238,7 @@ loop.end:

define i64 @same_exit_block_pre_inc_use1_too_small_deref_ptrs(ptr dereferenceable(42) %p1, ptr dereferenceable(42) %p2) {
; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1_too_small_deref_ptrs'
; CHECK: LV: Not vectorizing: Loop may fault.
; CHECK: LV: Not vectorizing: Auto-vectorization of loops with potentially faulting load is not supported.
entry:
br label %loop

Expand All @@ -264,7 +264,7 @@ loop.end:

define i64 @same_exit_block_pre_inc_use1_unknown_ptrs(ptr %p1, ptr %p2) {
; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1_unknown_ptrs'
; CHECK: LV: Not vectorizing: Loop may fault.
; CHECK: LV: Not vectorizing: Auto-vectorization of loops with potentially faulting load is not supported.
entry:
br label %loop

Expand All @@ -287,6 +287,32 @@ loop.end:
ret i64 %retval
}

; Early-exit search loop over an unknown pointer range. The loaded address
; advances by two i32 elements per iteration (see %first.next), so the load is
; strided rather than unit-stride, and %first carries no dereferenceable
; attribute. The expected diagnostic below is the strided-load rejection.
define ptr @same_exit_block_strided_unknown_ptr(ptr %first, ptr %last, i32 %value) {
; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_strided_unknown_ptr'
; CHECK: LV: Not vectorizing: Loop contains potentially faulting strided load.
entry:
%cond = icmp eq ptr %first, %last
br i1 %cond, label %return, label %for.body

for.body:
%first.addr = phi ptr [ %first, %entry ], [ %first.next, %for.inc ]
%1 = load i32, ptr %first.addr, align 4
%cond2 = icmp eq i32 %1, %value
br i1 %cond2, label %for.end, label %for.inc

for.inc:
; Step by 2 x i32 (8 bytes): this is what makes the access strided.
%first.next = getelementptr inbounds i32, ptr %first.addr, i64 2
%cond3 = icmp eq ptr %first.next, %last
br i1 %cond3, label %for.end, label %for.body

for.end:
%retval.ph = phi ptr [ %first.addr, %for.body ], [ %last, %for.inc ]
br label %return

return:
%retval = phi ptr [ %first, %entry ], [ %retval.ph, %for.end ]
ret ptr %retval
}

; The early exit (i.e. unknown exit-not-taken count) is the latch - we don't
; support this yet.
Expand Down
15 changes: 10 additions & 5 deletions llvm/unittests/Analysis/LoadsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ define void @f(i32* %p1, i32* %p2, i64 %i) {
EXPECT_TRUE(canReplacePointersInUseIfEqual(IcmpUse, P2, DL));
}

TEST(LoadsTest, IsDerefReadOnlyLoop) {
TEST(LoadsTest, IsReadOnlyLoop) {
LLVMContext C;
std::unique_ptr<Module> M = parseIR(C,
R"IR(
Expand Down Expand Up @@ -183,7 +183,8 @@ loop.end:
TargetLibraryInfoImpl TLII(M->getTargetTriple());
TargetLibraryInfo TLI(TLII);

auto IsDerefReadOnlyLoop = [&TLI](Function *F) -> bool {
auto IsReadOnlyLoop =
[&TLI](Function *F, SmallVector<LoadInst *, 4> &NonDerefLoads) -> bool {
AssumptionCache AC(*F);
DominatorTree DT(*F);
LoopInfo LI(DT);
Expand All @@ -195,9 +196,13 @@ loop.end:
assert(Header->getName() == "loop");
Loop *L = LI.getLoopFor(Header);

return isDereferenceableReadOnlyLoop(L, &SE, &DT, &AC);
return isReadOnlyLoop(L, &SE, &DT, &AC, NonDerefLoads);
};

ASSERT_TRUE(IsDerefReadOnlyLoop(F1));
ASSERT_FALSE(IsDerefReadOnlyLoop(F2));
SmallVector<LoadInst *, 4> NonDerefLoads;
ASSERT_TRUE(IsReadOnlyLoop(F1, NonDerefLoads));
ASSERT_TRUE(NonDerefLoads.empty());
ASSERT_TRUE(IsReadOnlyLoop(F2, NonDerefLoads));
ASSERT_TRUE((NonDerefLoads.size() == 1) &&
(NonDerefLoads[0]->getName() == "ld1"));
}