Skip to content

Commit ac2dab8

Browse files
committed
[LV] Add initial legality checks for loops with non-dereferenceable load.
1 parent f615269 commit ac2dab8

File tree

9 files changed

+79
-3
lines changed

9 files changed

+79
-3
lines changed

llvm/include/llvm/Analysis/Loads.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,14 @@ LLVM_ABI bool isDereferenceableReadOnlyLoop(
9191
Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
9292
SmallVectorImpl<const SCEVPredicate *> *Predicates = nullptr);
9393

94+
/// Return true if the only memory accesses in the loop \p L are loads and no
95+
/// instruction in the loop may throw. Loads that cannot be proven
96+
/// dereferenceable and aligned in the loop do not cause a false result; they
/// are appended to \p SpeculativeLoads, which is expected to be non-null.
/// \p Predicates, if given, is forwarded to the dereferenceability check.
97+
LLVM_ABI bool isReadOnlyLoopWithSafeOrSpeculativeLoads(
98+
Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
99+
SmallVectorImpl<LoadInst *> *SpeculativeLoads,
100+
SmallVectorImpl<const SCEVPredicate *> *Predicates = nullptr);
101+
94102
/// Return true if we know that executing a load from this value cannot trap.
95103
///
96104
/// If DT and ScanFrom are specified this method performs context-sensitive

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1857,6 +1857,9 @@ class TargetTransformInfo {
18571857
/// \returns True if the target supports scalable vectors.
18581858
LLVM_ABI bool supportsScalableVectors() const;
18591859

1860+
/// \returns True if the target supports speculative load intrinsics
/// (e.g., vp.load.ff).
1861+
LLVM_ABI bool supportsSpeculativeLoads() const;
1862+
18601863
/// \return true when scalable vectorization is preferred.
18611864
LLVM_ABI bool enableScalableVectorization() const;
18621865

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1106,6 +1106,8 @@ class TargetTransformInfoImplBase {
11061106

11071107
virtual bool supportsScalableVectors() const { return false; }
11081108

1109+
/// Conservative default: speculative loads are reported as unsupported unless
/// a target overrides this hook.
virtual bool supportsSpeculativeLoads() const { return false; }
1110+
11091111
virtual bool enableScalableVectorization() const { return false; }
11101112

11111113
virtual bool hasActiveVectorLength() const { return false; }

llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -445,6 +445,11 @@ class LoopVectorizationLegality {
445445
/// Returns a list of all known histogram operations in the loop.
446446
bool hasHistograms() const { return !Histograms.empty(); }
447447

448+
/// Returns the set of loads that may fault and therefore need to be
/// speculative. The result is a read-only reference to the member set
/// SpeculativeLoads.
449+
const SmallPtrSetImpl<const Instruction *> &getSpeculativeLoads() const {
450+
return SpeculativeLoads;
451+
}
452+
448453
PredicatedScalarEvolution *getPredicatedScalarEvolution() const {
449454
return &PSE;
450455
}
@@ -630,6 +635,9 @@ class LoopVectorizationLegality {
630635
/// may work on the same memory location.
631636
SmallVector<HistogramInfo, 1> Histograms;
632637

638+
/// Holds all loads that may fault and therefore need to be speculative.
/// Exposed read-only through getSpeculativeLoads().
639+
SmallPtrSet<const Instruction *, 4> SpeculativeLoads;
640+
633641
/// BFI and PSI are used to check for profile guided size optimizations.
634642
BlockFrequencyInfo *BFI;
635643
ProfileSummaryInfo *PSI;

llvm/lib/Analysis/Loads.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -870,3 +870,19 @@ bool llvm::isDereferenceableReadOnlyLoop(
870870
}
871871
return true;
872872
}
873+
874+
/// Check that the only memory accesses in loop \p L are loads and that no
/// instruction in the loop may throw.
///
/// Loads that cannot be proven dereferenceable and aligned in the loop do not
/// make the check fail; they are appended to \p SpeculativeLoads so that the
/// caller can decide whether they can be handled speculatively. If
/// \p SpeculativeLoads is null there is nowhere to record such a load, so the
/// function conservatively returns false on the first one (the same answer
/// isDereferenceableReadOnlyLoop gives).
///
/// \p SpeculativeLoads is only appended to, never cleared, and may hold a
/// partial list when the function returns false.
/// \p Predicates is forwarded to isDereferenceableAndAlignedInLoop.
///
/// \returns true if every instruction in the loop passed the checks above.
bool llvm::isReadOnlyLoopWithSafeOrSpeculativeLoads(
    Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
    SmallVectorImpl<LoadInst *> *SpeculativeLoads,
    SmallVectorImpl<const SCEVPredicate *> *Predicates) {
  for (BasicBlock *BB : L->blocks()) {
    for (Instruction &I : *BB) {
      if (auto *LI = dyn_cast<LoadInst>(&I)) {
        if (isDereferenceableAndAlignedInLoop(LI, L, *SE, *DT, AC, Predicates))
          continue;
        // Without an output list the potentially faulting load cannot be
        // reported to the caller, so treat the loop as unsafe instead of
        // dereferencing a null pointer.
        if (!SpeculativeLoads)
          return false;
        SpeculativeLoads->push_back(LI);
        continue;
      }
      // Any non-load instruction that touches memory or may throw makes the
      // loop ineligible.
      if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow())
        return false;
    }
  }
  return true;
}

llvm/lib/Analysis/TargetTransformInfo.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1457,6 +1457,10 @@ bool TargetTransformInfo::supportsScalableVectors() const {
14571457
return TTIImpl->supportsScalableVectors();
14581458
}
14591459

1460+
// Forwards the query to the wrapped implementation object (TTIImpl).
bool TargetTransformInfo::supportsSpeculativeLoads() const {
1461+
return TTIImpl->supportsSpeculativeLoads();
1462+
}
1463+
14601464
bool TargetTransformInfo::enableScalableVectorization() const {
14611465
return TTIImpl->enableScalableVectorization();
14621466
}

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,9 @@ class RISCVTTIImpl final : public BasicTTIImplBase<RISCVTTIImpl> {
110110
bool supportsScalableVectors() const override {
111111
return ST->hasVInstructions();
112112
}
113+
/// Speculative loads are reported as supported exactly when
/// ST->hasVInstructions() holds, the same condition used by
/// supportsScalableVectors().
bool supportsSpeculativeLoads() const override {
114+
return ST->hasVInstructions();
115+
}
113116
bool enableOrderedReductions() const override { return true; }
114117
bool enableScalableVectorization() const override {
115118
return ST->hasVInstructions();

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1760,16 +1760,41 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
17601760
assert(LatchBB->getUniquePredecessor() == SingleUncountableExitingBlock &&
17611761
"Expected latch predecessor to be the early exiting block");
17621762

1763-
// TODO: Handle loops that may fault.
17641763
Predicates.clear();
1765-
if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC,
1766-
&Predicates)) {
1764+
SmallVector<LoadInst *, 4> NonDerefLoads;
1765+
bool HasSafeAccess =
1766+
TTI->supportsSpeculativeLoads()
1767+
? isReadOnlyLoopWithSafeOrSpeculativeLoads(
1768+
TheLoop, PSE.getSE(), DT, AC, &NonDerefLoads, &Predicates)
1769+
: isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC,
1770+
&Predicates);
1771+
if (!HasSafeAccess) {
17671772
reportVectorizationFailure(
17681773
"Loop may fault",
17691774
"Cannot vectorize potentially faulting early exit loop",
17701775
"PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
17711776
return false;
17721777
}
1778+
// Speculative loads must be unpredicated (in the header) and unit-stride.
1779+
for (LoadInst *LI : NonDerefLoads) {
1780+
if (LI->getParent() != TheLoop->getHeader()) {
1781+
reportVectorizationFailure("Cannot vectorize predicated speculative load",
1782+
"SpeculativeLoadNeedsPredication", ORE,
1783+
TheLoop);
1784+
return false;
1785+
}
1786+
int Stride = isConsecutivePtr(LI->getType(), LI->getPointerOperand());
1787+
if (Stride != 1) {
1788+
reportVectorizationFailure("Loop contains non-unit-stride load",
1789+
"Cannot vectorize early exit loop with "
1790+
"speculative non-unit-stride load",
1791+
"SpeculativeNonUnitStrideLoadEarlyExitLoop",
1792+
ORE, TheLoop);
1793+
return false;
1794+
}
1795+
SpeculativeLoads.insert(LI);
1796+
LLVM_DEBUG(dbgs() << "LV: Found speculative load: " << *LI << "\n");
1797+
}
17731798

17741799
[[maybe_unused]] const SCEV *SymbolicMaxBTC =
17751800
PSE.getSymbolicMaxBackedgeTakenCount();

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10041,6 +10041,13 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1004110041
return false;
1004210042
}
1004310043

10044+
if (!LVL.getSpeculativeLoads().empty()) {
10045+
reportVectorizationFailure("Auto-vectorization of loops with speculative "
10046+
"load is not supported",
10047+
"SpeculativeLoadsNotSupported", ORE, L);
10048+
return false;
10049+
}
10050+
1004410051
// Entrance to the VPlan-native vectorization path. Outer loops are processed
1004510052
// here. They may require CFG and instruction level transformations before
1004610053
// even evaluating whether vectorization is profitable. Since we cannot modify

0 commit comments

Comments
 (0)