diff --git a/llvm/include/llvm/Analysis/Loads.h b/llvm/include/llvm/Analysis/Loads.h index 84564563de8e3..080757b6d1fe0 100644 --- a/llvm/include/llvm/Analysis/Loads.h +++ b/llvm/include/llvm/Analysis/Loads.h @@ -91,6 +91,14 @@ LLVM_ABI bool isDereferenceableReadOnlyLoop( Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, SmallVectorImpl *Predicates = nullptr); +/// Return true if the loop \p L only accesses memory through load instructions +/// and cannot throw. Loads that are not guaranteed to be dereferenceable may +/// fault; they are collected in \p SpeculativeLoads instead of being rejected. +LLVM_ABI bool isReadOnlyLoopWithSafeOrSpeculativeLoads( + Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, + SmallVectorImpl *SpeculativeLoads, + SmallVectorImpl *Predicates = nullptr); + /// Return true if we know that executing a load from this value cannot trap. /// /// If DT and ScanFrom are specified this method performs context-sensitive diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index aa4550de455e0..2b8e6be92238c 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1857,6 +1857,10 @@ class TargetTransformInfo { /// \returns True if the target supports scalable vectors. LLVM_ABI bool supportsScalableVectors() const; + /// \returns True if the target supports speculative load intrinsics (e.g., + /// vp.load.ff). + LLVM_ABI bool supportsSpeculativeLoads() const; + /// \return true when scalable vectorization is preferred. 
LLVM_ABI bool enableScalableVectorization() const; diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index abdbca04488db..1df93ecc7ec16 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -1106,6 +1106,8 @@ class TargetTransformInfoImplBase { virtual bool supportsScalableVectors() const { return false; } + virtual bool supportsSpeculativeLoads() const { return false; } + virtual bool enableScalableVectorization() const { return false; } virtual bool hasActiveVectorLength() const { return false; } diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h index 43ff084816d18..3b5638f3f570a 100644 --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -445,6 +445,11 @@ class LoopVectorizationLegality { /// Returns a list of all known histogram operations in the loop. bool hasHistograms() const { return !Histograms.empty(); } + /// Returns the loads that may fault and therefore need to be speculative. + const SmallPtrSetImpl &getSpeculativeLoads() const { + return SpeculativeLoads; + } + PredicatedScalarEvolution *getPredicatedScalarEvolution() const { return &PSE; } @@ -630,6 +635,9 @@ class LoopVectorizationLegality { /// may work on the same memory location. SmallVector Histograms; + /// Holds all loads that may fault and therefore need to be speculative. + SmallPtrSet SpeculativeLoads; + /// BFI and PSI are used to check for profile guided size optimizations. 
BlockFrequencyInfo *BFI; ProfileSummaryInfo *PSI; diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp index 78d0887d5d87e..c5a55e9903d41 100644 --- a/llvm/lib/Analysis/Loads.cpp +++ b/llvm/lib/Analysis/Loads.cpp @@ -870,3 +870,27 @@ bool llvm::isDereferenceableReadOnlyLoop( } return true; } + +bool llvm::isReadOnlyLoopWithSafeOrSpeculativeLoads( + Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, + SmallVectorImpl *SpeculativeLoads, + SmallVectorImpl *Predicates) { + for (BasicBlock *BB : L->blocks()) { + for (Instruction &I : *BB) { + if (auto *LI = dyn_cast(&I)) { + if (isDereferenceableAndAlignedInLoop(LI, L, *SE, *DT, AC, Predicates)) + continue; + // The load is not known to be safe. Without a container to report it + // in, the loop cannot be treated as safe. + if (!SpeculativeLoads) + return false; + SpeculativeLoads->push_back(LI); + } else if (I.mayReadFromMemory() || I.mayWriteToMemory() || + I.mayThrow()) { + // Any other instruction that touches memory or may throw is rejected. + return false; + } + } + } + return true; +} diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index c7eb2ec18c679..9f05e01d34781 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -1457,6 +1457,10 @@ bool TargetTransformInfo::supportsScalableVectors() const { return TTIImpl->supportsScalableVectors(); } +bool TargetTransformInfo::supportsSpeculativeLoads() const { + return TTIImpl->supportsSpeculativeLoads(); +} + bool TargetTransformInfo::enableScalableVectorization() const { return TTIImpl->enableScalableVectorization(); } diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index 05d504cbcb6bb..54e9c8346b6e2 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -110,6 +110,9 @@ class RISCVTTIImpl final : public BasicTTIImplBase { bool supportsScalableVectors() const override { return ST->hasVInstructions(); } + bool supportsSpeculativeLoads() const override { + return ST->hasVInstructions(); + } bool enableOrderedReductions() const override { return true; 
} bool enableScalableVectorization() const override { return ST->hasVInstructions(); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index c47fd9421fddd..46660866741ea 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -1760,16 +1760,41 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() { assert(LatchBB->getUniquePredecessor() == SingleUncountableExitingBlock && "Expected latch predecessor to be the early exiting block"); - // TODO: Handle loops that may fault. Predicates.clear(); - if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC, - &Predicates)) { + SmallVector NonDerefLoads; + bool HasSafeAccess = + TTI->supportsSpeculativeLoads() + ? isReadOnlyLoopWithSafeOrSpeculativeLoads( + TheLoop, PSE.getSE(), DT, AC, &NonDerefLoads, &Predicates) + : isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC, + &Predicates); + if (!HasSafeAccess) { reportVectorizationFailure( "Loop may fault", "Cannot vectorize potentially faulting early exit loop", "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop); return false; } + // Speculative loads need to be unit-stride. 
+ for (LoadInst *LI : NonDerefLoads) { + if (LI->getParent() != TheLoop->getHeader()) { + reportVectorizationFailure("Cannot vectorize predicated speculative load", + "SpeculativeLoadNeedsPredication", ORE, + TheLoop); + return false; + } + int Stride = isConsecutivePtr(LI->getType(), LI->getPointerOperand()); + if (Stride != 1) { + reportVectorizationFailure("Loop contains non-unit-stride load", + "Cannot vectorize early exit loop with " + "speculative non-unit-stride load", + "SpeculativeNonUnitStrideLoadEarlyExitLoop", + ORE, TheLoop); + return false; + } + SpeculativeLoads.insert(LI); + LLVM_DEBUG(dbgs() << "LV: Found speculative load: " << *LI << "\n"); + } [[maybe_unused]] const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount(); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 9667b506e594f..790a5236d4f04 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -10041,6 +10041,13 @@ bool LoopVectorizePass::processLoop(Loop *L) { return false; } + if (!LVL.getSpeculativeLoads().empty()) { + reportVectorizationFailure("Auto-vectorization of loops with speculative " + "load is not supported", + "SpeculativeLoadsNotSupported", ORE, L); + return false; + } + // Entrance to the VPlan-native vectorization path. Outer loops are processed // here. They may require CFG and instruction level transformations before // even evaluating whether vectorization is profitable. Since we cannot modify