From dc5b7c14e92f0850fc1f097401ed1c5c97c02613 Mon Sep 17 00:00:00 2001 From: ShihPo Hung Date: Wed, 6 Aug 2025 17:44:19 -0700 Subject: [PATCH 1/6] [LV] Add initial legality checks for loops with non-dereferenceable load. --- llvm/include/llvm/Analysis/Loads.h | 8 +++++ llvm/include/llvm/Analysis/TargetTransformInfo.h | 4 +++ .../llvm/Analysis/TargetTransformInfoImpl.h | 2 ++ .../Vectorize/LoopVectorizationLegality.h | 8 +++++ llvm/lib/Analysis/Loads.cpp | 16 ++++++++++ llvm/lib/Analysis/TargetTransformInfo.cpp | 4 +++ .../Target/RISCV/RISCVTargetTransformInfo.h | 3 ++ .../Vectorize/LoopVectorizationLegality.cpp | 31 +++++++++++++++++-- .../Transforms/Vectorize/LoopVectorize.cpp | 7 +++++ 9 files changed, 80 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/Analysis/Loads.h b/llvm/include/llvm/Analysis/Loads.h index 84564563de8e3..080757b6d1fe0 100644 --- a/llvm/include/llvm/Analysis/Loads.h +++ b/llvm/include/llvm/Analysis/Loads.h @@ -91,6 +91,14 @@ LLVM_ABI bool isDereferenceableReadOnlyLoop( Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, SmallVectorImpl *Predicates = nullptr); +/// Return true if every memory access in the loop \p L is a load and no +/// instruction may throw. Loads not known to be dereferenceable and aligned +/// in the loop are appended to \p SpeculativeLoads rather than rejected. +LLVM_ABI bool isReadOnlyLoopWithSafeOrSpeculativeLoads( + Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, + SmallVectorImpl *SpeculativeLoads, + SmallVectorImpl *Predicates = nullptr); + /// Return true if we know that executing a load from this value cannot trap. 
/// /// If DT and ScanFrom are specified this method performs context-sensitive diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index aa4550de455e0..2b8e6be92238c 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1857,6 +1857,10 @@ class TargetTransformInfo { /// \returns True if the target supports scalable vectors. LLVM_ABI bool supportsScalableVectors() const; + /// \returns True if the target supports speculative load intrinsics (e.g., + /// vp.load.ff). + LLVM_ABI bool supportsSpeculativeLoads() const; + /// \return true when scalable vectorization is preferred. LLVM_ABI bool enableScalableVectorization() const; diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index abdbca04488db..1df93ecc7ec16 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -1106,6 +1106,8 @@ class TargetTransformInfoImplBase { virtual bool supportsScalableVectors() const { return false; } + virtual bool supportsSpeculativeLoads() const { return false; } + virtual bool enableScalableVectorization() const { return false; } virtual bool hasActiveVectorLength() const { return false; } diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h index 43ff084816d18..3b5638f3f570a 100644 --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -445,6 +445,11 @@ class LoopVectorizationLegality { /// Returns a list of all known histogram operations in the loop. bool hasHistograms() const { return !Histograms.empty(); } + /// Returns the loads that may fault and need to be speculative. 
+ const SmallPtrSetImpl &getSpeculativeLoads() const { + return SpeculativeLoads; + } + PredicatedScalarEvolution *getPredicatedScalarEvolution() const { return &PSE; } @@ -630,6 +635,9 @@ class LoopVectorizationLegality { /// may work on the same memory location. SmallVector Histograms; + /// Hold all loads that need to be speculative. + SmallPtrSet SpeculativeLoads; + /// BFI and PSI are used to check for profile guided size optimizations. BlockFrequencyInfo *BFI; ProfileSummaryInfo *PSI; diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp index 78d0887d5d87e..c5a55e9903d41 100644 --- a/llvm/lib/Analysis/Loads.cpp +++ b/llvm/lib/Analysis/Loads.cpp @@ -870,3 +870,19 @@ bool llvm::isDereferenceableReadOnlyLoop( } return true; } + +bool llvm::isReadOnlyLoopWithSafeOrSpeculativeLoads( + Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, + SmallVectorImpl *SpeculativeLoads, + SmallVectorImpl *Predicates) { + for (BasicBlock *BB : L->blocks()) { + for (Instruction &I : *BB) { + if (auto *LI = dyn_cast(&I)) { + if (!isDereferenceableAndAlignedInLoop(LI, L, *SE, *DT, AC, Predicates)) + SpeculativeLoads->push_back(LI); + } else if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow()) + return false; + } + } + return true; +} diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index c7eb2ec18c679..9f05e01d34781 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -1457,6 +1457,10 @@ bool TargetTransformInfo::supportsScalableVectors() const { return TTIImpl->supportsScalableVectors(); } +bool TargetTransformInfo::supportsSpeculativeLoads() const { + return TTIImpl->supportsSpeculativeLoads(); +} + bool TargetTransformInfo::enableScalableVectorization() const { return TTIImpl->enableScalableVectorization(); } diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index 
05d504cbcb6bb..54e9c8346b6e2 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -110,6 +110,9 @@ class RISCVTTIImpl final : public BasicTTIImplBase { bool supportsScalableVectors() const override { return ST->hasVInstructions(); } + bool supportsSpeculativeLoads() const override { + return ST->hasVInstructions(); + } bool enableOrderedReductions() const override { return true; } bool enableScalableVectorization() const override { return ST->hasVInstructions(); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index c47fd9421fddd..46660866741ea 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -1760,16 +1760,41 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() { assert(LatchBB->getUniquePredecessor() == SingleUncountableExitingBlock && "Expected latch predecessor to be the early exiting block"); - // TODO: Handle loops that may fault. Predicates.clear(); - if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC, - &Predicates)) { + SmallVector NonDerefLoads; + bool HasSafeAccess = + TTI->supportsSpeculativeLoads() + ? isReadOnlyLoopWithSafeOrSpeculativeLoads( + TheLoop, PSE.getSE(), DT, AC, &NonDerefLoads, &Predicates) + : isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC, + &Predicates); + if (!HasSafeAccess) { reportVectorizationFailure( "Loop may fault", "Cannot vectorize potentially faulting early exit loop", "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop); return false; } + // Speculative loads need to be unit-stride. 
+ for (LoadInst *LI : NonDerefLoads) { + if (LI->getParent() != TheLoop->getHeader()) { + reportVectorizationFailure("Cannot vectorize predicated speculative load", + "SpeculativeLoadNeedsPredication", ORE, + TheLoop); + return false; + } + int Stride = isConsecutivePtr(LI->getType(), LI->getPointerOperand()); + if (Stride != 1) { + reportVectorizationFailure("Loop contains non-unit-stride load", + "Cannot vectorize early exit loop with " + "speculative non-unit-stride load", + "SpeculativeNonUnitStrideLoadEarlyExitLoop", + ORE, TheLoop); + return false; + } + SpeculativeLoads.insert(LI); + LLVM_DEBUG(dbgs() << "LV: Found speculative load: " << *LI << "\n"); + } [[maybe_unused]] const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount(); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 9667b506e594f..790a5236d4f04 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -10041,6 +10041,13 @@ bool LoopVectorizePass::processLoop(Loop *L) { return false; } + if (!LVL.getSpeculativeLoads().empty()) { + reportVectorizationFailure("Auto-vectorization of loops with speculative " + "load is not supported", + "SpeculativeLoadsNotSupported", ORE, L); + return false; + } + // Entrance to the VPlan-native vectorization path. Outer loops are processed // here. They may require CFG and instruction level transformations before // even evaluating whether vectorization is profitable. 
Since we cannot modify From 78f05df3753e6a7f167559560e9122a9f96b488c Mon Sep 17 00:00:00 2001 From: ShihPo Hung Date: Fri, 8 Aug 2025 00:28:14 -0700 Subject: [PATCH 2/6] Fix braces --- llvm/lib/Analysis/Loads.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp index c5a55e9903d41..d05b89f86f554 100644 --- a/llvm/lib/Analysis/Loads.cpp +++ b/llvm/lib/Analysis/Loads.cpp @@ -880,8 +880,9 @@ bool llvm::isReadOnlyLoopWithSafeOrSpeculativeLoads( if (auto *LI = dyn_cast(&I)) { if (!isDereferenceableAndAlignedInLoop(LI, L, *SE, *DT, AC, Predicates)) SpeculativeLoads->push_back(LI); - } else if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow()) + } else if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow()) { return false; + } } } return true; From f5315f8e262ec049b7e498157b48c5a5e3d36f8e Mon Sep 17 00:00:00 2001 From: ShihPo Hung Date: Sat, 9 Aug 2025 15:38:09 -0700 Subject: [PATCH 3/6] Limit to single unbound access. Add tests --- .../Vectorize/LoopVectorizationLegality.cpp | 15 ++-- .../RISCV/unbound-access-legality.ll | 89 +++++++++++++++++++ 2 files changed, 97 insertions(+), 7 deletions(-) create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/unbound-access-legality.ll diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index 46660866741ea..b20fab14d5384 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -1777,15 +1777,9 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() { } // Speculative loads need to be unit-stride. 
for (LoadInst *LI : NonDerefLoads) { - if (LI->getParent() != TheLoop->getHeader()) { - reportVectorizationFailure("Cannot vectorize predicated speculative load", - "SpeculativeLoadNeedsPredication", ORE, - TheLoop); - return false; - } int Stride = isConsecutivePtr(LI->getType(), LI->getPointerOperand()); if (Stride != 1) { - reportVectorizationFailure("Loop contains non-unit-stride load", + reportVectorizationFailure("Loop contains strided unbound access", "Cannot vectorize early exit loop with " "speculative non-unit-stride load", "SpeculativeNonUnitStrideLoadEarlyExitLoop", @@ -1795,6 +1789,13 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() { SpeculativeLoads.insert(LI); LLVM_DEBUG(dbgs() << "LV: Found speculative load: " << *LI << "\n"); } + // Support single Speculative load for now. + if (NonDerefLoads.size() > 1) { + reportVectorizationFailure("Loop contains more than one unbound access", + "TooManySpeculativeLoadInEarlyExitLoop", + ORE, TheLoop); + return false; + } [[maybe_unused]] const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount(); diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/unbound-access-legality.ll b/llvm/test/Transforms/LoopVectorize/RISCV/unbound-access-legality.ll new file mode 100644 index 0000000000000..e35d109304274 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/unbound-access-legality.ll @@ -0,0 +1,89 @@ +; REQUIRES: asserts +; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize -disable-output 2>&1 -mtriple=riscv64 -mattr=+v -S %s | FileCheck %s + +define ptr @two_unbound_access(ptr %first, ptr %last, ptr %addr2) { +; CHECK-LABEL: LV: Checking a loop in 'two_unbound_access' +; CHECK: LV: Not vectorizing: Loop contains more than one unbound access. 
+entry: + %cond = icmp eq ptr %first, %last + br i1 %cond, label %return, label %for.body + +for.body: + %first.addr = phi ptr [ %first, %entry], [ %first.next, %for.inc ] + %match.addr = phi ptr [ %addr2, %entry ], [ %match.next, %for.inc ] + %1 = load i32, ptr %first.addr, align 4 + %match.value = load i32, ptr %match.addr, align 4 + %cmp1 = icmp eq i32 %1, %match.value + br i1 %cmp1, label %early.exit, label %for.inc + +for.inc: + %match.next = getelementptr inbounds nuw i8, ptr %match.addr, i64 4 + %first.next = getelementptr inbounds i8, ptr %first.addr, i64 4 + %exit = icmp eq ptr %first.next, %last + br i1 %exit, label %main.exit, label %for.body + +early.exit: + br label %return + +main.exit: + br label %return + +return: + %retval = phi ptr [ %first, %entry ], [ %last, %main.exit ], [ %first.addr, %early.exit ] + ret ptr %retval +} + +define ptr @unbound_strided_access(ptr %first, ptr %last, i32 %value) { +; CHECK-LABEL: LV: Checking a loop in 'unbound_strided_access' +; CHECK: LV: Not vectorizing: Loop contains strided unbound access. +entry: + %cond = icmp eq ptr %first, %last + br i1 %cond, label %return, label %for.body + +for.body: + %first.addr = phi ptr [ %first, %entry ], [ %first.next, %for.inc ] + %1 = load i32, ptr %first.addr, align 4 + %cond2= icmp eq i32 %1, %value + br i1 %cond2, label %for.end, label %for.inc + +for.inc: + %first.next = getelementptr inbounds i32, ptr %first.addr, i64 2 + %cond3 = icmp eq ptr %first.next, %last + br i1 %cond3, label %for.end, label %for.body + +for.end: + %retval.ph = phi ptr [ %first.addr, %for.body ], [ %last, %for.inc ] + br label %return + +return: + %retval = phi ptr [ %first, %entry ], [ %retval.ph, %for.end ] + ret ptr %retval +} + +define ptr @single_unbound_access(ptr %first, ptr %last, i32 %value) { +; CHECK-LABEL: LV: Checking a loop in 'single_unbound_access' +; CHECK: LV: We can vectorize this loop! 
+; CHECK-NEXT: LV: Not vectorizing: Auto-vectorization of loops with speculative load is not supported. +entry: + %cond = icmp eq ptr %first, %last + br i1 %cond, label %return, label %for.body + +for.body: + %first.addr = phi ptr [ %first, %entry ], [ %first.next, %for.inc ] + %1 = load i32, ptr %first.addr, align 4 + %cond2= icmp eq i32 %1, %value + br i1 %cond2, label %for.end, label %for.inc + +for.inc: + %first.next = getelementptr inbounds i32, ptr %first.addr, i64 1 + %cond3 = icmp eq ptr %first.next, %last + br i1 %cond3, label %for.end, label %for.body + +for.end: + %retval.ph = phi ptr [ %first.addr, %for.body ], [ %last, %for.inc ] + br label %return + +return: + %retval = phi ptr [ %first, %entry ], [ %retval.ph, %for.end ] + ret ptr %retval +} From d3246bd1b132e63ede51b02417f37ae8f3d0cea3 Mon Sep 17 00:00:00 2001 From: ShihPo Hung Date: Sat, 9 Aug 2025 15:39:30 -0700 Subject: [PATCH 4/6] clang-formatted --- llvm/lib/Analysis/Loads.cpp | 3 ++- .../Transforms/Vectorize/LoopVectorizationLegality.cpp | 8 ++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp index d05b89f86f554..b09dd227978b1 100644 --- a/llvm/lib/Analysis/Loads.cpp +++ b/llvm/lib/Analysis/Loads.cpp @@ -880,7 +880,8 @@ bool llvm::isReadOnlyLoopWithSafeOrSpeculativeLoads( if (auto *LI = dyn_cast(&I)) { if (!isDereferenceableAndAlignedInLoop(LI, L, *SE, *DT, AC, Predicates)) SpeculativeLoads->push_back(LI); - } else if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow()) { + } else if (I.mayReadFromMemory() || I.mayWriteToMemory() || + I.mayThrow()) { return false; } } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index b20fab14d5384..0fa3a36db1a3e 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -1791,10 +1791,10 @@ bool 
LoopVectorizationLegality::isVectorizableEarlyExitLoop() { } // Support single Speculative load for now. if (NonDerefLoads.size() > 1) { - reportVectorizationFailure("Loop contains more than one unbound access", - "TooManySpeculativeLoadInEarlyExitLoop", - ORE, TheLoop); - return false; + reportVectorizationFailure("Loop contains more than one unbound access", + "TooManySpeculativeLoadInEarlyExitLoop", ORE, + TheLoop); + return false; } [[maybe_unused]] const SCEV *SymbolicMaxBTC = From eb316eeff7688d577e5a061ada36f7e68fada25a Mon Sep 17 00:00:00 2001 From: ShihPo Hung Date: Sat, 9 Aug 2025 15:42:06 -0700 Subject: [PATCH 5/6] trivial fix: use lower case --- llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index 0fa3a36db1a3e..37e1693e256bb 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -1789,7 +1789,7 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() { SpeculativeLoads.insert(LI); LLVM_DEBUG(dbgs() << "LV: Found speculative load: " << *LI << "\n"); } - // Support single Speculative load for now. + // Support single speculative load for now. 
if (NonDerefLoads.size() > 1) { reportVectorizationFailure("Loop contains more than one unbound access", "TooManySpeculativeLoadInEarlyExitLoop", ORE, From faa61124a3369bd72e0f01466102f6c22af26528 Mon Sep 17 00:00:00 2001 From: ShihPo Hung Date: Sat, 9 Aug 2025 15:46:12 -0700 Subject: [PATCH 6/6] trivial fix --- .../Transforms/LoopVectorize/RISCV/unbound-access-legality.ll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/unbound-access-legality.ll b/llvm/test/Transforms/LoopVectorize/RISCV/unbound-access-legality.ll index e35d109304274..a5786a0d131f7 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/unbound-access-legality.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/unbound-access-legality.ll @@ -43,7 +43,7 @@ entry: for.body: %first.addr = phi ptr [ %first, %entry ], [ %first.next, %for.inc ] %1 = load i32, ptr %first.addr, align 4 - %cond2= icmp eq i32 %1, %value + %cond2 = icmp eq i32 %1, %value br i1 %cond2, label %for.end, label %for.inc for.inc: @@ -71,7 +71,7 @@ entry: for.body: %first.addr = phi ptr [ %first, %entry ], [ %first.next, %for.inc ] %1 = load i32, ptr %first.addr, align 4 - %cond2= icmp eq i32 %1, %value + %cond2 = icmp eq i32 %1, %value br i1 %cond2, label %for.end, label %for.inc for.inc: