@@ -2032,15 +2032,6 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
2032
2032
LLVM_DEBUG (dbgs () << " LAA: Distance for " << *AInst << " to " << *BInst
2033
2033
<< " : " << *Dist << " \n " );
2034
2034
2035
- // At the moment this is limited to cases where either source or
2036
- // sink are loop invariant to avoid compile-time increases. This is not
2037
- // required for correctness.
2038
- if (SE.isLoopInvariant (Src, InnermostLoop) ||
2039
- SE.isLoopInvariant (Sink, InnermostLoop)) {
2040
- if (areAccessesCompletelyBeforeOrAfter (Src, ATy, Sink, BTy))
2041
- return Dependence::NoDep;
2042
- }
2043
-
2044
2035
// Need accesses with constant strides and the same direction for further
2045
2036
// dependence analysis. We don't want to vectorize "A[B[i]] += ..." and
2046
2037
// similar code or pointer arithmetic that could wrap in the address space.
@@ -2103,18 +2094,37 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
2103
2094
const MemAccessInfo &B, unsigned BIdx) {
2104
2095
assert (AIdx < BIdx && " Must pass arguments in program order" );
2105
2096
2097
+ // Check if we can prove that Sink only accesses memory after Src's end or
2098
+ // vice versa. The helper is used to perform the checks only on the exit paths
2099
+ // where it helps to improve the analysis result.
2100
+ auto CheckCompletelyBeforeOrAfter = [&]() {
2101
+ auto *APtr = A.getPointer ();
2102
+ auto *BPtr = B.getPointer ();
2103
+ Type *ATy = getLoadStoreType (InstMap[AIdx]);
2104
+ Type *BTy = getLoadStoreType (InstMap[BIdx]);
2105
+ const SCEV *Src = PSE.getSCEV (APtr);
2106
+ const SCEV *Sink = PSE.getSCEV (BPtr);
2107
+ return areAccessesCompletelyBeforeOrAfter (Src, ATy, Sink, BTy);
2108
+ };
2109
+
2106
2110
// Get the dependence distance, stride, type size and what access writes for
2107
2111
// the dependence between A and B.
2108
2112
auto Res =
2109
2113
getDependenceDistanceStrideAndSize (A, InstMap[AIdx], B, InstMap[BIdx]);
2110
- if (std::holds_alternative<Dependence::DepType>(Res))
2114
+ if (std::holds_alternative<Dependence::DepType>(Res)) {
2115
+ if (std::get<Dependence::DepType>(Res) == Dependence::Unknown &&
2116
+ CheckCompletelyBeforeOrAfter ())
2117
+ return Dependence::NoDep;
2111
2118
return std::get<Dependence::DepType>(Res);
2119
+ }
2112
2120
2113
2121
auto &[Dist, MaxStride, CommonStride, TypeByteSize, AIsWrite, BIsWrite] =
2114
2122
std::get<DepDistanceStrideAndSizeInfo>(Res);
2115
2123
bool HasSameSize = TypeByteSize > 0 ;
2116
2124
2117
2125
if (isa<SCEVCouldNotCompute>(Dist)) {
2126
+ if (CheckCompletelyBeforeOrAfter ())
2127
+ return Dependence::NoDep;
2118
2128
LLVM_DEBUG (dbgs () << " LAA: Dependence because of uncomputable distance.\n " );
2119
2129
return Dependence::Unknown;
2120
2130
}
@@ -2176,8 +2186,10 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
2176
2186
// forward dependency will allow vectorization using any width.
2177
2187
2178
2188
if (IsTrueDataDependence && EnableForwardingConflictDetection) {
2179
- if (!ConstDist)
2180
- return Dependence::Unknown;
2189
+ if (!ConstDist) {
2190
+ return CheckCompletelyBeforeOrAfter () ? Dependence::NoDep
2191
+ : Dependence::Unknown;
2192
+ }
2181
2193
if (!HasSameSize ||
2182
2194
couldPreventStoreLoadForward (ConstDist, TypeByteSize)) {
2183
2195
LLVM_DEBUG (
@@ -2192,10 +2204,14 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
2192
2204
2193
2205
int64_t MinDistance = SE.getSignedRangeMin (Dist).getSExtValue ();
2194
2206
// Below we only handle strictly positive distances.
2195
- if (MinDistance <= 0 )
2196
- return Dependence::Unknown;
2207
+ if (MinDistance <= 0 ) {
2208
+ return CheckCompletelyBeforeOrAfter () ? Dependence::NoDep
2209
+ : Dependence::Unknown;
2210
+ }
2197
2211
2198
2212
if (!HasSameSize) {
2213
+ if (CheckCompletelyBeforeOrAfter ())
2214
+ return Dependence::NoDep;
2199
2215
LLVM_DEBUG (dbgs () << " LAA: ReadWrite-Write positive dependency with "
2200
2216
" different type sizes\n " );
2201
2217
return Dependence::Unknown;
@@ -2247,8 +2263,9 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
2247
2263
// For non-constant distances, we checked the lower bound of the
2248
2264
// dependence distance and the distance may be larger at runtime (and safe
2249
2265
// for vectorization). Classify it as Unknown, so we re-try with runtime
2250
- // checks.
2251
- return Dependence::Unknown;
2266
+ // checks, unless we can prove both accesses cannot overlap.
2267
+ return CheckCompletelyBeforeOrAfter () ? Dependence::NoDep
2268
+ : Dependence::Unknown;
2252
2269
}
2253
2270
LLVM_DEBUG (dbgs () << " LAA: Failure because of positive minimum distance "
2254
2271
<< MinDistance << ' \n ' );
@@ -2279,10 +2296,15 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
2279
2296
if (!ConstDist && MaxVFInBits < MaxTargetVectorWidthInBits) {
2280
2297
// For non-constant distances, we checked the lower bound of the dependence
2281
2298
// distance and the distance may be larger at runtime (and safe for
2282
- // vectorization). Classify it as Unknown, so we re-try with runtime checks.
2283
- return Dependence::Unknown;
2299
+ // vectorization). Classify it as Unknown, so we re-try with runtime checks,
2300
+ // unless we can prove both accesses cannot overlap.
2301
+ return CheckCompletelyBeforeOrAfter () ? Dependence::NoDep
2302
+ : Dependence::Unknown;
2284
2303
}
2285
2304
2305
+ if (CheckCompletelyBeforeOrAfter ())
2306
+ return Dependence::NoDep;
2307
+
2286
2308
MaxSafeVectorWidthInBits = std::min (MaxSafeVectorWidthInBits, MaxVFInBits);
2287
2309
return Dependence::BackwardVectorizable;
2288
2310
}
0 commit comments