Skip to content

Commit 5a4586f

Browse files
committed
Reapply "[LAA] Remove loop-invariant check added in 234cc40."
This reverts commit d43a809. With the correctness issue blocking the recommit finally fixed (5d01697), again unconditionally check if accesses are completely before or after each other.
1 parent a73aa72 commit 5a4586f

File tree

6 files changed

+49
-74
lines changed

6 files changed

+49
-74
lines changed

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 40 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2032,15 +2032,6 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
20322032
LLVM_DEBUG(dbgs() << "LAA: Distance for " << *AInst << " to " << *BInst
20332033
<< ": " << *Dist << "\n");
20342034

2035-
// At the moment this is limited to cases where either source or
2036-
// sink are loop invariant to avoid compile-time increases. This is not
2037-
// required for correctness.
2038-
if (SE.isLoopInvariant(Src, InnermostLoop) ||
2039-
SE.isLoopInvariant(Sink, InnermostLoop)) {
2040-
if (areAccessesCompletelyBeforeOrAfter(Src, ATy, Sink, BTy))
2041-
return Dependence::NoDep;
2042-
}
2043-
20442035
// Need accesses with constant strides and the same direction for further
20452036
// dependence analysis. We don't want to vectorize "A[B[i]] += ..." and
20462037
// similar code or pointer arithmetic that could wrap in the address space.
@@ -2103,18 +2094,37 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
21032094
const MemAccessInfo &B, unsigned BIdx) {
21042095
assert(AIdx < BIdx && "Must pass arguments in program order");
21052096

2097+
// Check if we can prove that Sink only accesses memory after Src's end or
2098+
// vice versa. The helper is used to perform the checks only on the exit paths
2099+
// where it helps to improve the analysis result.
2100+
auto CheckCompletelyBeforeOrAfter = [&]() {
2101+
auto *APtr = A.getPointer();
2102+
auto *BPtr = B.getPointer();
2103+
Type *ATy = getLoadStoreType(InstMap[AIdx]);
2104+
Type *BTy = getLoadStoreType(InstMap[BIdx]);
2105+
const SCEV *Src = PSE.getSCEV(APtr);
2106+
const SCEV *Sink = PSE.getSCEV(BPtr);
2107+
return areAccessesCompletelyBeforeOrAfter(Src, ATy, Sink, BTy);
2108+
};
2109+
21062110
// Get the dependence distance, stride, type size and what access writes for
21072111
// the dependence between A and B.
21082112
auto Res =
21092113
getDependenceDistanceStrideAndSize(A, InstMap[AIdx], B, InstMap[BIdx]);
2110-
if (std::holds_alternative<Dependence::DepType>(Res))
2114+
if (std::holds_alternative<Dependence::DepType>(Res)) {
2115+
if (std::get<Dependence::DepType>(Res) == Dependence::Unknown &&
2116+
CheckCompletelyBeforeOrAfter())
2117+
return Dependence::NoDep;
21112118
return std::get<Dependence::DepType>(Res);
2119+
}
21122120

21132121
auto &[Dist, MaxStride, CommonStride, TypeByteSize, AIsWrite, BIsWrite] =
21142122
std::get<DepDistanceStrideAndSizeInfo>(Res);
21152123
bool HasSameSize = TypeByteSize > 0;
21162124

21172125
if (isa<SCEVCouldNotCompute>(Dist)) {
2126+
if (CheckCompletelyBeforeOrAfter())
2127+
return Dependence::NoDep;
21182128
LLVM_DEBUG(dbgs() << "LAA: Dependence because of uncomputable distance.\n");
21192129
return Dependence::Unknown;
21202130
}
@@ -2176,8 +2186,10 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
21762186
// forward dependency will allow vectorization using any width.
21772187

21782188
if (IsTrueDataDependence && EnableForwardingConflictDetection) {
2179-
if (!ConstDist)
2180-
return Dependence::Unknown;
2189+
if (!ConstDist) {
2190+
return CheckCompletelyBeforeOrAfter() ? Dependence::NoDep
2191+
: Dependence::Unknown;
2192+
}
21812193
if (!HasSameSize ||
21822194
couldPreventStoreLoadForward(ConstDist, TypeByteSize)) {
21832195
LLVM_DEBUG(
@@ -2192,10 +2204,14 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
21922204

21932205
int64_t MinDistance = SE.getSignedRangeMin(Dist).getSExtValue();
21942206
// Below we only handle strictly positive distances.
2195-
if (MinDistance <= 0)
2196-
return Dependence::Unknown;
2207+
if (MinDistance <= 0) {
2208+
return CheckCompletelyBeforeOrAfter() ? Dependence::NoDep
2209+
: Dependence::Unknown;
2210+
}
21972211

21982212
if (!HasSameSize) {
2213+
if (CheckCompletelyBeforeOrAfter())
2214+
return Dependence::NoDep;
21992215
LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with "
22002216
"different type sizes\n");
22012217
return Dependence::Unknown;
@@ -2247,8 +2263,9 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
22472263
// For non-constant distances, we checked the lower bound of the
22482264
// dependence distance and the distance may be larger at runtime (and safe
22492265
// for vectorization). Classify it as Unknown, so we re-try with runtime
2250-
// checks.
2251-
return Dependence::Unknown;
2266+
// checks, unless we can prove both accesses cannot overlap.
2267+
return CheckCompletelyBeforeOrAfter() ? Dependence::NoDep
2268+
: Dependence::Unknown;
22522269
}
22532270
LLVM_DEBUG(dbgs() << "LAA: Failure because of positive minimum distance "
22542271
<< MinDistance << '\n');
@@ -2279,10 +2296,15 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
22792296
if (!ConstDist && MaxVFInBits < MaxTargetVectorWidthInBits) {
22802297
// For non-constant distances, we checked the lower bound of the dependence
22812298
// distance and the distance may be larger at runtime (and safe for
2282-
// vectorization). Classify it as Unknown, so we re-try with runtime checks.
2283-
return Dependence::Unknown;
2299+
// vectorization). Classify it as Unknown, so we re-try with runtime checks,
2300+
// unless we can prove both accesses cannot overlap.
2301+
return CheckCompletelyBeforeOrAfter() ? Dependence::NoDep
2302+
: Dependence::Unknown;
22842303
}
22852304

2305+
if (CheckCompletelyBeforeOrAfter())
2306+
return Dependence::NoDep;
2307+
22862308
MaxSafeVectorWidthInBits = std::min(MaxSafeVectorWidthInBits, MaxVFInBits);
22872309
return Dependence::BackwardVectorizable;
22882310
}

llvm/test/Analysis/LoopAccessAnalysis/accesses-completely-before-or-after.ll

Lines changed: 4 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -4,21 +4,10 @@
44
define i32 @completely_before_or_after_true_dep_different_size(ptr %d) {
55
; CHECK-LABEL: 'completely_before_or_after_true_dep_different_size'
66
; CHECK-NEXT: loop:
7-
; CHECK-NEXT: Memory dependences are safe with run-time checks
7+
; CHECK-NEXT: Memory dependences are safe
88
; CHECK-NEXT: Dependences:
99
; CHECK-NEXT: Run-time memory checks:
10-
; CHECK-NEXT: Check 0:
11-
; CHECK-NEXT: Comparing group GRP0:
12-
; CHECK-NEXT: %gep.128.iv = getelementptr i64, ptr %gep.128, i64 %iv
13-
; CHECK-NEXT: Against group GRP1:
14-
; CHECK-NEXT: %gep.iv = getelementptr i32, ptr %d, i64 %iv
1510
; CHECK-NEXT: Grouped accesses:
16-
; CHECK-NEXT: Group GRP0:
17-
; CHECK-NEXT: (Low: (128 + %d) High: (384 + %d))
18-
; CHECK-NEXT: Member: {(128 + %d),+,8}<nw><%loop>
19-
; CHECK-NEXT: Group GRP1:
20-
; CHECK-NEXT: (Low: %d High: (128 + %d))
21-
; CHECK-NEXT: Member: {%d,+,4}<nw><%loop>
2211
; CHECK-EMPTY:
2312
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
2413
; CHECK-NEXT: SCEV assumptions:
@@ -88,21 +77,10 @@ exit:
8877
define void @completely_after_stores_with_different_sizes(ptr %dst) {
8978
; CHECK-LABEL: 'completely_after_stores_with_different_sizes'
9079
; CHECK-NEXT: loop:
91-
; CHECK-NEXT: Memory dependences are safe with run-time checks
80+
; CHECK-NEXT: Memory dependences are safe
9281
; CHECK-NEXT: Dependences:
9382
; CHECK-NEXT: Run-time memory checks:
94-
; CHECK-NEXT: Check 0:
95-
; CHECK-NEXT: Comparing group GRP0:
96-
; CHECK-NEXT: %gep.iv = getelementptr i16, ptr %dst, i64 %iv
97-
; CHECK-NEXT: Against group GRP1:
98-
; CHECK-NEXT: %gep.dst.128.iv = getelementptr i8, ptr %gep.dst.128, i64 %iv
9983
; CHECK-NEXT: Grouped accesses:
100-
; CHECK-NEXT: Group GRP0:
101-
; CHECK-NEXT: (Low: %dst High: (128 + %dst)<nuw>)
102-
; CHECK-NEXT: Member: {%dst,+,2}<nw><%loop>
103-
; CHECK-NEXT: Group GRP1:
104-
; CHECK-NEXT: (Low: (128 + %dst)<nuw> High: (192 + %dst))
105-
; CHECK-NEXT: Member: {(128 + %dst)<nuw>,+,1}<nw><%loop>
10684
; CHECK-EMPTY:
10785
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
10886
; CHECK-NEXT: SCEV assumptions:
@@ -173,13 +151,8 @@ exit: ; preds = %loop
173151
define void @completely_before_or_after_non_const_distance(ptr %dst) {
174152
; CHECK-LABEL: 'completely_before_or_after_non_const_distance'
175153
; CHECK-NEXT: loop:
176-
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
177-
; CHECK-NEXT: Unknown data dependence.
154+
; CHECK-NEXT: Memory dependences are safe
178155
; CHECK-NEXT: Dependences:
179-
; CHECK-NEXT: Unknown:
180-
; CHECK-NEXT: store i32 0, ptr %gep.iv.mul, align 4 ->
181-
; CHECK-NEXT: store i32 0, ptr %gep.off.iv, align 4
182-
; CHECK-EMPTY:
183156
; CHECK-NEXT: Run-time memory checks:
184157
; CHECK-NEXT: Grouped accesses:
185158
; CHECK-EMPTY:
@@ -249,12 +222,8 @@ exit:
249222
define void @accesses_completely_before_or_after_instead_backwards_vectorizable(ptr dereferenceable(800) %dst) {
250223
; CHECK-LABEL: 'accesses_completely_before_or_after_instead_backwards_vectorizable'
251224
; CHECK-NEXT: loop:
252-
; CHECK-NEXT: Memory dependences are safe with a maximum safe vector width of 128 bits
225+
; CHECK-NEXT: Memory dependences are safe
253226
; CHECK-NEXT: Dependences:
254-
; CHECK-NEXT: BackwardVectorizable:
255-
; CHECK-NEXT: store i16 0, ptr %gep.mul.2, align 2 ->
256-
; CHECK-NEXT: store i16 0, ptr %gep.iv.32, align 2
257-
; CHECK-EMPTY:
258227
; CHECK-NEXT: Run-time memory checks:
259228
; CHECK-NEXT: Grouped accesses:
260229
; CHECK-EMPTY:

llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -129,16 +129,8 @@ define void @neg_dist_dep_type_size_equivalence(ptr nocapture %vec, i64 %n) {
129129
; CHECK-LABEL: 'neg_dist_dep_type_size_equivalence'
130130
; CHECK-NEXT: loop:
131131
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
132-
; CHECK-NEXT: Unknown data dependence.
132+
; CHECK-NEXT: Backward loop carried data dependence that prevents store-to-load forwarding.
133133
; CHECK-NEXT: Dependences:
134-
; CHECK-NEXT: Unknown:
135-
; CHECK-NEXT: %ld.f64 = load double, ptr %gep.iv, align 8 ->
136-
; CHECK-NEXT: store i32 %ld.i64.i32, ptr %gep.iv.n.i64, align 8
137-
; CHECK-EMPTY:
138-
; CHECK-NEXT: Unknown:
139-
; CHECK-NEXT: %ld.i64 = load i64, ptr %gep.iv, align 8 ->
140-
; CHECK-NEXT: store i32 %ld.i64.i32, ptr %gep.iv.n.i64, align 8
141-
; CHECK-EMPTY:
142134
; CHECK-NEXT: BackwardVectorizableButPreventsForwarding:
143135
; CHECK-NEXT: %ld.f64 = load double, ptr %gep.iv, align 8 ->
144136
; CHECK-NEXT: store double %val, ptr %gep.iv.101.i64, align 8

llvm/test/Analysis/LoopAccessAnalysis/different_strides.ll

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,8 @@
2020
define void @different_strides_backward_vectorizable() {
2121
; CHECK-LABEL: 'different_strides_backward_vectorizable'
2222
; CHECK-NEXT: inner.body:
23-
; CHECK-NEXT: Memory dependences are safe with a maximum safe vector width of 2048 bits
23+
; CHECK-NEXT: Memory dependences are safe
2424
; CHECK-NEXT: Dependences:
25-
; CHECK-NEXT: BackwardVectorizable:
26-
; CHECK-NEXT: %3 = load float, ptr %arrayidx, align 4 ->
27-
; CHECK-NEXT: store float %add9, ptr %arrayidx8, align 4
28-
; CHECK-EMPTY:
2925
; CHECK-NEXT: Forward:
3026
; CHECK-NEXT: %5 = load float, ptr %arrayidx8, align 4 ->
3127
; CHECK-NEXT: store float %add9, ptr %arrayidx8, align 4

llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-backward.ll

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -45,12 +45,8 @@ exit:
4545
define void @different_non_constant_strides_known_backward_distance_larger_than_trip_count(ptr %A) {
4646
; CHECK-LABEL: 'different_non_constant_strides_known_backward_distance_larger_than_trip_count'
4747
; CHECK-NEXT: loop:
48-
; CHECK-NEXT: Memory dependences are safe with a maximum safe vector width of 4096 bits
48+
; CHECK-NEXT: Memory dependences are safe
4949
; CHECK-NEXT: Dependences:
50-
; CHECK-NEXT: BackwardVectorizable:
51-
; CHECK-NEXT: %l = load i32, ptr %gep, align 4 ->
52-
; CHECK-NEXT: store i32 %add, ptr %gep.mul.2, align 4
53-
; CHECK-EMPTY:
5450
; CHECK-NEXT: Run-time memory checks:
5551
; CHECK-NEXT: Grouped accesses:
5652
; CHECK-EMPTY:

llvm/test/Transforms/LoopVectorize/global_alias.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -503,7 +503,7 @@ for.end: ; preds = %for.body
503503
; return Foo.A[a];
504504
; }
505505
; CHECK-LABEL: define i32 @mayAlias01(
506-
; CHECK-NOT: add nsw <4 x i32>
506+
; CHECK: add nsw <4 x i32>
507507
; CHECK: ret
508508

509509
define i32 @mayAlias01(i32 %a) nounwind {
@@ -536,7 +536,7 @@ for.end: ; preds = %for.body
536536
; return Foo.A[a];
537537
; }
538538
; CHECK-LABEL: define i32 @mayAlias02(
539-
; CHECK-NOT: add nsw <4 x i32>
539+
; CHECK: add nsw <4 x i32>
540540
; CHECK: ret
541541

542542
define i32 @mayAlias02(i32 %a) nounwind {

0 commit comments

Comments
 (0)