Skip to content

Commit a7b1889

Browse files
committed
[SLP] Consider non-inst operands when checking insts used only outside the block
If the instructions in the node do not require scheduling and are used only outside the basic block, we still need to check whether their operands are non-instructions too. Such nodes should be emitted at the beginning of the block. Fixes #165151
1 parent f767f23 commit a7b1889

File tree

6 files changed

+74
-13
lines changed

6 files changed

+74
-13
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17641,12 +17641,28 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
1764117641
[](Value *V) {
1764217642
return !isa<GetElementPtrInst>(V) && isa<Instruction>(V);
1764317643
})) ||
17644-
all_of(E->Scalars, [&](Value *V) {
17645-
return isa<PoisonValue>(V) ||
17646-
(E->Idx == 0 && isa<InsertElementInst>(V)) ||
17647-
E->isCopyableElement(V) ||
17648-
(!isVectorLikeInstWithConstOps(V) && isUsedOutsideBlock(V));
17649-
}))
17644+
(all_of(E->Scalars,
17645+
[&](Value *V) {
17646+
return isa<PoisonValue>(V) ||
17647+
(E->Idx == 0 && isa<InsertElementInst>(V)) ||
17648+
E->isCopyableElement(V) ||
17649+
(!isVectorLikeInstWithConstOps(V) &&
17650+
isUsedOutsideBlock(V));
17651+
}) &&
17652+
(!E->doesNotNeedToSchedule() ||
17653+
any_of(E->Scalars,
17654+
[&](Value *V) {
17655+
if (!isa<Instruction>(V) ||
17656+
(E->hasCopyableElements() && E->isCopyableElement(V)))
17657+
return false;
17658+
return !areAllOperandsNonInsts(V);
17659+
}) ||
17660+
none_of(E->Scalars, [&](Value *V) {
17661+
if (!isa<Instruction>(V) ||
17662+
(E->hasCopyableElements() && E->isCopyableElement(V)))
17663+
return false;
17664+
return MustGather.contains(V);
17665+
}))))
1765017666
Res = FindLastInst();
1765117667
else
1765217668
Res = FindFirstInst();

llvm/test/Transforms/SLPVectorizer/X86/buildvector-reused-with-bv-subvector.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@ define void @test(ptr %0, i64 %1, i64 %2) {
66
; CHECK-SAME: ptr [[TMP0:%.*]], i64 [[TMP1:%.*]], i64 [[TMP2:%.*]]) #[[ATTR0:[0-9]+]] {
77
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i32 0
88
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x ptr> [[TMP4]], <4 x ptr> poison, <4 x i32> zeroinitializer
9-
; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint <4 x ptr> [[TMP5]] to <4 x i64>
109
; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint <4 x ptr> [[TMP5]] to <4 x i64>
1110
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 2, i32 2, i32 1, i32 3, i32 1>
11+
; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint <4 x ptr> [[TMP5]] to <4 x i64>
1212
; CHECK-NEXT: br [[DOTPREHEADER_LR_PH:label %.*]]
1313
; CHECK: [[_PREHEADER_LR_PH:.*:]]
1414
; CHECK-NEXT: br [[DOTPREHEADER_US_US_PREHEADER:label %.*]]

llvm/test/Transforms/SLPVectorizer/X86/entry-no-bundle-but-extra-use-on-vec.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,11 @@ define void @test(ptr %nExp, float %0, i1 %cmp, float %1) {
99
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP0]], i32 3
1010
; CHECK-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
1111
; CHECK: [[IF_THEN]]:
12-
; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[NEXP]], align 4
1312
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <2 x i32> <i32 3, i32 3>
13+
; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x float> [[TMP5]], zeroinitializer
14+
; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[NEXP]], align 4
1415
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[TMP4]], i32 0
1516
; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x float> [[TMP6]], zeroinitializer
16-
; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x float> [[TMP5]], zeroinitializer
1717
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> <float poison, float 0.000000e+00, float 0.000000e+00, float poison>, float [[TMP1]], i32 3
1818
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
1919
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x i32> <i32 4, i32 1, i32 2, i32 3>
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-100 < %s | FileCheck %s
3+
4+
define void @test(i32 %arg) {
5+
; CHECK-LABEL: define void @test(
6+
; CHECK-SAME: i32 [[ARG:%.*]]) {
7+
; CHECK-NEXT: [[BB:.*:]]
8+
; CHECK-NEXT: br label %[[BB1:.*]]
9+
; CHECK: [[BB1]]:
10+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[ARG]], i32 0
11+
; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> zeroinitializer, [[TMP0]]
12+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
13+
; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> zeroinitializer, [[TMP2]]
14+
; CHECK-NEXT: br i1 false, label %[[BB8:.*]], label %[[BB4:.*]]
15+
; CHECK: [[BB4]]:
16+
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> zeroinitializer, [[TMP3]]
17+
; CHECK-NEXT: br label %[[BB8]]
18+
; CHECK: [[BB8]]:
19+
; CHECK-NEXT: [[TMP5:%.*]] = phi <4 x i32> [ [[TMP4]], %[[BB4]] ], [ [[TMP1]], %[[BB1]] ]
20+
; CHECK-NEXT: ret void
21+
;
22+
bb:
23+
br label %bb1
24+
25+
bb1:
26+
%sub = sub i32 0, %arg
27+
%add = add i32 0, 0
28+
%add2 = add i32 0, 0
29+
%add3 = add i32 0, 0
30+
br i1 false, label %bb8, label %bb4
31+
32+
bb4:
33+
%add5 = add i32 %add3, 0
34+
%add6 = add i32 0, 0
35+
%add7 = add i32 0, 0
36+
br label %bb8
37+
38+
bb8:
39+
%phi = phi i32 [ %sub, %bb4 ], [ %sub, %bb1 ]
40+
%phi9 = phi i32 [ %add5, %bb4 ], [ %add, %bb1 ]
41+
%phi10 = phi i32 [ %add6, %bb4 ], [ %add2, %bb1 ]
42+
%phi11 = phi i32 [ %add7, %bb4 ], [ %add3, %bb1 ]
43+
ret void
44+
}
45+

llvm/test/Transforms/SLPVectorizer/X86/parent-node-non-schedulable.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,11 @@ define void @test(ptr %0, i64 %1, i64 %2, i1 %3, i64 %4, i64 %5) {
55
; CHECK-LABEL: define void @test(
66
; CHECK-SAME: ptr [[TMP0:%.*]], i64 [[TMP1:%.*]], i64 [[TMP2:%.*]], i1 [[TMP3:%.*]], i64 [[TMP4:%.*]], i64 [[TMP5:%.*]]) #[[ATTR0:[0-9]+]] {
77
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 240
8-
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 128
98
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i64> poison, i64 [[TMP1]], i32 0
109
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <4 x i32> zeroinitializer
1110
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i64> <i64 1, i64 1, i64 1, i64 poison>, i64 [[TMP2]], i32 3
1211
; CHECK-NEXT: [[TMP12:%.*]] = add <4 x i64> [[TMP10]], [[TMP11]]
12+
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 128
1313
; CHECK-NEXT: [[TMP13:%.*]] = load <2 x i64>, ptr [[TMP7]], align 4
1414
; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr null, align 4
1515
; CHECK-NEXT: [[TMP15:%.*]] = load <2 x i64>, ptr [[TMP8]], align 4

llvm/test/Transforms/SLPVectorizer/X86/same-last-instruction-different-parents.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,13 @@ define i32 @test(i32 %0, i1 %1) {
1010
; CHECK-NEXT: [[TMP6:%.*]] = sitofp <2 x i32> [[TMP4]] to <2 x double>
1111
; CHECK-NEXT: br i1 [[TMP1]], label %[[BB7:.*]], label %[[BB9:.*]]
1212
; CHECK: [[BB7]]:
13-
; CHECK-NEXT: [[TMP8:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP6]], <2 x double> zeroinitializer, <2 x double> zeroinitializer)
13+
; CHECK-NEXT: [[TMP8:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP5]], <2 x double> zeroinitializer, <2 x double> zeroinitializer)
1414
; CHECK-NEXT: br label %[[BB16:.*]]
1515
; CHECK: [[BB9]]:
1616
; CHECK-NEXT: br i1 false, label %[[BB14:.*]], label %[[BB10:.*]]
1717
; CHECK: [[BB10]]:
18-
; CHECK-NEXT: [[TMP11:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> zeroinitializer, <2 x double> [[TMP5]])
19-
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> <double 0.000000e+00, double poison>, <2 x i32> <i32 2, i32 1>
18+
; CHECK-NEXT: [[TMP11:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> zeroinitializer, <2 x double> [[TMP6]])
19+
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> <double 0.000000e+00, double poison>, <2 x i32> <i32 2, i32 1>
2020
; CHECK-NEXT: [[TMP13:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x double> zeroinitializer)
2121
; CHECK-NEXT: br label %[[BB14]]
2222
; CHECK: [[BB14]]:

0 commit comments

Comments
 (0)