From cef7e3275b9a4a540f0d25e059b10a1b8ea6bb98 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 14 Aug 2025 21:03:23 +0000 Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20initia?= =?UTF-8?q?l=20version?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.5 --- .../Transforms/Vectorize/SLPVectorizer.cpp | 22 ++++++++-------- .../remark-zext-incoming-for-neg-icmp.ll | 4 +-- .../SystemZ/ext-not-resized-op-resized.ll | 6 ++--- .../Transforms/SLPVectorizer/X86/PR35628_2.ll | 2 +- .../Transforms/SLPVectorizer/X86/PR40310.ll | 2 +- .../X86/multi-nodes-to-shuffle.ll | 4 +-- .../SLPVectorizer/X86/no_alternate_divrem.ll | 6 ++--- .../X86/non-power-of-2-subvectors-insert.ll | 18 ++++++------- .../Transforms/SLPVectorizer/X86/resched.ll | 26 ++++--------------- .../X86/same-values-sub-node-with-poisons.ll | 6 ++--- .../X86/vec_list_bias-inseltpoison.ll | 9 +++---- .../SLPVectorizer/X86/vec_list_bias.ll | 24 ++++++++--------- .../vec_list_bias_external_insert_shuffled.ll | 24 ++++++++--------- .../SLPVectorizer/alternate-non-profitable.ll | 9 +++---- .../zext-incoming-for-neg-icmp.ll | 4 +-- 15 files changed, 72 insertions(+), 94 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 7362d5b0b5865..df3089847dbe3 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -11307,27 +11307,27 @@ void BoUpSLP::buildTreeRec(ArrayRef VLRef, unsigned Depth, } ScalarsVectorizationLegality Legality = getScalarsVectorizationLegality( - VL, Depth, UserTreeIdx, /*TryCopyableElementsVectorization=*/false); + VL, Depth, UserTreeIdx, /*TryCopyableElementsVectorization=*/true); InstructionsState S = Legality.getInstructionsState(); if (!Legality.isLegal()) { - if (Legality.trySplitVectorize()) { - auto [MainOp, AltOp] = getMainAltOpsNoStateVL(VL); - // Last chance to try to vectorize alternate node. - if (MainOp && AltOp && TrySplitNode(InstructionsState(MainOp, AltOp))) - return; - } - if (!S) + if (!S) { Legality = getScalarsVectorizationLegality( - VL, Depth, UserTreeIdx, /*TryCopyableElementsVectorization=*/true); + VL, Depth, UserTreeIdx, /*TryCopyableElementsVectorization=*/false); + S = Legality.getInstructionsState(); + } if (!Legality.isLegal()) { + if (Legality.trySplitVectorize()) { + auto [MainOp, AltOp] = getMainAltOpsNoStateVL(VL); + // Last chance to try to vectorize alternate node. + if (MainOp && AltOp && TrySplitNode(InstructionsState(MainOp, AltOp))) + return; + } if (Legality.tryToFindDuplicates()) tryToFindDuplicates(VL, ReuseShuffleIndices, *TTI, *TLI, S, UserTreeIdx); - newGatherTreeEntry(VL, S, UserTreeIdx, ReuseShuffleIndices); return; } - S = Legality.getInstructionsState(); } // FIXME: investigate if there are profitable cases for VL.size() <= 4. diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/remark-zext-incoming-for-neg-icmp.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/remark-zext-incoming-for-neg-icmp.ll index 09c11bbefd4a3..485807e84966b 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/remark-zext-incoming-for-neg-icmp.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/remark-zext-incoming-for-neg-icmp.ll @@ -16,11 +16,11 @@ define i32 @test(i32 %a, i8 %b, i8 %c) { ; CHECK-LABEL: define i32 @test( ; CHECK-SAME: i32 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[B]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i8> poison, i8 [[C]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i8> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[B]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i16> ; CHECK-NEXT: [[TMP9:%.*]] = sext <4 x i8> [[TMP4]] to <4 x i16> ; CHECK-NEXT: [[TMP5:%.*]] = icmp sle <4 x i16> [[TMP8]], [[TMP9]] diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/ext-not-resized-op-resized.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/ext-not-resized-op-resized.ll index ca93cbd698ada..377d9b4751f58 100644 --- a/llvm/test/Transforms/SLPVectorizer/SystemZ/ext-not-resized-op-resized.ll +++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/ext-not-resized-op-resized.ll @@ -4,12 +4,12 @@ define void @test(i64 %0, i1 %.cmp.i.2, i1 %1, ptr %a) { ; CHECK-LABEL: define void @test( ; CHECK-SAME: i64 [[TMP0:%.*]], i1 [[DOTCMP_I_2:%.*]], i1 [[TMP1:%.*]], ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i64> poison, i64 [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[TMP4]], splat (i64 63) ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i1> poison, i1 [[DOTCMP_I_2]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i1> [[TMP6]], i1 [[TMP1]], i32 1 ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i1> [[TMP7]], <4 x i1> poison, <4 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i64> poison, i64 [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x i64> [[TMP15]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[TMP16]], splat (i64 63) ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i1> [[TMP7]], <4 x i1> poison, <4 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = trunc <4 x i64> [[TMP5]] to <4 x i1> ; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP10]], <4 x i1> [[TMP8]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll index 5ebbb76f3d6c3..f0b88e2647511 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll @@ -9,11 +9,11 @@ define void @test() #0 { ; CHECK: loop: ; CHECK-NEXT: [[DUMMY_PHI:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[OP_RDX1:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 2, [[ENTRY]] ], [ [[TMP4:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[DUMMY_ADD:%.*]] = add i16 0, 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> poison, i64 [[TMP0]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i64> [[TMP2]], ; CHECK-NEXT: [[TMP4]] = extractelement <4 x i64> [[TMP3]], i32 3 -; CHECK-NEXT: [[DUMMY_ADD:%.*]] = add i16 0, 0 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0 ; CHECK-NEXT: [[DUMMY_SHL:%.*]] = shl i64 [[TMP5]], 32 ; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i64> splat (i64 1), [[TMP3]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll index 194c7021f60f5..fc9a7d8e658c3 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR40310.ll @@ -11,8 +11,8 @@ define void @mainTest(i32 %param, ptr %vals, i32 %len) { ; CHECK-NEXT: [[LOCAL_4_:%.*]] = phi i32 [ [[V44:%.*]], [[BCI_15]] ], [ 31, [[BCI_15_PREHEADER]] ] ; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP6:%.*]], [[BCI_15]] ], [ [[TMP0]], [[BCI_15_PREHEADER]] ] ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = add <16 x i32> [[TMP2]], ; CHECK-NEXT: store atomic i32 [[LOCAL_0_]], ptr [[VALS:%.*]] unordered, align 4 +; CHECK-NEXT: [[TMP3:%.*]] = add <16 x i32> [[TMP2]], ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> [[TMP3]]) ; CHECK-NEXT: [[OP_RDX]] = and i32 [[TMP4]], [[LOCAL_4_]] ; CHECK-NEXT: [[V44]] = add i32 [[LOCAL_4_]], 16 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/multi-nodes-to-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/multi-nodes-to-shuffle.ll index a48076adc8090..0667d58221966 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/multi-nodes-to-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/multi-nodes-to-shuffle.ll @@ -9,8 +9,8 @@ define void @test(i64 %p0, i64 %p1, i64 %p2, i64 %p3) { ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> [[TMP0]], i64 [[P1:%.*]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i64> [[TMP1]], i64 [[P2:%.*]], i32 2 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[P3:%.*]], i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i64> [[TMP3]], [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = mul <4 x i64> [[TMP3]], [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i64> [[TMP3]], [[TMP3]] ; CHECK-NEXT: [[TMP6:%.*]] = sdiv <4 x i64> [[TMP3]], [[TMP3]] ; CHECK-NEXT: [[TMP7:%.*]] = sub <4 x i64> [[TMP5]], [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = shl <4 x i64> [[TMP4]], [[TMP7]] @@ -34,8 +34,8 @@ define void @test(i64 %p0, i64 %p1, i64 %p2, i64 %p3) { ; AVX2-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> [[TMP0]], i64 [[P1:%.*]], i32 1 ; AVX2-NEXT: [[TMP2:%.*]] = insertelement <4 x i64> [[TMP1]], i64 [[P2:%.*]], i32 2 ; AVX2-NEXT: [[TMP3:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[P3:%.*]], i32 3 -; AVX2-NEXT: [[TMP4:%.*]] = add <4 x i64> [[TMP3]], [[TMP3]] ; AVX2-NEXT: [[TMP5:%.*]] = mul <4 x i64> [[TMP3]], [[TMP3]] +; AVX2-NEXT: [[TMP4:%.*]] = add <4 x i64> [[TMP3]], [[TMP3]] ; AVX2-NEXT: [[TMP6:%.*]] = sdiv <4 x i64> [[TMP3]], [[TMP3]] ; AVX2-NEXT: [[TMP7:%.*]] = sub <4 x i64> [[TMP5]], [[TMP6]] ; AVX2-NEXT: [[TMP8:%.*]] = shl <4 x i64> [[TMP4]], [[TMP7]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll b/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll index 89051c7aba42c..28b836e43efba 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll @@ -10,14 +10,14 @@ define void @test_add_sdiv(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 ; CHECK-NEXT: [[GEP2_3:%.*]] = getelementptr i32, ptr [[ARR2]], i32 3 ; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[GEP1_2]], align 4 ; CHECK-NEXT: [[V3:%.*]] = load i32, ptr [[GEP1_3]], align 4 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[A0:%.*]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[A1:%.*]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = add nsw <2 x i32> [[TMP1]], ; CHECK-NEXT: [[Y2:%.*]] = add nsw i32 [[A2:%.*]], 42 ; CHECK-NEXT: [[Y3:%.*]] = add nsw i32 [[A3:%.*]], 0 ; CHECK-NEXT: [[RES2:%.*]] = sdiv i32 [[V2]], [[Y2]] ; CHECK-NEXT: [[RES3:%.*]] = add nsw i32 [[V3]], [[Y3]] ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr [[ARR1]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[A0:%.*]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = add nsw <2 x i32> [[TMP3]], ; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i32> [[TMP4]], [[TMP2]] ; CHECK-NEXT: store <2 x i32> [[TMP5]], ptr [[ARR2]], align 4 ; CHECK-NEXT: store i32 [[RES2]], ptr [[GEP2_2]], align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-subvectors-insert.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-subvectors-insert.ll index 1fedde4cc9fd7..cee333341271b 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-subvectors-insert.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-subvectors-insert.ll @@ -4,17 +4,15 @@ define void @test() { ; CHECK-LABEL: define void @test() { ; CHECK-NEXT: [[XOR108_I_I_I:%.*]] = xor i64 0, 1 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <12 x i64> , i64 [[XOR108_I_I_I]], i32 10 -; CHECK-NEXT: [[TMP2:%.*]] = lshr <12 x i64> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i64> poison, i64 [[XOR108_I_I_I]], i32 3 -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <12 x i64> [[TMP2]], <12 x i64> poison, <16 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i64> [[TMP5]], <16 x i64> [[TMP3]], <16 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i64> [[TMP6]], <16 x i64> poison, <16 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = trunc <16 x i64> [[TMP7]] to <16 x i1> -; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i1> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = freeze <16 x i1> [[TMP9]] +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[XOR108_I_I_I]] to i1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <14 x i1> , i1 [[TMP1]], i32 3 +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <14 x i1> [[TMP2]], <14 x i1> poison, <14 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = lshr <14 x i1> [[TMP3]], +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <14 x i1> [[TMP4]], <14 x i1> poison, <16 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = zext <16 x i1> [[TMP10]] to <16 x i16> -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq <16 x i16> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i16> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = freeze <16 x i16> [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <16 x i16> [[TMP8]], zeroinitializer ; CHECK-NEXT: ret void ; %xor108.i.i.i = xor i64 0, 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll index 20a42777cf8e4..8f59a2ad09a0c 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll @@ -11,27 +11,11 @@ define fastcc void @_ZN12_GLOBAL__N_127PolynomialMultiplyRecognize9recognizeEv(i ; CHECK: if.then22.i: ; CHECK-NEXT: [[SUB_I:%.*]] = add nsw i32 undef, -1 ; CHECK-NEXT: [[CONV31_I:%.*]] = and i32 undef, [[SUB_I]] -; CHECK-NEXT: [[SHR_I_I:%.*]] = lshr i32 [[CONV31_I]], 1 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[CONV31_I]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP3]], -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = lshr <8 x i32> [[TMP5]], -; CHECK-NEXT: [[TMP7:%.*]] = trunc i32 [[SUB_I]] to i8 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i8> poison, i8 [[TMP7]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[SHR_I_I]] to i8 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x i8> [[TMP8]], i8 [[TMP9]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = trunc <8 x i32> [[TMP6]] to <8 x i8> -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x i8> [[TMP11]], <8 x i8> poison, <16 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x i8> [[TMP10]], <16 x i8> [[TMP12]], <16 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = trunc <4 x i32> [[TMP4]] to <4 x i8> -; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x i8> [[TMP13]], <4 x i8> poison, <16 x i32> -; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <16 x i8> [[TMP14]], <16 x i8> [[TMP19]], <16 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = trunc <2 x i32> [[TMP2]] to <2 x i8> -; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x i8> [[TMP15]], <2 x i8> poison, <16 x i32> -; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <16 x i8> [[TMP20]], <16 x i8> [[TMP18]], <16 x i32> +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <16 x i32> poison, i32 [[SUB_I]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i32> [[TMP0]], i32 [[CONV31_I]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = lshr <16 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP16:%.*]] = trunc <16 x i32> [[TMP3]] to <16 x i8> ; CHECK-NEXT: [[TMP17:%.*]] = and <16 x i8> [[TMP16]], splat (i8 1) ; CHECK-NEXT: store <16 x i8> [[TMP17]], ptr undef, align 1 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/same-values-sub-node-with-poisons.ll b/llvm/test/Transforms/SLPVectorizer/X86/same-values-sub-node-with-poisons.ll index 9c0f65ec27165..af533a6ccbb79 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/same-values-sub-node-with-poisons.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/same-values-sub-node-with-poisons.ll @@ -30,9 +30,6 @@ define i32 @test(ptr %f, i1 %tobool.i.4, i32 %retval.0.i.219) { ; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x i32> [ [[TMP8]], %[[D_EXIT_6]] ], [ poison, %[[IF_END_I_5]] ] ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> , i32 [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[RETVAL_0_I_219]], i32 3 -; CHECK-NEXT: [[TMP16:%.*]] = add <4 x i32> [[TMP15]], [[TMP13]] ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x i32> [[TMP14]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> @@ -40,6 +37,9 @@ define i32 @test(ptr %f, i1 %tobool.i.4, i32 %retval.0.i.219) { ; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <8 x i32> [[TMP23]], <8 x i32> , <8 x i32> ; CHECK-NEXT: [[TMP19:%.*]] = add <8 x i32> [[TMP18]], [[TMP22]] +; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x i32> , i32 [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP26]], i32 [[RETVAL_0_I_219]], i32 3 +; CHECK-NEXT: [[TMP16:%.*]] = add <4 x i32> [[TMP27]], [[TMP13]] ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <8 x i32> [[TMP19]], <8 x i32> poison, <4 x i32> ; CHECK-NEXT: [[RDX_OP:%.*]] = or <4 x i32> [[TMP20]], [[TMP16]] ; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i32> [[RDX_OP]], <4 x i32> poison, <8 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll index 2cc2f28ccf6d5..42e9c24954441 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll @@ -39,19 +39,18 @@ define void @test(ptr nocapture %t2) { ; CHECK-NEXT: [[T29:%.*]] = sub nsw i32 [[T9]], [[T15]] ; CHECK-NEXT: [[T30:%.*]] = add nsw i32 [[T27]], [[T29]] ; CHECK-NEXT: [[T31:%.*]] = mul nsw i32 [[T30]], 4433 +; CHECK-NEXT: [[T32:%.*]] = mul nsw i32 [[T27]], 6270 ; CHECK-NEXT: [[T34:%.*]] = mul nsw i32 [[T29]], -15137 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[T40]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[T27]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[T32]], i32 2 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[T47]], i32 3 ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> , <4 x i32> [[TMP6]], <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> , <4 x i32> [[TMP6]], <4 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[T48]], i32 1 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[T40]], i32 3 ; CHECK-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[TMP5]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = mul nsw <4 x i32> [[TMP5]], [[TMP9]] -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> -; CHECK-NEXT: [[T701:%.*]] = shufflevector <4 x i32> [[TMP12]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[T701:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[T701]], i32 [[T34]], i32 6 ; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T71]], splat (i32 3) ; CHECK-NEXT: store <8 x i32> [[T76]], ptr [[T2]], align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll index cea98bf55b6ff..1c80530593ab3 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll @@ -25,7 +25,6 @@ define void @test(ptr nocapture %t2) { ; CHECK-NEXT: [[T24:%.*]] = add nsw i32 [[T23]], [[T21]] ; CHECK-NEXT: [[T25:%.*]] = sub nsw i32 [[T21]], [[T23]] ; CHECK-NEXT: [[T27:%.*]] = sub nsw i32 [[T3]], [[T24]] -; CHECK-NEXT: [[T32:%.*]] = mul nsw i32 [[T27]], 6270 ; CHECK-NEXT: [[T37:%.*]] = add nsw i32 [[T25]], [[T11]] ; CHECK-NEXT: [[T38:%.*]] = add nsw i32 [[T17]], [[T5]] ; CHECK-NEXT: [[T39:%.*]] = add nsw i32 [[T37]], [[T38]] @@ -34,25 +33,26 @@ define void @test(ptr nocapture %t2) { ; CHECK-NEXT: [[T42:%.*]] = mul nsw i32 [[T17]], 16819 ; CHECK-NEXT: [[T47:%.*]] = mul nsw i32 [[T37]], -16069 ; CHECK-NEXT: [[T48:%.*]] = mul nsw i32 [[T38]], -3196 -; CHECK-NEXT: [[T49:%.*]] = add nsw i32 [[T40]], [[T47]] ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[T8]], align 4 ; CHECK-NEXT: [[T15:%.*]] = load i32, ptr [[T14]], align 4 ; CHECK-NEXT: [[T9:%.*]] = load i32, ptr [[T8]], align 4 ; CHECK-NEXT: [[T29:%.*]] = sub nsw i32 [[T9]], [[T15]] ; CHECK-NEXT: [[T30:%.*]] = add nsw i32 [[T27]], [[T29]] ; CHECK-NEXT: [[T31:%.*]] = mul nsw i32 [[T30]], 4433 +; CHECK-NEXT: [[T32:%.*]] = mul nsw i32 [[T27]], 6270 ; CHECK-NEXT: [[T34:%.*]] = mul nsw i32 [[T29]], -15137 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[T40]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[T48]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i32> [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> -; CHECK-NEXT: [[T67:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[T32]], i32 2 -; CHECK-NEXT: [[T68:%.*]] = insertelement <8 x i32> [[T67]], i32 [[T49]], i32 3 -; CHECK-NEXT: [[T701:%.*]] = shufflevector <8 x i32> [[T68]], <8 x i32> [[TMP6]], <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[T40]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[T32]], i32 2 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[T47]], i32 3 +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> , <4 x i32> [[TMP6]], <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[T48]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[T40]], i32 3 +; CHECK-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[TMP5]], [[TMP9]] +; CHECK-NEXT: [[T701:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[T701]], i32 [[T34]], i32 6 -; CHECK-NEXT: [[T72:%.*]] = insertelement <8 x i32> [[T71]], i32 [[T49]], i32 7 -; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T72]], splat (i32 3) +; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T71]], splat (i32 3) ; CHECK-NEXT: store <8 x i32> [[T76]], ptr [[T2]], align 4 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias_external_insert_shuffled.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias_external_insert_shuffled.ll index 7060288d739bd..cd0ed9cd7884c 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias_external_insert_shuffled.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias_external_insert_shuffled.ll @@ -19,7 +19,6 @@ define void @test(ptr nocapture %t2) { ; CHECK-NEXT: [[T24:%.*]] = add nsw i32 [[T23]], [[T21]] ; CHECK-NEXT: [[T25:%.*]] = sub nsw i32 [[T21]], [[T23]] ; CHECK-NEXT: [[T27:%.*]] = sub nsw i32 [[T3]], [[T24]] -; CHECK-NEXT: [[T32:%.*]] = mul nsw i32 [[T27]], 6270 ; CHECK-NEXT: [[T37:%.*]] = add nsw i32 [[T25]], [[T11]] ; CHECK-NEXT: [[T38:%.*]] = add nsw i32 [[T17]], [[T5]] ; CHECK-NEXT: [[T39:%.*]] = add nsw i32 [[T37]], [[T38]] @@ -28,26 +27,25 @@ define void @test(ptr nocapture %t2) { ; CHECK-NEXT: [[T42:%.*]] = mul nsw i32 [[T17]], 16819 ; CHECK-NEXT: [[T47:%.*]] = mul nsw i32 [[T37]], -16069 ; CHECK-NEXT: [[T48:%.*]] = mul nsw i32 [[T38]], -3196 -; CHECK-NEXT: [[T49:%.*]] = add nsw i32 [[T40]], [[T47]] ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[T8]], align 4 ; CHECK-NEXT: [[T15:%.*]] = load i32, ptr [[T14]], align 4 ; CHECK-NEXT: [[T9:%.*]] = load i32, ptr [[T8]], align 4 ; CHECK-NEXT: [[T29:%.*]] = sub nsw i32 [[T9]], [[T15]] ; CHECK-NEXT: [[T30:%.*]] = add nsw i32 [[T27]], [[T29]] ; CHECK-NEXT: [[T31:%.*]] = mul nsw i32 [[T30]], 4433 +; CHECK-NEXT: [[T32:%.*]] = mul nsw i32 [[T27]], 6270 ; CHECK-NEXT: [[T34:%.*]] = mul nsw i32 [[T29]], -15137 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <2 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[T40]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[T48]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i32> [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> -; CHECK-NEXT: [[T67:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[T32]], i32 2 -; CHECK-NEXT: [[T68:%.*]] = insertelement <8 x i32> [[T67]], i32 [[T49]], i32 3 -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> -; CHECK-NEXT: [[T701:%.*]] = shufflevector <8 x i32> [[T68]], <8 x i32> [[TMP7]], <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[T32]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[T47]], i32 3 +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> , <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[T40]], i32 3 +; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[TMP4]], [[TMP6]] +; CHECK-NEXT: [[T50:%.*]] = add nsw i32 [[T40]], [[T48]] +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP7]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[T701:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[T50]], i32 5 ; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[T701]], i32 [[T34]], i32 6 -; CHECK-NEXT: [[T72:%.*]] = insertelement <8 x i32> [[T71]], i32 [[T49]], i32 7 -; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T72]], splat (i32 3) +; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T71]], splat (i32 3) ; CHECK-NEXT: store <8 x i32> [[T76]], ptr [[T2]], align 4 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll b/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll index 125c2dce32663..9f95f79ecc7fc 100644 --- a/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll +++ b/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll @@ -170,11 +170,10 @@ define <2 x i8> @replace_through_binop_fail_cant_speculate(i8 %inp, <2 x i8> %d, define <2 x i8> @replace_through_binop_preserve_flags(i8 %inp, <2 x i8> %d, <2 x i8> %any) { ; CHECK-LABEL: define <2 x i8> @replace_through_binop_preserve_flags( ; CHECK-SAME: i8 [[INP:%.*]], <2 x i8> [[D:%.*]], <2 x i8> [[ANY:%.*]]) { -; CHECK-NEXT: [[ADD:%.*]] = xor i8 [[INP]], 5 -; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[INP]], 123 -; CHECK-NEXT: [[TMP2:%.*]] = add nsw i8 [[ADD]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i8> poison, i8 [[TMP1]], i64 0 -; CHECK-NEXT: [[R:%.*]] = insertelement <2 x i8> [[TMP3]], i8 [[TMP2]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> poison, i8 [[INP]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i8> [[TMP2]], +; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[TMP3]], ; CHECK-NEXT: ret <2 x i8> [[R]] ; %add = xor i8 %inp, 5 diff --git a/llvm/test/Transforms/SLPVectorizer/zext-incoming-for-neg-icmp.ll b/llvm/test/Transforms/SLPVectorizer/zext-incoming-for-neg-icmp.ll index 303e31dfa5e64..1ddc3b0d7bf88 100644 --- a/llvm/test/Transforms/SLPVectorizer/zext-incoming-for-neg-icmp.ll +++ b/llvm/test/Transforms/SLPVectorizer/zext-incoming-for-neg-icmp.ll @@ -6,11 +6,11 @@ define i32 @test(i32 %a, i8 %b, i8 %c) { ; CHECK-LABEL: define i32 @test( ; CHECK-SAME: i32 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) { ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[B]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i8> poison, i8 [[C]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i8> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[B]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i16> ; CHECK-NEXT: [[TMP9:%.*]] = sext <4 x i8> [[TMP4]] to <4 x i16> ; CHECK-NEXT: [[TMP5:%.*]] = icmp sle <4 x i16> [[TMP8]], [[TMP9]]