From e73271c71e321bc69e7a85a334077922759d7132 Mon Sep 17 00:00:00 2001 From: Samuel Tebbs Date: Wed, 9 Jul 2025 11:43:12 +0100 Subject: [PATCH 1/6] [LV] Remove common extends and selects in CSE This PR extends the LoopVectorizer's common sub-expression elimination (CSE) to remove identical extend and select instructions. Originally the work only included extends but that uncovered some duplicate selects in reduction-inloop.ll. --- llvm/lib/IR/Instruction.cpp | 2 + .../Transforms/Utils/FunctionComparator.cpp | 4 + .../Transforms/Vectorize/LoopVectorize.cpp | 3 +- .../AArch64/induction-costs-sve.ll | 19 +- ...outer_loop_test1_no_explicit_vect_width.ll | 3 +- .../LoopVectorize/ARM/mve-reductions.ll | 3 +- .../LoopVectorize/PowerPC/vectorize-bswap.ll | 4 +- .../RISCV/blocks-with-dead-instructions.ll | 35 +- .../LoopVectorize/RISCV/dead-ops-cost.ll | 15 +- .../RISCV/evl-compatible-loops.ll | 8 +- .../LoopVectorize/RISCV/induction-costs.ll | 5 +- .../LoopVectorize/RISCV/mask-index-type.ll | 5 +- .../RISCV/masked_gather_scatter.ll | 10 +- ...ruction-or-drop-poison-generating-flags.ll | 5 +- .../RISCV/riscv-vector-reverse-output.ll | 653 ++++++++++++++++++ .../RISCV/riscv-vector-reverse.ll | 40 +- .../LoopVectorize/RISCV/strided-accesses.ll | 24 +- .../RISCV/tail-folding-cast-intrinsics.ll | 5 +- .../RISCV/tail-folding-cond-reduction.ll | 10 +- .../tail-folding-fixed-order-recurrence.ll | 5 +- .../RISCV/tail-folding-gather-scatter.ll | 5 +- .../RISCV/tail-folding-interleave.ll | 15 +- .../RISCV/tail-folding-reduction-cost.ll | 1 + .../RISCV/tail-folding-reverse-load-store.ll | 47 +- .../RISCV/tail-folding-uniform-store.ll | 9 +- .../LoopVectorize/RISCV/uniform-load-store.ll | 30 +- ...outer_loop_test1_no_explicit_vect_width.ll | 6 +- llvm/test/Transforms/LoopVectorize/cse.ll | 309 +++++++++ .../LoopVectorize/first-order-recurrence.ll | 14 +- .../LoopVectorize/float-induction.ll | 3 +- .../LoopVectorize/outer_loop_test1.ll | 3 +- .../LoopVectorize/reduction-inloop.ll | 9 +- 32 files changed, 1104 insertions(+), 205 deletions(-) create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse-output.ll create mode 100644 llvm/test/Transforms/LoopVectorize/cse.ll diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp index 5e87b5ff941ad..94dfed4df72fe 100644 --- a/llvm/lib/IR/Instruction.cpp +++ b/llvm/lib/IR/Instruction.cpp @@ -935,6 +935,8 @@ bool Instruction::hasSameSpecialState(const Instruction *I2, if (const GetElementPtrInst *GEP = dyn_cast(I1)) return GEP->getSourceElementType() == cast(I2)->getSourceElementType(); + if (const CastInst *Cast = dyn_cast(I1)) + return Cast->getDestTy() == cast(I2)->getDestTy(); return true; } diff --git a/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/llvm/lib/Transforms/Utils/FunctionComparator.cpp index 6d4026e8209de..8d1f7c9b3a2ea 100644 --- a/llvm/lib/Transforms/Utils/FunctionComparator.cpp +++ b/llvm/lib/Transforms/Utils/FunctionComparator.cpp @@ -804,6 +804,10 @@ int FunctionComparator::cmpOperations(const Instruction *L, return Res; } } + if (const CastInst *Cast = dyn_cast(L)) { + const CastInst *CastR = cast(R); + return cmpTypes(Cast->getDestTy(), CastR->getDestTy()); + } return 0; } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 675a230bd2c94..34cb28bf48a85 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2511,7 +2511,8 @@ namespace { struct CSEDenseMapInfo { static bool canHandle(const Instruction *I) { return isa(I) || isa(I) || - isa(I) || isa(I); + isa(I) || isa(I) || + isa(I) || isa(I); } static inline Instruction *getEmptyKey() { diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll index 137e07336fd50..392e6efd31376 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll @@ -44,10 +44,8 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 { ; DEFAULT-NEXT: [[TMP27:%.*]] = zext [[WIDE_LOAD4]] to ; DEFAULT-NEXT: [[TMP28:%.*]] = mul [[TMP26]], [[TMP13]] ; DEFAULT-NEXT: [[TMP29:%.*]] = mul [[TMP27]], [[TMP13]] -; DEFAULT-NEXT: [[TMP30:%.*]] = zext [[WIDE_LOAD]] to -; DEFAULT-NEXT: [[TMP31:%.*]] = zext [[WIDE_LOAD4]] to -; DEFAULT-NEXT: [[TMP32:%.*]] = or [[TMP28]], [[TMP30]] -; DEFAULT-NEXT: [[TMP33:%.*]] = or [[TMP29]], [[TMP31]] +; DEFAULT-NEXT: [[TMP32:%.*]] = or [[TMP28]], [[TMP26]] +; DEFAULT-NEXT: [[TMP33:%.*]] = or [[TMP29]], [[TMP27]] ; DEFAULT-NEXT: [[TMP34:%.*]] = lshr [[TMP32]], splat (i16 1) ; DEFAULT-NEXT: [[TMP35:%.*]] = lshr [[TMP33]], splat (i16 1) ; DEFAULT-NEXT: [[TMP36:%.*]] = trunc [[TMP34]] to @@ -118,8 +116,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 { ; PRED-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv16i8.p0(ptr [[TMP18]], i32 1, [[ACTIVE_LANE_MASK]], poison) ; PRED-NEXT: [[TMP17:%.*]] = zext [[WIDE_MASKED_LOAD]] to ; PRED-NEXT: [[TMP22:%.*]] = mul [[TMP17]], [[TMP16]] -; PRED-NEXT: [[TMP24:%.*]] = zext [[WIDE_MASKED_LOAD]] to -; PRED-NEXT: [[TMP20:%.*]] = or [[TMP22]], [[TMP24]] +; PRED-NEXT: [[TMP20:%.*]] = or [[TMP22]], [[TMP17]] ; PRED-NEXT: [[TMP21:%.*]] = lshr [[TMP20]], splat (i16 1) ; PRED-NEXT: [[TMP23:%.*]] = trunc [[TMP21]] to ; PRED-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] @@ -370,9 +367,8 @@ define void @trunc_ivs_and_store(i32 %x, ptr %dst, i64 %N) #0 { ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; DEFAULT-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32 ; DEFAULT-NEXT: [[TMP15:%.*]] = add i32 [[OFFSET_IDX]], 1 -; DEFAULT-NEXT: [[TMP16:%.*]] = trunc i64 [[INDEX]] to i32 -; DEFAULT-NEXT: [[TMP18:%.*]] = add i32 [[TMP16]], 1 -; DEFAULT-NEXT: [[TMP19:%.*]] = mul i32 [[MUL]], [[TMP16]] +; DEFAULT-NEXT: [[TMP18:%.*]] = add i32 [[OFFSET_IDX]], 1 +; DEFAULT-NEXT: [[TMP19:%.*]] = mul i32 [[MUL]], [[OFFSET_IDX]] ; DEFAULT-NEXT: [[TMP20:%.*]] = mul i32 [[MUL]], [[TMP18]] ; DEFAULT-NEXT: [[TMP21:%.*]] = zext i32 [[TMP19]] to i64 ; DEFAULT-NEXT: [[TMP22:%.*]] = zext i32 [[TMP20]] to i64 @@ -562,9 +558,8 @@ define void @ivs_trunc_and_ext(i32 %x, ptr %dst, i64 %N) #0 { ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; DEFAULT-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32 ; DEFAULT-NEXT: [[TMP14:%.*]] = add i32 [[OFFSET_IDX]], 1 -; DEFAULT-NEXT: [[TMP15:%.*]] = trunc i64 [[INDEX]] to i32 -; DEFAULT-NEXT: [[TMP17:%.*]] = add i32 [[TMP15]], 1 -; DEFAULT-NEXT: [[TMP18:%.*]] = mul i32 [[ADD]], [[TMP15]] +; DEFAULT-NEXT: [[TMP17:%.*]] = add i32 [[OFFSET_IDX]], 1 +; DEFAULT-NEXT: [[TMP18:%.*]] = mul i32 [[ADD]], [[OFFSET_IDX]] ; DEFAULT-NEXT: [[TMP19:%.*]] = mul i32 [[ADD]], [[TMP17]] ; DEFAULT-NEXT: [[TMP20:%.*]] = zext i32 [[TMP18]] to i64 ; DEFAULT-NEXT: [[TMP21:%.*]] = zext i32 [[TMP19]] to i64 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll index 5e99425c1482c..ae28eb4c3a49d 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll @@ -39,8 +39,7 @@ define void @foo_i32(i32 %n) { ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, <4 x i64> [[VEC_IND]] ; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i64> [[VEC_IND]] to <4 x i32> ; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP1]], <4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true)) -; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i64> [[VEC_IND]] to <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[TMP8]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[TMP1]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: br label %[[FOR_BODY31:.*]] ; CHECK: [[FOR_BODY31]]: ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_BODY]] ], [ [[TMP4:%.*]], %[[FOR_BODY31]] ] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll index 658b9a4569191..1540baab53719 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll @@ -1679,8 +1679,7 @@ define i64 @test_std_q31(ptr %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i32> [[TMP1]] to <4 x i64> ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP2]]) ; CHECK-NEXT: [[TMP4]] = add i64 [[VEC_PHI]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i32> [[TMP1]] to <4 x i64> -; CHECK-NEXT: [[TMP6:%.*]] = mul nsw <4 x i64> [[TMP5]], [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = mul nsw <4 x i64> [[TMP2]], [[TMP2]] ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP6]]) ; CHECK-NEXT: [[TMP8]] = add i64 [[VEC_PHI1]], [[TMP7]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/vectorize-bswap.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/vectorize-bswap.ll index 2c85b75dda018..7f0a487fca945 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/vectorize-bswap.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/vectorize-bswap.ll @@ -19,9 +19,7 @@ define dso_local void @test(ptr %Arr, i32 signext %Len) { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[ARR:%.*]], i64 [[TMP1]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> [[WIDE_LOAD]]) -; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[INDEX]] to i64 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP5]] -; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP6]], align 4 +; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP3]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll index 757c77fef98c8..2a5ec3de6d391 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll @@ -28,9 +28,8 @@ define void @block_with_dead_inst_1(ptr %src, i64 %N) #0 { ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i16, ptr [[SRC]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0( zeroinitializer, align 2 [[TMP20]], splat (i1 true), i32 [[TMP12]]) -; CHECK-NEXT: [[TMP14:%.*]] = zext i32 [[TMP12]] to i64 -; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP14]], [[EVL_BASED_IV]] -; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP14]] +; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP17]], [[EVL_BASED_IV]] +; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP17]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TMP2]] ; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -104,9 +103,8 @@ define void @block_with_dead_inst_2(ptr %src) #0 { ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[SRC]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0( zeroinitializer, align 2 [[TMP10]], splat (i1 true), i32 [[TMP9]]) -; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP9]] to i64 -; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP11]], [[EVL_BASED_IV]] -; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]] +; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP7]], [[EVL_BASED_IV]] +; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 333 ; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] @@ -180,9 +178,8 @@ define void @multiple_blocks_with_dead_insts_3(ptr %src) #0 { ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[SRC]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0( zeroinitializer, align 2 [[TMP10]], splat (i1 true), i32 [[TMP9]]) -; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP9]] to i64 -; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP11]], [[EVL_BASED_IV]] -; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]] +; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP7]], [[EVL_BASED_IV]] +; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 333 ; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] @@ -266,9 +263,8 @@ define void @multiple_blocks_with_dead_insts_4(ptr %src, i64 %N) #0 { ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i16, ptr [[SRC]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0( zeroinitializer, align 2 [[TMP20]], splat (i1 true), i32 [[TMP12]]) -; CHECK-NEXT: [[TMP14:%.*]] = zext i32 [[TMP12]] to i64 -; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP14]], [[EVL_BASED_IV]] -; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP14]] +; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP17]], [[EVL_BASED_IV]] +; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP17]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TMP2]] ; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] @@ -354,9 +350,8 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_5(ptr %src) #0 { ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[SRC]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0( zeroinitializer, align 2 [[TMP10]], splat (i1 true), i32 [[TMP9]]) -; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP9]] to i64 -; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP11]], [[EVL_BASED_IV]] -; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]] +; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP7]], [[EVL_BASED_IV]] +; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 333 ; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] @@ -466,9 +461,8 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_6(ptr %src, i1 % ; CHECK-NEXT: [[TMP23:%.*]] = select [[TMP18]], [[BROADCAST_SPLAT]], zeroinitializer ; CHECK-NEXT: [[TMP24:%.*]] = or [[TMP22]], [[TMP23]] ; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0( zeroinitializer, align 2 [[TMP20]], [[TMP24]], i32 [[TMP27]]) -; CHECK-NEXT: [[TMP25:%.*]] = zext i32 [[TMP27]] to i64 -; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP25]], [[EVL_BASED_IV]] -; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP25]] +; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP12]], [[EVL_BASED_IV]] +; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP12]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TMP2]] ; CHECK-NEXT: br i1 [[TMP26]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] @@ -737,9 +731,8 @@ define void @dead_load_in_block(ptr %dst, ptr %src, i8 %N, i64 %x) #0 { ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[DST]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0( zeroinitializer, align 4 [[TMP21]], splat (i1 true), i32 [[TMP18]]), !alias.scope [[META19:![0-9]+]], !noalias [[META22:![0-9]+]] -; CHECK-NEXT: [[TMP22:%.*]] = zext i32 [[TMP18]] to i64 -; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP22]], [[EVL_BASED_IV]] -; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP22]] +; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP17]], [[EVL_BASED_IV]] +; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP17]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TMP3]] ; CHECK-NEXT: br i1 [[TMP26]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll index 29d901f084bdb..60f86b37ab8cf 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll @@ -37,9 +37,8 @@ define void @dead_load(ptr %p, i16 %start) { ; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector [[DOTSPLATINSERT1]], poison, zeroinitializer ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i16, ptr [[P]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0( zeroinitializer, align 2 [[TMP21]], splat (i1 true), i32 [[TMP16]]) -; CHECK-NEXT: [[TMP22:%.*]] = zext i32 [[TMP16]] to i64 -; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP22]], [[EVL_BASED_IV]] -; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP22]] +; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP19]], [[EVL_BASED_IV]] +; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP19]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT2]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TMP5]] ; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -332,9 +331,8 @@ define void @test_phi_in_latch_redundant(ptr %dst, i32 %a) { ; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP14]], zeroinitializer, [[TMP19]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[DST]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0( [[PREDPHI]], align 4 [[TMP16]], [[TMP15]], i32 [[TMP8]]) -; CHECK-NEXT: [[TMP17:%.*]] = zext i32 [[TMP8]] to i64 -; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP17]], [[EVL_BASED_IV]] -; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP17]] +; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP5]], [[EVL_BASED_IV]] +; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP5]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 37 ; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] @@ -419,9 +417,8 @@ define void @gather_interleave_group_with_dead_insert_pos(i64 %N, ptr noalias %s ; CHECK-NEXT: [[TMP18:%.*]] = zext [[TMP17]] to ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[DST]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0( [[TMP18]], align 4 [[TMP19]], splat (i1 true), i32 [[TMP10]]) -; CHECK-NEXT: [[TMP20:%.*]] = zext i32 [[TMP10]] to i64 -; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP20]], [[EVL_BASED_IV]] -; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP20]] +; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] +; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TMP2]] ; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll b/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll index be6f32a6f4ea2..27722aa49378c 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll @@ -24,9 +24,8 @@ define void @test_wide_integer_induction(ptr noalias %a, i64 %N) { ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[EVL_BASED_IV]] ; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[VEC_IND]], ptr align 8 [[TMP14]], splat (i1 true), i32 [[TMP11]]) -; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP11]] to i64 -; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] -; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] +; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP12]], [[EVL_BASED_IV]] +; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP12]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -80,8 +79,7 @@ define void @test_wide_ptr_induction(ptr noalias %a, ptr noalias %b, i64 %N) { ; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] -; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP7]] to i64 -; CHECK-NEXT: [[TMP11:%.*]] = mul i64 8, [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = mul i64 8, [[TMP9]] ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP11]] ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll index 18dc20495117e..f45e7c31ffc4f 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll @@ -70,9 +70,8 @@ define void @skip_free_iv_truncate(i16 %x, ptr %A) #0 { ; CHECK-NEXT: [[DOTSPLAT25:%.*]] = shufflevector [[DOTSPLATINSERT24]], poison, zeroinitializer ; CHECK-NEXT: [[TMP59:%.*]] = getelementptr i16, ptr [[A]], [[VEC_IND]] ; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0( zeroinitializer, align 2 [[TMP59]], splat (i1 true), i32 [[TMP27]]), !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]] -; CHECK-NEXT: [[TMP47:%.*]] = zext i32 [[TMP27]] to i64 -; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP47]], [[EVL_BASED_IV]] -; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP47]] +; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP28]], [[EVL_BASED_IV]] +; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP28]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT25]] ; CHECK-NEXT: [[TMP48:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TMP5]] ; CHECK-NEXT: br i1 [[TMP48]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll index a46d877825f87..1fa0d08b1dcf6 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll @@ -39,9 +39,8 @@ define void @test(ptr noalias nocapture %a, ptr noalias nocapture %b, i32 %v) { ; VLENUNK-NEXT: [[TMP17:%.*]] = add [[PREDPHI]], [[BROADCAST_SPLAT]] ; VLENUNK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] ; VLENUNK-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[TMP17]], ptr align 4 [[TMP18]], splat (i1 true), i32 [[TMP7]]) -; VLENUNK-NEXT: [[TMP19:%.*]] = zext i32 [[TMP7]] to i64 -; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP19]], [[INDEX]] -; VLENUNK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP19]] +; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP15]], [[INDEX]] +; VLENUNK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]] ; VLENUNK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; VLENUNK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; VLENUNK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll index 83f23a48285f5..a86179384f443 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll @@ -53,9 +53,8 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV32-NEXT: [[TMP18:%.*]] = fadd [[WIDE_MASKED_GATHER6]], [[TMP17]] ; RV32-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[A]], [[VEC_IND]] ; RV32-NEXT: call void @llvm.vp.scatter.nxv2f64.nxv2p0( [[TMP18]], align 8 [[TMP19]], [[TMP14]], i32 [[TMP10]]), !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]] -; RV32-NEXT: [[TMP20:%.*]] = zext i32 [[TMP10]] to i64 -; RV32-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP20]], [[INDEX]] -; RV32-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP20]] +; RV32-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP8]], [[INDEX]] +; RV32-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; RV32-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; RV32-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], 625 ; RV32-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -125,9 +124,8 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV64-NEXT: [[TMP18:%.*]] = fadd [[WIDE_MASKED_GATHER6]], [[TMP17]] ; RV64-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[A]], [[VEC_IND]] ; RV64-NEXT: call void @llvm.vp.scatter.nxv2f64.nxv2p0( [[TMP18]], align 8 [[TMP19]], [[TMP14]], i32 [[TMP10]]), !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]] -; RV64-NEXT: [[TMP20:%.*]] = zext i32 [[TMP10]] to i64 -; RV64-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP20]], [[INDEX]] -; RV64-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP20]] +; RV64-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP8]], [[INDEX]] +; RV64-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; RV64-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; RV64-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], 625 ; RV64-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll index 37a0a8b4d7c87..2317cf76600fd 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll @@ -49,9 +49,8 @@ define void @pr87378_vpinstruction_or_drop_poison_generating_flags(ptr %arg, i64 ; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP23]], i64 poison, i64 [[INDEX]] ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i16, ptr [[ARG]], i64 [[PREDPHI]] ; CHECK-NEXT: call void @llvm.vp.store.nxv8i16.p0( zeroinitializer, ptr align 2 [[TMP24]], [[TMP22]], i32 [[TMP25]]) -; CHECK-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP26]], [[INDEX]] -; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP26]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP8]], [[INDEX]] +; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1001 ; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse-output.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse-output.ll new file mode 100644 index 0000000000000..1d7c80b3d722f --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse-output.ll @@ -0,0 +1,653 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5 +;; This is the loop in c++ being vectorize in this file with +;; vector.reverse +;; #pragma clang loop vectorize_width(4, scalable) +;; for (int i = N-1; i >= 0; --i) +;; a[i] = b[i] + 1.0; + +; RUN: opt -passes=loop-vectorize -mtriple=riscv64 -mattr=+v \ +; RUN: -riscv-v-vector-bits-min=128 -S < %s \ +; RUN: | FileCheck --check-prefix=RV64 %s + +; RUN: opt -passes=loop-vectorize -mtriple=riscv32 -mattr=+v \ +; RUN: -riscv-v-vector-bits-min=128 -S < %s \ +; RUN: | FileCheck --check-prefix=RV32 %s + +; RUN: opt -passes=loop-vectorize -mtriple=riscv64 -mattr=+v \ +; RUN: -riscv-v-vector-bits-min=128 -force-vector-interleave=2 -S < %s \ +; RUN: | FileCheck --check-prefix=RV64-UF2 %s + +define void @vector_reverse_i32(ptr noalias %A, ptr noalias %B) { +; RV64-LABEL: define void @vector_reverse_i32( +; RV64-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) #[[ATTR0:[0-9]+]] { +; RV64-NEXT: [[ENTRY:.*:]] +; RV64-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; RV64: [[VECTOR_PH]]: +; RV64-NEXT: br label %[[VECTOR_BODY:.*]] +; RV64: [[VECTOR_BODY]]: +; RV64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] +; RV64-NEXT: [[AVL:%.*]] = phi i64 [ 1023, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] +; RV64-NEXT: [[TMP0:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; RV64-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] +; RV64-NEXT: [[TMP8:%.*]] = add nsw i64 [[OFFSET_IDX]], -1 +; RV64-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP8]] +; RV64-NEXT: [[TMP5:%.*]] = zext i32 [[TMP0]] to i64 +; RV64-NEXT: [[TMP10:%.*]] = mul i64 0, [[TMP5]] +; RV64-NEXT: [[TMP22:%.*]] = sub i64 [[TMP5]], 1 +; RV64-NEXT: [[TMP11:%.*]] = mul i64 -1, [[TMP22]] +; RV64-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP9]], i64 [[TMP10]] +; RV64-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP7]], i64 [[TMP11]] +; RV64-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], splat (i1 true), i32 [[TMP0]]) +; RV64-NEXT: [[REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP0]]) +; RV64-NEXT: [[TMP14:%.*]] = add [[REVERSE]], splat (i32 1) +; RV64-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP8]] +; RV64-NEXT: [[TMP16:%.*]] = mul i64 0, [[TMP5]] +; RV64-NEXT: [[TMP23:%.*]] = sub i64 [[TMP5]], 1 +; RV64-NEXT: [[TMP17:%.*]] = mul i64 -1, [[TMP23]] +; RV64-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP15]], i64 [[TMP16]] +; RV64-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP18]], i64 [[TMP17]] +; RV64-NEXT: [[VP_REVERSE1:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[TMP14]], splat (i1 true), i32 [[TMP0]]) +; RV64-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_REVERSE1]], ptr align 4 [[TMP19]], splat (i1 true), i32 [[TMP0]]) +; RV64-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP5]], [[INDEX]] +; RV64-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP5]] +; RV64-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1023 +; RV64-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; RV64: [[MIDDLE_BLOCK]]: +; RV64-NEXT: br label %[[EXIT:.*]] +; RV64: [[SCALAR_PH]]: +; RV64-NEXT: br label %[[FOR_BODY:.*]] +; RV64: [[FOR_BODY]]: +; RV64-NEXT: [[DEC_IV:%.*]] = phi i64 [ 1023, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; RV64-NEXT: [[IV_NEXT]] = add nsw i64 [[DEC_IV]], -1 +; RV64-NEXT: [[ARRAYIDX_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV_NEXT]] +; RV64-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX_B]], align 4 +; RV64-NEXT: [[ADD:%.*]] = add i32 [[TMP21]], 1 +; RV64-NEXT: [[ARRAYIDX_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV_NEXT]] +; RV64-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX_A]], align 4 +; RV64-NEXT: [[CMP:%.*]] = icmp ugt i64 [[DEC_IV]], 1 +; RV64-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[EXIT]], !llvm.loop [[LOOP4:![0-9]+]] +; RV64: [[EXIT]]: +; RV64-NEXT: ret void +; +; RV32-LABEL: define void @vector_reverse_i32( +; RV32-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) #[[ATTR0:[0-9]+]] { +; RV32-NEXT: [[ENTRY:.*:]] +; RV32-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; RV32: [[VECTOR_PH]]: +; RV32-NEXT: br label %[[VECTOR_BODY:.*]] +; RV32: [[VECTOR_BODY]]: +; RV32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] +; RV32-NEXT: [[AVL:%.*]] = phi i64 [ 1023, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] +; RV32-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; RV32-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] +; RV32-NEXT: [[TMP8:%.*]] = add nsw i64 [[OFFSET_IDX]], -1 +; RV32-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP8]] +; RV32-NEXT: [[TMP11:%.*]] = mul i32 0, [[TMP10]] +; RV32-NEXT: [[TMP24:%.*]] = sub i32 [[TMP10]], 1 +; RV32-NEXT: [[TMP12:%.*]] = mul i32 -1, [[TMP24]] +; RV32-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP9]], i32 [[TMP11]] +; RV32-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i32 [[TMP12]] +; RV32-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP7]], splat (i1 true), i32 [[TMP10]]) +; RV32-NEXT: [[REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP10]]) +; RV32-NEXT: [[TMP15:%.*]] = add [[REVERSE]], splat (i32 1) +; RV32-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP8]] +; RV32-NEXT: [[TMP18:%.*]] = mul i32 0, [[TMP10]] +; RV32-NEXT: [[TMP25:%.*]] = sub i32 [[TMP10]], 1 +; RV32-NEXT: [[TMP19:%.*]] = mul i32 -1, [[TMP25]] +; RV32-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP16]], i32 [[TMP18]] +; RV32-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP13]], i32 [[TMP19]] +; RV32-NEXT: [[VP_REVERSE1:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[TMP15]], splat (i1 true), i32 [[TMP10]]) +; RV32-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_REVERSE1]], ptr align 4 [[TMP14]], splat (i1 true), i32 [[TMP10]]) +; RV32-NEXT: [[TMP17:%.*]] = zext i32 [[TMP10]] to i64 +; RV32-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP17]], [[INDEX]] +; RV32-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP17]] +; RV32-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1023 +; RV32-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; RV32: [[MIDDLE_BLOCK]]: +; RV32-NEXT: br label %[[EXIT:.*]] +; RV32: [[SCALAR_PH]]: +; RV32-NEXT: br label %[[FOR_BODY:.*]] +; RV32: [[FOR_BODY]]: +; RV32-NEXT: [[DEC_IV:%.*]] = phi i64 [ 1023, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; RV32-NEXT: [[IV_NEXT]] = add nsw i64 [[DEC_IV]], -1 +; RV32-NEXT: [[ARRAYIDX_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV_NEXT]] +; RV32-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX_B]], align 4 +; RV32-NEXT: [[ADD:%.*]] = add i32 [[TMP23]], 1 +; RV32-NEXT: [[ARRAYIDX_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV_NEXT]] +; RV32-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX_A]], align 4 +; RV32-NEXT: [[CMP:%.*]] = icmp ugt i64 [[DEC_IV]], 1 +; RV32-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[EXIT]], !llvm.loop [[LOOP4:![0-9]+]] +; RV32: [[EXIT]]: +; RV32-NEXT: ret void +; +; RV64-UF2-LABEL: define void @vector_reverse_i32( +; RV64-UF2-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) #[[ATTR0:[0-9]+]] { +; RV64-UF2-NEXT: [[ENTRY:.*]]: +; RV64-UF2-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; RV64-UF2-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 +; RV64-UF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1023, [[TMP1]] +; RV64-UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; RV64-UF2: [[VECTOR_PH]]: +; RV64-UF2-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; RV64-UF2-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 +; RV64-UF2-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 +; RV64-UF2-NEXT: [[N_VEC:%.*]] = urem i64 1023, [[TMP6]] +; RV64-UF2-NEXT: [[TMP7:%.*]] = sub i64 1023, [[N_VEC]] +; RV64-UF2-NEXT: [[TMP8:%.*]] = sub i64 1023, [[TMP7]] +; RV64-UF2-NEXT: br label %[[VECTOR_BODY:.*]] +; RV64-UF2: [[VECTOR_BODY]]: +; RV64-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; RV64-UF2-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] +; RV64-UF2-NEXT: [[TMP9:%.*]] = add nsw i64 [[OFFSET_IDX]], -1 +; RV64-UF2-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP9]] +; RV64-UF2-NEXT: [[TMP11:%.*]] = mul i64 0, [[TMP5]] +; RV64-UF2-NEXT: [[TMP32:%.*]] = sub i64 [[TMP5]], 1 +; RV64-UF2-NEXT: [[TMP12:%.*]] = mul i64 -1, [[TMP32]] +; RV64-UF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[TMP11]] +; RV64-UF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 [[TMP12]] +; RV64-UF2-NEXT: [[TMP15:%.*]] = mul i64 -1, [[TMP5]] +; RV64-UF2-NEXT: [[TMP33:%.*]] = sub i64 [[TMP5]], 1 +; RV64-UF2-NEXT: [[TMP16:%.*]] = mul i64 -1, [[TMP33]] +; RV64-UF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[TMP15]] +; RV64-UF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 [[TMP16]] +; RV64-UF2-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP14]], align 4 +; RV64-UF2-NEXT: [[REVERSE:%.*]] = call @llvm.vector.reverse.nxv4i32( [[WIDE_LOAD]]) +; RV64-UF2-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP18]], align 4 +; RV64-UF2-NEXT: [[REVERSE2:%.*]] = call @llvm.vector.reverse.nxv4i32( [[WIDE_LOAD1]]) +; RV64-UF2-NEXT: [[TMP19:%.*]] = add [[REVERSE]], splat (i32 1) +; RV64-UF2-NEXT: [[TMP20:%.*]] = add [[REVERSE2]], splat (i32 1) +; RV64-UF2-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP9]] +; RV64-UF2-NEXT: [[TMP22:%.*]] = mul i64 0, [[TMP5]] +; RV64-UF2-NEXT: [[TMP34:%.*]] = sub i64 [[TMP5]], 1 +; RV64-UF2-NEXT: [[TMP23:%.*]] = mul i64 -1, [[TMP34]] +; RV64-UF2-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 [[TMP22]] +; RV64-UF2-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i64 [[TMP23]] +; RV64-UF2-NEXT: [[TMP26:%.*]] = mul i64 -1, [[TMP5]] +; RV64-UF2-NEXT: [[TMP35:%.*]] = sub i64 [[TMP5]], 1 +; RV64-UF2-NEXT: [[TMP27:%.*]] = mul i64 -1, [[TMP35]] +; RV64-UF2-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 [[TMP26]] +; RV64-UF2-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[TMP28]], i64 [[TMP27]] +; RV64-UF2-NEXT: [[REVERSE3:%.*]] = call @llvm.vector.reverse.nxv4i32( [[TMP19]]) +; RV64-UF2-NEXT: store [[REVERSE3]], ptr [[TMP25]], align 4 +; RV64-UF2-NEXT: [[REVERSE4:%.*]] = call @llvm.vector.reverse.nxv4i32( [[TMP20]]) +; RV64-UF2-NEXT: store [[REVERSE4]], ptr [[TMP29]], align 4 +; RV64-UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] +; RV64-UF2-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[TMP7]] +; RV64-UF2-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; RV64-UF2: [[MIDDLE_BLOCK]]: +; RV64-UF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 1023, [[TMP7]] +; RV64-UF2-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; RV64-UF2: [[SCALAR_PH]]: +; RV64-UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP8]], %[[MIDDLE_BLOCK]] ], [ 1023, %[[ENTRY]] ] +; RV64-UF2-NEXT: br label %[[FOR_BODY:.*]] +; RV64-UF2: [[FOR_BODY]]: +; RV64-UF2-NEXT: [[DEC_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; RV64-UF2-NEXT: [[IV_NEXT]] = add nsw i64 [[DEC_IV]], -1 +; RV64-UF2-NEXT: [[ARRAYIDX_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV_NEXT]] +; RV64-UF2-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX_B]], align 4 +; RV64-UF2-NEXT: [[ADD:%.*]] = add i32 [[TMP31]], 1 +; RV64-UF2-NEXT: [[ARRAYIDX_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV_NEXT]] +; RV64-UF2-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX_A]], align 4 +; RV64-UF2-NEXT: [[CMP:%.*]] = icmp ugt i64 [[DEC_IV]], 1 +; RV64-UF2-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[EXIT]], !llvm.loop [[LOOP3:![0-9]+]] +; RV64-UF2: [[EXIT]]: +; RV64-UF2-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %dec.iv = phi i64 [ 1023, %entry ], [ %iv.next, %for.body ] + %iv.next = add nsw i64 %dec.iv, -1 + %arrayidx.b = getelementptr inbounds i32, ptr %B, i64 %iv.next + %0 = load i32, ptr %arrayidx.b, align 4 + %add = add i32 %0, 1 + %arrayidx.a = getelementptr inbounds i32, ptr %A, i64 %iv.next + store i32 %add, ptr %arrayidx.a, align 4 + %cmp = icmp ugt i64 %dec.iv, 1 + br i1 %cmp, label %for.body, label %exit, !llvm.loop !0 + +exit: + ret void +} + +define void @vector_reverse_f32(ptr noalias %A, ptr noalias %B) { +; RV64-LABEL: define void @vector_reverse_f32( +; RV64-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) #[[ATTR0]] { +; RV64-NEXT: [[ENTRY:.*:]] +; RV64-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; RV64: [[VECTOR_PH]]: +; RV64-NEXT: br label %[[VECTOR_BODY:.*]] +; RV64: [[VECTOR_BODY]]: +; RV64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] +; RV64-NEXT: [[AVL:%.*]] = phi i64 [ 1023, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] +; RV64-NEXT: [[TMP0:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; RV64-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] +; RV64-NEXT: [[TMP8:%.*]] = add nsw i64 [[OFFSET_IDX]], -1 +; RV64-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP8]] +; RV64-NEXT: [[TMP5:%.*]] = zext i32 [[TMP0]] to i64 +; RV64-NEXT: [[TMP10:%.*]] = mul i64 0, [[TMP5]] +; RV64-NEXT: [[TMP22:%.*]] = sub i64 [[TMP5]], 1 +; RV64-NEXT: [[TMP11:%.*]] = mul i64 -1, [[TMP22]] +; RV64-NEXT: [[TMP7:%.*]] = getelementptr float, ptr [[TMP9]], i64 [[TMP10]] +; RV64-NEXT: [[TMP12:%.*]] = getelementptr float, ptr [[TMP7]], i64 [[TMP11]] +; RV64-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], splat (i1 true), i32 [[TMP0]]) +; RV64-NEXT: [[REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4f32( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP0]]) +; RV64-NEXT: [[TMP14:%.*]] = fadd [[REVERSE]], splat (float 1.000000e+00) +; RV64-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP8]] +; RV64-NEXT: [[TMP16:%.*]] = mul i64 0, [[TMP5]] +; RV64-NEXT: [[TMP23:%.*]] = sub i64 [[TMP5]], 1 +; RV64-NEXT: [[TMP17:%.*]] = mul i64 -1, [[TMP23]] +; RV64-NEXT: [[TMP18:%.*]] = getelementptr float, ptr [[TMP15]], i64 [[TMP16]] +; RV64-NEXT: [[TMP19:%.*]] = getelementptr float, ptr [[TMP18]], i64 [[TMP17]] +; RV64-NEXT: [[VP_REVERSE1:%.*]] = call @llvm.experimental.vp.reverse.nxv4f32( [[TMP14]], splat (i1 true), i32 [[TMP0]]) +; RV64-NEXT: call void @llvm.vp.store.nxv4f32.p0( [[VP_REVERSE1]], ptr align 4 [[TMP19]], splat (i1 true), i32 [[TMP0]]) +; RV64-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP5]], [[INDEX]] +; RV64-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP5]] +; RV64-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1023 +; RV64-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; RV64: [[MIDDLE_BLOCK]]: +; RV64-NEXT: br label %[[EXIT:.*]] +; RV64: [[SCALAR_PH]]: +; RV64-NEXT: br label %[[FOR_BODY:.*]] +; RV64: [[FOR_BODY]]: +; RV64-NEXT: [[DEC_IV:%.*]] = phi i64 [ 1023, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; RV64-NEXT: [[IV_NEXT]] = add nsw i64 [[DEC_IV]], -1 +; RV64-NEXT: [[ARRAYIDX_B:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV_NEXT]] +; RV64-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX_B]], align 4 +; RV64-NEXT: [[FADD:%.*]] = fadd float [[TMP21]], 1.000000e+00 +; RV64-NEXT: [[ARRAYIDX_A:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV_NEXT]] +; RV64-NEXT: store float [[FADD]], ptr [[ARRAYIDX_A]], align 4 +; RV64-NEXT: [[CMP:%.*]] = icmp ugt i64 [[DEC_IV]], 1 +; RV64-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[EXIT]], !llvm.loop [[LOOP6:![0-9]+]] +; RV64: [[EXIT]]: +; RV64-NEXT: ret void +; +; RV32-LABEL: define void @vector_reverse_f32( +; RV32-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) #[[ATTR0]] { +; RV32-NEXT: [[ENTRY:.*:]] +; RV32-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; RV32: [[VECTOR_PH]]: +; RV32-NEXT: br label %[[VECTOR_BODY:.*]] +; RV32: [[VECTOR_BODY]]: +; RV32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] +; RV32-NEXT: [[AVL:%.*]] = phi i64 [ 1023, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] +; RV32-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; RV32-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] +; RV32-NEXT: [[TMP8:%.*]] = add nsw i64 [[OFFSET_IDX]], -1 +; RV32-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP8]] +; RV32-NEXT: [[TMP11:%.*]] = mul i32 0, [[TMP10]] +; RV32-NEXT: [[TMP24:%.*]] = sub i32 [[TMP10]], 1 +; RV32-NEXT: [[TMP12:%.*]] = mul i32 -1, [[TMP24]] +; RV32-NEXT: [[TMP6:%.*]] = getelementptr float, ptr [[TMP9]], i32 [[TMP11]] +; RV32-NEXT: [[TMP7:%.*]] = getelementptr float, ptr [[TMP6]], i32 [[TMP12]] +; RV32-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP7]], splat (i1 true), i32 [[TMP10]]) +; RV32-NEXT: [[REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4f32( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP10]]) +; RV32-NEXT: [[TMP15:%.*]] = fadd [[REVERSE]], splat (float 1.000000e+00) +; RV32-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP8]] +; RV32-NEXT: [[TMP18:%.*]] = mul i32 0, [[TMP10]] +; RV32-NEXT: [[TMP25:%.*]] = sub i32 [[TMP10]], 1 +; RV32-NEXT: [[TMP19:%.*]] = mul i32 -1, [[TMP25]] +; RV32-NEXT: [[TMP13:%.*]] = getelementptr float, ptr [[TMP16]], i32 [[TMP18]] +; RV32-NEXT: [[TMP14:%.*]] = getelementptr float, ptr [[TMP13]], i32 [[TMP19]] +; RV32-NEXT: [[VP_REVERSE1:%.*]] = call @llvm.experimental.vp.reverse.nxv4f32( [[TMP15]], splat (i1 true), i32 [[TMP10]]) +; RV32-NEXT: call void @llvm.vp.store.nxv4f32.p0( [[VP_REVERSE1]], ptr align 4 [[TMP14]], splat (i1 true), i32 [[TMP10]]) +; RV32-NEXT: [[TMP17:%.*]] = zext i32 [[TMP10]] to i64 +; RV32-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP17]], [[INDEX]] +; RV32-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP17]] +; RV32-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1023 +; RV32-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; RV32: [[MIDDLE_BLOCK]]: +; RV32-NEXT: br label %[[EXIT:.*]] +; RV32: [[SCALAR_PH]]: +; RV32-NEXT: br label %[[FOR_BODY:.*]] +; RV32: [[FOR_BODY]]: +; RV32-NEXT: [[DEC_IV:%.*]] = phi i64 [ 1023, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; RV32-NEXT: [[IV_NEXT]] = add nsw i64 [[DEC_IV]], -1 +; RV32-NEXT: [[ARRAYIDX_B:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV_NEXT]] +; RV32-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX_B]], align 4 +; RV32-NEXT: [[FADD:%.*]] = fadd float [[TMP23]], 1.000000e+00 +; RV32-NEXT: [[ARRAYIDX_A:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV_NEXT]] +; RV32-NEXT: store float [[FADD]], ptr [[ARRAYIDX_A]], align 4 +; RV32-NEXT: [[CMP:%.*]] = icmp ugt i64 [[DEC_IV]], 1 +; RV32-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[EXIT]], !llvm.loop [[LOOP6:![0-9]+]] +; RV32: [[EXIT]]: +; RV32-NEXT: ret void +; +; RV64-UF2-LABEL: define void @vector_reverse_f32( +; RV64-UF2-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) #[[ATTR0]] { +; RV64-UF2-NEXT: [[ENTRY:.*]]: +; RV64-UF2-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; RV64-UF2-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 +; RV64-UF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1023, [[TMP1]] +; RV64-UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; RV64-UF2: [[VECTOR_PH]]: +; RV64-UF2-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; RV64-UF2-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 +; RV64-UF2-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 +; RV64-UF2-NEXT: [[N_VEC:%.*]] = urem i64 1023, [[TMP6]] +; RV64-UF2-NEXT: [[TMP7:%.*]] = sub i64 1023, [[N_VEC]] +; RV64-UF2-NEXT: [[TMP8:%.*]] = sub i64 1023, [[TMP7]] +; RV64-UF2-NEXT: br label %[[VECTOR_BODY:.*]] +; RV64-UF2: [[VECTOR_BODY]]: +; RV64-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; RV64-UF2-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] +; RV64-UF2-NEXT: [[TMP9:%.*]] = add nsw i64 [[OFFSET_IDX]], -1 +; RV64-UF2-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP9]] +; RV64-UF2-NEXT: [[TMP11:%.*]] = mul i64 0, [[TMP5]] +; RV64-UF2-NEXT: [[TMP32:%.*]] = sub i64 [[TMP5]], 1 +; RV64-UF2-NEXT: [[TMP12:%.*]] = mul i64 -1, [[TMP32]] +; RV64-UF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i64 [[TMP11]] +; RV64-UF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[TMP12]] +; RV64-UF2-NEXT: [[TMP15:%.*]] = mul i64 -1, [[TMP5]] +; RV64-UF2-NEXT: [[TMP33:%.*]] = sub i64 [[TMP5]], 1 +; RV64-UF2-NEXT: [[TMP16:%.*]] = mul i64 -1, [[TMP33]] +; RV64-UF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i64 [[TMP15]] +; RV64-UF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP16]] +; RV64-UF2-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP14]], align 4 +; RV64-UF2-NEXT: [[REVERSE:%.*]] = call @llvm.vector.reverse.nxv4f32( [[WIDE_LOAD]]) +; RV64-UF2-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP18]], align 4 +; RV64-UF2-NEXT: [[REVERSE2:%.*]] = call @llvm.vector.reverse.nxv4f32( [[WIDE_LOAD1]]) +; RV64-UF2-NEXT: [[TMP19:%.*]] = fadd [[REVERSE]], splat (float 1.000000e+00) +; RV64-UF2-NEXT: [[TMP20:%.*]] = fadd [[REVERSE2]], splat (float 1.000000e+00) +; RV64-UF2-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP9]] +; RV64-UF2-NEXT: [[TMP22:%.*]] = mul i64 0, [[TMP5]] +; RV64-UF2-NEXT: [[TMP34:%.*]] = sub i64 [[TMP5]], 1 +; RV64-UF2-NEXT: [[TMP23:%.*]] = mul i64 -1, [[TMP34]] +; RV64-UF2-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[TMP22]] +; RV64-UF2-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 [[TMP23]] +; RV64-UF2-NEXT: [[TMP26:%.*]] = mul i64 -1, [[TMP5]] +; RV64-UF2-NEXT: [[TMP35:%.*]] = sub i64 [[TMP5]], 1 +; RV64-UF2-NEXT: [[TMP27:%.*]] = mul i64 -1, [[TMP35]] +; RV64-UF2-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[TMP26]] +; RV64-UF2-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[TMP27]] +; RV64-UF2-NEXT: [[REVERSE3:%.*]] = call @llvm.vector.reverse.nxv4f32( [[TMP19]]) +; RV64-UF2-NEXT: store [[REVERSE3]], ptr [[TMP25]], align 4 +; RV64-UF2-NEXT: [[REVERSE4:%.*]] = call @llvm.vector.reverse.nxv4f32( [[TMP20]]) +; RV64-UF2-NEXT: store [[REVERSE4]], ptr [[TMP29]], align 4 +; RV64-UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] +; RV64-UF2-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[TMP7]] +; RV64-UF2-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; RV64-UF2: [[MIDDLE_BLOCK]]: +; RV64-UF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 1023, [[TMP7]] +; RV64-UF2-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; RV64-UF2: [[SCALAR_PH]]: +; RV64-UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP8]], %[[MIDDLE_BLOCK]] ], [ 1023, %[[ENTRY]] ] +; RV64-UF2-NEXT: br label %[[FOR_BODY:.*]] +; RV64-UF2: [[FOR_BODY]]: +; RV64-UF2-NEXT: [[DEC_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; RV64-UF2-NEXT: [[IV_NEXT]] = add nsw i64 [[DEC_IV]], -1 +; RV64-UF2-NEXT: [[ARRAYIDX_B:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV_NEXT]] +; RV64-UF2-NEXT: [[TMP31:%.*]] = load float, ptr [[ARRAYIDX_B]], align 4 +; RV64-UF2-NEXT: [[FADD:%.*]] = fadd float [[TMP31]], 1.000000e+00 +; RV64-UF2-NEXT: [[ARRAYIDX_A:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV_NEXT]] +; RV64-UF2-NEXT: store float [[FADD]], ptr [[ARRAYIDX_A]], align 4 +; RV64-UF2-NEXT: [[CMP:%.*]] = icmp ugt i64 [[DEC_IV]], 1 +; RV64-UF2-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[EXIT]], !llvm.loop [[LOOP5:![0-9]+]] +; RV64-UF2: [[EXIT]]: +; RV64-UF2-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %dec.iv = phi i64 [ 1023, %entry ], [ %iv.next, %for.body ] + %iv.next = add nsw i64 %dec.iv, -1 + %arrayidx.b = getelementptr inbounds float, ptr %B, i64 %iv.next + %0 = load float, ptr %arrayidx.b, align 4 + %fadd = fadd float %0, 1.000000e+00 + %arrayidx.a = getelementptr inbounds float, ptr %A, i64 %iv.next + store float %fadd, ptr %arrayidx.a, align 4 + %cmp = icmp ugt i64 %dec.iv, 1 + br i1 %cmp, label %for.body, label %exit, !llvm.loop !0 + +exit: + ret void +} + +define void @vector_reverse_irregular_type(ptr noalias %A, ptr noalias %B) { +; RV64-LABEL: define void @vector_reverse_irregular_type( +; RV64-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) #[[ATTR0]] { +; RV64-NEXT: [[ENTRY:.*]]: +; RV64-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; RV64: [[VECTOR_PH]]: +; RV64-NEXT: br label %[[VECTOR_BODY:.*]] +; RV64: [[VECTOR_BODY]]: +; RV64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; RV64-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] +; RV64-NEXT: [[DEC_IV:%.*]] = add i64 [[OFFSET_IDX]], 0 +; RV64-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -1 +; RV64-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -2 +; RV64-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -3 +; RV64-NEXT: [[IV_NEXT:%.*]] = add nsw i64 [[DEC_IV]], -1 +; RV64-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP1]], -1 +; RV64-NEXT: [[TMP6:%.*]] = add nsw i64 [[TMP2]], -1 +; RV64-NEXT: [[TMP7:%.*]] = add nsw i64 [[TMP3]], -1 +; RV64-NEXT: [[ARRAYIDX_B:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[IV_NEXT]] +; RV64-NEXT: [[TMP9:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP5]] +; RV64-NEXT: [[TMP10:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP6]] +; RV64-NEXT: [[TMP11:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP7]] +; RV64-NEXT: [[TMP0:%.*]] = load i7, ptr [[ARRAYIDX_B]], align 1 +; RV64-NEXT: [[TMP13:%.*]] = load i7, ptr [[TMP9]], align 1 +; RV64-NEXT: [[TMP14:%.*]] = load i7, ptr [[TMP10]], align 1 +; RV64-NEXT: [[TMP15:%.*]] = load i7, ptr [[TMP11]], align 1 +; RV64-NEXT: [[TMP16:%.*]] = insertelement <4 x i7> poison, i7 [[TMP0]], i32 0 +; RV64-NEXT: [[TMP17:%.*]] = insertelement <4 x i7> [[TMP16]], i7 [[TMP13]], i32 1 +; RV64-NEXT: [[TMP18:%.*]] = insertelement <4 x i7> [[TMP17]], i7 [[TMP14]], i32 2 +; RV64-NEXT: [[TMP19:%.*]] = insertelement <4 x i7> [[TMP18]], i7 [[TMP15]], i32 3 +; RV64-NEXT: [[TMP20:%.*]] = add <4 x i7> [[TMP19]], splat (i7 1) +; RV64-NEXT: [[TMP21:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[IV_NEXT]] +; RV64-NEXT: [[TMP22:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP5]] +; RV64-NEXT: [[TMP23:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP6]] +; RV64-NEXT: [[TMP24:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP7]] +; RV64-NEXT: [[TMP25:%.*]] = extractelement <4 x i7> [[TMP20]], i32 0 +; RV64-NEXT: store i7 [[TMP25]], ptr [[TMP21]], align 1 +; RV64-NEXT: [[TMP26:%.*]] = extractelement <4 x i7> [[TMP20]], i32 1 +; RV64-NEXT: store i7 [[TMP26]], ptr [[TMP22]], align 1 +; RV64-NEXT: [[TMP27:%.*]] = extractelement <4 x i7> [[TMP20]], i32 2 +; RV64-NEXT: store i7 [[TMP27]], ptr [[TMP23]], align 1 +; RV64-NEXT: [[TMP28:%.*]] = extractelement <4 x i7> [[TMP20]], i32 3 +; RV64-NEXT: store i7 [[TMP28]], ptr [[TMP24]], align 1 +; RV64-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; RV64-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1020 +; RV64-NEXT: br i1 [[TMP29]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; RV64: [[MIDDLE_BLOCK]]: +; RV64-NEXT: br label %[[SCALAR_PH]] +; RV64: [[SCALAR_PH]]: +; RV64-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, %[[MIDDLE_BLOCK]] ], [ 1023, %[[ENTRY]] ] +; RV64-NEXT: br label %[[FOR_BODY:.*]] +; RV64: [[FOR_BODY]]: +; RV64-NEXT: [[DEC_IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], %[[FOR_BODY]] ] +; RV64-NEXT: [[IV_NEXT1]] = add nsw i64 [[DEC_IV1]], -1 +; RV64-NEXT: [[ARRAYIDX_B1:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[IV_NEXT1]] +; RV64-NEXT: [[TMP30:%.*]] = load i7, ptr [[ARRAYIDX_B1]], align 1 +; RV64-NEXT: [[ADD:%.*]] = add i7 [[TMP30]], 1 +; RV64-NEXT: [[ARRAYIDX_A:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[IV_NEXT1]] +; RV64-NEXT: store i7 [[ADD]], ptr [[ARRAYIDX_A]], align 1 +; RV64-NEXT: [[CMP:%.*]] = icmp ugt i64 [[DEC_IV1]], 1 +; RV64-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[EXIT:.*]], !llvm.loop [[LOOP8:![0-9]+]] +; RV64: [[EXIT]]: +; RV64-NEXT: ret void +; +; RV32-LABEL: define void @vector_reverse_irregular_type( +; RV32-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) #[[ATTR0]] { +; RV32-NEXT: [[ENTRY:.*]]: +; RV32-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; RV32: [[VECTOR_PH]]: +; RV32-NEXT: br label %[[VECTOR_BODY:.*]] +; RV32: [[VECTOR_BODY]]: +; RV32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; RV32-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] +; RV32-NEXT: [[DEC_IV:%.*]] = add i64 [[OFFSET_IDX]], 0 +; RV32-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -1 +; RV32-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -2 +; RV32-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -3 +; RV32-NEXT: [[IV_NEXT:%.*]] = add nsw i64 [[DEC_IV]], -1 +; RV32-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP1]], -1 +; RV32-NEXT: [[TMP6:%.*]] = add nsw i64 [[TMP2]], -1 +; RV32-NEXT: [[TMP7:%.*]] = add nsw i64 [[TMP3]], -1 +; RV32-NEXT: [[ARRAYIDX_B:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[IV_NEXT]] +; RV32-NEXT: [[TMP9:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP5]] +; RV32-NEXT: [[TMP10:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP6]] +; RV32-NEXT: [[TMP11:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP7]] +; RV32-NEXT: [[TMP0:%.*]] = load i7, ptr [[ARRAYIDX_B]], align 1 +; RV32-NEXT: [[TMP13:%.*]] = load i7, ptr [[TMP9]], align 1 +; RV32-NEXT: [[TMP14:%.*]] = load i7, ptr [[TMP10]], align 1 +; RV32-NEXT: [[TMP15:%.*]] = load i7, ptr [[TMP11]], align 1 +; RV32-NEXT: [[TMP16:%.*]] = insertelement <4 x i7> poison, i7 [[TMP0]], i32 0 +; RV32-NEXT: [[TMP17:%.*]] = insertelement <4 x i7> [[TMP16]], i7 [[TMP13]], i32 1 +; RV32-NEXT: [[TMP18:%.*]] = insertelement <4 x i7> [[TMP17]], i7 [[TMP14]], i32 2 +; RV32-NEXT: [[TMP19:%.*]] = insertelement <4 x i7> [[TMP18]], i7 [[TMP15]], i32 3 +; RV32-NEXT: [[TMP20:%.*]] = add <4 x i7> [[TMP19]], splat (i7 1) +; RV32-NEXT: [[TMP21:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[IV_NEXT]] +; RV32-NEXT: [[TMP22:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP5]] +; RV32-NEXT: [[TMP23:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP6]] +; RV32-NEXT: [[TMP24:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP7]] +; RV32-NEXT: [[TMP25:%.*]] = extractelement <4 x i7> [[TMP20]], i32 0 +; RV32-NEXT: store i7 [[TMP25]], ptr [[TMP21]], align 1 +; RV32-NEXT: [[TMP26:%.*]] = extractelement <4 x i7> [[TMP20]], i32 1 +; RV32-NEXT: store i7 [[TMP26]], ptr [[TMP22]], align 1 +; RV32-NEXT: [[TMP27:%.*]] = extractelement <4 x i7> [[TMP20]], i32 2 +; RV32-NEXT: store i7 [[TMP27]], ptr [[TMP23]], align 1 +; RV32-NEXT: [[TMP28:%.*]] = extractelement <4 x i7> [[TMP20]], i32 3 +; RV32-NEXT: store i7 [[TMP28]], ptr [[TMP24]], align 1 +; RV32-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; RV32-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1020 +; RV32-NEXT: br i1 [[TMP29]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; RV32: [[MIDDLE_BLOCK]]: +; RV32-NEXT: br label %[[SCALAR_PH]] +; RV32: [[SCALAR_PH]]: +; RV32-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, %[[MIDDLE_BLOCK]] ], [ 1023, %[[ENTRY]] ] +; RV32-NEXT: br label %[[FOR_BODY:.*]] +; RV32: [[FOR_BODY]]: +; RV32-NEXT: [[DEC_IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], %[[FOR_BODY]] ] +; RV32-NEXT: [[IV_NEXT1]] = add nsw i64 [[DEC_IV1]], -1 +; RV32-NEXT: [[ARRAYIDX_B1:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[IV_NEXT1]] +; RV32-NEXT: [[TMP30:%.*]] = load i7, ptr [[ARRAYIDX_B1]], align 1 +; RV32-NEXT: [[ADD:%.*]] = add i7 [[TMP30]], 1 +; RV32-NEXT: [[ARRAYIDX_A:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[IV_NEXT1]] +; RV32-NEXT: store i7 [[ADD]], ptr [[ARRAYIDX_A]], align 1 +; RV32-NEXT: [[CMP:%.*]] = icmp ugt i64 [[DEC_IV1]], 1 +; RV32-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[EXIT:.*]], !llvm.loop [[LOOP8:![0-9]+]] +; RV32: [[EXIT]]: +; RV32-NEXT: ret void +; +; RV64-UF2-LABEL: define void @vector_reverse_irregular_type( +; RV64-UF2-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) #[[ATTR0]] { +; RV64-UF2-NEXT: [[ENTRY:.*]]: +; RV64-UF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; RV64-UF2: [[VECTOR_PH]]: +; RV64-UF2-NEXT: br label %[[VECTOR_BODY:.*]] +; RV64-UF2: [[VECTOR_BODY]]: +; RV64-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; RV64-UF2-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] +; RV64-UF2-NEXT: [[TMP16:%.*]] = add i64 [[OFFSET_IDX]], 0 +; RV64-UF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], -1 +; RV64-UF2-NEXT: [[TMP17:%.*]] = add i64 [[OFFSET_IDX]], -2 +; RV64-UF2-NEXT: [[TMP24:%.*]] = add i64 [[OFFSET_IDX]], -3 +; RV64-UF2-NEXT: [[TMP25:%.*]] = add i64 [[OFFSET_IDX]], -4 +; RV64-UF2-NEXT: [[TMP42:%.*]] = add i64 [[OFFSET_IDX]], -5 +; RV64-UF2-NEXT: [[TMP43:%.*]] = add i64 [[OFFSET_IDX]], -6 +; RV64-UF2-NEXT: [[TMP50:%.*]] = add i64 [[OFFSET_IDX]], -7 +; RV64-UF2-NEXT: [[TMP1:%.*]] = add nsw i64 [[TMP16]], -1 +; RV64-UF2-NEXT: [[TMP2:%.*]] = add nsw i64 [[TMP0]], -1 +; RV64-UF2-NEXT: [[TMP51:%.*]] = add nsw i64 [[TMP17]], -1 +; RV64-UF2-NEXT: [[TMP11:%.*]] = add nsw i64 [[TMP24]], -1 +; RV64-UF2-NEXT: [[TMP59:%.*]] = add nsw i64 [[TMP25]], -1 +; RV64-UF2-NEXT: [[TMP13:%.*]] = add nsw i64 [[TMP42]], -1 +; RV64-UF2-NEXT: [[TMP14:%.*]] = add nsw i64 [[TMP43]], -1 +; RV64-UF2-NEXT: [[TMP15:%.*]] = add nsw i64 [[TMP50]], -1 +; RV64-UF2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP1]] +; RV64-UF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP2]] +; RV64-UF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP51]] +; RV64-UF2-NEXT: [[TMP19:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP11]] +; RV64-UF2-NEXT: [[TMP20:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP59]] +; RV64-UF2-NEXT: [[TMP21:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP13]] +; RV64-UF2-NEXT: [[TMP22:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP14]] +; RV64-UF2-NEXT: [[TMP23:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP15]] +; RV64-UF2-NEXT: [[TMP5:%.*]] = load i7, ptr [[TMP3]], align 1 +; RV64-UF2-NEXT: [[TMP6:%.*]] = load i7, ptr [[TMP4]], align 1 +; RV64-UF2-NEXT: [[TMP26:%.*]] = load i7, ptr [[TMP18]], align 1 +; RV64-UF2-NEXT: [[TMP27:%.*]] = load i7, ptr [[TMP19]], align 1 +; RV64-UF2-NEXT: [[TMP28:%.*]] = insertelement <4 x i7> poison, i7 [[TMP5]], i32 0 +; RV64-UF2-NEXT: [[TMP29:%.*]] = insertelement <4 x i7> [[TMP28]], i7 [[TMP6]], i32 1 +; RV64-UF2-NEXT: [[TMP30:%.*]] = insertelement <4 x i7> [[TMP29]], i7 [[TMP26]], i32 2 +; RV64-UF2-NEXT: [[TMP31:%.*]] = insertelement <4 x i7> [[TMP30]], i7 [[TMP27]], i32 3 +; RV64-UF2-NEXT: [[TMP32:%.*]] = load i7, ptr [[TMP20]], align 1 +; RV64-UF2-NEXT: [[TMP33:%.*]] = load i7, ptr [[TMP21]], align 1 +; RV64-UF2-NEXT: [[TMP34:%.*]] = load i7, ptr [[TMP22]], align 1 +; RV64-UF2-NEXT: [[TMP35:%.*]] = load i7, ptr [[TMP23]], align 1 +; RV64-UF2-NEXT: [[TMP36:%.*]] = insertelement <4 x i7> poison, i7 [[TMP32]], i32 0 +; RV64-UF2-NEXT: [[TMP37:%.*]] = insertelement <4 x i7> [[TMP36]], i7 [[TMP33]], i32 1 +; RV64-UF2-NEXT: [[TMP38:%.*]] = insertelement <4 x i7> [[TMP37]], i7 [[TMP34]], i32 2 +; RV64-UF2-NEXT: [[TMP39:%.*]] = insertelement <4 x i7> [[TMP38]], i7 [[TMP35]], i32 3 +; RV64-UF2-NEXT: [[TMP40:%.*]] = add <4 x i7> [[TMP31]], splat (i7 1) +; RV64-UF2-NEXT: [[TMP41:%.*]] = add <4 x i7> [[TMP39]], splat (i7 1) +; RV64-UF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP1]] +; RV64-UF2-NEXT: [[TMP10:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP2]] +; RV64-UF2-NEXT: [[TMP44:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP51]] +; RV64-UF2-NEXT: [[TMP45:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP11]] +; RV64-UF2-NEXT: [[TMP46:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP59]] +; RV64-UF2-NEXT: [[TMP47:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP13]] +; RV64-UF2-NEXT: [[TMP48:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP14]] +; RV64-UF2-NEXT: [[TMP49:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP15]] +; RV64-UF2-NEXT: [[TMP7:%.*]] = extractelement <4 x i7> [[TMP40]], i32 0 +; RV64-UF2-NEXT: store i7 [[TMP7]], ptr [[TMP9]], align 1 +; RV64-UF2-NEXT: [[TMP8:%.*]] = extractelement <4 x i7> [[TMP40]], i32 1 +; RV64-UF2-NEXT: store i7 [[TMP8]], ptr [[TMP10]], align 1 +; RV64-UF2-NEXT: [[TMP52:%.*]] = extractelement <4 x i7> [[TMP40]], i32 2 +; RV64-UF2-NEXT: store i7 [[TMP52]], ptr [[TMP44]], align 1 +; RV64-UF2-NEXT: [[TMP53:%.*]] = extractelement <4 x i7> [[TMP40]], i32 3 +; RV64-UF2-NEXT: store i7 [[TMP53]], ptr [[TMP45]], align 1 +; RV64-UF2-NEXT: [[TMP54:%.*]] = extractelement <4 x i7> [[TMP41]], i32 0 +; RV64-UF2-NEXT: store i7 [[TMP54]], ptr [[TMP46]], align 1 +; RV64-UF2-NEXT: [[TMP55:%.*]] = extractelement <4 x i7> [[TMP41]], i32 1 +; RV64-UF2-NEXT: store i7 [[TMP55]], ptr [[TMP47]], align 1 +; RV64-UF2-NEXT: [[TMP56:%.*]] = extractelement <4 x i7> [[TMP41]], i32 2 +; RV64-UF2-NEXT: store i7 [[TMP56]], ptr [[TMP48]], align 1 +; RV64-UF2-NEXT: [[TMP57:%.*]] = extractelement <4 x i7> [[TMP41]], i32 3 +; RV64-UF2-NEXT: store i7 [[TMP57]], ptr [[TMP49]], align 1 +; RV64-UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; RV64-UF2-NEXT: [[TMP58:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1016 +; RV64-UF2-NEXT: br i1 [[TMP58]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; RV64-UF2: [[MIDDLE_BLOCK]]: +; RV64-UF2-NEXT: br label %[[SCALAR_PH]] +; RV64-UF2: [[SCALAR_PH]]: +; RV64-UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 7, %[[MIDDLE_BLOCK]] ], [ 1023, %[[ENTRY]] ] +; RV64-UF2-NEXT: br label %[[FOR_BODY:.*]] +; RV64-UF2: [[FOR_BODY]]: +; RV64-UF2-NEXT: [[DEC_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; RV64-UF2-NEXT: [[IV_NEXT]] = add nsw i64 [[DEC_IV]], -1 +; RV64-UF2-NEXT: [[ARRAYIDX_B:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[IV_NEXT]] +; RV64-UF2-NEXT: [[TMP12:%.*]] = load i7, ptr [[ARRAYIDX_B]], align 1 +; RV64-UF2-NEXT: [[ADD:%.*]] = add i7 [[TMP12]], 1 +; RV64-UF2-NEXT: [[ARRAYIDX_A:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[IV_NEXT]] +; RV64-UF2-NEXT: store i7 [[ADD]], ptr [[ARRAYIDX_A]], align 1 +; RV64-UF2-NEXT: [[CMP:%.*]] = icmp ugt i64 [[DEC_IV]], 1 +; RV64-UF2-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[EXIT:.*]], !llvm.loop [[LOOP7:![0-9]+]] +; RV64-UF2: [[EXIT]]: +; RV64-UF2-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %dec.iv = phi i64 [ 1023, %entry ], [ %iv.next, %for.body ] + %iv.next = add nsw i64 %dec.iv, -1 + %arrayidx.b = getelementptr inbounds i7, ptr %B, i64 %iv.next + %0 = load i7, ptr %arrayidx.b, align 1 + %add = add i7 %0, 1 + %arrayidx.a = getelementptr inbounds i7, ptr %A, i64 %iv.next + store i7 %add, ptr %arrayidx.a, align 1 + %cmp = icmp ugt i64 %dec.iv, 1 + br i1 %cmp, label %for.body, label %exit, !llvm.loop !4 + +exit: + ret void +} + +!0 = distinct !{!0, !1, !2, !3} +!1 = !{!"llvm.loop.vectorize.width", i32 4} +!2 = !{!"llvm.loop.vectorize.scalable.enable", i1 true} +!3 = !{!"llvm.loop.vectorize.enable", i1 true} +!4 = distinct !{!4, !1, !3} diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll index e49a71eb5b930..be69b714b5919 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll @@ -39,17 +39,15 @@ define void @vector_reverse_i32(ptr noalias %A, ptr noalias %B) { ; RV64-NEXT: [[REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP19]]) ; RV64-NEXT: [[TMP14:%.*]] = add [[REVERSE]], splat (i32 1) ; RV64-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP7]] -; RV64-NEXT: [[TMP16:%.*]] = zext i32 [[TMP19]] to i64 -; RV64-NEXT: [[TMP25:%.*]] = mul i64 0, [[TMP16]] -; RV64-NEXT: [[TMP17:%.*]] = sub i64 [[TMP16]], 1 +; RV64-NEXT: [[TMP25:%.*]] = mul i64 0, [[TMP24]] +; RV64-NEXT: [[TMP17:%.*]] = sub i64 [[TMP24]], 1 ; RV64-NEXT: [[TMP18:%.*]] = mul i64 -1, [[TMP17]] ; RV64-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[TMP15]], i64 [[TMP25]] ; RV64-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP20]], i64 [[TMP18]] ; RV64-NEXT: [[VP_REVERSE1:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[TMP14]], splat (i1 true), i32 [[TMP19]]) ; RV64-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_REVERSE1]], ptr align 4 [[TMP21]], splat (i1 true), i32 [[TMP19]]) -; RV64-NEXT: [[TMP22:%.*]] = zext i32 [[TMP19]] to i64 -; RV64-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP22]], [[INDEX]] -; RV64-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP22]] +; RV64-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP24]], [[INDEX]] +; RV64-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP24]] ; RV64-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1023 ; RV64-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; RV64: [[MIDDLE_BLOCK]]: @@ -230,17 +228,15 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV64-NEXT: [[REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP20]]) ; RV64-NEXT: [[TMP29:%.*]] = add [[REVERSE]], splat (i32 1) ; RV64-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP22]] -; RV64-NEXT: [[TMP39:%.*]] = zext i32 [[TMP20]] to i64 -; RV64-NEXT: [[TMP31:%.*]] = mul i64 0, [[TMP39]] -; RV64-NEXT: [[TMP32:%.*]] = sub i64 [[TMP39]], 1 +; RV64-NEXT: [[TMP31:%.*]] = mul i64 0, [[TMP24]] +; RV64-NEXT: [[TMP32:%.*]] = sub i64 [[TMP24]], 1 ; RV64-NEXT: [[TMP33:%.*]] = mul i64 -1, [[TMP32]] ; RV64-NEXT: [[TMP34:%.*]] = getelementptr i32, ptr [[TMP30]], i64 [[TMP31]] ; RV64-NEXT: [[TMP35:%.*]] = getelementptr i32, ptr [[TMP34]], i64 [[TMP33]] ; RV64-NEXT: [[VP_REVERSE3:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[TMP29]], splat (i1 true), i32 [[TMP20]]) ; RV64-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_REVERSE3]], ptr align 4 [[TMP35]], splat (i1 true), i32 [[TMP20]]) -; RV64-NEXT: [[TMP36:%.*]] = zext i32 [[TMP20]] to i64 -; RV64-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP36]], [[INDEX]] -; RV64-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP36]] +; RV64-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP24]], [[INDEX]] +; RV64-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP24]] ; RV64-NEXT: [[TMP37:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TMP0]] ; RV64-NEXT: br i1 [[TMP37]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; RV64: [[MIDDLE_BLOCK]]: @@ -491,17 +487,15 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV64-NEXT: [[REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4f32( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP20]]) ; RV64-NEXT: [[TMP29:%.*]] = fadd [[REVERSE]], splat (float 1.000000e+00) ; RV64-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP22]] -; RV64-NEXT: [[TMP39:%.*]] = zext i32 [[TMP20]] to i64 -; RV64-NEXT: [[TMP31:%.*]] = mul i64 0, [[TMP39]] -; RV64-NEXT: [[TMP32:%.*]] = sub i64 [[TMP39]], 1 +; RV64-NEXT: [[TMP31:%.*]] = mul i64 0, [[TMP24]] +; RV64-NEXT: [[TMP32:%.*]] = sub i64 [[TMP24]], 1 ; RV64-NEXT: [[TMP33:%.*]] = mul i64 -1, [[TMP32]] ; RV64-NEXT: [[TMP34:%.*]] = getelementptr float, ptr [[TMP30]], i64 [[TMP31]] ; RV64-NEXT: [[TMP35:%.*]] = getelementptr float, ptr [[TMP34]], i64 [[TMP33]] ; RV64-NEXT: [[VP_REVERSE3:%.*]] = call @llvm.experimental.vp.reverse.nxv4f32( [[TMP29]], splat (i1 true), i32 [[TMP20]]) ; RV64-NEXT: call void @llvm.vp.store.nxv4f32.p0( [[VP_REVERSE3]], ptr align 4 [[TMP35]], splat (i1 true), i32 [[TMP20]]) -; RV64-NEXT: [[TMP36:%.*]] = zext i32 [[TMP20]] to i64 -; RV64-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP36]], [[INDEX]] -; RV64-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP36]] +; RV64-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP24]], [[INDEX]] +; RV64-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP24]] ; RV64-NEXT: [[TMP37:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TMP0]] ; RV64-NEXT: br i1 [[TMP37]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; RV64: [[MIDDLE_BLOCK]]: @@ -724,17 +718,15 @@ define void @vector_reverse_f32_simplify(ptr noalias %A, ptr noalias %B) { ; RV64-NEXT: [[REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4f32( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP19]]) ; RV64-NEXT: [[TMP14:%.*]] = fadd [[REVERSE]], splat (float 1.000000e+00) ; RV64-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP7]] -; RV64-NEXT: [[TMP16:%.*]] = zext i32 [[TMP19]] to i64 -; RV64-NEXT: [[TMP25:%.*]] = mul i64 0, [[TMP16]] -; RV64-NEXT: [[TMP17:%.*]] = sub i64 [[TMP16]], 1 +; RV64-NEXT: [[TMP25:%.*]] = mul i64 0, [[TMP24]] +; RV64-NEXT: [[TMP17:%.*]] = sub i64 [[TMP24]], 1 ; RV64-NEXT: [[TMP18:%.*]] = mul i64 -1, [[TMP17]] ; RV64-NEXT: [[TMP20:%.*]] = getelementptr float, ptr [[TMP15]], i64 [[TMP25]] ; RV64-NEXT: [[TMP21:%.*]] = getelementptr float, ptr [[TMP20]], i64 [[TMP18]] ; RV64-NEXT: [[VP_REVERSE1:%.*]] = call @llvm.experimental.vp.reverse.nxv4f32( [[TMP14]], splat (i1 true), i32 [[TMP19]]) ; RV64-NEXT: call void @llvm.vp.store.nxv4f32.p0( [[VP_REVERSE1]], ptr align 4 [[TMP21]], splat (i1 true), i32 [[TMP19]]) -; RV64-NEXT: [[TMP22:%.*]] = zext i32 [[TMP19]] to i64 -; RV64-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP22]], [[INDEX]] -; RV64-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP22]] +; RV64-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP24]], [[INDEX]] +; RV64-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP24]] ; RV64-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1023 ; RV64-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; RV64: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll index 47979d358690a..58e09c2285307 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll @@ -25,9 +25,8 @@ define void @single_constant_stride_int_scaled(ptr %p) { ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.vp.gather.nxv4i32.nxv4p0( align 4 [[TMP15]], splat (i1 true), i32 [[TMP11]]) ; CHECK-NEXT: [[TMP16:%.*]] = add [[WIDE_MASKED_GATHER]], splat (i32 1) ; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0( [[TMP16]], align 4 [[TMP15]], splat (i1 true), i32 [[TMP11]]) -; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP11]] to i64 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP13]], [[INDEX]] -; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP13]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP12]], [[INDEX]] +; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP12]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -88,9 +87,8 @@ define void @single_constant_stride_int_iv(ptr %p) { ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.vp.gather.nxv4i32.nxv4p0( align 4 [[TMP12]], splat (i1 true), i32 [[TMP7]]) ; CHECK-NEXT: [[TMP13:%.*]] = add [[WIDE_MASKED_GATHER]], splat (i32 1) ; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0( [[TMP13]], align 4 [[TMP12]], splat (i1 true), i32 [[TMP7]]) -; CHECK-NEXT: [[TMP14:%.*]] = zext i32 [[TMP7]] to i64 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP14]], [[INDEX]] -; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP14]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP11]], [[INDEX]] +; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] @@ -152,8 +150,7 @@ define void @single_constant_stride_ptr_iv(ptr %p) { ; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP11]] to i64 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP9]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] -; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP11]] to i64 -; CHECK-NEXT: [[TMP12:%.*]] = mul i64 8, [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = mul i64 8, [[TMP9]] ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP12]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] @@ -503,9 +500,8 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) { ; STRIDED-NEXT: [[TMP20:%.*]] = add [[WIDE_MASKED_GATHER]], splat (i32 1) ; STRIDED-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[P2]], [[TMP18]] ; STRIDED-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0( [[TMP20]], align 4 [[TMP21]], splat (i1 true), i32 [[TMP43]]), !alias.scope [[META12:![0-9]+]], !noalias [[META9]] -; STRIDED-NEXT: [[TMP46:%.*]] = zext i32 [[TMP43]] to i64 -; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP46]], [[INDEX]] -; STRIDED-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP46]] +; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP44]], [[INDEX]] +; STRIDED-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP44]] ; STRIDED-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; STRIDED-NEXT: [[TMP41:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; STRIDED-NEXT: br i1 [[TMP41]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] @@ -690,11 +686,9 @@ define void @double_stride_ptr_iv(ptr %p, ptr %p2, i64 %stride) { ; STRIDED-NEXT: [[TMP16:%.*]] = zext i32 [[TMP14]] to i64 ; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP16]], [[INDEX]] ; STRIDED-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] -; STRIDED-NEXT: [[TMP20:%.*]] = zext i32 [[TMP14]] to i64 -; STRIDED-NEXT: [[TMP25:%.*]] = mul i64 [[STRIDE]], [[TMP20]] +; STRIDED-NEXT: [[TMP25:%.*]] = mul i64 [[STRIDE]], [[TMP16]] ; STRIDED-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP25]] -; STRIDED-NEXT: [[TMP22:%.*]] = zext i32 [[TMP14]] to i64 -; STRIDED-NEXT: [[TMP17:%.*]] = mul i64 [[STRIDE]], [[TMP22]] +; STRIDED-NEXT: [[TMP17:%.*]] = mul i64 [[STRIDE]], [[TMP16]] ; STRIDED-NEXT: [[PTR_IND12]] = getelementptr i8, ptr [[POINTER_PHI11]], i64 [[TMP17]] ; STRIDED-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; STRIDED-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll index 7ea462eed42db..f2d296e2685c7 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll @@ -1209,9 +1209,8 @@ define void @vp_ptrtoint(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP15:%.*]] = ptrtoint [[TMP14]] to ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[TMP15]], ptr align 8 [[TMP16]], splat (i1 true), i32 [[TMP11]]) -; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP18]] +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP12]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP12]] ; IF-EVL-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] ; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll index fe3a723a9bbea..53c00dc1c7446 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll @@ -703,12 +703,12 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) { ; IF-EVL-OUTLOOP-NEXT: [[TMP23:%.*]] = select [[TMP18]], [[TMP21]], zeroinitializer ; IF-EVL-OUTLOOP-NEXT: [[PREDPHI:%.*]] = select [[TMP23]], [[VEC_PHI]], [[TMP22]] ; IF-EVL-OUTLOOP-NEXT: [[TMP24]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[PREDPHI]], [[VEC_PHI]], i32 [[TMP14]]) -; IF-EVL-OUTLOOP-NEXT: [[TMP25:%.*]] = zext i32 [[TMP14]] to i64 -; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP25]], [[IV]] -; IF-EVL-OUTLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP25]] +; IF-EVL-OUTLOOP-NEXT: [[TMP16:%.*]] = zext i32 [[TMP14]] to i64 +; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[IV]] +; IF-EVL-OUTLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; IF-EVL-OUTLOOP-NEXT: [[VEC_IND_NEXT7]] = add [[VEC_IND2]], [[BROADCAST_SPLAT2]] -; IF-EVL-OUTLOOP-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] -; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK1:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; IF-EVL-OUTLOOP-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] +; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK1:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; IF-EVL-OUTLOOP: middle.block: ; IF-EVL-OUTLOOP-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP24]]) ; IF-EVL-OUTLOOP-NEXT: br label [[FOR_END:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll index bf54f669b65f9..ba2fa17267fd5 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll @@ -609,9 +609,8 @@ define void @first_order_recurrence_indvar(ptr noalias %A, i64 %TC) { ; IF-EVL-NEXT: [[TMP15:%.*]] = call @llvm.experimental.vp.splice.nxv2i64( [[VECTOR_RECUR]], [[TMP20]], i32 -1, splat (i1 true), i32 [[PREV_EVL]], i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i64, ptr [[A]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[TMP15]], ptr align 8 [[TMP9]], splat (i1 true), i32 [[TMP11]]) -; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP11]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP21]] +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP7]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]] ; IF-EVL-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TC]] ; IF-EVL-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-gather-scatter.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-gather-scatter.ll index 6f723c268914c..653549dfb7fa8 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-gather-scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-gather-scatter.ll @@ -30,9 +30,8 @@ define void @gather_scatter(ptr noalias %in, ptr noalias %out, ptr noalias %inde ; IF-EVL-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call @llvm.vp.gather.nxv2f32.nxv2p0( align 4 [[TMP15]], splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[OUT:%.*]], [[WIDE_MASKED_GATHER]] ; IF-EVL-NEXT: call void @llvm.vp.scatter.nxv2f32.nxv2p0( [[WIDE_MASKED_GATHER2]], align 4 [[TMP16]], splat (i1 true), i32 [[TMP11]]) -; IF-EVL-NEXT: [[TMP17:%.*]] = zext i32 [[TMP11]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP17]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP17]] +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP12]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP12]] ; IF-EVL-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; IF-EVL-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] ; IF-EVL-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll index 332c16e8eb656..e45c98093871b 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll @@ -159,9 +159,8 @@ define i32 @load_factor_4_with_gap(i64 %n, ptr noalias %a) { ; IF-EVL-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call @llvm.vp.gather.nxv4i32.nxv4p0( align 4 [[TMP10]], splat (i1 true), i32 [[TMP4]]) ; IF-EVL-NEXT: [[TMP11:%.*]] = add [[TMP9]], [[WIDE_MASKED_GATHER2]] ; IF-EVL-NEXT: [[TMP12]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[TMP11]], [[VEC_PHI]], i32 [[TMP4]]) -; IF-EVL-NEXT: [[TMP13:%.*]] = zext i32 [[TMP4]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP13]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP13]] +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP5]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP5]] ; IF-EVL-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] ; IF-EVL-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] @@ -435,9 +434,8 @@ define i32 @load_factor_4_with_tail_gap(i64 %n, ptr noalias %a) { ; IF-EVL-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call @llvm.vp.gather.nxv4i32.nxv4p0( align 4 [[TMP10]], splat (i1 true), i32 [[TMP4]]) ; IF-EVL-NEXT: [[TMP11:%.*]] = add [[TMP9]], [[WIDE_MASKED_GATHER2]] ; IF-EVL-NEXT: [[TMP12]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[TMP11]], [[VEC_PHI]], i32 [[TMP4]]) -; IF-EVL-NEXT: [[TMP13:%.*]] = zext i32 [[TMP4]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP13]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP13]] +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP5]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP5]] ; IF-EVL-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]] ; IF-EVL-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] @@ -710,9 +708,8 @@ define i32 @load_factor_4_reverse(i64 %n, ptr noalias %a) { ; IF-EVL-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call @llvm.vp.gather.nxv4i32.nxv4p0( align 4 [[TMP14]], splat (i1 true), i32 [[TMP6]]) ; IF-EVL-NEXT: [[TMP15:%.*]] = add [[TMP13]], [[WIDE_MASKED_GATHER5]] ; IF-EVL-NEXT: [[TMP16]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[TMP15]], [[VEC_PHI]], i32 [[TMP6]]) -; IF-EVL-NEXT: [[TMP17:%.*]] = zext i32 [[TMP6]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP17]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP17]] +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP7]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]] ; IF-EVL-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] ; IF-EVL-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TMP1]] ; IF-EVL-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction-cost.ll index 10d83f4ad125e..34fd04f01ad61 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction-cost.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; REQUIRES: asserts ; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize --disable-output \ ; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \ diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll index c804fd9ceb7a9..e0e3d4b174a66 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll @@ -29,19 +29,17 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], splat (i1 true), i32 [[TMP5]]) ; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP5]]) ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[PTR2:%.*]], i64 [[TMP7]] -; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP5]] to i64 -; IF-EVL-NEXT: [[TMP14:%.*]] = mul i64 0, [[TMP19]] -; IF-EVL-NEXT: [[TMP23:%.*]] = sub i64 [[TMP19]], 1 +; IF-EVL-NEXT: [[TMP14:%.*]] = mul i64 0, [[TMP18]] +; IF-EVL-NEXT: [[TMP23:%.*]] = sub i64 [[TMP18]], 1 ; IF-EVL-NEXT: [[TMP15:%.*]] = mul i64 -1, [[TMP23]] ; IF-EVL-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[TMP13]], i64 [[TMP14]] ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP22]], i64 [[TMP15]] ; IF-EVL-NEXT: [[VP_REVERSE3:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_REVERSE]], splat (i1 true), i32 [[TMP5]]) ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_REVERSE3]], ptr align 4 [[TMP17]], splat (i1 true), i32 [[TMP5]]) -; IF-EVL-NEXT: [[TMP20:%.*]] = zext i32 [[TMP5]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP20]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP20]] -; IF-EVL-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1024 -; IF-EVL-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP18]] +; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1024 +; IF-EVL-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[LOOPEND:%.*]] ; IF-EVL: scalar.ph: @@ -164,20 +162,18 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal ; IF-EVL-NEXT: [[VP_OP_LOAD4:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP20]], [[VP_REVERSE_MASK]], i32 [[TMP5]]) ; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_OP_LOAD4]], splat (i1 true), i32 [[TMP5]]) ; IF-EVL-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[PTR2:%.*]], i64 [[TMP11]] -; IF-EVL-NEXT: [[TMP27:%.*]] = zext i32 [[TMP5]] to i64 -; IF-EVL-NEXT: [[TMP22:%.*]] = mul i64 0, [[TMP27]] -; IF-EVL-NEXT: [[TMP30:%.*]] = sub i64 [[TMP27]], 1 +; IF-EVL-NEXT: [[TMP22:%.*]] = mul i64 0, [[TMP26]] +; IF-EVL-NEXT: [[TMP30:%.*]] = sub i64 [[TMP26]], 1 ; IF-EVL-NEXT: [[TMP23:%.*]] = mul i64 -1, [[TMP30]] ; IF-EVL-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP21]], i64 [[TMP22]] ; IF-EVL-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP24]], i64 [[TMP23]] ; IF-EVL-NEXT: [[VP_REVERSE5:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_REVERSE]], splat (i1 true), i32 [[TMP5]]) ; IF-EVL-NEXT: [[VP_REVERSE_MASK6:%.*]] = call @llvm.experimental.vp.reverse.nxv4i1( [[TMP14]], splat (i1 true), i32 [[TMP5]]) ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_REVERSE5]], ptr align 4 [[TMP25]], [[VP_REVERSE_MASK6]], i32 [[TMP5]]) -; IF-EVL-NEXT: [[TMP28:%.*]] = zext i32 [[TMP5]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP28]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP28]] -; IF-EVL-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1024 -; IF-EVL-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP26]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP26]] +; IF-EVL-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1024 +; IF-EVL-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[LOOPEND:%.*]] ; IF-EVL: scalar.ph: @@ -328,28 +324,25 @@ define void @multiple_reverse_vector_pointer(ptr noalias %a, ptr noalias %b, ptr ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[B:%.*]], [[VP_REVERSE]] ; IF-EVL-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.vp.gather.nxv16i8.nxv16p0( align 1 [[TMP14]], splat (i1 true), i32 [[TMP6]]) ; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[C:%.*]], i64 [[OFFSET_IDX]] -; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP6]] to i64 -; IF-EVL-NEXT: [[TMP17:%.*]] = mul i64 0, [[TMP16]] -; IF-EVL-NEXT: [[TMP30:%.*]] = sub i64 [[TMP16]], 1 +; IF-EVL-NEXT: [[TMP17:%.*]] = mul i64 0, [[TMP9]] +; IF-EVL-NEXT: [[TMP30:%.*]] = sub i64 [[TMP9]], 1 ; IF-EVL-NEXT: [[TMP18:%.*]] = mul i64 -1, [[TMP30]] ; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[TMP15]], i64 [[TMP17]] ; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[TMP19]], i64 [[TMP18]] ; IF-EVL-NEXT: [[VP_REVERSE1:%.*]] = call @llvm.experimental.vp.reverse.nxv16i8( [[WIDE_MASKED_GATHER]], splat (i1 true), i32 [[TMP6]]) ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_REVERSE1]], ptr align 1 [[TMP20]], splat (i1 true), i32 [[TMP6]]) ; IF-EVL-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[D:%.*]], i64 [[OFFSET_IDX]] -; IF-EVL-NEXT: [[TMP22:%.*]] = zext i32 [[TMP6]] to i64 -; IF-EVL-NEXT: [[TMP23:%.*]] = mul i64 0, [[TMP22]] -; IF-EVL-NEXT: [[TMP31:%.*]] = sub i64 [[TMP22]], 1 +; IF-EVL-NEXT: [[TMP23:%.*]] = mul i64 0, [[TMP9]] +; IF-EVL-NEXT: [[TMP31:%.*]] = sub i64 [[TMP9]], 1 ; IF-EVL-NEXT: [[TMP24:%.*]] = mul i64 -1, [[TMP31]] ; IF-EVL-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[TMP21]], i64 [[TMP23]] ; IF-EVL-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[TMP25]], i64 [[TMP24]] ; IF-EVL-NEXT: [[VP_REVERSE2:%.*]] = call @llvm.experimental.vp.reverse.nxv16i8( [[WIDE_MASKED_GATHER]], splat (i1 true), i32 [[TMP6]]) ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_REVERSE2]], ptr align 1 [[TMP26]], splat (i1 true), i32 [[TMP6]]) -; IF-EVL-NEXT: [[TMP27:%.*]] = zext i32 [[TMP6]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP27]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP27]] -; IF-EVL-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025 -; IF-EVL-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP9]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] +; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025 +; IF-EVL-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br label [[EXIT:%.*]] ; IF-EVL: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll index 55c4c4a411837..51512dd3fb30e 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll @@ -31,11 +31,10 @@ define void @lshift_significand(i32 %n, ptr nocapture writeonly %dst) { ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i64, ptr [[TMP18]], i64 [[TMP17]] ; CHECK-NEXT: [[VP_REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv2i64( zeroinitializer, splat (i1 true), i32 [[TMP11]]) ; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[VP_REVERSE]], ptr align 8 [[TMP19]], splat (i1 true), i32 [[TMP11]]) -; CHECK-NEXT: [[TMP20:%.*]] = zext i32 [[TMP11]] to i64 -; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP20]], [[EVL_BASED_IV]] -; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP20]] -; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TMP0]] -; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] +; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TMP0]] +; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll index e3a93cbf450ae..0ecba49b421ae 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll @@ -292,9 +292,8 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; SCALABLE-NEXT: [[PREDPHI:%.*]] = select [[TMP13]], [[WIDE_MASKED_GATHER]], zeroinitializer ; SCALABLE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] ; SCALABLE-NEXT: call void @llvm.vp.store.nxv4i64.p0( [[PREDPHI]], ptr align 8 [[TMP12]], splat (i1 true), i32 [[TMP17]]) -; SCALABLE-NEXT: [[TMP15:%.*]] = zext i32 [[TMP17]] to i64 -; SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP15]], [[INDEX]] -; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]] +; SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP8]], [[INDEX]] +; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; SCALABLE-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025 ; SCALABLE-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] @@ -396,9 +395,8 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; TF-SCALABLE-NEXT: [[PREDPHI:%.*]] = select [[TMP9]], [[WIDE_MASKED_GATHER]], zeroinitializer ; TF-SCALABLE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] ; TF-SCALABLE-NEXT: call void @llvm.vp.store.nxv4i64.p0( [[PREDPHI]], ptr align 8 [[TMP12]], splat (i1 true), i32 [[TMP7]]) -; TF-SCALABLE-NEXT: [[TMP15:%.*]] = zext i32 [[TMP7]] to i64 -; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP15]], [[INDEX]] -; TF-SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]] +; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP11]], [[INDEX]] +; TF-SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]] ; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; TF-SCALABLE-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025 ; TF-SCALABLE-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] @@ -713,9 +711,8 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; SCALABLE-NEXT: call void @llvm.vp.scatter.nxv2i64.nxv2p0( [[VEC_IND]], align 8 [[BROADCAST_SPLAT1]], splat (i1 true), i32 [[TMP7]]) ; SCALABLE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]] ; SCALABLE-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr align 8 [[TMP16]], splat (i1 true), i32 [[TMP7]]) -; SCALABLE-NEXT: [[TMP11:%.*]] = zext i32 [[TMP7]] to i64 -; SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP11]], [[TMP10]] -; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]] +; SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP8]], [[TMP10]] +; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025 ; SCALABLE-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] @@ -796,9 +793,8 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; TF-SCALABLE-NEXT: call void @llvm.vp.scatter.nxv2i64.nxv2p0( [[VEC_IND]], align 8 [[BROADCAST_SPLAT]], splat (i1 true), i32 [[TMP9]]) ; TF-SCALABLE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] ; TF-SCALABLE-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[BROADCAST_SPLAT3]], ptr align 8 [[TMP10]], splat (i1 true), i32 [[TMP9]]) -; TF-SCALABLE-NEXT: [[TMP11:%.*]] = zext i32 [[TMP9]] to i64 -; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP11]], [[INDEX]] -; TF-SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]] +; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP13]], [[INDEX]] +; TF-SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP13]] ; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] ; TF-SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025 ; TF-SCALABLE-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] @@ -859,9 +855,8 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; SCALABLE-NEXT: call void @llvm.vp.scatter.nxv2i64.nxv2p0( [[BROADCAST_SPLAT1]], align 8 [[BROADCAST_SPLAT2]], [[TMP10]], i32 [[TMP7]]) ; SCALABLE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] ; SCALABLE-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[BROADCAST_SPLAT1]], ptr align 8 [[TMP12]], splat (i1 true), i32 [[TMP7]]) -; SCALABLE-NEXT: [[TMP15:%.*]] = zext i32 [[TMP7]] to i64 -; SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP15]], [[INDEX]] -; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]] +; SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP14]], [[INDEX]] +; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP14]] ; SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; SCALABLE-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025 ; SCALABLE-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] @@ -957,9 +952,8 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; TF-SCALABLE-NEXT: call void @llvm.vp.scatter.nxv2i64.nxv2p0( [[BROADCAST_SPLAT1]], align 8 [[BROADCAST_SPLAT2]], [[TMP10]], i32 [[TMP9]]) ; TF-SCALABLE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] ; TF-SCALABLE-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[BROADCAST_SPLAT1]], ptr align 8 [[TMP12]], splat (i1 true), i32 [[TMP9]]) -; TF-SCALABLE-NEXT: [[TMP14:%.*]] = zext i32 [[TMP9]] to i64 -; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP14]], [[INDEX]] -; TF-SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP14]] +; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP11]], [[INDEX]] +; TF-SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]] ; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; TF-SCALABLE-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1025 ; TF-SCALABLE-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll b/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll index 1225e0fc583e1..ec9bdbd337187 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll @@ -38,8 +38,7 @@ define void @foo(i32 %n) { ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, <4 x i64> [[VEC_IND]] ; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i64> [[VEC_IND]] to <4 x i32> ; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP1]], <4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true)) -; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i64> [[VEC_IND]] to <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[TMP8]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[TMP1]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: br label %[[FOR_BODY31:.*]] ; CHECK: [[FOR_BODY31]]: ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_BODY]] ], [ [[TMP4:%.*]], %[[FOR_BODY31]] ] @@ -71,8 +70,7 @@ define void @foo(i32 %n) { ; AVX-NEXT: [[TMP0:%.*]] = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, <8 x i64> [[VEC_IND]] ; AVX-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[VEC_IND]] to <8 x i32> ; AVX-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[TMP1]], <8 x ptr> [[TMP0]], i32 4, <8 x i1> splat (i1 true)) -; AVX-NEXT: [[TMP7:%.*]] = trunc <8 x i64> [[VEC_IND]] to <8 x i32> -; AVX-NEXT: [[TMP2:%.*]] = add nsw <8 x i32> [[TMP7]], [[BROADCAST_SPLAT]] +; AVX-NEXT: [[TMP2:%.*]] = add nsw <8 x i32> [[TMP1]], [[BROADCAST_SPLAT]] ; AVX-NEXT: br label %[[FOR_BODY31:.*]] ; AVX: [[FOR_BODY31]]: ; AVX-NEXT: [[VEC_PHI:%.*]] = phi <8 x i64> [ zeroinitializer, %[[VECTOR_BODY]] ], [ [[TMP4:%.*]], %[[FOR_BODY31]] ] diff --git a/llvm/test/Transforms/LoopVectorize/cse.ll b/llvm/test/Transforms/LoopVectorize/cse.ll new file mode 100644 index 0000000000000..616321edda102 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/cse.ll @@ -0,0 +1,309 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph:" --version 5 +; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-epilogue-vectorization=false -S < %s | FileCheck %s + +define i32 @common_sext(ptr %a, ptr %b, ptr %c, i32 %N) { +; CHECK-LABEL: define i32 @common_sext( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP28_NOT:%.*]] = icmp ult i32 [[N]], 2 +; CHECK-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1 +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1 +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1 +; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i8> [[WIDE_LOAD]] to <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = sext <4 x i8> [[WIDE_LOAD1]] to <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = sext <4 x i8> [[WIDE_LOAD2]] to <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = add <4 x i32> [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP11]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP11]], i32 3 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; +entry: + %cmp28.not = icmp ult i32 %N, 2 + %div27 = lshr i32 %N, 1 + %wide.trip.count = zext nneg i32 %div27 to i64 + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %res = phi i32 [ 0, %entry ], [ %add3, %for.body ] + %a.ptr = getelementptr inbounds nuw i8, ptr %a, i64 %iv + %b.ptr = getelementptr inbounds nuw i8, ptr %b, i64 %iv + %c.ptr = getelementptr inbounds nuw i8, ptr %c, i64 %iv + %a.val = load i8, ptr %a.ptr, align 1 + %b.val = load i8, ptr %b.ptr, align 1 + %c.val = load i8, ptr %c.ptr, align 1 + %a.ext = sext i8 %a.val to i32 + %b.ext = sext i8 %b.val to i32 + %b.ext2 = sext i8 %b.val to i32 + %c.ext = sext i8 %c.val to i32 + %add = add nsw i32 %a.ext, %b.ext + %add2 = add nsw i32 %c.ext, %b.ext2 + %add3 = add i32 %add, %add2 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %wide.trip.count + br i1 %exitcond, label %exit, label %for.body + +exit: ; preds = %exit.loopexit, %entry + %res.0.lcssa = phi i32 [ %add3, %for.body ] + ret i32 %res.0.lcssa +} + +define i32 @common_zext(ptr %a, ptr %b, ptr %c, i32 %N) { +; CHECK-LABEL: define i32 @common_zext( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP28_NOT:%.*]] = icmp ult i32 [[N]], 2 +; CHECK-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1 +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1 +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP11]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[WIDE_LOAD1]] to <4 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = zext <4 x i8> [[WIDE_LOAD2]] to <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[TMP13]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; +entry: + %cmp28.not = icmp ult i32 %N, 2 + %div27 = lshr i32 %N, 1 + %wide.trip.count = zext nneg i32 %div27 to i64 + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %res = phi i32 [ 0, %entry ], [ %add3, %for.body ] + %a.ptr = getelementptr inbounds nuw i8, ptr %a, i64 %iv + %b.ptr = getelementptr inbounds nuw i8, ptr %b, i64 %iv + %c.ptr = getelementptr inbounds nuw i8, ptr %c, i64 %iv + %a.val = load i8, ptr %a.ptr, align 1 + %b.val = load i8, ptr %b.ptr, align 1 + %c.val = load i8, ptr %c.ptr, align 1 + %a.ext = zext i8 %a.val to i32 + %b.ext = zext i8 %b.val to i32 + %b.ext2 = zext i8 %b.val to i32 + %c.ext = zext i8 %c.val to i32 + %add = add nsw i32 %a.ext, %b.ext + %add2 = add nsw i32 %c.ext, %b.ext2 + %add3 = add i32 %add, %add2 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %wide.trip.count + br i1 %exitcond, label %exit, label %for.body + +exit: ; preds = %exit.loopexit, %entry + %res.0.lcssa = phi i32 [ %add3, %for.body ] + ret i32 %res.0.lcssa +} + +define i32 @common_sext_different_types(ptr %a, ptr %b, ptr %c, i32 %N) { +; CHECK-LABEL: define i32 @common_sext_different_types( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP28_NOT:%.*]] = icmp ult i32 [[N]], 2 +; CHECK-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1 +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 [[TMP0]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = load i16, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP6]], align 1 +; CHECK-NEXT: [[TMP14:%.*]] = load i16, ptr [[TMP7]], align 1 +; CHECK-NEXT: [[TMP15:%.*]] = load i16, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i16> poison, i16 [[TMP12]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i16> [[TMP16]], i16 [[TMP13]], i32 1 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i16> [[TMP17]], i16 [[TMP14]], i32 2 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i16> [[TMP18]], i16 [[TMP15]], i32 3 +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP21:%.*]] = sext <4 x i8> [[WIDE_LOAD]] to <4 x i32> +; CHECK-NEXT: [[TMP22:%.*]] = sext <4 x i8> [[WIDE_LOAD1]] to <4 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = sext <4 x i16> [[TMP19]] to <4 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = sext <4 x i8> [[WIDE_LOAD2]] to <4 x i32> +; CHECK-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[TMP21]], [[TMP22]] +; CHECK-NEXT: [[TMP26:%.*]] = add nsw <4 x i32> [[TMP24]], [[TMP23]] +; CHECK-NEXT: [[TMP27:%.*]] = add <4 x i32> [[TMP25]], [[TMP26]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP28]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i32> [[TMP27]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP27]], i32 3 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; +entry: + %cmp28.not = icmp ult i32 %N, 2 + %div27 = lshr i32 %N, 1 + %wide.trip.count = zext nneg i32 %div27 to i64 + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %res = phi i32 [ 0, %entry ], [ %add3, %for.body ] + %a.ptr = getelementptr inbounds nuw i8, ptr %a, i64 %iv + %b.ptr = getelementptr inbounds nuw i8, ptr %b, i64 %iv + %b.ptr2 = getelementptr inbounds nuw i16, ptr %b, i64 %iv + %c.ptr = getelementptr inbounds nuw i8, ptr %c, i64 %iv + %a.val = load i8, ptr %a.ptr, align 1 + %b.val = load i8, ptr %b.ptr, align 1 + %b.val2 = load i16, ptr %b.ptr, align 1 + %c.val = load i8, ptr %c.ptr, align 1 + %a.ext = sext i8 %a.val to i32 + %b.ext = sext i8 %b.val to i32 + %b.ext2 = sext i16 %b.val2 to i32 + %c.ext = sext i8 %c.val to i32 + %add = add nsw i32 %a.ext, %b.ext + %add2 = add nsw i32 %c.ext, %b.ext2 + %add3 = add i32 %add, %add2 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %wide.trip.count + br i1 %exitcond, label %exit, label %for.body + +exit: ; preds = %exit.loopexit, %entry + %res.0.lcssa = phi i32 [ %add3, %for.body ] + ret i32 %res.0.lcssa +} +define i32 @common_zext_different_types(ptr %a, ptr %b, ptr %c, i32 %N) { +; CHECK-LABEL: define i32 @common_zext_different_types( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP28_NOT:%.*]] = icmp ult i32 [[N]], 2 +; CHECK-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1 +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 [[TMP0]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = load i16, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP6]], align 1 +; CHECK-NEXT: [[TMP14:%.*]] = load i16, ptr [[TMP7]], align 1 +; CHECK-NEXT: [[TMP15:%.*]] = load i16, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i16> poison, i16 [[TMP12]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i16> [[TMP16]], i16 [[TMP13]], i32 1 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i16> [[TMP17]], i16 [[TMP14]], i32 2 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i16> [[TMP18]], i16 [[TMP15]], i32 3 +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP21:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i32> +; CHECK-NEXT: [[TMP22:%.*]] = zext <4 x i8> [[WIDE_LOAD1]] to <4 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = zext <4 x i16> [[TMP19]] to <4 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = zext <4 x i8> [[WIDE_LOAD2]] to <4 x i32> +; CHECK-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[TMP21]], [[TMP22]] +; CHECK-NEXT: [[TMP26:%.*]] = add nsw <4 x i32> [[TMP24]], [[TMP23]] +; CHECK-NEXT: [[TMP27:%.*]] = add <4 x i32> [[TMP25]], [[TMP26]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP28]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i32> [[TMP27]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP27]], i32 3 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; +entry: + %cmp28.not = icmp ult i32 %N, 2 + %div27 = lshr i32 %N, 1 + %wide.trip.count = zext nneg i32 %div27 to i64 + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %res = phi i32 [ 0, %entry ], [ %add3, %for.body ] + %a.ptr = getelementptr inbounds nuw i8, ptr %a, i64 %iv + %b.ptr = getelementptr inbounds nuw i8, ptr %b, i64 %iv + %b.ptr2 = getelementptr inbounds nuw i16, ptr %b, i64 %iv + %c.ptr = getelementptr inbounds nuw i8, ptr %c, i64 %iv + %a.val = load i8, ptr %a.ptr, align 1 + %b.val = load i8, ptr %b.ptr, align 1 + %b.val2 = load i16, ptr %b.ptr, align 1 + %c.val = load i8, ptr %c.ptr, align 1 + %a.ext = zext i8 %a.val to i32 + %b.ext = zext i8 %b.val to i32 + %b.ext2 = zext i16 %b.val2 to i32 + %c.ext = zext i8 %c.val to i32 + %add = add nsw i32 %a.ext, %b.ext + %add2 = add nsw i32 %c.ext, %b.ext2 + %add3 = add i32 %add, %add2 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %wide.trip.count + br i1 %exitcond, label %exit, label %for.body + +exit: ; preds = %exit.loopexit, %entry + %res.0.lcssa = phi i32 [ %add3, %for.body ] + ret i32 %res.0.lcssa +} diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll index 10cbf66c783db..d4506d1205c95 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -563,13 +563,12 @@ define void @recurrence_3(ptr readonly noalias %a, ptr noalias %b, i32 %n, float ; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP4]] ; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 2 ; UNROLL-NO-VF-NEXT: [[TMP8]] = load i16, ptr [[TMP6]], align 2 -; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = sitofp i16 [[TMP7]] to double +; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = sitofp i16 [[TMP7]] to double ; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = sitofp i16 [[TMP8]] to double ; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = sitofp i16 [[VECTOR_RECUR]] to double -; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = sitofp i16 [[TMP7]] to double ; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = fmul fast double [[TMP11]], [[CONV1]] ; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = fmul fast double [[TMP12]], [[CONV1]] -; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = fsub fast double [[TMP9]], [[TMP13]] +; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = fsub fast double [[TMP12]], [[TMP13]] ; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = fsub fast double [[TMP10]], [[TMP14]] ; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[OFFSET_IDX]] ; UNROLL-NO-VF-NEXT: [[TMP18:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[TMP4]] @@ -1792,11 +1791,10 @@ define void @sink_after(ptr noalias %a, ptr noalias %b, i64 %n) { ; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 2 ; UNROLL-NO-VF-NEXT: [[TMP7]] = load i16, ptr [[TMP5]], align 2 ; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = sext i16 [[VECTOR_RECUR]] to i32 -; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = sext i16 [[TMP6]] to i32 ; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = sext i16 [[TMP6]] to i32 ; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = sext i16 [[TMP7]] to i32 ; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = mul nsw i32 [[TMP10]], [[TMP8]] -; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = mul nsw i32 [[TMP11]], [[TMP9]] +; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = mul nsw i32 [[TMP11]], [[TMP10]] ; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] ; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]] ; UNROLL-NO-VF-NEXT: store i32 [[TMP12]], ptr [[TMP14]], align 4 @@ -2026,11 +2024,10 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) { ; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP4]], align 2 ; UNROLL-NO-VF-NEXT: [[TMP7]] = load i16, ptr [[TMP5]], align 2 ; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = sext i16 [[VECTOR_RECUR]] to i32 -; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = sext i16 [[TMP6]] to i32 ; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = sext i16 [[TMP6]] to i32 ; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = sext i16 [[TMP7]] to i32 ; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = mul nsw i32 [[TMP10]], [[TMP8]] -; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = mul nsw i32 [[TMP11]], [[TMP9]] +; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = mul nsw i32 [[TMP11]], [[TMP10]] ; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] ; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]] ; UNROLL-NO-VF-NEXT: store i32 [[TMP12]], ptr [[TMP14]], align 4 @@ -2244,9 +2241,8 @@ define void @sink_after_with_multiple_users(ptr noalias %a, ptr noalias %b, i64 ; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = sext i16 [[TMP6]] to i32 ; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = add nsw i32 [[TMP8]], 2 ; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = add nsw i32 [[TMP9]], 2 -; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = sext i16 [[TMP6]] to i32 ; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = sext i16 [[TMP7]] to i32 -; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = mul nsw i32 [[TMP10]], [[TMP12]] +; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = mul nsw i32 [[TMP10]], [[TMP9]] ; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = mul nsw i32 [[TMP11]], [[TMP13]] ; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] ; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]] diff --git a/llvm/test/Transforms/LoopVectorize/float-induction.ll b/llvm/test/Transforms/LoopVectorize/float-induction.ll index 8a3cad0681013..5fe426efb5e3d 100644 --- a/llvm/test/Transforms/LoopVectorize/float-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/float-induction.ll @@ -932,8 +932,7 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca ; VEC1_INTERL2-NEXT: [[TMP4:%.*]] = or disjoint i64 [[INDEX]], 1 ; VEC1_INTERL2-NEXT: [[DOTCAST5:%.*]] = sitofp i64 [[INDEX]] to float ; VEC1_INTERL2-NEXT: [[TMP5:%.*]] = fmul fast float [[DOTCAST5]], -5.000000e-01 -; VEC1_INTERL2-NEXT: [[DOTCAST6:%.*]] = sitofp i64 [[INDEX]] to float -; VEC1_INTERL2-NEXT: [[TMP6:%.*]] = fmul fast float [[TMP0]], [[DOTCAST6]] +; VEC1_INTERL2-NEXT: [[TMP6:%.*]] = fmul fast float [[TMP0]], [[DOTCAST5]] ; VEC1_INTERL2-NEXT: [[OFFSET_IDX7:%.*]] = fadd fast float [[INIT]], [[TMP6]] ; VEC1_INTERL2-NEXT: [[TMP7:%.*]] = fadd fast float [[OFFSET_IDX7]], [[TMP0]] ; VEC1_INTERL2-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] diff --git a/llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll b/llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll index 6bc2f389d65ce..4244ef690ac0c 100644 --- a/llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll +++ b/llvm/test/Transforms/LoopVectorize/outer_loop_test1.ll @@ -36,8 +36,7 @@ define void @foo(i32 %n) { ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, <4 x i64> [[VEC_IND]] ; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i64> [[VEC_IND]] to <4 x i32> ; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP1]], <4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true)) -; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i64> [[VEC_IND]] to <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[TMP8]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[TMP1]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: br label %[[FOR_BODY31:.*]] ; CHECK: [[FOR_BODY31]]: ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_BODY]] ], [ [[TMP4:%.*]], %[[FOR_BODY31]] ] diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll index e7f73a82292f0..3bd7d8fdd5cdc 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll @@ -2195,8 +2195,7 @@ define i32 @predicated_not_dominates_reduction_twoadd(ptr nocapture noundef read ; CHECK-NEXT: [[TMP5:%.*]] = zext nneg <4 x i8> [[TMP4]] to <4 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[VEC_PHI]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = zext nneg <4 x i8> [[TMP4]] to <4 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP9]]) +; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5]]) ; CHECK-NEXT: [[TMP11]] = add i32 [[TMP8]], [[TMP10]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] @@ -2261,11 +2260,9 @@ define i32 @predicated_not_dominates_reduction_twoadd(ptr nocapture noundef read ; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = zext nneg <4 x i8> [[TMP8]] to <4 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP12]]) ; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = add i32 [[VEC_PHI1]], [[TMP13]] -; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = zext nneg <4 x i8> [[TMP7]] to <4 x i32> -; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP15]]) +; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP9]]) ; CHECK-INTERLEAVED-NEXT: [[TMP17]] = add i32 [[TMP11]], [[TMP16]] -; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = zext nneg <4 x i8> [[TMP8]] to <4 x i32> -; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP18]]) +; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP12]]) ; CHECK-INTERLEAVED-NEXT: [[TMP20]] = add i32 [[TMP14]], [[TMP19]] ; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; CHECK-INTERLEAVED-NEXT: [[TMP21:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] From ee0a425812730d737226800029f5ceaed0763215 Mon Sep 17 00:00:00 2001 From: Samuel Tebbs Date: Thu, 10 Jul 2025 13:34:28 +0100 Subject: [PATCH 2/6] Remove FunctionComparator changes and add MergeFunc test --- llvm/lib/IR/Instruction.cpp | 2 - .../Transforms/Utils/FunctionComparator.cpp | 4 - llvm/test/Transforms/LoopVectorize/cse.ll | 159 +++++++++++++++++- llvm/test/Transforms/MergeFunc/merge-ext.ll | 59 +++++++ 4 files changed, 214 insertions(+), 10 deletions(-) create mode 100644 llvm/test/Transforms/MergeFunc/merge-ext.ll diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp index 94dfed4df72fe..5e87b5ff941ad 100644 --- a/llvm/lib/IR/Instruction.cpp +++ b/llvm/lib/IR/Instruction.cpp @@ -935,8 +935,6 @@ bool Instruction::hasSameSpecialState(const Instruction *I2, if (const GetElementPtrInst *GEP = dyn_cast(I1)) return GEP->getSourceElementType() == cast(I2)->getSourceElementType(); - if (const CastInst *Cast = dyn_cast(I1)) - return Cast->getDestTy() == cast(I2)->getDestTy(); return true; } diff --git a/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/llvm/lib/Transforms/Utils/FunctionComparator.cpp index 8d1f7c9b3a2ea..6d4026e8209de 100644 --- a/llvm/lib/Transforms/Utils/FunctionComparator.cpp +++ b/llvm/lib/Transforms/Utils/FunctionComparator.cpp @@ -804,10 +804,6 @@ int FunctionComparator::cmpOperations(const Instruction *L, return Res; } } - if (const CastInst *Cast = dyn_cast(L)) { - const CastInst *CastR = cast(R); - return cmpTypes(Cast->getDestTy(), CastR->getDestTy()); - } return 0; } diff --git a/llvm/test/Transforms/LoopVectorize/cse.ll b/llvm/test/Transforms/LoopVectorize/cse.ll index 616321edda102..71325de1ca275 100644 --- a/llvm/test/Transforms/LoopVectorize/cse.ll +++ b/llvm/test/Transforms/LoopVectorize/cse.ll @@ -137,8 +137,8 @@ exit: ; preds = %exit.loopexit, %entry ret i32 %res.0.lcssa } -define i32 @common_sext_different_types(ptr %a, ptr %b, ptr %c, i32 %N) { -; CHECK-LABEL: define i32 @common_sext_different_types( +define i32 @common_sext_different_src_types(ptr %a, ptr %b, ptr %c, i32 %N) { +; CHECK-LABEL: define i32 @common_sext_different_src_types( ; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[CMP28_NOT:%.*]] = icmp ult i32 [[N]], 2 @@ -222,8 +222,9 @@ exit: ; preds = %exit.loopexit, %entry %res.0.lcssa = phi i32 [ %add3, %for.body ] ret i32 %res.0.lcssa } -define i32 @common_zext_different_types(ptr %a, ptr %b, ptr %c, i32 %N) { -; CHECK-LABEL: define i32 @common_zext_different_types( + +define i32 @common_zext_different_src_types(ptr %a, ptr %b, ptr %c, i32 %N) { +; CHECK-LABEL: define i32 @common_zext_different_src_types( ; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[N:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[CMP28_NOT:%.*]] = icmp ult i32 [[N]], 2 @@ -307,3 +308,153 @@ exit: ; preds = %exit.loopexit, %entry %res.0.lcssa = phi i32 [ %add3, %for.body ] ret i32 %res.0.lcssa } + +define i32 @common_sext_different_dest_types(ptr %a, ptr %b, ptr %c, i32 %N) { +; CHECK-LABEL: define i32 @common_sext_different_dest_types( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP28_NOT:%.*]] = icmp ult i32 [[N]], 2 +; CHECK-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1 +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i8> [[WIDE_LOAD]] to <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = sext <4 x i8> [[WIDE_LOAD1]] to <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = sext <4 x i8> [[WIDE_LOAD1]] to <4 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = sext <4 x i8> [[WIDE_LOAD2]] to <4 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = add nsw <4 x i16> [[TMP9]], [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = sext <4 x i16> [[TMP11]] to <4 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i32> [[TMP10]], [[TMP12]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP13]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP13]], i32 3 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; +entry: + %cmp28.not = icmp ult i32 %N, 2 + %div27 = lshr i32 %N, 1 + %wide.trip.count = zext nneg i32 %div27 to i64 + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %res = phi i32 [ 0, %entry ], [ %add3, %for.body ] + %a.ptr = getelementptr inbounds nuw i8, ptr %a, i64 %iv + %b.ptr = getelementptr inbounds nuw i8, ptr %b, i64 %iv + %b.ptr2 = getelementptr inbounds nuw i16, ptr %b, i64 %iv + %c.ptr = getelementptr inbounds nuw i8, ptr %c, i64 %iv + %a.val = load i8, ptr %a.ptr, align 1 + %b.val = load i8, ptr %b.ptr, align 1 + %c.val = load i8, ptr %c.ptr, align 1 + %a.ext = sext i8 %a.val to i32 + %b.ext = sext i8 %b.val to i32 + %b.ext2 = sext i8 %b.val to i16 + %c.ext = sext i8 %c.val to i16 + %add = add nsw i32 %a.ext, %b.ext + %add2 = add nsw i16 %c.ext, %b.ext2 + %add2.ext = sext i16 %add2 to i32 + %add3 = add i32 %add, %add2.ext + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %wide.trip.count + br i1 %exitcond, label %exit, label %for.body + +exit: ; preds = %exit.loopexit, %entry + %res.0.lcssa = phi i32 [ %add3, %for.body ] + ret i32 %res.0.lcssa +} + +define i32 @common_zext_different_dest_types(ptr %a, ptr %b, ptr %c, i32 %N) { +; CHECK-LABEL: define i32 @common_zext_different_dest_types( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP28_NOT:%.*]] = icmp ult i32 [[N]], 2 +; CHECK-NEXT: [[DIV27:%.*]] = lshr i32 [[N]], 1 +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[DIV27]] to i64 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = zext <4 x i8> [[WIDE_LOAD1]] to <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i8> [[WIDE_LOAD1]] to <4 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = zext <4 x i8> [[WIDE_LOAD2]] to <4 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = add nsw <4 x i16> [[TMP9]], [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = zext <4 x i16> [[TMP11]] to <4 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i32> [[TMP10]], [[TMP12]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP13]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP13]], i32 3 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; +entry: + %cmp28.not = icmp ult i32 %N, 2 + %div27 = lshr i32 %N, 1 + %wide.trip.count = zext nneg i32 %div27 to i64 + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %res = phi i32 [ 0, %entry ], [ %add3, %for.body ] + %a.ptr = getelementptr inbounds nuw i8, ptr %a, i64 %iv + %b.ptr = getelementptr inbounds nuw i8, ptr %b, i64 %iv + %b.ptr2 = getelementptr inbounds nuw i16, ptr %b, i64 %iv + %c.ptr = getelementptr inbounds nuw i8, ptr %c, i64 %iv + %a.val = load i8, ptr %a.ptr, align 1 + %b.val = load i8, ptr %b.ptr, align 1 + %c.val = load i8, ptr %c.ptr, align 1 + %a.ext = zext i8 %a.val to i32 + %b.ext = zext i8 %b.val to i32 + %b.ext2 = zext i8 %b.val to i16 + %c.ext = zext i8 %c.val to i16 + %add = add nsw i32 %a.ext, %b.ext + %add2 = add nsw i16 %c.ext, %b.ext2 + %add2.ext = zext i16 %add2 to i32 + %add3 = add i32 %add, %add2.ext + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %wide.trip.count + br i1 %exitcond, label %exit, label %for.body + +exit: ; preds = %exit.loopexit, %entry + %res.0.lcssa = phi i32 [ %add3, %for.body ] + ret i32 %res.0.lcssa +} diff --git a/llvm/test/Transforms/MergeFunc/merge-ext.ll b/llvm/test/Transforms/MergeFunc/merge-ext.ll new file mode 100644 index 0000000000000..e4da2dc1282b7 --- /dev/null +++ b/llvm/test/Transforms/MergeFunc/merge-ext.ll @@ -0,0 +1,59 @@ +; RUN: opt -S -passes=mergefunc < %s | FileCheck %s + +@llvm.compiler.used = appending global [2 x ptr] [ptr @zext_a, ptr @sext_a], section "llvm.metadata" + +define internal i32 @sext_a(i16 %a) unnamed_addr { + %b = sext i16 %a to i32 + ret i32 %b +} + +define i32 @sext_b(i16 %a) unnamed_addr { + %b = sext i16 %a to i32 + ret i32 %b +} + +define i32 @sext_c(i32 %a) unnamed_addr { + %b = tail call i32 @sext_a(i32 %a) + ret i32 %b +} + +define internal i32 @zext_a(i16 %a) unnamed_addr { + %b = zext i16 %a to i32 + ret i32 %b +} + +define i32 @zext_b(i16 %a) unnamed_addr { + %b = zext i16 %a to i32 + ret i32 %b +} + +define i32 @zext_c(i32 %a) unnamed_addr { + %b = tail call i32 @zext_a(i32 %a) + ret i32 %b +} + +; CHECK-LABEL: @llvm.compiler.used = appending global [2 x ptr] [ptr @zext_a, ptr @sext_a], section "llvm.metadata" + +; CHECK-LABEL: define i32 @sext_b(i16 %a) unnamed_addr +; CHECK-NEXT: sext +; CHECK-NEXT: ret + +; CHECK-LABEL: define i32 @sext_c(i32 %a) unnamed_addr +; CHECK-NEXT: tail call i32 @sext_b(i32 %a) +; CHECK-NEXT: ret + +; CHECK-LABEL: define i32 @zext_b(i16 %a) unnamed_addr +; CHECK-NEXT: zext +; CHECK-NEXT: ret + +; CHECK-LABEL: define i32 @zext_c(i32 %a) unnamed_addr +; CHECK-NEXT: tail call i32 @zext_b(i32 %a) +; CHECK-NEXT: ret + +; CHECK-LABEL: define internal i32 @sext_a(i16 %0) unnamed_addr +; CHECK-NEXT: tail call i32 @sext_b(i16 %0) +; CHECK-NEXT: ret + +; CHECK-LABEL: define internal i32 @zext_a(i16 %0) unnamed_addr +; CHECK-NEXT: tail call i32 @zext_b(i16 %0) +; CHECK-NEXT: ret From c2de7f0200c5ecca92c5f26997d5c23fc03e06b4 Mon Sep 17 00:00:00 2001 From: Samuel Tebbs Date: Tue, 22 Jul 2025 16:50:37 +0100 Subject: [PATCH 3/6] Update comment --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 34cb28bf48a85..f23ecc2311cf5 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2539,7 +2539,7 @@ struct CSEDenseMapInfo { } // end anonymous namespace -///Perform cse of induction variable instructions. +/// Perform cse of induction variable instructions, casts and selects. static void cse(BasicBlock *BB) { // Perform simple cse. SmallDenseMap CSEMap; From 95cbc8a865241a4993fc9a75096adadc74c87cf0 Mon Sep 17 00:00:00 2001 From: Sam Tebbs Date: Tue, 5 Aug 2025 15:30:51 +0100 Subject: [PATCH 4/6] Remove merge-ext.ll test --- llvm/test/Transforms/MergeFunc/merge-ext.ll | 59 --------------------- 1 file changed, 59 deletions(-) delete mode 100644 llvm/test/Transforms/MergeFunc/merge-ext.ll diff --git a/llvm/test/Transforms/MergeFunc/merge-ext.ll b/llvm/test/Transforms/MergeFunc/merge-ext.ll deleted file mode 100644 index e4da2dc1282b7..0000000000000 --- a/llvm/test/Transforms/MergeFunc/merge-ext.ll +++ /dev/null @@ -1,59 +0,0 @@ -; RUN: opt -S -passes=mergefunc < %s | FileCheck %s - -@llvm.compiler.used = appending global [2 x ptr] [ptr @zext_a, ptr @sext_a], section "llvm.metadata" - -define internal i32 @sext_a(i16 %a) unnamed_addr { - %b = sext i16 %a to i32 - ret i32 %b -} - -define i32 @sext_b(i16 %a) unnamed_addr { - %b = sext i16 %a to i32 - ret i32 %b -} - -define i32 @sext_c(i32 %a) unnamed_addr { - %b = tail call i32 @sext_a(i32 %a) - ret i32 %b -} - -define internal i32 @zext_a(i16 %a) unnamed_addr { - %b = zext i16 %a to i32 - ret i32 %b -} - -define i32 @zext_b(i16 %a) unnamed_addr { - %b = zext i16 %a to i32 - ret i32 %b -} - -define i32 @zext_c(i32 %a) unnamed_addr { - %b = tail call i32 @zext_a(i32 %a) - ret i32 %b -} - -; CHECK-LABEL: @llvm.compiler.used = appending global [2 x ptr] [ptr @zext_a, ptr @sext_a], section "llvm.metadata" - -; CHECK-LABEL: define i32 @sext_b(i16 %a) unnamed_addr -; CHECK-NEXT: sext -; CHECK-NEXT: ret - -; CHECK-LABEL: define i32 @sext_c(i32 %a) unnamed_addr -; CHECK-NEXT: tail call i32 @sext_b(i32 %a) -; CHECK-NEXT: ret - -; CHECK-LABEL: define i32 @zext_b(i16 %a) unnamed_addr -; CHECK-NEXT: zext -; CHECK-NEXT: ret - -; CHECK-LABEL: define i32 @zext_c(i32 %a) unnamed_addr -; CHECK-NEXT: tail call i32 @zext_b(i32 %a) -; CHECK-NEXT: ret - -; CHECK-LABEL: define internal i32 @sext_a(i16 %0) unnamed_addr -; CHECK-NEXT: tail call i32 @sext_b(i16 %0) -; CHECK-NEXT: ret - -; CHECK-LABEL: define internal i32 @zext_a(i16 %0) unnamed_addr -; CHECK-NEXT: tail call i32 @zext_b(i16 %0) -; CHECK-NEXT: ret From e51726baf2405eda9348261c1116fcea6be778a4 Mon Sep 17 00:00:00 2001 From: Sam Tebbs Date: Tue, 5 Aug 2025 16:05:12 +0100 Subject: [PATCH 5/6] Rebase --- .../RISCV/riscv-vector-reverse-output.ll | 653 ------------------ llvm/test/Transforms/LoopVectorize/cse.ll | 18 +- 2 files changed, 6 insertions(+), 665 deletions(-) delete mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse-output.ll diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse-output.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse-output.ll deleted file mode 100644 index 1d7c80b3d722f..0000000000000 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse-output.ll +++ /dev/null @@ -1,653 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5 -;; This is the loop in c++ being vectorize in this file with -;; vector.reverse -;; #pragma clang loop vectorize_width(4, scalable) -;; for (int i = N-1; i >= 0; --i) -;; a[i] = b[i] + 1.0; - -; RUN: opt -passes=loop-vectorize -mtriple=riscv64 -mattr=+v \ -; RUN: -riscv-v-vector-bits-min=128 -S < %s \ -; RUN: | FileCheck --check-prefix=RV64 %s - -; RUN: opt -passes=loop-vectorize -mtriple=riscv32 -mattr=+v \ -; RUN: -riscv-v-vector-bits-min=128 -S < %s \ -; RUN: | FileCheck --check-prefix=RV32 %s - -; RUN: opt -passes=loop-vectorize -mtriple=riscv64 -mattr=+v \ -; RUN: -riscv-v-vector-bits-min=128 -force-vector-interleave=2 -S < %s \ -; RUN: | FileCheck --check-prefix=RV64-UF2 %s - -define void @vector_reverse_i32(ptr noalias %A, ptr noalias %B) { -; RV64-LABEL: define void @vector_reverse_i32( -; RV64-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) #[[ATTR0:[0-9]+]] { -; RV64-NEXT: [[ENTRY:.*:]] -; RV64-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; RV64: [[VECTOR_PH]]: -; RV64-NEXT: br label %[[VECTOR_BODY:.*]] -; RV64: [[VECTOR_BODY]]: -; RV64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] -; RV64-NEXT: [[AVL:%.*]] = phi i64 [ 1023, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] -; RV64-NEXT: [[TMP0:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) -; RV64-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] -; RV64-NEXT: [[TMP8:%.*]] = add nsw i64 [[OFFSET_IDX]], -1 -; RV64-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP8]] -; RV64-NEXT: [[TMP5:%.*]] = zext i32 [[TMP0]] to i64 -; RV64-NEXT: [[TMP10:%.*]] = mul i64 0, [[TMP5]] -; RV64-NEXT: [[TMP22:%.*]] = sub i64 [[TMP5]], 1 -; RV64-NEXT: [[TMP11:%.*]] = mul i64 -1, [[TMP22]] -; RV64-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP9]], i64 [[TMP10]] -; RV64-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP7]], i64 [[TMP11]] -; RV64-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], splat (i1 true), i32 [[TMP0]]) -; RV64-NEXT: [[REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP0]]) -; RV64-NEXT: [[TMP14:%.*]] = add [[REVERSE]], splat (i32 1) -; RV64-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP8]] -; RV64-NEXT: [[TMP16:%.*]] = mul i64 0, [[TMP5]] -; RV64-NEXT: [[TMP23:%.*]] = sub i64 [[TMP5]], 1 -; RV64-NEXT: [[TMP17:%.*]] = mul i64 -1, [[TMP23]] -; RV64-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP15]], i64 [[TMP16]] -; RV64-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP18]], i64 [[TMP17]] -; RV64-NEXT: [[VP_REVERSE1:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[TMP14]], splat (i1 true), i32 [[TMP0]]) -; RV64-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_REVERSE1]], ptr align 4 [[TMP19]], splat (i1 true), i32 [[TMP0]]) -; RV64-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP5]], [[INDEX]] -; RV64-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP5]] -; RV64-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1023 -; RV64-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] -; RV64: [[MIDDLE_BLOCK]]: -; RV64-NEXT: br label %[[EXIT:.*]] -; RV64: [[SCALAR_PH]]: -; RV64-NEXT: br label %[[FOR_BODY:.*]] -; RV64: [[FOR_BODY]]: -; RV64-NEXT: [[DEC_IV:%.*]] = phi i64 [ 1023, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] -; RV64-NEXT: [[IV_NEXT]] = add nsw i64 [[DEC_IV]], -1 -; RV64-NEXT: [[ARRAYIDX_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV_NEXT]] -; RV64-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX_B]], align 4 -; RV64-NEXT: [[ADD:%.*]] = add i32 [[TMP21]], 1 -; RV64-NEXT: [[ARRAYIDX_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV_NEXT]] -; RV64-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX_A]], align 4 -; RV64-NEXT: [[CMP:%.*]] = icmp ugt i64 [[DEC_IV]], 1 -; RV64-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[EXIT]], !llvm.loop [[LOOP4:![0-9]+]] -; RV64: [[EXIT]]: -; RV64-NEXT: ret void -; -; RV32-LABEL: define void @vector_reverse_i32( -; RV32-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) #[[ATTR0:[0-9]+]] { -; RV32-NEXT: [[ENTRY:.*:]] -; RV32-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; RV32: [[VECTOR_PH]]: -; RV32-NEXT: br label %[[VECTOR_BODY:.*]] -; RV32: [[VECTOR_BODY]]: -; RV32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] -; RV32-NEXT: [[AVL:%.*]] = phi i64 [ 1023, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] -; RV32-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) -; RV32-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] -; RV32-NEXT: [[TMP8:%.*]] = add nsw i64 [[OFFSET_IDX]], -1 -; RV32-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP8]] -; RV32-NEXT: [[TMP11:%.*]] = mul i32 0, [[TMP10]] -; RV32-NEXT: [[TMP24:%.*]] = sub i32 [[TMP10]], 1 -; RV32-NEXT: [[TMP12:%.*]] = mul i32 -1, [[TMP24]] -; RV32-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP9]], i32 [[TMP11]] -; RV32-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i32 [[TMP12]] -; RV32-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP7]], splat (i1 true), i32 [[TMP10]]) -; RV32-NEXT: [[REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP10]]) -; RV32-NEXT: [[TMP15:%.*]] = add [[REVERSE]], splat (i32 1) -; RV32-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP8]] -; RV32-NEXT: [[TMP18:%.*]] = mul i32 0, [[TMP10]] -; RV32-NEXT: [[TMP25:%.*]] = sub i32 [[TMP10]], 1 -; RV32-NEXT: [[TMP19:%.*]] = mul i32 -1, [[TMP25]] -; RV32-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP16]], i32 [[TMP18]] -; RV32-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP13]], i32 [[TMP19]] -; RV32-NEXT: [[VP_REVERSE1:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[TMP15]], splat (i1 true), i32 [[TMP10]]) -; RV32-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_REVERSE1]], ptr align 4 [[TMP14]], splat (i1 true), i32 [[TMP10]]) -; RV32-NEXT: [[TMP17:%.*]] = zext i32 [[TMP10]] to i64 -; RV32-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP17]], [[INDEX]] -; RV32-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP17]] -; RV32-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1023 -; RV32-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] -; RV32: [[MIDDLE_BLOCK]]: -; RV32-NEXT: br label %[[EXIT:.*]] -; RV32: [[SCALAR_PH]]: -; RV32-NEXT: br label %[[FOR_BODY:.*]] -; RV32: [[FOR_BODY]]: -; RV32-NEXT: [[DEC_IV:%.*]] = phi i64 [ 1023, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] -; RV32-NEXT: [[IV_NEXT]] = add nsw i64 [[DEC_IV]], -1 -; RV32-NEXT: [[ARRAYIDX_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV_NEXT]] -; RV32-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX_B]], align 4 -; RV32-NEXT: [[ADD:%.*]] = add i32 [[TMP23]], 1 -; RV32-NEXT: [[ARRAYIDX_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV_NEXT]] -; RV32-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX_A]], align 4 -; RV32-NEXT: [[CMP:%.*]] = icmp ugt i64 [[DEC_IV]], 1 -; RV32-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[EXIT]], !llvm.loop [[LOOP4:![0-9]+]] -; RV32: [[EXIT]]: -; RV32-NEXT: ret void -; -; RV64-UF2-LABEL: define void @vector_reverse_i32( -; RV64-UF2-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) #[[ATTR0:[0-9]+]] { -; RV64-UF2-NEXT: [[ENTRY:.*]]: -; RV64-UF2-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; RV64-UF2-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 -; RV64-UF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1023, [[TMP1]] -; RV64-UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; RV64-UF2: [[VECTOR_PH]]: -; RV64-UF2-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; RV64-UF2-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 -; RV64-UF2-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 -; RV64-UF2-NEXT: [[N_VEC:%.*]] = urem i64 1023, [[TMP6]] -; RV64-UF2-NEXT: [[TMP7:%.*]] = sub i64 1023, [[N_VEC]] -; RV64-UF2-NEXT: [[TMP8:%.*]] = sub i64 1023, [[TMP7]] -; RV64-UF2-NEXT: br label %[[VECTOR_BODY:.*]] -; RV64-UF2: [[VECTOR_BODY]]: -; RV64-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; RV64-UF2-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] -; RV64-UF2-NEXT: [[TMP9:%.*]] = add nsw i64 [[OFFSET_IDX]], -1 -; RV64-UF2-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP9]] -; RV64-UF2-NEXT: [[TMP11:%.*]] = mul i64 0, [[TMP5]] -; RV64-UF2-NEXT: [[TMP32:%.*]] = sub i64 [[TMP5]], 1 -; RV64-UF2-NEXT: [[TMP12:%.*]] = mul i64 -1, [[TMP32]] -; RV64-UF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[TMP11]] -; RV64-UF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 [[TMP12]] -; RV64-UF2-NEXT: [[TMP15:%.*]] = mul i64 -1, [[TMP5]] -; RV64-UF2-NEXT: [[TMP33:%.*]] = sub i64 [[TMP5]], 1 -; RV64-UF2-NEXT: [[TMP16:%.*]] = mul i64 -1, [[TMP33]] -; RV64-UF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[TMP15]] -; RV64-UF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 [[TMP16]] -; RV64-UF2-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP14]], align 4 -; RV64-UF2-NEXT: [[REVERSE:%.*]] = call @llvm.vector.reverse.nxv4i32( [[WIDE_LOAD]]) -; RV64-UF2-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP18]], align 4 -; RV64-UF2-NEXT: [[REVERSE2:%.*]] = call @llvm.vector.reverse.nxv4i32( [[WIDE_LOAD1]]) -; RV64-UF2-NEXT: [[TMP19:%.*]] = add [[REVERSE]], splat (i32 1) -; RV64-UF2-NEXT: [[TMP20:%.*]] = add [[REVERSE2]], splat (i32 1) -; RV64-UF2-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP9]] -; RV64-UF2-NEXT: [[TMP22:%.*]] = mul i64 0, [[TMP5]] -; RV64-UF2-NEXT: [[TMP34:%.*]] = sub i64 [[TMP5]], 1 -; RV64-UF2-NEXT: [[TMP23:%.*]] = mul i64 -1, [[TMP34]] -; RV64-UF2-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 [[TMP22]] -; RV64-UF2-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i64 [[TMP23]] -; RV64-UF2-NEXT: [[TMP26:%.*]] = mul i64 -1, [[TMP5]] -; RV64-UF2-NEXT: [[TMP35:%.*]] = sub i64 [[TMP5]], 1 -; RV64-UF2-NEXT: [[TMP27:%.*]] = mul i64 -1, [[TMP35]] -; RV64-UF2-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 [[TMP26]] -; RV64-UF2-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[TMP28]], i64 [[TMP27]] -; RV64-UF2-NEXT: [[REVERSE3:%.*]] = call @llvm.vector.reverse.nxv4i32( [[TMP19]]) -; RV64-UF2-NEXT: store [[REVERSE3]], ptr [[TMP25]], align 4 -; RV64-UF2-NEXT: [[REVERSE4:%.*]] = call @llvm.vector.reverse.nxv4i32( [[TMP20]]) -; RV64-UF2-NEXT: store [[REVERSE4]], ptr [[TMP29]], align 4 -; RV64-UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; RV64-UF2-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[TMP7]] -; RV64-UF2-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] -; RV64-UF2: [[MIDDLE_BLOCK]]: -; RV64-UF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 1023, [[TMP7]] -; RV64-UF2-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] -; RV64-UF2: [[SCALAR_PH]]: -; RV64-UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP8]], %[[MIDDLE_BLOCK]] ], [ 1023, %[[ENTRY]] ] -; RV64-UF2-NEXT: br label %[[FOR_BODY:.*]] -; RV64-UF2: [[FOR_BODY]]: -; RV64-UF2-NEXT: [[DEC_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] -; RV64-UF2-NEXT: [[IV_NEXT]] = add nsw i64 [[DEC_IV]], -1 -; RV64-UF2-NEXT: [[ARRAYIDX_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV_NEXT]] -; RV64-UF2-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX_B]], align 4 -; RV64-UF2-NEXT: [[ADD:%.*]] = add i32 [[TMP31]], 1 -; RV64-UF2-NEXT: [[ARRAYIDX_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV_NEXT]] -; RV64-UF2-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX_A]], align 4 -; RV64-UF2-NEXT: [[CMP:%.*]] = icmp ugt i64 [[DEC_IV]], 1 -; RV64-UF2-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[EXIT]], !llvm.loop [[LOOP3:![0-9]+]] -; RV64-UF2: [[EXIT]]: -; RV64-UF2-NEXT: ret void -; -entry: - br label %for.body - -for.body: - %dec.iv = phi i64 [ 1023, %entry ], [ %iv.next, %for.body ] - %iv.next = add nsw i64 %dec.iv, -1 - %arrayidx.b = getelementptr inbounds i32, ptr %B, i64 %iv.next - %0 = load i32, ptr %arrayidx.b, align 4 - %add = add i32 %0, 1 - %arrayidx.a = getelementptr inbounds i32, ptr %A, i64 %iv.next - store i32 %add, ptr %arrayidx.a, align 4 - %cmp = icmp ugt i64 %dec.iv, 1 - br i1 %cmp, label %for.body, label %exit, !llvm.loop !0 - -exit: - ret void -} - -define void @vector_reverse_f32(ptr noalias %A, ptr noalias %B) { -; RV64-LABEL: define void @vector_reverse_f32( -; RV64-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) #[[ATTR0]] { -; RV64-NEXT: [[ENTRY:.*:]] -; RV64-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; RV64: [[VECTOR_PH]]: -; RV64-NEXT: br label %[[VECTOR_BODY:.*]] -; RV64: [[VECTOR_BODY]]: -; RV64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] -; RV64-NEXT: [[AVL:%.*]] = phi i64 [ 1023, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] -; RV64-NEXT: [[TMP0:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) -; RV64-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] -; RV64-NEXT: [[TMP8:%.*]] = add nsw i64 [[OFFSET_IDX]], -1 -; RV64-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP8]] -; RV64-NEXT: [[TMP5:%.*]] = zext i32 [[TMP0]] to i64 -; RV64-NEXT: [[TMP10:%.*]] = mul i64 0, [[TMP5]] -; RV64-NEXT: [[TMP22:%.*]] = sub i64 [[TMP5]], 1 -; RV64-NEXT: [[TMP11:%.*]] = mul i64 -1, [[TMP22]] -; RV64-NEXT: [[TMP7:%.*]] = getelementptr float, ptr [[TMP9]], i64 [[TMP10]] -; RV64-NEXT: [[TMP12:%.*]] = getelementptr float, ptr [[TMP7]], i64 [[TMP11]] -; RV64-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], splat (i1 true), i32 [[TMP0]]) -; RV64-NEXT: [[REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4f32( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP0]]) -; RV64-NEXT: [[TMP14:%.*]] = fadd [[REVERSE]], splat (float 1.000000e+00) -; RV64-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP8]] -; RV64-NEXT: [[TMP16:%.*]] = mul i64 0, [[TMP5]] -; RV64-NEXT: [[TMP23:%.*]] = sub i64 [[TMP5]], 1 -; RV64-NEXT: [[TMP17:%.*]] = mul i64 -1, [[TMP23]] -; RV64-NEXT: [[TMP18:%.*]] = getelementptr float, ptr [[TMP15]], i64 [[TMP16]] -; RV64-NEXT: [[TMP19:%.*]] = getelementptr float, ptr [[TMP18]], i64 [[TMP17]] -; RV64-NEXT: [[VP_REVERSE1:%.*]] = call @llvm.experimental.vp.reverse.nxv4f32( [[TMP14]], splat (i1 true), i32 [[TMP0]]) -; RV64-NEXT: call void @llvm.vp.store.nxv4f32.p0( [[VP_REVERSE1]], ptr align 4 [[TMP19]], splat (i1 true), i32 [[TMP0]]) -; RV64-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP5]], [[INDEX]] -; RV64-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP5]] -; RV64-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1023 -; RV64-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] -; RV64: [[MIDDLE_BLOCK]]: -; RV64-NEXT: br label %[[EXIT:.*]] -; RV64: [[SCALAR_PH]]: -; RV64-NEXT: br label %[[FOR_BODY:.*]] -; RV64: [[FOR_BODY]]: -; RV64-NEXT: [[DEC_IV:%.*]] = phi i64 [ 1023, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] -; RV64-NEXT: [[IV_NEXT]] = add nsw i64 [[DEC_IV]], -1 -; RV64-NEXT: [[ARRAYIDX_B:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV_NEXT]] -; RV64-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX_B]], align 4 -; RV64-NEXT: [[FADD:%.*]] = fadd float [[TMP21]], 1.000000e+00 -; RV64-NEXT: [[ARRAYIDX_A:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV_NEXT]] -; RV64-NEXT: store float [[FADD]], ptr [[ARRAYIDX_A]], align 4 -; RV64-NEXT: [[CMP:%.*]] = icmp ugt i64 [[DEC_IV]], 1 -; RV64-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[EXIT]], !llvm.loop [[LOOP6:![0-9]+]] -; RV64: [[EXIT]]: -; RV64-NEXT: ret void -; -; RV32-LABEL: define void @vector_reverse_f32( -; RV32-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) #[[ATTR0]] { -; RV32-NEXT: [[ENTRY:.*:]] -; RV32-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; RV32: [[VECTOR_PH]]: -; RV32-NEXT: br label %[[VECTOR_BODY:.*]] -; RV32: [[VECTOR_BODY]]: -; RV32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] -; RV32-NEXT: [[AVL:%.*]] = phi i64 [ 1023, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] -; RV32-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) -; RV32-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] -; RV32-NEXT: [[TMP8:%.*]] = add nsw i64 [[OFFSET_IDX]], -1 -; RV32-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP8]] -; RV32-NEXT: [[TMP11:%.*]] = mul i32 0, [[TMP10]] -; RV32-NEXT: [[TMP24:%.*]] = sub i32 [[TMP10]], 1 -; RV32-NEXT: [[TMP12:%.*]] = mul i32 -1, [[TMP24]] -; RV32-NEXT: [[TMP6:%.*]] = getelementptr float, ptr [[TMP9]], i32 [[TMP11]] -; RV32-NEXT: [[TMP7:%.*]] = getelementptr float, ptr [[TMP6]], i32 [[TMP12]] -; RV32-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP7]], splat (i1 true), i32 [[TMP10]]) -; RV32-NEXT: [[REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4f32( [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP10]]) -; RV32-NEXT: [[TMP15:%.*]] = fadd [[REVERSE]], splat (float 1.000000e+00) -; RV32-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP8]] -; RV32-NEXT: [[TMP18:%.*]] = mul i32 0, [[TMP10]] -; RV32-NEXT: [[TMP25:%.*]] = sub i32 [[TMP10]], 1 -; RV32-NEXT: [[TMP19:%.*]] = mul i32 -1, [[TMP25]] -; RV32-NEXT: [[TMP13:%.*]] = getelementptr float, ptr [[TMP16]], i32 [[TMP18]] -; RV32-NEXT: [[TMP14:%.*]] = getelementptr float, ptr [[TMP13]], i32 [[TMP19]] -; RV32-NEXT: [[VP_REVERSE1:%.*]] = call @llvm.experimental.vp.reverse.nxv4f32( [[TMP15]], splat (i1 true), i32 [[TMP10]]) -; RV32-NEXT: call void @llvm.vp.store.nxv4f32.p0( [[VP_REVERSE1]], ptr align 4 [[TMP14]], splat (i1 true), i32 [[TMP10]]) -; RV32-NEXT: [[TMP17:%.*]] = zext i32 [[TMP10]] to i64 -; RV32-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP17]], [[INDEX]] -; RV32-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP17]] -; RV32-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 1023 -; RV32-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] -; RV32: [[MIDDLE_BLOCK]]: -; RV32-NEXT: br label %[[EXIT:.*]] -; RV32: [[SCALAR_PH]]: -; RV32-NEXT: br label %[[FOR_BODY:.*]] -; RV32: [[FOR_BODY]]: -; RV32-NEXT: [[DEC_IV:%.*]] = phi i64 [ 1023, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] -; RV32-NEXT: [[IV_NEXT]] = add nsw i64 [[DEC_IV]], -1 -; RV32-NEXT: [[ARRAYIDX_B:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV_NEXT]] -; RV32-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX_B]], align 4 -; RV32-NEXT: [[FADD:%.*]] = fadd float [[TMP23]], 1.000000e+00 -; RV32-NEXT: [[ARRAYIDX_A:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV_NEXT]] -; RV32-NEXT: store float [[FADD]], ptr [[ARRAYIDX_A]], align 4 -; RV32-NEXT: [[CMP:%.*]] = icmp ugt i64 [[DEC_IV]], 1 -; RV32-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[EXIT]], !llvm.loop [[LOOP6:![0-9]+]] -; RV32: [[EXIT]]: -; RV32-NEXT: ret void -; -; RV64-UF2-LABEL: define void @vector_reverse_f32( -; RV64-UF2-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) #[[ATTR0]] { -; RV64-UF2-NEXT: [[ENTRY:.*]]: -; RV64-UF2-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; RV64-UF2-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 -; RV64-UF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1023, [[TMP1]] -; RV64-UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; RV64-UF2: [[VECTOR_PH]]: -; RV64-UF2-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; RV64-UF2-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 -; RV64-UF2-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2 -; RV64-UF2-NEXT: [[N_VEC:%.*]] = urem i64 1023, [[TMP6]] -; RV64-UF2-NEXT: [[TMP7:%.*]] = sub i64 1023, [[N_VEC]] -; RV64-UF2-NEXT: [[TMP8:%.*]] = sub i64 1023, [[TMP7]] -; RV64-UF2-NEXT: br label %[[VECTOR_BODY:.*]] -; RV64-UF2: [[VECTOR_BODY]]: -; RV64-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; RV64-UF2-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] -; RV64-UF2-NEXT: [[TMP9:%.*]] = add nsw i64 [[OFFSET_IDX]], -1 -; RV64-UF2-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP9]] -; RV64-UF2-NEXT: [[TMP11:%.*]] = mul i64 0, [[TMP5]] -; RV64-UF2-NEXT: [[TMP32:%.*]] = sub i64 [[TMP5]], 1 -; RV64-UF2-NEXT: [[TMP12:%.*]] = mul i64 -1, [[TMP32]] -; RV64-UF2-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i64 [[TMP11]] -; RV64-UF2-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[TMP12]] -; RV64-UF2-NEXT: [[TMP15:%.*]] = mul i64 -1, [[TMP5]] -; RV64-UF2-NEXT: [[TMP33:%.*]] = sub i64 [[TMP5]], 1 -; RV64-UF2-NEXT: [[TMP16:%.*]] = mul i64 -1, [[TMP33]] -; RV64-UF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i64 [[TMP15]] -; RV64-UF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP16]] -; RV64-UF2-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP14]], align 4 -; RV64-UF2-NEXT: [[REVERSE:%.*]] = call @llvm.vector.reverse.nxv4f32( [[WIDE_LOAD]]) -; RV64-UF2-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP18]], align 4 -; RV64-UF2-NEXT: [[REVERSE2:%.*]] = call @llvm.vector.reverse.nxv4f32( [[WIDE_LOAD1]]) -; RV64-UF2-NEXT: [[TMP19:%.*]] = fadd [[REVERSE]], splat (float 1.000000e+00) -; RV64-UF2-NEXT: [[TMP20:%.*]] = fadd [[REVERSE2]], splat (float 1.000000e+00) -; RV64-UF2-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP9]] -; RV64-UF2-NEXT: [[TMP22:%.*]] = mul i64 0, [[TMP5]] -; RV64-UF2-NEXT: [[TMP34:%.*]] = sub i64 [[TMP5]], 1 -; RV64-UF2-NEXT: [[TMP23:%.*]] = mul i64 -1, [[TMP34]] -; RV64-UF2-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[TMP22]] -; RV64-UF2-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 [[TMP23]] -; RV64-UF2-NEXT: [[TMP26:%.*]] = mul i64 -1, [[TMP5]] -; RV64-UF2-NEXT: [[TMP35:%.*]] = sub i64 [[TMP5]], 1 -; RV64-UF2-NEXT: [[TMP27:%.*]] = mul i64 -1, [[TMP35]] -; RV64-UF2-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[TMP26]] -; RV64-UF2-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[TMP27]] -; RV64-UF2-NEXT: [[REVERSE3:%.*]] = call @llvm.vector.reverse.nxv4f32( [[TMP19]]) -; RV64-UF2-NEXT: store [[REVERSE3]], ptr [[TMP25]], align 4 -; RV64-UF2-NEXT: [[REVERSE4:%.*]] = call @llvm.vector.reverse.nxv4f32( [[TMP20]]) -; RV64-UF2-NEXT: store [[REVERSE4]], ptr [[TMP29]], align 4 -; RV64-UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; RV64-UF2-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[TMP7]] -; RV64-UF2-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; RV64-UF2: [[MIDDLE_BLOCK]]: -; RV64-UF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 1023, [[TMP7]] -; RV64-UF2-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] -; RV64-UF2: [[SCALAR_PH]]: -; RV64-UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP8]], %[[MIDDLE_BLOCK]] ], [ 1023, %[[ENTRY]] ] -; RV64-UF2-NEXT: br label %[[FOR_BODY:.*]] -; RV64-UF2: [[FOR_BODY]]: -; RV64-UF2-NEXT: [[DEC_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] -; RV64-UF2-NEXT: [[IV_NEXT]] = add nsw i64 [[DEC_IV]], -1 -; RV64-UF2-NEXT: [[ARRAYIDX_B:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV_NEXT]] -; RV64-UF2-NEXT: [[TMP31:%.*]] = load float, ptr [[ARRAYIDX_B]], align 4 -; RV64-UF2-NEXT: [[FADD:%.*]] = fadd float [[TMP31]], 1.000000e+00 -; RV64-UF2-NEXT: [[ARRAYIDX_A:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV_NEXT]] -; RV64-UF2-NEXT: store float [[FADD]], ptr [[ARRAYIDX_A]], align 4 -; RV64-UF2-NEXT: [[CMP:%.*]] = icmp ugt i64 [[DEC_IV]], 1 -; RV64-UF2-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[EXIT]], !llvm.loop [[LOOP5:![0-9]+]] -; RV64-UF2: [[EXIT]]: -; RV64-UF2-NEXT: ret void -; -entry: - br label %for.body - -for.body: - %dec.iv = phi i64 [ 1023, %entry ], [ %iv.next, %for.body ] - %iv.next = add nsw i64 %dec.iv, -1 - %arrayidx.b = getelementptr inbounds float, ptr %B, i64 %iv.next - %0 = load float, ptr %arrayidx.b, align 4 - %fadd = fadd float %0, 1.000000e+00 - %arrayidx.a = getelementptr inbounds float, ptr %A, i64 %iv.next - store float %fadd, ptr %arrayidx.a, align 4 - %cmp = icmp ugt i64 %dec.iv, 1 - br i1 %cmp, label %for.body, label %exit, !llvm.loop !0 - -exit: - ret void -} - -define void @vector_reverse_irregular_type(ptr noalias %A, ptr noalias %B) { -; RV64-LABEL: define void @vector_reverse_irregular_type( -; RV64-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) #[[ATTR0]] { -; RV64-NEXT: [[ENTRY:.*]]: -; RV64-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; RV64: [[VECTOR_PH]]: -; RV64-NEXT: br label %[[VECTOR_BODY:.*]] -; RV64: [[VECTOR_BODY]]: -; RV64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; RV64-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] -; RV64-NEXT: [[DEC_IV:%.*]] = add i64 [[OFFSET_IDX]], 0 -; RV64-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -1 -; RV64-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -2 -; RV64-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -3 -; RV64-NEXT: [[IV_NEXT:%.*]] = add nsw i64 [[DEC_IV]], -1 -; RV64-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP1]], -1 -; RV64-NEXT: [[TMP6:%.*]] = add nsw i64 [[TMP2]], -1 -; RV64-NEXT: [[TMP7:%.*]] = add nsw i64 [[TMP3]], -1 -; RV64-NEXT: [[ARRAYIDX_B:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[IV_NEXT]] -; RV64-NEXT: [[TMP9:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP5]] -; RV64-NEXT: [[TMP10:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP6]] -; RV64-NEXT: [[TMP11:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP7]] -; RV64-NEXT: [[TMP0:%.*]] = load i7, ptr [[ARRAYIDX_B]], align 1 -; RV64-NEXT: [[TMP13:%.*]] = load i7, ptr [[TMP9]], align 1 -; RV64-NEXT: [[TMP14:%.*]] = load i7, ptr [[TMP10]], align 1 -; RV64-NEXT: [[TMP15:%.*]] = load i7, ptr [[TMP11]], align 1 -; RV64-NEXT: [[TMP16:%.*]] = insertelement <4 x i7> poison, i7 [[TMP0]], i32 0 -; RV64-NEXT: [[TMP17:%.*]] = insertelement <4 x i7> [[TMP16]], i7 [[TMP13]], i32 1 -; RV64-NEXT: [[TMP18:%.*]] = insertelement <4 x i7> [[TMP17]], i7 [[TMP14]], i32 2 -; RV64-NEXT: [[TMP19:%.*]] = insertelement <4 x i7> [[TMP18]], i7 [[TMP15]], i32 3 -; RV64-NEXT: [[TMP20:%.*]] = add <4 x i7> [[TMP19]], splat (i7 1) -; RV64-NEXT: [[TMP21:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[IV_NEXT]] -; RV64-NEXT: [[TMP22:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP5]] -; RV64-NEXT: [[TMP23:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP6]] -; RV64-NEXT: [[TMP24:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP7]] -; RV64-NEXT: [[TMP25:%.*]] = extractelement <4 x i7> [[TMP20]], i32 0 -; RV64-NEXT: store i7 [[TMP25]], ptr [[TMP21]], align 1 -; RV64-NEXT: [[TMP26:%.*]] = extractelement <4 x i7> [[TMP20]], i32 1 -; RV64-NEXT: store i7 [[TMP26]], ptr [[TMP22]], align 1 -; RV64-NEXT: [[TMP27:%.*]] = extractelement <4 x i7> [[TMP20]], i32 2 -; RV64-NEXT: store i7 [[TMP27]], ptr [[TMP23]], align 1 -; RV64-NEXT: [[TMP28:%.*]] = extractelement <4 x i7> [[TMP20]], i32 3 -; RV64-NEXT: store i7 [[TMP28]], ptr [[TMP24]], align 1 -; RV64-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; RV64-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1020 -; RV64-NEXT: br i1 [[TMP29]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] -; RV64: [[MIDDLE_BLOCK]]: -; RV64-NEXT: br label %[[SCALAR_PH]] -; RV64: [[SCALAR_PH]]: -; RV64-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, %[[MIDDLE_BLOCK]] ], [ 1023, %[[ENTRY]] ] -; RV64-NEXT: br label %[[FOR_BODY:.*]] -; RV64: [[FOR_BODY]]: -; RV64-NEXT: [[DEC_IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], %[[FOR_BODY]] ] -; RV64-NEXT: [[IV_NEXT1]] = add nsw i64 [[DEC_IV1]], -1 -; RV64-NEXT: [[ARRAYIDX_B1:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[IV_NEXT1]] -; RV64-NEXT: [[TMP30:%.*]] = load i7, ptr [[ARRAYIDX_B1]], align 1 -; RV64-NEXT: [[ADD:%.*]] = add i7 [[TMP30]], 1 -; RV64-NEXT: [[ARRAYIDX_A:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[IV_NEXT1]] -; RV64-NEXT: store i7 [[ADD]], ptr [[ARRAYIDX_A]], align 1 -; RV64-NEXT: [[CMP:%.*]] = icmp ugt i64 [[DEC_IV1]], 1 -; RV64-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[EXIT:.*]], !llvm.loop [[LOOP8:![0-9]+]] -; RV64: [[EXIT]]: -; RV64-NEXT: ret void -; -; RV32-LABEL: define void @vector_reverse_irregular_type( -; RV32-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) #[[ATTR0]] { -; RV32-NEXT: [[ENTRY:.*]]: -; RV32-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; RV32: [[VECTOR_PH]]: -; RV32-NEXT: br label %[[VECTOR_BODY:.*]] -; RV32: [[VECTOR_BODY]]: -; RV32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; RV32-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] -; RV32-NEXT: [[DEC_IV:%.*]] = add i64 [[OFFSET_IDX]], 0 -; RV32-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -1 -; RV32-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -2 -; RV32-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -3 -; RV32-NEXT: [[IV_NEXT:%.*]] = add nsw i64 [[DEC_IV]], -1 -; RV32-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP1]], -1 -; RV32-NEXT: [[TMP6:%.*]] = add nsw i64 [[TMP2]], -1 -; RV32-NEXT: [[TMP7:%.*]] = add nsw i64 [[TMP3]], -1 -; RV32-NEXT: [[ARRAYIDX_B:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[IV_NEXT]] -; RV32-NEXT: [[TMP9:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP5]] -; RV32-NEXT: [[TMP10:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP6]] -; RV32-NEXT: [[TMP11:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP7]] -; RV32-NEXT: [[TMP0:%.*]] = load i7, ptr [[ARRAYIDX_B]], align 1 -; RV32-NEXT: [[TMP13:%.*]] = load i7, ptr [[TMP9]], align 1 -; RV32-NEXT: [[TMP14:%.*]] = load i7, ptr [[TMP10]], align 1 -; RV32-NEXT: [[TMP15:%.*]] = load i7, ptr [[TMP11]], align 1 -; RV32-NEXT: [[TMP16:%.*]] = insertelement <4 x i7> poison, i7 [[TMP0]], i32 0 -; RV32-NEXT: [[TMP17:%.*]] = insertelement <4 x i7> [[TMP16]], i7 [[TMP13]], i32 1 -; RV32-NEXT: [[TMP18:%.*]] = insertelement <4 x i7> [[TMP17]], i7 [[TMP14]], i32 2 -; RV32-NEXT: [[TMP19:%.*]] = insertelement <4 x i7> [[TMP18]], i7 [[TMP15]], i32 3 -; RV32-NEXT: [[TMP20:%.*]] = add <4 x i7> [[TMP19]], splat (i7 1) -; RV32-NEXT: [[TMP21:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[IV_NEXT]] -; RV32-NEXT: [[TMP22:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP5]] -; RV32-NEXT: [[TMP23:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP6]] -; RV32-NEXT: [[TMP24:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP7]] -; RV32-NEXT: [[TMP25:%.*]] = extractelement <4 x i7> [[TMP20]], i32 0 -; RV32-NEXT: store i7 [[TMP25]], ptr [[TMP21]], align 1 -; RV32-NEXT: [[TMP26:%.*]] = extractelement <4 x i7> [[TMP20]], i32 1 -; RV32-NEXT: store i7 [[TMP26]], ptr [[TMP22]], align 1 -; RV32-NEXT: [[TMP27:%.*]] = extractelement <4 x i7> [[TMP20]], i32 2 -; RV32-NEXT: store i7 [[TMP27]], ptr [[TMP23]], align 1 -; RV32-NEXT: [[TMP28:%.*]] = extractelement <4 x i7> [[TMP20]], i32 3 -; RV32-NEXT: store i7 [[TMP28]], ptr [[TMP24]], align 1 -; RV32-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; RV32-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1020 -; RV32-NEXT: br i1 [[TMP29]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] -; RV32: [[MIDDLE_BLOCK]]: -; RV32-NEXT: br label %[[SCALAR_PH]] -; RV32: [[SCALAR_PH]]: -; RV32-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 3, %[[MIDDLE_BLOCK]] ], [ 1023, %[[ENTRY]] ] -; RV32-NEXT: br label %[[FOR_BODY:.*]] -; RV32: [[FOR_BODY]]: -; RV32-NEXT: [[DEC_IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], %[[FOR_BODY]] ] -; RV32-NEXT: [[IV_NEXT1]] = add nsw i64 [[DEC_IV1]], -1 -; RV32-NEXT: [[ARRAYIDX_B1:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[IV_NEXT1]] -; RV32-NEXT: [[TMP30:%.*]] = load i7, ptr [[ARRAYIDX_B1]], align 1 -; RV32-NEXT: [[ADD:%.*]] = add i7 [[TMP30]], 1 -; RV32-NEXT: [[ARRAYIDX_A:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[IV_NEXT1]] -; RV32-NEXT: store i7 [[ADD]], ptr [[ARRAYIDX_A]], align 1 -; RV32-NEXT: [[CMP:%.*]] = icmp ugt i64 [[DEC_IV1]], 1 -; RV32-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[EXIT:.*]], !llvm.loop [[LOOP8:![0-9]+]] -; RV32: [[EXIT]]: -; RV32-NEXT: ret void -; -; RV64-UF2-LABEL: define void @vector_reverse_irregular_type( -; RV64-UF2-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) #[[ATTR0]] { -; RV64-UF2-NEXT: [[ENTRY:.*]]: -; RV64-UF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; RV64-UF2: [[VECTOR_PH]]: -; RV64-UF2-NEXT: br label %[[VECTOR_BODY:.*]] -; RV64-UF2: [[VECTOR_BODY]]: -; RV64-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; RV64-UF2-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] -; RV64-UF2-NEXT: [[TMP16:%.*]] = add i64 [[OFFSET_IDX]], 0 -; RV64-UF2-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], -1 -; RV64-UF2-NEXT: [[TMP17:%.*]] = add i64 [[OFFSET_IDX]], -2 -; RV64-UF2-NEXT: [[TMP24:%.*]] = add i64 [[OFFSET_IDX]], -3 -; RV64-UF2-NEXT: [[TMP25:%.*]] = add i64 [[OFFSET_IDX]], -4 -; RV64-UF2-NEXT: [[TMP42:%.*]] = add i64 [[OFFSET_IDX]], -5 -; RV64-UF2-NEXT: [[TMP43:%.*]] = add i64 [[OFFSET_IDX]], -6 -; RV64-UF2-NEXT: [[TMP50:%.*]] = add i64 [[OFFSET_IDX]], -7 -; RV64-UF2-NEXT: [[TMP1:%.*]] = add nsw i64 [[TMP16]], -1 -; RV64-UF2-NEXT: [[TMP2:%.*]] = add nsw i64 [[TMP0]], -1 -; RV64-UF2-NEXT: [[TMP51:%.*]] = add nsw i64 [[TMP17]], -1 -; RV64-UF2-NEXT: [[TMP11:%.*]] = add nsw i64 [[TMP24]], -1 -; RV64-UF2-NEXT: [[TMP59:%.*]] = add nsw i64 [[TMP25]], -1 -; RV64-UF2-NEXT: [[TMP13:%.*]] = add nsw i64 [[TMP42]], -1 -; RV64-UF2-NEXT: [[TMP14:%.*]] = add nsw i64 [[TMP43]], -1 -; RV64-UF2-NEXT: [[TMP15:%.*]] = add nsw i64 [[TMP50]], -1 -; RV64-UF2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP1]] -; RV64-UF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP2]] -; RV64-UF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP51]] -; RV64-UF2-NEXT: [[TMP19:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP11]] -; RV64-UF2-NEXT: [[TMP20:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP59]] -; RV64-UF2-NEXT: [[TMP21:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP13]] -; RV64-UF2-NEXT: [[TMP22:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP14]] -; RV64-UF2-NEXT: [[TMP23:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[TMP15]] -; RV64-UF2-NEXT: [[TMP5:%.*]] = load i7, ptr [[TMP3]], align 1 -; RV64-UF2-NEXT: [[TMP6:%.*]] = load i7, ptr [[TMP4]], align 1 -; RV64-UF2-NEXT: [[TMP26:%.*]] = load i7, ptr [[TMP18]], align 1 -; RV64-UF2-NEXT: [[TMP27:%.*]] = load i7, ptr [[TMP19]], align 1 -; RV64-UF2-NEXT: [[TMP28:%.*]] = insertelement <4 x i7> poison, i7 [[TMP5]], i32 0 -; RV64-UF2-NEXT: [[TMP29:%.*]] = insertelement <4 x i7> [[TMP28]], i7 [[TMP6]], i32 1 -; RV64-UF2-NEXT: [[TMP30:%.*]] = insertelement <4 x i7> [[TMP29]], i7 [[TMP26]], i32 2 -; RV64-UF2-NEXT: [[TMP31:%.*]] = insertelement <4 x i7> [[TMP30]], i7 [[TMP27]], i32 3 -; RV64-UF2-NEXT: [[TMP32:%.*]] = load i7, ptr [[TMP20]], align 1 -; RV64-UF2-NEXT: [[TMP33:%.*]] = load i7, ptr [[TMP21]], align 1 -; RV64-UF2-NEXT: [[TMP34:%.*]] = load i7, ptr [[TMP22]], align 1 -; RV64-UF2-NEXT: [[TMP35:%.*]] = load i7, ptr [[TMP23]], align 1 -; RV64-UF2-NEXT: [[TMP36:%.*]] = insertelement <4 x i7> poison, i7 [[TMP32]], i32 0 -; RV64-UF2-NEXT: [[TMP37:%.*]] = insertelement <4 x i7> [[TMP36]], i7 [[TMP33]], i32 1 -; RV64-UF2-NEXT: [[TMP38:%.*]] = insertelement <4 x i7> [[TMP37]], i7 [[TMP34]], i32 2 -; RV64-UF2-NEXT: [[TMP39:%.*]] = insertelement <4 x i7> [[TMP38]], i7 [[TMP35]], i32 3 -; RV64-UF2-NEXT: [[TMP40:%.*]] = add <4 x i7> [[TMP31]], splat (i7 1) -; RV64-UF2-NEXT: [[TMP41:%.*]] = add <4 x i7> [[TMP39]], splat (i7 1) -; RV64-UF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP1]] -; RV64-UF2-NEXT: [[TMP10:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP2]] -; RV64-UF2-NEXT: [[TMP44:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP51]] -; RV64-UF2-NEXT: [[TMP45:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP11]] -; RV64-UF2-NEXT: [[TMP46:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP59]] -; RV64-UF2-NEXT: [[TMP47:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP13]] -; RV64-UF2-NEXT: [[TMP48:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP14]] -; RV64-UF2-NEXT: [[TMP49:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[TMP15]] -; RV64-UF2-NEXT: [[TMP7:%.*]] = extractelement <4 x i7> [[TMP40]], i32 0 -; RV64-UF2-NEXT: store i7 [[TMP7]], ptr [[TMP9]], align 1 -; RV64-UF2-NEXT: [[TMP8:%.*]] = extractelement <4 x i7> [[TMP40]], i32 1 -; RV64-UF2-NEXT: store i7 [[TMP8]], ptr [[TMP10]], align 1 -; RV64-UF2-NEXT: [[TMP52:%.*]] = extractelement <4 x i7> [[TMP40]], i32 2 -; RV64-UF2-NEXT: store i7 [[TMP52]], ptr [[TMP44]], align 1 -; RV64-UF2-NEXT: [[TMP53:%.*]] = extractelement <4 x i7> [[TMP40]], i32 3 -; RV64-UF2-NEXT: store i7 [[TMP53]], ptr [[TMP45]], align 1 -; RV64-UF2-NEXT: [[TMP54:%.*]] = extractelement <4 x i7> [[TMP41]], i32 0 -; RV64-UF2-NEXT: store i7 [[TMP54]], ptr [[TMP46]], align 1 -; RV64-UF2-NEXT: [[TMP55:%.*]] = extractelement <4 x i7> [[TMP41]], i32 1 -; RV64-UF2-NEXT: store i7 [[TMP55]], ptr [[TMP47]], align 1 -; RV64-UF2-NEXT: [[TMP56:%.*]] = extractelement <4 x i7> [[TMP41]], i32 2 -; RV64-UF2-NEXT: store i7 [[TMP56]], ptr [[TMP48]], align 1 -; RV64-UF2-NEXT: [[TMP57:%.*]] = extractelement <4 x i7> [[TMP41]], i32 3 -; RV64-UF2-NEXT: store i7 [[TMP57]], ptr [[TMP49]], align 1 -; RV64-UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; RV64-UF2-NEXT: [[TMP58:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1016 -; RV64-UF2-NEXT: br i1 [[TMP58]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] -; RV64-UF2: [[MIDDLE_BLOCK]]: -; RV64-UF2-NEXT: br label %[[SCALAR_PH]] -; RV64-UF2: [[SCALAR_PH]]: -; RV64-UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 7, %[[MIDDLE_BLOCK]] ], [ 1023, %[[ENTRY]] ] -; RV64-UF2-NEXT: br label %[[FOR_BODY:.*]] -; RV64-UF2: [[FOR_BODY]]: -; RV64-UF2-NEXT: [[DEC_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] -; RV64-UF2-NEXT: [[IV_NEXT]] = add nsw i64 [[DEC_IV]], -1 -; RV64-UF2-NEXT: [[ARRAYIDX_B:%.*]] = getelementptr inbounds i7, ptr [[B]], i64 [[IV_NEXT]] -; RV64-UF2-NEXT: [[TMP12:%.*]] = load i7, ptr [[ARRAYIDX_B]], align 1 -; RV64-UF2-NEXT: [[ADD:%.*]] = add i7 [[TMP12]], 1 -; RV64-UF2-NEXT: [[ARRAYIDX_A:%.*]] = getelementptr inbounds i7, ptr [[A]], i64 [[IV_NEXT]] -; RV64-UF2-NEXT: store i7 [[ADD]], ptr [[ARRAYIDX_A]], align 1 -; RV64-UF2-NEXT: [[CMP:%.*]] = icmp ugt i64 [[DEC_IV]], 1 -; RV64-UF2-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[EXIT:.*]], !llvm.loop [[LOOP7:![0-9]+]] -; RV64-UF2: [[EXIT]]: -; RV64-UF2-NEXT: ret void -; -entry: - br label %for.body - -for.body: - %dec.iv = phi i64 [ 1023, %entry ], [ %iv.next, %for.body ] - %iv.next = add nsw i64 %dec.iv, -1 - %arrayidx.b = getelementptr inbounds i7, ptr %B, i64 %iv.next - %0 = load i7, ptr %arrayidx.b, align 1 - %add = add i7 %0, 1 - %arrayidx.a = getelementptr inbounds i7, ptr %A, i64 %iv.next - store i7 %add, ptr %arrayidx.a, align 1 - %cmp = icmp ugt i64 %dec.iv, 1 - br i1 %cmp, label %for.body, label %exit, !llvm.loop !4 - -exit: - ret void -} - -!0 = distinct !{!0, !1, !2, !3} -!1 = !{!"llvm.loop.vectorize.width", i32 4} -!2 = !{!"llvm.loop.vectorize.scalable.enable", i1 true} -!3 = !{!"llvm.loop.vectorize.enable", i1 true} -!4 = distinct !{!4, !1, !3} diff --git a/llvm/test/Transforms/LoopVectorize/cse.ll b/llvm/test/Transforms/LoopVectorize/cse.ll index 71325de1ca275..37b9c094820e4 100644 --- a/llvm/test/Transforms/LoopVectorize/cse.ll +++ b/llvm/test/Transforms/LoopVectorize/cse.ll @@ -327,12 +327,9 @@ define i32 @common_sext_different_dest_types(ptr %a, ptr %b, ptr %c, i32 %N) { ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1 +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1 ; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i8> [[WIDE_LOAD]] to <4 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = sext <4 x i8> [[WIDE_LOAD1]] to <4 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = sext <4 x i8> [[WIDE_LOAD1]] to <4 x i16> @@ -402,12 +399,9 @@ define i32 @common_zext_different_dest_types(ptr %a, ptr %b, ptr %c, i32 %N) { ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1 +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1 ; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = zext <4 x i8> [[WIDE_LOAD1]] to <4 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i8> [[WIDE_LOAD1]] to <4 x i16> From 02919ed05d38f2a64734444f5b0d3de6ac6eedbd Mon Sep 17 00:00:00 2001 From: Samuel Tebbs Date: Tue, 19 Aug 2025 11:25:00 +0100 Subject: [PATCH 6/6] Fix powerpc test --- llvm/test/Transforms/LoopVectorize/PowerPC/vectorize-bswap.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/vectorize-bswap.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/vectorize-bswap.ll index 7f0a487fca945..db1f2c71e0f77 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/vectorize-bswap.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/vectorize-bswap.ll @@ -19,7 +19,7 @@ define dso_local void @test(ptr %Arr, i32 signext %Len) { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[ARR:%.*]], i64 [[TMP1]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> [[WIDE_LOAD]]) -; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP3]], align 4 +; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP2]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]