Skip to content

Commit 50a5b81

Browse files
committed
!fixup add option enabling parallel reduction phis
1 parent 8578fb7 commit 50a5b81

File tree

7 files changed

+51
-77
lines changed

7 files changed

+51
-77
lines changed

llvm/lib/Transforms/Utils/LoopUnroll.cpp

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -109,6 +109,9 @@ UnrollVerifyLoopInfo("unroll-verify-loopinfo", cl::Hidden,
109109
#endif
110110
);
111111

112+
static cl::opt<bool> UnrollAddParallelReductions(
113+
"unroll-add-parallel-reductions", cl::init(false), cl::Hidden,
114+
cl::desc("Allow unrolling to add parallel reduction phis."));
112115

113116
/// Check if unrolling created a situation where we need to insert phi nodes to
114117
/// preserve LCSSA form.
@@ -668,7 +671,8 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
668671
// to not exit.
669672
DenseMap<PHINode *, RecurrenceDescriptor> Reductions;
670673
bool CanAddAdditionalAccumulators =
671-
!CompletelyUnroll && L->getNumBlocks() == 1 &&
674+
UnrollAddParallelReductions && !CompletelyUnroll &&
675+
L->getNumBlocks() == 1 &&
672676
(ULO.Runtime ||
673677
(ExitInfos.contains(Header) && ((ExitInfos[Header].TripCount != 0 &&
674678
ExitInfos[Header].BreakoutTrip == 0))));

llvm/test/Transforms/LoopUnroll/AArch64/apple-unrolling.ll

Lines changed: 17 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -603,33 +603,27 @@ define i32 @test_add_reduction_unroll_partial(ptr %a, i64 noundef %n) {
603603
; OTHER-NEXT: br label %[[LOOP:.*]]
604604
; OTHER: [[LOOP]]:
605605
; OTHER-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
606-
; OTHER-NEXT: [[RDX_1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ]
607-
; OTHER-NEXT: [[RDX_2:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_2:%.*]], %[[LOOP]] ]
608-
; OTHER-NEXT: [[RDX_3:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
609-
; OTHER-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ]
606+
; OTHER-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
610607
; OTHER-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]]
611608
; OTHER-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP_A]], align 2
612-
; OTHER-NEXT: [[RDX_NEXT]] = add i32 [[RDX]], [[TMP0]]
609+
; OTHER-NEXT: [[RDX_NEXT:%.*]] = add nuw nsw i32 [[RDX]], [[TMP0]]
613610
; OTHER-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
614611
; OTHER-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT]]
615612
; OTHER-NEXT: [[TMP1:%.*]] = load i32, ptr [[GEP_A_1]], align 2
616-
; OTHER-NEXT: [[RDX_NEXT_1]] = add i32 [[RDX_1]], [[TMP1]]
613+
; OTHER-NEXT: [[RDX_2:%.*]] = add nuw nsw i32 [[RDX_NEXT]], [[TMP1]]
617614
; OTHER-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
618615
; OTHER-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_1]]
619616
; OTHER-NEXT: [[TMP2:%.*]] = load i32, ptr [[GEP_A_2]], align 2
620-
; OTHER-NEXT: [[RDX_NEXT_2]] = add i32 [[RDX_2]], [[TMP2]]
617+
; OTHER-NEXT: [[RDX_NEXT_2:%.*]] = add nuw nsw i32 [[RDX_2]], [[TMP2]]
621618
; OTHER-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
622619
; OTHER-NEXT: [[GEP_A_3:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_2]]
623620
; OTHER-NEXT: [[TMP3:%.*]] = load i32, ptr [[GEP_A_3]], align 2
624-
; OTHER-NEXT: [[RDX_NEXT_3]] = add i32 [[RDX_3]], [[TMP3]]
621+
; OTHER-NEXT: [[RDX_NEXT_3]] = add nuw nsw i32 [[RDX_NEXT_2]], [[TMP3]]
625622
; OTHER-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
626623
; OTHER-NEXT: [[EC_3:%.*]] = icmp eq i64 [[IV_NEXT_3]], 1024
627624
; OTHER-NEXT: br i1 [[EC_3]], label %[[EXIT:.*]], label %[[LOOP]]
628625
; OTHER: [[EXIT]]:
629-
; OTHER-NEXT: [[RES:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
630-
; OTHER-NEXT: [[BIN_RDX:%.*]] = add i32 [[RDX_NEXT_1]], [[RDX_NEXT]]
631-
; OTHER-NEXT: [[BIN_RDX1:%.*]] = add i32 [[RDX_NEXT_2]], [[BIN_RDX]]
632-
; OTHER-NEXT: [[BIN_RDX2:%.*]] = add i32 [[RDX_NEXT_3]], [[BIN_RDX1]]
626+
; OTHER-NEXT: [[BIN_RDX2:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
633627
; OTHER-NEXT: ret i32 [[BIN_RDX2]]
634628
;
635629
entry:
@@ -753,25 +747,23 @@ define i32 @test_add_and_mul_reduction_unroll_partial(ptr %a, i64 noundef %n) {
753747
; OTHER-NEXT: br label %[[LOOP:.*]]
754748
; OTHER: [[LOOP]]:
755749
; OTHER-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_1:%.*]], %[[LOOP]] ]
756-
; OTHER-NEXT: [[RDX_1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ]
757-
; OTHER-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ]
750+
; OTHER-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ]
758751
; OTHER-NEXT: [[RDX_2:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_2_NEXT_1:%.*]], %[[LOOP]] ]
759752
; OTHER-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]]
760753
; OTHER-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP_A]], align 2
761-
; OTHER-NEXT: [[RDX_NEXT]] = add i32 [[RDX]], [[TMP0]]
754+
; OTHER-NEXT: [[RDX_NEXT:%.*]] = add nuw nsw i32 [[RDX]], [[TMP0]]
762755
; OTHER-NEXT: [[RDX_2_NEXT:%.*]] = mul i32 [[RDX_2]], [[TMP0]]
763756
; OTHER-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
764757
; OTHER-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT]]
765758
; OTHER-NEXT: [[TMP1:%.*]] = load i32, ptr [[GEP_A_1]], align 2
766-
; OTHER-NEXT: [[RDX_NEXT_1]] = add i32 [[RDX_1]], [[TMP1]]
759+
; OTHER-NEXT: [[RDX_NEXT_1]] = add nuw nsw i32 [[RDX_NEXT]], [[TMP1]]
767760
; OTHER-NEXT: [[RDX_2_NEXT_1]] = mul i32 [[RDX_2_NEXT]], [[TMP1]]
768761
; OTHER-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
769762
; OTHER-NEXT: [[EC_1:%.*]] = icmp eq i64 [[IV_NEXT_1]], 1024
770763
; OTHER-NEXT: br i1 [[EC_1]], label %[[EXIT:.*]], label %[[LOOP]]
771764
; OTHER: [[EXIT]]:
772-
; OTHER-NEXT: [[RES_1:%.*]] = phi i32 [ [[RDX_NEXT_1]], %[[LOOP]] ]
765+
; OTHER-NEXT: [[BIN_RDX:%.*]] = phi i32 [ [[RDX_NEXT_1]], %[[LOOP]] ]
773766
; OTHER-NEXT: [[RES_2:%.*]] = phi i32 [ [[RDX_2_NEXT_1]], %[[LOOP]] ]
774-
; OTHER-NEXT: [[BIN_RDX:%.*]] = add i32 [[RDX_NEXT_1]], [[RDX_NEXT]]
775767
; OTHER-NEXT: [[SUM:%.*]] = add i32 [[BIN_RDX]], [[RES_2]]
776768
; OTHER-NEXT: ret i32 [[SUM]]
777769
;
@@ -828,26 +820,23 @@ define i32 @test_add_reduction_runtime(ptr %a, i64 noundef %n) {
828820
; OTHER-NEXT: br label %[[LOOP:.*]]
829821
; OTHER: [[LOOP]]:
830822
; OTHER-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
831-
; OTHER-NEXT: [[RDX_1:%.*]] = phi i32 [ 0, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ]
832-
; OTHER-NEXT: [[RDX_2:%.*]] = phi i32 [ 0, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_2:%.*]], %[[LOOP]] ]
833-
; OTHER-NEXT: [[RDX_3:%.*]] = phi i32 [ 0, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
834-
; OTHER-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY_NEW]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ]
823+
; OTHER-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
835824
; OTHER-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_3:%.*]], %[[LOOP]] ]
836825
; OTHER-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]]
837826
; OTHER-NEXT: [[TMP2:%.*]] = load i32, ptr [[GEP_A]], align 2
838-
; OTHER-NEXT: [[RDX_NEXT]] = add i32 [[RDX]], [[TMP2]]
827+
; OTHER-NEXT: [[RDX_NEXT:%.*]] = add nuw nsw i32 [[RDX]], [[TMP2]]
839828
; OTHER-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
840829
; OTHER-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT]]
841830
; OTHER-NEXT: [[TMP3:%.*]] = load i32, ptr [[GEP_A_1]], align 2
842-
; OTHER-NEXT: [[RDX_NEXT_1]] = add i32 [[RDX_1]], [[TMP3]]
831+
; OTHER-NEXT: [[RDX_2:%.*]] = add nuw nsw i32 [[RDX_NEXT]], [[TMP3]]
843832
; OTHER-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
844833
; OTHER-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_1]]
845834
; OTHER-NEXT: [[TMP4:%.*]] = load i32, ptr [[GEP_A_2]], align 2
846-
; OTHER-NEXT: [[RDX_NEXT_2]] = add i32 [[RDX_2]], [[TMP4]]
835+
; OTHER-NEXT: [[RDX_NEXT_2:%.*]] = add nuw nsw i32 [[RDX_2]], [[TMP4]]
847836
; OTHER-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
848837
; OTHER-NEXT: [[GEP_A_3:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_2]]
849838
; OTHER-NEXT: [[TMP5:%.*]] = load i32, ptr [[GEP_A_3]], align 2
850-
; OTHER-NEXT: [[RDX_NEXT_3]] = add i32 [[RDX_3]], [[TMP5]]
839+
; OTHER-NEXT: [[RDX_NEXT_3]] = add nuw nsw i32 [[RDX_NEXT_2]], [[TMP5]]
851840
; OTHER-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
852841
; OTHER-NEXT: [[NITER_NEXT_3]] = add i64 [[NITER]], 4
853842
; OTHER-NEXT: [[NITER_NCMP_3:%.*]] = icmp eq i64 [[NITER_NEXT_3]], [[UNROLL_ITER]]
@@ -856,14 +845,11 @@ define i32 @test_add_reduction_runtime(ptr %a, i64 noundef %n) {
856845
; OTHER-NEXT: [[RES_PH_PH:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
857846
; OTHER-NEXT: [[IV_UNR_PH:%.*]] = phi i64 [ [[IV_NEXT_3]], %[[LOOP]] ]
858847
; OTHER-NEXT: [[RDX_UNR_PH:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
859-
; OTHER-NEXT: [[BIN_RDX:%.*]] = add i32 [[RDX_NEXT_1]], [[RDX_NEXT]]
860-
; OTHER-NEXT: [[BIN_RDX2:%.*]] = add i32 [[RDX_NEXT_2]], [[BIN_RDX]]
861-
; OTHER-NEXT: [[BIN_RDX3:%.*]] = add i32 [[RDX_NEXT_3]], [[BIN_RDX2]]
862848
; OTHER-NEXT: br label %[[EXIT_UNR_LCSSA]]
863849
; OTHER: [[EXIT_UNR_LCSSA]]:
864-
; OTHER-NEXT: [[RES_PH:%.*]] = phi i32 [ poison, %[[ENTRY]] ], [ [[BIN_RDX3]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
850+
; OTHER-NEXT: [[RES_PH:%.*]] = phi i32 [ poison, %[[ENTRY]] ], [ [[RES_PH_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
865851
; OTHER-NEXT: [[IV_UNR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
866-
; OTHER-NEXT: [[RDX_UNR:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[BIN_RDX3]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
852+
; OTHER-NEXT: [[RDX_UNR:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_UNR_PH]], %[[EXIT_UNR_LCSSA_LOOPEXIT]] ]
867853
; OTHER-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
868854
; OTHER-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[EXIT:.*]]
869855
; OTHER: [[LOOP_EPIL_PREHEADER]]:

llvm/test/Transforms/LoopUnroll/ARM/instr-size-costs.ll

Lines changed: 6 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -196,14 +196,13 @@ define i32 @test_i32_select_optsize(ptr %a, ptr %b, ptr %c) #0 {
196196
; CHECK-V8: loop:
197197
; CHECK-V8-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[COUNT_1:%.*]], [[LOOP]] ]
198198
; CHECK-V8-NEXT: [[ACC:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACC_NEXT_1:%.*]], [[LOOP]] ]
199-
; CHECK-V8-NEXT: [[ACC1:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACC_NEXT_2:%.*]], [[LOOP]] ]
200199
; CHECK-V8-NEXT: [[ADDR_A:%.*]] = getelementptr i32, ptr [[A:%.*]], i32 [[IV]]
201200
; CHECK-V8-NEXT: [[ADDR_B:%.*]] = getelementptr i32, ptr [[B:%.*]], i32 [[IV]]
202201
; CHECK-V8-NEXT: [[DATA_A:%.*]] = load i32, ptr [[ADDR_A]], align 4
203202
; CHECK-V8-NEXT: [[DATA_B:%.*]] = load i32, ptr [[ADDR_B]], align 4
204203
; CHECK-V8-NEXT: [[UGT:%.*]] = icmp ugt i32 [[DATA_A]], [[DATA_B]]
205204
; CHECK-V8-NEXT: [[UMAX:%.*]] = select i1 [[UGT]], i32 [[DATA_A]], i32 [[DATA_B]]
206-
; CHECK-V8-NEXT: [[ACC_NEXT_2]] = add i32 [[UMAX]], [[ACC1]]
205+
; CHECK-V8-NEXT: [[ACC_NEXT:%.*]] = add i32 [[UMAX]], [[ACC]]
207206
; CHECK-V8-NEXT: [[ADDR_C:%.*]] = getelementptr i32, ptr [[C:%.*]], i32 [[IV]]
208207
; CHECK-V8-NEXT: store i32 [[UMAX]], ptr [[ADDR_C]], align 4
209208
; CHECK-V8-NEXT: [[COUNT:%.*]] = add nuw nsw i32 [[IV]], 1
@@ -213,15 +212,14 @@ define i32 @test_i32_select_optsize(ptr %a, ptr %b, ptr %c) #0 {
213212
; CHECK-V8-NEXT: [[DATA_B_1:%.*]] = load i32, ptr [[ADDR_B_1]], align 4
214213
; CHECK-V8-NEXT: [[UGT_1:%.*]] = icmp ugt i32 [[DATA_A_1]], [[DATA_B_1]]
215214
; CHECK-V8-NEXT: [[UMAX_1:%.*]] = select i1 [[UGT_1]], i32 [[DATA_A_1]], i32 [[DATA_B_1]]
216-
; CHECK-V8-NEXT: [[ACC_NEXT_1]] = add i32 [[UMAX_1]], [[ACC]]
215+
; CHECK-V8-NEXT: [[ACC_NEXT_1]] = add i32 [[UMAX_1]], [[ACC_NEXT]]
217216
; CHECK-V8-NEXT: [[ADDR_C_1:%.*]] = getelementptr i32, ptr [[C]], i32 [[COUNT]]
218217
; CHECK-V8-NEXT: store i32 [[UMAX_1]], ptr [[ADDR_C_1]], align 4
219218
; CHECK-V8-NEXT: [[COUNT_1]] = add nuw nsw i32 [[IV]], 2
220219
; CHECK-V8-NEXT: [[END_1:%.*]] = icmp ne i32 [[COUNT_1]], 100
221220
; CHECK-V8-NEXT: br i1 [[END_1]], label [[LOOP]], label [[EXIT:%.*]]
222221
; CHECK-V8: exit:
223-
; CHECK-V8-NEXT: [[ACC_NEXT_LCSSA1:%.*]] = phi i32 [ [[ACC_NEXT_1]], [[LOOP]] ]
224-
; CHECK-V8-NEXT: [[ACC_NEXT_LCSSA:%.*]] = add i32 [[ACC_NEXT_1]], [[ACC_NEXT_2]]
222+
; CHECK-V8-NEXT: [[ACC_NEXT_LCSSA:%.*]] = phi i32 [ [[ACC_NEXT_1]], [[LOOP]] ]
225223
; CHECK-V8-NEXT: ret i32 [[ACC_NEXT_LCSSA]]
226224
;
227225
entry:
@@ -254,14 +252,13 @@ define i32 @test_i32_select_minsize(ptr %a, ptr %b, ptr %c) #1 {
254252
; CHECK-V8: loop:
255253
; CHECK-V8-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[COUNT_1:%.*]], [[LOOP]] ]
256254
; CHECK-V8-NEXT: [[ACC:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACC_NEXT_1:%.*]], [[LOOP]] ]
257-
; CHECK-V8-NEXT: [[ACC1:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACC_NEXT_2:%.*]], [[LOOP]] ]
258255
; CHECK-V8-NEXT: [[ADDR_A:%.*]] = getelementptr i32, ptr [[A:%.*]], i32 [[IV]]
259256
; CHECK-V8-NEXT: [[ADDR_B:%.*]] = getelementptr i32, ptr [[B:%.*]], i32 [[IV]]
260257
; CHECK-V8-NEXT: [[DATA_A:%.*]] = load i32, ptr [[ADDR_A]], align 4
261258
; CHECK-V8-NEXT: [[DATA_B:%.*]] = load i32, ptr [[ADDR_B]], align 4
262259
; CHECK-V8-NEXT: [[UGT:%.*]] = icmp ugt i32 [[DATA_A]], [[DATA_B]]
263260
; CHECK-V8-NEXT: [[UMAX:%.*]] = select i1 [[UGT]], i32 [[DATA_A]], i32 [[DATA_B]]
264-
; CHECK-V8-NEXT: [[ACC_NEXT_2]] = add i32 [[UMAX]], [[ACC1]]
261+
; CHECK-V8-NEXT: [[ACC_NEXT:%.*]] = add i32 [[UMAX]], [[ACC]]
265262
; CHECK-V8-NEXT: [[ADDR_C:%.*]] = getelementptr i32, ptr [[C:%.*]], i32 [[IV]]
266263
; CHECK-V8-NEXT: store i32 [[UMAX]], ptr [[ADDR_C]], align 4
267264
; CHECK-V8-NEXT: [[COUNT:%.*]] = add nuw nsw i32 [[IV]], 1
@@ -271,15 +268,14 @@ define i32 @test_i32_select_minsize(ptr %a, ptr %b, ptr %c) #1 {
271268
; CHECK-V8-NEXT: [[DATA_B_1:%.*]] = load i32, ptr [[ADDR_B_1]], align 4
272269
; CHECK-V8-NEXT: [[UGT_1:%.*]] = icmp ugt i32 [[DATA_A_1]], [[DATA_B_1]]
273270
; CHECK-V8-NEXT: [[UMAX_1:%.*]] = select i1 [[UGT_1]], i32 [[DATA_A_1]], i32 [[DATA_B_1]]
274-
; CHECK-V8-NEXT: [[ACC_NEXT_1]] = add i32 [[UMAX_1]], [[ACC]]
271+
; CHECK-V8-NEXT: [[ACC_NEXT_1]] = add i32 [[UMAX_1]], [[ACC_NEXT]]
275272
; CHECK-V8-NEXT: [[ADDR_C_1:%.*]] = getelementptr i32, ptr [[C]], i32 [[COUNT]]
276273
; CHECK-V8-NEXT: store i32 [[UMAX_1]], ptr [[ADDR_C_1]], align 4
277274
; CHECK-V8-NEXT: [[COUNT_1]] = add nuw nsw i32 [[IV]], 2
278275
; CHECK-V8-NEXT: [[END_1:%.*]] = icmp ne i32 [[COUNT_1]], 100
279276
; CHECK-V8-NEXT: br i1 [[END_1]], label [[LOOP]], label [[EXIT:%.*]]
280277
; CHECK-V8: exit:
281-
; CHECK-V8-NEXT: [[ACC_NEXT_LCSSA1:%.*]] = phi i32 [ [[ACC_NEXT_1]], [[LOOP]] ]
282-
; CHECK-V8-NEXT: [[ACC_NEXT_LCSSA:%.*]] = add i32 [[ACC_NEXT_1]], [[ACC_NEXT_2]]
278+
; CHECK-V8-NEXT: [[ACC_NEXT_LCSSA:%.*]] = phi i32 [ [[ACC_NEXT_1]], [[LOOP]] ]
283279
; CHECK-V8-NEXT: ret i32 [[ACC_NEXT_LCSSA]]
284280
;
285281
entry:

llvm/test/Transforms/LoopUnroll/partial-unroll-reductions.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2-
; RUN: opt -p loop-unroll -unroll-allow-partial -unroll-max-count=4 -S %s | FileCheck %s
2+
; RUN: opt -p loop-unroll -unroll-add-parallel-reductions -unroll-allow-partial -unroll-max-count=4 -S %s | FileCheck %s
33

44
define i32 @test_add(ptr %src, i64 %n, i32 %start) {
55
; CHECK-LABEL: define i32 @test_add(

llvm/test/Transforms/LoopUnroll/runtime-loop5.ll

Lines changed: 7 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -75,26 +75,23 @@ define i3 @test(ptr %a, i3 %n) {
7575
; UNROLL-4-NEXT: br label [[FOR_BODY:%.*]]
7676
; UNROLL-4: for.body:
7777
; UNROLL-4-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[INDVARS_IV_NEXT_3:%.*]], [[FOR_BODY]] ]
78-
; UNROLL-4-NEXT: [[SUM_02_1:%.*]] = phi i3 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER_NEW]] ]
79-
; UNROLL-4-NEXT: [[ADD_1:%.*]] = phi i3 [ [[ADD_2:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER_NEW]] ]
80-
; UNROLL-4-NEXT: [[SUM_02_3:%.*]] = phi i3 [ [[ADD_3:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER_NEW]] ]
81-
; UNROLL-4-NEXT: [[SUM_02:%.*]] = phi i3 [ [[ADD1:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER_NEW]] ]
78+
; UNROLL-4-NEXT: [[SUM_02:%.*]] = phi i3 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[ADD_3:%.*]], [[FOR_BODY]] ]
8279
; UNROLL-4-NEXT: [[NITER:%.*]] = phi i3 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_BODY]] ]
8380
; UNROLL-4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i3, ptr [[A:%.*]], i64 [[INDVARS_IV]]
8481
; UNROLL-4-NEXT: [[TMP2:%.*]] = load i3, ptr [[ARRAYIDX]], align 1
85-
; UNROLL-4-NEXT: [[ADD1]] = add i3 [[TMP2]], [[SUM_02]]
82+
; UNROLL-4-NEXT: [[ADD:%.*]] = add nsw i3 [[TMP2]], [[SUM_02]]
8683
; UNROLL-4-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
8784
; UNROLL-4-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i3, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
8885
; UNROLL-4-NEXT: [[TMP3:%.*]] = load i3, ptr [[ARRAYIDX_1]], align 1
89-
; UNROLL-4-NEXT: [[ADD]] = add i3 [[TMP3]], [[SUM_02_1]]
86+
; UNROLL-4-NEXT: [[ADD_1:%.*]] = add nsw i3 [[TMP3]], [[ADD]]
9087
; UNROLL-4-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
9188
; UNROLL-4-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i3, ptr [[A]], i64 [[INDVARS_IV_NEXT_1]]
9289
; UNROLL-4-NEXT: [[TMP4:%.*]] = load i3, ptr [[ARRAYIDX_2]], align 1
93-
; UNROLL-4-NEXT: [[ADD_2]] = add i3 [[TMP4]], [[ADD_1]]
90+
; UNROLL-4-NEXT: [[ADD_2:%.*]] = add nsw i3 [[TMP4]], [[ADD_1]]
9491
; UNROLL-4-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3
9592
; UNROLL-4-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i3, ptr [[A]], i64 [[INDVARS_IV_NEXT_2]]
9693
; UNROLL-4-NEXT: [[TMP5:%.*]] = load i3, ptr [[ARRAYIDX_3]], align 1
97-
; UNROLL-4-NEXT: [[ADD_3]] = add i3 [[TMP5]], [[SUM_02_3]]
94+
; UNROLL-4-NEXT: [[ADD_3]] = add nsw i3 [[TMP5]], [[ADD_2]]
9895
; UNROLL-4-NEXT: [[INDVARS_IV_NEXT_3]] = add nuw nsw i64 [[INDVARS_IV]], 4
9996
; UNROLL-4-NEXT: [[NITER_NEXT_3]] = add i3 [[NITER]], -4
10097
; UNROLL-4-NEXT: [[NITER_NCMP_3:%.*]] = icmp eq i3 [[NITER_NEXT_3]], [[UNROLL_ITER]]
@@ -103,14 +100,11 @@ define i3 @test(ptr %a, i3 %n) {
103100
; UNROLL-4-NEXT: [[ADD_LCSSA_PH_PH:%.*]] = phi i3 [ [[ADD_3]], [[FOR_BODY]] ]
104101
; UNROLL-4-NEXT: [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_3]], [[FOR_BODY]] ]
105102
; UNROLL-4-NEXT: [[SUM_02_UNR_PH:%.*]] = phi i3 [ [[ADD_3]], [[FOR_BODY]] ]
106-
; UNROLL-4-NEXT: [[BIN_RDX:%.*]] = add i3 [[ADD]], [[ADD1]]
107-
; UNROLL-4-NEXT: [[BIN_RDX2:%.*]] = add i3 [[ADD_2]], [[BIN_RDX]]
108-
; UNROLL-4-NEXT: [[BIN_RDX3:%.*]] = add i3 [[ADD_3]], [[BIN_RDX2]]
109103
; UNROLL-4-NEXT: br label [[FOR_END_LOOPEXIT_UNR_LCSSA]]
110104
; UNROLL-4: for.end.loopexit.unr-lcssa:
111-
; UNROLL-4-NEXT: [[ADD_LCSSA_PH:%.*]] = phi i3 [ poison, [[FOR_BODY_PREHEADER]] ], [ [[BIN_RDX3]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
105+
; UNROLL-4-NEXT: [[ADD_LCSSA_PH:%.*]] = phi i3 [ poison, [[FOR_BODY_PREHEADER]] ], [ [[ADD_LCSSA_PH_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
112106
; UNROLL-4-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_UNR_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
113-
; UNROLL-4-NEXT: [[SUM_02_UNR:%.*]] = phi i3 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[BIN_RDX3]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
107+
; UNROLL-4-NEXT: [[SUM_02_UNR:%.*]] = phi i3 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[SUM_02_UNR_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
114108
; UNROLL-4-NEXT: [[LCMP_MOD:%.*]] = icmp ne i3 [[XTRAITER]], 0
115109
; UNROLL-4-NEXT: br i1 [[LCMP_MOD]], label [[FOR_BODY_EPIL_PREHEADER:%.*]], label [[FOR_END_LOOPEXIT:%.*]]
116110
; UNROLL-4: for.body.epil.preheader:

llvm/test/Transforms/LoopUnroll/runtime-unroll-reductions.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2-
; RUN: opt -p loop-unroll -S %s | FileCheck %s
2+
; RUN: opt -p loop-unroll -unroll-add-parallel-reductions -S %s | FileCheck %s
33

44
define i32 @test_add_reduction(ptr %a, i64 %n) {
55
; CHECK-LABEL: define i32 @test_add_reduction(

0 commit comments

Comments
 (0)