Skip to content

Commit ea20c67

Browse files
committed
!fixup add 2 test cases which cannot be handled by LV.
1 parent 1440ad1 commit ea20c67

File tree

1 file changed

+113
-0
lines changed

1 file changed

+113
-0
lines changed

llvm/test/Transforms/LoopUnroll/partial-unroll-reductions.ll

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -509,3 +509,116 @@ loop:
509509
exit:
510510
ret void
511511
}
512+
513+
; External function declared without a body; @test_add_with_call calls it once
; per loop iteration to produce the value that is accumulated.
declare i32 @foo()
514+
515+
; Loop with a call cannot be handled by LoopVectorize, introducing additional
516+
; accumulators when unrolling increases throughput.
517+
define i32 @test_add_with_call(i64 %n, i32 %start) {
; NOTE(review): the earlier assertions here were copied verbatim from @test_add:
; they named the wrong function, expected a pointer argument, and expected
; getelementptr/load instructions that this function does not contain. The
; assertions below pin only what the input IR guarantees (entry branch, the
; reduction phi seeded with %start, the call, and the exit compare against
; 1000). Regenerate them with utils/update_test_checks.py to capture the exact
; unrolled output, including any additional accumulator phis.
;
; CHECK-LABEL: define i32 @test_add_with_call(
; CHECK-SAME: i64 [[N:%.*]], i32 [[START:%.*]]) {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    br label %[[LOOP:.*]]
; CHECK:       [[LOOP]]:
; CHECK:         phi i32 [ [[START]], %[[ENTRY]] ]
; CHECK:         call i32 @foo()
; CHECK:         icmp ne i64 {{%.*}}, 1000
; CHECK:         br i1 {{%.*}}, label %[[LOOP]], label %[[EXIT:.*]]
; CHECK:       [[EXIT]]:
; CHECK:         ret i32
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  ; Integer add reduction seeded with %start.
  %rdx = phi i32 [ %start, %entry ], [ %rdx.next, %loop ]
  %iv.next = add i64 %iv, 1
  ; The reduced value comes from a call to an external function rather than
  ; from memory.
  %l = call i32 @foo()
  %rdx.next = add i32 %rdx, %l
  ; Constant trip count of 1000 iterations (%n is unused).
  %ec = icmp ne i64 %iv.next, 1000
  br i1 %ec, label %loop, label %exit

exit:
  ret i32 %rdx.next
}
568+
569+
; Loop with backward dependence cannot be handled by LoopVectorize, introducing additional
570+
; accumulators when unrolling increases throughput.
571+
define i32 @test_add_with_backward_dep(ptr %p, i64 %n, i32 %start) {
; NOTE(review): the earlier assertions here were copied verbatim from @test_add:
; they named the wrong function and expected `getelementptr i32` / `align 1`
; loads with no stores, which does not match this function's
; `getelementptr inbounds nuw` accesses and its `store i32 0`. The assertions
; below pin only what the input IR guarantees. Regenerate them with
; utils/update_test_checks.py to capture the exact unrolled output, including
; any additional accumulator phis.
;
; CHECK-LABEL: define i32 @test_add_with_backward_dep(
; CHECK-SAME: ptr [[P:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) {
; CHECK-NEXT:  [[ENTRY:.*]]:
; CHECK-NEXT:    br label %[[LOOP:.*]]
; CHECK:       [[LOOP]]:
; CHECK:         phi i32 [ [[START]], %[[ENTRY]] ]
; CHECK:         getelementptr inbounds nuw i32, ptr [[P]], i64
; CHECK:         load i32, ptr
; CHECK:         store i32 0, ptr
; CHECK:         icmp ne i64 {{%.*}}, 1000
; CHECK:         br i1 {{%.*}}, label %[[LOOP]], label %[[EXIT:.*]]
; CHECK:       [[EXIT]]:
; CHECK:         ret i32
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  ; Integer add reduction seeded with %start.
  %rdx = phi i32 [ %start, %entry ], [ %rdx.next, %loop ]
  %iv.next = add i64 %iv, 1
  ; Read p[iv] ...
  %gep = getelementptr inbounds nuw i32, ptr %p, i64 %iv
  %l = load i32, ptr %gep
  ; ... and write p[iv + 1], the element the next iteration will load. This is
  ; the loop-carried memory dependence the test is about.
  %gep.1 = getelementptr inbounds nuw i32, ptr %p, i64 %iv.next
  store i32 0, ptr %gep.1
  %rdx.next = add i32 %rdx, %l
  ; Constant trip count of 1000 iterations (%n is unused).
  %ec = icmp ne i64 %iv.next, 1000
  br i1 %ec, label %loop, label %exit

exit:
  ret i32 %rdx.next
}

0 commit comments

Comments
 (0)