@@ -509,3 +509,116 @@ loop:
509
509
exit:
510
510
ret void
511
511
}
512
+
513
+ declare i32 @foo ()
514
+
515
+ ; Loop with a call cannot be handled by LoopVectorize, introducing additional
516
+ ; accumulators when unrolling increases throughput.
517
+ define i32 @test_add_with_call(i64 %n, i32 %start) {
518
+ ; CHECK-LABEL: define i32 @test_add_with_call(
519
+ ; CHECK-SAME: i64 [[N:%.*]], i32 [[START:%.*]]) {
520
+ ; CHECK-NEXT: [[ENTRY:.*]]:
521
+ ; CHECK-NEXT: br label %[[LOOP:.*]]
522
+ ; CHECK: [[LOOP]]:
523
+ ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
524
+ ; CHECK-NEXT: [[RDX_1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
525
+ ; CHECK-NEXT: [[RDX_NEXT_1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_2:%.*]], %[[LOOP]] ]
526
+ ; CHECK-NEXT: [[RDX_3:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_24:%.*]], %[[LOOP]] ]
527
+ ; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ]
528
+ ; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
529
+ ; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV]]
530
+ ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 1
531
+ ; CHECK-NEXT: [[RDX_NEXT]] = add i32 [[RDX]], [[L]]
532
+ ; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
533
+ ; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT]]
534
+ ; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 1
535
+ ; CHECK-NEXT: [[RDX_NEXT_3]] = add i32 [[RDX_1]], [[L_1]]
536
+ ; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
537
+ ; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_1]]
538
+ ; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 1
539
+ ; CHECK-NEXT: [[RDX_NEXT_2]] = add i32 [[RDX_NEXT_1]], [[L_2]]
540
+ ; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
541
+ ; CHECK-NEXT: [[GEP_SRC_24:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_2]]
542
+ ; CHECK-NEXT: [[L_24:%.*]] = load i32, ptr [[GEP_SRC_24]], align 1
543
+ ; CHECK-NEXT: [[RDX_NEXT_24]] = add i32 [[RDX_3]], [[L_24]]
544
+ ; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000
545
+ ; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]]
546
+ ; CHECK: [[EXIT]]:
547
+ ; CHECK-NEXT: [[RDX_NEXT_LCSSA1:%.*]] = phi i32 [ [[RDX_NEXT_24]], %[[LOOP]] ]
548
+ ; CHECK-NEXT: [[BIN_RDX:%.*]] = add i32 [[RDX_NEXT_3]], [[RDX_NEXT]]
549
+ ; CHECK-NEXT: [[BIN_RDX1:%.*]] = add i32 [[RDX_NEXT_2]], [[BIN_RDX]]
550
+ ; CHECK-NEXT: [[RDX_NEXT_LCSSA:%.*]] = add i32 [[RDX_NEXT_24]], [[BIN_RDX1]]
551
+ ; CHECK-NEXT: ret i32 [[RDX_NEXT_LCSSA]]
552
+ ; FIXME: the loop/exit assertions above were copied from @test_add: they expect GEP/loads through [[SRC]], but this loop calls @foo and the function takes no pointer. Regenerate them with utils/update_test_checks.py.
553
+ entry:
554
+   br label %loop
555
+
556
+ loop:
557
+   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
558
+   %rdx = phi i32 [ %start, %entry ], [ %rdx.next, %loop ]
559
+   %iv.next = add i64 %iv, 1
560
+   %l = call i32 @foo()                 ; value added to the reduction comes from a call
561
+   %rdx.next = add i32 %rdx, %l         ; integer add reduction seeded with %start
562
+   %ec = icmp ne i64 %iv.next, 1000     ; loop runs for 1000 iterations
563
+   br i1 %ec, label %loop, label %exit
564
+
565
+ exit:
566
+   ret i32 %rdx.next
567
+ }
568
+
569
+ ; Loop with backward dependence cannot be handled by LoopVectorize, introducing additional
570
+ ; accumulators when unrolling increases throughput.
571
+ define i32 @test_add_with_backward_dep(ptr %p, i64 %n, i32 %start) {
572
+ ; CHECK-LABEL: define i32 @test_add_with_backward_dep(
573
+ ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) {
574
+ ; CHECK-NEXT: [[ENTRY:.*]]:
575
+ ; CHECK-NEXT: br label %[[LOOP:.*]]
576
+ ; CHECK: [[LOOP]]:
577
+ ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
578
+ ; CHECK-NEXT: [[RDX_1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
579
+ ; CHECK-NEXT: [[RDX_NEXT_1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_2:%.*]], %[[LOOP]] ]
580
+ ; CHECK-NEXT: [[RDX_3:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_24:%.*]], %[[LOOP]] ]
581
+ ; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ]
582
+ ; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
583
+ ; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV]]
584
+ ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 1
585
+ ; CHECK-NEXT: [[RDX_NEXT]] = add i32 [[RDX]], [[L]]
586
+ ; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
587
+ ; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT]]
588
+ ; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 1
589
+ ; CHECK-NEXT: [[RDX_NEXT_3]] = add i32 [[RDX_1]], [[L_1]]
590
+ ; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
591
+ ; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_1]]
592
+ ; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 1
593
+ ; CHECK-NEXT: [[RDX_NEXT_2]] = add i32 [[RDX_NEXT_1]], [[L_2]]
594
+ ; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
595
+ ; CHECK-NEXT: [[GEP_SRC_24:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_2]]
596
+ ; CHECK-NEXT: [[L_24:%.*]] = load i32, ptr [[GEP_SRC_24]], align 1
597
+ ; CHECK-NEXT: [[RDX_NEXT_24]] = add i32 [[RDX_3]], [[L_24]]
598
+ ; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000
599
+ ; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]]
600
+ ; CHECK: [[EXIT]]:
601
+ ; CHECK-NEXT: [[RDX_NEXT_LCSSA1:%.*]] = phi i32 [ [[RDX_NEXT_24]], %[[LOOP]] ]
602
+ ; CHECK-NEXT: [[BIN_RDX:%.*]] = add i32 [[RDX_NEXT_3]], [[RDX_NEXT]]
603
+ ; CHECK-NEXT: [[BIN_RDX1:%.*]] = add i32 [[RDX_NEXT_2]], [[BIN_RDX]]
604
+ ; CHECK-NEXT: [[RDX_NEXT_LCSSA:%.*]] = add i32 [[RDX_NEXT_24]], [[BIN_RDX1]]
605
+ ; CHECK-NEXT: ret i32 [[RDX_NEXT_LCSSA]]
606
+ ; FIXME: the loop/exit assertions above were copied from @test_add and contain no store, although this loop stores to %gep.1 on every iteration. Regenerate them with utils/update_test_checks.py.
607
+ entry:
608
+   br label %loop
609
+
610
+ loop:
611
+   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
612
+   %rdx = phi i32 [ %start, %entry ], [ %rdx.next, %loop ]
613
+   %iv.next = add i64 %iv, 1
614
+   %gep = getelementptr inbounds nuw i32, ptr %p, i64 %iv
615
+   %l = load i32, ptr %gep               ; reads p[iv]
616
+   %gep.1 = getelementptr inbounds nuw i32, ptr %p, i64 %iv.next
617
+   store i32 0, ptr %gep.1               ; writes p[iv + 1], the element the next iteration loads
618
+   %rdx.next = add i32 %rdx, %l          ; integer add reduction seeded with %start
619
+   %ec = icmp ne i64 %iv.next, 1000      ; loop runs for 1000 iterations
620
+   br i1 %ec, label %loop, label %exit
621
+
622
+ exit:
623
+   ret i32 %rdx.next
624
+ }
0 commit comments