@@ -515,40 +515,33 @@ declare i32 @foo()
515
515
; Loop with a call cannot be handled by LoopVectorize, introducing additional
516
516
; accumulators when unrolling increases throughput.
517
517
define i32 @test_add_with_call (i64 %n , i32 %start ) {
518
- ; CHECK-LABEL: define i32 @test_add (
519
- ; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) {
518
+ ; CHECK-LABEL: define i32 @test_add_with_call (
519
+ ; CHECK-SAME: i64 [[N:%.*]], i32 [[START:%.*]]) {
520
520
; CHECK-NEXT: [[ENTRY:.*]]:
521
521
; CHECK-NEXT: br label %[[LOOP:.*]]
522
522
; CHECK: [[LOOP]]:
523
523
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
524
- ; CHECK-NEXT: [[RDX_1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_3 :%.*]], %[[LOOP]] ]
525
- ; CHECK-NEXT: [[RDX_NEXT_1 :%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_2:%.*]], %[[LOOP]] ]
526
- ; CHECK-NEXT: [[RDX_3:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_24 :%.*]], %[[LOOP]] ]
524
+ ; CHECK-NEXT: [[RDX_1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_1 :%.*]], %[[LOOP]] ]
525
+ ; CHECK-NEXT: [[RDX_2 :%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_2:%.*]], %[[LOOP]] ]
526
+ ; CHECK-NEXT: [[RDX_3:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_3 :%.*]], %[[LOOP]] ]
527
527
; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ]
528
- ; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
529
- ; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV]]
530
- ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 1
528
+ ; CHECK-NEXT: [[L:%.*]] = call i32 @foo()
531
529
; CHECK-NEXT: [[RDX_NEXT]] = add i32 [[RDX]], [[L]]
532
- ; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
533
- ; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT]]
534
- ; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 1
535
- ; CHECK-NEXT: [[RDX_NEXT_3]] = add i32 [[RDX_1]], [[L_1]]
536
- ; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
537
- ; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_1]]
538
- ; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 1
539
- ; CHECK-NEXT: [[RDX_NEXT_2]] = add i32 [[RDX_NEXT_1]], [[L_2]]
530
+ ; CHECK-NEXT: [[L_1:%.*]] = call i32 @foo()
531
+ ; CHECK-NEXT: [[RDX_NEXT_1]] = add i32 [[RDX_1]], [[L_1]]
532
+ ; CHECK-NEXT: [[L_2:%.*]] = call i32 @foo()
533
+ ; CHECK-NEXT: [[RDX_NEXT_2]] = add i32 [[RDX_2]], [[L_2]]
540
534
; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
541
- ; CHECK-NEXT: [[GEP_SRC_24:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_2]]
542
- ; CHECK-NEXT: [[L_24:%.*]] = load i32, ptr [[GEP_SRC_24]], align 1
543
- ; CHECK-NEXT: [[RDX_NEXT_24]] = add i32 [[RDX_3]], [[L_24]]
535
+ ; CHECK-NEXT: [[L_3:%.*]] = call i32 @foo()
536
+ ; CHECK-NEXT: [[RDX_NEXT_3]] = add i32 [[RDX_3]], [[L_3]]
544
537
; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000
545
538
; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]]
546
539
; CHECK: [[EXIT]]:
547
- ; CHECK-NEXT: [[RDX_NEXT_LCSSA1 :%.*]] = phi i32 [ [[RDX_NEXT_24 ]], %[[LOOP]] ]
548
- ; CHECK-NEXT: [[BIN_RDX:%.*]] = add i32 [[RDX_NEXT_3 ]], [[RDX_NEXT]]
540
+ ; CHECK-NEXT: [[RDX_NEXT_LCSSA :%.*]] = phi i32 [ [[RDX_NEXT_3 ]], %[[LOOP]] ]
541
+ ; CHECK-NEXT: [[BIN_RDX:%.*]] = add i32 [[RDX_NEXT_1 ]], [[RDX_NEXT]]
549
542
; CHECK-NEXT: [[BIN_RDX1:%.*]] = add i32 [[RDX_NEXT_2]], [[BIN_RDX]]
550
- ; CHECK-NEXT: [[RDX_NEXT_LCSSA :%.*]] = add i32 [[RDX_NEXT_24 ]], [[BIN_RDX1]]
551
- ; CHECK-NEXT: ret i32 [[RDX_NEXT_LCSSA ]]
543
+ ; CHECK-NEXT: [[BIN_RDX2 :%.*]] = add i32 [[RDX_NEXT_3 ]], [[BIN_RDX1]]
544
+ ; CHECK-NEXT: ret i32 [[BIN_RDX2 ]]
552
545
;
553
546
entry:
554
547
br label %loop
@@ -569,40 +562,48 @@ exit:
569
562
; Loop with backward dependence cannot be handled by LoopVectorize, introducing additional
570
563
; accumulators when unrolling increases throughput.
571
564
define i32 @test_add_with_backward_dep (ptr %p , i64 %n , i32 %start ) {
572
- ; CHECK-LABEL: define i32 @test_add (
573
- ; CHECK-SAME: ptr [[SRC :%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) {
565
+ ; CHECK-LABEL: define i32 @test_add_with_backward_dep (
566
+ ; CHECK-SAME: ptr [[P :%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) {
574
567
; CHECK-NEXT: [[ENTRY:.*]]:
575
568
; CHECK-NEXT: br label %[[LOOP:.*]]
576
569
; CHECK: [[LOOP]]:
577
570
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
578
- ; CHECK-NEXT: [[RDX_1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_3 :%.*]], %[[LOOP]] ]
579
- ; CHECK-NEXT: [[RDX_NEXT_1 :%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_2:%.*]], %[[LOOP]] ]
580
- ; CHECK-NEXT: [[RDX_3:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_24 :%.*]], %[[LOOP]] ]
571
+ ; CHECK-NEXT: [[RDX_1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_1 :%.*]], %[[LOOP]] ]
572
+ ; CHECK-NEXT: [[RDX_2 :%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_2:%.*]], %[[LOOP]] ]
573
+ ; CHECK-NEXT: [[RDX_3:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_3 :%.*]], %[[LOOP]] ]
581
574
; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ]
582
575
; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
583
- ; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV]]
584
- ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 1
576
+ ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV]]
577
+ ; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP]], align 4
578
+ ; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT]]
579
+ ; CHECK-NEXT: store i32 0, ptr [[GEP_1]], align 4
585
580
; CHECK-NEXT: [[RDX_NEXT]] = add i32 [[RDX]], [[L]]
586
581
; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
587
- ; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT]]
588
- ; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 1
589
- ; CHECK-NEXT: [[RDX_NEXT_3]] = add i32 [[RDX_1]], [[L_1]]
582
+ ; CHECK-NEXT: [[GEP_11:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT]]
583
+ ; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[GEP_11]], align 4
584
+ ; CHECK-NEXT: [[GEP_1_1:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT_1]]
585
+ ; CHECK-NEXT: store i32 0, ptr [[GEP_1_1]], align 4
586
+ ; CHECK-NEXT: [[RDX_NEXT_1]] = add i32 [[RDX_1]], [[L_1]]
590
587
; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
591
- ; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_1]]
592
- ; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 1
593
- ; CHECK-NEXT: [[RDX_NEXT_2]] = add i32 [[RDX_NEXT_1]], [[L_2]]
588
+ ; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT_1]]
589
+ ; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[GEP_2]], align 4
590
+ ; CHECK-NEXT: [[GEP_1_2:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT_2]]
591
+ ; CHECK-NEXT: store i32 0, ptr [[GEP_1_2]], align 4
592
+ ; CHECK-NEXT: [[RDX_NEXT_2]] = add i32 [[RDX_2]], [[L_2]]
594
593
; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
595
- ; CHECK-NEXT: [[GEP_SRC_24:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[IV_NEXT_2]]
596
- ; CHECK-NEXT: [[L_24:%.*]] = load i32, ptr [[GEP_SRC_24]], align 1
597
- ; CHECK-NEXT: [[RDX_NEXT_24]] = add i32 [[RDX_3]], [[L_24]]
594
+ ; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT_2]]
595
+ ; CHECK-NEXT: [[L_3:%.*]] = load i32, ptr [[GEP_3]], align 4
596
+ ; CHECK-NEXT: [[GEP_1_3:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT_3]]
597
+ ; CHECK-NEXT: store i32 0, ptr [[GEP_1_3]], align 4
598
+ ; CHECK-NEXT: [[RDX_NEXT_3]] = add i32 [[RDX_3]], [[L_3]]
598
599
; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000
599
600
; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]]
600
601
; CHECK: [[EXIT]]:
601
- ; CHECK-NEXT: [[RDX_NEXT_LCSSA1 :%.*]] = phi i32 [ [[RDX_NEXT_24 ]], %[[LOOP]] ]
602
- ; CHECK-NEXT: [[BIN_RDX:%.*]] = add i32 [[RDX_NEXT_3 ]], [[RDX_NEXT]]
603
- ; CHECK-NEXT: [[BIN_RDX1 :%.*]] = add i32 [[RDX_NEXT_2]], [[BIN_RDX]]
604
- ; CHECK-NEXT: [[RDX_NEXT_LCSSA :%.*]] = add i32 [[RDX_NEXT_24 ]], [[BIN_RDX1 ]]
605
- ; CHECK-NEXT: ret i32 [[RDX_NEXT_LCSSA ]]
602
+ ; CHECK-NEXT: [[RDX_NEXT_LCSSA :%.*]] = phi i32 [ [[RDX_NEXT_3 ]], %[[LOOP]] ]
603
+ ; CHECK-NEXT: [[BIN_RDX:%.*]] = add i32 [[RDX_NEXT_1 ]], [[RDX_NEXT]]
604
+ ; CHECK-NEXT: [[BIN_RDX2 :%.*]] = add i32 [[RDX_NEXT_2]], [[BIN_RDX]]
605
+ ; CHECK-NEXT: [[BIN_RDX3 :%.*]] = add i32 [[RDX_NEXT_3 ]], [[BIN_RDX2 ]]
606
+ ; CHECK-NEXT: ret i32 [[BIN_RDX3 ]]
606
607
;
607
608
entry:
608
609
br label %loop
0 commit comments