Skip to content

Commit 3415ce5

Browse files
committed
!fixup update tests after rebase
1 parent 20b735e commit 3415ce5

File tree

1 file changed

+54
-38
lines changed

1 file changed

+54
-38
lines changed

llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll

Lines changed: 54 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -164,11 +164,20 @@ define void @main_vf_vscale_x_2_no_epi_iteration(ptr %A) #0 vscale_range(8, 8) {
164164
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
165165
; CHECK: vector.body:
166166
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
167+
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
168+
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
169+
; CHECK-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2
170+
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i64 [[TMP8]]
171+
; CHECK-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP6]], align 1
172+
; CHECK-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP9]], align 1
167173
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
168174
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
169175
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
170176
; CHECK: middle.block:
171177
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
178+
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
179+
; CHECK: scalar.ph:
180+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
172181
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
173182
; CHECK: for.body:
174183
;
@@ -190,6 +199,29 @@ define void @main_vf_vscale_x_2_no_epi_iteration(ptr %A) #0 vscale_range(8, 8) {
190199
; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]]
191200
; CHECK-VF8: vector.body:
192201
; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
202+
; CHECK-VF8-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
203+
; CHECK-VF8-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
204+
; CHECK-VF8-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2
205+
; CHECK-VF8-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i64 [[TMP8]]
206+
; CHECK-VF8-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP6]], align 1
207+
; CHECK-VF8-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP9]], align 1
208+
; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
209+
; CHECK-VF8-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
210+
; CHECK-VF8-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
211+
; CHECK-VF8: middle.block:
212+
; CHECK-VF8-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
213+
; CHECK-VF8-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
214+
; CHECK-VF8: vec.epilog.iter.check:
215+
; CHECK-VF8-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 1024, [[N_VEC]]
216+
; CHECK-VF8-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8
217+
; CHECK-VF8-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
218+
; CHECK-VF8: vec.epilog.ph:
219+
; CHECK-VF8-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
220+
; CHECK-VF8-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
221+
; CHECK-VF8: vec.epilog.vector.body:
222+
; CHECK-VF8-NEXT: [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
223+
; CHECK-VF8-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX1]]
224+
; CHECK-VF8-NEXT: store <8 x i64> splat (i64 1), ptr [[TMP11]], align 1
193225
; CHECK-VF8-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8
194226
; CHECK-VF8-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 1024
195227
; CHECK-VF8-NEXT: br i1 [[TMP14]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
@@ -250,11 +282,10 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) {
250282
; CHECK: vector.body:
251283
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
252284
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
253-
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0
254285
; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
255286
; CHECK-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 2
256287
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP16]]
257-
; CHECK-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP14]], align 1
288+
; CHECK-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP12]], align 1
258289
; CHECK-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP17]], align 1
259290
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
260291
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -274,8 +305,7 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) {
274305
; CHECK: vec.epilog.vector.body:
275306
; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT5:%.*]], [[FOR_BODY]] ]
276307
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX4]]
277-
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP19]], i32 0
278-
; CHECK-NEXT: store <8 x i64> splat (i64 1), ptr [[TMP13]], align 1
308+
; CHECK-NEXT: store <8 x i64> splat (i64 1), ptr [[TMP19]], align 1
279309
; CHECK-NEXT: [[INDEX_NEXT5]] = add nuw i64 [[INDEX4]], 8
280310
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT5]], [[N_VEC3]]
281311
; CHECK-NEXT: br i1 [[TMP20]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -307,11 +337,10 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) {
307337
; CHECK-VF8: vector.body:
308338
; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
309339
; CHECK-VF8-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
310-
; CHECK-VF8-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0
311340
; CHECK-VF8-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
312341
; CHECK-VF8-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 2
313342
; CHECK-VF8-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP16]]
314-
; CHECK-VF8-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP14]], align 1
343+
; CHECK-VF8-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP12]], align 1
315344
; CHECK-VF8-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP17]], align 1
316345
; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
317346
; CHECK-VF8-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -331,8 +360,7 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) {
331360
; CHECK-VF8: vec.epilog.vector.body:
332361
; CHECK-VF8-NEXT: [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
333362
; CHECK-VF8-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX1]]
334-
; CHECK-VF8-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[TMP20]], i32 0
335-
; CHECK-VF8-NEXT: store <8 x i64> splat (i64 1), ptr [[TMP21]], align 1
363+
; CHECK-VF8-NEXT: store <8 x i64> splat (i64 1), ptr [[TMP20]], align 1
336364
; CHECK-VF8-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8
337365
; CHECK-VF8-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC3]]
338366
; CHECK-VF8-NEXT: br i1 [[TMP19]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
@@ -523,25 +551,23 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
523551
; CHECK: vector.body:
524552
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
525553
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[INDEX]]
526-
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i32 0
527554
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
528555
; CHECK-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4
529556
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 [[TMP11]]
530-
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP9]], align 4
557+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP8]], align 4
531558
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, ptr [[TMP12]], align 4
532559
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw float, ptr [[B:%.*]], i64 [[INDEX]]
533-
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i32 0
534560
; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
535561
; CHECK-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 4
536562
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP16]]
537-
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP14]], align 4
563+
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP13]], align 4
538564
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 4 x float>, ptr [[TMP17]], align 4
539565
; CHECK-NEXT: [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD3]]
540566
; CHECK-NEXT: [[TMP19:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD2]], [[WIDE_LOAD4]]
541567
; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
542568
; CHECK-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 4
543569
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP21]]
544-
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP14]], align 4
570+
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP13]], align 4
545571
; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP22]], align 4
546572
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
547573
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -567,13 +593,11 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
567593
; CHECK: vec.epilog.vector.body:
568594
; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT10:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
569595
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX7]]
570-
; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw float, ptr [[TMP30]], i32 0
571-
; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <vscale x 2 x float>, ptr [[TMP31]], align 4
596+
; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <vscale x 2 x float>, ptr [[TMP30]], align 4
572597
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDEX7]]
573-
; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw float, ptr [[TMP32]], i32 0
574-
; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <vscale x 2 x float>, ptr [[TMP33]], align 4
598+
; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <vscale x 2 x float>, ptr [[TMP32]], align 4
575599
; CHECK-NEXT: [[TMP34:%.*]] = fmul <vscale x 2 x float> [[WIDE_LOAD8]], [[WIDE_LOAD9]]
576-
; CHECK-NEXT: store <vscale x 2 x float> [[TMP34]], ptr [[TMP33]], align 4
600+
; CHECK-NEXT: store <vscale x 2 x float> [[TMP34]], ptr [[TMP32]], align 4
577601
; CHECK-NEXT: [[INDEX_NEXT10]] = add nuw i64 [[INDEX7]], [[TMP29]]
578602
; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[INDEX_NEXT10]], [[N_VEC6]]
579603
; CHECK-NEXT: br i1 [[TMP35]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
@@ -603,25 +627,23 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
603627
; CHECK-VF8: vector.body:
604628
; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
605629
; CHECK-VF8-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[INDEX]]
606-
; CHECK-VF8-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i32 0
607630
; CHECK-VF8-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
608631
; CHECK-VF8-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 4
609632
; CHECK-VF8-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 [[TMP9]]
610-
; CHECK-VF8-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP7]], align 4
633+
; CHECK-VF8-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP6]], align 4
611634
; CHECK-VF8-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[TMP10]], align 4
612635
; CHECK-VF8-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw float, ptr [[B:%.*]], i64 [[INDEX]]
613-
; CHECK-VF8-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i32 0
614636
; CHECK-VF8-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
615637
; CHECK-VF8-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 4
616638
; CHECK-VF8-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[TMP14]]
617-
; CHECK-VF8-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, ptr [[TMP12]], align 4
639+
; CHECK-VF8-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, ptr [[TMP11]], align 4
618640
; CHECK-VF8-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP15]], align 4
619641
; CHECK-VF8-NEXT: [[TMP16:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
620642
; CHECK-VF8-NEXT: [[TMP17:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
621643
; CHECK-VF8-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
622644
; CHECK-VF8-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 4
623645
; CHECK-VF8-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[TMP19]]
624-
; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP16]], ptr [[TMP12]], align 4
646+
; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP16]], ptr [[TMP11]], align 4
625647
; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[TMP20]], align 4
626648
; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
627649
; CHECK-VF8-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -681,25 +703,23 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia
681703
; CHECK: vector.body:
682704
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
683705
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[INDEX]]
684-
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i32 0
685706
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
686707
; CHECK-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4
687708
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 [[TMP11]]
688-
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP9]], align 4
709+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP8]], align 4
689710
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, ptr [[TMP12]], align 4
690711
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw float, ptr [[B:%.*]], i64 [[INDEX]]
691-
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i32 0
692712
; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
693713
; CHECK-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 4
694714
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP16]]
695-
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP14]], align 4
715+
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP13]], align 4
696716
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 4 x float>, ptr [[TMP17]], align 4
697717
; CHECK-NEXT: [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD3]]
698718
; CHECK-NEXT: [[TMP19:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD2]], [[WIDE_LOAD4]]
699719
; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
700720
; CHECK-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 4
701721
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP21]]
702-
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP14]], align 4
722+
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP13]], align 4
703723
; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP22]], align 4
704724
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
705725
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -725,13 +745,11 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia
725745
; CHECK: vec.epilog.vector.body:
726746
; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT10:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
727747
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX7]]
728-
; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw float, ptr [[TMP30]], i32 0
729-
; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <vscale x 2 x float>, ptr [[TMP31]], align 4
748+
; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <vscale x 2 x float>, ptr [[TMP30]], align 4
730749
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDEX7]]
731-
; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw float, ptr [[TMP32]], i32 0
732-
; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <vscale x 2 x float>, ptr [[TMP33]], align 4
750+
; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <vscale x 2 x float>, ptr [[TMP32]], align 4
733751
; CHECK-NEXT: [[TMP34:%.*]] = fmul <vscale x 2 x float> [[WIDE_LOAD8]], [[WIDE_LOAD9]]
734-
; CHECK-NEXT: store <vscale x 2 x float> [[TMP34]], ptr [[TMP33]], align 4
752+
; CHECK-NEXT: store <vscale x 2 x float> [[TMP34]], ptr [[TMP32]], align 4
735753
; CHECK-NEXT: [[INDEX_NEXT10]] = add nuw i64 [[INDEX7]], [[TMP29]]
736754
; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[INDEX_NEXT10]], [[N_VEC6]]
737755
; CHECK-NEXT: br i1 [[TMP35]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
@@ -761,25 +779,23 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia
761779
; CHECK-VF8: vector.body:
762780
; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
763781
; CHECK-VF8-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[INDEX]]
764-
; CHECK-VF8-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i32 0
765782
; CHECK-VF8-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
766783
; CHECK-VF8-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 4
767784
; CHECK-VF8-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 [[TMP9]]
768-
; CHECK-VF8-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP7]], align 4
785+
; CHECK-VF8-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP6]], align 4
769786
; CHECK-VF8-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[TMP10]], align 4
770787
; CHECK-VF8-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw float, ptr [[B:%.*]], i64 [[INDEX]]
771-
; CHECK-VF8-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i32 0
772788
; CHECK-VF8-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
773789
; CHECK-VF8-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 4
774790
; CHECK-VF8-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[TMP14]]
775-
; CHECK-VF8-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, ptr [[TMP12]], align 4
791+
; CHECK-VF8-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, ptr [[TMP11]], align 4
776792
; CHECK-VF8-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP15]], align 4
777793
; CHECK-VF8-NEXT: [[TMP16:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
778794
; CHECK-VF8-NEXT: [[TMP17:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
779795
; CHECK-VF8-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
780796
; CHECK-VF8-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 4
781797
; CHECK-VF8-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[TMP19]]
782-
; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP16]], ptr [[TMP12]], align 4
798+
; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP16]], ptr [[TMP11]], align 4
783799
; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[TMP20]], align 4
784800
; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
785801
; CHECK-VF8-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]

0 commit comments

Comments
 (0)