@@ -164,11 +164,20 @@ define void @main_vf_vscale_x_2_no_epi_iteration(ptr %A) #0 vscale_range(8, 8) {
164
164
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
165
165
; CHECK: vector.body:
166
166
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
167
+ ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
168
+ ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
169
+ ; CHECK-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2
170
+ ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i64 [[TMP8]]
171
+ ; CHECK-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP6]], align 1
172
+ ; CHECK-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP9]], align 1
167
173
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
168
174
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
169
175
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
170
176
; CHECK: middle.block:
171
177
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
178
+ ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
179
+ ; CHECK: scalar.ph:
180
+ ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
172
181
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
173
182
; CHECK: for.body:
174
183
;
@@ -190,6 +199,29 @@ define void @main_vf_vscale_x_2_no_epi_iteration(ptr %A) #0 vscale_range(8, 8) {
190
199
; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]]
191
200
; CHECK-VF8: vector.body:
192
201
; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
202
+ ; CHECK-VF8-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
203
+ ; CHECK-VF8-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
204
+ ; CHECK-VF8-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2
205
+ ; CHECK-VF8-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i64 [[TMP8]]
206
+ ; CHECK-VF8-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP6]], align 1
207
+ ; CHECK-VF8-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP9]], align 1
208
+ ; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
209
+ ; CHECK-VF8-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
210
+ ; CHECK-VF8-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
211
+ ; CHECK-VF8: middle.block:
212
+ ; CHECK-VF8-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
213
+ ; CHECK-VF8-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
214
+ ; CHECK-VF8: vec.epilog.iter.check:
215
+ ; CHECK-VF8-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 1024, [[N_VEC]]
216
+ ; CHECK-VF8-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8
217
+ ; CHECK-VF8-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
218
+ ; CHECK-VF8: vec.epilog.ph:
219
+ ; CHECK-VF8-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
220
+ ; CHECK-VF8-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
221
+ ; CHECK-VF8: vec.epilog.vector.body:
222
+ ; CHECK-VF8-NEXT: [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
223
+ ; CHECK-VF8-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX1]]
224
+ ; CHECK-VF8-NEXT: store <8 x i64> splat (i64 1), ptr [[TMP11]], align 1
193
225
; CHECK-VF8-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8
194
226
; CHECK-VF8-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 1024
195
227
; CHECK-VF8-NEXT: br i1 [[TMP14]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
@@ -250,11 +282,10 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) {
250
282
; CHECK: vector.body:
251
283
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
252
284
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
253
- ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0
254
285
; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
255
286
; CHECK-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 2
256
287
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP16]]
257
- ; CHECK-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP14]], align 1
288
+ ; CHECK-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP12]], align 1
258
289
; CHECK-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP17]], align 1
259
290
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
260
291
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -274,8 +305,7 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) {
274
305
; CHECK: vec.epilog.vector.body:
275
306
; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT5:%.*]], [[FOR_BODY]] ]
276
307
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX4]]
277
- ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP19]], i32 0
278
- ; CHECK-NEXT: store <8 x i64> splat (i64 1), ptr [[TMP13]], align 1
308
+ ; CHECK-NEXT: store <8 x i64> splat (i64 1), ptr [[TMP19]], align 1
279
309
; CHECK-NEXT: [[INDEX_NEXT5]] = add nuw i64 [[INDEX4]], 8
280
310
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT5]], [[N_VEC3]]
281
311
; CHECK-NEXT: br i1 [[TMP20]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -307,11 +337,10 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) {
307
337
; CHECK-VF8: vector.body:
308
338
; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
309
339
; CHECK-VF8-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
310
- ; CHECK-VF8-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0
311
340
; CHECK-VF8-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
312
341
; CHECK-VF8-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 2
313
342
; CHECK-VF8-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP16]]
314
- ; CHECK-VF8-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP14]], align 1
343
+ ; CHECK-VF8-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP12]], align 1
315
344
; CHECK-VF8-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP17]], align 1
316
345
; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
317
346
; CHECK-VF8-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -331,8 +360,7 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) {
331
360
; CHECK-VF8: vec.epilog.vector.body:
332
361
; CHECK-VF8-NEXT: [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
333
362
; CHECK-VF8-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX1]]
334
- ; CHECK-VF8-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[TMP20]], i32 0
335
- ; CHECK-VF8-NEXT: store <8 x i64> splat (i64 1), ptr [[TMP21]], align 1
363
+ ; CHECK-VF8-NEXT: store <8 x i64> splat (i64 1), ptr [[TMP20]], align 1
336
364
; CHECK-VF8-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8
337
365
; CHECK-VF8-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC3]]
338
366
; CHECK-VF8-NEXT: br i1 [[TMP19]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
@@ -523,25 +551,23 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
523
551
; CHECK: vector.body:
524
552
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
525
553
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[INDEX]]
526
- ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i32 0
527
554
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
528
555
; CHECK-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4
529
556
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 [[TMP11]]
530
- ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP9]], align 4
557
+ ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP8]], align 4
531
558
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, ptr [[TMP12]], align 4
532
559
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw float, ptr [[B:%.*]], i64 [[INDEX]]
533
- ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i32 0
534
560
; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
535
561
; CHECK-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 4
536
562
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP16]]
537
- ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP14]], align 4
563
+ ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP13]], align 4
538
564
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 4 x float>, ptr [[TMP17]], align 4
539
565
; CHECK-NEXT: [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD3]]
540
566
; CHECK-NEXT: [[TMP19:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD2]], [[WIDE_LOAD4]]
541
567
; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
542
568
; CHECK-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 4
543
569
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP21]]
544
- ; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP14]], align 4
570
+ ; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP13]], align 4
545
571
; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP22]], align 4
546
572
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
547
573
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -567,13 +593,11 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
567
593
; CHECK: vec.epilog.vector.body:
568
594
; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT10:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
569
595
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX7]]
570
- ; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw float, ptr [[TMP30]], i32 0
571
- ; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <vscale x 2 x float>, ptr [[TMP31]], align 4
596
+ ; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <vscale x 2 x float>, ptr [[TMP30]], align 4
572
597
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDEX7]]
573
- ; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw float, ptr [[TMP32]], i32 0
574
- ; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <vscale x 2 x float>, ptr [[TMP33]], align 4
598
+ ; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <vscale x 2 x float>, ptr [[TMP32]], align 4
575
599
; CHECK-NEXT: [[TMP34:%.*]] = fmul <vscale x 2 x float> [[WIDE_LOAD8]], [[WIDE_LOAD9]]
576
- ; CHECK-NEXT: store <vscale x 2 x float> [[TMP34]], ptr [[TMP33]], align 4
600
+ ; CHECK-NEXT: store <vscale x 2 x float> [[TMP34]], ptr [[TMP32]], align 4
577
601
; CHECK-NEXT: [[INDEX_NEXT10]] = add nuw i64 [[INDEX7]], [[TMP29]]
578
602
; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[INDEX_NEXT10]], [[N_VEC6]]
579
603
; CHECK-NEXT: br i1 [[TMP35]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
@@ -603,25 +627,23 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
603
627
; CHECK-VF8: vector.body:
604
628
; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
605
629
; CHECK-VF8-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[INDEX]]
606
- ; CHECK-VF8-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i32 0
607
630
; CHECK-VF8-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
608
631
; CHECK-VF8-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 4
609
632
; CHECK-VF8-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 [[TMP9]]
610
- ; CHECK-VF8-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP7]], align 4
633
+ ; CHECK-VF8-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP6]], align 4
611
634
; CHECK-VF8-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[TMP10]], align 4
612
635
; CHECK-VF8-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw float, ptr [[B:%.*]], i64 [[INDEX]]
613
- ; CHECK-VF8-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i32 0
614
636
; CHECK-VF8-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
615
637
; CHECK-VF8-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 4
616
638
; CHECK-VF8-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[TMP14]]
617
- ; CHECK-VF8-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, ptr [[TMP12]], align 4
639
+ ; CHECK-VF8-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, ptr [[TMP11]], align 4
618
640
; CHECK-VF8-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP15]], align 4
619
641
; CHECK-VF8-NEXT: [[TMP16:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
620
642
; CHECK-VF8-NEXT: [[TMP17:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
621
643
; CHECK-VF8-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
622
644
; CHECK-VF8-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 4
623
645
; CHECK-VF8-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[TMP19]]
624
- ; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP16]], ptr [[TMP12]], align 4
646
+ ; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP16]], ptr [[TMP11]], align 4
625
647
; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[TMP20]], align 4
626
648
; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
627
649
; CHECK-VF8-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -681,25 +703,23 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia
681
703
; CHECK: vector.body:
682
704
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
683
705
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[INDEX]]
684
- ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i32 0
685
706
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
686
707
; CHECK-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4
687
708
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 [[TMP11]]
688
- ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP9]], align 4
709
+ ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP8]], align 4
689
710
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, ptr [[TMP12]], align 4
690
711
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw float, ptr [[B:%.*]], i64 [[INDEX]]
691
- ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i32 0
692
712
; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
693
713
; CHECK-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 4
694
714
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP16]]
695
- ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP14]], align 4
715
+ ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP13]], align 4
696
716
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 4 x float>, ptr [[TMP17]], align 4
697
717
; CHECK-NEXT: [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD3]]
698
718
; CHECK-NEXT: [[TMP19:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD2]], [[WIDE_LOAD4]]
699
719
; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
700
720
; CHECK-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 4
701
721
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP21]]
702
- ; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP14]], align 4
722
+ ; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP13]], align 4
703
723
; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP22]], align 4
704
724
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
705
725
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -725,13 +745,11 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia
725
745
; CHECK: vec.epilog.vector.body:
726
746
; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT10:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
727
747
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX7]]
728
- ; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw float, ptr [[TMP30]], i32 0
729
- ; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <vscale x 2 x float>, ptr [[TMP31]], align 4
748
+ ; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <vscale x 2 x float>, ptr [[TMP30]], align 4
730
749
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDEX7]]
731
- ; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw float, ptr [[TMP32]], i32 0
732
- ; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <vscale x 2 x float>, ptr [[TMP33]], align 4
750
+ ; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <vscale x 2 x float>, ptr [[TMP32]], align 4
733
751
; CHECK-NEXT: [[TMP34:%.*]] = fmul <vscale x 2 x float> [[WIDE_LOAD8]], [[WIDE_LOAD9]]
734
- ; CHECK-NEXT: store <vscale x 2 x float> [[TMP34]], ptr [[TMP33]], align 4
752
+ ; CHECK-NEXT: store <vscale x 2 x float> [[TMP34]], ptr [[TMP32]], align 4
735
753
; CHECK-NEXT: [[INDEX_NEXT10]] = add nuw i64 [[INDEX7]], [[TMP29]]
736
754
; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[INDEX_NEXT10]], [[N_VEC6]]
737
755
; CHECK-NEXT: br i1 [[TMP35]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
@@ -761,25 +779,23 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia
761
779
; CHECK-VF8: vector.body:
762
780
; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
763
781
; CHECK-VF8-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[INDEX]]
764
- ; CHECK-VF8-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i32 0
765
782
; CHECK-VF8-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
766
783
; CHECK-VF8-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 4
767
784
; CHECK-VF8-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 [[TMP9]]
768
- ; CHECK-VF8-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP7]], align 4
785
+ ; CHECK-VF8-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP6]], align 4
769
786
; CHECK-VF8-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[TMP10]], align 4
770
787
; CHECK-VF8-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw float, ptr [[B:%.*]], i64 [[INDEX]]
771
- ; CHECK-VF8-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i32 0
772
788
; CHECK-VF8-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
773
789
; CHECK-VF8-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 4
774
790
; CHECK-VF8-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[TMP14]]
775
- ; CHECK-VF8-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, ptr [[TMP12]], align 4
791
+ ; CHECK-VF8-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, ptr [[TMP11]], align 4
776
792
; CHECK-VF8-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP15]], align 4
777
793
; CHECK-VF8-NEXT: [[TMP16:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
778
794
; CHECK-VF8-NEXT: [[TMP17:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
779
795
; CHECK-VF8-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
780
796
; CHECK-VF8-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 4
781
797
; CHECK-VF8-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 [[TMP19]]
782
- ; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP16]], ptr [[TMP12]], align 4
798
+ ; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP16]], ptr [[TMP11]], align 4
783
799
; CHECK-VF8-NEXT: store <vscale x 4 x float> [[TMP17]], ptr [[TMP20]], align 4
784
800
; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
785
801
; CHECK-VF8-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
0 commit comments