@@ -288,38 +288,32 @@ define void @main_vector_loop_fixed_single_vector_iteration_with_runtime_checks(
288
288
; CHECK-LABEL: define void @main_vector_loop_fixed_single_vector_iteration_with_runtime_checks(
289
289
; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], ptr noalias [[D:%.*]], ptr noalias [[E:%.*]], ptr noalias [[F:%.*]], ptr noalias [[G:%.*]], ptr noalias [[H:%.*]], ptr noalias [[I:%.*]], ptr noalias [[J:%.*]], ptr noalias [[K:%.*]], ptr [[L:%.*]]) #[[ATTR1:[0-9]+]] {
290
290
; CHECK-NEXT: [[ENTRY:.*]]:
291
- ; CHECK-NEXT: br i1 true, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
291
+ ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
292
292
; CHECK: [[VECTOR_PH]]:
293
293
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
294
294
; CHECK: [[VECTOR_BODY]]:
295
- ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i64, ptr [[J]], i64 0
296
- ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i64>, ptr [[TMP0]], align 8
297
- ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
298
- ; CHECK-NEXT: [[TMP1:%.*]] = trunc <8 x i64> [[STRIDED_VEC]] to <8 x i16>
299
- ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i16, ptr [[K]], i64 0
300
- ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[K]], i64 2
301
- ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[K]], i64 4
302
- ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i16, ptr [[K]], i64 6
303
- ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[K]], i64 8
304
- ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[K]], i64 10
305
- ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i16, ptr [[K]], i64 12
306
- ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i16, ptr [[K]], i64 14
307
- ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i16> [[TMP1]], i32 0
308
- ; CHECK-NEXT: store i16 [[TMP14]], ptr [[TMP6]], align 2
309
- ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <8 x i16> [[TMP1]], i32 1
310
- ; CHECK-NEXT: store i16 [[TMP15]], ptr [[TMP7]], align 2
311
- ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i16> [[TMP1]], i32 2
312
- ; CHECK-NEXT: store i16 [[TMP16]], ptr [[TMP8]], align 2
313
- ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i16> [[TMP1]], i32 3
314
- ; CHECK-NEXT: store i16 [[TMP17]], ptr [[TMP9]], align 2
315
- ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i16> [[TMP1]], i32 4
316
- ; CHECK-NEXT: store i16 [[TMP18]], ptr [[TMP10]], align 2
317
- ; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i16> [[TMP1]], i32 5
318
- ; CHECK-NEXT: store i16 [[TMP19]], ptr [[TMP11]], align 2
319
- ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i16> [[TMP1]], i32 6
320
- ; CHECK-NEXT: store i16 [[TMP20]], ptr [[TMP12]], align 2
321
- ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
322
- ; CHECK-NEXT: store i16 [[TMP21]], ptr [[TMP13]], align 2
295
+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
296
+ ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
297
+ ; CHECK-NEXT: [[IV:%.*]] = add i64 [[OFFSET_IDX]], 0
298
+ ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
299
+ ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
300
+ ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
301
+ ; CHECK-NEXT: [[GEP_J:%.*]] = getelementptr i64, ptr [[J]], i64 [[IV]]
302
+ ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[GEP_J]], align 8
303
+ ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
304
+ ; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i64> [[STRIDED_VEC]] to <4 x i16>
305
+ ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i16, ptr [[K]], i64 [[IV]]
306
+ ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP1]]
307
+ ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP2]]
308
+ ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP3]]
309
+ ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i16> [[TMP5]], i32 0
310
+ ; CHECK-NEXT: store i16 [[TMP10]], ptr [[TMP6]], align 2
311
+ ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i16> [[TMP5]], i32 1
312
+ ; CHECK-NEXT: store i16 [[TMP11]], ptr [[TMP7]], align 2
313
+ ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2
314
+ ; CHECK-NEXT: store i16 [[TMP12]], ptr [[TMP8]], align 2
315
+ ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i16> [[TMP5]], i32 3
316
+ ; CHECK-NEXT: store i16 [[TMP13]], ptr [[TMP9]], align 2
323
317
; CHECK-NEXT: store i64 0, ptr [[A]], align 8
324
318
; CHECK-NEXT: store i64 0, ptr [[B]], align 8
325
319
; CHECK-NEXT: store i64 0, ptr [[C]], align 8
@@ -330,18 +324,20 @@ define void @main_vector_loop_fixed_single_vector_iteration_with_runtime_checks(
330
324
; CHECK-NEXT: store i64 0, ptr [[H]], align 8
331
325
; CHECK-NEXT: store i64 0, ptr [[I]], align 8
332
326
; CHECK-NEXT: store i64 0, ptr [[L]], align 8
333
- ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
327
+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
328
+ ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4
329
+ ; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
334
330
; CHECK: [[MIDDLE_BLOCK]]:
335
331
; CHECK-NEXT: br label %[[SCALAR_PH]]
336
332
; CHECK: [[SCALAR_PH]]:
337
- ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
333
+ ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 8, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
338
334
; CHECK-NEXT: br label %[[LOOP:.*]]
339
335
; CHECK: [[LOOP]]:
340
- ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
341
- ; CHECK-NEXT: [[GEP_J:%.*]] = getelementptr i64, ptr [[J]], i64 [[IV]]
342
- ; CHECK-NEXT: [[L_J:%.*]] = load i64, ptr [[GEP_J]], align 8
336
+ ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
337
+ ; CHECK-NEXT: [[GEP_J1:%.*]] = getelementptr i64, ptr [[J]], i64 [[IV1]]
338
+ ; CHECK-NEXT: [[L_J:%.*]] = load i64, ptr [[GEP_J1]], align 8
343
339
; CHECK-NEXT: [[L_TRUNC:%.*]] = trunc i64 [[L_J]] to i16
344
- ; CHECK-NEXT: [[GEP_K:%.*]] = getelementptr i16, ptr [[K]], i64 [[IV]]
340
+ ; CHECK-NEXT: [[GEP_K:%.*]] = getelementptr i16, ptr [[K]], i64 [[IV1]]
345
341
; CHECK-NEXT: store i16 [[L_TRUNC]], ptr [[GEP_K]], align 2
346
342
; CHECK-NEXT: store i64 0, ptr [[A]], align 8
347
343
; CHECK-NEXT: store i64 0, ptr [[B]], align 8
@@ -353,9 +349,9 @@ define void @main_vector_loop_fixed_single_vector_iteration_with_runtime_checks(
353
349
; CHECK-NEXT: store i64 0, ptr [[H]], align 8
354
350
; CHECK-NEXT: store i64 0, ptr [[I]], align 8
355
351
; CHECK-NEXT: store i64 0, ptr [[L]], align 8
356
- ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 2
357
- ; CHECK-NEXT: [[EC:%.*]] = icmp ult i64 [[IV]], 14
358
- ; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT:.*]], !llvm.loop [[LOOP8:![0-9]+]]
352
+ ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV1]], 2
353
+ ; CHECK-NEXT: [[EC:%.*]] = icmp ult i64 [[IV1]], 14
354
+ ; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT:.*]], !llvm.loop [[LOOP10:![0-9]+]]
359
355
; CHECK: [[EXIT]]:
360
356
; CHECK-NEXT: ret void
361
357
;
0 commit comments