@@ -265,11 +265,9 @@ define <4 x i32> @test_mm_mask_dpbssd_epi32(<4 x i32> %__W, i4 zeroext %__U, <4
265
265
; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i1> [[TMP13]], [[TMP14]]
266
266
; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i1> [[TMP16]], [[TMP15]]
267
267
; CHECK-NEXT: [[TMP18:%.*]] = sext <16 x i1> [[TMP17]] to <16 x i8>
268
- ; CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i8> [[TMP18]] to <8 x i16>
269
- ; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <8 x i16> [[TMP19]], zeroinitializer
270
- ; CHECK-NEXT: [[TMP21:%.*]] = sext <8 x i1> [[TMP20]] to <8 x i16>
271
- ; CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i16> [[TMP21]] to i128
272
- ; CHECK-NEXT: [[TMP23:%.*]] = bitcast i128 [[TMP22]] to <4 x i32>
268
+ ; CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i8> [[TMP18]] to <4 x i32>
269
+ ; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <4 x i32> [[TMP19]], zeroinitializer
270
+ ; CHECK-NEXT: [[TMP23:%.*]] = sext <4 x i1> [[TMP20]] to <4 x i32>
273
271
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP23]], [[TMP1]]
274
272
; CHECK-NEXT: [[DPI:%.*]] = tail call <4 x i32> @llvm.x86.avx2.vpdpbssd.128(<4 x i32> [[__W]], <4 x i32> [[__A]], <4 x i32> [[__B]])
275
273
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i4 [[TMP4]] to <4 x i1>
@@ -311,11 +309,9 @@ define <4 x i32> @test_mm_maskz_dpbssds_epi32(i4 zeroext %__U, <4 x i32> %__W, <
311
309
; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i1> [[TMP13]], [[TMP14]]
312
310
; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i1> [[TMP16]], [[TMP15]]
313
311
; CHECK-NEXT: [[TMP18:%.*]] = sext <16 x i1> [[TMP17]] to <16 x i8>
314
- ; CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i8> [[TMP18]] to <8 x i16>
315
- ; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <8 x i16> [[TMP19]], zeroinitializer
316
- ; CHECK-NEXT: [[TMP21:%.*]] = sext <8 x i1> [[TMP20]] to <8 x i16>
317
- ; CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i16> [[TMP21]] to i128
318
- ; CHECK-NEXT: [[TMP23:%.*]] = bitcast i128 [[TMP22]] to <4 x i32>
312
+ ; CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i8> [[TMP18]] to <4 x i32>
313
+ ; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <4 x i32> [[TMP19]], zeroinitializer
314
+ ; CHECK-NEXT: [[TMP23:%.*]] = sext <4 x i1> [[TMP20]] to <4 x i32>
319
315
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP23]], [[TMP24]]
320
316
; CHECK-NEXT: [[DPI:%.*]] = tail call <4 x i32> @llvm.x86.avx2.vpdpbssds.128(<4 x i32> [[__W]], <4 x i32> [[__A]], <4 x i32> [[__B]])
321
317
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i4 [[TMP4]] to <4 x i1>
@@ -357,11 +353,9 @@ define <8 x i32> @test_mm256_maskz_dpbssds_epi32(<8 x i32> %__W, i8 zeroext %__U
357
353
; CHECK-NEXT: [[TMP16:%.*]] = or <32 x i1> [[TMP13]], [[TMP14]]
358
354
; CHECK-NEXT: [[TMP17:%.*]] = or <32 x i1> [[TMP16]], [[TMP15]]
359
355
; CHECK-NEXT: [[TMP18:%.*]] = sext <32 x i1> [[TMP17]] to <32 x i8>
360
- ; CHECK-NEXT: [[TMP19:%.*]] = bitcast <32 x i8> [[TMP18]] to <16 x i16>
361
- ; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <16 x i16> [[TMP19]], zeroinitializer
362
- ; CHECK-NEXT: [[TMP21:%.*]] = sext <16 x i1> [[TMP20]] to <16 x i16>
363
- ; CHECK-NEXT: [[TMP22:%.*]] = bitcast <16 x i16> [[TMP21]] to i256
364
- ; CHECK-NEXT: [[TMP23:%.*]] = bitcast i256 [[TMP22]] to <8 x i32>
356
+ ; CHECK-NEXT: [[TMP19:%.*]] = bitcast <32 x i8> [[TMP18]] to <8 x i32>
357
+ ; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <8 x i32> [[TMP19]], zeroinitializer
358
+ ; CHECK-NEXT: [[TMP23:%.*]] = sext <8 x i1> [[TMP20]] to <8 x i32>
365
359
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[TMP23]], [[TMP1]]
366
360
; CHECK-NEXT: [[DPI:%.*]] = tail call <8 x i32> @llvm.x86.avx2.vpdpbssds.256(<8 x i32> [[__W]], <8 x i32> [[__A]], <8 x i32> [[__B]])
367
361
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
@@ -403,11 +397,9 @@ define <8 x i32> @test_mm256_mask_dpbssd_epi32(i8 zeroext %__U, <8 x i32> %__W,
403
397
; CHECK-NEXT: [[TMP16:%.*]] = or <32 x i1> [[TMP13]], [[TMP14]]
404
398
; CHECK-NEXT: [[TMP17:%.*]] = or <32 x i1> [[TMP16]], [[TMP15]]
405
399
; CHECK-NEXT: [[TMP18:%.*]] = sext <32 x i1> [[TMP17]] to <32 x i8>
406
- ; CHECK-NEXT: [[TMP19:%.*]] = bitcast <32 x i8> [[TMP18]] to <16 x i16>
407
- ; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <16 x i16> [[TMP19]], zeroinitializer
408
- ; CHECK-NEXT: [[TMP21:%.*]] = sext <16 x i1> [[TMP20]] to <16 x i16>
409
- ; CHECK-NEXT: [[TMP22:%.*]] = bitcast <16 x i16> [[TMP21]] to i256
410
- ; CHECK-NEXT: [[TMP23:%.*]] = bitcast i256 [[TMP22]] to <8 x i32>
400
+ ; CHECK-NEXT: [[TMP19:%.*]] = bitcast <32 x i8> [[TMP18]] to <8 x i32>
401
+ ; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <8 x i32> [[TMP19]], zeroinitializer
402
+ ; CHECK-NEXT: [[TMP23:%.*]] = sext <8 x i1> [[TMP20]] to <8 x i32>
411
403
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[TMP23]], [[TMP24]]
412
404
; CHECK-NEXT: [[DPI:%.*]] = tail call <8 x i32> @llvm.x86.avx2.vpdpbssd.256(<8 x i32> [[__W]], <8 x i32> [[__A]], <8 x i32> [[__B]])
413
405
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
0 commit comments