Skip to content

Commit 1cc84bc

Browse files
authored
[msan] Fix multiply-add-accumulate (#153927) to use ReductionFactor (#155748)
#153927 incorrectly cast using a hardcoded reduction factor of two, rather than using the parameter. This caused false negatives but not false positives. (The only incorrect case was a reduction factor of four; if four values {A,B,C,D} are being reduced, the result is fully zero iff {A,B} and {C,D} are both zero after pairwise reduction. If only one of those reduced pairs is zero, then the quadwise reduction is non-zero.)
1 parent abb62b6 commit 1cc84bc

File tree

9 files changed

+171
-279
lines changed

9 files changed

+171
-279
lines changed

llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3910,8 +3910,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
39103910
FixedVectorType *ImplicitReturnType = ReturnType;
39113911
// Step 1: instrument multiplication of corresponding vector elements
39123912
if (EltSizeInBits) {
3913-
ImplicitReturnType = cast<FixedVectorType>(getMMXVectorTy(
3914-
EltSizeInBits * 2, ParamType->getPrimitiveSizeInBits()));
3913+
ImplicitReturnType = cast<FixedVectorType>(
3914+
getMMXVectorTy(EltSizeInBits * ReductionFactor,
3915+
ParamType->getPrimitiveSizeInBits()));
39153916
ParamType = cast<FixedVectorType>(
39163917
getMMXVectorTy(EltSizeInBits, ParamType->getPrimitiveSizeInBits()));
39173918

@@ -3959,7 +3960,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
39593960

39603961
// Step 2: instrument horizontal add
39613962
// We don't need bit-precise horizontalReduce because we only want to check
3962-
// if each pair of elements is fully zero.
3963+
// if each pair/quad of elements is fully zero.
39633964
// Cast to <4 x i32>.
39643965
Value *Horizontal = IRB.CreateBitCast(And, ImplicitReturnType);
39653966

@@ -3969,7 +3970,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
39693970
Constant::getNullValue(Horizontal->getType())),
39703971
ImplicitReturnType);
39713972

3972-
// Cast it back to the required fake return type (<1 x i64>).
3973+
// Cast it back to the required fake return type (if MMX: <1 x i64>; for
3974+
// AVX, it is already correct).
39733975
if (EltSizeInBits)
39743976
OutShadow = CreateShadowCast(IRB, OutShadow, getShadowTy(&I));
39753977

llvm/test/Instrumentation/MemorySanitizer/X86/avx10_2_512ni-intrinsics.ll

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -156,11 +156,9 @@ define <16 x i32> @test_mm512_dpbssd_epi32(<16 x i32> %__W, <16 x i32> %__A, ptr
156156
; CHECK-NEXT: [[TMP20:%.*]] = or <64 x i1> [[TMP17]], [[TMP18]]
157157
; CHECK-NEXT: [[TMP21:%.*]] = or <64 x i1> [[TMP20]], [[TMP19]]
158158
; CHECK-NEXT: [[TMP22:%.*]] = sext <64 x i1> [[TMP21]] to <64 x i8>
159-
; CHECK-NEXT: [[TMP23:%.*]] = bitcast <64 x i8> [[TMP22]] to <32 x i16>
160-
; CHECK-NEXT: [[TMP24:%.*]] = icmp ne <32 x i16> [[TMP23]], zeroinitializer
161-
; CHECK-NEXT: [[TMP25:%.*]] = sext <32 x i1> [[TMP24]] to <32 x i16>
162-
; CHECK-NEXT: [[TMP26:%.*]] = bitcast <32 x i16> [[TMP25]] to i512
163-
; CHECK-NEXT: [[TMP27:%.*]] = bitcast i512 [[TMP26]] to <16 x i32>
159+
; CHECK-NEXT: [[TMP23:%.*]] = bitcast <64 x i8> [[TMP22]] to <16 x i32>
160+
; CHECK-NEXT: [[TMP24:%.*]] = icmp ne <16 x i32> [[TMP23]], zeroinitializer
161+
; CHECK-NEXT: [[TMP27:%.*]] = sext <16 x i1> [[TMP24]] to <16 x i32>
164162
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[TMP27]], [[TMP4]]
165163
; CHECK-NEXT: [[RES:%.*]] = tail call <16 x i32> @llvm.x86.avx10.vpdpbssd.512(<16 x i32> [[__W]], <16 x i32> [[__A]], <16 x i32> [[__B]])
166164
; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
@@ -193,11 +191,9 @@ define <16 x i32> @test_mm512_mask_dpbssds_epi32(<16 x i32> %__W, i16 zeroext %_
193191
; CHECK-NEXT: [[TMP16:%.*]] = or <64 x i1> [[TMP13]], [[TMP14]]
194192
; CHECK-NEXT: [[TMP17:%.*]] = or <64 x i1> [[TMP16]], [[TMP15]]
195193
; CHECK-NEXT: [[TMP18:%.*]] = sext <64 x i1> [[TMP17]] to <64 x i8>
196-
; CHECK-NEXT: [[TMP19:%.*]] = bitcast <64 x i8> [[TMP18]] to <32 x i16>
197-
; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <32 x i16> [[TMP19]], zeroinitializer
198-
; CHECK-NEXT: [[TMP21:%.*]] = sext <32 x i1> [[TMP20]] to <32 x i16>
199-
; CHECK-NEXT: [[TMP22:%.*]] = bitcast <32 x i16> [[TMP21]] to i512
200-
; CHECK-NEXT: [[TMP23:%.*]] = bitcast i512 [[TMP22]] to <16 x i32>
194+
; CHECK-NEXT: [[TMP19:%.*]] = bitcast <64 x i8> [[TMP18]] to <16 x i32>
195+
; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <16 x i32> [[TMP19]], zeroinitializer
196+
; CHECK-NEXT: [[TMP23:%.*]] = sext <16 x i1> [[TMP20]] to <16 x i32>
201197
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[TMP23]], [[TMP1]]
202198
; CHECK-NEXT: [[DPI:%.*]] = tail call <16 x i32> @llvm.x86.avx10.vpdpbssds.512(<16 x i32> [[__W]], <16 x i32> [[__A]], <16 x i32> [[__B]])
203199
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
@@ -239,11 +235,9 @@ define <16 x i32> @test_mm512_maskz_dpbssd_epi32(i16 zeroext %__U, <16 x i32> %_
239235
; CHECK-NEXT: [[TMP16:%.*]] = or <64 x i1> [[TMP13]], [[TMP14]]
240236
; CHECK-NEXT: [[TMP17:%.*]] = or <64 x i1> [[TMP16]], [[TMP15]]
241237
; CHECK-NEXT: [[TMP18:%.*]] = sext <64 x i1> [[TMP17]] to <64 x i8>
242-
; CHECK-NEXT: [[TMP19:%.*]] = bitcast <64 x i8> [[TMP18]] to <32 x i16>
243-
; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <32 x i16> [[TMP19]], zeroinitializer
244-
; CHECK-NEXT: [[TMP21:%.*]] = sext <32 x i1> [[TMP20]] to <32 x i16>
245-
; CHECK-NEXT: [[TMP22:%.*]] = bitcast <32 x i16> [[TMP21]] to i512
246-
; CHECK-NEXT: [[TMP23:%.*]] = bitcast i512 [[TMP22]] to <16 x i32>
238+
; CHECK-NEXT: [[TMP19:%.*]] = bitcast <64 x i8> [[TMP18]] to <16 x i32>
239+
; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <16 x i32> [[TMP19]], zeroinitializer
240+
; CHECK-NEXT: [[TMP23:%.*]] = sext <16 x i1> [[TMP20]] to <16 x i32>
247241
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[TMP23]], [[TMP24]]
248242
; CHECK-NEXT: [[DPI:%.*]] = tail call <16 x i32> @llvm.x86.avx10.vpdpbssd.512(<16 x i32> [[__W]], <16 x i32> [[__A]], <16 x i32> [[__B]])
249243
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>

llvm/test/Instrumentation/MemorySanitizer/X86/avx10_2ni-intrinsics.ll

Lines changed: 12 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -265,11 +265,9 @@ define <4 x i32> @test_mm_mask_dpbssd_epi32(<4 x i32> %__W, i4 zeroext %__U, <4
265265
; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i1> [[TMP13]], [[TMP14]]
266266
; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i1> [[TMP16]], [[TMP15]]
267267
; CHECK-NEXT: [[TMP18:%.*]] = sext <16 x i1> [[TMP17]] to <16 x i8>
268-
; CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i8> [[TMP18]] to <8 x i16>
269-
; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <8 x i16> [[TMP19]], zeroinitializer
270-
; CHECK-NEXT: [[TMP21:%.*]] = sext <8 x i1> [[TMP20]] to <8 x i16>
271-
; CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i16> [[TMP21]] to i128
272-
; CHECK-NEXT: [[TMP23:%.*]] = bitcast i128 [[TMP22]] to <4 x i32>
268+
; CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i8> [[TMP18]] to <4 x i32>
269+
; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <4 x i32> [[TMP19]], zeroinitializer
270+
; CHECK-NEXT: [[TMP23:%.*]] = sext <4 x i1> [[TMP20]] to <4 x i32>
273271
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP23]], [[TMP1]]
274272
; CHECK-NEXT: [[DPI:%.*]] = tail call <4 x i32> @llvm.x86.avx2.vpdpbssd.128(<4 x i32> [[__W]], <4 x i32> [[__A]], <4 x i32> [[__B]])
275273
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i4 [[TMP4]] to <4 x i1>
@@ -311,11 +309,9 @@ define <4 x i32> @test_mm_maskz_dpbssds_epi32(i4 zeroext %__U, <4 x i32> %__W, <
311309
; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i1> [[TMP13]], [[TMP14]]
312310
; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i1> [[TMP16]], [[TMP15]]
313311
; CHECK-NEXT: [[TMP18:%.*]] = sext <16 x i1> [[TMP17]] to <16 x i8>
314-
; CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i8> [[TMP18]] to <8 x i16>
315-
; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <8 x i16> [[TMP19]], zeroinitializer
316-
; CHECK-NEXT: [[TMP21:%.*]] = sext <8 x i1> [[TMP20]] to <8 x i16>
317-
; CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i16> [[TMP21]] to i128
318-
; CHECK-NEXT: [[TMP23:%.*]] = bitcast i128 [[TMP22]] to <4 x i32>
312+
; CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i8> [[TMP18]] to <4 x i32>
313+
; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <4 x i32> [[TMP19]], zeroinitializer
314+
; CHECK-NEXT: [[TMP23:%.*]] = sext <4 x i1> [[TMP20]] to <4 x i32>
319315
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP23]], [[TMP24]]
320316
; CHECK-NEXT: [[DPI:%.*]] = tail call <4 x i32> @llvm.x86.avx2.vpdpbssds.128(<4 x i32> [[__W]], <4 x i32> [[__A]], <4 x i32> [[__B]])
321317
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i4 [[TMP4]] to <4 x i1>
@@ -357,11 +353,9 @@ define <8 x i32> @test_mm256_maskz_dpbssds_epi32(<8 x i32> %__W, i8 zeroext %__U
357353
; CHECK-NEXT: [[TMP16:%.*]] = or <32 x i1> [[TMP13]], [[TMP14]]
358354
; CHECK-NEXT: [[TMP17:%.*]] = or <32 x i1> [[TMP16]], [[TMP15]]
359355
; CHECK-NEXT: [[TMP18:%.*]] = sext <32 x i1> [[TMP17]] to <32 x i8>
360-
; CHECK-NEXT: [[TMP19:%.*]] = bitcast <32 x i8> [[TMP18]] to <16 x i16>
361-
; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <16 x i16> [[TMP19]], zeroinitializer
362-
; CHECK-NEXT: [[TMP21:%.*]] = sext <16 x i1> [[TMP20]] to <16 x i16>
363-
; CHECK-NEXT: [[TMP22:%.*]] = bitcast <16 x i16> [[TMP21]] to i256
364-
; CHECK-NEXT: [[TMP23:%.*]] = bitcast i256 [[TMP22]] to <8 x i32>
356+
; CHECK-NEXT: [[TMP19:%.*]] = bitcast <32 x i8> [[TMP18]] to <8 x i32>
357+
; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <8 x i32> [[TMP19]], zeroinitializer
358+
; CHECK-NEXT: [[TMP23:%.*]] = sext <8 x i1> [[TMP20]] to <8 x i32>
365359
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[TMP23]], [[TMP1]]
366360
; CHECK-NEXT: [[DPI:%.*]] = tail call <8 x i32> @llvm.x86.avx2.vpdpbssds.256(<8 x i32> [[__W]], <8 x i32> [[__A]], <8 x i32> [[__B]])
367361
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
@@ -403,11 +397,9 @@ define <8 x i32> @test_mm256_mask_dpbssd_epi32(i8 zeroext %__U, <8 x i32> %__W,
403397
; CHECK-NEXT: [[TMP16:%.*]] = or <32 x i1> [[TMP13]], [[TMP14]]
404398
; CHECK-NEXT: [[TMP17:%.*]] = or <32 x i1> [[TMP16]], [[TMP15]]
405399
; CHECK-NEXT: [[TMP18:%.*]] = sext <32 x i1> [[TMP17]] to <32 x i8>
406-
; CHECK-NEXT: [[TMP19:%.*]] = bitcast <32 x i8> [[TMP18]] to <16 x i16>
407-
; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <16 x i16> [[TMP19]], zeroinitializer
408-
; CHECK-NEXT: [[TMP21:%.*]] = sext <16 x i1> [[TMP20]] to <16 x i16>
409-
; CHECK-NEXT: [[TMP22:%.*]] = bitcast <16 x i16> [[TMP21]] to i256
410-
; CHECK-NEXT: [[TMP23:%.*]] = bitcast i256 [[TMP22]] to <8 x i32>
400+
; CHECK-NEXT: [[TMP19:%.*]] = bitcast <32 x i8> [[TMP18]] to <8 x i32>
401+
; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <8 x i32> [[TMP19]], zeroinitializer
402+
; CHECK-NEXT: [[TMP23:%.*]] = sext <8 x i1> [[TMP20]] to <8 x i32>
411403
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i32> [[TMP23]], [[TMP24]]
412404
; CHECK-NEXT: [[DPI:%.*]] = tail call <8 x i32> @llvm.x86.avx2.vpdpbssd.256(<8 x i32> [[__W]], <8 x i32> [[__A]], <8 x i32> [[__B]])
413405
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>

0 commit comments

Comments
 (0)