@@ -460,7 +460,7 @@ static bool foldSqrt(CallInst *Call, LibFunc Func, TargetTransformInfo &TTI,
460
460
// the numbers from 0 to \p InputBits that should represent cttz results.
461
461
static bool isCTTZTable (Constant *Table, uint64_t Mul, uint64_t Shift,
462
462
uint64_t AndMask, Type *AccessTy, unsigned InputBits,
463
- unsigned GEPIdxFactor, const DataLayout &DL) {
463
+ APInt GEPIdxFactor, const DataLayout &DL) {
464
464
for (unsigned Idx = 0 ; Idx < InputBits; Idx++) {
465
465
APInt Index = (APInt (InputBits, 1ull << Idx) * Mul).lshr (Shift) & AndMask;
466
466
ConstantInt *C = dyn_cast_or_null<ConstantInt>(
@@ -484,6 +484,11 @@ static bool isCTTZTable(Constant *Table, uint64_t Mul, uint64_t Shift,
484
484
// this can be lowered to `cttz` instruction.
485
485
// There is also a special case when the element is 0.
486
486
//
487
+ // The (x & -x) isolates the lowest set bit. The multiplier is a de Bruijn
488
+ // sequence that contains each pattern of bits in it. The shift extracts
489
+ // the top bits after the multiply, and that index into the table should
490
+ // represent the number of trailing zeros in the original number.
491
+ //
487
492
// Here are some examples of LLVM IR for a 64-bit target:
488
493
//
489
494
// CASE 1:
@@ -525,7 +530,7 @@ static bool isCTTZTable(Constant *Table, uint64_t Mul, uint64_t Shift,
525
530
// i64 %shr
526
531
// %0 = load i8, i8* %arrayidx, align 1, !tbaa !8
527
532
//
528
- // All this can be lowered to @llvm.cttz.i32/64 intrinsic .
533
+ // All these can be lowered to @llvm.cttz.i32/64 intrinsics.
529
534
static bool tryToRecognizeTableBasedCttz (Instruction &I, const DataLayout &DL) {
530
535
LoadInst *LI = dyn_cast<LoadInst>(&I);
531
536
if (!LI)
@@ -539,45 +544,40 @@ static bool tryToRecognizeTableBasedCttz(Instruction &I, const DataLayout &DL) {
539
544
if (!GEP || !GEP->hasNoUnsignedSignedWrap ())
540
545
return false ;
541
546
542
- Type *GEPSrcEltTy = GEP->getSourceElementType ();
543
- Value *GepIdx;
544
- if (GEP->getNumIndices () == 2 ) {
545
- if (!GEPSrcEltTy->isArrayTy () ||
546
- !match (GEP->idx_begin ()->get (), m_ZeroInt ()))
547
- return false ;
548
- GEPSrcEltTy = GEPSrcEltTy->getArrayElementType ();
549
- GepIdx = std::next (GEP->idx_begin ())->get ();
550
- } else if (GEP->getNumIndices () == 1 )
551
- GepIdx = GEP->idx_begin ()->get ();
552
- else
553
- return false ;
554
-
555
547
GlobalVariable *GVTable = dyn_cast<GlobalVariable>(GEP->getPointerOperand ());
556
548
if (!GVTable || !GVTable->hasInitializer () || !GVTable->isConstant ())
557
549
return false ;
558
550
551
+ unsigned BW = DL.getIndexTypeSizeInBits (GEP->getType ());
552
+ APInt ModOffset (BW, 0 );
553
+ SmallMapVector<Value *, APInt, 4 > VarOffsets;
554
+ if (!GEP->collectOffset (DL, BW, VarOffsets, ModOffset) ||
555
+ VarOffsets.size () != 1 || ModOffset != 0 )
556
+ return false ;
557
+ auto [GepIdx, GEPScale] = VarOffsets.front ();
558
+
559
559
Value *X1;
560
560
uint64_t MulConst, ShiftConst, AndCst = ~0ull ;
561
+ // Check that the gep variable index is ((x & -x) * MulConst) >> ShiftConst.
562
+ // This might be extended to the pointer index type, and if the gep index type
563
+ // has been replaced with an i8 then a new And (and different ShiftConst) will
564
+ // be present.
561
565
// FIXME: 64-bit targets have `i64` type for the GEP index, so this match will
562
566
// probably fail for other (e.g. 32-bit) targets.
563
- if (!match (GepIdx, m_ZExtOrSelf (m_LShr (
564
- m_Mul (m_c_And (m_Neg (m_Value (X1)), m_Deferred (X1)),
565
- m_ConstantInt (MulConst)),
566
- m_ConstantInt (ShiftConst)))) &&
567
- !match (GepIdx, m_ZExtOrSelf (m_And (m_LShr (m_Mul (m_c_And (m_Neg (m_Value (X1)),
568
- m_Deferred (X1)),
569
- m_ConstantInt (MulConst)),
570
- m_ConstantInt (ShiftConst)),
571
- m_ConstantInt (AndCst)))))
567
+ auto MatchInner = m_LShr (m_Mul (m_c_And (m_Neg (m_Value (X1)), m_Deferred (X1)),
568
+ m_ConstantInt (MulConst)),
569
+ m_ConstantInt (ShiftConst));
570
+ if (!match (GepIdx, m_ZExtOrSelf (MatchInner)) &&
571
+ !match (GepIdx, m_ZExtOrSelf (m_And (MatchInner, m_ConstantInt (AndCst)))))
572
572
return false ;
573
573
574
574
unsigned InputBits = X1->getType ()->getScalarSizeInBits ();
575
575
if (InputBits != 16 && InputBits != 32 && InputBits != 64 )
576
576
return false ;
577
577
578
- if (!isCTTZTable (GVTable-> getInitializer (), MulConst, ShiftConst, AndCst,
579
- AccessType, InputBits ,
580
- GEPSrcEltTy-> getScalarSizeInBits () / 8 , DL))
578
+ if (!GEPScale. isIntN (InputBits) ||
579
+ ! isCTTZTable (GVTable-> getInitializer (), MulConst, ShiftConst, AndCst ,
580
+ AccessType, InputBits, GEPScale. trunc (InputBits) , DL))
581
581
return false ;
582
582
583
583
ConstantInt *ZeroTableElem = cast<ConstantInt>(
0 commit comments