@@ -457,30 +457,20 @@ static bool foldSqrt(CallInst *Call, LibFunc Func, TargetTransformInfo &TTI,
457
457
458
458
// Check if this array of constants represents a cttz table.
459
459
// Iterate over the elements from \p Table by trying to find/match all
460
- // the numbers from 0 to \p InputBits that should represent cttz results.
461
- static bool isCTTZTable (const ConstantDataArray &Table, uint64_t Mul,
462
- uint64_t Shift, uint64_t InputBits) {
463
- unsigned Length = Table.getNumElements ();
464
- if (Length < InputBits || Length > InputBits * 2 )
465
- return false ;
466
-
467
- APInt Mask = APInt::getBitsSetFrom (InputBits, Shift);
468
- unsigned Matched = 0 ;
469
-
470
- for (unsigned i = 0 ; i < Length; i++) {
471
- uint64_t Element = Table.getElementAsInteger (i);
472
- if (Element >= InputBits)
473
- continue ;
474
-
475
- // Check if \p Element matches a concrete answer. It could fail for some
476
- // elements that are never accessed, so we keep iterating over each element
477
- // from the table. The number of matched elements should be equal to the
478
- // number of potential right answers which is \p InputBits actually.
479
- if ((((Mul << Element) & Mask.getZExtValue ()) >> Shift) == i)
480
- Matched++;
460
+ // the numbers from 0 to \p InputBits - 1 that should represent cttz
461
+ // results.
462
+ static bool isCTTZTable (Constant *Table, uint64_t Mul, uint64_t Shift,
463
+ Type *AccessTy, unsigned InputBits,
464
+ unsigned GEPIdxFactor, const DataLayout &DL) {
465
+ for (unsigned Idx = 0 ; Idx < InputBits; Idx++) {
466
+ APInt Index = (APInt (InputBits, 1ull << Idx) * Mul).lshr (Shift);
467
+ ConstantInt *C = dyn_cast_or_null<ConstantInt>(
468
+ ConstantFoldLoadFromConst (Table, AccessTy, Index * GEPIdxFactor, DL));
469
+ if (!C || C->getZExtValue () != Idx)
470
+ return false ;
481
471
}
482
472
483
- return Matched == InputBits ;
473
+ return true ;
484
474
}
485
475
486
476
// Try to recognize table-based ctz implementation.
@@ -537,7 +527,7 @@ static bool isCTTZTable(const ConstantDataArray &Table, uint64_t Mul,
537
527
// %0 = load i8, i8* %arrayidx, align 1, !tbaa !8
538
528
//
539
529
// All this can be lowered to @llvm.cttz.i32/64 intrinsic.
540
- static bool tryToRecognizeTableBasedCttz (Instruction &I) {
530
+ static bool tryToRecognizeTableBasedCttz (Instruction &I, const DataLayout &DL ) {
541
531
LoadInst *LI = dyn_cast<LoadInst>(&I);
542
532
if (!LI)
543
533
return false ;
@@ -567,11 +557,6 @@ static bool tryToRecognizeTableBasedCttz(Instruction &I) {
567
557
if (!GVTable || !GVTable->hasInitializer () || !GVTable->isConstant ())
568
558
return false ;
569
559
570
- ConstantDataArray *ConstData =
571
- dyn_cast<ConstantDataArray>(GVTable->getInitializer ());
572
- if (!ConstData || ConstData->getElementType () != GEPSrcEltTy)
573
- return false ;
574
-
575
560
Value *X1;
576
561
uint64_t MulConst, ShiftConst;
577
562
// FIXME: 64-bit targets have `i64` type for the GEP index, so this match will
@@ -583,19 +568,21 @@ static bool tryToRecognizeTableBasedCttz(Instruction &I) {
583
568
return false ;
584
569
585
570
unsigned InputBits = X1->getType ()->getScalarSizeInBits ();
586
- if (InputBits != 32 && InputBits != 64 )
571
+ if (InputBits != 16 && InputBits != 32 && InputBits != 64 )
587
572
return false ;
588
573
589
- // Shift should extract top 5 ..7 bits.
574
+ // Shift should extract top 4 ..7 bits.
590
575
if (InputBits - Log2_32 (InputBits) != ShiftConst &&
591
576
InputBits - Log2_32 (InputBits) - 1 != ShiftConst)
592
577
return false ;
593
578
594
- if (!isCTTZTable (*ConstData, MulConst, ShiftConst, InputBits))
579
+ if (!isCTTZTable (GVTable->getInitializer (), MulConst, ShiftConst, AccessType,
580
+ InputBits, GEPSrcEltTy->getScalarSizeInBits () / 8 , DL))
595
581
return false ;
596
582
597
- auto ZeroTableElem = ConstData->getElementAsInteger (0 );
598
- bool DefinedForZero = ZeroTableElem == InputBits;
583
+ ConstantInt *ZeroTableElem = cast<ConstantInt>(
584
+ ConstantFoldLoadFromConst (GVTable->getInitializer (), AccessType, DL));
585
+ bool DefinedForZero = ZeroTableElem->getZExtValue () == InputBits;
599
586
600
587
IRBuilder<> B (LI);
601
588
ConstantInt *BoolConst = B.getInt1 (!DefinedForZero);
@@ -609,8 +596,7 @@ static bool tryToRecognizeTableBasedCttz(Instruction &I) {
609
596
// If the value in elem 0 isn't the same as InputBits, we still want to
610
597
// produce the value from the table.
611
598
auto Cmp = B.CreateICmpEQ (X1, ConstantInt::get (XType, 0 ));
612
- auto Select =
613
- B.CreateSelect (Cmp, ConstantInt::get (XType, ZeroTableElem), Cttz);
599
+ auto Select = B.CreateSelect (Cmp, B.CreateZExt (ZeroTableElem, XType), Cttz);
614
600
615
601
// NOTE: If the table[0] is 0, but the cttz(0) is defined by the Target
616
602
// it should be handled as: `cttz(x) & (typeSize - 1)`.
@@ -1479,7 +1465,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
1479
1465
MadeChange |= foldGuardedFunnelShift (I, DT);
1480
1466
MadeChange |= tryToRecognizePopCount (I);
1481
1467
MadeChange |= tryToFPToSat (I, TTI);
1482
- MadeChange |= tryToRecognizeTableBasedCttz (I);
1468
+ MadeChange |= tryToRecognizeTableBasedCttz (I, DL );
1483
1469
MadeChange |= foldConsecutiveLoads (I, DL, TTI, AA, DT);
1484
1470
MadeChange |= foldPatternedLoads (I, DL);
1485
1471
MadeChange |= foldICmpOrChain (I, DL, TTI, AA, DT);
0 commit comments