Skip to content

Commit cfa67d8

Browse files
committed
Get i128 working too
1 parent f6b9b8d commit cfa67d8

File tree

2 files changed

+63
-17
lines changed

2 files changed

+63
-17
lines changed

llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -458,14 +458,15 @@ static bool foldSqrt(CallInst *Call, LibFunc Func, TargetTransformInfo &TTI,
458458
// Check if this array of constants represents a cttz table.
459459
// Iterate over the elements from \p Table by trying to find/match all
460460
// the numbers from 0 to \p InputBits - 1 that should represent cttz results.
461-
static bool isCTTZTable(Constant *Table, uint64_t Mul, uint64_t Shift,
462-
uint64_t AndMask, Type *AccessTy, unsigned InputBits,
463-
APInt GEPIdxFactor, const DataLayout &DL) {
461+
static bool isCTTZTable(Constant *Table, const APInt &Mul, const APInt &Shift,
462+
const APInt &AndMask, Type *AccessTy,
463+
unsigned InputBits, const APInt &GEPIdxFactor,
464+
const DataLayout &DL) {
464465
for (unsigned Idx = 0; Idx < InputBits; Idx++) {
465-
APInt Index = (APInt(InputBits, 1ull << Idx) * Mul).lshr(Shift) & AndMask;
466+
APInt Index = (APInt(InputBits, 1).shl(Idx) * Mul).lshr(Shift) & AndMask;
466467
ConstantInt *C = dyn_cast_or_null<ConstantInt>(
467468
ConstantFoldLoadFromConst(Table, AccessTy, Index * GEPIdxFactor, DL));
468-
if (!C || C->getZExtValue() != Idx)
469+
if (!C || C->getValue() != Idx)
469470
return false;
470471
}
471472

@@ -485,7 +486,7 @@ static bool isCTTZTable(Constant *Table, uint64_t Mul, uint64_t Shift,
485486
// There is also a special case when the element is 0.
486487
//
487488
// The (x & -x) isolates the lowest set bit. The multiply is by a de Bruijn
488-
// sequence that contains each patterns of bits in it. The shift extracts
489+
// sequence that contains each pattern of bits in it. The shift extracts
489490
// the top bits after the multiply, and that index into the table should
490491
// represent the number of trailing zeros in the original number.
491492
//
@@ -557,27 +558,26 @@ static bool tryToRecognizeTableBasedCttz(Instruction &I, const DataLayout &DL) {
557558
auto [GepIdx, GEPScale] = VarOffsets.front();
558559

559560
Value *X1;
560-
uint64_t MulConst, ShiftConst, AndCst = ~0ull;
561+
const APInt *MulConst, *ShiftConst, *AndCst = nullptr;
561562
// Check that the gep variable index is ((x & -x) * MulConst) >> ShiftConst.
562563
// This might be extended to the pointer index type, and if the gep index type
563564
// has been replaced with an i8 then a new And (and different ShiftConst) will
564565
// be present.
565-
// FIXME: 64-bit targets have `i64` type for the GEP index, so this match will
566-
// probably fail for other (e.g. 32-bit) targets.
567-
auto MatchInner = m_LShr(m_Mul(m_c_And(m_Neg(m_Value(X1)), m_Deferred(X1)),
568-
m_ConstantInt(MulConst)),
569-
m_ConstantInt(ShiftConst));
570-
if (!match(GepIdx, m_ZExtOrSelf(MatchInner)) &&
571-
!match(GepIdx, m_ZExtOrSelf(m_And(MatchInner, m_ConstantInt(AndCst)))))
566+
auto MatchInner = m_LShr(
567+
m_Mul(m_c_And(m_Neg(m_Value(X1)), m_Deferred(X1)), m_APInt(MulConst)),
568+
m_APInt(ShiftConst));
569+
if (!match(GepIdx, m_CastOrSelf(MatchInner)) &&
570+
!match(GepIdx, m_CastOrSelf(m_And(MatchInner, m_APInt(AndCst)))))
572571
return false;
573572

574573
unsigned InputBits = X1->getType()->getScalarSizeInBits();
575-
if (InputBits != 16 && InputBits != 32 && InputBits != 64)
574+
if (InputBits != 16 && InputBits != 32 && InputBits != 64 && InputBits != 128)
576575
return false;
577576

578577
if (!GEPScale.isIntN(InputBits) ||
579-
!isCTTZTable(GVTable->getInitializer(), MulConst, ShiftConst, AndCst,
580-
AccessType, InputBits, GEPScale.trunc(InputBits), DL))
578+
!isCTTZTable(GVTable->getInitializer(), *MulConst, *ShiftConst,
579+
AndCst ? *AndCst : APInt::getAllOnes(InputBits), AccessType,
580+
InputBits, GEPScale.zextOrTrunc(InputBits), DL))
581581
return false;
582582

583583
ConstantInt *ZeroTableElem = cast<ConstantInt>(

llvm/test/Transforms/AggressiveInstCombine/lower-table-based-cttz-basics.ll

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -452,3 +452,49 @@ entry:
452452
%conv6 = zext i8 %1 to i32
453453
ret i32 %conv6
454454
}
455+
456+
; Same as ctz1, but the table elements and the load are very wide (i128).
457+
@ctz7i128.table = internal unnamed_addr constant [32 x i128] [i128 0, i128 1, i128 28, i128 2, i128 29, i128 14, i128 24, i128 3, i128 30, i128 22, i128 20, i128 15, i128 25, i128 17, i128 4, i128 8, i128 31, i128 27, i128 13, i128 23, i128 21, i128 19, i128 16, i128 7, i128 26, i128 12, i128 18, i128 6, i128 11, i128 5, i128 10, i128 9], align 16
458+
define i128 @ctz1_i128(i32 %x) {
459+
; CHECK-LABEL: @ctz1_i128(
460+
; CHECK-NEXT: entry:
461+
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true)
462+
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X]], 0
463+
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 0, i32 [[TMP0]]
464+
; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i128
465+
; CHECK-NEXT: ret i128 [[TMP3]]
466+
;
467+
entry:
468+
%sub = sub i32 0, %x
469+
%and = and i32 %sub, %x
470+
%mul = mul i32 %and, 125613361
471+
%shr = lshr i32 %mul, 27
472+
%idxprom = zext i32 %shr to i64
473+
%arrayidx = getelementptr inbounds [32 x i128], ptr @ctz7i128.table, i64 0, i64 %idxprom
474+
%l = load i128, ptr %arrayidx, align 1
475+
ret i128 %l
476+
}
477+
478+
; This is roughly the same as ctz1 but using i128.
479+
@table.i128 = internal unnamed_addr constant [128 x i8] c"\00\01e\02tf<\03|ug^R=!\04}yvWoh_5ZSE>0\22\14\05~rzPwmX.pkiI`K6\1Ab[TBMF?'81*#\1C\15\0E\06\7Fds;{]Q xVn4YD/\13qOl-jHJ\19aAL&7)\1B\0Dc:\\\1FU3C\12N,G\18@%(\0C9\1E2\11+\17$\0B\1D\10\16\0A\0F\09\08\07", align 1
480+
define i32 @src(i128 noundef %x) {
481+
; CHECK-LABEL: @src(
482+
; CHECK-NEXT: entry:
483+
; CHECK-NEXT: [[TMP3:%.*]] = call i128 @llvm.cttz.i128(i128 [[X:%.*]], i1 true)
484+
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i128 [[X]], 0
485+
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i128 0, i128 [[TMP3]]
486+
; CHECK-NEXT: [[TMP0:%.*]] = trunc i128 [[TMP2]] to i8
487+
; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP0]] to i32
488+
; CHECK-NEXT: ret i32 [[CONV]]
489+
;
490+
entry:
491+
%sub = sub i128 0, %x
492+
%and = and i128 %x, %sub
493+
%mul = mul i128 %and, 2647824804797170443043024478319300753
494+
%shr = lshr i128 %mul, 121
495+
%idxprom = trunc i128 %shr to i64
496+
%arrayidx = getelementptr inbounds nuw i8, ptr @table.i128, i64 %idxprom
497+
%0 = load i8, ptr %arrayidx, align 1
498+
%conv = zext i8 %0 to i32
499+
ret i32 %conv
500+
}

0 commit comments

Comments
 (0)