Skip to content

Commit d105873

Browse files
committed
Attempt to get i8 gep offsets working
1 parent bb524c5 commit d105873

File tree

3 files changed

+73
-22
lines changed

3 files changed

+73
-22
lines changed

llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -457,13 +457,12 @@ static bool foldSqrt(CallInst *Call, LibFunc Func, TargetTransformInfo &TTI,
457457

458458
// Check if this array of constants represents a cttz table.
459459
// Iterate over the elements from \p Table by trying to find/match all
460-
// the numbers from 0 to \p InputTy->getSizeInBits() that should represent cttz
461-
// results.
460+
// the numbers from 0 to \p InputBits that should represent cttz results.
462461
static bool isCTTZTable(Constant *Table, uint64_t Mul, uint64_t Shift,
463-
Type *AccessTy, unsigned InputBits,
462+
uint64_t AndMask, Type *AccessTy, unsigned InputBits,
464463
unsigned GEPIdxFactor, const DataLayout &DL) {
465464
for (unsigned Idx = 0; Idx < InputBits; Idx++) {
466-
APInt Index = (APInt(InputBits, 1ull << Idx) * Mul).lshr(Shift);
465+
APInt Index = (APInt(InputBits, 1ull << Idx) * Mul).lshr(Shift) & AndMask;
467466
ConstantInt *C = dyn_cast_or_null<ConstantInt>(
468467
ConstantFoldLoadFromConst(Table, AccessTy, Index * GEPIdxFactor, DL));
469468
if (!C || C->getZExtValue() != Idx)
@@ -558,26 +557,27 @@ static bool tryToRecognizeTableBasedCttz(Instruction &I, const DataLayout &DL) {
558557
return false;
559558

560559
Value *X1;
561-
uint64_t MulConst, ShiftConst;
560+
uint64_t MulConst, ShiftConst, AndCst = ~0ull;
562561
// FIXME: 64-bit targets have `i64` type for the GEP index, so this match will
563562
// probably fail for other (e.g. 32-bit) targets.
564563
if (!match(GepIdx, m_ZExtOrSelf(m_LShr(
565564
m_Mul(m_c_And(m_Neg(m_Value(X1)), m_Deferred(X1)),
566565
m_ConstantInt(MulConst)),
567-
m_ConstantInt(ShiftConst)))))
566+
m_ConstantInt(ShiftConst)))) &&
567+
!match(GepIdx, m_ZExtOrSelf(m_And(m_LShr(m_Mul(m_c_And(m_Neg(m_Value(X1)),
568+
m_Deferred(X1)),
569+
m_ConstantInt(MulConst)),
570+
m_ConstantInt(ShiftConst)),
571+
m_ConstantInt(AndCst)))))
568572
return false;
569573

570574
unsigned InputBits = X1->getType()->getScalarSizeInBits();
571575
if (InputBits != 16 && InputBits != 32 && InputBits != 64)
572576
return false;
573577

574-
// Shift should extract top 4..7 bits.
575-
if (InputBits - Log2_32(InputBits) != ShiftConst &&
576-
InputBits - Log2_32(InputBits) - 1 != ShiftConst)
577-
return false;
578-
579-
if (!isCTTZTable(GVTable->getInitializer(), MulConst, ShiftConst, AccessType,
580-
InputBits, GEPSrcEltTy->getScalarSizeInBits() / 8, DL))
578+
if (!isCTTZTable(GVTable->getInitializer(), MulConst, ShiftConst, AndCst,
579+
AccessType, InputBits,
580+
GEPSrcEltTy->getScalarSizeInBits() / 8, DL))
581581
return false;
582582

583583
ConstantInt *ZeroTableElem = cast<ConstantInt>(

llvm/test/Transforms/AggressiveInstCombine/lower-table-based-cttz-basics.ll

Lines changed: 59 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,39 @@ return: ; preds = %entry, %if.end
190190
ret i32 %retval.0
191191
}
192192

193+
define i32 @ctz3_with_i8gep(i32 %x) {
194+
; CHECK-LABEL: @ctz3_with_i8gep(
195+
; CHECK-NEXT: entry:
196+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
197+
; CHECK-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[IF_END:%.*]]
198+
; CHECK: if.end:
199+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.cttz.i32(i32 [[X]], i1 true)
200+
; CHECK-NEXT: br label [[RETURN]]
201+
; CHECK: return:
202+
; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[TMP2]], [[IF_END]] ], [ 32, [[ENTRY:%.*]] ]
203+
; CHECK-NEXT: ret i32 [[RETVAL_0]]
204+
;
205+
entry:
206+
%cmp = icmp eq i32 %x, 0
207+
br i1 %cmp, label %return, label %if.end
208+
209+
if.end: ; preds = %entry
210+
%sub = sub i32 0, %x
211+
%and = and i32 %x, %sub
212+
%mul = mul i32 %and, 81224991
213+
%0 = lshr i32 %mul, 25
214+
%1 = and i32 %0, 124
215+
%arrayidx.idx = zext nneg i32 %1 to i64
216+
%arrayidx = getelementptr inbounds nuw i8, ptr @ctz3.table, i64 %arrayidx.idx
217+
%2 = load i32, ptr %arrayidx, align 4
218+
br label %return
219+
220+
return: ; preds = %if.end, %entry
221+
%retval.0 = phi i32 [ %2, %if.end ], [ 32, %entry ]
222+
ret i32 %retval.0
223+
}
224+
225+
193226
@table = internal unnamed_addr constant [64 x i32] [i32 0, i32 1, i32 12, i32 2, i32 13, i32 22, i32 17, i32 3, i32 14, i32 33, i32 23, i32 36, i32 18, i32 58, i32 28, i32 4, i32 62, i32 15, i32 34, i32 26, i32 24, i32 48, i32 50, i32 37, i32 19, i32 55, i32 59, i32 52, i32 29, i32 44, i32 39, i32 5, i32 63, i32 11, i32 21, i32 16, i32 32, i32 35, i32 57, i32 27, i32 61, i32 25, i32 47, i32 49, i32 54, i32 51, i32 43, i32 38, i32 10, i32 20, i32 31, i32 56, i32 60, i32 46, i32 53, i32 42, i32 9, i32 30, i32 45, i32 41, i32 8, i32 40, i32 7, i32 6], align 4
194227

195228
define i32 @ctz4(i64 %b) {
@@ -277,6 +310,30 @@ entry:
277310
ret i32 %0
278311
}
279312

313+
;; This has a wrong table size but is otherwise fine.
314+
@ctz9.table = internal unnamed_addr constant [128 x i8] c"\00\01\1C\02\1D\0E\18\03\1E\16\14\0F\19\11\04\08\1F\1B\0D\17\15\13\10\07\1A\0C\12\06\0B\05\0A\09\00\01\1C\02\1D\0E\18\03\1E\16\14\0F\19\11\04\08\1F\1B\0D\17\15\13\10\07\1A\0C\12\06\0B\05\0A\09\00\01\1C\02\1D\0E\18\03\1E\16\14\0F\19\11\04\08\1F\1B\0D\17\15\13\10\07\1A\0C\12\06\0B\05\0A\09\00\01\1C\02\1D\0E\18\03\1E\16\14\0F\19\11\04\08\1F\1B\0D\17\15\13\10\07\1A\0C\12\06\0B\05\0A\09", align 1
315+
define i32 @ctz9(i32 %x) {
316+
; CHECK-LABEL: @ctz9(
317+
; CHECK-NEXT: entry:
318+
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true)
319+
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X]], 0
320+
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 0, i32 [[TMP0]]
321+
; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8
322+
; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP3]] to i32
323+
; CHECK-NEXT: ret i32 [[CONV]]
324+
;
325+
entry:
326+
%sub = sub i32 0, %x
327+
%and = and i32 %sub, %x
328+
%mul = mul i32 %and, 125613361
329+
%shr = lshr i32 %mul, 27
330+
%idxprom = zext i32 %shr to i64
331+
%arrayidx = getelementptr inbounds [128 x i8], ptr @ctz9.table, i64 0, i64 %idxprom
332+
%0 = load i8, ptr %arrayidx, align 1
333+
%conv = zext i8 %0 to i32
334+
ret i32 %conv
335+
}
336+
280337
define i32 @ctz1_with_i8_gep(i32 %x) {
281338
; CHECK-LABEL: @ctz1_with_i8_gep(
282339
; CHECK-NEXT: entry:
@@ -328,14 +385,8 @@ entry:
328385
; This is the same as ctz2_with_i8_gep but with the gep index multiplied by 2.
329386
define i32 @ctz2_with_i8_gep_fixed(i32 %x) {
330387
; CHECK-LABEL: @ctz2_with_i8_gep_fixed(
331-
; CHECK-NEXT: [[SUB:%.*]] = sub i32 0, [[X:%.*]]
332-
; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], [[SUB]]
333-
; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[AND]], 72416175
334-
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[MUL]], 25
335-
; CHECK-NEXT: [[SHR2:%.*]] = and i32 [[SHR]], 126
336-
; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[SHR2]] to i64
337-
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr @ctz2.table, i64 [[TMP1]]
338-
; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
388+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 false)
389+
; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
339390
; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32
340391
; CHECK-NEXT: ret i32 [[CONV]]
341392
;

llvm/test/Transforms/AggressiveInstCombine/negative-lower-table-based-cttz.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ entry:
6666

6767
;; This is a negative test with a wrong table size and constants.
6868

69-
@ctz3.table = internal unnamed_addr constant [128 x i8] c"\00\01\1C\02\1D\0E\18\03\1E\16\14\0F\19\11\04\08\1F\1B\0D\17\15\13\10\07\1A\0C\12\06\0B\05\0A\09\00\01\1C\02\1D\0E\18\03\1E\16\14\0F\19\11\04\08\1F\1B\0D\17\15\13\10\07\1A\0C\12\06\0B\05\0A\09\00\01\1C\02\1D\0E\18\03\1E\16\14\0F\19\11\04\08\1F\1B\0D\17\15\13\10\07\1A\0C\12\06\0B\05\0A\09\00\01\1C\02\1D\0E\18\03\1E\16\14\0F\19\11\04\08\1F\1B\0D\17\15\13\10\07\1A\0C\12\06\0B\05\0A\09", align 1
69+
@ctz3.table = internal unnamed_addr constant [128 x i8] c"\01\01\1C\02\1D\0E\18\03\1E\16\14\0F\19\11\04\08\1F\1B\0D\17\15\13\10\07\1A\0C\12\06\0B\05\0A\09\00\01\1C\02\1D\0E\18\03\1E\16\14\0F\19\11\04\08\1F\1B\0D\17\15\13\10\07\1A\0C\12\06\0B\05\0A\09\00\01\1C\02\1D\0E\18\03\1E\16\14\0F\19\11\04\08\1F\1B\0D\17\15\13\10\07\1A\0C\12\06\0B\05\0A\09\00\01\1C\02\1D\0E\18\03\1E\16\14\0F\19\11\04\08\1F\1B\0D\17\15\13\10\07\1A\0C\12\06\0B\05\0A\09", align 1
7070

7171
define i32 @ctz5(i32 %x) {
7272
entry:

0 commit comments

Comments
 (0)