Skip to content

Commit bc33a14

Browse files
committed
[AggressiveInstCombine] Make cttz fold more resilient to non-array geps
1 parent 76bebb5 commit bc33a14

File tree

3 files changed

+101
-20
lines changed

3 files changed

+101
-20
lines changed

llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -547,14 +547,20 @@ static bool tryToRecognizeTableBasedCttz(Instruction &I) {
547547
return false;
548548

549549
GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getPointerOperand());
550-
if (!GEP || !GEP->hasNoUnsignedSignedWrap() || GEP->getNumIndices() != 2)
550+
if (!GEP || !GEP->hasNoUnsignedSignedWrap())
551551
return false;
552552

553-
if (!GEP->getSourceElementType()->isArrayTy())
554-
return false;
555-
556-
uint64_t ArraySize = GEP->getSourceElementType()->getArrayNumElements();
557-
if (ArraySize != 32 && ArraySize != 64)
553+
Type *GEPSrcEltTy = GEP->getSourceElementType();
554+
Value *GepIdx;
555+
if (GEP->getNumIndices() == 2) {
556+
if (!GEPSrcEltTy->isArrayTy() ||
557+
!match(GEP->idx_begin()->get(), m_ZeroInt()))
558+
return false;
559+
GEPSrcEltTy = GEPSrcEltTy->getArrayElementType();
560+
GepIdx = std::next(GEP->idx_begin())->get();
561+
} else if (GEP->getNumIndices() == 1)
562+
GepIdx = GEP->idx_begin()->get();
563+
else
558564
return false;
559565

560566
GlobalVariable *GVTable = dyn_cast<GlobalVariable>(GEP->getPointerOperand());
@@ -563,21 +569,17 @@ static bool tryToRecognizeTableBasedCttz(Instruction &I) {
563569

564570
ConstantDataArray *ConstData =
565571
dyn_cast<ConstantDataArray>(GVTable->getInitializer());
566-
if (!ConstData)
567-
return false;
568-
569-
if (!match(GEP->idx_begin()->get(), m_ZeroInt()))
572+
if (!ConstData || ConstData->getElementType() != GEPSrcEltTy)
570573
return false;
571574

572-
Value *Idx2 = std::next(GEP->idx_begin())->get();
573575
Value *X1;
574576
uint64_t MulConst, ShiftConst;
575577
// FIXME: 64-bit targets have `i64` type for the GEP index, so this match will
576578
// probably fail for other (e.g. 32-bit) targets.
577-
if (!match(Idx2, m_ZExtOrSelf(
578-
m_LShr(m_Mul(m_c_And(m_Neg(m_Value(X1)), m_Deferred(X1)),
579-
m_ConstantInt(MulConst)),
580-
m_ConstantInt(ShiftConst)))))
579+
if (!match(GepIdx, m_ZExtOrSelf(m_LShr(
580+
m_Mul(m_c_And(m_Neg(m_Value(X1)), m_Deferred(X1)),
581+
m_ConstantInt(MulConst)),
582+
m_ConstantInt(ShiftConst)))))
581583
return false;
582584

583585
unsigned InputBits = X1->getType()->getScalarSizeInBits();

llvm/test/Transforms/AggressiveInstCombine/lower-table-based-cttz-basics.ll

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,3 +276,50 @@ entry:
276276
%0 = load i32, ptr %arrayidx, align 4
277277
ret i32 %0
278278
}
279+
280+
define i32 @ctz1_with_i8_gep(i32 %x) {
281+
; CHECK-LABEL: @ctz1_with_i8_gep(
282+
; CHECK-NEXT: entry:
283+
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true)
284+
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X]], 0
285+
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 0, i32 [[TMP0]]
286+
; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8
287+
; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP3]] to i32
288+
; CHECK-NEXT: ret i32 [[CONV]]
289+
;
290+
entry:
291+
%sub = sub i32 0, %x
292+
%and = and i32 %sub, %x
293+
%mul = mul i32 %and, 125613361
294+
%shr = lshr i32 %mul, 27
295+
%idxprom = zext i32 %shr to i64
296+
%arrayidx = getelementptr inbounds i8, ptr @ctz7.table, i64 %idxprom
297+
%0 = load i8, ptr %arrayidx, align 1
298+
%conv = zext i8 %0 to i32
299+
ret i32 %conv
300+
}
301+
302+
define i32 @ctz2_with_i8_gep(i32 %x) {
303+
; CHECK-LABEL: @ctz2_with_i8_gep(
304+
; CHECK-NEXT: entry:
305+
; CHECK-NEXT: [[SUB:%.*]] = sub i32 0, [[X:%.*]]
306+
; CHECK-NEXT: [[AND:%.*]] = and i32 [[SUB]], [[X]]
307+
; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[AND]], 72416175
308+
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[MUL]], 26
309+
; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[SHR]] to i64
310+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [64 x i8], ptr @ctz2.table, i64 0, i64 [[IDXPROM]]
311+
; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
312+
; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32
313+
; CHECK-NEXT: ret i32 [[CONV]]
314+
;
315+
entry:
316+
%sub = sub i32 0, %x
317+
%and = and i32 %sub, %x
318+
%mul = mul i32 %and, 72416175
319+
%shr = lshr i32 %mul, 26
320+
%idxprom = zext i32 %shr to i64
321+
%arrayidx = getelementptr inbounds [64 x i8], ptr @ctz2.table, i64 0, i64 %idxprom
322+
%0 = load i16, ptr %arrayidx, align 2
323+
%conv = sext i16 %0 to i32
324+
ret i32 %conv
325+
}

llvm/test/Transforms/PhaseOrdering/lower-table-based-cttz.ll

Lines changed: 37 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -O3 -S < %s | FileCheck %s
3+
14
;; This tests lowering of the implementations of table-based ctz
25
;; algorithm to the llvm.cttz instruction in the -O3 case.
36

@@ -13,13 +16,17 @@
1316
;; }
1417
;; Compiled as: clang -O3 test.c -S -emit-llvm -Xclang -disable-llvm-optzns
1518

16-
; RUN: opt -O3 -S < %s | FileCheck %s
17-
18-
; CHECK: call range(i32 0, 33) i32 @llvm.cttz.i32
19-
2019
@ctz1.table = internal constant [32 x i8] c"\00\01\1C\02\1D\0E\18\03\1E\16\14\0F\19\11\04\08\1F\1B\0D\17\15\13\10\07\1A\0C\12\06\0B\05\0A\09", align 16
2120

22-
define i32 @ctz1(i32 noundef %x) {
21+
define i32 @ctz(i32 noundef %x) {
22+
; CHECK-LABEL: define range(i32 0, 32) i32 @ctz(
23+
; CHECK-SAME: i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
24+
; CHECK-NEXT: [[ENTRY:.*:]]
25+
; CHECK-NEXT: [[TMP0:%.*]] = tail call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[X]], i1 true)
26+
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X]], 0
27+
; CHECK-NEXT: [[CONV:%.*]] = select i1 [[TMP1]], i32 0, i32 [[TMP0]]
28+
; CHECK-NEXT: ret i32 [[CONV]]
29+
;
2330
entry:
2431
%x.addr = alloca i32, align 4
2532
store i32 %x, ptr %x.addr, align 4
@@ -35,3 +42,28 @@ entry:
3542
%conv = sext i8 %2 to i32
3643
ret i32 %conv
3744
}
45+
46+
define i32 @ctz_nonarraygep(i32 noundef %x) {
47+
; CHECK-LABEL: define range(i32 0, 32) i32 @ctz_nonarraygep(
48+
; CHECK-SAME: i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
49+
; CHECK-NEXT: [[ENTRY:.*:]]
50+
; CHECK-NEXT: [[TMP0:%.*]] = tail call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[X]], i1 true)
51+
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X]], 0
52+
; CHECK-NEXT: [[CONV:%.*]] = select i1 [[TMP1]], i32 0, i32 [[TMP0]]
53+
; CHECK-NEXT: ret i32 [[CONV]]
54+
;
55+
entry:
56+
%x.addr = alloca i32, align 4
57+
store i32 %x, ptr %x.addr, align 4
58+
%0 = load i32, ptr %x.addr, align 4
59+
%1 = load i32, ptr %x.addr, align 4
60+
%sub = sub i32 0, %1
61+
%and = and i32 %0, %sub
62+
%mul = mul i32 %and, 125613361
63+
%shr = lshr i32 %mul, 27
64+
%idxprom = zext i32 %shr to i64
65+
%arrayidx = getelementptr inbounds i8, ptr @ctz1.table, i64 %idxprom
66+
%2 = load i8, ptr %arrayidx, align 1
67+
%conv = sext i8 %2 to i32
68+
ret i32 %conv
69+
}

0 commit comments

Comments
 (0)