-
Notifications
You must be signed in to change notification settings - Fork 14.7k
[AggressiveInstCombine] Make cttz fold more resilient to non-array geps #150896
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-transforms Author: David Green (davemgreen) Changes: Similar to #150639, this fixes the AggressiveInstCombine fold that converts lookup tables to cttz instructions when the gep types are not array types, i.e. `gep i16 @glob, i64 %idx` instead of `gep [64 x i16] @glob, i64 0, i64 %idx`. Full diff: https://github.com/llvm/llvm-project/pull/150896.diff 3 Files Affected:
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 7af5ba4e0e103..975590214ffa6 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -547,14 +547,20 @@ static bool tryToRecognizeTableBasedCttz(Instruction &I) {
return false;
GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getPointerOperand());
- if (!GEP || !GEP->hasNoUnsignedSignedWrap() || GEP->getNumIndices() != 2)
+ if (!GEP || !GEP->hasNoUnsignedSignedWrap())
return false;
- if (!GEP->getSourceElementType()->isArrayTy())
- return false;
-
- uint64_t ArraySize = GEP->getSourceElementType()->getArrayNumElements();
- if (ArraySize != 32 && ArraySize != 64)
+ Type *GEPSrcEltTy = GEP->getSourceElementType();
+ Value *GepIdx;
+ if (GEP->getNumIndices() == 2) {
+ if (!GEPSrcEltTy->isArrayTy() ||
+ !match(GEP->idx_begin()->get(), m_ZeroInt()))
+ return false;
+ GEPSrcEltTy = GEPSrcEltTy->getArrayElementType();
+ GepIdx = std::next(GEP->idx_begin())->get();
+ } else if (GEP->getNumIndices() == 1)
+ GepIdx = GEP->idx_begin()->get();
+ else
return false;
GlobalVariable *GVTable = dyn_cast<GlobalVariable>(GEP->getPointerOperand());
@@ -563,21 +569,17 @@ static bool tryToRecognizeTableBasedCttz(Instruction &I) {
ConstantDataArray *ConstData =
dyn_cast<ConstantDataArray>(GVTable->getInitializer());
- if (!ConstData)
- return false;
-
- if (!match(GEP->idx_begin()->get(), m_ZeroInt()))
+ if (!ConstData || ConstData->getElementType() != GEPSrcEltTy)
return false;
- Value *Idx2 = std::next(GEP->idx_begin())->get();
Value *X1;
uint64_t MulConst, ShiftConst;
// FIXME: 64-bit targets have `i64` type for the GEP index, so this match will
// probably fail for other (e.g. 32-bit) targets.
- if (!match(Idx2, m_ZExtOrSelf(
- m_LShr(m_Mul(m_c_And(m_Neg(m_Value(X1)), m_Deferred(X1)),
- m_ConstantInt(MulConst)),
- m_ConstantInt(ShiftConst)))))
+ if (!match(GepIdx, m_ZExtOrSelf(m_LShr(
+ m_Mul(m_c_And(m_Neg(m_Value(X1)), m_Deferred(X1)),
+ m_ConstantInt(MulConst)),
+ m_ConstantInt(ShiftConst)))))
return false;
unsigned InputBits = X1->getType()->getScalarSizeInBits();
diff --git a/llvm/test/Transforms/AggressiveInstCombine/lower-table-based-cttz-basics.ll b/llvm/test/Transforms/AggressiveInstCombine/lower-table-based-cttz-basics.ll
index 4d571999df372..0bfa891a7887c 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/lower-table-based-cttz-basics.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/lower-table-based-cttz-basics.ll
@@ -276,3 +276,50 @@ entry:
%0 = load i32, ptr %arrayidx, align 4
ret i32 %0
}
+
+define i32 @ctz1_with_i8_gep(i32 %x) {
+; CHECK-LABEL: @ctz1_with_i8_gep(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true)
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 0, i32 [[TMP0]]
+; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8
+; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP3]] to i32
+; CHECK-NEXT: ret i32 [[CONV]]
+;
+entry:
+ %sub = sub i32 0, %x
+ %and = and i32 %sub, %x
+ %mul = mul i32 %and, 125613361
+ %shr = lshr i32 %mul, 27
+ %idxprom = zext i32 %shr to i64
+ %arrayidx = getelementptr inbounds i8, ptr @ctz7.table, i64 %idxprom
+ %0 = load i8, ptr %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ ret i32 %conv
+}
+
+define i32 @ctz2_with_i8_gep(i32 %x) {
+; CHECK-LABEL: @ctz2_with_i8_gep(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 0, [[X:%.*]]
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[SUB]], [[X]]
+; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[AND]], 72416175
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[MUL]], 26
+; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[SHR]] to i64
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [64 x i8], ptr @ctz2.table, i64 0, i64 [[IDXPROM]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
+; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32
+; CHECK-NEXT: ret i32 [[CONV]]
+;
+entry:
+ %sub = sub i32 0, %x
+ %and = and i32 %sub, %x
+ %mul = mul i32 %and, 72416175
+ %shr = lshr i32 %mul, 26
+ %idxprom = zext i32 %shr to i64
+ %arrayidx = getelementptr inbounds [64 x i8], ptr @ctz2.table, i64 0, i64 %idxprom
+ %0 = load i16, ptr %arrayidx, align 2
+ %conv = sext i16 %0 to i32
+ ret i32 %conv
+}
diff --git a/llvm/test/Transforms/PhaseOrdering/lower-table-based-cttz.ll b/llvm/test/Transforms/PhaseOrdering/lower-table-based-cttz.ll
index 19fbc1f1ae64e..4455016c3e4a4 100644
--- a/llvm/test/Transforms/PhaseOrdering/lower-table-based-cttz.ll
+++ b/llvm/test/Transforms/PhaseOrdering/lower-table-based-cttz.ll
@@ -1,3 +1,6 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -O3 -S < %s | FileCheck %s
+
;; This tests lowering of the implementations of table-based ctz
;; algorithm to the llvm.cttz instruction in the -O3 case.
@@ -13,13 +16,17 @@
;; }
;; Compiled as: clang -O3 test.c -S -emit-llvm -Xclang -disable-llvm-optzns
-; RUN: opt -O3 -S < %s | FileCheck %s
-
-; CHECK: call range(i32 0, 33) i32 @llvm.cttz.i32
-
@ctz1.table = internal constant [32 x i8] c"\00\01\1C\02\1D\0E\18\03\1E\16\14\0F\19\11\04\08\1F\1B\0D\17\15\13\10\07\1A\0C\12\06\0B\05\0A\09", align 16
-define i32 @ctz1(i32 noundef %x) {
+define i32 @ctz(i32 noundef %x) {
+; CHECK-LABEL: define range(i32 0, 32) i32 @ctz(
+; CHECK-SAME: i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = tail call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[X]], i1 true)
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X]], 0
+; CHECK-NEXT: [[CONV:%.*]] = select i1 [[TMP1]], i32 0, i32 [[TMP0]]
+; CHECK-NEXT: ret i32 [[CONV]]
+;
entry:
%x.addr = alloca i32, align 4
store i32 %x, ptr %x.addr, align 4
@@ -35,3 +42,28 @@ entry:
%conv = sext i8 %2 to i32
ret i32 %conv
}
+
+define i32 @ctz_nonarraygep(i32 noundef %x) {
+; CHECK-LABEL: define range(i32 0, 32) i32 @ctz_nonarraygep(
+; CHECK-SAME: i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = tail call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[X]], i1 true)
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X]], 0
+; CHECK-NEXT: [[CONV:%.*]] = select i1 [[TMP1]], i32 0, i32 [[TMP0]]
+; CHECK-NEXT: ret i32 [[CONV]]
+;
+entry:
+ %x.addr = alloca i32, align 4
+ store i32 %x, ptr %x.addr, align 4
+ %0 = load i32, ptr %x.addr, align 4
+ %1 = load i32, ptr %x.addr, align 4
+ %sub = sub i32 0, %1
+ %and = and i32 %0, %sub
+ %mul = mul i32 %and, 125613361
+ %shr = lshr i32 %mul, 27
+ %idxprom = zext i32 %shr to i64
+ %arrayidx = getelementptr inbounds i8, ptr @ctz1.table, i64 %idxprom
+ %2 = load i8, ptr %arrayidx, align 1
+ %conv = sext i8 %2 to i32
+ ret i32 %conv
+}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you please directly port this one to be type independent (using ConstantFoldLoadFromConst)? It looks like it shouldn't be hard.
Well I wrote some changes and in the process got i16 working from i128 integers. This is the first time I've seen ConstantFoldLoadFromConst so let me know if you were thinking of something else. |
llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
Outdated
Show resolved
Hide resolved
llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
Outdated
Show resolved
Hide resolved
67cf707
to
fcc6d0c
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks!
llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
Outdated
Show resolved
Hide resolved
llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
Outdated
Show resolved
Hide resolved
fcc6d0c
to
cfa67d8
Compare
Similar to #150639, this fixes the AggressiveInstCombine fold that converts lookup tables to cttz instructions when the gep types are not array types, i.e.
`gep i16 @glob, i64 %idx`
instead of `gep [64 x i16] @glob, i64 0, i64 %idx`.