[InstCombine] Make foldCmpLoadFromIndexedGlobal more resilient to non-array geps. #150639
Conversation
…-array geps. My understanding is that gep [n x i8] and gep i8 can be treated equivalently - the array type conveys no extra information and could be removed. This goes through foldCmpLoadFromIndexedGlobal and tries to make it work for non-array gep types, so long as the gep element type still matches the element type of the array being loaded.
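For illustration only (a made-up example, not part of the patch; the @tbl global and its values are hypothetical), the two geps below compute the same address, so the fold should now apply to the second form as it already does to the first:

@tbl = internal constant [4 x i16] [i16 9, i16 0, i16 9, i16 9]

define i1 @with_array_type(i32 %X) {
  ; array-typed gep: leading zero index, then the element index
  %p = getelementptr inbounds [4 x i16], ptr @tbl, i32 0, i32 %X
  %q = load i16, ptr %p
  %r = icmp eq i16 %q, 0
  ret i1 %r
}

define i1 @without_array_type(i32 %X) {
  ; element-typed gep: same address, no leading zero index
  %p = getelementptr inbounds i16, ptr @tbl, i32 %X
  %q = load i16, ptr %p
  %r = icmp eq i16 %q, 0
  ret i1 %r
}

With this initializer the element is zero only at index 1, so both functions should fold to a compare on the index itself (icmp eq i32 %X, 1).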
@llvm/pr-subscribers-llvm-transforms
Author: David Green (davemgreen)
Changes: My understanding is that gep [n x i8] and gep i8 can be treated equivalently - the array type conveys no extra information and could be removed. This goes through foldCmpLoadFromIndexedGlobal and tries to make it work for non-array gep types, so long as the gep element type still matches the element type of the array being loaded.
Full diff: https://github.com/llvm/llvm-project/pull/150639.diff
2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index da9b12686a8f1..bd96edab201b5 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -113,10 +113,16 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
LoadInst *LI, GetElementPtrInst *GEP, GlobalVariable *GV, CmpInst &ICI,
ConstantInt *AndCst) {
if (LI->isVolatile() || LI->getType() != GEP->getResultElementType() ||
- GV->getValueType() != GEP->getSourceElementType() || !GV->isConstant() ||
+ !GV->getValueType()->isArrayTy() || !GV->isConstant() ||
!GV->hasDefinitiveInitializer())
return nullptr;
+ Type *GEPSrcEltTy = GEP->getSourceElementType();
+ if (GEPSrcEltTy->isArrayTy())
+ GEPSrcEltTy = GEPSrcEltTy->getArrayElementType();
+ if (GV->getValueType()->getArrayElementType() != GEPSrcEltTy)
+ return nullptr;
+
Constant *Init = GV->getInitializer();
if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
return nullptr;
@@ -127,21 +133,27 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
return nullptr;
// There are many forms of this optimization we can handle, for now, just do
- // the simple index into a single-dimensional array.
+ // the simple index into a single-dimensional array or elements of equal size.
//
- // Require: GEP GV, 0, i {{, constant indices}}
- if (GEP->getNumOperands() < 3 || !isa<ConstantInt>(GEP->getOperand(1)) ||
- !cast<ConstantInt>(GEP->getOperand(1))->isZero() ||
- isa<Constant>(GEP->getOperand(2)))
+ // Require: GEP [n x i8] GV, 0, Idx {{, constant indices}}
+ // Or: GEP i8 GV, Idx
+
+ SmallVector<unsigned, 4> LaterIndices;
+ unsigned GEPIdxOp = 1;
+ if (GEP->getSourceElementType()->isArrayTy()) {
+ GEPIdxOp = 2;
+ if (!match(GEP->getOperand(1), m_ZeroInt()))
+ return nullptr;
+ }
+ if (GEP->getNumOperands() < GEPIdxOp + 1 ||
+ isa<Constant>(GEP->getOperand(GEPIdxOp)))
return nullptr;
// Check that indices after the variable are constants and in-range for the
// type they index. Collect the indices. This is typically for arrays of
// structs.
- SmallVector<unsigned, 4> LaterIndices;
-
Type *EltTy = Init->getType()->getArrayElementType();
- for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) {
+ for (unsigned i = GEPIdxOp + 1, e = GEP->getNumOperands(); i != e; ++i) {
ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i));
if (!Idx)
return nullptr; // Variable index.
@@ -290,7 +302,7 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
// Now that we've scanned the entire array, emit our new comparison(s). We
// order the state machines in complexity of the generated code.
- Value *Idx = GEP->getOperand(2);
+ Value *Idx = GEP->getOperand(GEPIdxOp);
// If the index is larger than the pointer offset size of the target, truncate
// the index down like the GEP would do implicitly. We don't have to do this
diff --git a/llvm/test/Transforms/InstCombine/load-cmp.ll b/llvm/test/Transforms/InstCombine/load-cmp.ll
index 12be81b8f815d..485da6d2aaec4 100644
--- a/llvm/test/Transforms/InstCombine/load-cmp.ll
+++ b/llvm/test/Transforms/InstCombine/load-cmp.ll
@@ -68,7 +68,6 @@ define i1 @test1_noinbounds_as1(i32 %x) {
%q = load i16, ptr addrspace(1) %p
%r = icmp eq i16 %q, 0
ret i1 %r
-
}
define i1 @test1_noinbounds_as2(i64 %x) {
@@ -81,7 +80,17 @@ define i1 @test1_noinbounds_as2(i64 %x) {
%q = load i16, ptr addrspace(2) %p
%r = icmp eq i16 %q, 0
ret i1 %r
+}
+define i1 @test1_noarrayty(i32 %X) {
+; CHECK-LABEL: @test1_noarrayty(
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[X:%.*]], 9
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %P = getelementptr inbounds i16, ptr @G16, i32 %X
+ %Q = load i16, ptr %P
+ %R = icmp eq i16 %Q, 0
+ ret i1 %R
}
define i1 @test2(i32 %X) {
@@ -104,7 +113,17 @@ define i1 @test3(i32 %X) {
%Q = load double, ptr %P
%R = fcmp oeq double %Q, 1.0
ret i1 %R
+}
+define i1 @test3_noarrayty(i32 %X) {
+; CHECK-LABEL: @test3_noarrayty(
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[X:%.*]], 1
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %P = getelementptr inbounds double, ptr @GD, i32 %X
+ %Q = load double, ptr %P
+ %R = fcmp oeq double %Q, 1.0
+ ret i1 %R
}
define i1 @test4(i32 %X) {
@@ -333,6 +352,17 @@ define i1 @test10_struct_arr_noinbounds_i64(i64 %x) {
ret i1 %r
}
+define i1 @test10_struct_arr_noarrayty(i32 %x) {
+; CHECK-LABEL: @test10_struct_arr_noarrayty(
+; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[X:%.*]], 1
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %p = getelementptr inbounds %Foo, ptr @GStructArr, i32 %x, i32 2
+ %q = load i32, ptr %p
+ %r = icmp eq i32 %q, 9
+ ret i1 %r
+}
+
@table = internal constant [2 x ptr] [ptr @g, ptr getelementptr (i8, ptr @g, i64 4)], align 16
@g = external global [2 x i32]
IIRC #67093 is our final goal (i.e., cmp + load + gep i8/ptradd). But it looks stalled.
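For context, here is a rough, hypothetical sketch of that byte-offset (gep i8 / ptradd) form; this patch still rejects it because the gep element type (i8) does not match the i16 element type of the array:

@tbl = internal constant [4 x i16] [i16 9, i16 0, i16 9, i16 9]

define i1 @byte_offset_form(i32 %X) {
  ; the index is pre-scaled to a byte offset, so the gep carries no element type information
  %off = shl i32 %X, 1                               ; %X * sizeof(i16)
  %p = getelementptr inbounds i8, ptr @tbl, i32 %off
  %q = load i16, ptr %p
  %r = icmp eq i16 %q, 0
  ret i1 %r
}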
LG. It looks like we are still far from the type-independent solution. I think this patch may be a simple mitigation. As for regressions caused by this patch, we may avoid emitting multiple icmps when the load has other users.
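A hypothetical illustration of that case (made-up IR, not from the patch): the load has a second user, so it cannot be deleted, and any icmps emitted for the compare are added on top of the load that remains:

@tbl = internal constant [4 x i16] [i16 9, i16 0, i16 9, i16 9]

define i32 @load_kept_alive(i32 %X, ptr %out) {
  %p = getelementptr inbounds [4 x i16], ptr @tbl, i32 0, i32 %X
  %q = load i16, ptr %p
  store i16 %q, ptr %out                   ; second user keeps the load alive
  %r = icmp eq i16 %q, 0                   ; the fold rewrites only this compare
  %z = zext i1 %r to i32
  ret i32 %z
}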
I'm fine with this as well. This is not ideal, but still an improvement.
// Require: GEP [n x i8] GV, 0, Idx {{, constant indices}}
// Or: GEP i8 GV, Idx

SmallVector<unsigned, 4> LaterIndices;
Why move the declaration of LaterIndices?