Skip to content

[InstCombine] Lower multi-dimensional GEP to ptradd #150383

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 75 additions & 0 deletions llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2997,6 +2997,78 @@ Value *InstCombiner::getFreelyInvertedImpl(Value *V, bool WillInvertAllUses,
return nullptr;
}

/// Check whether a multi-dimensional array GEP is profitable to lower to an
/// i8 ptradd.
/// Returns true for GEPs indexing through nested arrays (3 or more
/// dimensions) with exactly one variable index, e.g.:
///   getelementptr [9 x [9 x [9 x i32]]], ptr @arr, i64 0, i64 %i, i64 2, i64 3
/// The pointer operand may itself be another GEP (e.g. one with all-constant
/// indices); it is deliberately not inspected here, because only the
/// outermost GEP's own indices decide the outcome: any all-constant-index
/// GEP feeding the pointer operand contributes zero variable indices, so it
/// could never satisfy (nor violate) the single-variable-index requirement
/// below, and constant-offset GEPs are merged by the existing combines once
/// this one is flattened.
static bool ismultiDimGepFlattenable(const GetElementPtrInst &GEP) {
  // Need at least one index (operand 0 is the pointer).
  if (GEP.getNumOperands() < 2)
    return false;

  // First index must be a constant zero (addressing the array base).
  auto *FirstIdx = dyn_cast<ConstantInt>(GEP.getOperand(1));
  if (!FirstIdx || !FirstIdx->isZero())
    return false;

  // Limit lowering to arrays with 3 or more dimensions; smaller GEPs are
  // already handled well by the other canonicalizations.
  unsigned NumDims = GEP.getNumOperands() - 2;
  if (NumDims < 3)
    return false;

  // Check that it's arrays all the way down, counting variable indices as we
  // walk the indexed types.
  Type *CurTy = GEP.getSourceElementType();
  unsigned NumVar = 0;
  for (unsigned I = 2; I < GEP.getNumOperands(); ++I) {
    auto *ArrTy = dyn_cast<ArrayType>(CurTy);
    if (!ArrTy)
      return false;
    if (!isa<ConstantInt>(GEP.getOperand(I)))
      ++NumVar;
    CurTy = ArrTy->getElementType();
  }

  // Limit lowering to exactly one variable index.
  if (NumVar != 1)
    return false;

  // Only flatten when the innermost element is a plain integer of at most
  // 128 bits.
  return CurTy->isIntegerTy() && CurTy->getIntegerBitWidth() <= 128;
}

/// Return true if we should canonicalize the gep to an i8 ptradd.
static bool shouldCanonicalizeGEPToPtrAdd(GetElementPtrInst &GEP) {
Value *PtrOp = GEP.getOperand(0);
Expand All @@ -3017,6 +3089,9 @@ static bool shouldCanonicalizeGEPToPtrAdd(GetElementPtrInst &GEP) {
m_Shl(m_Value(), m_ConstantInt())))))
return true;

if (ismultiDimGepFlattenable(GEP))
return true;

// gep (gep %p, C1), %x, C2 is expanded so the two constants can
// possibly be merged together.
auto PtrOpGep = dyn_cast<GEPOperator>(PtrOp);
Expand Down
61 changes: 61 additions & 0 deletions llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
; RUN: opt < %s -passes=instcombine -S | FileCheck %s

@glob = internal global [10 x [10 x [10 x i32]]] zeroinitializer
@glob_i8 = internal global [10 x [10 x [10 x i8]]] zeroinitializer
@glob_i16 = internal global [10 x [10 x [10 x i16]]] zeroinitializer
@glob_i64 = internal global [10 x [10 x [10 x i64]]] zeroinitializer

define ptr @x12(i64 %x) {
; CHECK-LABEL: define ptr @x12(
Expand Down Expand Up @@ -78,3 +81,61 @@ entry:
%c = add i32 %a, %b
ret i32 %c
}

; Typed-pointer return syntax (i8*) no longer parses with opaque pointers;
; use ptr, matching the CHECK lines below.
define ptr @flat_gep8(i64 %x) {
; CHECK-LABEL: define ptr @flat_gep8(
; CHECK-SAME: i64 [[X:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[GEP_IDX:%.*]] = mul i64 [[X]], 100
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr @glob_i8, i64 [[GEP_IDX]]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 35
; CHECK-NEXT: ret ptr [[GEP]]
;
entry:
%gep = getelementptr [10 x [10 x [10 x i8]]], ptr @glob_i8, i64 0, i64 %x, i64 3, i64 5
ret ptr %gep
}

; Typed-pointer return syntax (i16*) no longer parses with opaque pointers;
; use ptr, matching the CHECK lines below.
define ptr @flat_gep16(i64 %x) {
; CHECK-LABEL: define ptr @flat_gep16(
; CHECK-SAME: i64 [[X:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[GEP_IDX:%.*]] = mul i64 [[X]], 200
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr @glob_i16, i64 [[GEP_IDX]]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 46
; CHECK-NEXT: ret ptr [[GEP]]
;
entry:
%gep = getelementptr [10 x [10 x [10 x i16]]], ptr @glob_i16, i64 0, i64 %x, i64 2, i64 3
ret ptr %gep
}

; Typed-pointer return syntax (i32*) no longer parses with opaque pointers;
; use ptr, matching the CHECK lines below.
define ptr @flat_gep(i64 %x) {
; CHECK-LABEL: define ptr @flat_gep(
; CHECK-SAME: i64 [[X:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[GEP_IDX:%.*]] = mul i64 [[X]], 400
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr @glob, i64 [[GEP_IDX]]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 100
; CHECK-NEXT: ret ptr [[GEP]]
;
entry:
%gep = getelementptr [10 x [10 x [10 x i32]]], ptr @glob, i64 0, i64 %x, i64 2, i64 5
ret ptr %gep
}

; Typed-pointer return syntax (i64*) no longer parses with opaque pointers;
; use ptr, matching the CHECK lines below.
define ptr @flat_gep64(i64 %x) {
; CHECK-LABEL: define ptr @flat_gep64(
; CHECK-SAME: i64 [[X:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[GEP_IDX:%.*]] = mul i64 [[X]], 800
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr @glob_i64, i64 [[GEP_IDX]]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 288
; CHECK-NEXT: ret ptr [[GEP]]
;
entry:
%gep = getelementptr [10 x [10 x [10 x i64]]], ptr @glob_i64, i64 0, i64 %x, i64 3, i64 6
ret ptr %gep
}