diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 5ee3bb1abe86e..b60b7c7c352af 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2997,6 +2997,78 @@ Value *InstCombiner::getFreelyInvertedImpl(Value *V, bool WillInvertAllUses,
   return nullptr;
 }
 
+/// Accumulate constant indices from GEPs with all-constant indices, then
+/// check whether the outermost GEP (the one with a single variable index) is
+/// flattenable. Matches and returns true for multi-dimensional array GEPs
+/// with exactly one variable index. The pointer operand may itself be another
+/// GEP with all-constant indices. For example:
+/// -getelementptr [9 x [9 x [9 x i32]]], ptr @arr, i64 0, i64 %i, i64 2, i64 3
+/// -getelementptr [9 x [9 x [9 x i32]]],
+///    <gep with all-constant indices>, i64 0, i64 %i, i64 2, i64 3
+static bool isMultiDimGepFlattenable(const GetElementPtrInst &GEP) {
+  // Collect the chain of GEPs, outermost first.
+  SmallVector<const GEPOperator *> GEPChain;
+  const Value *Base = &GEP;
+
+  // Walk over GEPs with all-constant indices.
+  while (auto *CurGep = dyn_cast<GEPOperator>(Base)) {
+    bool AllConst = true;
+    for (unsigned I = 1; I < CurGep->getNumOperands(); ++I)
+      if (!isa<ConstantInt>(CurGep->getOperand(I)))
+        AllConst = false;
+    if (!AllConst)
+      break;
+    GEPChain.push_back(CurGep);
+    Base = CurGep->getOperand(0)->stripPointerCasts();
+  }
+
+  // Accumulate all indices from innermost to outermost.
+  SmallVector<Value *> Indices;
+  for (int I = GEPChain.size() - 1; I >= 0; --I) {
+    const GEPOperator *GO = GEPChain[I];
+    for (unsigned J = 1; J < GO->getNumOperands(); ++J)
+      Indices.push_back(GO->getOperand(J));
+  }
+
+  // Add the indices of the main GEP (skipping the pointer operand).
+  for (unsigned J = 1; J < GEP.getNumOperands(); ++J)
+    Indices.push_back(GEP.getOperand(J));
+
+  if (Indices.empty())
+    return false;
+
+  // The first index must be constant zero (array base).
+  if (!isa<ConstantInt>(Indices[0]) || !cast<ConstantInt>(Indices[0])->isZero())
+    return false;
+
+  unsigned NumDims = Indices.size() - 1;
+
+  // Only lower arrays with 3 or more dimensions.
+  if (NumDims < 3)
+    return false;
+
+  // Check that the indexed type is nested arrays all the way down.
+  Type *CurTy = GEP.getSourceElementType();
+  unsigned NumVar = 0;
+  for (unsigned I = 1; I < Indices.size(); ++I) {
+    auto *ArrTy = dyn_cast<ArrayType>(CurTy);
+    if (!ArrTy)
+      return false;
+    if (!isa<ConstantInt>(Indices[I]))
+      ++NumVar;
+    CurTy = ArrTy->getElementType();
+  }
+
+  // Only lower GEPs with exactly one variable index.
+  if (NumVar != 1)
+    return false;
+
+  if (!CurTy->isIntegerTy() || CurTy->getIntegerBitWidth() > 128)
+    return false;
+
+  return true;
+}
+
 /// Return true if we should canonicalize the gep to an i8 ptradd.
 static bool shouldCanonicalizeGEPToPtrAdd(GetElementPtrInst &GEP) {
   Value *PtrOp = GEP.getOperand(0);
@@ -3017,6 +3089,9 @@ static bool shouldCanonicalizeGEPToPtrAdd(GetElementPtrInst &GEP) {
                                  m_Shl(m_Value(), m_ConstantInt())))))
     return true;
 
+  if (isMultiDimGepFlattenable(GEP))
+    return true;
+
   // gep (gep %p, C1), %x, C2 is expanded so the two constants can
   // possibly be merged together.
   auto *PtrOpGep = dyn_cast<GEPOperator>(PtrOp);
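
For reference, the transform this enables reduces each flattened access to one multiply of the variable index plus a constant byte offset. The standalone C++ sketch below is illustrative only (it is not part of the patch, and the helper name is invented); it reproduces the offset arithmetic for the [10 x [10 x [10 x i32]]] global exercised by the tests, where the indices 0, %x, 2, 5 become %x * 400 + 100 bytes:

#include <cassert>
#include <cstdint>

// Byte offset of glob[0][X][2][5] in a [10 x [10 x [10 x i32]]] array,
// mirroring the "mul i64 %x, 400" followed by the "+ 100" gep in @flat_gep.
static int64_t flattenedOffset(int64_t X) {
  const int64_t ElemSize = 4;               // size of i32 in bytes
  const int64_t RowStride = 10 * ElemSize;  // stride of the innermost dimension
  const int64_t PlaneStride = 10 * RowStride;
  return X * PlaneStride + 2 * RowStride + 5 * ElemSize; // X * 400 + 100
}

int main() {
  assert(flattenedOffset(0) == 100 && flattenedOffset(3) == 1300);
  return 0;
}
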
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll b/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll
index 07c8a8c6b90e1..1a1b3d1a746ed 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll
@@ -2,6 +2,9 @@
 ; RUN: opt < %s -passes=instcombine -S | FileCheck %s
 
 @glob = internal global [10 x [10 x [10 x i32]]] zeroinitializer
+@glob_i8 = internal global [10 x [10 x [10 x i8]]] zeroinitializer
+@glob_i16 = internal global [10 x [10 x [10 x i16]]] zeroinitializer
+@glob_i64 = internal global [10 x [10 x [10 x i64]]] zeroinitializer
 
 define ptr @x12(i64 %x) {
 ; CHECK-LABEL: define ptr @x12(
@@ -78,3 +81,61 @@ entry:
   %c = add i32 %a, %b
   ret i32 %c
 }
+
+define ptr @flat_gep8(i64 %x) {
+; CHECK-LABEL: define ptr @flat_gep8(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[GEP_IDX:%.*]] = mul i64 [[X]], 100
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr @glob_i8, i64 [[GEP_IDX]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 35
+; CHECK-NEXT:    ret ptr [[GEP]]
+;
+entry:
+  %gep = getelementptr [10 x [10 x [10 x i8]]], ptr @glob_i8, i64 0, i64 %x, i64 3, i64 5
+  ret ptr %gep
+}
+
+define ptr @flat_gep16(i64 %x) {
+; CHECK-LABEL: define ptr @flat_gep16(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[GEP_IDX:%.*]] = mul i64 [[X]], 200
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr @glob_i16, i64 [[GEP_IDX]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 46
+; CHECK-NEXT:    ret ptr [[GEP]]
+;
+entry:
+  %gep = getelementptr [10 x [10 x [10 x i16]]], ptr @glob_i16, i64 0, i64 %x, i64 2, i64 3
+  ret ptr %gep
+}
+
+define ptr @flat_gep(i64 %x) {
+; CHECK-LABEL: define ptr @flat_gep(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[GEP_IDX:%.*]] = mul i64 [[X]], 400
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr @glob, i64 [[GEP_IDX]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 100
+; CHECK-NEXT:    ret ptr [[GEP]]
+;
+entry:
+  %gep = getelementptr [10 x [10 x [10 x i32]]], ptr @glob, i64 0, i64 %x, i64 2, i64 5
+  ret ptr %gep
+}
+
+define ptr @flat_gep64(i64 %x) {
+; CHECK-LABEL: define ptr @flat_gep64(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[GEP_IDX:%.*]] = mul i64 [[X]], 800
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr @glob_i64, i64 [[GEP_IDX]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 288
+; CHECK-NEXT:    ret ptr [[GEP]]
+;
+entry:
+  %gep = getelementptr [10 x [10 x [10 x i64]]], ptr @glob_i64, i64 0, i64 %x, i64 3, i64 6
+  ret ptr %gep
+}
+
+
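
The constant multipliers and offsets baked into the new CHECK lines follow the same arithmetic for each element width. As a quick cross-check (illustrative standalone C++, not part of the test file), the stride of the variable index in a 10x10x10 array is 100 elements and the constant tail indices (a, b) contribute a * 10 + b elements, both scaled by the element size:

// Expected "mul" factors and constant gep offsets from the new tests.
static_assert(100 * 1 == 100 && (3 * 10 + 5) * 1 == 35, "@flat_gep8");
static_assert(100 * 2 == 200 && (2 * 10 + 3) * 2 == 46, "@flat_gep16");
static_assert(100 * 4 == 400 && (2 * 10 + 5) * 4 == 100, "@flat_gep");
static_assert(100 * 8 == 800 && (3 * 10 + 6) * 8 == 288, "@flat_gep64");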