From 269f0ddde13fb6b95bcf530cf071ceaca109f037 Mon Sep 17 00:00:00 2001
From: Usha Gupta
Date: Thu, 24 Jul 2025 07:26:14 +0000
Subject: [PATCH 1/3] [InstCombine] Lower multi-dimensional GEP to ptradd

---
 llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 5ee3bb1abe86e..a62a462f26918 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -3017,6 +3017,15 @@ static bool shouldCanonicalizeGEPToPtrAdd(GetElementPtrInst &GEP) {
                        m_Shl(m_Value(), m_ConstantInt())))))
     return true;
 
+  // Flatten multidimensional GEPs with one variable index.
+  unsigned NumVarIndices = 0;
+  for (unsigned i = 1; i < GEP.getNumOperands(); ++i) {
+    if (!isa<ConstantInt>(GEP.getOperand(i)))
+      ++NumVarIndices;
+  }
+  if (NumVarIndices == 1)
+    return true;
+
   // gep (gep %p, C1), %x, C2 is expanded so the two constants can
   // possibly be merged together.
   auto *PtrOpGep = dyn_cast<GEPOperator>(PtrOp);

From 4c121404369c094a45543977e96c9e29469a0c6a Mon Sep 17 00:00:00 2001
From: Usha Gupta
Date: Fri, 25 Jul 2025 19:05:05 +0000
Subject: [PATCH 2/3] Add more constraints for handling multi-dimensional geps
 for global arrays

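Restrict the flattening added in the previous patch: the gep must index
a multi-dimensional array (3 or more dimensions) starting at a constant
zero base index, carry exactly one variable index, and reach an integer
element type of at most 128 bits.

For example, following the flat_gep test added below (the lowered form
shown is taken from its autogenerated CHECK lines):

  ; before
  %gep = getelementptr [10 x [10 x [10 x i32]]], ptr @glob, i64 0, i64 %x, i64 2, i64 5
  ; after: %x is scaled by the 400-byte stride of the outer dimension
  ; (10 * 10 * 4 bytes) and the constant indices fold into a single
  ; 100-byte offset (2 * 40 + 5 * 4 bytes)
  %gep.idx = mul i64 %x, 400
  %0 = getelementptr i8, ptr @glob, i64 %gep.idx
  %gep = getelementptr i8, ptr %0, i64 100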
---
 .../InstCombine/InstructionCombining.cpp      | 45 +++++++++++---
 .../InstCombine/canonicalize-gep-constglob.ll | 61 +++++++++++++++++++
 2 files changed, 99 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index a62a462f26918..9909a6901b63f 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2997,6 +2997,43 @@ Value *InstCombiner::getFreelyInvertedImpl(Value *V, bool WillInvertAllUses,
   return nullptr;
 }
 
+/// Return true if we should lower multi-dimensional geps.
+static bool isMultiDimGep(GetElementPtrInst &GEP) {
+  // Limit handling to arrays of 3+ dimensions with integer types, e.g.
+  // getelementptr [9 x [9 x [9 x i32]]], ptr @arr, i64 0, i64 %i, i64 2, i64 3
+  unsigned NumOps = GEP.getNumOperands();
+
+  // First index must be constant zero (array base).
+  if (!isa<ConstantInt>(GEP.getOperand(1)) ||
+      !cast<ConstantInt>(GEP.getOperand(1))->isZero())
+    return false;
+
+  // Limit lowering to arrays with 3 or more dimensions.
+  if (NumOps < 5)
+    return false;
+
+  // Check that it's arrays all the way down.
+  Type *CurTy = GEP.getSourceElementType();
+  unsigned NumVar = 0;
+  for (unsigned I = 2; I < NumOps; ++I) {
+    auto *ArrTy = dyn_cast<ArrayType>(CurTy);
+    if (!ArrTy)
+      return false;
+    if (!isa<ConstantInt>(GEP.getOperand(I)))
+      ++NumVar;
+    CurTy = ArrTy->getElementType();
+  }
+
+  // Limit lowering to a single variable index.
+  if (NumVar != 1)
+    return false;
+
+  if (!CurTy->isIntegerTy() || CurTy->getIntegerBitWidth() > 128)
+    return false;
+
+  return true;
+}
+
 /// Return true if we should canonicalize the gep to an i8 ptradd.
 static bool shouldCanonicalizeGEPToPtrAdd(GetElementPtrInst &GEP) {
   Value *PtrOp = GEP.getOperand(0);
@@ -3017,13 +3054,7 @@ static bool shouldCanonicalizeGEPToPtrAdd(GetElementPtrInst &GEP) {
                        m_Shl(m_Value(), m_ConstantInt())))))
     return true;
 
-  // Flatten multidimensional GEPs with one variable index.
-  unsigned NumVarIndices = 0;
-  for (unsigned i = 1; i < GEP.getNumOperands(); ++i) {
-    if (!isa<ConstantInt>(GEP.getOperand(i)))
-      ++NumVarIndices;
-  }
-  if (NumVarIndices == 1)
+  if (isMultiDimGep(GEP))
     return true;
 
   // gep (gep %p, C1), %x, C2 is expanded so the two constants can
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll b/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll
index 07c8a8c6b90e1..1a1b3d1a746ed 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll
@@ -2,6 +2,9 @@
 ; RUN: opt < %s -passes=instcombine -S | FileCheck %s
 
 @glob = internal global [10 x [10 x [10 x i32]]] zeroinitializer
+@glob_i8 = internal global [10 x [10 x [10 x i8]]] zeroinitializer
+@glob_i16 = internal global [10 x [10 x [10 x i16]]] zeroinitializer
+@glob_i64 = internal global [10 x [10 x [10 x i64]]] zeroinitializer
 
 define ptr @x12(i64 %x) {
 ; CHECK-LABEL: define ptr @x12(
@@ -78,3 +81,61 @@ entry:
   %c = add i32 %a, %b
   ret i32 %c
 }
+
+define ptr @flat_gep8(i64 %x) {
+; CHECK-LABEL: define ptr @flat_gep8(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[GEP_IDX:%.*]] = mul i64 [[X]], 100
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr @glob_i8, i64 [[GEP_IDX]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 35
+; CHECK-NEXT:    ret ptr [[GEP]]
+;
+entry:
+  %gep = getelementptr [10 x [10 x [10 x i8]]], ptr @glob_i8, i64 0, i64 %x, i64 3, i64 5
+  ret ptr %gep
+}
+
+define ptr @flat_gep16(i64 %x) {
+; CHECK-LABEL: define ptr @flat_gep16(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[GEP_IDX:%.*]] = mul i64 [[X]], 200
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr @glob_i16, i64 [[GEP_IDX]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 46
+; CHECK-NEXT:    ret ptr [[GEP]]
+;
+entry:
+  %gep = getelementptr [10 x [10 x [10 x i16]]], ptr @glob_i16, i64 0, i64 %x, i64 2, i64 3
+  ret ptr %gep
+}
+
+define ptr @flat_gep(i64 %x) {
+; CHECK-LABEL: define ptr @flat_gep(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[GEP_IDX:%.*]] = mul i64 [[X]], 400
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr @glob, i64 [[GEP_IDX]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 100
+; CHECK-NEXT:    ret ptr [[GEP]]
+;
+entry:
+  %gep = getelementptr [10 x [10 x [10 x i32]]], ptr @glob, i64 0, i64 %x, i64 2, i64 5
+  ret ptr %gep
+}
+
+define ptr @flat_gep64(i64 %x) {
+; CHECK-LABEL: define ptr @flat_gep64(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[GEP_IDX:%.*]] = mul i64 [[X]], 800
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr @glob_i64, i64 [[GEP_IDX]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 288
+; CHECK-NEXT:    ret ptr [[GEP]]
+;
+entry:
+  %gep = getelementptr [10 x [10 x [10 x i64]]], ptr @glob_i64, i64 0, i64 %x, i64 3, i64 6
+  ret ptr %gep
+}
+
+

From 82e440d2125223817ad8b051b750e0473458fda4 Mon Sep 17 00:00:00 2001
From: Usha Gupta
Date: Fri, 1 Aug 2025 15:41:24 +0000
Subject: [PATCH 3/3] Handle nested gep with one variable index in the outer
 gep

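Look through a pointer operand that is itself a gep with all-constant
indices, so that its contribution folds into the flattened constant
offset. A hypothetical example of the nested shape now accepted (the
names @arr and %i and the constant indices are illustrative only):

  %inner = getelementptr [9 x [9 x [9 x i32]]], ptr @arr, i64 0, i64 1
  %outer = getelementptr [9 x [9 x [9 x i32]]], ptr %inner, i64 0, i64 %i, i64 2, i64 3

Since %inner contributes only constants, %outer still has a single
variable index overall and remains flattenable to ptradd form.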
---
 .../InstCombine/InstructionCombining.cpp      | 76 ++++++++++++++-------
 1 file changed, 49 insertions(+), 27 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 9909a6901b63f..b60b7c7c352af 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2997,32 +2997,54 @@ Value *InstCombiner::getFreelyInvertedImpl(Value *V, bool WillInvertAllUses,
   return nullptr;
 }
 
-/// Return true if we should lower multi-dimensional geps.
-static bool isMultiDimGep(GetElementPtrInst &GEP) {
-  // Limit handling to arrays of 3+ dimensions with integer types, e.g.
-  // getelementptr [9 x [9 x [9 x i32]]], ptr @arr, i64 0, i64 %i, i64 2, i64 3
-  unsigned NumOps = GEP.getNumOperands();
-
-  // First index must be constant zero (array base).
-  if (!isa<ConstantInt>(GEP.getOperand(1)) ||
-      !cast<ConstantInt>(GEP.getOperand(1))->isZero())
-    return false;
-
-  // Limit lowering to arrays with 3 or more dimensions.
-  if (NumOps < 5)
-    return false;
-
-  // Check that it's arrays all the way down.
-  Type *CurTy = GEP.getSourceElementType();
-  unsigned NumVar = 0;
-  for (unsigned I = 2; I < NumOps; ++I) {
-    auto *ArrTy = dyn_cast<ArrayType>(CurTy);
-    if (!ArrTy)
-      return false;
-    if (!isa<ConstantInt>(GEP.getOperand(I)))
-      ++NumVar;
-    CurTy = ArrTy->getElementType();
-  }
+/// Match a multi-dimensional array gep with exactly one variable index.
+/// The gep's pointer operand may itself be a gep with all-constant indices,
+/// whose offset folds into the flattened constant. For example:
+///  -getelementptr [9 x [9 x [9 x i32]]], ptr @arr, i64 0, i64 %i, i64 2, i64 3
+///  -getelementptr [9 x [9 x [9 x i32]]], <gep of @arr with constant
+///   indices>, i64 0, i64 %i, i64 2, i64 3
+static bool isMultiDimGepFlattenable(const GetElementPtrInst &GEP) {
+  // Collect the chain of all-constant-index geps feeding this gep,
+  // outermost (the gep itself) first.
+  SmallVector<const GEPOperator *> GEPChain;
+  GEPChain.push_back(cast<GEPOperator>(&GEP));
+  const Value *Base = GEP.getOperand(0)->stripPointerCasts();
+
+  // Walk over geps with all-constant indices.
+  while (auto *CurGep = dyn_cast<GEPOperator>(Base)) {
+    if (!CurGep->hasAllConstantIndices())
+      break;
+    GEPChain.push_back(CurGep);
+    Base = CurGep->getOperand(0)->stripPointerCasts();
+  }
+
+  // Visit each gep from innermost to outermost. The first index of the
+  // innermost gep must be constant zero (the array base); the first index
+  // of every outer gep must be a constant so that it folds into the
+  // flattened offset. Every other index must step into an array type.
+  Type *CurTy = nullptr;
+  unsigned NumDims = 0;
+  unsigned NumVar = 0;
+  for (int I = GEPChain.size() - 1; I >= 0; --I) {
+    const GEPOperator *GO = GEPChain[I];
+    auto *FirstIdx = dyn_cast<ConstantInt>(GO->getOperand(1));
+    if (!FirstIdx || (I == int(GEPChain.size()) - 1 && !FirstIdx->isZero()))
+      return false;
+    CurTy = GO->getSourceElementType();
+    for (unsigned J = 2; J < GO->getNumOperands(); ++J) {
+      auto *ArrTy = dyn_cast<ArrayType>(CurTy);
+      if (!ArrTy)
+        return false;
+      if (!isa<ConstantInt>(GO->getOperand(J)))
+        ++NumVar;
+      CurTy = ArrTy->getElementType();
+      ++NumDims;
+    }
+  }
+
+  // Limit lowering to arrays with 3 or more dimensions.
+  if (NumDims < 3)
+    return false;
 
   // Limit lowering to a single variable index.
   if (NumVar != 1)
@@ -3054,7 +3076,7 @@ static bool shouldCanonicalizeGEPToPtrAdd(GetElementPtrInst &GEP) {
                        m_Shl(m_Value(), m_ConstantInt())))))
     return true;
 
-  if (isMultiDimGep(GEP))
+  if (isMultiDimGepFlattenable(GEP))
     return true;
 
   // gep (gep %p, C1), %x, C2 is expanded so the two constants can