diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 5ee3bb1abe86e..b60b7c7c352af 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2997,6 +2997,78 @@ Value *InstCombiner::getFreelyInvertedImpl(Value *V, bool WillInvertAllUses,
   return nullptr;
 }
 
+/// Accumulate constant indices from GEPs with all-constant indices, then
+/// check whether the outermost GEP (the one with a single variable index) is
+/// flattenable. Matches and returns true for multi-dimensional array GEPs
+/// with exactly one variable index. The pointer operand may itself be another
+/// GEP with all-constant indices. For example:
+/// -getelementptr [9 x [9 x [9 x i32]]], ptr @arr, i64 0, i64 %i, i64 2, i64 3
+/// -getelementptr [9 x [9 x [9 x i32]]],
+///    <gep with all-constant indices>, i64 0, i64 %i, i64 2, i64 3
+static bool isMultiDimGepFlattenable(const GetElementPtrInst &GEP) {
+  // Collect the chain of GEPs, outermost first.
+  SmallVector<const GEPOperator *> GEPChain;
+  const Value *Base = &GEP;
+
+  // Walk over GEPs with all-constant indices.
+  while (auto *CurGep = dyn_cast<GEPOperator>(Base)) {
+    bool AllConst = true;
+    for (unsigned I = 1; I < CurGep->getNumOperands(); ++I)
+      if (!isa<ConstantInt>(CurGep->getOperand(I)))
+        AllConst = false;
+    if (!AllConst)
+      break;
+    GEPChain.push_back(CurGep);
+    Base = CurGep->getOperand(0)->stripPointerCasts();
+  }
+
+  // Accumulate all indices from innermost to outermost.
+  SmallVector<Value *> Indices;
+  for (int I = GEPChain.size() - 1; I >= 0; --I) {
+    const GEPOperator *GO = GEPChain[I];
+    for (unsigned J = 1; J < GO->getNumOperands(); ++J)
+      Indices.push_back(GO->getOperand(J));
+  }
+
+  // Add the indices of the main GEP (skipping the pointer operand).
+  for (unsigned J = 1; J < GEP.getNumOperands(); ++J)
+    Indices.push_back(GEP.getOperand(J));
+
+  if (Indices.empty())
+    return false;
+
+  // The first index must be constant zero (array base).
+  if (!isa<ConstantInt>(Indices[0]) || !cast<ConstantInt>(Indices[0])->isZero())
+    return false;
+
+  unsigned NumDims = Indices.size() - 1;
+
+  // Only lower arrays with 3 or more dimensions.
+  if (NumDims < 3)
+    return false;
+
+  // Check that the indexed type is nested arrays all the way down.
+  Type *CurTy = GEP.getSourceElementType();
+  unsigned NumVar = 0;
+  for (unsigned I = 1; I < Indices.size(); ++I) {
+    auto *ArrTy = dyn_cast<ArrayType>(CurTy);
+    if (!ArrTy)
+      return false;
+    if (!isa<ConstantInt>(Indices[I]))
+      ++NumVar;
+    CurTy = ArrTy->getElementType();
+  }
+
+  // Only lower GEPs with exactly one variable index.
+  if (NumVar != 1)
+    return false;
+
+  if (!CurTy->isIntegerTy() || CurTy->getIntegerBitWidth() > 128)
+    return false;
+
+  return true;
+}
+
 /// Return true if we should canonicalize the gep to an i8 ptradd.
 static bool shouldCanonicalizeGEPToPtrAdd(GetElementPtrInst &GEP) {
   Value *PtrOp = GEP.getOperand(0);
@@ -3017,6 +3089,9 @@ static bool shouldCanonicalizeGEPToPtrAdd(GetElementPtrInst &GEP) {
                                  m_Shl(m_Value(), m_ConstantInt())))))
     return true;
 
+  if (isMultiDimGepFlattenable(GEP))
+    return true;
+
   // gep (gep %p, C1), %x, C2 is expanded so the two constants can
   // possibly be merged together.
   auto *PtrOpGep = dyn_cast<GEPOperator>(PtrOp);
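
For reference, the transform this enables reduces each flattened access to one multiply of the variable index plus a constant byte offset. The standalone C++ sketch below is illustrative only (it is not part of the patch, and the helper name is invented); it reproduces the offset arithmetic for the [10 x [10 x [10 x i32]]] global exercised by the tests, where the indices 0, %x, 2, 5 become %x * 400 + 100 bytes:

#include <cassert>
#include <cstdint>

// Byte offset of glob[0][X][2][5] in a [10 x [10 x [10 x i32]]] array,
// mirroring the "mul i64 %x, 400" followed by the "+ 100" gep in @flat_gep.
static int64_t flattenedOffset(int64_t X) {
  const int64_t ElemSize = 4;               // size of i32 in bytes
  const int64_t RowStride = 10 * ElemSize;  // stride of the innermost dimension
  const int64_t PlaneStride = 10 * RowStride;
  return X * PlaneStride + 2 * RowStride + 5 * ElemSize; // X * 400 + 100
}

int main() {
  assert(flattenedOffset(0) == 100 && flattenedOffset(3) == 1300);
  return 0;
}
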
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll b/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll
index 07c8a8c6b90e1..1a1b3d1a746ed 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll
@@ -2,6 +2,9 @@
 ; RUN: opt < %s -passes=instcombine -S | FileCheck %s
 
 @glob = internal global [10 x [10 x [10 x i32]]] zeroinitializer
+@glob_i8 = internal global [10 x [10 x [10 x i8]]] zeroinitializer
+@glob_i16 = internal global [10 x [10 x [10 x i16]]] zeroinitializer
+@glob_i64 = internal global [10 x [10 x [10 x i64]]] zeroinitializer
 
 define ptr @x12(i64 %x) {
 ; CHECK-LABEL: define ptr @x12(
@@ -78,3 +81,61 @@ entry:
   %c = add i32 %a, %b
   ret i32 %c
 }
+
+define ptr @flat_gep8(i64 %x) {
+; CHECK-LABEL: define ptr @flat_gep8(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[GEP_IDX:%.*]] = mul i64 [[X]], 100
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr @glob_i8, i64 [[GEP_IDX]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 35
+; CHECK-NEXT:    ret ptr [[GEP]]
+;
+entry:
+  %gep = getelementptr [10 x [10 x [10 x i8]]], ptr @glob_i8, i64 0, i64 %x, i64 3, i64 5
+  ret ptr %gep
+}
+
+define ptr @flat_gep16(i64 %x) {
+; CHECK-LABEL: define ptr @flat_gep16(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[GEP_IDX:%.*]] = mul i64 [[X]], 200
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr @glob_i16, i64 [[GEP_IDX]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 46
+; CHECK-NEXT:    ret ptr [[GEP]]
+;
+entry:
+  %gep = getelementptr [10 x [10 x [10 x i16]]], ptr @glob_i16, i64 0, i64 %x, i64 2, i64 3
+  ret ptr %gep
+}
+
+define ptr @flat_gep(i64 %x) {
+; CHECK-LABEL: define ptr @flat_gep(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[GEP_IDX:%.*]] = mul i64 [[X]], 400
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr @glob, i64 [[GEP_IDX]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 100
+; CHECK-NEXT:    ret ptr [[GEP]]
+;
+entry:
+  %gep = getelementptr [10 x [10 x [10 x i32]]], ptr @glob, i64 0, i64 %x, i64 2, i64 5
+  ret ptr %gep
+}
+
+define ptr @flat_gep64(i64 %x) {
+; CHECK-LABEL: define ptr @flat_gep64(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[GEP_IDX:%.*]] = mul i64 [[X]], 800
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr @glob_i64, i64 [[GEP_IDX]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 288
+; CHECK-NEXT:    ret ptr [[GEP]]
+;
+entry:
+  %gep = getelementptr [10 x [10 x [10 x i64]]], ptr @glob_i64, i64 0, i64 %x, i64 3, i64 6
+  ret ptr %gep
+}
+
+
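
The constant multipliers and offsets baked into the new CHECK lines follow the same arithmetic for each element width. As a quick cross-check (illustrative standalone C++, not part of the test file), the stride of the variable index in a 10x10x10 array is 100 elements and the constant tail indices (a, b) contribute a * 10 + b elements, both scaled by the element size:

// Expected "mul" factors and constant gep offsets from the new tests.
static_assert(100 * 1 == 100 && (3 * 10 + 5) * 1 == 35, "@flat_gep8");
static_assert(100 * 2 == 200 && (2 * 10 + 3) * 2 == 46, "@flat_gep16");
static_assert(100 * 4 == 400 && (2 * 10 + 5) * 4 == 100, "@flat_gep");
static_assert(100 * 8 == 800 && (3 * 10 + 6) * 8 == 288, "@flat_gep64");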