From 23ac9f05b01a20a62b8d12253afb51fb3c591950 Mon Sep 17 00:00:00 2001 From: Anna Thomas Date: Thu, 17 Jul 2025 17:48:13 -0400 Subject: [PATCH] [Loads] Support dereference for non-constant offset Improve isDereferenceableAndAlignedInLoop API to consider non-constant offset feeding into the first access in the loop. --- llvm/lib/Analysis/Loads.cpp | 28 ++++++------- ...able-info-from-assumption-variable-size.ll | 42 +++++-------------- 2 files changed, 25 insertions(+), 45 deletions(-) diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp index 78d0887d5d87e..250b46c6560eb 100644 --- a/llvm/lib/Analysis/Loads.cpp +++ b/llvm/lib/Analysis/Loads.cpp @@ -361,29 +361,29 @@ bool llvm::isDereferenceableAndAlignedInLoop( AccessSize = MaxPtrDiff; AccessSizeSCEV = PtrDiff; } else if (auto *MinAdd = dyn_cast<SCEVAddExpr>(AccessStart)) { - if (MinAdd->getNumOperands() != 2) - return false; + const auto *NewBase = dyn_cast<SCEVUnknown>(SE.getPointerBase(MinAdd)); + const auto *OffsetSCEV = SE.removePointerBase(MinAdd); - const auto *Offset = dyn_cast<SCEVConstant>(MinAdd->getOperand(0)); - const auto *NewBase = dyn_cast<SCEVUnknown>(MinAdd->getOperand(1)); - if (!Offset || !NewBase) + if (!OffsetSCEV || !NewBase) return false; - // The following code below assumes the offset is unsigned, but GEP - // offsets are treated as signed so we can end up with a signed value - // here too. For example, suppose the initial PHI value is (i8 255), - // the offset will be treated as (i8 -1) and sign-extended to (i64 -1). - if (Offset->getAPInt().isNegative()) + if (!SE.isKnownNonNegative(OffsetSCEV)) return false; // For the moment, restrict ourselves to the case where the offset is a // multiple of the requested alignment and the base is aligned. 
// TODO: generalize if a case found which warrants - if (Offset->getAPInt().urem(Alignment.value()) != 0) + auto *OffsetSCEVTy = OffsetSCEV->getType(); + if (!SE.isKnownPredicate( + ICmpInst::ICMP_EQ, + SE.getURemExpr(OffsetSCEV, + SE.getConstant(OffsetSCEVTy, Alignment.value())), + SE.getZero(OffsetSCEVTy))) return false; - - AccessSize = MaxPtrDiff + Offset->getAPInt(); - AccessSizeSCEV = SE.getAddExpr(PtrDiff, Offset); + AccessSizeSCEV = SE.getAddExpr(PtrDiff, OffsetSCEV); + const auto *Offset = dyn_cast<SCEVConstant>(OffsetSCEV); + AccessSize = MaxPtrDiff + (Offset ? Offset->getAPInt() + : SE.getUnsignedRangeMax(OffsetSCEV)); Base = NewBase->getValue(); } else return false; diff --git a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll index 9852f538c6f74..4320d7d90bf47 100644 --- a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll +++ b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll @@ -562,38 +562,19 @@ define void @deref_assumption_loop_access_start_variable(i8 %v, ptr noundef %P, ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[IV_START]], [[N_VEC]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[IV_START]], [[INDEX]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 1 -; CHECK-NEXT: [[TMP8:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP8]], splat (i1 true) -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 -; 
CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] -; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP16]] -; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP7]], align 1 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[TMP19]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] -; CHECK: [[PRED_LOAD_CONTINUE]]: -; CHECK-NEXT: [[TMP10:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP9]], %[[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1 -; CHECK-NEXT: br i1 [[TMP11]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]] -; CHECK: [[PRED_LOAD_IF1]]: -; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 1 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 1 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP14]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] -; CHECK: [[PRED_LOAD_CONTINUE2]]: -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = phi <2 x i32> [ [[TMP10]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP15]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP8]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]] -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP17]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[A]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP2]], align 1 +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> 
[[TMP4]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP5]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] @@ -620,7 +601,6 @@ define void @deref_assumption_loop_access_start_variable(i8 %v, ptr noundef %P, ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; - entry: %a = getelementptr i8, ptr %P, i64 16 %cmp = icmp slt i64 %iv.start, %N