-
Notifications
You must be signed in to change notification settings - Fork 14.6k
[InstCombine] Support well-defined recurrences in isGuaranteedNotToBeUndefOrPoison #150420
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-llvm-analysis Author: Cullen Rhodes (c-rhodes) ChangesThis function currently doesn't detect well-defined recurrences that can't produce undef or poison. This prevents instcombine from pushing freezes through GEPs to the ptr which may be undef or poison, when the offset is a well-defined recurrence, as the tests added in #145541 demonstrate. This patch fixes this by pulling existing code from foldFreezeIntoRecurrence in instcombine, which can detect whether a PHI produces undef/poison, so it can be reused by Full diff: https://github.com/llvm/llvm-project/pull/150420.diff 9 Files Affected:
|
@llvm/pr-subscribers-llvm-transforms Author: Cullen Rhodes (c-rhodes) ChangesThis function currently doesn't detect well-defined recurrences that can't produce undef or poison. This prevents instcombine from pushing freezes through GEPs to the ptr which may be undef or poison, when the offset is a well-defined recurrence, as the tests added in #145541 demonstrate. This patch fixes this by pulling existing code from foldFreezeIntoRecurrence in instcombine, which can detect whether a PHI produces undef/poison, so it can be reused by Full diff: https://github.com/llvm/llvm-project/pull/150420.diff 9 Files Affected:
diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h
index 02990a3cb44f7..c37d00380e0e0 100644
--- a/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/llvm/include/llvm/Analysis/ValueTracking.h
@@ -773,6 +773,14 @@ LLVM_ABI bool canCreatePoison(const Operator *Op,
/// impliesPoison returns true.
LLVM_ABI bool impliesPoison(const Value *ValAssumedPoison, const Value *V);
+/// Detect if PN is a recurrence with a start value and some number of backedge
+/// values. We'll check whether we can push the freeze through the backedge
+/// values (possibly dropping poison flags along the way) until we reach the
+/// phi again. In that case, we can move the freeze to the start value.
+LLVM_ABI Use *canFoldFreezeIntoRecurrence(
+ PHINode *PN, DominatorTree *DT, bool &StartNeedsFreeze,
+ SmallVectorImpl<Instruction *> *DropFlags = nullptr);
+
/// Return true if this function can prove that V does not have undef bits
/// and is never poison. If V is an aggregate value or vector, check whether
/// all elements (except padding) are not undef or poison.
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index af85ce4077ec8..0373e5cd4c301 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -7569,6 +7569,66 @@ bool llvm::impliesPoison(const Value *ValAssumedPoison, const Value *V) {
static bool programUndefinedIfUndefOrPoison(const Value *V, bool PoisonOnly);
+Use *llvm::canFoldFreezeIntoRecurrence(
+ PHINode *PN, DominatorTree *DT, bool &StartNeedsFreeze,
+ SmallVectorImpl<Instruction *> *DropFlags) {
+ // Detect whether this is a recurrence with a start value and some number of
+ // backedge values. We'll check whether we can push the freeze through the
+ // backedge values (possibly dropping poison flags along the way) until we
+ // reach the phi again. In that case, we can move the freeze to the start
+ // value.
+ Use *StartU = nullptr;
+ SmallVector<Value *> Worklist;
+ for (Use &U : PN->incoming_values()) {
+ if (DT && DT->dominates(PN->getParent(), PN->getIncomingBlock(U))) {
+ // Add backedge value to worklist.
+ Worklist.push_back(U.get());
+ continue;
+ }
+
+ // Don't bother handling multiple start values.
+ if (StartU)
+ return nullptr;
+ StartU = &U;
+ }
+
+ if (!StartU || Worklist.empty())
+ return nullptr; // Not a recurrence.
+
+ Value *StartV = StartU->get();
+ BasicBlock *StartBB = PN->getIncomingBlock(*StartU);
+ StartNeedsFreeze = !isGuaranteedNotToBeUndefOrPoison(StartV);
+ // We can't insert freeze if the start value is the result of the
+ // terminator (e.g. an invoke).
+ if (StartNeedsFreeze && StartBB->getTerminator() == StartV)
+ return nullptr;
+
+ SmallPtrSet<Value *, 32> Visited;
+ while (!Worklist.empty()) {
+ Value *V = Worklist.pop_back_val();
+ if (!Visited.insert(V).second)
+ continue;
+
+ if (Visited.size() > 32)
+ return nullptr; // Limit the total number of values we inspect.
+
+ // Assume that PN is non-poison, because it will be after the transform.
+ if (V == PN || isGuaranteedNotToBeUndefOrPoison(V))
+ continue;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I || canCreateUndefOrPoison(cast<Operator>(I),
+ /*ConsiderFlagsAndMetadata*/ false))
+ return nullptr;
+
+ if (DropFlags)
+ DropFlags->push_back(I);
+ append_range(Worklist, I->operands());
+ }
+
+ return StartU;
+}
+
static bool isGuaranteedNotToBeUndefOrPoison(
const Value *V, AssumptionCache *AC, const Instruction *CtxI,
const DominatorTree *DT, unsigned Depth, UndefPoisonKind Kind) {
@@ -7657,6 +7717,13 @@ static bool isGuaranteedNotToBeUndefOrPoison(
}
if (IsWellDefined)
return true;
+
+ bool StartNeedsFreeze;
+ if (canFoldFreezeIntoRecurrence(const_cast<PHINode *>(PN),
+ const_cast<DominatorTree *>(DT),
+ StartNeedsFreeze) &&
+ !StartNeedsFreeze)
+ return true;
} else if (!::canCreateUndefOrPoison(Opr, Kind,
/*ConsiderFlagsAndMetadata*/ true) &&
all_of(Opr->operands(), OpCheck))
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index e2a9255ca9c6e..e28f2abdede7f 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -4933,7 +4933,8 @@ InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) {
// poison.
Value *MaybePoisonOperand = nullptr;
for (Value *V : OrigOpInst->operands()) {
- if (isa<MetadataAsValue>(V) || isGuaranteedNotToBeUndefOrPoison(V) ||
+ if (isa<MetadataAsValue>(V) ||
+ isGuaranteedNotToBeUndefOrPoison(V, &AC, &OrigFI, &DT) ||
// Treat identical operands as a single operand.
(MaybePoisonOperand && MaybePoisonOperand == V))
continue;
@@ -4959,64 +4960,19 @@ InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) {
Instruction *InstCombinerImpl::foldFreezeIntoRecurrence(FreezeInst &FI,
PHINode *PN) {
- // Detect whether this is a recurrence with a start value and some number of
- // backedge values. We'll check whether we can push the freeze through the
- // backedge values (possibly dropping poison flags along the way) until we
- // reach the phi again. In that case, we can move the freeze to the start
- // value.
- Use *StartU = nullptr;
- SmallVector<Value *> Worklist;
- for (Use &U : PN->incoming_values()) {
- if (DT.dominates(PN->getParent(), PN->getIncomingBlock(U))) {
- // Add backedge value to worklist.
- Worklist.push_back(U.get());
- continue;
- }
-
- // Don't bother handling multiple start values.
- if (StartU)
- return nullptr;
- StartU = &U;
- }
-
- if (!StartU || Worklist.empty())
- return nullptr; // Not a recurrence.
-
- Value *StartV = StartU->get();
- BasicBlock *StartBB = PN->getIncomingBlock(*StartU);
- bool StartNeedsFreeze = !isGuaranteedNotToBeUndefOrPoison(StartV);
- // We can't insert freeze if the start value is the result of the
- // terminator (e.g. an invoke).
- if (StartNeedsFreeze && StartBB->getTerminator() == StartV)
- return nullptr;
-
- SmallPtrSet<Value *, 32> Visited;
+ bool StartNeedsFreeze;
SmallVector<Instruction *> DropFlags;
- while (!Worklist.empty()) {
- Value *V = Worklist.pop_back_val();
- if (!Visited.insert(V).second)
- continue;
-
- if (Visited.size() > 32)
- return nullptr; // Limit the total number of values we inspect.
-
- // Assume that PN is non-poison, because it will be after the transform.
- if (V == PN || isGuaranteedNotToBeUndefOrPoison(V))
- continue;
-
- Instruction *I = dyn_cast<Instruction>(V);
- if (!I || canCreateUndefOrPoison(cast<Operator>(I),
- /*ConsiderFlagsAndMetadata*/ false))
- return nullptr;
-
- DropFlags.push_back(I);
- append_range(Worklist, I->operands());
- }
+ Use *StartU =
+ canFoldFreezeIntoRecurrence(PN, &DT, StartNeedsFreeze, &DropFlags);
+ if (!StartU)
+ return nullptr;
for (Instruction *I : DropFlags)
I->dropPoisonGeneratingAnnotations();
if (StartNeedsFreeze) {
+ Value *StartV = StartU->get();
+ BasicBlock *StartBB = PN->getIncomingBlock(*StartU);
Builder.SetInsertPoint(StartBB->getTerminator());
Value *FrozenStartV = Builder.CreateFreeze(StartV,
StartV->getName() + ".fr");
diff --git a/llvm/test/Transforms/Attributor/dereferenceable-1.ll b/llvm/test/Transforms/Attributor/dereferenceable-1.ll
index 5bff2a2e6b208..32102e64351d0 100644
--- a/llvm/test/Transforms/Attributor/dereferenceable-1.ll
+++ b/llvm/test/Transforms/Attributor/dereferenceable-1.ll
@@ -95,7 +95,7 @@ define void @deref_phi_growing(ptr dereferenceable(4000) %a) {
; CHECK: for.cond:
; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
; CHECK-NEXT: [[A_ADDR_0:%.*]] = phi ptr [ [[A]], [[ENTRY]] ], [ [[INCDEC_PTR:%.*]], [[FOR_INC]] ]
-; CHECK-NEXT: call void @deref_phi_user(ptr nonnull [[A_ADDR_0]])
+; CHECK-NEXT: call void @deref_phi_user(ptr noundef nonnull [[A_ADDR_0]])
; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[A_ADDR_0]], align 4
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], [[VAL]]
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]]
@@ -146,7 +146,7 @@ define void @deref_phi_shrinking(ptr dereferenceable(4000) %a) {
; CHECK: for.cond:
; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
; CHECK-NEXT: [[A_ADDR_0:%.*]] = phi ptr [ [[A]], [[ENTRY]] ], [ [[INCDEC_PTR:%.*]], [[FOR_INC]] ]
-; CHECK-NEXT: call void @deref_phi_user(ptr nonnull [[A_ADDR_0]])
+; CHECK-NEXT: call void @deref_phi_user(ptr noundef nonnull [[A_ADDR_0]])
; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[A_ADDR_0]], align 4
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], [[VAL]]
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]]
diff --git a/llvm/test/Transforms/Attributor/value-simplify.ll b/llvm/test/Transforms/Attributor/value-simplify.ll
index beceab7ce9ed7..981964ec1efed 100644
--- a/llvm/test/Transforms/Attributor/value-simplify.ll
+++ b/llvm/test/Transforms/Attributor/value-simplify.ll
@@ -626,7 +626,7 @@ define internal ptr @test_byval2(ptr byval(%struct.X) %a) {
; CHECK-NEXT: [[A_PRIV:%.*]] = alloca [[STRUCT_X:%.*]], align 8
; CHECK-NEXT: store ptr [[TMP0]], ptr [[A_PRIV]], align 8
; CHECK-NEXT: call void @sync()
-; CHECK-NEXT: [[L:%.*]] = load ptr, ptr [[A_PRIV]], align 8
+; CHECK-NEXT: [[L:%.*]] = load ptr, ptr [[A_PRIV]], align 8, !invariant.load [[META0:![0-9]+]]
; CHECK-NEXT: ret ptr [[L]]
;
call void @sync()
@@ -1359,7 +1359,7 @@ define internal i32 @ret_speculatable_expr(ptr %mem, i32 %a2) {
; CGSCC-SAME: (i32 [[TMP0:%.*]]) #[[ATTR1]] {
; CGSCC-NEXT: [[MEM_PRIV:%.*]] = alloca i32, align 4
; CGSCC-NEXT: store i32 [[TMP0]], ptr [[MEM_PRIV]], align 4
-; CGSCC-NEXT: [[L:%.*]] = load i32, ptr [[MEM_PRIV]], align 4
+; CGSCC-NEXT: [[L:%.*]] = load i32, ptr [[MEM_PRIV]], align 4, !invariant.load [[META0]]
; CGSCC-NEXT: [[MUL:%.*]] = mul i32 [[L]], 13
; CGSCC-NEXT: [[ADD:%.*]] = add i32 [[MUL]], 7
; CGSCC-NEXT: ret i32 [[ADD]]
@@ -1709,3 +1709,7 @@ define i32 @readExtInitZeroInit() {
; CGSCC: attributes #[[ATTR17]] = { nosync }
; CGSCC: attributes #[[ATTR18]] = { nounwind }
;.
+; TUNIT: [[META0]] = !{}
+;.
+; CGSCC: [[META0]] = !{}
+;.
diff --git a/llvm/test/Transforms/InstCombine/freeze-landingpad.ll b/llvm/test/Transforms/InstCombine/freeze-landingpad.ll
index 7221dc14dfaf4..126fd43bbdebf 100644
--- a/llvm/test/Transforms/InstCombine/freeze-landingpad.ll
+++ b/llvm/test/Transforms/InstCombine/freeze-landingpad.ll
@@ -9,19 +9,18 @@ define i32 @propagate_freeze_in_landingpad() personality ptr null {
; CHECK: invoke.bb1:
; CHECK-NEXT: [[X:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[NORMAL_RETURN:%.*]] ]
; CHECK-NEXT: [[RES0:%.*]] = invoke i32 @foo()
-; CHECK-NEXT: to label [[INVOKE_BB2:%.*]] unwind label [[EXCEPTIONAL_RETURN:%.*]]
+; CHECK-NEXT: to label [[INVOKE_BB2:%.*]] unwind label [[EXCEPTIONAL_RETURN:%.*]]
; CHECK: invoke.bb2:
; CHECK-NEXT: [[RES1:%.*]] = invoke i32 @foo()
-; CHECK-NEXT: to label [[NORMAL_RETURN]] unwind label [[EXCEPTIONAL_RETURN]]
+; CHECK-NEXT: to label [[NORMAL_RETURN]] unwind label [[EXCEPTIONAL_RETURN]]
; CHECK: normal_return:
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[X]], 1
; CHECK-NEXT: br label [[INVOKE_BB1]]
; CHECK: exceptional_return:
; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[X]], [[INVOKE_BB1]] ], [ 0, [[INVOKE_BB2]] ]
; CHECK-NEXT: [[LANDING_PAD:%.*]] = landingpad { ptr, i32 }
-; CHECK-NEXT: cleanup
-; CHECK-NEXT: [[FR:%.*]] = freeze i32 [[PHI]]
-; CHECK-NEXT: [[RES:%.*]] = shl i32 [[FR]], 1
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: [[RES:%.*]] = shl nuw i32 [[PHI]], 1
; CHECK-NEXT: ret i32 [[RES]]
;
entry:
diff --git a/llvm/test/Transforms/InstCombine/freeze.ll b/llvm/test/Transforms/InstCombine/freeze.ll
index 3fedead2feab8..671ad32322c0f 100644
--- a/llvm/test/Transforms/InstCombine/freeze.ll
+++ b/llvm/test/Transforms/InstCombine/freeze.ll
@@ -889,17 +889,17 @@ exit: ; preds = %loop
}
; The recurrence for the GEP offset can't produce poison so the freeze should
-; be pushed through to the ptr, but this is not currently supported.
+; be pushed through to the ptr.
define void @fold_phi_gep_phi_offset(ptr %init, ptr %end, i64 noundef %n) {
; CHECK-LABEL: @fold_phi_gep_phi_offset(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INIT:%.*]] = freeze ptr [[INIT1:%.*]]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[I:%.*]] = phi ptr [ [[INIT:%.*]], [[ENTRY:%.*]] ], [ [[I_NEXT_FR:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[I:%.*]] = phi ptr [ [[INIT]], [[ENTRY:%.*]] ], [ [[I_NEXT_FR:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[OFF:%.*]] = phi i64 [ [[N:%.*]], [[ENTRY]] ], [ [[OFF_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[OFF_NEXT]] = shl i64 [[OFF]], 3
-; CHECK-NEXT: [[I_NEXT:%.*]] = getelementptr i8, ptr [[I]], i64 [[OFF_NEXT]]
-; CHECK-NEXT: [[I_NEXT_FR]] = freeze ptr [[I_NEXT]]
+; CHECK-NEXT: [[I_NEXT_FR]] = getelementptr i8, ptr [[I]], i64 [[OFF_NEXT]]
; CHECK-NEXT: [[COND:%.*]] = icmp eq ptr [[I_NEXT_FR]], [[END:%.*]]
; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
@@ -921,18 +921,18 @@ exit: ; preds = %loop
ret void
}
-; Offset is still guaranteed not to be poison, so the freeze could be moved
-; here if we strip inbounds from the GEP, but this is not currently supported.
+; Offset is still guaranteed not to be poison, so the freeze can be moved
+; here if we strip inbounds from the GEP.
define void @fold_phi_gep_inbounds_phi_offset(ptr %init, ptr %end, i64 noundef %n) {
; CHECK-LABEL: @fold_phi_gep_inbounds_phi_offset(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INIT:%.*]] = freeze ptr [[INIT1:%.*]]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[I:%.*]] = phi ptr [ [[INIT:%.*]], [[ENTRY:%.*]] ], [ [[I_NEXT_FR:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[I:%.*]] = phi ptr [ [[INIT]], [[ENTRY:%.*]] ], [ [[I_NEXT_FR:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[OFF:%.*]] = phi i64 [ [[N:%.*]], [[ENTRY]] ], [ [[OFF_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[OFF_NEXT]] = shl i64 [[OFF]], 3
-; CHECK-NEXT: [[I_NEXT:%.*]] = getelementptr inbounds i8, ptr [[I]], i64 [[OFF_NEXT]]
-; CHECK-NEXT: [[I_NEXT_FR]] = freeze ptr [[I_NEXT]]
+; CHECK-NEXT: [[I_NEXT_FR]] = getelementptr i8, ptr [[I]], i64 [[OFF_NEXT]]
; CHECK-NEXT: [[COND:%.*]] = icmp eq ptr [[I_NEXT_FR]], [[END:%.*]]
; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
@@ -994,7 +994,7 @@ define void @fold_phi_multiple_insts(i32 %init, i32 %n) {
; CHECK: loop:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[INIT_FR]], [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[I_SQ:%.*]] = mul i32 [[I]], [[I]]
-; CHECK-NEXT: [[I_NEXT]] = add i32 [[I_SQ]], 1
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I_SQ]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[I_NEXT]], [[N:%.*]]
; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
@@ -1127,7 +1127,7 @@ define void @fold_phi_invoke_noundef_start_value(i32 %n) personality ptr undef {
; CHECK-NEXT: to label [[LOOP:%.*]] unwind label [[UNWIND:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[INIT]], [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[I_NEXT]], [[N:%.*]]
; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: unwind:
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll b/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll
index a97b39494b2ef..22de6d5a0d6b6 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-exit-phi-scev-invalidation.ll
@@ -16,13 +16,11 @@ define void @pr56282() {
; CHECK: outer.header:
; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[OUTER_IV_NEXT:%.*]], [[INNER_2:%.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OUTER_IV]], 1
-; CHECK-NEXT: [[TMP1:%.*]] = freeze i64 [[TMP0]]
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], -1
-; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP1]], 7
-; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP2]], 7
+; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP0]], 7
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[OUTER_IV]], 7
; CHECK-NEXT: br i1 [[TMP3]], label [[OUTER_MIDDLE_UNR_LCSSA:%.*]], label [[OUTER_HEADER_NEW:%.*]]
; CHECK: outer.header.new:
-; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[TMP1]], [[XTRAITER]]
+; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[TMP0]], [[XTRAITER]]
; CHECK-NEXT: br label [[INNER_1_HEADER:%.*]]
; CHECK: inner.1.header:
; CHECK-NEXT: [[INNER_1_IV:%.*]] = phi i64 [ 0, [[OUTER_HEADER_NEW]] ], [ [[INNER_1_IV_NEXT_7:%.*]], [[INNER_1_LATCH_7:%.*]] ]
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-select.ll b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-select.ll
index c86fa349200c5..7a9c33637521a 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-select.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-select.ll
@@ -693,8 +693,7 @@ define dso_local void @select_invariant_outer_loop(i1 noundef zeroext %cond, i32
; CHECK-NEXT: [[I_021_US:%.*]] = phi i32 [ [[INC9_US:%.*]], [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ]
; CHECK-NEXT: [[REM_US:%.*]] = and i32 [[I_021_US]], 1
; CHECK-NEXT: [[CMP5_US:%.*]] = icmp eq i32 [[REM_US]], 0
-; CHECK-NEXT: [[CMP5_US_FR:%.*]] = freeze i1 [[CMP5_US]]
-; CHECK-NEXT: br i1 [[CMP5_US_FR]], label [[FOR_COND1_PREHEADER_US_SPLIT_US:%.*]], label [[FOR_COND1_PREHEADER_US_SPLIT:%.*]]
+; CHECK-NEXT: br i1 [[CMP5_US]], label [[FOR_COND1_PREHEADER_US_SPLIT_US:%.*]], label [[FOR_COND1_PREHEADER_US_SPLIT:%.*]]
; CHECK: for.cond1.preheader.us.split.us:
; CHECK-NEXT: br label [[FOR_BODY4_US_US:%.*]]
; CHECK: for.body4.us.us:
|
llvm/lib/Analysis/ValueTracking.cpp
Outdated
@@ -7657,6 +7717,13 @@ static bool isGuaranteedNotToBeUndefOrPoison( | |||
} | |||
if (IsWellDefined) | |||
return true; | |||
|
|||
bool StartNeedsFreeze; | |||
if (canFoldFreezeIntoRecurrence(const_cast<PHINode *>(PN), |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it worth having a limit on the number of incoming values to make sure we keep compile time under control?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I ran ctmark and the difference is negligible, but I suppose it's not a panacea, so I can if we think it's necessary. Although I'm not sure what a reasonable number is? tbh the thing I care about it's only 2 incoming values so I could restrict it to that.
llvm/lib/Analysis/ValueTracking.cpp
Outdated
return nullptr; // Limit the total number of values we inspect. | ||
|
||
// Assume that PN is non-poison, because it will be after the transform. | ||
if (V == PN || isGuaranteedNotToBeUndefOrPoison(V)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Given you have access to the DominatorTree you could also pass that in isGuaranteedNotToBeUndefOrPoison
, right?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looking at isGuaranteedNotToBeUndefOrPoison
it's only used as long as the AssumptionCache
and CtxI
are passed. I've tried your suggestion (see patch below), but it makes no testable difference so I'm not sure it's worth it, unless you're aware of some case that's missing?
diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h
index a9dcc65d02a0..a4f8c22226e0 100644
--- a/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/llvm/include/llvm/Analysis/ValueTracking.h
@@ -780,9 +780,11 @@ LLVM_ABI bool impliesPoison(const Value *ValAssumedPoison, const Value *V);
/// values. We'll check whether we can push the freeze through the backedge
/// values (possibly dropping poison flags along the way) until we reach the
/// phi again. In that case, we can move the freeze to the start value.
-LLVM_ABI Use *canFoldFreezeIntoRecurrence(
- PHINode *PN, DominatorTree *DT, bool &StartNeedsFreeze,
- SmallVectorImpl<Instruction *> *DropFlags = nullptr, unsigned Depth = 0);
+LLVM_ABI Use *
+canFoldFreezeIntoRecurrence(PHINode *PN, AssumptionCache *AC, Instruction *CtxI,
+ DominatorTree *DT, bool &StartNeedsFreeze,
+ SmallVectorImpl<Instruction *> *DropFlags = nullptr,
+ unsigned Depth = 0);
/// Return true if this function can prove that V does not have undef bits
/// and is never poison. If V is an aggregate value or vector, check whether
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 948562c90709..fdb0725e361d 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -7570,8 +7570,9 @@ bool llvm::impliesPoison(const Value *ValAssumedPoison, const Value *V) {
static bool programUndefinedIfUndefOrPoison(const Value *V, bool PoisonOnly);
Use *llvm::canFoldFreezeIntoRecurrence(
- PHINode *PN, DominatorTree *DT, bool &StartNeedsFreeze,
- SmallVectorImpl<Instruction *> *DropFlags, unsigned Depth) {
+ PHINode *PN, AssumptionCache *AC, Instruction *CtxI, DominatorTree *DT,
+ bool &StartNeedsFreeze, SmallVectorImpl<Instruction *> *DropFlags,
+ unsigned Depth) {
// Detect whether this is a recurrence with a start value and some number of
// backedge values. We'll check whether we can push the freeze through the
// backedge values (possibly dropping poison flags along the way) until we
@@ -7723,8 +7724,9 @@ static bool isGuaranteedNotToBeUndefOrPoison(
bool StartNeedsFreeze;
if (canFoldFreezeIntoRecurrence(
- const_cast<PHINode *>(PN), const_cast<DominatorTree *>(DT),
- StartNeedsFreeze, /*DropFlags=*/nullptr, Depth) &&
+ const_cast<PHINode *>(PN), AC, const_cast<Instruction *>(CtxI),
+ const_cast<DominatorTree *>(DT), StartNeedsFreeze,
+ /*DropFlags=*/nullptr, Depth) &&
!StartNeedsFreeze)
return true;
} else if (!::canCreateUndefOrPoison(Opr, Kind,
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index e28f2abdede7..b32c39faafc8 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -4962,8 +4962,8 @@ Instruction *InstCombinerImpl::foldFreezeIntoRecurrence(FreezeInst &FI,
PHINode *PN) {
bool StartNeedsFreeze;
SmallVector<Instruction *> DropFlags;
- Use *StartU =
- canFoldFreezeIntoRecurrence(PN, &DT, StartNeedsFreeze, &DropFlags);
+ Use *StartU = canFoldFreezeIntoRecurrence(PN, &AC, PN, &DT, StartNeedsFreeze,
+ &DropFlags);
if (!StartU)
return nullptr;
…UndefOrPoison This function currently doesn't detect well-defined recurrences that can't produce undef or poison. This prevents instcombine from pushing freezes through GEPs to the ptr which may be undef or poison, when the offset is a well-defined recurrence, as the tests added in llvm#145541 demonstrate. This patch fixes this by pulling existing code from foldFreezeIntoRecurrence in instcombine, which can detect whether a PHI produces undef/poison, so it can be reused by isGuaranteedNotToBeUndefOrPoison in ValueTracking.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks good, thanks for addressing the comments! I just have one more comment then I'm happy. :)
f68aa92
to
29c54d8
Compare
This function currently doesn't detect well-defined recurrences that can't produce undef or poison. This prevents instcombine from pushing freezes through GEPs to the ptr which may be undef or poison, when the offset is a well-defined recurrence, as the tests added in #145541 demonstrate.
This patch fixes this by pulling existing code from foldFreezeIntoRecurrence in instcombine, which can detect whether a PHI produces undef/poison, so it can be reused by
isGuaranteedNotToBeUndefOrPoison in ValueTracking.