Skip to content

[LoopPeel] Fix branch weights' effect on block frequencies #128785

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 20 commits into
base: users/jdenny-ornl/pgo-estimated-trip-count
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
f413520
[LoopPeel] Fix branch weights' effect on block frequencies
jdenny-ornl Mar 19, 2025
f821eeb
Run update_test_checks.py on a test
jdenny-ornl Mar 26, 2025
af8ec56
Fix typo
jdenny-ornl Apr 4, 2025
a0264ad
Merge branch 'main' into fix-peel-branch-weights
jdenny-ornl Apr 8, 2025
fd29a49
Merge branch 'main' into fix-peel-branch-weights
jdenny-ornl Apr 9, 2025
6303177
Document new metadata
jdenny-ornl Apr 10, 2025
bbd0e95
Improve LangRef.rst entry
jdenny-ornl May 1, 2025
715cb0a
Merge branch 'main' into fix-peel-branch-weights
jdenny-ornl May 5, 2025
67fa67d
Merge branch 'main' into fix-peel-branch-weights
jdenny-ornl Jun 10, 2025
37ce859
Update fixmes
jdenny-ornl Jun 16, 2025
4337dcd
Merge branch 'main' into fix-peel-branch-weights
jdenny-ornl Jun 16, 2025
5193158
Update test for AArch64, which I did not build before
jdenny-ornl Jun 17, 2025
bbd2f22
Merge branch 'main' into fix-peel-branch-weights
jdenny-ornl Jul 10, 2025
b23f467
Run update script on test changed by merge from main
jdenny-ornl Jul 10, 2025
e250cfc
Merge branch 'main' into fix-peel-branch-weights
jdenny-ornl Jul 15, 2025
859b84d
Merge branch 'pgo-estimated-trip-count' into fix-peel-branch-weights
jdenny-ornl Jul 15, 2025
3f6a91a
Merge branch 'pgo-estimated-trip-count' into fix-peel-branch-weights
jdenny-ornl Jul 24, 2025
e5a0a26
Update for merge from pgo-estimated-trip-count
jdenny-ornl Jul 24, 2025
c283ebe
Merge branch 'pgo-estimated-trip-count' into fix-peel-branch-weights
jdenny-ornl Jul 24, 2025
ecbf6e0
Merge branch 'pgo-estimated-trip-count' into fix-peel-branch-weights
jdenny-ornl Jul 28, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
135 changes: 42 additions & 93 deletions llvm/lib/Transforms/Utils/LoopPeel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -742,84 +742,6 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
}
}

/// Branch-weight bookkeeping for the terminator of one exiting block while
/// iterations are being peeled off a loop. Both vectors are indexed by the
/// terminator's successor position.
struct WeightInfo {
// Weights to apply to the copy of the terminator in the iteration currently
// being peeled; reduced after each peeled iteration.
SmallVector<uint32_t> Weights;
// Amount to subtract from the matching entry of Weights after each peeled
// iteration. An entry of 0 means that successor's weight is left unchanged.
const SmallVector<uint32_t> SubWeights;
};

/// Attach the current weights in \p Info to \p Term, the terminator of an
/// exiting block in a peeled-off iteration, and then step \p Info forward so
/// that it holds the weights for the next iteration.
///
/// Notation: let F be the weight of the edge that continues (falls through)
/// into the loop and let E be the weight of the edge to an exit. Then
/// F/(F+E) is the probability of staying in the loop, E/(F+E) is the
/// probability of leaving it, and the estimated exit count is EC = F / E.
///
/// For the I-th peeled-off iteration (counting from 0) the peeled exit is
/// given weights in the ratio (EC - I, 1). This yields a reasonable
/// distribution: the probability of exiting grows with every peeled
/// iteration, while the estimated exit count left for the remainder loop
/// shrinks by I. To sidestep division rounding, both weights are multiplied
/// by E, so the weights actually used are (F - I * E, E).
static void updateBranchWeights(Instruction *Term, WeightInfo &Info) {
  setBranchWeights(*Term, Info.Weights, /*IsExpected=*/false);

  for (auto [Idx, SubWeight] : enumerate(Info.SubWeights)) {
    // A zero decrement marks an edge whose weight never changes.
    if (SubWeight == 0)
      continue;

    // Never let the probability of taking the edge from the latch to the loop
    // header drop below a 1:1 ratio, i.e. the weight must not fall below
    // SubWeight. Going lower could significantly reduce the loop's hotness,
    // which would be wrong if the trip count was underestimated.
    uint32_t &Weight = Info.Weights[Idx];
    if (Weight > SubWeight)
      Weight = std::max(Weight - SubWeight, SubWeight);
    else
      Weight = SubWeight;
  }
}

/// Populate \p WeightInfos with an entry for every exiting block of \p L whose
/// terminator carries branch weights and has a non-zero total weight on the
/// edges that stay inside the loop.
///
/// Each entry records the terminator's current weights together with the
/// per-successor amount to subtract after every peeled iteration: zero for
/// exit edges, and a proportional share of the total exit weight for edges
/// that remain in the loop.
static void initBranchWeights(DenseMap<Instruction *, WeightInfo> &WeightInfos,
                              Loop *L) {
  SmallVector<BasicBlock *> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  for (BasicBlock *ExitingBlock : ExitingBlocks) {
    Instruction *Term = ExitingBlock->getTerminator();

    // Terminators without branch weight metadata are left alone.
    SmallVector<uint32_t> Weights;
    if (!extractBranchWeights(*Term, Weights))
      continue;

    // Split the total weight into the part that stays in the loop and the
    // part that leaves it.
    uint32_t FallThroughWeights = 0;
    uint32_t ExitWeights = 0;
    for (auto [Succ, Weight] : zip(successors(Term), Weights))
      (L->contains(Succ) ? FallThroughWeights : ExitWeights) += Weight;

    // Degenerate case: nothing ever continues into the loop, so there is no
    // weight to update.
    if (FallThroughWeights == 0)
      continue;

    SmallVector<uint32_t> SubWeights;
    for (auto [Succ, Weight] : zip(successors(Term), Weights)) {
      if (L->contains(Succ)) {
        // Each peeled iteration removes the exit weight from the in-loop
        // edges, shared out in proportion to each edge's own weight.
        double W =
            static_cast<double>(Weight) / static_cast<double>(FallThroughWeights);
        SubWeights.push_back(static_cast<uint32_t>(ExitWeights * W));
      } else {
        // Weights on exit edges are never reduced.
        SubWeights.push_back(0);
      }
    }

    WeightInfos.insert({Term, {std::move(Weights), std::move(SubWeights)}});
  }
}

/// Clones the body of the loop L, putting it between \p InsertTop and \p
/// InsertBot.
/// \param IterNumber The serial number of the iteration currently being
Expand Down Expand Up @@ -1191,11 +1113,6 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
Instruction *LatchTerm =
cast<Instruction>(cast<BasicBlock>(Latch)->getTerminator());

// If we have branch weight information, we'll want to update it for the
// newly created branches.
DenseMap<Instruction *, WeightInfo> Weights;
initBranchWeights(Weights, L);

// Identify what noalias metadata is inside the loop: if it is inside the
// loop, the associated metadata must be cloned for each iteration.
SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes;
Expand Down Expand Up @@ -1241,11 +1158,6 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
assert(DT.verify(DominatorTree::VerificationLevel::Fast));
#endif

for (auto &[Term, Info] : Weights) {
auto *TermCopy = cast<Instruction>(VMap[Term]);
updateBranchWeights(TermCopy, Info);
}

// Remove Loop metadata from the latch branch instruction
// because it is not the Loop's latch branch anymore.
auto *LatchTermCopy = cast<Instruction>(VMap[LatchTerm]);
Expand Down Expand Up @@ -1285,15 +1197,52 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
}
}

for (const auto &[Term, Info] : Weights) {
setBranchWeights(*Term, Info.Weights, /*IsExpected=*/false);
}

// Update Metadata for count of peeled off iterations.
unsigned AlreadyPeeled = 0;
if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData))
AlreadyPeeled = *Peeled;
addStringMetadataToLoop(L, PeeledCountMetaData, AlreadyPeeled + PeelCount);
unsigned TotalPeeled = AlreadyPeeled + PeelCount;
addStringMetadataToLoop(L, PeeledCountMetaData, TotalPeeled);

// Update metadata for the estimated trip count. The original branch weight
// metadata is already correct for both the remaining loop and the peeled loop
// iterations, so don't adjust it.
//
// For example, consider what happens when peeling 2 iterations from a loop
// with an estimated trip count of 10 and inserting them before the remaining
// loop. Each of the peeled iterations and each iteration in the remaining
// loop still has the same probability of exiting the *entire original* loop
// as it did when in the original loop, and thus it should still have the same
// branch weights. The peeled iterations' non-zero probabilities of exiting
// already appropriately reduce the probability of reaching the remaining
// iterations just as they did in the original loop. Trying to also adjust
// the remaining loop's branch weights to reflect its new trip count of 8 will
// erroneously further reduce its block frequencies. However, in case an
// analysis later needs to determine the trip count of the remaining loop
// while examining it in isolation without considering the probability of
// actually reaching it, we store the new trip count as separate metadata.
if (auto EstimatedTripCount = getLoopEstimatedTripCount(L)) {
// FIXME: The previous updateBranchWeights implementation had this
// comment:
//
// Don't set the probability of taking the edge from latch to loop header
// to less than 1:1 ratio (meaning Weight should not be lower than
// SubWeight), as this could significantly reduce the loop's hotness,
// which would be incorrect in the case of underestimating the trip count.
//
// See e8d5db206c2f commit log for further discussion. That seems to
// suggest that we should avoid ever setting a trip count of < 2 here
// (equal chance of continuing and exiting means the loop will likely
// continue once and then exit once). Or is keeping the original branch
// weights already a sufficient improvement for whatever analysis cares
// about this case?
unsigned EstimatedTripCountNew = *EstimatedTripCount;
if (EstimatedTripCountNew < TotalPeeled) // FIXME: TotalPeeled + 2?
EstimatedTripCountNew = 0; // FIXME: = 2?
else
EstimatedTripCountNew -= TotalPeeled;
setLoopEstimatedTripCount(L, EstimatedTripCountNew);
}

if (Loop *ParentLoop = L->getParentLoop())
L = ParentLoop;
Expand Down
75 changes: 75 additions & 0 deletions llvm/test/Transforms/LoopUnroll/peel-branch-weights-freq.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
; Test branch weight metadata, estimated trip count metadata, and block
; frequencies after loop peeling.

; RUN: opt < %s -S -passes='print<block-freq>' 2>&1 | \
; RUN: FileCheck -check-prefix=CHECK %s

; The -implicit-check-not options make sure that no additional labels or calls
; to @f show up.
; RUN: opt < %s -S -passes='loop-unroll,print<block-freq>' \
; RUN: -unroll-force-peel-count=2 2>&1 | \
; RUN: FileCheck %s -check-prefix=CHECK-UR \
; RUN: -implicit-check-not='{{^[^ ;]*:}}' \
; RUN: -implicit-check-not='call void @f'

; CHECK: block-frequency-info: test
; CHECK: do.body: float = 10.0,

; The sum should still be ~10.
;
; CHECK-UR: block-frequency-info: test
; CHECK-UR: - [[DO_BODY_PEEL:.*]]: float = 1.0,
; CHECK-UR: - [[DO_BODY_PEEL2:.*]]: float = 0.9,
; CHECK-UR: - [[DO_BODY:.*]]: float = 8.1,

declare void @f(i32)

; Input loop: a do-while style loop whose single block %do.body serves as both
; header and latch. Every iteration calls @f with the induction value %i and
; leaves for %do.end once %inc is (signed) greater than or equal to %n.
define void @test(i32 %n) {
; CHECK-UR-LABEL: define void @test(
; CHECK-UR: [[ENTRY:.*]]:
; CHECK-UR: br label %[[DO_BODY_PEEL_BEGIN:.*]]
; CHECK-UR: [[DO_BODY_PEEL_BEGIN]]:
; CHECK-UR: br label %[[DO_BODY_PEEL:.*]]
; CHECK-UR: [[DO_BODY_PEEL]]:
; CHECK-UR: call void @f
; CHECK-UR: br i1 %{{.*}}, label %[[DO_END:.*]], label %[[DO_BODY_PEEL_NEXT:.*]], !prof ![[#PROF:]]
; CHECK-UR: [[DO_BODY_PEEL_NEXT]]:
; CHECK-UR: br label %[[DO_BODY_PEEL2:.*]]
; CHECK-UR: [[DO_BODY_PEEL2]]:
; CHECK-UR: call void @f
; CHECK-UR: br i1 %{{.*}}, label %[[DO_END]], label %[[DO_BODY_PEEL_NEXT1:.*]], !prof ![[#PROF]]
; CHECK-UR: [[DO_BODY_PEEL_NEXT1]]:
; CHECK-UR: br label %[[DO_BODY_PEEL_NEXT5:.*]]
; CHECK-UR: [[DO_BODY_PEEL_NEXT5]]:
; CHECK-UR: br label %[[ENTRY_PEEL_NEWPH:.*]]
; CHECK-UR: [[ENTRY_PEEL_NEWPH]]:
; CHECK-UR: br label %[[DO_BODY]]
; CHECK-UR: [[DO_BODY]]:
; CHECK-UR: call void @f
; CHECK-UR: br i1 %{{.*}}, label %[[DO_END_LOOPEXIT:.*]], label %[[DO_BODY]], !prof ![[#PROF]], !llvm.loop ![[#LOOP_UR_LATCH:]]
; CHECK-UR: [[DO_END_LOOPEXIT]]:
; CHECK-UR: br label %[[DO_END]]
; CHECK-UR: [[DO_END]]:
; CHECK-UR: ret void

entry:
br label %do.body

do.body:
%i = phi i32 [ 0, %entry ], [ %inc, %do.body ]
%inc = add i32 %i, 1
call void @f(i32 %i)
%c = icmp sge i32 %inc, %n
; The !prof !0 weights give 1 to the exit edge (%do.end) and 9 to the backedge
; (%do.body).
br i1 %c, label %do.end, label %do.body, !prof !0

do.end:
ret void
}

!0 = !{!"branch_weights", i32 1, i32 9}

; CHECK-UR: ![[#PROF]] = !{!"branch_weights", i32 1, i32 9}
; CHECK-UR: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_PC:]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]}
; CHECK-UR: ![[#LOOP_UR_PC]] = !{!"llvm.loop.peeled.count", i32 2}
; CHECK-UR: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 8}
; CHECK-UR: ![[#DISABLE]] = !{!"llvm.loop.unroll.disable"}
64 changes: 33 additions & 31 deletions llvm/test/Transforms/LoopUnroll/peel-branch-weights.ll
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ define void @test() {
; CHECK: loop.peel:
; CHECK-NEXT: [[X_PEEL:%.*]] = call i32 @get.x()
; CHECK-NEXT: switch i32 [[X_PEEL]], label [[LOOP_LATCH_PEEL:%.*]] [
; CHECK-NEXT: i32 0, label [[LOOP_LATCH_PEEL]]
; CHECK-NEXT: i32 1, label [[LOOP_EXIT:%.*]]
; CHECK-NEXT: i32 2, label [[LOOP_EXIT]]
; CHECK-NEXT: i32 0, label [[LOOP_LATCH_PEEL]]
; CHECK-NEXT: i32 1, label [[LOOP_EXIT:%.*]]
; CHECK-NEXT: i32 2, label [[LOOP_EXIT]]
; CHECK-NEXT: ], !prof [[PROF0:![0-9]+]]
; CHECK: loop.latch.peel:
; CHECK-NEXT: br label [[LOOP_PEEL_NEXT:%.*]]
Expand All @@ -26,10 +26,10 @@ define void @test() {
; CHECK: loop.peel2:
; CHECK-NEXT: [[X_PEEL3:%.*]] = call i32 @get.x()
; CHECK-NEXT: switch i32 [[X_PEEL3]], label [[LOOP_LATCH_PEEL4:%.*]] [
; CHECK-NEXT: i32 0, label [[LOOP_LATCH_PEEL4]]
; CHECK-NEXT: i32 1, label [[LOOP_EXIT]]
; CHECK-NEXT: i32 2, label [[LOOP_EXIT]]
; CHECK-NEXT: ], !prof [[PROF1:![0-9]+]]
; CHECK-NEXT: i32 0, label [[LOOP_LATCH_PEEL4]]
; CHECK-NEXT: i32 1, label [[LOOP_EXIT]]
; CHECK-NEXT: i32 2, label [[LOOP_EXIT]]
; CHECK-NEXT: ], !prof [[PROF0]]
; CHECK: loop.latch.peel4:
; CHECK-NEXT: br label [[LOOP_PEEL_NEXT1:%.*]]
; CHECK: loop.peel.next1:
Expand All @@ -41,31 +41,33 @@ define void @test() {
; CHECK: loop:
; CHECK-NEXT: [[X:%.*]] = call i32 @get.x()
; CHECK-NEXT: switch i32 [[X]], label [[LOOP_LATCH:%.*]] [
; CHECK-NEXT: i32 0, label [[LOOP_LATCH]]
; CHECK-NEXT: i32 1, label [[LOOP_EXIT_LOOPEXIT:%.*]]
; CHECK-NEXT: i32 2, label [[LOOP_EXIT_LOOPEXIT]]
; CHECK-NEXT: ], !prof [[PROF2:![0-9]+]]
; CHECK-NEXT: i32 0, label [[LOOP_LATCH]]
; CHECK-NEXT: i32 1, label [[LOOP_EXIT_LOOPEXIT:%.*]]
; CHECK-NEXT: i32 2, label [[LOOP_EXIT_LOOPEXIT]]
; CHECK-NEXT: ], !prof [[PROF0]]
; CHECK: loop.latch:
; CHECK-NEXT: br label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK-NEXT: br label [[LOOP]], !llvm.loop [[LOOP1:![0-9]+]]
; CHECK: loop.exit.loopexit:
; CHECK-NEXT: br label [[LOOP_EXIT]]
; CHECK: loop.exit:
; CHECK-NEXT: ret void
;
; DISABLEADV-LABEL: @test(
; DISABLEADV-NEXT: entry:
; DISABLEADV-NEXT: br label [[LOOP:%.*]]
; DISABLEADV: loop:
; DISABLEADV-NEXT: [[X:%.*]] = call i32 @get.x()
; DISABLEADV-NEXT: switch i32 [[X]], label [[LOOP_LATCH:%.*]] [
; DISABLEADV-NEXT: i32 0, label [[LOOP_LATCH]]
; DISABLEADV-NEXT: i32 1, label [[LOOP_EXIT:%.*]]
; DISABLEADV-NEXT: i32 2, label [[LOOP_EXIT]]
; DISABLEADV-NEXT: ], !prof [[PROF0:![0-9]+]]
; DISABLEADV: loop.latch:
; DISABLEADV-NEXT: br label [[LOOP]]
; DISABLEADV: loop.exit:
; DISABLEADV-NEXT: ret void
;

; DISABLEADV-LABEL: @test()
; DISABLEADV-NEXT: entry:
; DISABLEADV-NEXT: br label %loop
; DISABLEADV: loop
; DISABLEADV-NEXT: %x = call i32 @get.x()
; DISABLEADV-NEXT: switch i32 %x, label %loop.latch [
; DISABLEADV-NEXT: i32 0, label %loop.latch
; DISABLEADV-NEXT: i32 1, label %loop.exit
; DISABLEADV-NEXT: i32 2, label %loop.exit
; DISABLEADV-NEXT: ], !prof !0
; DISABLEADV: loop.latch:
; DISABLEADV-NEXT: br label %loop
; DISABLEADV: loop.exit:
; DISABLEADV-NEXT: ret void

entry:
br label %loop
Expand All @@ -89,9 +91,9 @@ loop.exit:

;.
; CHECK: [[PROF0]] = !{!"branch_weights", i32 100, i32 200, i32 20, i32 10}
; CHECK: [[PROF1]] = !{!"branch_weights", i32 90, i32 180, i32 20, i32 10}
; CHECK: [[PROF2]] = !{!"branch_weights", i32 80, i32 160, i32 20, i32 10}
; CHECK: [[LOOP3]] = distinct !{!3, !4, !5}
; CHECK: [[META4:![0-9]+]] = !{!"llvm.loop.peeled.count", i32 2}
; CHECK: [[META5:![0-9]+]] = !{!"llvm.loop.unroll.disable"}
; CHECK: [[LOOP1]] = distinct !{[[LOOP1]], [[META2:![0-9]+]], [[META3:![0-9]+]]}
; CHECK: [[META2]] = !{!"llvm.loop.peeled.count", i32 2}
; CHECK: [[META3]] = !{!"llvm.loop.unroll.disable"}
;.
; DISABLEADV: [[PROF0]] = !{!"branch_weights", i32 100, i32 200, i32 20, i32 10}
;.
11 changes: 6 additions & 5 deletions llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@
; CHECK: br i1 %{{.*}}, label %[[NEXT0:.*]], label %for.cond.for.end_crit_edge, !prof !16
; CHECK: [[NEXT0]]:
; CHECK: br i1 %c, label %{{.*}}, label %side_exit, !prof !15
; CHECK: br i1 %{{.*}}, label %[[NEXT1:.*]], label %for.cond.for.end_crit_edge, !prof !17
; CHECK: br i1 %{{.*}}, label %[[NEXT1:.*]], label %for.cond.for.end_crit_edge, !prof !16
; CHECK: [[NEXT1]]:
; CHECK: br i1 %c, label %{{.*}}, label %side_exit, !prof !15
; CHECK: br i1 %{{.*}}, label %[[NEXT2:.*]], label %for.cond.for.end_crit_edge, !prof !18
; CHECK: br i1 %{{.*}}, label %[[NEXT2:.*]], label %for.cond.for.end_crit_edge, !prof !16
; CHECK: [[NEXT2]]:
; CHECK: br i1 %c, label %{{.*}}, label %side_exit.loopexit, !prof !15
; CHECK: br i1 %{{.*}}, label %for.body, label %{{.*}}, !prof !18
; CHECK: br i1 %{{.*}}, label %for.body, label %{{.*}}, !prof !16, !llvm.loop !17

define i32 @basic(ptr %p, i32 %k, i1 %c) #0 !prof !15 {
entry:
Expand Down Expand Up @@ -84,6 +84,7 @@ attributes #1 = { nounwind optsize }
;CHECK: !15 = !{!"branch_weights", i32 1, i32 0}
; These are the weights of the latch and its copies.
;CHECK: !16 = !{!"branch_weights", i32 3001, i32 1001}
;CHECK: !17 = !{!"branch_weights", i32 2000, i32 1001}
;CHECK: !18 = !{!"branch_weights", i32 1001, i32 1001}
;CHECK: !17 = distinct !{!17, !18, !19, {{.*}}}
;CHECK: !18 = !{!"llvm.loop.peeled.count", i32 4}
;CHECK: !19 = !{!"llvm.loop.estimated_trip_count", i32 0}

13 changes: 7 additions & 6 deletions llvm/test/Transforms/LoopUnroll/peel-loop-pgo.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
; RUN: opt < %s -S -profile-summary-huge-working-set-size-threshold=9 -debug-only=loop-unroll -passes='require<profile-summary>,function(require<opt-remark-emit>,loop-unroll)' 2>&1 | FileCheck %s --check-prefix=NOPEEL
; REQUIRES: asserts

; Make sure we use the profile information correctly to peel-off 3 iterations
; Make sure we use the profile information correctly to peel-off 4 iterations
; from the loop, and update the branch weights for the peeled loop properly.

; CHECK: Loop Unroll: F[basic]
Expand All @@ -20,11 +20,11 @@
; CHECK-LABEL: @basic
; CHECK: br i1 %{{.*}}, label %[[NEXT0:.*]], label %for.cond.for.end_crit_edge, !prof !15
; CHECK: [[NEXT0]]:
; CHECK: br i1 %{{.*}}, label %[[NEXT1:.*]], label %for.cond.for.end_crit_edge, !prof !16
; CHECK: br i1 %{{.*}}, label %[[NEXT1:.*]], label %for.cond.for.end_crit_edge, !prof !15
; CHECK: [[NEXT1]]:
; CHECK: br i1 %{{.*}}, label %[[NEXT2:.*]], label %for.cond.for.end_crit_edge, !prof !17
; CHECK: br i1 %{{.*}}, label %[[NEXT2:.*]], label %for.cond.for.end_crit_edge, !prof !15
; CHECK: [[NEXT2]]:
; CHECK: br i1 %{{.*}}, label %for.body, label %{{.*}}, !prof !17
; CHECK: br i1 %{{.*}}, label %for.body, label %{{.*}}, !prof !15, !llvm.loop !16

define void @basic(ptr %p, i32 %k) #0 !prof !15 {
entry:
Expand Down Expand Up @@ -104,6 +104,7 @@ attributes #1 = { nounwind optsize }
!16 = !{!"branch_weights", i32 3001, i32 1001}

;CHECK: !15 = !{!"branch_weights", i32 3001, i32 1001}
;CHECK: !16 = !{!"branch_weights", i32 2000, i32 1001}
;CHECK: !17 = !{!"branch_weights", i32 1001, i32 1001}
;CHECK: !16 = distinct !{!16, !17, !18, {{.*}}}
;CHECK: !17 = !{!"llvm.loop.peeled.count", i32 4}
;CHECK: !18 = !{!"llvm.loop.estimated_trip_count", i32 0}

Loading