Skip to content

Commit a332cfc

Browse files
[MemProf] Perform cloning for each allocation separately (#87112)
Restructures the cloning slightly to perform all cloning for each allocation separately. The prior algorithm would sometimes miss cloning opportunities in cases where trimmed cold contexts partially overlapped with longer contexts for different allocations. Most of the change is isolated to the helpers that move edges to new or existing clones, which now support moving a subset of context ids.
1 parent b561fd3 commit a332cfc

File tree

2 files changed

+344
-32
lines changed

2 files changed

+344
-32
lines changed

llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp

Lines changed: 112 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -526,25 +526,30 @@ class CallsiteContextGraph {
526526
/// Create a clone of Edge's callee and move Edge to that new callee node,
527527
/// performing the necessary context id and allocation type updates.
528528
/// If callee's caller edge iterator is supplied, it is updated when removing
529-
/// the edge from that list.
529+
/// the edge from that list. If ContextIdsToMove is non-empty, only that
530+
/// subset of Edge's ids is moved to an edge to the new callee.
530531
ContextNode *
531532
moveEdgeToNewCalleeClone(const std::shared_ptr<ContextEdge> &Edge,
532-
EdgeIter *CallerEdgeI = nullptr);
533+
EdgeIter *CallerEdgeI = nullptr,
534+
DenseSet<uint32_t> ContextIdsToMove = {});
533535

534536
/// Change the callee of Edge to existing callee clone NewCallee, performing
535537
/// the necessary context id and allocation type updates.
536538
/// If callee's caller edge iterator is supplied, it is updated when removing
537-
/// the edge from that list.
539+
/// the edge from that list. If ContextIdsToMove is non-empty, only that
540+
/// subset of Edge's ids is moved to an edge to the new callee.
538541
void moveEdgeToExistingCalleeClone(const std::shared_ptr<ContextEdge> &Edge,
539542
ContextNode *NewCallee,
540543
EdgeIter *CallerEdgeI = nullptr,
541-
bool NewClone = false);
544+
bool NewClone = false,
545+
DenseSet<uint32_t> ContextIdsToMove = {});
542546

543547
/// Recursively perform cloning on the graph for the given Node and its
544548
/// callers, in order to uniquely identify the allocation behavior of an
545-
/// allocation given its context.
546-
void identifyClones(ContextNode *Node,
547-
DenseSet<const ContextNode *> &Visited);
549+
/// allocation given its context. The context ids of the allocation being
550+
/// processed are given in AllocContextIds.
551+
void identifyClones(ContextNode *Node, DenseSet<const ContextNode *> &Visited,
552+
const DenseSet<uint32_t> &AllocContextIds);
548553

549554
/// Map from each context ID to the AllocationType assigned to that context.
550555
std::map<uint32_t, AllocationType> ContextIdToAllocationType;
@@ -2358,39 +2363,99 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot(
23582363
template <typename DerivedCCG, typename FuncTy, typename CallTy>
23592364
typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
23602365
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
2361-
const std::shared_ptr<ContextEdge> &Edge, EdgeIter *CallerEdgeI) {
2366+
const std::shared_ptr<ContextEdge> &Edge, EdgeIter *CallerEdgeI,
2367+
DenseSet<uint32_t> ContextIdsToMove) {
23622368
ContextNode *Node = Edge->Callee;
23632369
NodeOwner.push_back(
23642370
std::make_unique<ContextNode>(Node->IsAllocation, Node->Call));
23652371
ContextNode *Clone = NodeOwner.back().get();
23662372
Node->addClone(Clone);
23672373
assert(NodeToCallingFunc.count(Node));
23682374
NodeToCallingFunc[Clone] = NodeToCallingFunc[Node];
2369-
moveEdgeToExistingCalleeClone(Edge, Clone, CallerEdgeI, /*NewClone=*/true);
2375+
moveEdgeToExistingCalleeClone(Edge, Clone, CallerEdgeI, /*NewClone=*/true,
2376+
ContextIdsToMove);
23702377
return Clone;
23712378
}
23722379

23732380
template <typename DerivedCCG, typename FuncTy, typename CallTy>
23742381
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
23752382
moveEdgeToExistingCalleeClone(const std::shared_ptr<ContextEdge> &Edge,
23762383
ContextNode *NewCallee, EdgeIter *CallerEdgeI,
2377-
bool NewClone) {
2384+
bool NewClone,
2385+
DenseSet<uint32_t> ContextIdsToMove) {
23782386
// NewCallee and Edge's current callee must be clones of the same original
23792387
// node (Edge's current callee may be the original node too).
23802388
assert(NewCallee->getOrigNode() == Edge->Callee->getOrigNode());
2381-
auto &EdgeContextIds = Edge->getContextIds();
2389+
23822390
ContextNode *OldCallee = Edge->Callee;
2383-
if (CallerEdgeI)
2384-
*CallerEdgeI = OldCallee->CallerEdges.erase(*CallerEdgeI);
2385-
else
2386-
OldCallee->eraseCallerEdge(Edge.get());
2387-
Edge->Callee = NewCallee;
2388-
NewCallee->CallerEdges.push_back(Edge);
2389-
// Don't need to update Edge's context ids since we are simply reconnecting
2390-
// it.
2391-
set_subtract(OldCallee->ContextIds, EdgeContextIds);
2392-
NewCallee->ContextIds.insert(EdgeContextIds.begin(), EdgeContextIds.end());
2393-
NewCallee->AllocTypes |= Edge->AllocTypes;
2391+
2392+
// We might already have an edge to the new callee from earlier cloning for a
2393+
// different allocation. If one exists we will reuse it.
2394+
auto ExistingEdgeToNewCallee = NewCallee->findEdgeFromCaller(Edge->Caller);
2395+
2396+
// Callers will pass an empty ContextIdsToMove set when they want to move the
2397+
// entire edge. Copy in Edge's ids for simplicity.
2398+
if (ContextIdsToMove.empty())
2399+
ContextIdsToMove = Edge->getContextIds();
2400+
2401+
// If we are moving all of Edge's ids, then just move the whole Edge.
2402+
// Otherwise only move the specified subset, to a new edge if needed.
2403+
if (Edge->getContextIds().size() == ContextIdsToMove.size()) {
2404+
// Moving the whole Edge.
2405+
if (CallerEdgeI)
2406+
*CallerEdgeI = OldCallee->CallerEdges.erase(*CallerEdgeI);
2407+
else
2408+
OldCallee->eraseCallerEdge(Edge.get());
2409+
if (ExistingEdgeToNewCallee) {
2410+
// Since we already have an edge to NewCallee, simply move the ids
2411+
// onto it, and remove the existing Edge.
2412+
ExistingEdgeToNewCallee->getContextIds().insert(ContextIdsToMove.begin(),
2413+
ContextIdsToMove.end());
2414+
ExistingEdgeToNewCallee->AllocTypes |= Edge->AllocTypes;
2415+
assert(Edge->ContextIds == ContextIdsToMove);
2416+
Edge->ContextIds.clear();
2417+
Edge->AllocTypes = (uint8_t)AllocationType::None;
2418+
Edge->Caller->eraseCalleeEdge(Edge.get());
2419+
} else {
2420+
// Otherwise just reconnect Edge to NewCallee.
2421+
Edge->Callee = NewCallee;
2422+
NewCallee->CallerEdges.push_back(Edge);
2423+
// Don't need to update Edge's context ids since we are simply
2424+
// reconnecting it.
2425+
}
2426+
// In either case, need to update the alloc types on NewCallee.
2427+
NewCallee->AllocTypes |= Edge->AllocTypes;
2428+
} else {
2429+
// Only moving a subset of Edge's ids.
2430+
if (CallerEdgeI)
2431+
++CallerEdgeI;
2432+
// Compute the alloc type of the subset of ids being moved.
2433+
auto CallerEdgeAllocType = computeAllocType(ContextIdsToMove);
2434+
if (ExistingEdgeToNewCallee) {
2435+
// Since we already have an edge to NewCallee, simply move the ids
2436+
// onto it.
2437+
ExistingEdgeToNewCallee->getContextIds().insert(ContextIdsToMove.begin(),
2438+
ContextIdsToMove.end());
2439+
ExistingEdgeToNewCallee->AllocTypes |= CallerEdgeAllocType;
2440+
} else {
2441+
// Otherwise, create a new edge to NewCallee for the ids being moved.
2442+
auto NewEdge = std::make_shared<ContextEdge>(
2443+
NewCallee, Edge->Caller, CallerEdgeAllocType, ContextIdsToMove);
2444+
Edge->Caller->CalleeEdges.push_back(NewEdge);
2445+
NewCallee->CallerEdges.push_back(NewEdge);
2446+
}
2447+
// In either case, need to update the alloc types on NewCallee, and remove
2448+
// those ids and update the alloc type on the original Edge.
2449+
NewCallee->AllocTypes |= CallerEdgeAllocType;
2450+
set_subtract(Edge->ContextIds, ContextIdsToMove);
2451+
Edge->AllocTypes = computeAllocType(Edge->ContextIds);
2452+
}
2453+
// Now perform some updates that are common to all cases: the NewCallee gets
2454+
// the moved ids added, and we need to remove those ids from OldCallee and
2455+
// update its alloc type (NewCallee alloc type updates handled above).
2456+
NewCallee->ContextIds.insert(ContextIdsToMove.begin(),
2457+
ContextIdsToMove.end());
2458+
set_subtract(OldCallee->ContextIds, ContextIdsToMove);
23942459
OldCallee->AllocTypes = computeAllocType(OldCallee->ContextIds);
23952460
// OldCallee alloc type should be None iff its context id set is now empty.
23962461
assert((OldCallee->AllocTypes == (uint8_t)AllocationType::None) ==
@@ -2402,7 +2467,7 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
24022467
// The context ids moving to the new callee are the intersection of this
24032468
// edge's context ids and the context ids being moved from the caller edge.
24042469
DenseSet<uint32_t> EdgeContextIdsToMove =
2405-
set_intersection(OldCalleeEdge->getContextIds(), EdgeContextIds);
2470+
set_intersection(OldCalleeEdge->getContextIds(), ContextIdsToMove);
24062471
set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove);
24072472
OldCalleeEdge->AllocTypes =
24082473
computeAllocType(OldCalleeEdge->getContextIds());
@@ -2468,8 +2533,10 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
24682533
template <typename DerivedCCG, typename FuncTy, typename CallTy>
24692534
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() {
24702535
DenseSet<const ContextNode *> Visited;
2471-
for (auto &Entry : AllocationCallToContextNodeMap)
2472-
identifyClones(Entry.second, Visited);
2536+
for (auto &Entry : AllocationCallToContextNodeMap) {
2537+
Visited.clear();
2538+
identifyClones(Entry.second, Visited, Entry.second->ContextIds);
2539+
}
24732540
Visited.clear();
24742541
for (auto &Entry : AllocationCallToContextNodeMap)
24752542
recursivelyRemoveNoneTypeCalleeEdges(Entry.second, Visited);
@@ -2487,7 +2554,8 @@ bool checkColdOrNotCold(uint8_t AllocType) {
24872554

24882555
template <typename DerivedCCG, typename FuncTy, typename CallTy>
24892556
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
2490-
ContextNode *Node, DenseSet<const ContextNode *> &Visited) {
2557+
ContextNode *Node, DenseSet<const ContextNode *> &Visited,
2558+
const DenseSet<uint32_t> &AllocContextIds) {
24912559
if (VerifyNodes)
24922560
checkNode<DerivedCCG, FuncTy, CallTy>(Node, /*CheckEdges=*/false);
24932561
assert(!Node->CloneOf);
@@ -2521,7 +2589,7 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
25212589
}
25222590
// Ignore any caller we previously visited via another edge.
25232591
if (!Visited.count(Edge->Caller) && !Edge->Caller->CloneOf) {
2524-
identifyClones(Edge->Caller, Visited);
2592+
identifyClones(Edge->Caller, Visited, AllocContextIds);
25252593
}
25262594
}
25272595
}
@@ -2584,13 +2652,23 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
25842652
if (hasSingleAllocType(Node->AllocTypes) || Node->CallerEdges.size() <= 1)
25852653
break;
25862654

2655+
// Only need to process the ids along this edge pertaining to the given
2656+
// allocation.
2657+
auto CallerEdgeContextsForAlloc =
2658+
set_intersection(CallerEdge->getContextIds(), AllocContextIds);
2659+
if (CallerEdgeContextsForAlloc.empty()) {
2660+
++EI;
2661+
continue;
2662+
}
2663+
auto CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc);
2664+
25872665
// Compute the node callee edge alloc types corresponding to the context ids
25882666
// for this caller edge.
25892667
std::vector<uint8_t> CalleeEdgeAllocTypesForCallerEdge;
25902668
CalleeEdgeAllocTypesForCallerEdge.reserve(Node->CalleeEdges.size());
25912669
for (auto &CalleeEdge : Node->CalleeEdges)
25922670
CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
2593-
CalleeEdge->getContextIds(), CallerEdge->getContextIds()));
2671+
CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc));
25942672

25952673
// Don't clone if doing so will not disambiguate any alloc types amongst
25962674
// caller edges (including the callee edges that would be cloned).
@@ -2605,7 +2683,7 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
26052683
// disambiguated by splitting out different context ids.
26062684
assert(CallerEdge->AllocTypes != (uint8_t)AllocationType::None);
26072685
assert(Node->AllocTypes != (uint8_t)AllocationType::None);
2608-
if (allocTypeToUse(CallerEdge->AllocTypes) ==
2686+
if (allocTypeToUse(CallerAllocTypeForAlloc) ==
26092687
allocTypeToUse(Node->AllocTypes) &&
26102688
allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
26112689
CalleeEdgeAllocTypesForCallerEdge, Node->CalleeEdges)) {
@@ -2618,7 +2696,7 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
26182696
ContextNode *Clone = nullptr;
26192697
for (auto *CurClone : Node->Clones) {
26202698
if (allocTypeToUse(CurClone->AllocTypes) !=
2621-
allocTypeToUse(CallerEdge->AllocTypes))
2699+
allocTypeToUse(CallerAllocTypeForAlloc))
26222700
continue;
26232701

26242702
if (!allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
@@ -2630,9 +2708,11 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
26302708

26312709
// The edge iterator is adjusted when we move the CallerEdge to the clone.
26322710
if (Clone)
2633-
moveEdgeToExistingCalleeClone(CallerEdge, Clone, &EI);
2711+
moveEdgeToExistingCalleeClone(CallerEdge, Clone, &EI, /*NewClone=*/false,
2712+
CallerEdgeContextsForAlloc);
26342713
else
2635-
Clone = moveEdgeToNewCalleeClone(CallerEdge, &EI);
2714+
Clone =
2715+
moveEdgeToNewCalleeClone(CallerEdge, &EI, CallerEdgeContextsForAlloc);
26362716

26372717
assert(EI == Node->CallerEdges.end() ||
26382718
Node->AllocTypes != (uint8_t)AllocationType::None);

0 commit comments

Comments
 (0)