//==-- MemProfContextDisambiguation.cpp - Disambiguate contexts -------------=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements support for context disambiguation of allocation
// calls for profile guided heap optimization. Specifically, it uses Memprof
// profiles which indicate context specific allocation behavior (currently
// distinguishing cold vs hot memory allocations). Cloning is performed to
// expose the cold allocation call contexts, and the allocation calls are
// subsequently annotated with an attribute for later transformation.
//
// The transformations can be performed either directly on IR (regular LTO), or
// on a ThinLTO index (and later applied to the IR during the ThinLTO backend).
// Both types of LTO operate on the same base graph representation, which
// uses CRTP to support either IR or Index formats.
//
//===----------------------------------------------------------------------===//
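
// Illustrative sketch (hypothetical ids, not taken from a real profile) of the
// IR this pass consumes: an allocation call carrying memprof and callsite
// metadata. Each MIB operand pairs a call stack (a list of stack ids) with an
// allocation type string, and the callsite metadata holds the stack id(s) of
// the allocation call itself:
//
//   %call = call ptr @_Znam(i64 10), !memprof !0, !callsite !5
//   !0 = !{!1, !3}
//   !1 = !{!2, !"notcold"}
//   !2 = !{i64 9086428284934609951, i64 8632435727821051414}
//   !3 = !{!4, !"cold"}
//   !4 = !{i64 9086428284934609951, i64 2732490490862098848}
//   !5 = !{i64 9086428284934609951}
//
// Cloning exposes the cold context so that the cold allocation call can be
// annotated with an attribute for a later transformation (e.g. to a hot/cold
// operator new interface, when linking with an allocator that supports it).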

#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include <sstream>
#include <vector>
using namespace llvm;
using namespace llvm::memprof;

#define DEBUG_TYPE "memprof-context-disambiguation"

STATISTIC(FunctionClonesAnalysis,
          "Number of function clones created during whole program analysis");
STATISTIC(FunctionClonesThinBackend,
          "Number of function clones created during ThinLTO backend");
STATISTIC(FunctionsClonedThinBackend,
          "Number of functions that had clones created during ThinLTO backend");
STATISTIC(AllocTypeNotCold, "Number of not cold static allocations (possibly "
                            "cloned) during whole program analysis");
STATISTIC(AllocTypeCold, "Number of cold static allocations (possibly cloned) "
                         "during whole program analysis");
STATISTIC(AllocTypeNotColdThinBackend,
          "Number of not cold static allocations (possibly cloned) during "
          "ThinLTO backend");
STATISTIC(AllocTypeColdThinBackend, "Number of cold static allocations "
                                    "(possibly cloned) during ThinLTO backend");
STATISTIC(OrigAllocsThinBackend,
          "Number of original (not cloned) allocations with memprof profiles "
          "during ThinLTO backend");
STATISTIC(
    AllocVersionsThinBackend,
    "Number of allocation versions (including clones) during ThinLTO backend");
STATISTIC(MaxAllocVersionsThinBackend,
          "Maximum number of allocation versions created for an original "
          "allocation during ThinLTO backend");
STATISTIC(UnclonableAllocsThinBackend,
          "Number of unclonable ambiguous allocations during ThinLTO backend");

static cl::opt<std::string> DotFilePathPrefix(
    "memprof-dot-file-path-prefix", cl::init(""), cl::Hidden,
    cl::value_desc("filename"),
    cl::desc("Specify the path prefix of the MemProf dot files."));

static cl::opt<bool> ExportToDot("memprof-export-to-dot", cl::init(false),
                                 cl::Hidden,
                                 cl::desc("Export graph to dot files."));

static cl::opt<bool>
    DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden,
            cl::desc("Dump CallingContextGraph to stdout after each stage."));

static cl::opt<bool>
    VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden,
              cl::desc("Perform verification checks on CallingContextGraph."));

static cl::opt<bool>
    VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden,
                cl::desc("Perform frequent verification checks on nodes."));

static cl::opt<std::string> MemProfImportSummary(
    "memprof-import-summary",
    cl::desc("Import summary to use for testing the ThinLTO backend via opt"),
    cl::Hidden);

// Indicate we are linking with an allocator that supports hot/cold operator
// new interfaces.
cl::opt<bool> SupportsHotColdNew(
    "supports-hot-cold-new", cl::init(false), cl::Hidden,
    cl::desc("Linking with hot/cold operator new interfaces"));
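
// Example invocation (assumed; flag names are taken from the options above and
// the pass name from DEBUG_TYPE): running the pass standalone via opt while
// dumping the graph to dot files for inspection:
//
//   opt -passes=memprof-context-disambiguation \
//       -memprof-export-to-dot -memprof-dot-file-path-prefix=/tmp/ccg. \
//       -memprof-verify-ccg input.bc -o out.bc
//
// The -memprof-import-summary option can additionally be used to exercise the
// ThinLTO backend path from opt using a previously written summary index.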

namespace {
/// CRTP base for graphs built from either IR or ThinLTO summary index.
///
/// The graph represents the call contexts in all memprof metadata on allocation
/// calls, with nodes for the allocations themselves, as well as for the calls
/// in each context. The graph is initially built from the allocation memprof
/// metadata (or summary) MIBs. It is then updated to match calls with callsite
/// metadata onto the nodes, updating it to reflect any inlining performed on
/// those calls.
///
/// Each MIB (representing an allocation's call context with allocation
/// behavior) is assigned a unique context id during the graph build. The edges
/// and nodes in the graph are decorated with the context ids they carry. This
/// is used to correctly update the graph when cloning is performed so that we
/// can uniquify the context for a single (possibly cloned) allocation.
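///
/// Hypothetical example (not derived from a real profile): for two profiled
/// contexts reaching the same allocation,
///
///   main -> A -> C -> new   (cold)
///   main -> B -> C -> new   (notcold)
///
/// the graph gets an allocation node for the call to new, stack nodes for the
/// callsites in C, A, B and main, and caller edges linking each node to the
/// nodes for its profiled callers. The cold context is assigned one context id
/// and the notcold context another; edges on the A path carry only the cold
/// id, edges on the B path carry only the notcold id, and the shared edge from
/// the allocation to C's callsite carries both. Cloning along the cold path is
/// what later uniquifies the cold context.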
template <typename DerivedCCG, typename FuncTy, typename CallTy>
class CallsiteContextGraph {
public:
  CallsiteContextGraph() = default;
  CallsiteContextGraph(const CallsiteContextGraph &) = default;
  CallsiteContextGraph(CallsiteContextGraph &&) = default;

  /// Main entry point to perform analysis and transformations on graph.
  bool process();

  /// Perform cloning on the graph necessary to uniquely identify the allocation
  /// behavior of an allocation based on its context.
  void identifyClones();

  /// Assign callsite clones to functions, cloning functions as needed to
  /// accommodate the combinations of their callsite clones reached by callers.
  /// For regular LTO this clones functions and callsites in the IR, but for
  /// ThinLTO the cloning decisions are noted in the summaries and later applied
  /// in applyImport.
  bool assignFunctions();

  void dump() const;
  void print(raw_ostream &OS) const;

  friend raw_ostream &operator<<(raw_ostream &OS,
                                 const CallsiteContextGraph &CCG) {
    CCG.print(OS);
    return OS;
  }

  friend struct GraphTraits<
      const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
  friend struct DOTGraphTraits<
      const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;

  void exportToDot(std::string Label) const;

  /// Represents a function clone via FuncTy pointer and clone number pair.
  struct FuncInfo final
      : public std::pair<FuncTy *, unsigned /*Clone number*/> {
    using Base = std::pair<FuncTy *, unsigned>;
    FuncInfo(const Base &B) : Base(B) {}
    FuncInfo(FuncTy *F = nullptr, unsigned CloneNo = 0) : Base(F, CloneNo) {}
    explicit operator bool() const { return this->first != nullptr; }
    FuncTy *func() const { return this->first; }
    unsigned cloneNo() const { return this->second; }
  };

  /// Represents a callsite clone via CallTy and clone number pair.
  struct CallInfo final : public std::pair<CallTy, unsigned /*Clone number*/> {
    using Base = std::pair<CallTy, unsigned>;
    CallInfo(const Base &B) : Base(B) {}
    CallInfo(CallTy Call = nullptr, unsigned CloneNo = 0)
        : Base(Call, CloneNo) {}
    explicit operator bool() const { return (bool)this->first; }
    CallTy call() const { return this->first; }
    unsigned cloneNo() const { return this->second; }
    void setCloneNo(unsigned N) { this->second = N; }
    void print(raw_ostream &OS) const {
      if (!operator bool()) {
        assert(!cloneNo());
        OS << "null Call";
        return;
      }
      call()->print(OS);
      OS << "\t(clone " << cloneNo() << ")";
    }
    void dump() const {
      print(dbgs());
      dbgs() << "\n";
    }
    friend raw_ostream &operator<<(raw_ostream &OS, const CallInfo &Call) {
      Call.print(OS);
      return OS;
    }
  };

  struct ContextEdge;

  /// Node in the Callsite Context Graph
  struct ContextNode {
    // Keep this for now since in the IR case where we have an Instruction* it
    // is not as immediately discoverable. Used for printing richer information
    // when dumping graph.
    bool IsAllocation;

    // Keeps track of when the Call was reset to null because there was
    // recursion.
    bool Recursive = false;

    // The corresponding allocation or interior call.
    CallInfo Call;

    // For alloc nodes this is a unique id assigned when constructed, and for
    // callsite stack nodes it is the original stack id when the node is
    // constructed from the memprof MIB metadata on the alloc nodes. Note that
    // this is only used when matching callsite metadata onto the stack nodes
    // created when processing the allocation memprof MIBs, and for labeling
    // nodes in the dot graph. Therefore we don't bother to assign a value for
    // clones.
    uint64_t OrigStackOrAllocId = 0;

    // This will be formed by ORing together the AllocationType enum values
    // for contexts including this node.
    uint8_t AllocTypes = 0;

    // Edges to all callees in the profiled call stacks.
    // TODO: Should this be a map (from Callee node) for more efficient lookup?
    std::vector<std::shared_ptr<ContextEdge>> CalleeEdges;

    // Edges to all callers in the profiled call stacks.
    // TODO: Should this be a map (from Caller node) for more efficient lookup?
    std::vector<std::shared_ptr<ContextEdge>> CallerEdges;

    // The set of IDs for contexts including this node.
    DenseSet<uint32_t> ContextIds;

    // List of clones of this ContextNode, initially empty.
    std::vector<ContextNode *> Clones;

    // If a clone, points to the original uncloned node.
    ContextNode *CloneOf = nullptr;

    ContextNode(bool IsAllocation) : IsAllocation(IsAllocation), Call() {}

    ContextNode(bool IsAllocation, CallInfo C)
        : IsAllocation(IsAllocation), Call(C) {}

    void addClone(ContextNode *Clone) {
      if (CloneOf) {
        CloneOf->Clones.push_back(Clone);
        Clone->CloneOf = CloneOf;
      } else {
        Clones.push_back(Clone);
        assert(!Clone->CloneOf);
        Clone->CloneOf = this;
      }
    }

    ContextNode *getOrigNode() {
      if (!CloneOf)
        return this;
      return CloneOf;
    }

    void addOrUpdateCallerEdge(ContextNode *Caller, AllocationType AllocType,
                               unsigned int ContextId);

    ContextEdge *findEdgeFromCallee(const ContextNode *Callee);
    ContextEdge *findEdgeFromCaller(const ContextNode *Caller);
    void eraseCalleeEdge(const ContextEdge *Edge);
    void eraseCallerEdge(const ContextEdge *Edge);

    void setCall(CallInfo C) { Call = C; }

    bool hasCall() const { return (bool)Call.call(); }

    void printCall(raw_ostream &OS) const { Call.print(OS); }

    // True if this node was effectively removed from the graph, in which case
    // its context id set, caller edges, and callee edges should all be empty.
    bool isRemoved() const {
      assert(ContextIds.empty() ==
             (CalleeEdges.empty() && CallerEdges.empty()));
      return ContextIds.empty();
    }

    void dump() const;
    void print(raw_ostream &OS) const;

    friend raw_ostream &operator<<(raw_ostream &OS, const ContextNode &Node) {
      Node.print(OS);
      return OS;
    }
  };

  /// Edge in the Callsite Context Graph from a ContextNode N to a caller or
  /// callee.
  struct ContextEdge {
    ContextNode *Callee;
    ContextNode *Caller;

    // This will be formed by ORing together the AllocationType enum values
    // for contexts including this edge.
    uint8_t AllocTypes = 0;

    // The set of IDs for contexts including this edge.
    DenseSet<uint32_t> ContextIds;

    ContextEdge(ContextNode *Callee, ContextNode *Caller, uint8_t AllocType,
                DenseSet<uint32_t> ContextIds)
        : Callee(Callee), Caller(Caller), AllocTypes(AllocType),
          ContextIds(ContextIds) {}

    DenseSet<uint32_t> &getContextIds() { return ContextIds; }

    void dump() const;
    void print(raw_ostream &OS) const;

    friend raw_ostream &operator<<(raw_ostream &OS, const ContextEdge &Edge) {
      Edge.print(OS);
      return OS;
    }
  };

  /// Helper to remove callee edges that have allocation type None (due to not
  /// carrying any context ids) after transformations.
  void removeNoneTypeCalleeEdges(ContextNode *Node);

protected:
  /// Get a list of nodes corresponding to the stack ids in the given callsite
  /// context.
  template <class NodeT, class IteratorT>
  std::vector<uint64_t>
  getStackIdsWithContextNodes(CallStack<NodeT, IteratorT> &CallsiteContext);

  /// Adds nodes for the given allocation and any stack ids on its memprof MIB
  /// metadata (or summary).
  ContextNode *addAllocNode(CallInfo Call, const FuncTy *F);

  /// Adds nodes for the given MIB stack ids.
  template <class NodeT, class IteratorT>
  void addStackNodesForMIB(ContextNode *AllocNode,
                           CallStack<NodeT, IteratorT> &StackContext,
                           CallStack<NodeT, IteratorT> &CallsiteContext,
                           AllocationType AllocType);

  /// Matches all callsite metadata (or summary) to the nodes created for
  /// allocation memprof MIB metadata, synthesizing new nodes to reflect any
  /// inlining performed on those callsite instructions.
  void updateStackNodes();

  /// Update graph to conservatively handle any callsite stack nodes that target
  /// multiple different callee target functions.
  void handleCallsitesWithMultipleTargets();

  /// Save lists of calls with MemProf metadata in each function, for faster
  /// iteration.
  std::vector<std::pair<FuncTy *, std::vector<CallInfo>>>
      FuncToCallsWithMetadata;

  /// Map from callsite node to the enclosing caller function.
  std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;

private:
  using EdgeIter = typename std::vector<std::shared_ptr<ContextEdge>>::iterator;

  using CallContextInfo = std::tuple<CallTy, std::vector<uint64_t>,
                                     const FuncTy *, DenseSet<uint32_t>>;

  /// Assigns the given Node to calls at or inlined into the location with
  /// the Node's stack id, after post order traversing and processing its
  /// caller nodes. Uses the call information recorded in the given
  /// StackIdToMatchingCalls map, and creates new nodes for inlined sequences
  /// as needed. Called by updateStackNodes which sets up the given
  /// StackIdToMatchingCalls map.
  void assignStackNodesPostOrder(
      ContextNode *Node, DenseSet<const ContextNode *> &Visited,
      DenseMap<uint64_t, std::vector<CallContextInfo>> &StackIdToMatchingCalls);

  /// Duplicates the given set of context ids, updating the provided
  /// map from each original id with the newly generated context ids,
  /// and returning the new duplicated id set.
  DenseSet<uint32_t> duplicateContextIds(
      const DenseSet<uint32_t> &StackSequenceContextIds,
      DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds);

  /// Propagates all duplicated context ids across the graph.
  void propagateDuplicateContextIds(
      const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds);

  /// Connect the NewNode to OrigNode's callees if TowardsCallee is true,
  /// else to its callers. Also updates OrigNode's edges to remove any context
  /// ids moved to the newly created edge.
  void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode,
                      bool TowardsCallee);

  /// Get the stack id corresponding to the given Id or Index (for IR this will
  /// return itself, for a summary index this will return the id recorded in the
  /// index for that stack id index value).
  uint64_t getStackId(uint64_t IdOrIndex) const {
    return static_cast<const DerivedCCG *>(this)->getStackId(IdOrIndex);
  }

  /// Returns true if the given call targets the given function.
  bool calleeMatchesFunc(CallTy Call, const FuncTy *Func) {
    return static_cast<DerivedCCG *>(this)->calleeMatchesFunc(Call, Func);
  }

  /// Get a list of nodes corresponding to the stack ids in the given
  /// callsite's context.
  std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy Call) {
    return static_cast<DerivedCCG *>(this)->getStackIdsWithContextNodesForCall(
        Call);
  }

  /// Get the last stack id in the context for callsite.
  uint64_t getLastStackId(CallTy Call) {
    return static_cast<DerivedCCG *>(this)->getLastStackId(Call);
  }

  /// Update the allocation call to record type of allocated memory.
  void updateAllocationCall(CallInfo &Call, AllocationType AllocType) {
    AllocType == AllocationType::Cold ? AllocTypeCold++ : AllocTypeNotCold++;
    static_cast<DerivedCCG *>(this)->updateAllocationCall(Call, AllocType);
  }

  /// Update non-allocation call to invoke (possibly cloned) function
  /// CalleeFunc.
  void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc) {
    static_cast<DerivedCCG *>(this)->updateCall(CallerCall, CalleeFunc);
  }

  /// Clone the given function for the given callsite, recording mapping of all
  /// of the functions tracked calls to their new versions in the CallMap.
  /// Assigns new clones to clone number CloneNo.
  FuncInfo cloneFunctionForCallsite(
      FuncInfo &Func, CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
      std::vector<CallInfo> &CallsWithMetadataInFunc, unsigned CloneNo) {
    return static_cast<DerivedCCG *>(this)->cloneFunctionForCallsite(
        Func, Call, CallMap, CallsWithMetadataInFunc, CloneNo);
  }

  /// Gets a label to use in the dot graph for the given call clone in the given
  /// function.
  std::string getLabel(const FuncTy *Func, const CallTy Call,
                       unsigned CloneNo) const {
    return static_cast<const DerivedCCG *>(this)->getLabel(Func, Call, CloneNo);
  }

  /// Helpers to find the node corresponding to the given call or stackid.
  ContextNode *getNodeForInst(const CallInfo &C);
  ContextNode *getNodeForAlloc(const CallInfo &C);
  ContextNode *getNodeForStackId(uint64_t StackId);

  /// Removes the node information recorded for the given call.
  void unsetNodeForInst(const CallInfo &C);

  /// Computes the alloc type corresponding to the given context ids, by
  /// unioning their recorded alloc types.
  uint8_t computeAllocType(DenseSet<uint32_t> &ContextIds);

  /// Returns the allocation type of the intersection of the contexts of two
  /// nodes (based on their provided context id sets), optimized for the case
  /// when Node1Ids is smaller than Node2Ids.
  uint8_t intersectAllocTypesImpl(const DenseSet<uint32_t> &Node1Ids,
                                  const DenseSet<uint32_t> &Node2Ids);

  /// Returns the allocation type of the intersection of the contexts of two
  /// nodes (based on their provided context id sets).
  uint8_t intersectAllocTypes(const DenseSet<uint32_t> &Node1Ids,
                              const DenseSet<uint32_t> &Node2Ids);

  /// Create a clone of Edge's callee and move Edge to that new callee node,
  /// performing the necessary context id and allocation type updates.
  /// If callee's caller edge iterator is supplied, it is updated when removing
  /// the edge from that list.
  ContextNode *
  moveEdgeToNewCalleeClone(const std::shared_ptr<ContextEdge> &Edge,
                           EdgeIter *CallerEdgeI = nullptr);

  /// Change the callee of Edge to existing callee clone NewCallee, performing
  /// the necessary context id and allocation type updates.
  /// If callee's caller edge iterator is supplied, it is updated when removing
  /// the edge from that list.
  void moveEdgeToExistingCalleeClone(const std::shared_ptr<ContextEdge> &Edge,
                                     ContextNode *NewCallee,
                                     EdgeIter *CallerEdgeI = nullptr,
                                     bool NewClone = false);

  /// Recursively perform cloning on the graph for the given Node and its
  /// callers, in order to uniquely identify the allocation behavior of an
  /// allocation given its context.
  void identifyClones(ContextNode *Node,
                      DenseSet<const ContextNode *> &Visited);

  /// Map from each context ID to the AllocationType assigned to that context.
  std::map<uint32_t, AllocationType> ContextIdToAllocationType;

  /// Identifies the context node created for a stack id when adding the MIB
  /// contexts to the graph. This is used to locate the context nodes when
  /// trying to assign the corresponding callsites with those stack ids to these
  /// nodes.
  std::map<uint64_t, ContextNode *> StackEntryIdToContextNodeMap;

  /// Maps to track the calls to their corresponding nodes in the graph.
  MapVector<CallInfo, ContextNode *> AllocationCallToContextNodeMap;
  MapVector<CallInfo, ContextNode *> NonAllocationCallToContextNodeMap;

  /// Owner of all ContextNode unique_ptrs.
  std::vector<std::unique_ptr<ContextNode>> NodeOwner;

  /// Perform sanity checks on graph when requested.
  void check() const;

  /// Keeps track of the last unique context id assigned.
  unsigned int LastContextId = 0;
};

template <typename DerivedCCG, typename FuncTy, typename CallTy>
using ContextNode =
    typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode;
template <typename DerivedCCG, typename FuncTy, typename CallTy>
using ContextEdge =
    typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge;
template <typename DerivedCCG, typename FuncTy, typename CallTy>
using FuncInfo =
    typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::FuncInfo;
template <typename DerivedCCG, typename FuncTy, typename CallTy>
using CallInfo =
    typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::CallInfo;

/// CRTP derived class for graphs built from IR (regular LTO).
class ModuleCallsiteContextGraph
    : public CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
                                  Instruction *> {
public:
  ModuleCallsiteContextGraph(
      Module &M,
      function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter);

private:
  friend CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
                              Instruction *>;

  uint64_t getStackId(uint64_t IdOrIndex) const;
  bool calleeMatchesFunc(Instruction *Call, const Function *Func);
  uint64_t getLastStackId(Instruction *Call);
  std::vector<uint64_t> getStackIdsWithContextNodesForCall(Instruction *Call);
  void updateAllocationCall(CallInfo &Call, AllocationType AllocType);
  void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
  CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
                       Instruction *>::FuncInfo
  cloneFunctionForCallsite(FuncInfo &Func, CallInfo &Call,
                           std::map<CallInfo, CallInfo> &CallMap,
                           std::vector<CallInfo> &CallsWithMetadataInFunc,
                           unsigned CloneNo);
  std::string getLabel(const Function *Func, const Instruction *Call,
                       unsigned CloneNo) const;

  const Module &Mod;
  function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter;
};

/// Represents a call in the summary index graph, which can either be an
/// allocation or an interior callsite node in an allocation's context.
/// Holds a pointer to the corresponding data structure in the index.
struct IndexCall : public PointerUnion<CallsiteInfo *, AllocInfo *> {
  IndexCall() : PointerUnion() {}
  IndexCall(std::nullptr_t) : IndexCall() {}
  IndexCall(CallsiteInfo *StackNode) : PointerUnion(StackNode) {}
  IndexCall(AllocInfo *AllocNode) : PointerUnion(AllocNode) {}
  IndexCall(PointerUnion PT) : PointerUnion(PT) {}

  IndexCall *operator->() { return this; }

  PointerUnion<CallsiteInfo *, AllocInfo *> getBase() const { return *this; }

  void print(raw_ostream &OS) const {
    if (auto *AI = llvm::dyn_cast_if_present<AllocInfo *>(getBase())) {
      OS << *AI;
    } else {
      auto *CI = llvm::dyn_cast_if_present<CallsiteInfo *>(getBase());
      assert(CI);
      OS << *CI;
    }
  }
};

/// CRTP derived class for graphs built from summary index (ThinLTO).
class IndexCallsiteContextGraph
    : public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
                                  IndexCall> {
public:
  IndexCallsiteContextGraph(
      ModuleSummaryIndex &Index,
      function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
          isPrevailing);

private:
  friend CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
                              IndexCall>;

  uint64_t getStackId(uint64_t IdOrIndex) const;
  bool calleeMatchesFunc(IndexCall &Call, const FunctionSummary *Func);
  uint64_t getLastStackId(IndexCall &Call);
  std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &Call);
  void updateAllocationCall(CallInfo &Call, AllocationType AllocType);
  void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
  CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
                       IndexCall>::FuncInfo
  cloneFunctionForCallsite(FuncInfo &Func, CallInfo &Call,
                           std::map<CallInfo, CallInfo> &CallMap,
                           std::vector<CallInfo> &CallsWithMetadataInFunc,
                           unsigned CloneNo);
  std::string getLabel(const FunctionSummary *Func, const IndexCall &Call,
                       unsigned CloneNo) const;

  // Saves mapping from function summaries containing memprof records back to
  // their VI, for use in checking and debugging.
  std::map<const FunctionSummary *, ValueInfo> FSToVIMap;

  const ModuleSummaryIndex &Index;
};
} // namespace

namespace llvm {
template <>
struct DenseMapInfo<typename CallsiteContextGraph<
    ModuleCallsiteContextGraph, Function, Instruction *>::CallInfo>
    : public DenseMapInfo<std::pair<Instruction *, unsigned>> {};
template <>
struct DenseMapInfo<typename CallsiteContextGraph<
    IndexCallsiteContextGraph, FunctionSummary, IndexCall>::CallInfo>
    : public DenseMapInfo<std::pair<IndexCall, unsigned>> {};
template <>
struct DenseMapInfo<IndexCall>
    : public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {};
} // end namespace llvm

namespace {

struct FieldSeparator {
  bool Skip = true;
  const char *Sep;

  FieldSeparator(const char *Sep = ", ") : Sep(Sep) {}
};

raw_ostream &operator<<(raw_ostream &OS, FieldSeparator &FS) {
  if (FS.Skip) {
    FS.Skip = false;
    return OS;
  }
  return OS << FS.Sep;
}
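
// Illustrative usage of FieldSeparator (sketch, not from the original sources):
// the separator is suppressed before the first printed field, so
//
//   FieldSeparator FS;
//   OS << FS << "a" << FS << "b" << FS << "c";
//
// prints "a, b, c".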

// Map the uint8_t alloc types (which may contain NotCold|Cold) to the alloc
// type we should actually use on the corresponding allocation.
// If we can't clone a node that has NotCold+Cold alloc type, we will fall
// back to using NotCold. So don't bother cloning to distinguish NotCold+Cold
// from NotCold.
AllocationType allocTypeToUse(uint8_t AllocTypes) {
  assert(AllocTypes != (uint8_t)AllocationType::None);
  if (AllocTypes ==
      ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold))
    return AllocationType::NotCold;
  else
    return (AllocationType)AllocTypes;
}
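
// Worked example of the mapping above (illustrative only):
//
//   allocTypeToUse((uint8_t)AllocationType::Cold) == AllocationType::Cold
//   allocTypeToUse((uint8_t)AllocationType::NotCold |
//                  (uint8_t)AllocationType::Cold) == AllocationType::NotCold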

// Helper to check if the alloc types for all edges recorded in the
// InAllocTypes vector match the alloc types for all edges in the Edges
// vector.
template <typename DerivedCCG, typename FuncTy, typename CallTy>
bool allocTypesMatch(
    const std::vector<uint8_t> &InAllocTypes,
    const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>>
        &Edges) {
  return std::equal(
      InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(),
      [](const uint8_t &l,
         const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &r) {
        // Can share if one of the edges is None type - don't
        // care about the type along that edge as it doesn't
        // exist for those context ids.
        if (l == (uint8_t)AllocationType::None ||
            r->AllocTypes == (uint8_t)AllocationType::None)
          return true;
        return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes);
      });
}

} // end anonymous namespace

template <typename DerivedCCG, typename FuncTy, typename CallTy>
typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst(
    const CallInfo &C) {
  ContextNode *Node = getNodeForAlloc(C);
  if (Node)
    return Node;

  return NonAllocationCallToContextNodeMap.lookup(C);
}

template <typename DerivedCCG, typename FuncTy, typename CallTy>
typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForAlloc(
    const CallInfo &C) {
  return AllocationCallToContextNodeMap.lookup(C);
}

template <typename DerivedCCG, typename FuncTy, typename CallTy>
typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForStackId(
    uint64_t StackId) {
  auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId);
  if (StackEntryNode != StackEntryIdToContextNodeMap.end())
    return StackEntryNode->second;
  return nullptr;
}

template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::unsetNodeForInst(
    const CallInfo &C) {
  AllocationCallToContextNodeMap.erase(C) ||
      NonAllocationCallToContextNodeMap.erase(C);
  assert(!AllocationCallToContextNodeMap.count(C) &&
         !NonAllocationCallToContextNodeMap.count(C));
}

template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
    addOrUpdateCallerEdge(ContextNode *Caller, AllocationType AllocType,
                          unsigned int ContextId) {
  for (auto &Edge : CallerEdges) {
    if (Edge->Caller == Caller) {
      Edge->AllocTypes |= (uint8_t)AllocType;
      Edge->getContextIds().insert(ContextId);
      return;
    }
  }
  std::shared_ptr<ContextEdge> Edge = std::make_shared<ContextEdge>(
      this, Caller, (uint8_t)AllocType, DenseSet<uint32_t>({ContextId}));
  CallerEdges.push_back(Edge);
  Caller->CalleeEdges.push_back(Edge);
}

template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<
    DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *Node) {
  for (auto EI = Node->CalleeEdges.begin(); EI != Node->CalleeEdges.end();) {
    auto Edge = *EI;
    if (Edge->AllocTypes == (uint8_t)AllocationType::None) {
      assert(Edge->ContextIds.empty());
      Edge->Callee->eraseCallerEdge(Edge.get());
      EI = Node->CalleeEdges.erase(EI);
    } else
      ++EI;
  }
}

template <typename DerivedCCG, typename FuncTy, typename CallTy>
typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
    findEdgeFromCallee(const ContextNode *Callee) {
  for (const auto &Edge : CalleeEdges)
    if (Edge->Callee == Callee)
      return Edge.get();
  return nullptr;
}

template <typename DerivedCCG, typename FuncTy, typename CallTy>
typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
    findEdgeFromCaller(const ContextNode *Caller) {
  for (const auto &Edge : CallerEdges)
    if (Edge->Caller == Caller)
      return Edge.get();
  return nullptr;
}

template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
    eraseCalleeEdge(const ContextEdge *Edge) {
  auto EI =
      std::find_if(CalleeEdges.begin(), CalleeEdges.end(),
                   [Edge](const std::shared_ptr<ContextEdge> &CalleeEdge) {
                     return CalleeEdge.get() == Edge;
                   });
  assert(EI != CalleeEdges.end());
  CalleeEdges.erase(EI);
}

template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
    eraseCallerEdge(const ContextEdge *Edge) {
  auto EI =
      std::find_if(CallerEdges.begin(), CallerEdges.end(),
                   [Edge](const std::shared_ptr<ContextEdge> &CallerEdge) {
                     return CallerEdge.get() == Edge;
                   });
  assert(EI != CallerEdges.end());
  CallerEdges.erase(EI);
}

template <typename DerivedCCG, typename FuncTy, typename CallTy>
uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType(
    DenseSet<uint32_t> &ContextIds) {
  uint8_t BothTypes =
      (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
  uint8_t AllocType = (uint8_t)AllocationType::None;
  for (auto Id : ContextIds) {
    AllocType |= (uint8_t)ContextIdToAllocationType[Id];
    // Bail early if alloc type reached both, no further refinement.
    if (AllocType == BothTypes)
      return AllocType;
  }
  return AllocType;
}

template <typename DerivedCCG, typename FuncTy, typename CallTy>
uint8_t
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypesImpl(
    const DenseSet<uint32_t> &Node1Ids, const DenseSet<uint32_t> &Node2Ids) {
  uint8_t BothTypes =
      (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
  uint8_t AllocType = (uint8_t)AllocationType::None;
  for (auto Id : Node1Ids) {
    if (!Node2Ids.count(Id))
      continue;
    AllocType |= (uint8_t)ContextIdToAllocationType[Id];
    // Bail early if alloc type reached both, no further refinement.
    if (AllocType == BothTypes)
      return AllocType;
  }
  return AllocType;
}

template <typename DerivedCCG, typename FuncTy, typename CallTy>
uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypes(
    const DenseSet<uint32_t> &Node1Ids, const DenseSet<uint32_t> &Node2Ids) {
  if (Node1Ids.size() < Node2Ids.size())
    return intersectAllocTypesImpl(Node1Ids, Node2Ids);
  else
    return intersectAllocTypesImpl(Node2Ids, Node1Ids);
}

template <typename DerivedCCG, typename FuncTy, typename CallTy>
typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode(
    CallInfo Call, const FuncTy *F) {
  assert(!getNodeForAlloc(Call));
  NodeOwner.push_back(
      std::make_unique<ContextNode>(/*IsAllocation=*/true, Call));
  ContextNode *AllocNode = NodeOwner.back().get();
  AllocationCallToContextNodeMap[Call] = AllocNode;
  NodeToCallingFunc[AllocNode] = F;
  // Use LastContextId as a unique id for MIB allocation nodes.
  AllocNode->OrigStackOrAllocId = LastContextId;
  // Alloc type should be updated as we add in the MIBs. We should assert
  // afterwards that it is not still None.
  AllocNode->AllocTypes = (uint8_t)AllocationType::None;

  return AllocNode;
}

template <typename DerivedCCG, typename FuncTy, typename CallTy>
template <class NodeT, class IteratorT>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
    ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext,
    CallStack<NodeT, IteratorT> &CallsiteContext, AllocationType AllocType) {
  // Treat the hot alloc type as NotCold until the disambiguation for "hot" is
  // done.
  if (AllocType == AllocationType::Hot)
    AllocType = AllocationType::NotCold;

  ContextIdToAllocationType[++LastContextId] = AllocType;

  // Update alloc type and context ids for this MIB.
  AllocNode->AllocTypes |= (uint8_t)AllocType;
  AllocNode->ContextIds.insert(LastContextId);

  // Now add or update nodes for each stack id in alloc's context.
  // Later when processing the stack ids on non-alloc callsites we will adjust
  // for any inlining in the context.
  ContextNode *PrevNode = AllocNode;
  // Look for recursion (direct recursion should have been collapsed by
  // module summary analysis, here we should just be detecting mutual
  // recursion). Mark these nodes so we don't try to clone.
  SmallSet<uint64_t, 8> StackIdSet;
  // Skip any stack ids on the allocation call itself (inlining).
  for (auto ContextIter = StackContext.beginAfterSharedPrefix(CallsiteContext);
       ContextIter != StackContext.end(); ++ContextIter) {
    auto StackId = getStackId(*ContextIter);
    ContextNode *StackNode = getNodeForStackId(StackId);
    if (!StackNode) {
      NodeOwner.push_back(
          std::make_unique<ContextNode>(/*IsAllocation=*/false));
      StackNode = NodeOwner.back().get();
      StackEntryIdToContextNodeMap[StackId] = StackNode;
      StackNode->OrigStackOrAllocId = StackId;
    }
    auto Ins = StackIdSet.insert(StackId);
    if (!Ins.second)
      StackNode->Recursive = true;
    StackNode->ContextIds.insert(LastContextId);
    StackNode->AllocTypes |= (uint8_t)AllocType;
    PrevNode->addOrUpdateCallerEdge(StackNode, AllocType, LastContextId);
    PrevNode = StackNode;
  }
}
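
// Sketch of what addStackNodesForMIB builds (hypothetical ids, not from a real
// profile): for a cold MIB whose context, after the prefix shared with the
// allocation's own callsite metadata, is the stack id sequence {1, 2, 3}
// (innermost frame first), the loop above creates or reuses stack nodes N1, N2
// and N3 and adds caller edges Alloc <- N1 <- N2 <- N3, each carrying the new
// context id and ORing in the Cold allocation type. A second, notcold MIB
// sharing frames 1 and 2 would reuse N1 and N2, add its context id to the
// existing edges, and only create the nodes and edges it is missing.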

template <typename DerivedCCG, typename FuncTy, typename CallTy>
DenseSet<uint32_t>
CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds(
    const DenseSet<uint32_t> &StackSequenceContextIds,
    DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
  DenseSet<uint32_t> NewContextIds;
  for (auto OldId : StackSequenceContextIds) {
    NewContextIds.insert(++LastContextId);
    OldToNewContextIds[OldId].insert(LastContextId);
    assert(ContextIdToAllocationType.count(OldId));
    // The new context has the same allocation type as original.
    ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId];
  }
  return NewContextIds;
}
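
// For example (illustrative): if LastContextId is 10 and the set {3, 5} is
// duplicated, the call returns {11, 12} and records each old id's new
// counterpart in OldToNewContextIds (e.g. 3 -> {11}, 5 -> {12}), giving each
// new id the same allocation type as its original. propagateDuplicateContextIds
// below then adds the new ids alongside the old ones on every node and edge
// that carried them.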

template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
    propagateDuplicateContextIds(
        const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
  // Build a set of duplicated context ids corresponding to the input id set.
  auto GetNewIds = [&OldToNewContextIds](const DenseSet<uint32_t> &ContextIds) {
    DenseSet<uint32_t> NewIds;
    for (auto Id : ContextIds)
      if (auto NewId = OldToNewContextIds.find(Id);
          NewId != OldToNewContextIds.end())
        NewIds.insert(NewId->second.begin(), NewId->second.end());
    return NewIds;
  };

  // Recursively update context ids sets along caller edges.
  auto UpdateCallers = [&](ContextNode *Node,
                           DenseSet<const ContextEdge *> &Visited,
                           auto &&UpdateCallers) -> void {
    for (const auto &Edge : Node->CallerEdges) {
      auto Inserted = Visited.insert(Edge.get());
      if (!Inserted.second)
        continue;
      ContextNode *NextNode = Edge->Caller;
      DenseSet<uint32_t> NewIdsToAdd = GetNewIds(Edge->getContextIds());
      // Only need to recursively iterate to NextNode via this caller edge if
      // it resulted in any added ids to NextNode.
      if (!NewIdsToAdd.empty()) {
        Edge->getContextIds().insert(NewIdsToAdd.begin(), NewIdsToAdd.end());
        NextNode->ContextIds.insert(NewIdsToAdd.begin(), NewIdsToAdd.end());
        UpdateCallers(NextNode, Visited, UpdateCallers);
      }
    }
  };

  DenseSet<const ContextEdge *> Visited;
  for (auto &Entry : AllocationCallToContextNodeMap) {
    auto *Node = Entry.second;
    // Update ids on the allocation nodes before calling the recursive
    // update along caller edges, since this simplifies the logic during
    // that traversal.
    DenseSet<uint32_t> NewIdsToAdd = GetNewIds(Node->ContextIds);
    Node->ContextIds.insert(NewIdsToAdd.begin(), NewIdsToAdd.end());
    UpdateCallers(Node, Visited, UpdateCallers);
  }
}

template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
    ContextNode *NewNode, ContextNode *OrigNode, bool TowardsCallee) {
  // Make a copy of the context ids, since this will be adjusted below as they
  // are moved.
  DenseSet<uint32_t> RemainingContextIds = NewNode->ContextIds;
  auto &OrigEdges =
      TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges;
  // Increment iterator in loop so that we can remove edges as needed.
  for (auto EI = OrigEdges.begin(); EI != OrigEdges.end();) {
    auto Edge = *EI;
    // Remove any matching context ids from Edge and collect the set that were
    // found and removed; these become the new edge's context ids. Also update
    // the remaining (not found) ids.
    DenseSet<uint32_t> NewEdgeContextIds, NotFoundContextIds;
    set_subtract(Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds,
                 NotFoundContextIds);
    RemainingContextIds.swap(NotFoundContextIds);
    // If no matching context ids for this edge, skip it.
    if (NewEdgeContextIds.empty()) {
      ++EI;
      continue;
    }
    if (TowardsCallee) {
      auto NewEdge = std::make_shared<ContextEdge>(
          Edge->Callee, NewNode, computeAllocType(NewEdgeContextIds),
          NewEdgeContextIds);
      NewNode->CalleeEdges.push_back(NewEdge);
      NewEdge->Callee->CallerEdges.push_back(NewEdge);
    } else {
      auto NewEdge = std::make_shared<ContextEdge>(
          NewNode, Edge->Caller, computeAllocType(NewEdgeContextIds),
          NewEdgeContextIds);
      NewNode->CallerEdges.push_back(NewEdge);
      NewEdge->Caller->CalleeEdges.push_back(NewEdge);
    }
    // Remove old edge if context ids empty.
    if (Edge->getContextIds().empty()) {
      if (TowardsCallee) {
        Edge->Callee->eraseCallerEdge(Edge.get());
        EI = OrigNode->CalleeEdges.erase(EI);
      } else {
        Edge->Caller->eraseCalleeEdge(Edge.get());
        EI = OrigNode->CallerEdges.erase(EI);
      }
      continue;
    }
    ++EI;
  }
}

template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
    assignStackNodesPostOrder(ContextNode *Node,
                              DenseSet<const ContextNode *> &Visited,
                              DenseMap<uint64_t, std::vector<CallContextInfo>>
                                  &StackIdToMatchingCalls) {
  auto Inserted = Visited.insert(Node);
  if (!Inserted.second)
    return;
  // Post order traversal. Iterate over a copy since we may add nodes and
  // therefore new callers during the recursive call, invalidating any
  // iterator over the original edge vector. We don't need to process these
  // new nodes as they were already processed on creation.
  auto CallerEdges = Node->CallerEdges;
  for (auto &Edge : CallerEdges) {
    // Skip any that have been removed during the recursion.
    if (!Edge)
      continue;
    assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls);
  }

  // If this node's stack id is in the map, update the graph to contain new
  // nodes representing any inlining at interior callsites. Note we move the
  // associated context ids over to the new nodes.

  // Ignore this node if it is for an allocation or we didn't record any
  // stack id lists ending at it.
  if (Node->IsAllocation ||
      !StackIdToMatchingCalls.count(Node->OrigStackOrAllocId))
    return;

  auto &Calls = StackIdToMatchingCalls[Node->OrigStackOrAllocId];
  // Handle the simple case first. A single call with a single stack id.
  // In this case there is no need to create any new context nodes, simply
  // assign the context node for stack id to this Call.
  if (Calls.size() == 1) {
    auto &[Call, Ids, Func, SavedContextIds] = Calls[0];
    if (Ids.size() == 1) {
      assert(SavedContextIds.empty());
      // It should be this Node
      assert(Node == getNodeForStackId(Ids[0]));
      if (Node->Recursive)
        return;
      Node->setCall(Call);
      NonAllocationCallToContextNodeMap[Call] = Node;
      NodeToCallingFunc[Node] = Func;
      return;
    }
  }

  // Find the node for the last stack id, which should be the same
  // across all calls recorded for this id, and is this node's id.
  uint64_t LastId = Node->OrigStackOrAllocId;
  ContextNode *LastNode = getNodeForStackId(LastId);
  // We should only have kept stack ids that had nodes.
  assert(LastNode);

  for (unsigned I = 0; I < Calls.size(); I++) {
    auto &[Call, Ids, Func, SavedContextIds] = Calls[I];
    // Skip any for which we didn't assign any ids, these don't get a node in
    // the graph.
    if (SavedContextIds.empty())
      continue;

    assert(LastId == Ids.back());

    ContextNode *FirstNode = getNodeForStackId(Ids[0]);
    assert(FirstNode);

    // Recompute the context ids for this stack id sequence (the
    // intersection of the context ids of the corresponding nodes).
    // Start with the ids we saved in the map for this call, which could be
    // duplicated context ids. We have to recompute as we might have overlap
    // between the saved context ids for different last nodes, and might have
    // removed them already during the post order traversal.
    set_intersect(SavedContextIds, FirstNode->ContextIds);
    ContextNode *PrevNode = nullptr;
    for (auto Id : Ids) {
      ContextNode *CurNode = getNodeForStackId(Id);
      // We should only have kept stack ids that had nodes and weren't
      // recursive.
      assert(CurNode);
      assert(!CurNode->Recursive);
      if (!PrevNode) {
        PrevNode = CurNode;
        continue;
      }
      auto *Edge = CurNode->findEdgeFromCallee(PrevNode);
      if (!Edge) {
        SavedContextIds.clear();
        break;
      }
      PrevNode = CurNode;
      set_intersect(SavedContextIds, Edge->getContextIds());

      // If we now have no context ids for clone, skip this call.
      if (SavedContextIds.empty())
        break;
    }
    if (SavedContextIds.empty())
      continue;

    // Create new context node.
    NodeOwner.push_back(
        std::make_unique<ContextNode>(/*IsAllocation=*/false, Call));
    ContextNode *NewNode = NodeOwner.back().get();
    NodeToCallingFunc[NewNode] = Func;
    NonAllocationCallToContextNodeMap[Call] = NewNode;
    NewNode->ContextIds = SavedContextIds;
    NewNode->AllocTypes = computeAllocType(NewNode->ContextIds);

    // Connect to callees of innermost stack frame in inlined call chain.
    // This updates the context ids for FirstNode's callees to reflect those
    // moved to NewNode.
    connectNewNode(NewNode, FirstNode, /*TowardsCallee=*/true);

    // Connect to callers of outermost stack frame in inlined call chain.
    // This updates the context ids for LastNode's callers to reflect those
    // moved to NewNode.
    connectNewNode(NewNode, LastNode, /*TowardsCallee=*/false);

    // Now we need to remove context ids from edges/nodes between First and
    // Last Node.
    PrevNode = nullptr;
    for (auto Id : Ids) {
      ContextNode *CurNode = getNodeForStackId(Id);
      // We should only have kept stack ids that had nodes.
      assert(CurNode);

      // Remove the context ids moved to NewNode from CurNode, and the
      // edge from the prior node.
      set_subtract(CurNode->ContextIds, NewNode->ContextIds);
      if (PrevNode) {
        auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode);
        assert(PrevEdge);
        set_subtract(PrevEdge->getContextIds(), NewNode->ContextIds);
        if (PrevEdge->getContextIds().empty()) {
          PrevNode->eraseCallerEdge(PrevEdge);
          CurNode->eraseCalleeEdge(PrevEdge);
        }
      }
      PrevNode = CurNode;
    }
  }
}

template <typename DerivedCCG, typename FuncTy, typename CallTy>
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
  // Map of stack id to all calls with that as the last (outermost caller)
  // callsite id that has a context node (some might not due to pruning
  // performed during matching of the allocation profile contexts).
  // The CallContextInfo contains the Call and a list of its stack ids with
  // ContextNodes, the function containing Call, and the set of context ids
  // the analysis will eventually identify for use in any new node created
  // for that callsite.
  DenseMap<uint64_t, std::vector<CallContextInfo>> StackIdToMatchingCalls;
  for (auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
    for (auto &Call : CallsWithMetadata) {
      // Ignore allocations, already handled.
      if (AllocationCallToContextNodeMap.count(Call))
        continue;
      auto StackIdsWithContextNodes =
          getStackIdsWithContextNodesForCall(Call.call());
      // If there were no nodes created for MIBs on allocs (maybe this was in
      // the unambiguous part of the MIB stack that was pruned), ignore.
      if (StackIdsWithContextNodes.empty())
        continue;
      // Otherwise, record this Call along with the list of ids for the last
      // (outermost caller) stack id with a node.
      StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back(
          {Call.call(), StackIdsWithContextNodes, Func, {}});
    }
  }

  // First make a pass through all stack ids that correspond to a call,
  // as identified in the above loop. Compute the context ids corresponding to
  // each of these calls when they correspond to multiple stack ids due to
  // inlining. Perform any duplication of context ids required when
  // there is more than one call with the same stack ids. Their (possibly newly
  // duplicated) context ids are saved in the StackIdToMatchingCalls map.
  DenseMap<uint32_t, DenseSet<uint32_t>> OldToNewContextIds;
  for (auto &It : StackIdToMatchingCalls) {
    auto &Calls = It.getSecond();
    // Skip single calls with a single stack id. These don't need a new node.
    if (Calls.size() == 1) {
      auto &Ids = std::get<1>(Calls[0]);
      if (Ids.size() == 1)
        continue;
    }
    // In order to do the best and maximal matching of inlined calls to context
    // node sequences we will sort the vectors of stack ids in descending order
    // of length, and within each length, lexicographically by stack id. The
    // latter is so that we can specially handle calls that have identical stack
    // id sequences (either due to cloning or artificially because of the MIB
    // context pruning).
    std::stable_sort(Calls.begin(), Calls.end(),
                     [](const CallContextInfo &A, const CallContextInfo &B) {
                       auto &IdsA = std::get<1>(A);
                       auto &IdsB = std::get<1>(B);
                       return IdsA.size() > IdsB.size() ||
                              (IdsA.size() == IdsB.size() && IdsA < IdsB);
                     });

    // Find the node for the last stack id, which should be the same
    // across all calls recorded for this id, and is the id for this
    // entry in the StackIdToMatchingCalls map.
    uint64_t LastId = It.getFirst();
    ContextNode *LastNode = getNodeForStackId(LastId);
    // We should only have kept stack ids that had nodes.
    assert(LastNode);

    if (LastNode->Recursive)
      continue;

    // Initialize the context ids with the last node's. We will subsequently
    // refine the context ids by computing the intersection along all edges.
    DenseSet<uint32_t> LastNodeContextIds = LastNode->ContextIds;
    assert(!LastNodeContextIds.empty());

    for (unsigned I = 0; I < Calls.size(); I++) {
      auto &[Call, Ids, Func, SavedContextIds] = Calls[I];
      assert(SavedContextIds.empty());
      assert(LastId == Ids.back());

      // First compute the context ids for this stack id sequence (the
      // intersection of the context ids of the corresponding nodes).
      // Start with the remaining saved ids for the last node.
      assert(!LastNodeContextIds.empty());
      DenseSet<uint32_t> StackSequenceContextIds = LastNodeContextIds;

      ContextNode *PrevNode = LastNode;
      ContextNode *CurNode = LastNode;
      bool Skip = false;

      // Iterate backwards through the stack Ids, starting after the last Id
      // in the list, which was handled once outside for all Calls.
      for (auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
        auto Id = *IdIter;
        CurNode = getNodeForStackId(Id);
        // We should only have kept stack ids that had nodes.
        assert(CurNode);

        if (CurNode->Recursive) {
          Skip = true;
          break;
        }

        auto *Edge = CurNode->findEdgeFromCaller(PrevNode);
        // If there is no edge then the nodes belong to different MIB contexts,
        // and we should skip this inlined context sequence. For example, this
        // particular inlined context may include stack ids A->B, and we may
        // indeed have nodes for both A and B, but it is possible that they were
        // never profiled in sequence in a single MIB for any allocation (i.e.
        // we might have profiled an allocation that involves the callsite A,
        // but through a different one of its callee callsites, and we might
        // have profiled an allocation that involves callsite B, but reached
        // from a different caller callsite).
        if (!Edge) {
          Skip = true;
          break;
        }
        PrevNode = CurNode;

        // Update the context ids, which is the intersection of the ids along
        // all edges in the sequence.
        set_intersect(StackSequenceContextIds, Edge->getContextIds());

        // If we now have no context ids for clone, skip this call.
        if (StackSequenceContextIds.empty()) {
          Skip = true;
          break;
        }
      }
      if (Skip)
        continue;

      // If some of this call's stack ids did not have corresponding nodes (due
      // to pruning), don't include any context ids for contexts that extend
      // beyond these nodes. Otherwise we would be matching part of unrelated /
      // not fully matching stack contexts. To do this, subtract any context ids
      // found in caller nodes of the last node found above.
      if (Ids.back() != getLastStackId(Call)) {
        for (const auto &PE : CurNode->CallerEdges) {
          set_subtract(StackSequenceContextIds, PE->getContextIds());
          if (StackSequenceContextIds.empty())
            break;
        }
        // If we now have no context ids for clone, skip this call.
        if (StackSequenceContextIds.empty())
          continue;
      }

      // Check if the next set of stack ids is the same (since the Calls vector
      // of tuples is sorted by the stack ids we can just look at the next one).
      bool DuplicateContextIds = false;
      if (I + 1 < Calls.size()) {
        auto NextIds = std::get<1>(Calls[I + 1]);
        DuplicateContextIds = Ids == NextIds;
      }

      // If we don't have duplicate context ids, then we can assign all the
      // context ids computed for the original node sequence to this call.
      // If there are duplicate calls with the same stack ids then we synthesize
      // new context ids that are duplicates of the originals. These are
      // assigned to SavedContextIds, which is a reference into the map entry
      // for this call, allowing us to access these ids later on.
      OldToNewContextIds.reserve(OldToNewContextIds.size() +
                                 StackSequenceContextIds.size());
      SavedContextIds =
          DuplicateContextIds
              ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds)
              : StackSequenceContextIds;
      assert(!SavedContextIds.empty());

      if (!DuplicateContextIds) {
        // Update saved last node's context ids to remove those that are
        // assigned to other calls, so that it is ready for the next call at
        // this stack id.
        set_subtract(LastNodeContextIds, StackSequenceContextIds);
        if (LastNodeContextIds.empty())
          break;
      }
    }
  }

  // Propagate the duplicate context ids over the graph.
  propagateDuplicateContextIds(OldToNewContextIds);

  if (VerifyCCG)
    check();

  // Now perform a post-order traversal over the graph, starting with the
  // allocation nodes, essentially processing nodes from callers to callees.
  // For any that contains an id in the map, update the graph to contain new
  // nodes representing any inlining at interior callsites. Note we move the
  // associated context ids over to the new nodes.
  DenseSet<const ContextNode *> Visited;
  for (auto &Entry : AllocationCallToContextNodeMap)
    assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls);
}

uint64_t ModuleCallsiteContextGraph::getLastStackId(Instruction *Call) {
  CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
      Call->getMetadata(LLVMContext::MD_callsite));
  return CallsiteContext.back();
}

uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &Call) {
  assert(isa<CallsiteInfo *>(Call.getBase()));
  CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
      CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(Call.getBase()));
  // Need to convert index into stack id.
  return Index.getStackIdAtIndex(CallsiteContext.back());
}

static const std::string MemProfCloneSuffix = ".memprof.";

static std::string getMemProfFuncName(Twine Base, unsigned CloneNo) {
  // We use CloneNo == 0 to refer to the original version, which doesn't get
  // renamed with a suffix.
  if (!CloneNo)
    return Base.str();
  return (Base + MemProfCloneSuffix + Twine(CloneNo)).str();
}
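
// For example (illustrative function names), getMemProfFuncName("foo", 0)
// returns "foo" and getMemProfFuncName("foo", 2) returns "foo.memprof.2".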
1400 std::string ModuleCallsiteContextGraph::getLabel(const Function *Func,
1401 const Instruction *Call,
1402 unsigned CloneNo) const {
1403 return (Twine(Call->getFunction()->getName()) + " -> " +
1404 cast<CallBase>(Call)->getCalledFunction()->getName())
1405 .str();
1408 std::string IndexCallsiteContextGraph::getLabel(const FunctionSummary *Func,
1409 const IndexCall &Call,
1410 unsigned CloneNo) const {
1411 auto VI = FSToVIMap.find(Func);
1412 assert(VI != FSToVIMap.end());
1413 if (isa<AllocInfo *>(Call.getBase()))
1414 return (VI->second.name() + " -> alloc").str();
1415 else {
1416 auto *Callsite = dyn_cast_if_present<CallsiteInfo *>(Call.getBase());
1417 return (VI->second.name() + " -> " +
1418 getMemProfFuncName(Callsite->Callee.name(),
1419 Callsite->Clones[CloneNo]))
1420 .str();
1424 std::vector<uint64_t>
1425 ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall(
1426 Instruction *Call) {
1427 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
1428 Call->getMetadata(LLVMContext::MD_callsite));
1429 return getStackIdsWithContextNodes<MDNode, MDNode::op_iterator>(
1430 CallsiteContext);
1433 std::vector<uint64_t>
1434 IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &Call) {
1435 assert(isa<CallsiteInfo *>(Call.getBase()));
1436 CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
1437 CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(Call.getBase()));
1438 return getStackIdsWithContextNodes<CallsiteInfo,
1439 SmallVector<unsigned>::const_iterator>(
1440 CallsiteContext);
1443 template <typename DerivedCCG, typename FuncTy, typename CallTy>
1444 template <class NodeT, class IteratorT>
1445 std::vector<uint64_t>
1446 CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes(
1447 CallStack<NodeT, IteratorT> &CallsiteContext) {
1448 std::vector<uint64_t> StackIds;
1449 for (auto IdOrIndex : CallsiteContext) {
1450 auto StackId = getStackId(IdOrIndex);
1451 ContextNode *Node = getNodeForStackId(StackId);
1452 if (!Node)
1453 break;
1454 StackIds.push_back(StackId);
1456 return StackIds;
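// Editorial note: since the loop above stops at the first stack id that has
// no context node, only the leading portion of the callsite context that is
// covered by the graph is returned. Illustrative (hypothetical) example: for
// a callsite context with stack ids [A, B, C] where nodes exist for A and B
// but not C, the result would be {A, B}.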
1459 ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
1460 Module &M, function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter)
1461 : Mod(M), OREGetter(OREGetter) {
1462 for (auto &F : M) {
1463 std::vector<CallInfo> CallsWithMetadata;
1464 for (auto &BB : F) {
1465 for (auto &I : BB) {
1466 if (!isa<CallBase>(I))
1467 continue;
1468 if (auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof)) {
1469 CallsWithMetadata.push_back(&I);
1470 auto *AllocNode = addAllocNode(&I, &F);
1471 auto *CallsiteMD = I.getMetadata(LLVMContext::MD_callsite);
1472 assert(CallsiteMD);
1473 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(CallsiteMD);
1474 // Add all of the MIBs and their stack nodes.
1475 for (auto &MDOp : MemProfMD->operands()) {
1476 auto *MIBMD = cast<const MDNode>(MDOp);
1477 MDNode *StackNode = getMIBStackNode(MIBMD);
1478 assert(StackNode);
1479 CallStack<MDNode, MDNode::op_iterator> StackContext(StackNode);
1480 addStackNodesForMIB<MDNode, MDNode::op_iterator>(
1481 AllocNode, StackContext, CallsiteContext,
1482 getMIBAllocType(MIBMD));
1484 assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None);
1485 // The memprof and callsite metadata on memory allocations are no longer
1486 // needed.
1487 I.setMetadata(LLVMContext::MD_memprof, nullptr);
1488 I.setMetadata(LLVMContext::MD_callsite, nullptr);
1490 // For callsite metadata, add to list for this function for later use.
1491 else if (I.getMetadata(LLVMContext::MD_callsite))
1492 CallsWithMetadata.push_back(&I);
1495 if (!CallsWithMetadata.empty())
1496 FuncToCallsWithMetadata.push_back({&F, CallsWithMetadata});
1499 if (DumpCCG) {
1500 dbgs() << "CCG before updating call stack chains:\n";
1501 dbgs() << *this;
1504 if (ExportToDot)
1505 exportToDot("prestackupdate");
1507 updateStackNodes();
1509 handleCallsitesWithMultipleTargets();
1511 // Strip off remaining callsite metadata, no longer needed.
1512 for (auto &FuncEntry : FuncToCallsWithMetadata)
1513 for (auto &Call : FuncEntry.second)
1514 Call.call()->setMetadata(LLVMContext::MD_callsite, nullptr);
1517 IndexCallsiteContextGraph::IndexCallsiteContextGraph(
1518 ModuleSummaryIndex &Index,
1519 function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
1520 isPrevailing)
1521 : Index(Index) {
1522 for (auto &I : Index) {
1523 auto VI = Index.getValueInfo(I);
1524 for (auto &S : VI.getSummaryList()) {
1525 // We should only add the prevailing nodes. Otherwise we may try to clone
1526 // in a weak copy that won't be linked (and may be different than the
1527 // prevailing version).
1528 // We only keep the memprof summary on the prevailing copy now when
1529 // building the combined index, as a space optimization; however, don't
1530 // rely on this optimization. The linker doesn't resolve local linkage
1531 // values, so don't check whether those are prevailing.
1532 if (!GlobalValue::isLocalLinkage(S->linkage()) &&
1533 !isPrevailing(VI.getGUID(), S.get()))
1534 continue;
1535 auto *FS = dyn_cast<FunctionSummary>(S.get());
1536 if (!FS)
1537 continue;
1538 std::vector<CallInfo> CallsWithMetadata;
1539 if (!FS->allocs().empty()) {
1540 for (auto &AN : FS->mutableAllocs()) {
1541 // This can happen because of recursion elimination handling that
1542 // currently exists in ModuleSummaryAnalysis. Skip these for now.
1543 // We still added them to the summary because we need to be able to
1544 // correlate properly in applyImport in the backends.
1545 if (AN.MIBs.empty())
1546 continue;
1547 CallsWithMetadata.push_back({&AN});
1548 auto *AllocNode = addAllocNode({&AN}, FS);
1549 // Pass an empty CallStack to the CallsiteContext (second)
1550 // parameter, since for ThinLTO we already collapsed out the inlined
1551 // stack ids on the allocation call during ModuleSummaryAnalysis.
1552 CallStack<MIBInfo, SmallVector<unsigned>::const_iterator>
1553 EmptyContext;
1554 // Now add all of the MIBs and their stack nodes.
1555 for (auto &MIB : AN.MIBs) {
1556 CallStack<MIBInfo, SmallVector<unsigned>::const_iterator>
1557 StackContext(&MIB);
1558 addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>(
1559 AllocNode, StackContext, EmptyContext, MIB.AllocType);
1561 assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None);
1562 // Initialize version 0 on the summary alloc node to the current alloc
1563 // type, unless it has both types, in which case make it default, so
1564 // that in the case where we aren't able to clone, the original version
1565 // always ends up with the default allocation behavior.
1566 AN.Versions[0] = (uint8_t)allocTypeToUse(AllocNode->AllocTypes);
1569 // For callsite metadata, add to list for this function for later use.
1570 if (!FS->callsites().empty())
1571 for (auto &SN : FS->mutableCallsites())
1572 CallsWithMetadata.push_back({&SN});
1574 if (!CallsWithMetadata.empty())
1575 FuncToCallsWithMetadata.push_back({FS, CallsWithMetadata});
1577 if (!FS->allocs().empty() || !FS->callsites().empty())
1578 FSToVIMap[FS] = VI;
1582 if (DumpCCG) {
1583 dbgs() << "CCG before updating call stack chains:\n";
1584 dbgs() << *this;
1587 if (ExportToDot)
1588 exportToDot("prestackupdate");
1590 updateStackNodes();
1592 handleCallsitesWithMultipleTargets();
1595 template <typename DerivedCCG, typename FuncTy, typename CallTy>
1596 void CallsiteContextGraph<DerivedCCG, FuncTy,
1597 CallTy>::handleCallsitesWithMultipleTargets() {
1598 // Look for and work around callsites that call multiple functions.
1599 // This can happen for indirect calls, which need better handling, and in
1600 // rarer cases (e.g. macro expansion).
1601 // TODO: To fix this for indirect calls we will want to perform speculative
1602 // devirtualization using either the normal PGO info with ICP, or using the
1603 // information in the profiled MemProf contexts. We can do this prior to
1604 // this transformation for regular LTO, and for ThinLTO we can simulate that
1605 // effect in the summary and perform the actual speculative devirtualization
1606 // while cloning in the ThinLTO backend.
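// Editorial illustration of the workaround below (hypothetical example): a
// profiled indirect call that was observed reaching two different callees
// ends up with callee edges into nodes from more than one function, and
// calleeMatchesFunc cannot match them all; such a node has its call nulled
// out and is dropped from the map so it is simply skipped during cloning.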
1607 for (auto Entry = NonAllocationCallToContextNodeMap.begin();
1608 Entry != NonAllocationCallToContextNodeMap.end();) {
1609 auto *Node = Entry->second;
1610 assert(Node->Clones.empty());
1611 // Check all node callees and see if in the same function.
1612 bool Removed = false;
1613 auto Call = Node->Call.call();
1614 for (auto &Edge : Node->CalleeEdges) {
1615 if (!Edge->Callee->hasCall())
1616 continue;
1617 assert(NodeToCallingFunc.count(Edge->Callee));
1618 // Check if the called function matches that of the callee node.
1619 if (calleeMatchesFunc(Call, NodeToCallingFunc[Edge->Callee]))
1620 continue;
1621 // Work around by setting Node to have a null call, so it gets
1622 // skipped during cloning. Otherwise assignFunctions will assert
1623 // because its data structures are not designed to handle this case.
1624 Entry = NonAllocationCallToContextNodeMap.erase(Entry);
1625 Node->setCall(CallInfo());
1626 Removed = true;
1627 break;
1629 if (!Removed)
1630 Entry++;
1634 uint64_t ModuleCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const {
1635 // In the Module (IR) case this is already the Id.
1636 return IdOrIndex;
1639 uint64_t IndexCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const {
1640 // In the Index case this is an index into the stack id list in the summary
1641 // index; convert it to an Id.
1642 return Index.getStackIdAtIndex(IdOrIndex);
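// Editorial contrast of the two overloads above, with hypothetical values:
//   ModuleCallsiteContextGraph::getStackId(0x1234) == 0x1234 (already the id)
//   IndexCallsiteContextGraph::getStackId(5) == the stack id stored at index 5
//   of the summary index's stack id list (via Index.getStackIdAtIndex(5)).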
1645 bool ModuleCallsiteContextGraph::calleeMatchesFunc(Instruction *Call,
1646 const Function *Func) {
1647 auto *CB = dyn_cast<CallBase>(Call);
1648 if (!CB->getCalledOperand())
1649 return false;
1650 auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
1651 auto *CalleeFunc = dyn_cast<Function>(CalleeVal);
1652 if (CalleeFunc == Func)
1653 return true;
1654 auto *Alias = dyn_cast<GlobalAlias>(CalleeVal);
1655 return Alias && Alias->getAliasee() == Func;
1658 bool IndexCallsiteContextGraph::calleeMatchesFunc(IndexCall &Call,
1659 const FunctionSummary *Func) {
1660 ValueInfo Callee =
1661 dyn_cast_if_present<CallsiteInfo *>(Call.getBase())->Callee;
1662 // If there is no summary list then this is a call to an externally defined
1663 // symbol.
1664 AliasSummary *Alias =
1665 Callee.getSummaryList().empty()
1666 ? nullptr
1667 : dyn_cast<AliasSummary>(Callee.getSummaryList()[0].get());
1668 assert(FSToVIMap.count(Func));
1669 return Callee == FSToVIMap[Func] ||
1670 // If callee is an alias, check the aliasee, since only function
1671 // summary base objects will contain the stack node summaries and thus
1672 // get a context node.
1673 (Alias && Alias->getAliaseeVI() == FSToVIMap[Func]);
1676 static std::string getAllocTypeString(uint8_t AllocTypes) {
1677 if (!AllocTypes)
1678 return "None";
1679 std::string Str;
1680 if (AllocTypes & (uint8_t)AllocationType::NotCold)
1681 Str += "NotCold";
1682 if (AllocTypes & (uint8_t)AllocationType::Cold)
1683 Str += "Cold";
1684 return Str;
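// Editorial illustration of the strings produced by the helper above:
//   getAllocTypeString(0)                                  -> "None"
//   getAllocTypeString((uint8_t)AllocationType::NotCold)   -> "NotCold"
//   getAllocTypeString((uint8_t)AllocationType::Cold)      -> "Cold"
//   getAllocTypeString((uint8_t)AllocationType::NotCold |
//                      (uint8_t)AllocationType::Cold)      -> "NotColdCold"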
1687 template <typename DerivedCCG, typename FuncTy, typename CallTy>
1688 void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump()
1689 const {
1690 print(dbgs());
1691 dbgs() << "\n";
1694 template <typename DerivedCCG, typename FuncTy, typename CallTy>
1695 void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
1696 raw_ostream &OS) const {
1697 OS << "Node " << this << "\n";
1698 OS << "\t";
1699 printCall(OS);
1700 if (Recursive)
1701 OS << " (recursive)";
1702 OS << "\n";
1703 OS << "\tAllocTypes: " << getAllocTypeString(AllocTypes) << "\n";
1704 OS << "\tContextIds:";
1705 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
1706 std::sort(SortedIds.begin(), SortedIds.end());
1707 for (auto Id : SortedIds)
1708 OS << " " << Id;
1709 OS << "\n";
1710 OS << "\tCalleeEdges:\n";
1711 for (auto &Edge : CalleeEdges)
1712 OS << "\t\t" << *Edge << "\n";
1713 OS << "\tCallerEdges:\n";
1714 for (auto &Edge : CallerEdges)
1715 OS << "\t\t" << *Edge << "\n";
1716 if (!Clones.empty()) {
1717 OS << "\tClones: ";
1718 FieldSeparator FS;
1719 for (auto *Clone : Clones)
1720 OS << FS << Clone;
1721 OS << "\n";
1722 } else if (CloneOf) {
1723 OS << "\tClone of " << CloneOf << "\n";
1727 template <typename DerivedCCG, typename FuncTy, typename CallTy>
1728 void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::dump()
1729 const {
1730 print(dbgs());
1731 dbgs() << "\n";
1734 template <typename DerivedCCG, typename FuncTy, typename CallTy>
1735 void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::print(
1736 raw_ostream &OS) const {
1737 OS << "Edge from Callee " << Callee << " to Caller: " << Caller
1738 << " AllocTypes: " << getAllocTypeString(AllocTypes);
1739 OS << " ContextIds:";
1740 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
1741 std::sort(SortedIds.begin(), SortedIds.end());
1742 for (auto Id : SortedIds)
1743 OS << " " << Id;
1746 template <typename DerivedCCG, typename FuncTy, typename CallTy>
1747 void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::dump() const {
1748 print(dbgs());
1751 template <typename DerivedCCG, typename FuncTy, typename CallTy>
1752 void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
1753 raw_ostream &OS) const {
1754 OS << "Callsite Context Graph:\n";
1755 using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
1756 for (const auto Node : nodes<GraphType>(this)) {
1757 if (Node->isRemoved())
1758 continue;
1759 Node->print(OS);
1760 OS << "\n";
1764 template <typename DerivedCCG, typename FuncTy, typename CallTy>
1765 static void checkEdge(
1766 const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) {
1767 // Confirm that alloc type is not None and that we have at least one context
1768 // id.
1769 assert(Edge->AllocTypes != (uint8_t)AllocationType::None);
1770 assert(!Edge->ContextIds.empty());
1773 template <typename DerivedCCG, typename FuncTy, typename CallTy>
1774 static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node,
1775 bool CheckEdges = true) {
1776 if (Node->isRemoved())
1777 return;
1778 // Node's context ids should be the union of both its callee and caller edge
1779 // context ids.
1780 if (Node->CallerEdges.size()) {
1781 auto EI = Node->CallerEdges.begin();
1782 auto &FirstEdge = *EI;
1783 EI++;
1784 DenseSet<uint32_t> CallerEdgeContextIds(FirstEdge->ContextIds);
1785 for (; EI != Node->CallerEdges.end(); EI++) {
1786 const auto &Edge = *EI;
1787 if (CheckEdges)
1788 checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
1789 set_union(CallerEdgeContextIds, Edge->ContextIds);
1791 // Node can have more context ids than callers if some contexts terminate at
1792 // node and some are longer.
1793 assert(Node->ContextIds == CallerEdgeContextIds ||
1794 set_is_subset(CallerEdgeContextIds, Node->ContextIds));
1796 if (Node->CalleeEdges.size()) {
1797 auto EI = Node->CalleeEdges.begin();
1798 auto &FirstEdge = *EI;
1799 EI++;
1800 DenseSet<uint32_t> CalleeEdgeContextIds(FirstEdge->ContextIds);
1801 for (; EI != Node->CalleeEdges.end(); EI++) {
1802 const auto &Edge = *EI;
1803 if (CheckEdges)
1804 checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
1805 set_union(CalleeEdgeContextIds, Edge->ContextIds);
1807 assert(Node->ContextIds == CalleeEdgeContextIds);
1811 template <typename DerivedCCG, typename FuncTy, typename CallTy>
1812 void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check() const {
1813 using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
1814 for (const auto Node : nodes<GraphType>(this)) {
1815 checkNode<DerivedCCG, FuncTy, CallTy>(Node, /*CheckEdges=*/false);
1816 for (auto &Edge : Node->CallerEdges)
1817 checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
1821 template <typename DerivedCCG, typename FuncTy, typename CallTy>
1822 struct GraphTraits<const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *> {
1823 using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
1824 using NodeRef = const ContextNode<DerivedCCG, FuncTy, CallTy> *;
1826 using NodePtrTy = std::unique_ptr<ContextNode<DerivedCCG, FuncTy, CallTy>>;
1827 static NodeRef getNode(const NodePtrTy &P) { return P.get(); }
1829 using nodes_iterator =
1830 mapped_iterator<typename std::vector<NodePtrTy>::const_iterator,
1831 decltype(&getNode)>;
1833 static nodes_iterator nodes_begin(GraphType G) {
1834 return nodes_iterator(G->NodeOwner.begin(), &getNode);
1837 static nodes_iterator nodes_end(GraphType G) {
1838 return nodes_iterator(G->NodeOwner.end(), &getNode);
1841 static NodeRef getEntryNode(GraphType G) {
1842 return G->NodeOwner.begin()->get();
1845 using EdgePtrTy = std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>;
1846 static const ContextNode<DerivedCCG, FuncTy, CallTy> *
1847 GetCallee(const EdgePtrTy &P) {
1848 return P->Callee;
1851 using ChildIteratorType =
1852 mapped_iterator<typename std::vector<std::shared_ptr<ContextEdge<
1853 DerivedCCG, FuncTy, CallTy>>>::const_iterator,
1854 decltype(&GetCallee)>;
1856 static ChildIteratorType child_begin(NodeRef N) {
1857 return ChildIteratorType(N->CalleeEdges.begin(), &GetCallee);
1860 static ChildIteratorType child_end(NodeRef N) {
1861 return ChildIteratorType(N->CalleeEdges.end(), &GetCallee);
1865 template <typename DerivedCCG, typename FuncTy, typename CallTy>
1866 struct DOTGraphTraits<const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>
1867 : public DefaultDOTGraphTraits {
1868 DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {}
1870 using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
1871 using GTraits = GraphTraits<GraphType>;
1872 using NodeRef = typename GTraits::NodeRef;
1873 using ChildIteratorType = typename GTraits::ChildIteratorType;
1875 static std::string getNodeLabel(NodeRef Node, GraphType G) {
1876 std::string LabelString =
1877 (Twine("OrigId: ") + (Node->IsAllocation ? "Alloc" : "") +
1878 Twine(Node->OrigStackOrAllocId))
1879 .str();
1880 LabelString += "\n";
1881 if (Node->hasCall()) {
1882 auto Func = G->NodeToCallingFunc.find(Node);
1883 assert(Func != G->NodeToCallingFunc.end());
1884 LabelString +=
1885 G->getLabel(Func->second, Node->Call.call(), Node->Call.cloneNo());
1886 } else {
1887 LabelString += "null call";
1888 if (Node->Recursive)
1889 LabelString += " (recursive)";
1890 else
1891 LabelString += " (external)";
1893 return LabelString;
1896 static std::string getNodeAttributes(NodeRef Node, GraphType) {
1897 std::string AttributeString = (Twine("tooltip=\"") + getNodeId(Node) + " " +
1898 getContextIds(Node->ContextIds) + "\"")
1899 .str();
1900 AttributeString +=
1901 (Twine(",fillcolor=\"") + getColor(Node->AllocTypes) + "\"").str();
1902 AttributeString += ",style=\"filled\"";
1903 if (Node->CloneOf) {
1904 AttributeString += ",color=\"blue\"";
1905 AttributeString += ",style=\"filled,bold,dashed\"";
1906 } else
1907 AttributeString += ",style=\"filled\"";
1908 return AttributeString;
1911 static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter,
1912 GraphType) {
1913 auto &Edge = *(ChildIter.getCurrent());
1914 return (Twine("tooltip=\"") + getContextIds(Edge->ContextIds) + "\"" +
1915 Twine(",fillcolor=\"") + getColor(Edge->AllocTypes) + "\"")
1916 .str();
1919 // Since the NodeOwner list includes nodes that are no longer connected to
1920 // the graph, skip them here.
1921 static bool isNodeHidden(NodeRef Node, GraphType) {
1922 return Node->isRemoved();
1925 private:
1926 static std::string getContextIds(const DenseSet<uint32_t> &ContextIds) {
1927 std::string IdString = "ContextIds:";
1928 if (ContextIds.size() < 100) {
1929 std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
1930 std::sort(SortedIds.begin(), SortedIds.end());
1931 for (auto Id : SortedIds)
1932 IdString += (" " + Twine(Id)).str();
1933 } else {
1934 IdString += (" (" + Twine(ContextIds.size()) + " ids)").str();
1936 return IdString;
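// Editorial illustration (hypothetical values): small sets are listed
// explicitly, e.g. "ContextIds: 1 4 7", while sets of 100 or more ids are
// summarized, e.g. "ContextIds: (250 ids)", keeping the dot output readable.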
1939 static std::string getColor(uint8_t AllocTypes) {
1940 if (AllocTypes == (uint8_t)AllocationType::NotCold)
1941 // Color "brown1" actually looks like a lighter red.
1942 return "brown1";
1943 if (AllocTypes == (uint8_t)AllocationType::Cold)
1944 return "cyan";
1945 if (AllocTypes ==
1946 ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold))
1947 // Lighter purple.
1948 return "mediumorchid1";
1949 return "gray";
1952 static std::string getNodeId(NodeRef Node) {
1953 std::stringstream SStream;
1954 SStream << std::hex << "N0x" << (unsigned long long)Node;
1955 std::string Result = SStream.str();
1956 return Result;
1960 template <typename DerivedCCG, typename FuncTy, typename CallTy>
1961 void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot(
1962 std::string Label) const {
1963 WriteGraph(this, "", false, Label,
1964 DotFilePathPrefix + "ccg." + Label + ".dot");
1967 template <typename DerivedCCG, typename FuncTy, typename CallTy>
1968 typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
1969 CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
1970 const std::shared_ptr<ContextEdge> &Edge, EdgeIter *CallerEdgeI) {
1971 ContextNode *Node = Edge->Callee;
1972 NodeOwner.push_back(
1973 std::make_unique<ContextNode>(Node->IsAllocation, Node->Call));
1974 ContextNode *Clone = NodeOwner.back().get();
1975 Node->addClone(Clone);
1976 assert(NodeToCallingFunc.count(Node));
1977 NodeToCallingFunc[Clone] = NodeToCallingFunc[Node];
1978 moveEdgeToExistingCalleeClone(Edge, Clone, CallerEdgeI, /*NewClone=*/true);
1979 return Clone;
1982 template <typename DerivedCCG, typename FuncTy, typename CallTy>
1983 void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1984 moveEdgeToExistingCalleeClone(const std::shared_ptr<ContextEdge> &Edge,
1985 ContextNode *NewCallee, EdgeIter *CallerEdgeI,
1986 bool NewClone) {
1987 // NewCallee and Edge's current callee must be clones of the same original
1988 // node (Edge's current callee may be the original node too).
1989 assert(NewCallee->getOrigNode() == Edge->Callee->getOrigNode());
1990 auto &EdgeContextIds = Edge->getContextIds();
1991 ContextNode *OldCallee = Edge->Callee;
1992 if (CallerEdgeI)
1993 *CallerEdgeI = OldCallee->CallerEdges.erase(*CallerEdgeI);
1994 else
1995 OldCallee->eraseCallerEdge(Edge.get());
1996 Edge->Callee = NewCallee;
1997 NewCallee->CallerEdges.push_back(Edge);
1998 // Don't need to update Edge's context ids since we are simply reconnecting
1999 // it.
2000 set_subtract(OldCallee->ContextIds, EdgeContextIds);
2001 NewCallee->ContextIds.insert(EdgeContextIds.begin(), EdgeContextIds.end());
2002 NewCallee->AllocTypes |= Edge->AllocTypes;
2003 OldCallee->AllocTypes = computeAllocType(OldCallee->ContextIds);
2004 // OldCallee alloc type should be None iff its context id set is now empty.
2005 assert((OldCallee->AllocTypes == (uint8_t)AllocationType::None) ==
2006 OldCallee->ContextIds.empty());
2007 // Now walk the old callee node's callee edges and move Edge's context ids
2008 // over to the corresponding edge into the clone (which is created here if
2009 // this is a newly created clone).
2010 for (auto &OldCalleeEdge : OldCallee->CalleeEdges) {
2011 // The context ids moving to the new callee are the intersection of this
2012 // edge's context ids and the context ids on the caller edge being moved.
2013 DenseSet<uint32_t> EdgeContextIdsToMove =
2014 set_intersection(OldCalleeEdge->getContextIds(), EdgeContextIds);
2015 set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove);
2016 OldCalleeEdge->AllocTypes =
2017 computeAllocType(OldCalleeEdge->getContextIds());
2018 if (!NewClone) {
2019 // Update context ids / alloc type on corresponding edge to NewCallee.
2020 // There is a chance this may not exist if we are reusing an existing
2021 // clone, specifically during function assignment, where we would have
2022 // removed none type edges after creating the clone. If we can't find
2023 // a corresponding edge there, fall through to the cloning below.
2024 if (auto *NewCalleeEdge =
2025 NewCallee->findEdgeFromCallee(OldCalleeEdge->Callee)) {
2026 NewCalleeEdge->getContextIds().insert(EdgeContextIdsToMove.begin(),
2027 EdgeContextIdsToMove.end());
2028 NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
2029 continue;
2032 auto NewEdge = std::make_shared<ContextEdge>(
2033 OldCalleeEdge->Callee, NewCallee,
2034 computeAllocType(EdgeContextIdsToMove), EdgeContextIdsToMove);
2035 NewCallee->CalleeEdges.push_back(NewEdge);
2036 NewEdge->Callee->CallerEdges.push_back(NewEdge);
2038 if (VerifyCCG) {
2039 checkNode<DerivedCCG, FuncTy, CallTy>(OldCallee, /*CheckEdges=*/false);
2040 checkNode<DerivedCCG, FuncTy, CallTy>(NewCallee, /*CheckEdges=*/false);
2041 for (const auto &OldCalleeEdge : OldCallee->CalleeEdges)
2042 checkNode<DerivedCCG, FuncTy, CallTy>(OldCalleeEdge->Callee,
2043 /*CheckEdges=*/false);
2044 for (const auto &NewCalleeEdge : NewCallee->CalleeEdges)
2045 checkNode<DerivedCCG, FuncTy, CallTy>(NewCalleeEdge->Callee,
2046 /*CheckEdges=*/false);
2050 template <typename DerivedCCG, typename FuncTy, typename CallTy>
2051 void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() {
2052 DenseSet<const ContextNode *> Visited;
2053 for (auto &Entry : AllocationCallToContextNodeMap)
2054 identifyClones(Entry.second, Visited);
2057 // Helper function to check whether an AllocType is cold, notcold, or both.
2058 bool checkColdOrNotCold(uint8_t AllocType) {
2059 return (AllocType == (uint8_t)AllocationType::Cold) ||
2060 (AllocType == (uint8_t)AllocationType::NotCold) ||
2061 (AllocType ==
2062 ((uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold));
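// Editorial illustration of the helper above:
//   checkColdOrNotCold((uint8_t)AllocationType::Cold)     -> true
//   checkColdOrNotCold((uint8_t)AllocationType::NotCold)  -> true
//   checkColdOrNotCold((uint8_t)AllocationType::Cold |
//                      (uint8_t)AllocationType::NotCold)  -> true
//   checkColdOrNotCold((uint8_t)AllocationType::None)     -> false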
2065 template <typename DerivedCCG, typename FuncTy, typename CallTy>
2066 void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
2067 ContextNode *Node, DenseSet<const ContextNode *> &Visited) {
2068 if (VerifyNodes)
2069 checkNode<DerivedCCG, FuncTy, CallTy>(Node);
2070 assert(!Node->CloneOf);
2072 // If Node has a null call, then either it wasn't found in the module (regular
2073 // LTO) or summary index (ThinLTO), or there were other conditions blocking
2074 // cloning (e.g. recursion, calls multiple targets, etc.).
2075 // Do this here so that we don't try to recursively clone callers below, which
2076 // isn't useful at least for this node.
2077 if (!Node->hasCall())
2078 return;
2080 #ifndef NDEBUG
2081 auto Insert =
2082 #endif
2083 Visited.insert(Node);
2084 // We should not have visited this node yet.
2085 assert(Insert.second);
2086 // The recursive call to identifyClones may delete the current edge from the
2087 // CallerEdges vector. Make a copy and iterate on that, which is simpler than
2088 // passing in an iterator and having the recursive call erase from it. Other edges may
2089 // also get removed during the recursion, which will have null Callee and
2090 // Caller pointers (and are deleted later), so we skip those below.
2092 auto CallerEdges = Node->CallerEdges;
2093 for (auto &Edge : CallerEdges) {
2094 // Skip any that have been removed by an earlier recursive call.
2095 if (Edge->Callee == nullptr && Edge->Caller == nullptr) {
2096 assert(!std::count(Node->CallerEdges.begin(), Node->CallerEdges.end(),
2097 Edge));
2098 continue;
2100 // Ignore any caller we previously visited via another edge.
2101 if (!Visited.count(Edge->Caller) && !Edge->Caller->CloneOf) {
2102 identifyClones(Edge->Caller, Visited);
2107 // Check if we reached an unambiguous call or have only a single caller.
2108 if (hasSingleAllocType(Node->AllocTypes) || Node->CallerEdges.size() <= 1)
2109 return;
2111 // We need to clone.
2113 // Try to keep the original version as alloc type NotCold. This will make
2114 // cases with indirect calls or any other situation with an unknown call to
2115 // the original function get the default behavior. We do this by sorting the
2116 // CallerEdges of the Node we will clone by alloc type.
2118 // Give NotCold edges the lowest sort priority so those edges are at the end of
2119 // the caller edges vector, and stay on the original version (since the below
2120 // code clones greedily until it finds all remaining edges have the same type
2121 // and leaves the remaining ones on the original Node).
2123 // We shouldn't actually have any None type edges, so the sorting priority for
2124 // that is arbitrary, and we assert in that case below.
2125 const unsigned AllocTypeCloningPriority[] = {/*None*/ 3, /*NotCold*/ 4,
2126 /*Cold*/ 1,
2127 /*NotColdCold*/ 2};
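// Editorial worked example of the priorities above: for caller edges with
// alloc types {NotCold, NotColdCold, Cold}, the stable_sort below orders them
// Cold (1), NotColdCold (2), NotCold (4), so the cloning loop peels off the
// cold-reaching edges first and the NotCold edges remain on the original Node.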
2128 std::stable_sort(Node->CallerEdges.begin(), Node->CallerEdges.end(),
2129 [&](const std::shared_ptr<ContextEdge> &A,
2130 const std::shared_ptr<ContextEdge> &B) {
2131 assert(checkColdOrNotCold(A->AllocTypes) &&
2132 checkColdOrNotCold(B->AllocTypes));
2134 if (A->AllocTypes == B->AllocTypes)
2135 // Use the first context id for each edge as a
2136 // tie-breaker.
2137 return *A->ContextIds.begin() < *B->ContextIds.begin();
2138 return AllocTypeCloningPriority[A->AllocTypes] <
2139 AllocTypeCloningPriority[B->AllocTypes];
2142 assert(Node->AllocTypes != (uint8_t)AllocationType::None);
2144 // Iterate until we find no more opportunities for disambiguating the alloc
2145 // types via cloning. In most cases this loop will terminate once the Node
2146 // has a single allocation type, in which case no more cloning is needed.
2147 // We need to be able to remove Edge from CallerEdges, so need to adjust
2148 // iterator inside the loop.
2149 for (auto EI = Node->CallerEdges.begin(); EI != Node->CallerEdges.end();) {
2150 auto CallerEdge = *EI;
2152 // See if cloning the prior caller edge left this node with a single alloc
2153 // type or a single caller. In that case no more cloning of Node is needed.
2154 if (hasSingleAllocType(Node->AllocTypes) || Node->CallerEdges.size() <= 1)
2155 break;
2157 // Compute the node callee edge alloc types corresponding to the context ids
2158 // for this caller edge.
2159 std::vector<uint8_t> CalleeEdgeAllocTypesForCallerEdge;
2160 CalleeEdgeAllocTypesForCallerEdge.reserve(Node->CalleeEdges.size());
2161 for (auto &CalleeEdge : Node->CalleeEdges)
2162 CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
2163 CalleeEdge->getContextIds(), CallerEdge->getContextIds()));
2165 // Don't clone if doing so will not disambiguate any alloc types amongst
2166 // caller edges (including the callee edges that would be cloned).
2167 // Otherwise we will simply move all edges to the clone.
2169 // First check if by cloning we will disambiguate the caller allocation
2170 // type from node's allocation type. Query allocTypeToUse so that we don't
2171 // bother cloning to distinguish NotCold+Cold from NotCold. Note that
2172 // neither of these should be None type.
2174 // Then check if by cloning node at least one of the callee edges will be
2175 // disambiguated by splitting out different context ids.
2176 assert(CallerEdge->AllocTypes != (uint8_t)AllocationType::None);
2177 assert(Node->AllocTypes != (uint8_t)AllocationType::None);
2178 if (allocTypeToUse(CallerEdge->AllocTypes) ==
2179 allocTypeToUse(Node->AllocTypes) &&
2180 allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
2181 CalleeEdgeAllocTypesForCallerEdge, Node->CalleeEdges)) {
2182 ++EI;
2183 continue;
2186 // First see if we can use an existing clone. Check each clone and its
2187 // callee edges for matching alloc types.
2188 ContextNode *Clone = nullptr;
2189 for (auto *CurClone : Node->Clones) {
2190 if (allocTypeToUse(CurClone->AllocTypes) !=
2191 allocTypeToUse(CallerEdge->AllocTypes))
2192 continue;
2194 if (!allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
2195 CalleeEdgeAllocTypesForCallerEdge, CurClone->CalleeEdges))
2196 continue;
2197 Clone = CurClone;
2198 break;
2201 // The edge iterator is adjusted when we move the CallerEdge to the clone.
2202 if (Clone)
2203 moveEdgeToExistingCalleeClone(CallerEdge, Clone, &EI);
2204 else
2205 Clone = moveEdgeToNewCalleeClone(CallerEdge, &EI);
2207 assert(EI == Node->CallerEdges.end() ||
2208 Node->AllocTypes != (uint8_t)AllocationType::None);
2209 // Sanity check that no alloc types on clone or its edges are None.
2210 assert(Clone->AllocTypes != (uint8_t)AllocationType::None);
2211 assert(llvm::none_of(
2212 Clone->CallerEdges, [&](const std::shared_ptr<ContextEdge> &E) {
2213 return E->AllocTypes == (uint8_t)AllocationType::None;
2214 }));
2217 // Cloning may have resulted in some cloned callee edges with type None,
2218 // because they aren't carrying any contexts. Remove those edges.
2219 for (auto *Clone : Node->Clones) {
2220 removeNoneTypeCalleeEdges(Clone);
2221 if (VerifyNodes)
2222 checkNode<DerivedCCG, FuncTy, CallTy>(Clone);
2224 // We should still have some context ids on the original Node.
2225 assert(!Node->ContextIds.empty());
2227 // Remove any callee edges that ended up with alloc type None after creating
2228 // clones and updating callee edges.
2229 removeNoneTypeCalleeEdges(Node);
2231 // Sanity check that no alloc types on node or edges are None.
2232 assert(Node->AllocTypes != (uint8_t)AllocationType::None);
2233 assert(llvm::none_of(Node->CalleeEdges,
2234 [&](const std::shared_ptr<ContextEdge> &E) {
2235 return E->AllocTypes == (uint8_t)AllocationType::None;
2236 }));
2237 assert(llvm::none_of(Node->CallerEdges,
2238 [&](const std::shared_ptr<ContextEdge> &E) {
2239 return E->AllocTypes == (uint8_t)AllocationType::None;
2240 }));
2242 if (VerifyNodes)
2243 checkNode<DerivedCCG, FuncTy, CallTy>(Node);
2246 void ModuleCallsiteContextGraph::updateAllocationCall(
2247 CallInfo &Call, AllocationType AllocType) {
2248 std::string AllocTypeString = getAllocTypeAttributeString(AllocType);
2249 auto A = llvm::Attribute::get(Call.call()->getFunction()->getContext(),
2250 "memprof", AllocTypeString);
2251 cast<CallBase>(Call.call())->addFnAttr(A);
2252 OREGetter(Call.call()->getFunction())
2253 .emit(OptimizationRemark(DEBUG_TYPE, "MemprofAttribute", Call.call())
2254 << ore::NV("AllocationCall", Call.call()) << " in clone "
2255 << ore::NV("Caller", Call.call()->getFunction())
2256 << " marked with memprof allocation attribute "
2257 << ore::NV("Attribute", AllocTypeString));
2260 void IndexCallsiteContextGraph::updateAllocationCall(CallInfo &Call,
2261 AllocationType AllocType) {
2262 auto *AI = Call.call().dyn_cast<AllocInfo *>();
2263 assert(AI);
2264 assert(AI->Versions.size() > Call.cloneNo());
2265 AI->Versions[Call.cloneNo()] = (uint8_t)AllocType;
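// Editorial illustration for the summary (ThinLTO) case above: the decision
// is only recorded per clone, e.g. an AllocInfo whose Versions array ends up
// as {NotCold, Cold} means the original copy (version 0) keeps the not-cold
// behavior while clone 1 should be treated as cold when the recorded
// decisions are later applied in the ThinLTO backend.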
2268 void ModuleCallsiteContextGraph::updateCall(CallInfo &CallerCall,
2269 FuncInfo CalleeFunc) {
2270 if (CalleeFunc.cloneNo() > 0)
2271 cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func());
2272 OREGetter(CallerCall.call()->getFunction())
2273 .emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CallerCall.call())
2274 << ore::NV("Call", CallerCall.call()) << " in clone "
2275 << ore::NV("Caller", CallerCall.call()->getFunction())
2276 << " assigned to call function clone "
2277 << ore::NV("Callee", CalleeFunc.func()));
2280 void IndexCallsiteContextGraph::updateCall(CallInfo &CallerCall,
2281 FuncInfo CalleeFunc) {
2282 auto *CI = CallerCall.call().dyn_cast<CallsiteInfo *>();
2283 assert(CI &&
2284 "Caller cannot be an allocation which should not have profiled calls");
2285 assert(CI->Clones.size() > CallerCall.cloneNo());
2286 CI->Clones[CallerCall.cloneNo()] = CalleeFunc.cloneNo();
2289 CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
2290 Instruction *>::FuncInfo
2291 ModuleCallsiteContextGraph::cloneFunctionForCallsite(
2292 FuncInfo &Func, CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
2293 std::vector<CallInfo> &CallsWithMetadataInFunc, unsigned CloneNo) {
2294 // Use existing LLVM facilities for cloning and obtaining Call in clone
2295 ValueToValueMapTy VMap;
2296 auto *NewFunc = CloneFunction(Func.func(), VMap);
2297 std::string Name = getMemProfFuncName(Func.func()->getName(), CloneNo);
2298 assert(!Func.func()->getParent()->getFunction(Name));
2299 NewFunc->setName(Name);
2300 for (auto &Inst : CallsWithMetadataInFunc) {
2301 // This map always has the initial version in it.
2302 assert(Inst.cloneNo() == 0);
2303 CallMap[Inst] = {cast<Instruction>(VMap[Inst.call()]), CloneNo};
2305 OREGetter(Func.func())
2306 .emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", Func.func())
2307 << "created clone " << ore::NV("NewFunction", NewFunc));
2308 return {NewFunc, CloneNo};
2311 CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
2312 IndexCall>::FuncInfo
2313 IndexCallsiteContextGraph::cloneFunctionForCallsite(
2314 FuncInfo &Func, CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
2315 std::vector<CallInfo> &CallsWithMetadataInFunc, unsigned CloneNo) {
2316 // Check how many clones we have of Call (and therefore function).
2317 // The next clone number is the current size of versions array.
2318 // Confirm this matches the CloneNo provided by the caller, which is based on
2319 // the number of function clones we have.
2320 assert(CloneNo ==
2321 (Call.call().is<AllocInfo *>()
2322 ? Call.call().dyn_cast<AllocInfo *>()->Versions.size()
2323 : Call.call().dyn_cast<CallsiteInfo *>()->Clones.size()));
2324 // Walk all the instructions in this function. Create a new version for
2325 // each (by adding an entry to the Versions/Clones summary array), and copy
2326 // over the version being called for the function clone being cloned here.
2327 // Additionally, add an entry to the CallMap for the new function clone,
2328 // mapping the original call (clone 0, what is in CallsWithMetadataInFunc)
2329 // to the new call clone.
2330 for (auto &Inst : CallsWithMetadataInFunc) {
2331 // This map always has the initial version in it.
2332 assert(Inst.cloneNo() == 0);
2333 if (auto *AI = Inst.call().dyn_cast<AllocInfo *>()) {
2334 assert(AI->Versions.size() == CloneNo);
2335 // We assign the allocation type later (in updateAllocationCall), just add
2336 // an entry for it here.
2337 AI->Versions.push_back(0);
2338 } else {
2339 auto *CI = Inst.call().dyn_cast<CallsiteInfo *>();
2340 assert(CI && CI->Clones.size() == CloneNo);
2341 // We assign the clone number later (in updateCall), just add an entry for
2342 // it here.
2343 CI->Clones.push_back(0);
2345 CallMap[Inst] = {Inst.call(), CloneNo};
2347 return {Func.func(), CloneNo};
2350 // This method assigns cloned callsites to functions, cloning the functions as
2351 // needed. The assignment is greedy and proceeds roughly as follows:
2353 // For each function Func:
2354 // For each call with graph Node having clones:
2355 // Initialize ClonesWorklist to Node and its clones
2356 // Initialize NodeCloneCount to 0
2357 // While ClonesWorklist is not empty:
2358 // Clone = pop front ClonesWorklist
2359 // NodeCloneCount++
2360 // If Func has been cloned less than NodeCloneCount times:
2361 // If NodeCloneCount is 1:
2362 // Assign Clone to original Func
2363 // Continue
2364 // Create a new function clone
2365 // If other callers not assigned to call a function clone yet:
2366 // Assign them to call new function clone
2367 // Continue
2368 // Assign any other caller calling the cloned version to new clone
2370 // For each caller of Clone:
2371 // If caller is assigned to call a specific function clone:
2372 // If we cannot assign Clone to that function clone:
2373 // Create new callsite Clone NewClone
2374 // Add NewClone to ClonesWorklist
2375 // Continue
2376 // Assign Clone to existing caller's called function clone
2377 // Else:
2378 // If Clone not already assigned to a function clone:
2379 // Assign to first function clone without assignment
2380 // Assign caller to selected function clone
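// Editorial sketch of the simplest case of the pseudocode above (hypothetical
// function Func): a callsite node in Func has one clone, e.g. because one
// caller context is cold and another is not-cold. The original callsite node
// is assigned to the original Func; processing its clone then finds fewer
// function clones than callsite clones, so cloneFunctionForCallsite creates
// Func.memprof.1, and each caller is subsequently updated (via updateCall or
// updateAllocationCall) to target whichever copy matches its context.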
2381 template <typename DerivedCCG, typename FuncTy, typename CallTy>
2382 bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
2383 bool Changed = false;
2385 // Keep track of the assignment of nodes (callsites) to function clones they
2386 // call.
2387 DenseMap<ContextNode *, FuncInfo> CallsiteToCalleeFuncCloneMap;
2389 // Update caller node to call function version CalleeFunc, by recording the
2390 // assignment in CallsiteToCalleeFuncCloneMap.
2391 auto RecordCalleeFuncOfCallsite = [&](ContextNode *Caller,
2392 const FuncInfo &CalleeFunc) {
2393 assert(Caller->hasCall());
2394 CallsiteToCalleeFuncCloneMap[Caller] = CalleeFunc;
2397 // Walk all functions for which we saw calls with memprof metadata, and handle
2398 // cloning for each of their calls.
2399 for (auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
2400 FuncInfo OrigFunc(Func);
2401 // Map from each clone of OrigFunc to a map of remappings of each call of
2402 // interest (from original uncloned call to the corresponding cloned call in
2403 // that function clone).
2404 std::map<FuncInfo, std::map<CallInfo, CallInfo>> FuncClonesToCallMap;
2405 for (auto &Call : CallsWithMetadata) {
2406 ContextNode *Node = getNodeForInst(Call);
2407 // Skip call if we do not have a node for it (all uses of its stack ids
2408 // were either on inlined chains or pruned from the MIBs), or if we did
2409 // not create any clones for it.
2410 if (!Node || Node->Clones.empty())
2411 continue;
2412 assert(Node->hasCall() &&
2413 "Not having a call should have prevented cloning");
2415 // Track the assignment of function clones to clones of the current
2416 // callsite Node being handled.
2417 std::map<FuncInfo, ContextNode *> FuncCloneToCurNodeCloneMap;
2419 // Assign callsite version CallsiteClone to function version FuncClone,
2420 // and also assign (possibly cloned) Call to CallsiteClone.
2421 auto AssignCallsiteCloneToFuncClone = [&](const FuncInfo &FuncClone,
2422 CallInfo &Call,
2423 ContextNode *CallsiteClone,
2424 bool IsAlloc) {
2425 // Record the clone of callsite node assigned to this function clone.
2426 FuncCloneToCurNodeCloneMap[FuncClone] = CallsiteClone;
2428 assert(FuncClonesToCallMap.count(FuncClone));
2429 std::map<CallInfo, CallInfo> &CallMap = FuncClonesToCallMap[FuncClone];
2430 CallInfo CallClone(Call);
2431 if (CallMap.count(Call))
2432 CallClone = CallMap[Call];
2433 CallsiteClone->setCall(CallClone);
2436 // Keep track of the clones of callsite Node that need to be assigned to
2437 // function clones. This list may be expanded in the loop body below if we
2438 // find additional cloning is required.
2439 std::deque<ContextNode *> ClonesWorklist;
2440 // Ignore original Node if we moved all of its contexts to clones.
2441 if (!Node->ContextIds.empty())
2442 ClonesWorklist.push_back(Node);
2443 ClonesWorklist.insert(ClonesWorklist.end(), Node->Clones.begin(),
2444 Node->Clones.end());
2446 // Now walk through all of the clones of this callsite Node that we need,
2447 // and determine the assignment to a corresponding clone of the current
2448 // function (creating new function clones as needed).
2449 unsigned NodeCloneCount = 0;
2450 while (!ClonesWorklist.empty()) {
2451 ContextNode *Clone = ClonesWorklist.front();
2452 ClonesWorklist.pop_front();
2453 NodeCloneCount++;
2454 if (VerifyNodes)
2455 checkNode<DerivedCCG, FuncTy, CallTy>(Clone);
2457 // Need to create a new function clone if we have more callsite clones
2458 // than existing function clones, which would have been assigned to an
2459 // earlier clone in the list (we assign callsite clones to function
2460 // clones greedily).
2461 if (FuncClonesToCallMap.size() < NodeCloneCount) {
2462 // If this is the first callsite copy, assign to original function.
2463 if (NodeCloneCount == 1) {
2464 // Since FuncClonesToCallMap is empty in this case, no clones have
2465 // been created for this function yet, and no callers should have
2466 // been assigned a function clone for this callee node yet.
2467 assert(llvm::none_of(
2468 Clone->CallerEdges, [&](const std::shared_ptr<ContextEdge> &E) {
2469 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
2470 }));
2471 // Initialize with empty call map, assign Clone to original function
2472 // and its callers, and skip to the next clone.
2473 FuncClonesToCallMap[OrigFunc] = {};
2474 AssignCallsiteCloneToFuncClone(
2475 OrigFunc, Call, Clone,
2476 AllocationCallToContextNodeMap.count(Call));
2477 for (auto &CE : Clone->CallerEdges) {
2478 // Ignore any caller that does not have a recorded callsite Call.
2479 if (!CE->Caller->hasCall())
2480 continue;
2481 RecordCalleeFuncOfCallsite(CE->Caller, OrigFunc);
2483 continue;
2486 // First locate which copy of OrigFunc to clone again. If a caller
2487 // of this callsite clone was already assigned to call a particular
2488 // function clone, we need to redirect all of those callers to the
2489 // new function clone, and update their other callees within this
2490 // function.
2491 FuncInfo PreviousAssignedFuncClone;
2492 auto EI = llvm::find_if(
2493 Clone->CallerEdges, [&](const std::shared_ptr<ContextEdge> &E) {
2494 return CallsiteToCalleeFuncCloneMap.count(E->Caller);
2496 bool CallerAssignedToCloneOfFunc = false;
2497 if (EI != Clone->CallerEdges.end()) {
2498 const std::shared_ptr<ContextEdge> &Edge = *EI;
2499 PreviousAssignedFuncClone =
2500 CallsiteToCalleeFuncCloneMap[Edge->Caller];
2501 CallerAssignedToCloneOfFunc = true;
2504 // Clone function and save it along with the CallInfo map created
2505 // during cloning in the FuncClonesToCallMap.
2506 std::map<CallInfo, CallInfo> NewCallMap;
2507 unsigned CloneNo = FuncClonesToCallMap.size();
2508 assert(CloneNo > 0 && "Clone 0 is the original function, which "
2509 "should already exist in the map");
2510 FuncInfo NewFuncClone = cloneFunctionForCallsite(
2511 OrigFunc, Call, NewCallMap, CallsWithMetadata, CloneNo);
2512 FuncClonesToCallMap.emplace(NewFuncClone, std::move(NewCallMap));
2513 FunctionClonesAnalysis++;
2514 Changed = true;
2516 // If no caller callsites were already assigned to a clone of this
2517 // function, we can simply assign this clone to the new func clone
2518 // and update all callers to it, then skip to the next clone.
2519 if (!CallerAssignedToCloneOfFunc) {
2520 AssignCallsiteCloneToFuncClone(
2521 NewFuncClone, Call, Clone,
2522 AllocationCallToContextNodeMap.count(Call));
2523 for (auto &CE : Clone->CallerEdges) {
2524 // Ignore any caller that does not have a recorded callsite Call.
2525 if (!CE->Caller->hasCall())
2526 continue;
2527 RecordCalleeFuncOfCallsite(CE->Caller, NewFuncClone);
2529 continue;
2532 // We may need to do additional node cloning in this case.
2533 // Reset the CallsiteToCalleeFuncCloneMap entry for any callers
2534 // that were previously assigned to call PreviousAssignedFuncClone,
2535 // to record that they now call NewFuncClone.
2536 for (auto CE : Clone->CallerEdges) {
2537 // Ignore any caller that does not have a recorded callsite Call.
2538 if (!CE->Caller->hasCall())
2539 continue;
2541 if (!CallsiteToCalleeFuncCloneMap.count(CE->Caller) ||
2542 // We subsequently fall through to later handling that
2543 // will perform any additional cloning required for
2544 // callers that were calling other function clones.
2545 CallsiteToCalleeFuncCloneMap[CE->Caller] !=
2546 PreviousAssignedFuncClone)
2547 continue;
2549 RecordCalleeFuncOfCallsite(CE->Caller, NewFuncClone);
2551 // If we are cloning a function that was already assigned to some
2552 // callers, then essentially we are creating new callsite clones
2553 // of the other callsites in that function that are reached by those
2554 // callers. Clone the other callees of the current callsite's caller
2555 // that were already assigned to PreviousAssignedFuncClone
2556 // accordingly. This is important since we subsequently update the
2557 // calls from the nodes in the graph and their assignments to callee
2558 // functions recorded in CallsiteToCalleeFuncCloneMap.
2559 for (auto CalleeEdge : CE->Caller->CalleeEdges) {
2560 // Skip any that have been removed on an earlier iteration when
2561 // cleaning up newly None type callee edges.
2562 if (!CalleeEdge)
2563 continue;
2564 ContextNode *Callee = CalleeEdge->Callee;
2565 // Skip the current callsite (we are looking for other
2566 // callsites that Caller calls), as well as any callee that does not have a
2567 // recorded callsite Call.
2568 if (Callee == Clone || !Callee->hasCall())
2569 continue;
2570 ContextNode *NewClone = moveEdgeToNewCalleeClone(CalleeEdge);
2571 removeNoneTypeCalleeEdges(NewClone);
2572 // Moving the edge may have resulted in some none type
2573 // callee edges on the original Callee.
2574 removeNoneTypeCalleeEdges(Callee);
2575 assert(NewClone->AllocTypes != (uint8_t)AllocationType::None);
2576 // If the Callee node was already assigned to call a specific
2577 // function version, make sure its new clone is assigned to call
2578 // that same function clone.
2579 if (CallsiteToCalleeFuncCloneMap.count(Callee))
2580 RecordCalleeFuncOfCallsite(
2581 NewClone, CallsiteToCalleeFuncCloneMap[Callee]);
2582 // Update NewClone with the new Call clone of this callsite's Call
2583 // created for the new function clone created earlier.
2584 // Recall that we have already ensured when building the graph
2585 // that each caller can only call callsites within the same
2586 // function, so we are guaranteed that Callee Call is in the
2587 // current OrigFunc.
2588 // CallMap is set up as indexed by original Call at clone 0.
2589 CallInfo OrigCall(Callee->getOrigNode()->Call);
2590 OrigCall.setCloneNo(0);
2591 std::map<CallInfo, CallInfo> &CallMap =
2592 FuncClonesToCallMap[NewFuncClone];
2593 assert(CallMap.count(OrigCall));
2594 CallInfo NewCall(CallMap[OrigCall]);
2595 assert(NewCall);
2596 NewClone->setCall(NewCall);
2599 // Fall through to handling below to perform the recording of the
2600 // function for this callsite clone. This enables handling of cases
2601 // where the callers were assigned to different clones of a function.
2604 // See if we can use existing function clone. Walk through
2605 // all caller edges to see if any have already been assigned to
2606 // a clone of this callsite's function. If we can use it, do so. If not,
2607 // because that function clone is already assigned to a different clone
2608 // of this callsite, then we need to clone again.
2609 // Basically, this checking is needed to handle the case where different
2610 // caller functions/callsites may need versions of this function
2611 // containing different mixes of callsite clones across the different
2612 // callsites within the function. If that happens, we need to create
2613 // additional function clones to handle the various combinations.
2615 // Keep track of any new clones of this callsite created by the
2616 // following loop, as well as any existing clone that we decided to
2617 // assign this clone to.
2618 std::map<FuncInfo, ContextNode *> FuncCloneToNewCallsiteCloneMap;
2619 FuncInfo FuncCloneAssignedToCurCallsiteClone;
2620 // We need to be able to remove Edge from CallerEdges, so need to adjust
2621 // iterator in the loop.
2622 for (auto EI = Clone->CallerEdges.begin();
2623 EI != Clone->CallerEdges.end();) {
2624 auto Edge = *EI;
2625 // Ignore any caller that does not have a recorded callsite Call.
2626 if (!Edge->Caller->hasCall()) {
2627 EI++;
2628 continue;
2630 // If this caller was already assigned to call a version of OrigFunc, we need
2631 // to ensure we can assign this callsite clone to that function clone.
2632 if (CallsiteToCalleeFuncCloneMap.count(Edge->Caller)) {
2633 FuncInfo FuncCloneCalledByCaller =
2634 CallsiteToCalleeFuncCloneMap[Edge->Caller];
2635 // First we need to confirm that this function clone is available
2636 // for use by this callsite node clone.
2638 // While FuncCloneToCurNodeCloneMap is built only for this Node and
2639 // its callsite clones, one of those callsite clones X could have
2640 // been assigned to the same function clone called by Edge's caller
2641 // - if Edge's caller calls another callsite within Node's original
2642 // function, and that callsite has another caller reaching clone X.
2643 // We need to clone Node again in this case.
2644 if ((FuncCloneToCurNodeCloneMap.count(FuncCloneCalledByCaller) &&
2645 FuncCloneToCurNodeCloneMap[FuncCloneCalledByCaller] !=
2646 Clone) ||
2647 // Detect when we have multiple callers of this callsite that
2648 // have already been assigned to specific, and different, clones
2649 // of OrigFunc (due to other unrelated callsites in Func they
2650 // reach via call contexts). Is this Clone of callsite Node
2651 // assigned to a different clone of OrigFunc? If so, clone Node
2652 // again.
2653 (FuncCloneAssignedToCurCallsiteClone &&
2654 FuncCloneAssignedToCurCallsiteClone !=
2655 FuncCloneCalledByCaller)) {
2656 // We need to use a different newly created callsite clone, in
2657 // order to assign it to another new function clone on a
2658 // subsequent iteration over the Clones array (adjusted below).
2659 // Note we specifically do not reset the
2660 // CallsiteToCalleeFuncCloneMap entry for this caller, so that
2661 // when this new clone is processed later we know which version of
2662 // the function to copy (so that other callsite clones we have
2663 // assigned to that function clone are properly cloned over). See
2664 // comments in the function cloning handling earlier.
2666 // Check if we already have cloned this callsite again while
2667 // walking through caller edges, for a caller calling the same
2668 // function clone. If so, we can move this edge to that new clone
2669 // rather than creating yet another new clone.
2670 if (FuncCloneToNewCallsiteCloneMap.count(
2671 FuncCloneCalledByCaller)) {
2672 ContextNode *NewClone =
2673 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller];
2674 moveEdgeToExistingCalleeClone(Edge, NewClone, &EI);
2675 // Cleanup any none type edges cloned over.
2676 removeNoneTypeCalleeEdges(NewClone);
2677 } else {
2678 // Create a new callsite clone.
2679 ContextNode *NewClone = moveEdgeToNewCalleeClone(Edge, &EI);
2680 removeNoneTypeCalleeEdges(NewClone);
2681 FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller] =
2682 NewClone;
2683 // Add to list of clones and process later.
2684 ClonesWorklist.push_back(NewClone);
2685 assert(EI == Clone->CallerEdges.end() ||
2686 Clone->AllocTypes != (uint8_t)AllocationType::None);
2687 assert(NewClone->AllocTypes != (uint8_t)AllocationType::None);
2689 // Moving the caller edge may have resulted in some none type
2690 // callee edges.
2691 removeNoneTypeCalleeEdges(Clone);
2692 // We will handle the newly created callsite clone in a subsequent
2693 // iteration over this Node's Clones. Continue here since we
2694 // already adjusted iterator EI while moving the edge.
2695 continue;
2698 // Otherwise, we can use the function clone already assigned to this
2699 // caller.
2700 if (!FuncCloneAssignedToCurCallsiteClone) {
2701 FuncCloneAssignedToCurCallsiteClone = FuncCloneCalledByCaller;
2702 // Assign Clone to FuncCloneCalledByCaller
2703 AssignCallsiteCloneToFuncClone(
2704 FuncCloneCalledByCaller, Call, Clone,
2705 AllocationCallToContextNodeMap.count(Call));
2706 } else
2707 // Don't need to do anything - callsite is already calling this
2708 // function clone.
2709 assert(FuncCloneAssignedToCurCallsiteClone ==
2710 FuncCloneCalledByCaller);
2712 } else {
2713 // We have not already assigned this caller to a version of
2714 // OrigFunc. Do the assignment now.
2716 // First check if we have already assigned this callsite clone to a
2717 // clone of OrigFunc for another caller during this iteration over
2718 // its caller edges.
2719 if (!FuncCloneAssignedToCurCallsiteClone) {
2720 // Find first function in FuncClonesToCallMap without an assigned
2721 // clone of this callsite Node. We should always have one
2722 // available at this point due to the earlier cloning when the
2723 // FuncClonesToCallMap size was smaller than the clone number.
2724 for (auto &CF : FuncClonesToCallMap) {
2725 if (!FuncCloneToCurNodeCloneMap.count(CF.first)) {
2726 FuncCloneAssignedToCurCallsiteClone = CF.first;
2727 break;
2730 assert(FuncCloneAssignedToCurCallsiteClone);
2731 // Assign Clone to FuncCloneAssignedToCurCallsiteClone
2732 AssignCallsiteCloneToFuncClone(
2733 FuncCloneAssignedToCurCallsiteClone, Call, Clone,
2734 AllocationCallToContextNodeMap.count(Call));
2735 } else
2736 assert(FuncCloneToCurNodeCloneMap
2737 [FuncCloneAssignedToCurCallsiteClone] == Clone);
2738 // Update callers to record function version called.
2739 RecordCalleeFuncOfCallsite(Edge->Caller,
2740 FuncCloneAssignedToCurCallsiteClone);
2743 EI++;
2746 if (VerifyCCG) {
2747 checkNode<DerivedCCG, FuncTy, CallTy>(Node);
2748 for (const auto &PE : Node->CalleeEdges)
2749 checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee);
2750 for (const auto &CE : Node->CallerEdges)
2751 checkNode<DerivedCCG, FuncTy, CallTy>(CE->Caller);
2752 for (auto *Clone : Node->Clones) {
2753 checkNode<DerivedCCG, FuncTy, CallTy>(Clone);
2754 for (const auto &PE : Clone->CalleeEdges)
2755 checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee);
2756 for (const auto &CE : Clone->CallerEdges)
2757 checkNode<DerivedCCG, FuncTy, CallTy>(CE->Caller);
2763 auto UpdateCalls = [&](ContextNode *Node,
2764 DenseSet<const ContextNode *> &Visited,
2765 auto &&UpdateCalls) {
2766 auto Inserted = Visited.insert(Node);
2767 if (!Inserted.second)
2768 return;
2770 for (auto *Clone : Node->Clones)
2771 UpdateCalls(Clone, Visited, UpdateCalls);
2773 for (auto &Edge : Node->CallerEdges)
2774 UpdateCalls(Edge->Caller, Visited, UpdateCalls);
2776 // Skip if there is either no call to update, or if we ended up with no
2777 // context ids (we moved all edges onto other clones).
2778 if (!Node->hasCall() || Node->ContextIds.empty())
2779 return;
2781 if (Node->IsAllocation) {
2782 updateAllocationCall(Node->Call, allocTypeToUse(Node->AllocTypes));
2783 return;
2784 }
2786 if (!CallsiteToCalleeFuncCloneMap.count(Node))
2787 return;
2789 auto CalleeFunc = CallsiteToCalleeFuncCloneMap[Node];
2790 updateCall(Node->Call, CalleeFunc);
2791 };
2793 // Performs DFS traversal starting from allocation nodes to update calls to
2794 // reflect cloning decisions recorded earlier. For regular LTO this will
2795 // update the actual calls in the IR to call the appropriate function clone
2796 // (and add attributes to allocation calls), whereas for ThinLTO the decisions
2797 // are recorded in the summary entries.
2798 DenseSet<const ContextNode *> Visited;
2799 for (auto &Entry : AllocationCallToContextNodeMap)
2800 UpdateCalls(Entry.second, Visited, UpdateCalls);
2802 return Changed;
2803 }
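// A minimal sketch of the net effect on IR for regular LTO, assuming a
// hypothetical callee @foo whose callsite node was assigned to function
// clone 1, and an allocation node that resolved to cold. The clone name and
// attribute string shown are illustrative; the real values come from
// getMemProfFuncName() and getAllocTypeAttributeString():
//
//   call void @foo()              ; before updateCall()
//   call void @foo.memprof.1()    ; after updateCall()
//
//   %p = call ptr @_Znam(i64 8)             ; before updateAllocationCall()
//   %p = call ptr @_Znam(i64 8) #0          ; after updateAllocationCall()
//   attributes #0 = { "memprof"="cold" }
//
// For the ThinLTO (index) graph the same decisions are instead recorded in
// the summary entries and later applied to the IR by applyImport().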
2805 static SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> createFunctionClones(
2806 Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE,
2807 std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
2808 &FuncToAliasMap) {
2809 // The first "clone" is the original copy; this should only be called if
2810 // we actually needed to create new clones.
2811 assert(NumClones > 1);
2812 SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
2813 VMaps.reserve(NumClones - 1);
2814 FunctionsClonedThinBackend++;
2815 for (unsigned I = 1; I < NumClones; I++) {
2816 VMaps.emplace_back(std::make_unique<ValueToValueMapTy>());
2817 auto *NewF = CloneFunction(&F, *VMaps.back());
2818 FunctionClonesThinBackend++;
2819 // Strip memprof and callsite metadata from clone as they are no longer
2820 // needed.
2821 for (auto &BB : *NewF) {
2822 for (auto &Inst : BB) {
2823 Inst.setMetadata(LLVMContext::MD_memprof, nullptr);
2824 Inst.setMetadata(LLVMContext::MD_callsite, nullptr);
2825 }
2826 }
2827 std::string Name = getMemProfFuncName(F.getName(), I);
2828 auto *PrevF = M.getFunction(Name);
2829 if (PrevF) {
2830 // We might have already created this declaration when adjusting a
2831 // callsite in another function; it should still be just a declaration.
2832 assert(PrevF->isDeclaration());
2833 NewF->takeName(PrevF);
2834 PrevF->replaceAllUsesWith(NewF);
2835 PrevF->eraseFromParent();
2836 } else
2837 NewF->setName(Name);
2838 ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F)
2839 << "created clone " << ore::NV("NewFunction", NewF));
2841 // Now handle aliases to this function, and clone those as well.
2842 if (!FuncToAliasMap.count(&F))
2843 continue;
2844 for (auto *A : FuncToAliasMap[&F]) {
2845 std::string Name = getMemProfFuncName(A->getName(), I);
2846 auto *PrevA = M.getNamedAlias(Name);
2847 auto *NewA = GlobalAlias::create(A->getValueType(),
2848 A->getType()->getPointerAddressSpace(),
2849 A->getLinkage(), Name, NewF);
2850 NewA->copyAttributesFrom(A);
2851 if (PrevA) {
2852 // We might have already created this declaration when adjusting a
2853 // callsite in another function; it should still be just a declaration.
2854 assert(PrevA->isDeclaration());
2855 NewA->takeName(PrevA);
2856 PrevA->replaceAllUsesWith(NewA);
2857 PrevA->eraseFromParent();
2858 }
2859 }
2860 }
2861 return VMaps;
2862 }
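// A brief usage sketch for the helper above, with a hypothetical function foo
// and NumClones == 3 (the exact clone names are whatever getMemProfFuncName()
// produces): the loop creates two new copies of foo, named
// getMemProfFuncName("foo", 1) and getMemProfFuncName("foo", 2), strips their
// memprof/callsite metadata, and mirrors any aliases of foo onto each copy.
// The returned VMaps have NumClones - 1 entries: VMaps[0] maps each original
// instruction to its counterpart in clone 1 and VMaps[1] to clone 2, while
// the original foo serves as "clone" 0 with no VMap, which is why callers
// below index these maps with J - 1.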
2864 // Locate the summary for F. This is complicated by the fact that it might
2865 // have been internalized or promoted.
2866 static ValueInfo findValueInfoForFunc(const Function &F, const Module &M,
2867 const ModuleSummaryIndex *ImportSummary) {
2868 // FIXME: Ideally we would retain the original GUID in some fashion on the
2869 // function (e.g. as metadata), but for now do our best to locate the
2870 // summary without that information.
2871 ValueInfo TheFnVI = ImportSummary->getValueInfo(F.getGUID());
2872 if (!TheFnVI)
2873 // See if the function was internalized, by checking the index directly
2874 // with the original name (this avoids the name adjustment done by
2875 // getGUID() for internal symbols).
2876 TheFnVI = ImportSummary->getValueInfo(GlobalValue::getGUID(F.getName()));
2877 if (TheFnVI)
2878 return TheFnVI;
2879 // Now query with the original name before any promotion was performed.
2880 StringRef OrigName =
2881 ModuleSummaryIndex::getOriginalNameBeforePromote(F.getName());
2882 std::string OrigId = GlobalValue::getGlobalIdentifier(
2883 OrigName, GlobalValue::InternalLinkage, M.getSourceFileName());
2884 TheFnVI = ImportSummary->getValueInfo(GlobalValue::getGUID(OrigId));
2885 if (TheFnVI)
2886 return TheFnVI;
2887 // Could be a promoted local imported from another module. We need to pass
2888 // down more info here to find the original module id. For now, try with
2889 // the OrigName which might have been stored in the OidGuidMap in the
2890 // index. This would not work if there were same-named locals in multiple
2891 // modules, however.
2892 auto OrigGUID =
2893 ImportSummary->getGUIDFromOriginalID(GlobalValue::getGUID(OrigName));
2894 if (OrigGUID)
2895 TheFnVI = ImportSummary->getValueInfo(OrigGUID);
2896 return TheFnVI;
2897 }
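// A rough recap of the fallback chain above, with illustrative names: (1)
// F.getGUID() handles symbols whose name and linkage are unchanged since the
// summary was built; (2) GlobalValue::getGUID(F.getName()) handles a function
// the thin link internalized, whose summary is keyed on the plain external
// name; (3) getGlobalIdentifier(OrigName, InternalLinkage,
// M.getSourceFileName()) reconstructs the pre-promotion identifier of a local
// that was promoted in this module; and (4) getGUIDFromOriginalID() is the
// last resort for a promoted local imported from another module. Each later
// step is attempted only when the earlier ones find no ValueInfo.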
2899 bool MemProfContextDisambiguation::applyImport(Module &M) {
2900 assert(ImportSummary);
2901 bool Changed = false;
2903 auto IsMemProfClone = [](const Function &F) {
2904 return F.getName().contains(MemProfCloneSuffix);
2905 };
2907 // We also need to clone any aliases that reference cloned functions, because
2908 // the modified callsites may invoke via the alias. Keep track of the aliases
2909 // for each function.
2910 std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
2911 FuncToAliasMap;
2912 for (auto &A : M.aliases()) {
2913 auto *Aliasee = A.getAliaseeObject();
2914 if (auto *F = dyn_cast<Function>(Aliasee))
2915 FuncToAliasMap[F].insert(&A);
2916 }
2918 for (auto &F : M) {
2919 if (F.isDeclaration() || IsMemProfClone(F))
2920 continue;
2922 OptimizationRemarkEmitter ORE(&F);
2924 SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
2925 bool ClonesCreated = false;
2926 unsigned NumClonesCreated = 0;
2927 auto CloneFuncIfNeeded = [&](unsigned NumClones) {
2928 // We should at least have version 0 which is the original copy.
2929 assert(NumClones > 0);
2930 // If only one copy is needed, use the original.
2931 if (NumClones == 1)
2932 return;
2933 // If we already performed cloning of this function, confirm that the
2934 // requested number of clones matches (the thin link should ensure the
2935 // number of clones for each constituent callsite is consistent within
2936 // each function), before returning.
2937 if (ClonesCreated) {
2938 assert(NumClonesCreated == NumClones);
2939 return;
2940 }
2941 VMaps = createFunctionClones(F, NumClones, M, ORE, FuncToAliasMap);
2942 // The first "clone" is the original copy, which doesn't have a VMap.
2943 assert(VMaps.size() == NumClones - 1);
2944 Changed = true;
2945 ClonesCreated = true;
2946 NumClonesCreated = NumClones;
2947 };
2949 // Locate the summary for F.
2950 ValueInfo TheFnVI = findValueInfoForFunc(F, M, ImportSummary);
2951 // If not found, this could be an imported local (see comment in
2952 // findValueInfoForFunc). Skip for now as it will be cloned in its original
2953 // module (where it would have been promoted to global scope so should
2954 // satisfy any reference in this module).
2955 if (!TheFnVI)
2956 continue;
2958 auto *GVSummary =
2959 ImportSummary->findSummaryInModule(TheFnVI, M.getModuleIdentifier());
2960 if (!GVSummary)
2961 // It must have been imported; use the first summary (there might be
2962 // multiple if this was a linkonce_odr).
2963 GVSummary = TheFnVI.getSummaryList().front().get();
2965 // If this was an imported alias, skip it, as we won't have the function
2966 // summary; it will instead be cloned in its original module.
2967 if (isa<AliasSummary>(GVSummary))
2968 continue;
2970 auto *FS = cast<FunctionSummary>(GVSummary->getBaseObject());
2972 if (FS->allocs().empty() && FS->callsites().empty())
2973 continue;
2975 auto SI = FS->callsites().begin();
2976 auto AI = FS->allocs().begin();
2978 // Assume for now that the instructions are in the exact same order
2979 // as when the summary was created, but confirm this is correct by
2980 // matching the stack ids.
2981 for (auto &BB : F) {
2982 for (auto &I : BB) {
2983 auto *CB = dyn_cast<CallBase>(&I);
2984 // Same handling as when creating module summary.
2985 if (!mayHaveMemprofSummary(CB))
2986 continue;
2988 CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
2989 I.getMetadata(LLVMContext::MD_callsite));
2990 auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof);
2992 // Include in the statistics any allocs that were already assigned a
2993 // memprof function attribute.
2994 if (CB->getAttributes().hasFnAttr("memprof")) {
2995 assert(!MemProfMD);
2996 CB->getAttributes().getFnAttr("memprof").getValueAsString() == "cold"
2997 ? AllocTypeColdThinBackend++
2998 : AllocTypeNotColdThinBackend++;
2999 OrigAllocsThinBackend++;
3000 AllocVersionsThinBackend++;
3001 if (!MaxAllocVersionsThinBackend)
3002 MaxAllocVersionsThinBackend = 1;
3003 // Remove any remaining callsite metadata; we can then skip the rest of
3004 // the handling for this instruction, since no cloning is needed.
3005 I.setMetadata(LLVMContext::MD_callsite, nullptr);
3006 continue;
3007 }
3009 if (MemProfMD) {
3010 // Consult the next alloc node.
3011 assert(AI != FS->allocs().end());
3012 auto &AllocNode = *(AI++);
3014 // Sanity check that the MIB stack ids match between the summary and
3015 // instruction metadata.
3016 auto MIBIter = AllocNode.MIBs.begin();
3017 for (auto &MDOp : MemProfMD->operands()) {
3018 assert(MIBIter != AllocNode.MIBs.end());
3019 LLVM_ATTRIBUTE_UNUSED auto StackIdIndexIter =
3020 MIBIter->StackIdIndices.begin();
3021 auto *MIBMD = cast<const MDNode>(MDOp);
3022 MDNode *StackMDNode = getMIBStackNode(MIBMD);
3023 assert(StackMDNode);
3024 SmallVector<unsigned> StackIdsFromMetadata;
3025 CallStack<MDNode, MDNode::op_iterator> StackContext(StackMDNode);
3026 for (auto ContextIter =
3027 StackContext.beginAfterSharedPrefix(CallsiteContext);
3028 ContextIter != StackContext.end(); ++ContextIter) {
3029 // If this is a direct recursion, simply skip the duplicate
3030 // entries, to be consistent with how the summary ids were
3031 // generated during ModuleSummaryAnalysis.
3032 if (!StackIdsFromMetadata.empty() &&
3033 StackIdsFromMetadata.back() == *ContextIter)
3034 continue;
3035 assert(StackIdIndexIter != MIBIter->StackIdIndices.end());
3036 assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) ==
3037 *ContextIter);
3038 StackIdIndexIter++;
3039 }
3040 MIBIter++;
3041 }
3043 // Perform cloning if not yet done.
3044 CloneFuncIfNeeded(/*NumClones=*/AllocNode.Versions.size());
3046 OrigAllocsThinBackend++;
3047 AllocVersionsThinBackend += AllocNode.Versions.size();
3048 if (MaxAllocVersionsThinBackend < AllocNode.Versions.size())
3049 MaxAllocVersionsThinBackend = AllocNode.Versions.size();
3051 // If there is only one version, that means we didn't end up
3052 // considering this function for cloning, and in that case the alloc
3053 // will still be None type or should have gotten the default NotCold.
3054 // Skip it, but only after calling the clone helper above, since that
3055 // performs sanity checks confirming we haven't yet decided to clone.
3056 if (AllocNode.Versions.size() == 1) {
3057 assert((AllocationType)AllocNode.Versions[0] ==
3058 AllocationType::NotCold ||
3059 (AllocationType)AllocNode.Versions[0] ==
3060 AllocationType::None);
3061 UnclonableAllocsThinBackend++;
3062 continue;
3063 }
3065 // All versions should have a single allocation type.
3066 assert(llvm::none_of(AllocNode.Versions, [](uint8_t Type) {
3067 return Type == ((uint8_t)AllocationType::NotCold |
3068 (uint8_t)AllocationType::Cold);
3069 }));
3071 // Update the allocation types per the summary info.
3072 for (unsigned J = 0; J < AllocNode.Versions.size(); J++) {
3073 // Ignore any that didn't get an assigned allocation type.
3074 if (AllocNode.Versions[J] == (uint8_t)AllocationType::None)
3075 continue;
3076 AllocationType AllocTy = (AllocationType)AllocNode.Versions[J];
3077 AllocTy == AllocationType::Cold ? AllocTypeColdThinBackend++
3078 : AllocTypeNotColdThinBackend++;
3079 std::string AllocTypeString = getAllocTypeAttributeString(AllocTy);
3080 auto A = llvm::Attribute::get(F.getContext(), "memprof",
3081 AllocTypeString);
3082 CallBase *CBClone;
3083 // Copy 0 is the original function.
3084 if (!J)
3085 CBClone = CB;
3086 else
3087 // Since VMaps are only created for new clones, we index with
3088 // clone J-1 (J==0 is the original clone and does not have a VMaps
3089 // entry).
3090 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
3091 CBClone->addFnAttr(A);
3092 ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofAttribute", CBClone)
3093 << ore::NV("AllocationCall", CBClone) << " in clone "
3094 << ore::NV("Caller", CBClone->getFunction())
3095 << " marked with memprof allocation attribute "
3096 << ore::NV("Attribute", AllocTypeString));
3097 }
3098 } else if (!CallsiteContext.empty()) {
3099 // Consult the next callsite node.
3100 assert(SI != FS->callsites().end());
3101 auto &StackNode = *(SI++);
3103 #ifndef NDEBUG
3104 // Sanity check that the stack ids match between the summary and
3105 // instruction metadata.
3106 auto StackIdIndexIter = StackNode.StackIdIndices.begin();
3107 for (auto StackId : CallsiteContext) {
3108 assert(StackIdIndexIter != StackNode.StackIdIndices.end());
3109 assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) ==
3110 StackId);
3111 StackIdIndexIter++;
3112 }
3113 #endif
3115 // Perform cloning if not yet done.
3116 CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size());
3118 // Should have skipped indirect calls via mayHaveMemprofSummary.
3119 assert(CB->getCalledFunction());
3120 assert(!IsMemProfClone(*CB->getCalledFunction()));
3122 // Update the calls per the summary info.
3123 // Save orig name since it gets updated in the first iteration
3124 // below.
3125 auto CalleeOrigName = CB->getCalledFunction()->getName();
3126 for (unsigned J = 0; J < StackNode.Clones.size(); J++) {
3127 // Do nothing if this version calls the original version of its
3128 // callee.
3129 if (!StackNode.Clones[J])
3130 continue;
3131 auto NewF = M.getOrInsertFunction(
3132 getMemProfFuncName(CalleeOrigName, StackNode.Clones[J]),
3133 CB->getCalledFunction()->getFunctionType());
3134 CallBase *CBClone;
3135 // Copy 0 is the original function.
3136 if (!J)
3137 CBClone = CB;
3138 else
3139 CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
3140 CBClone->setCalledFunction(NewF);
3141 ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CBClone)
3142 << ore::NV("Call", CBClone) << " in clone "
3143 << ore::NV("Caller", CBClone->getFunction())
3144 << " assigned to call function clone "
3145 << ore::NV("Callee", NewF.getCallee()));
3146 }
3147 }
3148 // Memprof and callsite metadata on memory allocations are no longer needed.
3149 I.setMetadata(LLVMContext::MD_memprof, nullptr);
3150 I.setMetadata(LLVMContext::MD_callsite, nullptr);
3151 }
3152 }
3153 }
3155 return Changed;
3156 }
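// A worked example of the matching loop above, under assumed summary contents
// (function names and clone numbers are hypothetical): suppose a callsite
// record for a call to bar has StackNode.Clones == {0, 2}, and an allocation
// record in the same function has AllocNode.Versions == {NotCold, Cold}.
// CloneFuncIfNeeded(2) then produces one new copy of the enclosing function.
// Copy 0 keeps calling bar and its allocation is annotated with the NotCold
// attribute string, while copy 1 (located through VMaps[0]) is redirected to
// getMemProfFuncName("bar", 2) via getOrInsertFunction()/setCalledFunction()
// and its allocation gets the Cold attribute string, both as produced by
// getAllocTypeAttributeString().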
3158 template <typename DerivedCCG, typename FuncTy, typename CallTy>
3159 bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {
3160 if (DumpCCG) {
3161 dbgs() << "CCG before cloning:\n";
3162 dbgs() << *this;
3163 }
3164 if (ExportToDot)
3165 exportToDot("postbuild");
3167 if (VerifyCCG) {
3168 check();
3169 }
3171 identifyClones();
3173 if (VerifyCCG) {
3174 check();
3175 }
3177 if (DumpCCG) {
3178 dbgs() << "CCG after cloning:\n";
3179 dbgs() << *this;
3180 }
3181 if (ExportToDot)
3182 exportToDot("cloned");
3184 bool Changed = assignFunctions();
3186 if (DumpCCG) {
3187 dbgs() << "CCG after assigning function clones:\n";
3188 dbgs() << *this;
3189 }
3190 if (ExportToDot)
3191 exportToDot("clonefuncassign");
3193 return Changed;
3194 }
3196 bool MemProfContextDisambiguation::processModule(
3197 Module &M,
3198 function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) {
3200 // If we have an import summary, then the cloning decisions were made during
3201 // the thin link on the index. Apply them and return.
3202 if (ImportSummary)
3203 return applyImport(M);
3205 // TODO: If/when other types of memprof cloning are enabled beyond just for
3206 // hot and cold, we will need to change this to individually control the
3207 // AllocationType passed to addStackNodesForMIB during CCG construction.
3208 // Note that we specifically check this after applying imports above, so that
3209 // the option does not need to be passed to distributed ThinLTO backend
3210 // clang processes, which won't necessarily have visibility into the linker
3211 // dependences. Instead the information is communicated from the LTO link to
3212 // the backends via the combined summary index.
3213 if (!SupportsHotColdNew)
3214 return false;
3216 ModuleCallsiteContextGraph CCG(M, OREGetter);
3217 return CCG.process();
3218 }
3220 MemProfContextDisambiguation::MemProfContextDisambiguation(
3221 const ModuleSummaryIndex *Summary)
3222 : ImportSummary(Summary) {
3223 if (ImportSummary) {
3224 // The MemProfImportSummary should only be used for testing ThinLTO
3225 // distributed backend handling via opt, in which case we don't have a
3226 // summary from the pass pipeline.
3227 assert(MemProfImportSummary.empty());
3228 return;
3229 }
3230 if (MemProfImportSummary.empty())
3231 return;
3233 auto ReadSummaryFile =
3234 errorOrToExpected(MemoryBuffer::getFile(MemProfImportSummary));
3235 if (!ReadSummaryFile) {
3236 logAllUnhandledErrors(ReadSummaryFile.takeError(), errs(),
3237 "Error loading file '" + MemProfImportSummary +
3238 "': ");
3239 return;
3240 }
3241 auto ImportSummaryForTestingOrErr = getModuleSummaryIndex(**ReadSummaryFile);
3242 if (!ImportSummaryForTestingOrErr) {
3243 logAllUnhandledErrors(ImportSummaryForTestingOrErr.takeError(), errs(),
3244 "Error parsing file '" + MemProfImportSummary +
3245 "': ");
3246 return;
3247 }
3248 ImportSummaryForTesting = std::move(*ImportSummaryForTestingOrErr);
3249 ImportSummary = ImportSummaryForTesting.get();
3250 }
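// The testing-only path above is intended to be driven from opt with a
// distributed ThinLTO index file; a usage sketch, assuming the pass and the
// option are registered under the names suggested by DEBUG_TYPE and by the
// MemProfImportSummary cl::opt defined earlier in this file:
//
//   opt -passes=memprof-context-disambiguation \
//       -memprof-import-summary=main.o.thinlto.bc main.ll -S
//
// This loads the given summary bitcode into ImportSummaryForTesting so that
// applyImport() can be exercised without an in-process thin link.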
3252 PreservedAnalyses MemProfContextDisambiguation::run(Module &M,
3253 ModuleAnalysisManager &AM) {
3254 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
3255 auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & {
3256 return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
3257 };
3258 if (!processModule(M, OREGetter))
3259 return PreservedAnalyses::all();
3260 return PreservedAnalyses::none();
3261 }
3263 void MemProfContextDisambiguation::run(
3264 ModuleSummaryIndex &Index,
3265 function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
3266 isPrevailing) {
3267 // TODO: If/when other types of memprof cloning are enabled beyond just for
3268 // hot and cold, we will need to change this to individually control the
3269 // AllocationType passed to addStackNodesForMIB during CCG construction.
3270 // The index was set from the option, so these should be in sync.
3271 assert(Index.withSupportsHotColdNew() == SupportsHotColdNew);
3272 if (!SupportsHotColdNew)
3273 return;
3275 IndexCallsiteContextGraph CCG(Index, isPrevailing);
3276 CCG.process();
3277 }