//===-- MemoryProfileInfo.cpp - memory profile info ------------------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains utilities to analyze memory profile information.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/Support/CommandLine.h"

using namespace llvm;
using namespace llvm::memprof;

#define DEBUG_TYPE "memory-profile-info"

// Upper bound on lifetime access density (accesses per byte per lifetime sec)
// for marking an allocation cold.
cl::opt<float> MemProfLifetimeAccessDensityColdThreshold(
    "memprof-lifetime-access-density-cold-threshold", cl::init(0.05),
    cl::Hidden,
    cl::desc("The threshold the lifetime access density (accesses per byte per "
             "lifetime sec) must be under to consider an allocation cold"));

// Lower bound on lifetime to mark an allocation cold (in addition to accesses
// per byte per sec above). This is to avoid pessimizing short lived objects.
cl::opt<unsigned> MemProfAveLifetimeColdThreshold(
    "memprof-ave-lifetime-cold-threshold", cl::init(200), cl::Hidden,
    cl::desc("The average lifetime (s) for an allocation to be considered "
             "cold"));

// Lower bound on average lifetime access density (total lifetime access
// density / alloc count) for marking an allocation hot.
cl::opt<unsigned> MemProfMinAveLifetimeAccessDensityHotThreshold(
    "memprof-min-ave-lifetime-access-density-hot-threshold", cl::init(1000),
    cl::Hidden,
    cl::desc("The minimum TotalLifetimeAccessDensity / AllocCount for an "
             "allocation to be considered hot"));

cl::opt<bool>
    MemProfUseHotHints("memprof-use-hot-hints", cl::init(false), cl::Hidden,
                       cl::desc("Enable use of hot hints (only supported for "
                                "unambiguously hot allocations)"));

cl::opt<bool> MemProfReportHintedSizes(
    "memprof-report-hinted-sizes", cl::init(false), cl::Hidden,
    cl::desc("Report total allocation sizes of hinted allocations"));

// This is useful if we have enabled reporting of hinted sizes, and want to
// get information from the indexing step for all contexts (especially for
// testing), or have specified a value less than 100% for
// -memprof-cloning-cold-threshold.
cl::opt<bool> MemProfKeepAllNotColdContexts(
    "memprof-keep-all-not-cold-contexts", cl::init(false), cl::Hidden,
    cl::desc("Keep all non-cold contexts (increases cloning overheads)"));

AllocationType llvm::memprof::getAllocType(uint64_t TotalLifetimeAccessDensity,
                                           uint64_t AllocCount,
                                           uint64_t TotalLifetime) {
  // The access densities are multiplied by 100 to hold 2 decimal places of
  // precision, so need to divide by 100.
  if (((float)TotalLifetimeAccessDensity) / AllocCount / 100 <
          MemProfLifetimeAccessDensityColdThreshold
      // Lifetime is expected to be in ms, so convert the threshold to ms.
      && ((float)TotalLifetime) / AllocCount >=
             MemProfAveLifetimeColdThreshold * 1000)
    return AllocationType::Cold;

  // The access densities are multiplied by 100 to hold 2 decimal places of
  // precision, so need to divide by 100.
  if (MemProfUseHotHints &&
      ((float)TotalLifetimeAccessDensity) / AllocCount / 100 >
          MemProfMinAveLifetimeAccessDensityHotThreshold)
    return AllocationType::Hot;

  return AllocationType::NotCold;
}

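// Worked example with the default thresholds (all profile values
// hypothetical): for TotalLifetimeAccessDensity = 4, AllocCount = 1 and
// TotalLifetime = 250000 ms, the scaled access density is 4 / 1 / 100 = 0.04
// < 0.05 and the average lifetime is 250000 ms >= 200 s, so the allocation
// is classified Cold. With -memprof-use-hot-hints enabled, a site with
// TotalLifetimeAccessDensity = 200000 (2000 after the / 100 scaling) would
// exceed the hot threshold of 1000 and be classified Hot.
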
MDNode *llvm::memprof::buildCallstackMetadata(ArrayRef<uint64_t> CallStack,
                                              LLVMContext &Ctx) {
  SmallVector<Metadata *, 8> StackVals;
  StackVals.reserve(CallStack.size());
  for (auto Id : CallStack) {
    auto *StackValMD =
        ValueAsMetadata::get(ConstantInt::get(Type::getInt64Ty(Ctx), Id));
    StackVals.push_back(StackValMD);
  }
  return MDNode::get(Ctx, StackVals);
}

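// For illustration, a call stack with ids {1, 2, 3} (hypothetical values)
// becomes a single metadata node of the form:
//   !0 = !{i64 1, i64 2, i64 3}
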
MDNode *llvm::memprof::getMIBStackNode(const MDNode *MIB) {
  assert(MIB->getNumOperands() >= 2);
  // The stack metadata is the first operand of each memprof MIB metadata.
  return cast<MDNode>(MIB->getOperand(0));
}

AllocationType llvm::memprof::getMIBAllocType(const MDNode *MIB) {
  assert(MIB->getNumOperands() >= 2);
  // The allocation type is currently the second operand of each memprof
  // MIB metadata. This will need to change as we add additional allocation
  // types that can be applied based on the allocation profile data.
  auto *MDS = dyn_cast<MDString>(MIB->getOperand(1));
  assert(MDS);
  if (MDS->getString() == "cold") {
    return AllocationType::Cold;
  } else if (MDS->getString() == "hot") {
    return AllocationType::Hot;
  }
  return AllocationType::NotCold;
}

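// For illustration, an allocation with one cold and one not-cold profiled
// context carries !memprof metadata of roughly this shape (stack ids are
// hypothetical):
//   %call = call ptr @malloc(i64 %size), !memprof !0
//   !0 = !{!1, !3}        ; list of MIB nodes
//   !1 = !{!2, !"cold"}   ; stack node followed by the allocation type string
//   !2 = !{i64 1, i64 2}
//   !3 = !{!4, !"notcold"}
//   !4 = !{i64 1, i64 3}
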
std::string llvm::memprof::getAllocTypeAttributeString(AllocationType Type) {
  switch (Type) {
  case AllocationType::NotCold:
    return "notcold";
  case AllocationType::Cold:
    return "cold";
  case AllocationType::Hot:
    return "hot";
  default:
    assert(false && "Unexpected alloc type");
  }
  llvm_unreachable("invalid alloc type");
}

static void addAllocTypeAttribute(LLVMContext &Ctx, CallBase *CI,
                                  AllocationType AllocType) {
  auto AllocTypeString = getAllocTypeAttributeString(AllocType);
  auto A = llvm::Attribute::get(Ctx, "memprof", AllocTypeString);
  CI->addFnAttr(A);
}

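// For illustration, a call with a single (unambiguous) allocation type ends
// up with a call site attribute rather than !memprof metadata, roughly:
//   %call = call ptr @_Znam(i64 32) #1
//   attributes #1 = { "memprof"="cold" }
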
bool llvm::memprof::hasSingleAllocType(uint8_t AllocTypes) {
  const unsigned NumAllocTypes = llvm::popcount(AllocTypes);
  assert(NumAllocTypes != 0);
  return NumAllocTypes == 1;
}

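// For illustration: AllocTypes is a bitmask of AllocationType values (NotCold
// = 1, Cold = 2, Hot = 4, as defined in MemoryProfileInfo.h), so a node that
// saw both NotCold and Cold contexts has AllocTypes == 3, two bits set, and
// is therefore ambiguous.
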
void CallStackTrie::addCallStack(
    AllocationType AllocType, ArrayRef<uint64_t> StackIds,
    std::vector<ContextTotalSize> ContextSizeInfo) {
  bool First = true;
  CallStackTrieNode *Curr = nullptr;
  for (auto StackId : StackIds) {
    // If this is the first stack frame, add or update alloc node.
    if (First) {
      First = false;
      if (Alloc) {
        assert(AllocStackId == StackId);
        Alloc->addAllocType(AllocType);
      } else {
        AllocStackId = StackId;
        Alloc = new CallStackTrieNode(AllocType);
      }
      Curr = Alloc;
      continue;
    }
    // Update existing caller node if it exists.
    CallStackTrieNode *Prev = nullptr;
    auto Next = Curr->Callers.find(StackId);
    if (Next != Curr->Callers.end()) {
      Prev = Curr;
      Curr = Next->second;
      Curr->addAllocType(AllocType);
      // If this node has an ambiguous alloc type, its callee is not the
      // deepest point where we have an ambiguous allocation type.
      if (!hasSingleAllocType(Curr->AllocTypes))
        Prev->DeepestAmbiguousAllocType = false;
      continue;
    }
    // Otherwise add a new caller node.
    auto *New = new CallStackTrieNode(AllocType);
    Curr->Callers[StackId] = New;
    Curr = New;
  }
  assert(Curr);
  Curr->ContextSizeInfo.insert(Curr->ContextSizeInfo.end(),
                               ContextSizeInfo.begin(), ContextSizeInfo.end());
}

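// For illustration (hypothetical stack ids): adding {Cold, [1, 2, 3]} and
// then {NotCold, [1, 2, 4]} yields a trie rooted at the alloc node for id 1
// with a single caller node 2, which in turn has caller nodes 3 (Cold) and 4
// (NotCold); nodes 1 and 2 accumulate the ambiguous type Cold|NotCold.
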
void CallStackTrie::addCallStack(MDNode *MIB) {
  MDNode *StackMD = getMIBStackNode(MIB);
  assert(StackMD);
  std::vector<uint64_t> CallStack;
  CallStack.reserve(StackMD->getNumOperands());
  for (const auto &MIBStackIter : StackMD->operands()) {
    auto *StackId = mdconst::dyn_extract<ConstantInt>(MIBStackIter);
    assert(StackId);
    CallStack.push_back(StackId->getZExtValue());
  }
  std::vector<ContextTotalSize> ContextSizeInfo;
  // Collect the context size information if it exists.
  if (MIB->getNumOperands() > 2) {
    for (unsigned I = 2; I < MIB->getNumOperands(); I++) {
      MDNode *ContextSizePair = dyn_cast<MDNode>(MIB->getOperand(I));
      assert(ContextSizePair->getNumOperands() == 2);
      uint64_t FullStackId =
          mdconst::dyn_extract<ConstantInt>(ContextSizePair->getOperand(0))
              ->getZExtValue();
      uint64_t TotalSize =
          mdconst::dyn_extract<ConstantInt>(ContextSizePair->getOperand(1))
              ->getZExtValue();
      ContextSizeInfo.push_back({FullStackId, TotalSize});
    }
  }
  addCallStack(getMIBAllocType(MIB), CallStack, std::move(ContextSizeInfo));
}

static MDNode *createMIBNode(LLVMContext &Ctx, ArrayRef<uint64_t> MIBCallStack,
                             AllocationType AllocType,
                             ArrayRef<ContextTotalSize> ContextSizeInfo) {
  SmallVector<Metadata *> MIBPayload(
      {buildCallstackMetadata(MIBCallStack, Ctx)});
  MIBPayload.push_back(
      MDString::get(Ctx, getAllocTypeAttributeString(AllocType)));
  if (!ContextSizeInfo.empty()) {
    for (const auto &[FullStackId, TotalSize] : ContextSizeInfo) {
      auto *FullStackIdMD = ValueAsMetadata::get(
          ConstantInt::get(Type::getInt64Ty(Ctx), FullStackId));
      auto *TotalSizeMD = ValueAsMetadata::get(
          ConstantInt::get(Type::getInt64Ty(Ctx), TotalSize));
      auto *ContextSizeMD = MDNode::get(Ctx, {FullStackIdMD, TotalSizeMD});
      MIBPayload.push_back(ContextSizeMD);
    }
  }
  return MDNode::get(Ctx, MIBPayload);
}

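// For illustration, when context size info is present, the MIB node carries
// (FullStackId, TotalSize) pairs after the type string (values hypothetical):
//   !1 = !{!2, !"cold", !5}
//   !5 = !{i64 1234567890, i64 4096}
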
void CallStackTrie::collectContextSizeInfo(
    CallStackTrieNode *Node, std::vector<ContextTotalSize> &ContextSizeInfo) {
  ContextSizeInfo.insert(ContextSizeInfo.end(), Node->ContextSizeInfo.begin(),
                         Node->ContextSizeInfo.end());
  for (auto &Caller : Node->Callers)
    collectContextSizeInfo(Caller.second, ContextSizeInfo);
}

void CallStackTrie::convertHotToNotCold(CallStackTrieNode *Node) {
  if (Node->hasAllocType(AllocationType::Hot)) {
    Node->removeAllocType(AllocationType::Hot);
    Node->addAllocType(AllocationType::NotCold);
  }
  for (auto &Caller : Node->Callers)
    convertHotToNotCold(Caller.second);
}

// Recursive helper to trim contexts and create metadata nodes.
// Caller should have pushed Node's loc to MIBCallStack. Doing this in the
// caller makes it simpler to handle the many early returns in this method.
bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx,
                                  std::vector<uint64_t> &MIBCallStack,
                                  std::vector<Metadata *> &MIBNodes,
                                  bool CalleeHasAmbiguousCallerContext,
                                  bool &CalleeDeepestAmbiguousAllocType) {
  // Trim context below the first node in a prefix with a single alloc type.
  // Add an MIB record for the current call stack prefix.
  if (hasSingleAllocType(Node->AllocTypes)) {
    // Because we only clone cold contexts (we don't clone for exposing NotCold
    // contexts as that is the default allocation behavior), we create MIB
    // metadata for this context if any of the following are true:
    // 1) It is cold.
    // 2) The immediate callee is the deepest point where we have an ambiguous
    //    allocation type (i.e. the other callers that are cold need to know
    //    that we have a not cold context overlapping to this point so that we
    //    know how deep to clone).
    // 3) MemProfKeepAllNotColdContexts is enabled, which is useful if we are
    //    reporting hinted sizes, and want to get information from the indexing
    //    step for all contexts, or have specified a value less than 100% for
    //    -memprof-cloning-cold-threshold.
    if (Node->hasAllocType(AllocationType::Cold) ||
        CalleeDeepestAmbiguousAllocType || MemProfKeepAllNotColdContexts) {
      std::vector<ContextTotalSize> ContextSizeInfo;
      collectContextSizeInfo(Node, ContextSizeInfo);
      MIBNodes.push_back(createMIBNode(Ctx, MIBCallStack,
                                       (AllocationType)Node->AllocTypes,
                                       ContextSizeInfo));
      // If we just emitted an MIB for a not cold caller, we don't need to emit
      // another one for the callee to correctly disambiguate its cold callers.
      if (!Node->hasAllocType(AllocationType::Cold))
        CalleeDeepestAmbiguousAllocType = false;
    }
    return true;
  }

  // We don't have a single allocation type for all the contexts sharing this
  // prefix, so recursively descend into callers in the trie.
  if (!Node->Callers.empty()) {
    bool NodeHasAmbiguousCallerContext = Node->Callers.size() > 1;
    bool AddedMIBNodesForAllCallerContexts = true;
    for (auto &Caller : Node->Callers) {
      MIBCallStack.push_back(Caller.first);
      AddedMIBNodesForAllCallerContexts &= buildMIBNodes(
          Caller.second, Ctx, MIBCallStack, MIBNodes,
          NodeHasAmbiguousCallerContext, Node->DeepestAmbiguousAllocType);
      // Remove Caller.
      MIBCallStack.pop_back();
    }
    if (AddedMIBNodesForAllCallerContexts)
      return true;
    // We expect that the callers should be forced to add MIBs to disambiguate
    // the context in this case (see below).
    assert(!NodeHasAmbiguousCallerContext);
  }

  // If we reached here, then this node does not have a single allocation type,
  // and we didn't add metadata for a longer call stack prefix including any of
  // Node's callers. That means we never hit a single allocation type along all
  // call stacks with this prefix. This can happen due to recursion collapsing
  // or the stack being deeper than tracked by the profiler runtime, leading to
  // contexts with different allocation types being merged. In that case, we
  // trim the context just below the deepest context split, which is this node
  // if the callee has an ambiguous caller context (multiple callers), since
  // the recursive calls above returned false. Conservatively give it a
  // non-cold allocation type.
  if (!CalleeHasAmbiguousCallerContext)
    return false;
  std::vector<ContextTotalSize> ContextSizeInfo;
  collectContextSizeInfo(Node, ContextSizeInfo);
  MIBNodes.push_back(createMIBNode(Ctx, MIBCallStack, AllocationType::NotCold,
                                   ContextSizeInfo));
  return true;
}

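// For illustration, continuing the hypothetical trie above with contexts
// [1, 2, 3] (Cold) and [1, 2, 4] (NotCold): node 2 is ambiguous, so we
// descend and emit one MIB for the [1, 2, 3] prefix (cold) and one for
// [1, 2, 4] (notcold, needed so the cloning step knows how deep the cold
// context must be cloned).
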
void CallStackTrie::addSingleAllocTypeAttribute(CallBase *CI, AllocationType AT,
                                                StringRef Descriptor) {
  addAllocTypeAttribute(CI->getContext(), CI, AT);
  if (MemProfReportHintedSizes) {
    std::vector<ContextTotalSize> ContextSizeInfo;
    collectContextSizeInfo(Alloc, ContextSizeInfo);
    for (const auto &[FullStackId, TotalSize] : ContextSizeInfo) {
      errs() << "MemProf hinting: Total size for full allocation context hash "
             << FullStackId << " and " << Descriptor << " alloc type "
             << getAllocTypeAttributeString(AT) << ": " << TotalSize << "\n";
    }
  }
}

// Build and attach the minimal necessary MIB metadata. If the alloc has a
// single allocation type, add a function attribute instead. Returns true if
// memprof metadata attached, false if not (attribute added).
bool CallStackTrie::buildAndAttachMIBMetadata(CallBase *CI) {
  if (hasSingleAllocType(Alloc->AllocTypes)) {
    addSingleAllocTypeAttribute(CI, (AllocationType)Alloc->AllocTypes,
                                "single");
    return false;
  }
  // If there were any hot allocation contexts, the Alloc trie node would have
  // the Hot type set. If so, because we don't currently support cloning for hot
  // contexts, they should be converted to NotCold. This happens in the cloning
  // support anyway, however, doing this now enables more aggressive context
  // trimming when building the MIB metadata (and possibly may make the
  // allocation have a single NotCold allocation type), greatly reducing
  // overheads in bitcode, cloning memory and cloning time.
  if (Alloc->hasAllocType(AllocationType::Hot)) {
    convertHotToNotCold(Alloc);
    // Check whether we now have a single alloc type.
    if (hasSingleAllocType(Alloc->AllocTypes)) {
      addSingleAllocTypeAttribute(CI, (AllocationType)Alloc->AllocTypes,
                                  "single");
      return false;
    }
  }
  auto &Ctx = CI->getContext();
  std::vector<uint64_t> MIBCallStack;
  MIBCallStack.push_back(AllocStackId);
  std::vector<Metadata *> MIBNodes;
  assert(!Alloc->Callers.empty() && "addCallStack has not been called yet");
  // The CalleeHasAmbiguousCallerContext flag is meant to say whether the
  // callee of the given node has more than one caller. Here the node being
  // passed in is the alloc and it has no callees, so it's false.
  // Similarly, the last parameter is meant to say whether the callee of the
  // given node is the deepest point with an ambiguous alloc type. The alloc
  // has no callees, and since the alloc is known to have an ambiguous type at
  // this point, the initial value is never consulted for the alloc node
  // itself.
  bool DeepestAmbiguousAllocType = true;
  if (buildMIBNodes(Alloc, Ctx, MIBCallStack, MIBNodes,
                    /*CalleeHasAmbiguousCallerContext=*/false,
                    DeepestAmbiguousAllocType)) {
    assert(MIBCallStack.size() == 1 &&
           "Should only be left with Alloc's location in stack");
    CI->setMetadata(LLVMContext::MD_memprof, MDNode::get(Ctx, MIBNodes));
    return true;
  }
  // In the corner case where the CallStackTrie is a single chain to the leaf
  // and every node in the chain has an ambiguous alloc type, conservatively
  // give the allocation a non-cold type.
  // FIXME: Avoid this case before the memory profile is created.
  // Alternatively, select the hint based on the fraction of cold contexts.
  addSingleAllocTypeAttribute(CI, AllocationType::NotCold, "indistinguishable");
  return false;
}

template <>
CallStack<MDNode, MDNode::op_iterator>::CallStackIterator::CallStackIterator(
    const MDNode *N, bool End)
    : N(N) {
  if (!N)
    return;
  Iter = End ? N->op_end() : N->op_begin();
}

template <>
uint64_t
CallStack<MDNode, MDNode::op_iterator>::CallStackIterator::operator*() {
  assert(Iter != N->op_end());
  ConstantInt *StackIdCInt = mdconst::dyn_extract<ConstantInt>(*Iter);
  assert(StackIdCInt);
  return StackIdCInt->getZExtValue();
}

template <> uint64_t CallStack<MDNode, MDNode::op_iterator>::back() const {
  assert(N);
  return mdconst::dyn_extract<ConstantInt>(N->operands().back())
      ->getZExtValue();
}

MDNode *MDNode::getMergedMemProfMetadata(MDNode *A, MDNode *B) {
  // TODO: Support more sophisticated merging, such as selecting the one with
  // more bytes allocated, or implement support for carrying multiple allocation
  // leaf contexts. For now, keep the first one.
  if (A)
    return A;
  return B;
}

MDNode *MDNode::getMergedCallsiteMetadata(MDNode *A, MDNode *B) {
  // TODO: Support more sophisticated merging, which will require support for
  // carrying multiple contexts. For now, keep the first one.
  if (A)
    return A;
  return B;
}