//===- RawMemProfReader.cpp - Instrumented memory profiling reader --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains support for reading MemProf profiling data.
//
//===----------------------------------------------------------------------===//

#include <type_traits>

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/BuildID.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/ProfileData/MemProfData.inc"
#include "llvm/ProfileData/MemProfReader.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"

#define DEBUG_TYPE "memprof"

namespace llvm {
namespace memprof {
namespace {

template <class T = uint64_t> inline T alignedRead(const char *Ptr) {
  static_assert(std::is_pod<T>::value, "Not a pod type.");
  assert(reinterpret_cast<size_t>(Ptr) % sizeof(T) == 0 && "Unaligned Read");
  return *reinterpret_cast<const T *>(Ptr);
}

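// Sanity checks for a raw profile buffer: the magic must match, the buffer
// must be non-empty and at least header-sized, every header must carry a
// supported version, and the buffer size must equal the sum of the per-dump
// header sizes.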
Error checkBuffer(const MemoryBuffer &Buffer) {
  if (!RawMemProfReader::hasFormat(Buffer))
    return make_error<InstrProfError>(instrprof_error::bad_magic);

  if (Buffer.getBufferSize() == 0)
    return make_error<InstrProfError>(instrprof_error::empty_raw_profile);

  if (Buffer.getBufferSize() < sizeof(Header)) {
    return make_error<InstrProfError>(instrprof_error::truncated);
  }

  // The size of the buffer can be > header total size since we allow repeated
  // serialization of memprof profiles to the same file.
  uint64_t TotalSize = 0;
  const char *Next = Buffer.getBufferStart();
  while (Next < Buffer.getBufferEnd()) {
    const auto *H = reinterpret_cast<const Header *>(Next);

    // Check if the version in header is among the supported versions.
    bool IsSupported = false;
    for (auto SupportedVersion : MEMPROF_RAW_SUPPORTED_VERSIONS) {
      if (H->Version == SupportedVersion)
        IsSupported = true;
    }
    if (!IsSupported) {
      return make_error<InstrProfError>(instrprof_error::unsupported_version);
    }

    TotalSize += H->TotalSize;
    Next += H->TotalSize;
  }

  if (Buffer.getBufferSize() != TotalSize) {
    return make_error<InstrProfError>(instrprof_error::malformed);
  }
  return Error::success();
}

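// Reads the segment entries serialized in the raw profile: a little-endian
// count followed by that many raw SegmentEntry structs.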
llvm::SmallVector<SegmentEntry> readSegmentEntries(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  llvm::SmallVector<SegmentEntry> Items;
  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    Items.push_back(*reinterpret_cast<const SegmentEntry *>(
        Ptr + I * sizeof(SegmentEntry)));
  }
  return Items;
}

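// Reads the (stack id, MemInfoBlock) pairs from a version 3 raw profile. V3
// blocks do not serialize the access histogram fields, so those fields are
// cleared after the struct copy and the read pointer only advances by the V3
// block size.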
llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
readMemInfoBlocksV3(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);

  llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> Items;
  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    const uint64_t Id =
        endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);

    // We cheat a bit here and remove the const from cast to set the
    // Histogram Pointer to newly allocated buffer. We also cheat, since V3 and
    // V4 do not have the same fields. V3 is missing AccessHistogramSize and
    // AccessHistogram. This means we read "dirty" data in here, but it should
    // not segfault, since there will be callstack data placed after this in
    // the binary format.
    MemInfoBlock MIB = *reinterpret_cast<const MemInfoBlock *>(Ptr);
    // Overwrite dirty data.
    MIB.AccessHistogramSize = 0;
    MIB.AccessHistogram = 0;

    Items.push_back({Id, MIB});
    // Only increment by the size of MIB in V3.
    Ptr += MEMPROF_V3_MIB_SIZE;
  }
  return Items;
}

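// Reads the (stack id, MemInfoBlock) pairs from a version 4 raw profile. When
// a block carries an access histogram, a buffer is malloc'ed for it here and
// released later, either when histograms are merged or in the reader
// destructor.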
llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
readMemInfoBlocksV4(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
  llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> Items;
  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    const uint64_t Id =
        endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
    // We cheat a bit here and remove the const from cast to set the
    // Histogram Pointer to newly allocated buffer.
    MemInfoBlock MIB = *reinterpret_cast<const MemInfoBlock *>(Ptr);

    // Only increment by size of MIB since readNext implicitly increments.
    Ptr += sizeof(MemInfoBlock);

    if (MIB.AccessHistogramSize > 0) {
      MIB.AccessHistogram =
          (uintptr_t)malloc(MIB.AccessHistogramSize * sizeof(uint64_t));
    }

    for (uint64_t J = 0; J < MIB.AccessHistogramSize; J++) {
      ((uint64_t *)MIB.AccessHistogram)[J] =
          endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
    }
    Items.push_back({Id, MIB});
  }
  return Items;
}

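// Reads the callstack table from the raw profile: for each entry, a 64-bit
// stack id followed by the number of PCs and the PC list itself.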
CallStackMap readStackInfo(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  CallStackMap Items;

  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    const uint64_t StackId =
        endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
    const uint64_t NumPCs =
        endian::readNext<uint64_t, llvm::endianness::little>(Ptr);

    SmallVector<uint64_t> CallStack;
    CallStack.reserve(NumPCs);
    for (uint64_t J = 0; J < NumPCs; J++) {
      CallStack.push_back(
          endian::readNext<uint64_t, llvm::endianness::little>(Ptr));
    }

    Items[StackId] = CallStack;
  }
  return Items;
}

// Merges the contents of stack information in \p From to \p To. Returns true
// if any stack ids observed previously map to a different set of program
// counter addresses.
bool mergeStackMap(const CallStackMap &From, CallStackMap &To) {
  for (const auto &[Id, Stack] : From) {
    auto I = To.find(Id);
    if (I == To.end()) {
      To[Id] = Stack;
    } else {
      // Check that the PCs are the same (in order).
      if (Stack != I->second)
        return true;
    }
  }
  return false;
}

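// Wraps \p E with a string error carrying \p Context (typically a file or
// binary name) so that reported failures identify their source.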
Error report(Error E, const StringRef Context) {
  return joinErrors(createStringError(inconvertibleErrorCode(), Context),
                    std::move(E));
}

bool isRuntimePath(const StringRef Path) {
  const StringRef Filename = llvm::sys::path::filename(Path);
  // This list should be updated in case new files with additional interceptors
  // are added to the memprof runtime.
  return Filename == "memprof_malloc_linux.cpp" ||
         Filename == "memprof_interceptors.cpp" ||
         Filename == "memprof_new_delete.cpp";
}

std::string getBuildIdString(const SegmentEntry &Entry) {
  // If the build id is unset print a helpful string instead of all zeros.
  if (Entry.BuildIdSize == 0)
    return "<None>";

  std::string Str;
  raw_string_ostream OS(Str);
  for (size_t I = 0; I < Entry.BuildIdSize; I++) {
    OS << format_hex_no_prefix(Entry.BuildId[I], 2);
  }
  return OS.str();
}
} // namespace

MemProfReader::MemProfReader(
    llvm::DenseMap<FrameId, Frame> FrameIdMap,
    llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord> ProfData)
    : IdToFrame(std::move(FrameIdMap)),
      FunctionProfileData(std::move(ProfData)) {
  // Populate CSId in each IndexedAllocationInfo and IndexedMemProfRecord
  // while storing CallStack in CSIdToCallStack.
  for (auto &KV : FunctionProfileData) {
    IndexedMemProfRecord &Record = KV.second;
    for (auto &AS : Record.AllocSites) {
      CallStackId CSId = hashCallStack(AS.CallStack);
      AS.CSId = CSId;
      CSIdToCallStack.insert({CSId, AS.CallStack});
    }
    for (auto &CS : Record.CallSites) {
      CallStackId CSId = hashCallStack(CS);
      Record.CallSiteIds.push_back(CSId);
      CSIdToCallStack.insert({CSId, CS});
    }
  }
}

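// Illustrative usage sketch (not part of the reader itself); the profile and
// binary paths below are hypothetical:
//   auto ReaderOr = RawMemProfReader::create("memprof.profraw", "a.out",
//                                            /*KeepName=*/true);
//   if (ReaderOr)
//     (*ReaderOr)->printYAML(llvm::outs());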
Expected<std::unique_ptr<RawMemProfReader>>
RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary,
                         bool KeepName) {
  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
  if (std::error_code EC = BufferOr.getError())
    return report(errorCodeToError(EC), Path.getSingleStringRef());

  std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
  return create(std::move(Buffer), ProfiledBinary, KeepName);
}

Expected<std::unique_ptr<RawMemProfReader>>
RawMemProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
                         const StringRef ProfiledBinary, bool KeepName) {
  if (Error E = checkBuffer(*Buffer))
    return report(std::move(E), Buffer->getBufferIdentifier());

  if (ProfiledBinary.empty()) {
    // Peek the build ids to print a helpful error message.
    const std::vector<std::string> BuildIds = peekBuildIds(Buffer.get());
    std::string ErrorMessage(
        R"(Path to profiled binary is empty, expected binary with one of the following build ids:
)");
    for (const auto &Id : BuildIds) {
      ErrorMessage += "\n BuildId: ";
      ErrorMessage += Id;
    }
    return report(
        make_error<StringError>(ErrorMessage, inconvertibleErrorCode()),
        /*Context=*/"");
  }

  auto BinaryOr = llvm::object::createBinary(ProfiledBinary);
  if (!BinaryOr) {
    return report(BinaryOr.takeError(), ProfiledBinary);
  }

  // Use new here since constructor is private.
  std::unique_ptr<RawMemProfReader> Reader(
      new RawMemProfReader(std::move(BinaryOr.get()), KeepName));
  if (Error E = Reader->initialize(std::move(Buffer))) {
    return std::move(E);
  }
  return std::move(Reader);
}

// We need to make sure that all leftover MIB histograms that have not been
// freed by merge are freed here.
RawMemProfReader::~RawMemProfReader() {
  for (auto &[_, MIB] : CallstackProfileData) {
    if (MemprofRawVersion >= 4ULL && MIB.AccessHistogramSize > 0) {
      free((void *)MIB.AccessHistogram);
    }
  }
}

bool RawMemProfReader::hasFormat(const StringRef Path) {
  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
  if (!BufferOr)
    return false;

  std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
  return hasFormat(*Buffer);
}

bool RawMemProfReader::hasFormat(const MemoryBuffer &Buffer) {
  if (Buffer.getBufferSize() < sizeof(uint64_t))
    return false;
  // Aligned read to sanity check that the buffer was allocated with at least
  // 8b alignment.
  const uint64_t Magic = alignedRead(Buffer.getBufferStart());
  return Magic == MEMPROF_RAW_MAGIC_64;
}

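// Prints a YAML summary of the raw profile: version and counts, the recorded
// segment information, and the merged memprof records per function.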
void RawMemProfReader::printYAML(raw_ostream &OS) {
  uint64_t NumAllocFunctions = 0, NumMibInfo = 0;
  for (const auto &KV : FunctionProfileData) {
    const size_t NumAllocSites = KV.second.AllocSites.size();
    if (NumAllocSites > 0) {
      NumAllocFunctions++;
      NumMibInfo += NumAllocSites;
    }
  }

  OS << "MemprofProfile:\n";
  OS << "  Summary:\n";
  OS << "    Version: " << MemprofRawVersion << "\n";
  OS << "    NumSegments: " << SegmentInfo.size() << "\n";
  OS << "    NumMibInfo: " << NumMibInfo << "\n";
  OS << "    NumAllocFunctions: " << NumAllocFunctions << "\n";
  OS << "    NumStackOffsets: " << StackMap.size() << "\n";
  // Print out the segment information.
  OS << "  Segments:\n";
  for (const auto &Entry : SegmentInfo) {
    OS << "  -\n";
    OS << "    BuildId: " << getBuildIdString(Entry) << "\n";
    OS << "    Start: 0x" << llvm::utohexstr(Entry.Start) << "\n";
    OS << "    End: 0x" << llvm::utohexstr(Entry.End) << "\n";
    OS << "    Offset: 0x" << llvm::utohexstr(Entry.Offset) << "\n";
  }
  // Print out the merged contents of the profiles.
  OS << "  Records:\n";
  for (const auto &[GUID, Record] : *this) {
    OS << "  -\n";
    OS << "    FunctionGUID: " << GUID << "\n";
    Record.print(OS);
  }
}

Error RawMemProfReader::initialize(std::unique_ptr<MemoryBuffer> DataBuffer) {
  const StringRef FileName = Binary.getBinary()->getFileName();

  auto *ElfObject = dyn_cast<object::ELFObjectFileBase>(Binary.getBinary());
  if (!ElfObject) {
    return report(make_error<StringError>(Twine("Not an ELF file: "),
                                          inconvertibleErrorCode()),
                  FileName);
  }

  // Check whether the profiled binary was built with position independent code
  // (PIC). Perform sanity checks for assumptions we rely on to simplify
  // symbolization.
  auto *Elf64LEObject = llvm::cast<llvm::object::ELF64LEObjectFile>(ElfObject);
  const llvm::object::ELF64LEFile &ElfFile = Elf64LEObject->getELFFile();
  auto PHdrsOr = ElfFile.program_headers();
  if (!PHdrsOr)
    return report(
        make_error<StringError>(Twine("Could not read program headers: "),
                                inconvertibleErrorCode()),
        FileName);

  int NumExecutableSegments = 0;
  for (const auto &Phdr : *PHdrsOr) {
    if (Phdr.p_type == ELF::PT_LOAD) {
      if (Phdr.p_flags & ELF::PF_X) {
        // We assume only one text segment in the main binary for simplicity
        // and reduce the overhead of checking multiple ranges during
        // symbolization.
        if (++NumExecutableSegments > 1) {
          return report(
              make_error<StringError>(
                  "Expect only one executable load segment in the binary",
                  inconvertibleErrorCode()),
              FileName);
        }
        // Segment will always be loaded at a page boundary, expect it to be
        // aligned already. Assume 4K pagesize for the machine from which the
        // profile has been collected. This should be fine for now, in case we
        // want to support other pagesizes it can be recorded in the raw
        // profile during collection.
        PreferredTextSegmentAddress = Phdr.p_vaddr;
        assert(Phdr.p_vaddr == (Phdr.p_vaddr & ~(0x1000 - 1U)) &&
               "Expect p_vaddr to always be page aligned");
        assert(Phdr.p_offset == 0 && "Expect p_offset = 0 for symbolization.");
      }
    }
  }

  auto Triple = ElfObject->makeTriple();
  if (!Triple.isX86())
    return report(make_error<StringError>(Twine("Unsupported target: ") +
                                              Triple.getArchName(),
                                          inconvertibleErrorCode()),
                  FileName);

  // Process the raw profile.
  if (Error E = readRawProfile(std::move(DataBuffer)))
    return E;

  if (Error E = setupForSymbolization())
    return E;

  auto *Object = cast<object::ObjectFile>(Binary.getBinary());
  std::unique_ptr<DIContext> Context = DWARFContext::create(
      *Object, DWARFContext::ProcessDebugRelocations::Process);

  auto SOFOr = symbolize::SymbolizableObjectFile::create(
      Object, std::move(Context), /*UntagAddresses=*/false);
  if (!SOFOr)
    return report(SOFOr.takeError(), FileName);
  auto Symbolizer = std::move(SOFOr.get());

  // The symbolizer ownership is moved into symbolizeAndFilterStackFrames so
  // that it is freed automatically at the end, when it is no longer used. This
  // reduces peak memory since it won't be live while also mapping the raw
  // profile into records afterwards.
  if (Error E = symbolizeAndFilterStackFrames(std::move(Symbolizer)))
    return E;

  return mapRawProfileToRecords();
}

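// Matches the build id of the profiled binary against the segment entries
// recorded in the raw profile and remembers the profiled text segment range
// used later to compute module offsets.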
Error RawMemProfReader::setupForSymbolization() {
  auto *Object = cast<object::ObjectFile>(Binary.getBinary());
  object::BuildIDRef BinaryId = object::getBuildID(Object);
  if (BinaryId.empty())
    return make_error<StringError>(Twine("No build id found in binary ") +
                                       Binary.getBinary()->getFileName(),
                                   inconvertibleErrorCode());

  int NumMatched = 0;
  for (const auto &Entry : SegmentInfo) {
    llvm::ArrayRef<uint8_t> SegmentId(Entry.BuildId, Entry.BuildIdSize);
    if (BinaryId == SegmentId) {
      // We assume only one text segment in the main binary for simplicity and
      // reduce the overhead of checking multiple ranges during symbolization.
      if (++NumMatched > 1) {
        return make_error<StringError>(
            "We expect only one executable segment in the profiled binary",
            inconvertibleErrorCode());
      }
      ProfiledTextSegmentStart = Entry.Start;
      ProfiledTextSegmentEnd = Entry.End;
    }
  }
  assert(NumMatched != 0 &&
         "No matching executable segments in segment info.");
  assert((PreferredTextSegmentAddress == 0 ||
          (PreferredTextSegmentAddress == ProfiledTextSegmentStart)) &&
         "Expect text segment address to be 0 or equal to profiled text "
         "segment start.");
  return Error::success();
}

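// Converts the symbolized callstacks and their MemInfoBlocks into
// IndexedMemProfRecords keyed by function GUID: allocation sites are attached
// bottom-up to every enclosing function up to the first non-inline frame, and
// related callsites are recorded per function.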
Error RawMemProfReader::mapRawProfileToRecords() {
  // Hold a mapping from function to each callsite location we encounter within
  // it that is part of some dynamic allocation context. The location is stored
  // as a pointer to a symbolized list of inline frames.
  using LocationPtr = const llvm::SmallVector<FrameId> *;
  llvm::MapVector<GlobalValue::GUID, llvm::SetVector<LocationPtr>>
      PerFunctionCallSites;

  // Convert the raw profile callstack data into memprof records. While doing
  // so keep track of related contexts so that we can fill these in later.
  for (const auto &[StackId, MIB] : CallstackProfileData) {
    auto It = StackMap.find(StackId);
    if (It == StackMap.end())
      return make_error<InstrProfError>(
          instrprof_error::malformed,
          "memprof callstack record does not contain id: " + Twine(StackId));

    // Construct the symbolized callstack.
    llvm::SmallVector<FrameId> Callstack;
    Callstack.reserve(It->getSecond().size());

    llvm::ArrayRef<uint64_t> Addresses = It->getSecond();
    for (size_t I = 0; I < Addresses.size(); I++) {
      const uint64_t Address = Addresses[I];
      assert(SymbolizedFrame.count(Address) > 0 &&
             "Address not found in SymbolizedFrame map");
      const SmallVector<FrameId> &Frames = SymbolizedFrame[Address];

      assert(!idToFrame(Frames.back()).IsInlineFrame &&
             "The last frame should not be inlined");

      // Record the callsites for each function. Skip the first frame of the
      // first address since it is the allocation site itself that is recorded
      // as an alloc site.
      for (size_t J = 0; J < Frames.size(); J++) {
        if (I == 0 && J == 0)
          continue;
        // We attach the entire bottom-up frame here for the callsite even
        // though we only need the frames up to and including the frame for
        // Frames[J].Function. This will enable better deduplication for
        // compression in the future.
        const GlobalValue::GUID Guid = idToFrame(Frames[J]).Function;
        PerFunctionCallSites[Guid].insert(&Frames);
      }

      // Add all the frames to the current allocation callstack.
      Callstack.append(Frames.begin(), Frames.end());
    }

    CallStackId CSId = hashCallStack(Callstack);
    CSIdToCallStack.insert({CSId, Callstack});

    // We attach the memprof record to each function bottom-up including the
    // first non-inline frame.
    for (size_t I = 0; /*Break out using the condition below*/; I++) {
      const Frame &F = idToFrame(Callstack[I]);
      auto Result =
          FunctionProfileData.insert({F.Function, IndexedMemProfRecord()});
      IndexedMemProfRecord &Record = Result.first->second;
      Record.AllocSites.emplace_back(Callstack, CSId, MIB);

      if (!F.IsInlineFrame)
        break;
    }
  }

  // Fill in the related callsites per function.
  for (const auto &[Id, Locs] : PerFunctionCallSites) {
    // Some functions may have only callsite data and no allocation data. Here
    // we insert a new entry for callsite data if we need to.
    auto Result = FunctionProfileData.insert({Id, IndexedMemProfRecord()});
    IndexedMemProfRecord &Record = Result.first->second;
    for (LocationPtr Loc : Locs) {
      CallStackId CSId = hashCallStack(*Loc);
      CSIdToCallStack.insert({CSId, *Loc});
      Record.CallSites.push_back(*Loc);
      Record.CallSiteIds.push_back(CSId);
    }
  }

  verifyFunctionProfileData(FunctionProfileData);

  return Error::success();
}

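// Symbolizes every virtual address referenced by the stack map, caching the
// resulting frames. Addresses that cannot be symbolized or that come from the
// memprof runtime are discarded, and callstacks left empty afterwards are
// dropped along with their profile data.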
Error RawMemProfReader::symbolizeAndFilterStackFrames(
    std::unique_ptr<llvm::symbolize::SymbolizableModule> Symbolizer) {
  // The specifier to use when symbolization is requested.
  const DILineInfoSpecifier Specifier(
      DILineInfoSpecifier::FileLineInfoKind::RawValue,
      DILineInfoSpecifier::FunctionNameKind::LinkageName);

  // For entries where all PCs in the callstack are discarded, we erase the
  // entry from the stack map.
  llvm::SmallVector<uint64_t> EntriesToErase;
  // We keep track of all prior discarded entries so that we can avoid invoking
  // the symbolizer for such entries.
  llvm::DenseSet<uint64_t> AllVAddrsToDiscard;
  for (auto &Entry : StackMap) {
    for (const uint64_t VAddr : Entry.getSecond()) {
      // Check if we have already symbolized and cached the result or if we
      // don't want to attempt symbolization since we know this address is bad.
      // In this case the address is also removed from the current callstack.
      if (SymbolizedFrame.count(VAddr) > 0 ||
          AllVAddrsToDiscard.contains(VAddr))
        continue;

      Expected<DIInliningInfo> DIOr = Symbolizer->symbolizeInlinedCode(
          getModuleOffset(VAddr), Specifier, /*UseSymbolTable=*/false);
      if (!DIOr)
        return DIOr.takeError();
      DIInliningInfo DI = DIOr.get();

      // Drop frames which we can't symbolize or if they belong to the runtime.
      if (DI.getFrame(0).FunctionName == DILineInfo::BadString ||
          isRuntimePath(DI.getFrame(0).FileName)) {
        AllVAddrsToDiscard.insert(VAddr);
        continue;
      }

      for (size_t I = 0, NumFrames = DI.getNumberOfFrames(); I < NumFrames;
           I++) {
        const auto &DIFrame = DI.getFrame(I);
        const uint64_t Guid =
            IndexedMemProfRecord::getGUID(DIFrame.FunctionName);
        const Frame F(Guid, DIFrame.Line - DIFrame.StartLine, DIFrame.Column,
                      // Only the last entry is not an inlined location.
                      I != NumFrames - 1);
        // Here we retain a mapping from the GUID to canonical symbol name
        // instead of adding it to the frame object directly to reduce memory
        // overhead. This is because there can be many unique frames,
        // particularly for callsite frames.
        if (KeepSymbolName) {
          StringRef CanonicalName =
              sampleprof::FunctionSamples::getCanonicalFnName(
                  DIFrame.FunctionName);
          GuidToSymbolName.insert({Guid, CanonicalName.str()});
        }

        const FrameId Hash = F.hash();
        IdToFrame.insert({Hash, F});
        SymbolizedFrame[VAddr].push_back(Hash);
      }
    }

    auto &CallStack = Entry.getSecond();
    llvm::erase_if(CallStack, [&AllVAddrsToDiscard](const uint64_t A) {
      return AllVAddrsToDiscard.contains(A);
    });
    if (CallStack.empty())
      EntriesToErase.push_back(Entry.getFirst());
  }

  // Drop the entries where the callstack is empty.
  for (const uint64_t Id : EntriesToErase) {
    StackMap.erase(Id);
    if (CallstackProfileData[Id].AccessHistogramSize > 0)
      free((void *)CallstackProfileData[Id].AccessHistogram);
    CallstackProfileData.erase(Id);
  }

  if (StackMap.empty())
    return make_error<InstrProfError>(
        instrprof_error::malformed,
        "no entries in callstack map after symbolization");

  return Error::success();
}

std::vector<std::string>
RawMemProfReader::peekBuildIds(MemoryBuffer *DataBuffer) {
  const char *Next = DataBuffer->getBufferStart();
  // Use a SetVector since a profile file may contain multiple raw profile
  // dumps, each with segment information. We want them unique and in the order
  // they were stored in the profile; the profiled binary should be the first
  // entry. The runtime uses dl_iterate_phdr and the "... first object visited
  // by callback is the main program."
  // https://man7.org/linux/man-pages/man3/dl_iterate_phdr.3.html
  llvm::SetVector<std::string, std::vector<std::string>,
                  llvm::SmallSet<std::string, 10>>
      BuildIds;
  while (Next < DataBuffer->getBufferEnd()) {
    const auto *Header = reinterpret_cast<const memprof::Header *>(Next);

    const llvm::SmallVector<SegmentEntry> Entries =
        readSegmentEntries(Next + Header->SegmentOffset);

    for (const auto &Entry : Entries)
      BuildIds.insert(getBuildIdString(Entry));

    Next += Header->TotalSize;
  }
  return BuildIds.takeVector();
}

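// Dispatches to the version-specific MemInfoBlock reader based on the raw
// profile version recorded in the header.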
// FIXME: Add a schema for serializing similar to IndexedMemProfReader. This
// will make it easier to deserialize different raw memprof profile versions.
llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
RawMemProfReader::readMemInfoBlocks(const char *Ptr) {
  if (MemprofRawVersion == 3ULL)
    return readMemInfoBlocksV3(Ptr);
  if (MemprofRawVersion == 4ULL)
    return readMemInfoBlocksV4(Ptr);
  llvm_unreachable(
      "Panic: Unsupported version number when reading MemInfoBlocks");
}

Error RawMemProfReader::readRawProfile(
    std::unique_ptr<MemoryBuffer> DataBuffer) {
  const char *Next = DataBuffer->getBufferStart();

  while (Next < DataBuffer->getBufferEnd()) {
    const auto *Header = reinterpret_cast<const memprof::Header *>(Next);

    // Set the reader version to the raw version of this profile. Whether the
    // version is supported has already been checked before creating the
    // reader.
    MemprofRawVersion = Header->Version;

    // Read in the segment information and check whether it is the same across
    // all profiles in this binary file.
    const llvm::SmallVector<SegmentEntry> Entries =
        readSegmentEntries(Next + Header->SegmentOffset);
    if (!SegmentInfo.empty() && SegmentInfo != Entries) {
      // We do not expect segment information to change when deserializing from
      // the same binary profile file. This can happen if dynamic libraries are
      // loaded/unloaded between profile dumping.
      return make_error<InstrProfError>(
          instrprof_error::malformed,
          "memprof raw profile has different segment information");
    }
    SegmentInfo.assign(Entries.begin(), Entries.end());

    // Read in the MemInfoBlocks. Merge them based on stack id - we assume that
    // raw profiles in the same binary file are from the same process so the
    // stackdepot ids are the same.
    for (const auto &[Id, MIB] : readMemInfoBlocks(Next + Header->MIBOffset)) {
      if (CallstackProfileData.count(Id)) {

        if (MemprofRawVersion >= 4ULL &&
            (CallstackProfileData[Id].AccessHistogramSize > 0 ||
             MIB.AccessHistogramSize > 0)) {
          uintptr_t ShorterHistogram;
          if (CallstackProfileData[Id].AccessHistogramSize >
              MIB.AccessHistogramSize)
            ShorterHistogram = MIB.AccessHistogram;
          else
            ShorterHistogram = CallstackProfileData[Id].AccessHistogram;
          CallstackProfileData[Id].Merge(MIB);
          free((void *)ShorterHistogram);
        } else {
          CallstackProfileData[Id].Merge(MIB);
        }
      } else {
        CallstackProfileData[Id] = MIB;
      }
    }

    // Read in the callstack for each id. For multiple raw profiles in the same
    // file, we expect that the callstack is the same for a unique id.
    const CallStackMap CSM = readStackInfo(Next + Header->StackOffset);
    if (StackMap.empty()) {
      StackMap = CSM;
    } else {
      if (mergeStackMap(CSM, StackMap))
        return make_error<InstrProfError>(
            instrprof_error::malformed,
            "memprof raw profile got different call stack for same id");
    }

    Next += Header->TotalSize;
  }

  return Error::success();
}

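// Returns the module offset to use for symbolization of a virtual address
// from the profile. Addresses inside the profiled text segment are rebased
// onto the binary's preferred text segment address; all others are returned
// unchanged and will fail symbolization.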
object::SectionedAddress
RawMemProfReader::getModuleOffset(const uint64_t VirtualAddress) {
  if (VirtualAddress > ProfiledTextSegmentStart &&
      VirtualAddress <= ProfiledTextSegmentEnd) {
    // For PIE binaries, the preferred address is zero and we adjust the
    // virtual address by start of the profiled segment assuming that the
    // offset of the segment in the binary is zero. For non-PIE binaries the
    // preferred and profiled segment addresses should be equal and this is a
    // no-op.
    const uint64_t AdjustedAddress =
        VirtualAddress + PreferredTextSegmentAddress - ProfiledTextSegmentStart;
    return object::SectionedAddress{AdjustedAddress};
  }
  // Addresses which do not originate from the profiled text segment in the
  // binary are not adjusted. These will fail symbolization and be filtered out
  // during processing.
  return object::SectionedAddress{VirtualAddress};
}

Error RawMemProfReader::readNextRecord(
    GuidMemProfRecordPair &GuidRecord,
    std::function<const Frame(const FrameId)> Callback) {
  // Create a new callback for the RawMemProfRecord iterator so that we can
  // provide the symbol name if the reader was initialized with KeepSymbolName
  // = true. This is useful for debugging and testing.
  auto IdToFrameCallback = [this](const FrameId Id) {
    Frame F = this->idToFrame(Id);
    if (!this->KeepSymbolName)
      return F;
    auto Iter = this->GuidToSymbolName.find(F.Function);
    assert(Iter != this->GuidToSymbolName.end());
    F.SymbolName = std::make_unique<std::string>(Iter->getSecond());
    return F;
  };
  return MemProfReader::readNextRecord(GuidRecord, IdToFrameCallback);
}
} // namespace memprof
} // namespace llvm