1 //===- SampleProfReader.cpp - Read LLVM sample profile data ---------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the class that reads LLVM sample profiles. It
10 // supports three file formats: text, binary and gcov.
12 // The textual representation is useful for debugging and testing purposes. The
13 // binary representation is more compact, resulting in smaller file sizes.
15 // The gcov encoding is the one generated by GCC's AutoFDO profile creation
16 // tool (https://github.com/google/autofdo)
18 // All three encodings can be used interchangeably as an input sample profile.
20 //===----------------------------------------------------------------------===//
22 #include "llvm/ProfileData/SampleProfReader.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/IR/ProfileSummary.h"
27 #include "llvm/ProfileData/ProfileCommon.h"
28 #include "llvm/ProfileData/SampleProf.h"
29 #include "llvm/Support/Compression.h"
30 #include "llvm/Support/ErrorOr.h"
31 #include "llvm/Support/LEB128.h"
32 #include "llvm/Support/LineIterator.h"
33 #include "llvm/Support/MD5.h"
34 #include "llvm/Support/MemoryBuffer.h"
35 #include "llvm/Support/raw_ostream.h"
41 #include <system_error>
45 using namespace sampleprof
;
47 /// Dump the function profile for \p FName.
49 /// \param FName Name of the function to print.
50 /// \param OS Stream to emit the output to.
51 void SampleProfileReader::dumpFunctionProfile(StringRef FName
,
53 OS
<< "Function: " << FName
<< ": " << Profiles
[FName
];
56 /// Dump all the function profiles found on stream \p OS.
57 void SampleProfileReader::dump(raw_ostream
&OS
) {
58 for (const auto &I
: Profiles
)
59 dumpFunctionProfile(I
.getKey(), OS
);
62 /// Parse \p Input as function head.
64 /// Parse one line of \p Input, and update function name in \p FName,
65 /// function's total sample count in \p NumSamples, function's entry
66 /// count in \p NumHeadSamples.
68 /// \returns true if parsing is successful.
69 static bool ParseHead(const StringRef
&Input
, StringRef
&FName
,
70 uint64_t &NumSamples
, uint64_t &NumHeadSamples
) {
73 size_t n2
= Input
.rfind(':');
74 size_t n1
= Input
.rfind(':', n2
- 1);
75 FName
= Input
.substr(0, n1
);
76 if (Input
.substr(n1
+ 1, n2
- n1
- 1).getAsInteger(10, NumSamples
))
78 if (Input
.substr(n2
+ 1).getAsInteger(10, NumHeadSamples
))
/// Returns true if line offset \p L is legal (only has 16 bits).
static bool isOffsetLegal(unsigned L) {
  // A legal offset has no bit set above the low 16.
  const unsigned HighBits = L & ~0xffffu;
  return HighBits == 0;
}
86 /// Parse \p Input as line sample.
88 /// \param Input input line.
89 /// \param IsCallsite true if the line represents an inlined callsite.
90 /// \param Depth the depth of the inline stack.
91 /// \param NumSamples total samples of the line/inlined callsite.
92 /// \param LineOffset line offset to the start of the function.
93 /// \param Discriminator discriminator of the line.
94 /// \param TargetCountMap map from indirect call target to count.
96 /// returns true if parsing is successful.
97 static bool ParseLine(const StringRef
&Input
, bool &IsCallsite
, uint32_t &Depth
,
98 uint64_t &NumSamples
, uint32_t &LineOffset
,
99 uint32_t &Discriminator
, StringRef
&CalleeName
,
100 DenseMap
<StringRef
, uint64_t> &TargetCountMap
) {
101 for (Depth
= 0; Input
[Depth
] == ' '; Depth
++)
106 size_t n1
= Input
.find(':');
107 StringRef Loc
= Input
.substr(Depth
, n1
- Depth
);
108 size_t n2
= Loc
.find('.');
109 if (n2
== StringRef::npos
) {
110 if (Loc
.getAsInteger(10, LineOffset
) || !isOffsetLegal(LineOffset
))
114 if (Loc
.substr(0, n2
).getAsInteger(10, LineOffset
))
116 if (Loc
.substr(n2
+ 1).getAsInteger(10, Discriminator
))
120 StringRef Rest
= Input
.substr(n1
+ 2);
121 if (Rest
[0] >= '0' && Rest
[0] <= '9') {
123 size_t n3
= Rest
.find(' ');
124 if (n3
== StringRef::npos
) {
125 if (Rest
.getAsInteger(10, NumSamples
))
128 if (Rest
.substr(0, n3
).getAsInteger(10, NumSamples
))
131 // Find call targets and their sample counts.
132 // Note: In some cases, there are symbols in the profile which are not
133 // mangled. To accommodate such cases, use colon + integer pairs as the
136 // _M_construct<char *>:1000 string_view<std::allocator<char> >:437
137 // ":1000" and ":437" are used as anchor points so the string above will
139 // target: _M_construct<char *>
141 // target: string_view<std::allocator<char> >
143 while (n3
!= StringRef::npos
) {
144 n3
+= Rest
.substr(n3
).find_first_not_of(' ');
145 Rest
= Rest
.substr(n3
);
146 n3
= Rest
.find_first_of(':');
147 if (n3
== StringRef::npos
|| n3
== 0)
153 // Get the segment after the current colon.
154 StringRef AfterColon
= Rest
.substr(n3
+ 1);
155 // Get the target symbol before the current colon.
156 Target
= Rest
.substr(0, n3
);
157 // Check if the word after the current colon is an integer.
158 n4
= AfterColon
.find_first_of(' ');
159 n4
= (n4
!= StringRef::npos
) ? n3
+ n4
+ 1 : Rest
.size();
160 StringRef WordAfterColon
= Rest
.substr(n3
+ 1, n4
- n3
- 1);
161 if (!WordAfterColon
.getAsInteger(10, count
))
164 // Try to find the next colon.
165 uint64_t n5
= AfterColon
.find_first_of(':');
166 if (n5
== StringRef::npos
)
171 // An anchor point is found. Save the {target, count} pair
172 TargetCountMap
[Target
] = count
;
173 if (n4
== Rest
.size())
175 // Change n3 to the next blank space after colon + integer pair.
180 size_t n3
= Rest
.find_last_of(':');
181 CalleeName
= Rest
.substr(0, n3
);
182 if (Rest
.substr(n3
+ 1).getAsInteger(10, NumSamples
))
188 /// Load samples from a text file.
190 /// See the documentation at the top of the file for an explanation of
191 /// the expected format.
193 /// \returns true if the file was loaded successfully, false otherwise.
194 std::error_code
SampleProfileReaderText::readImpl() {
195 line_iterator
LineIt(*Buffer
, /*SkipBlanks=*/true, '#');
196 sampleprof_error Result
= sampleprof_error::success
;
198 InlineCallStack InlineStack
;
200 for (; !LineIt
.is_at_eof(); ++LineIt
) {
201 if ((*LineIt
)[(*LineIt
).find_first_not_of(' ')] == '#')
203 // Read the header of each function.
205 // Note that for function identifiers we are actually expecting
206 // mangled names, but we may not always get them. This happens when
207 // the compiler decides not to emit the function (e.g., it was inlined
208 // and removed). In this case, the binary will not have the linkage
209 // name for the function, so the profiler will emit the function's
210 // unmangled name, which may contain characters like ':' and '>' in its
211 // name (member functions, templates, etc).
213 // The only requirement we place on the identifier, then, is that it
214 // should not begin with a number.
215 if ((*LineIt
)[0] != ' ') {
216 uint64_t NumSamples
, NumHeadSamples
;
218 if (!ParseHead(*LineIt
, FName
, NumSamples
, NumHeadSamples
)) {
219 reportError(LineIt
.line_number(),
220 "Expected 'mangled_name:NUM:NUM', found " + *LineIt
);
221 return sampleprof_error::malformed
;
223 Profiles
[FName
] = FunctionSamples();
224 FunctionSamples
&FProfile
= Profiles
[FName
];
225 FProfile
.setName(FName
);
226 MergeResult(Result
, FProfile
.addTotalSamples(NumSamples
));
227 MergeResult(Result
, FProfile
.addHeadSamples(NumHeadSamples
));
229 InlineStack
.push_back(&FProfile
);
233 DenseMap
<StringRef
, uint64_t> TargetCountMap
;
235 uint32_t Depth
, LineOffset
, Discriminator
;
236 if (!ParseLine(*LineIt
, IsCallsite
, Depth
, NumSamples
, LineOffset
,
237 Discriminator
, FName
, TargetCountMap
)) {
238 reportError(LineIt
.line_number(),
239 "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " +
241 return sampleprof_error::malformed
;
244 while (InlineStack
.size() > Depth
) {
245 InlineStack
.pop_back();
247 FunctionSamples
&FSamples
= InlineStack
.back()->functionSamplesAt(
248 LineLocation(LineOffset
, Discriminator
))[FName
];
249 FSamples
.setName(FName
);
250 MergeResult(Result
, FSamples
.addTotalSamples(NumSamples
));
251 InlineStack
.push_back(&FSamples
);
253 while (InlineStack
.size() > Depth
) {
254 InlineStack
.pop_back();
256 FunctionSamples
&FProfile
= *InlineStack
.back();
257 for (const auto &name_count
: TargetCountMap
) {
258 MergeResult(Result
, FProfile
.addCalledTargetSamples(
259 LineOffset
, Discriminator
, name_count
.first
,
262 MergeResult(Result
, FProfile
.addBodySamples(LineOffset
, Discriminator
,
267 if (Result
== sampleprof_error::success
)
273 bool SampleProfileReaderText::hasFormat(const MemoryBuffer
&Buffer
) {
276 // Check that the first non-comment line is a valid function header.
277 line_iterator
LineIt(Buffer
, /*SkipBlanks=*/true, '#');
278 if (!LineIt
.is_at_eof()) {
279 if ((*LineIt
)[0] != ' ') {
280 uint64_t NumSamples
, NumHeadSamples
;
282 result
= ParseHead(*LineIt
, FName
, NumSamples
, NumHeadSamples
);
289 template <typename T
> ErrorOr
<T
> SampleProfileReaderBinary::readNumber() {
290 unsigned NumBytesRead
= 0;
292 uint64_t Val
= decodeULEB128(Data
, &NumBytesRead
);
294 if (Val
> std::numeric_limits
<T
>::max())
295 EC
= sampleprof_error::malformed
;
296 else if (Data
+ NumBytesRead
> End
)
297 EC
= sampleprof_error::truncated
;
299 EC
= sampleprof_error::success
;
302 reportError(0, EC
.message());
306 Data
+= NumBytesRead
;
307 return static_cast<T
>(Val
);
310 ErrorOr
<StringRef
> SampleProfileReaderBinary::readString() {
312 StringRef
Str(reinterpret_cast<const char *>(Data
));
313 if (Data
+ Str
.size() + 1 > End
) {
314 EC
= sampleprof_error::truncated
;
315 reportError(0, EC
.message());
319 Data
+= Str
.size() + 1;
323 template <typename T
>
324 ErrorOr
<T
> SampleProfileReaderBinary::readUnencodedNumber() {
327 if (Data
+ sizeof(T
) > End
) {
328 EC
= sampleprof_error::truncated
;
329 reportError(0, EC
.message());
333 using namespace support
;
334 T Val
= endian::readNext
<T
, little
, unaligned
>(Data
);
338 template <typename T
>
339 inline ErrorOr
<uint32_t> SampleProfileReaderBinary::readStringIndex(T
&Table
) {
341 auto Idx
= readNumber
<uint32_t>();
342 if (std::error_code EC
= Idx
.getError())
344 if (*Idx
>= Table
.size())
345 return sampleprof_error::truncated_name_table
;
349 ErrorOr
<StringRef
> SampleProfileReaderBinary::readStringFromTable() {
350 auto Idx
= readStringIndex(NameTable
);
351 if (std::error_code EC
= Idx
.getError())
354 return NameTable
[*Idx
];
357 ErrorOr
<StringRef
> SampleProfileReaderCompactBinary::readStringFromTable() {
358 auto Idx
= readStringIndex(NameTable
);
359 if (std::error_code EC
= Idx
.getError())
362 return StringRef(NameTable
[*Idx
]);
366 SampleProfileReaderBinary::readProfile(FunctionSamples
&FProfile
) {
367 auto NumSamples
= readNumber
<uint64_t>();
368 if (std::error_code EC
= NumSamples
.getError())
370 FProfile
.addTotalSamples(*NumSamples
);
372 // Read the samples in the body.
373 auto NumRecords
= readNumber
<uint32_t>();
374 if (std::error_code EC
= NumRecords
.getError())
377 for (uint32_t I
= 0; I
< *NumRecords
; ++I
) {
378 auto LineOffset
= readNumber
<uint64_t>();
379 if (std::error_code EC
= LineOffset
.getError())
382 if (!isOffsetLegal(*LineOffset
)) {
383 return std::error_code();
386 auto Discriminator
= readNumber
<uint64_t>();
387 if (std::error_code EC
= Discriminator
.getError())
390 auto NumSamples
= readNumber
<uint64_t>();
391 if (std::error_code EC
= NumSamples
.getError())
394 auto NumCalls
= readNumber
<uint32_t>();
395 if (std::error_code EC
= NumCalls
.getError())
398 for (uint32_t J
= 0; J
< *NumCalls
; ++J
) {
399 auto CalledFunction(readStringFromTable());
400 if (std::error_code EC
= CalledFunction
.getError())
403 auto CalledFunctionSamples
= readNumber
<uint64_t>();
404 if (std::error_code EC
= CalledFunctionSamples
.getError())
407 FProfile
.addCalledTargetSamples(*LineOffset
, *Discriminator
,
408 *CalledFunction
, *CalledFunctionSamples
);
411 FProfile
.addBodySamples(*LineOffset
, *Discriminator
, *NumSamples
);
414 // Read all the samples for inlined function calls.
415 auto NumCallsites
= readNumber
<uint32_t>();
416 if (std::error_code EC
= NumCallsites
.getError())
419 for (uint32_t J
= 0; J
< *NumCallsites
; ++J
) {
420 auto LineOffset
= readNumber
<uint64_t>();
421 if (std::error_code EC
= LineOffset
.getError())
424 auto Discriminator
= readNumber
<uint64_t>();
425 if (std::error_code EC
= Discriminator
.getError())
428 auto FName(readStringFromTable());
429 if (std::error_code EC
= FName
.getError())
432 FunctionSamples
&CalleeProfile
= FProfile
.functionSamplesAt(
433 LineLocation(*LineOffset
, *Discriminator
))[*FName
];
434 CalleeProfile
.setName(*FName
);
435 if (std::error_code EC
= readProfile(CalleeProfile
))
439 return sampleprof_error::success
;
443 SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start
) {
445 auto NumHeadSamples
= readNumber
<uint64_t>();
446 if (std::error_code EC
= NumHeadSamples
.getError())
449 auto FName(readStringFromTable());
450 if (std::error_code EC
= FName
.getError())
453 Profiles
[*FName
] = FunctionSamples();
454 FunctionSamples
&FProfile
= Profiles
[*FName
];
455 FProfile
.setName(*FName
);
457 FProfile
.addHeadSamples(*NumHeadSamples
);
459 if (std::error_code EC
= readProfile(FProfile
))
461 return sampleprof_error::success
;
464 std::error_code
SampleProfileReaderBinary::readImpl() {
466 if (std::error_code EC
= readFuncProfile(Data
))
470 return sampleprof_error::success
;
474 SampleProfileReaderExtBinary::readOneSection(const uint8_t *Start
,
475 uint64_t Size
, SecType Type
) {
480 if (std::error_code EC
= readSummary())
484 if (std::error_code EC
= readNameTable())
488 if (std::error_code EC
= readFuncProfiles())
491 case SecProfileSymbolList
:
492 if (std::error_code EC
= readProfileSymbolList())
495 case SecFuncOffsetTable
:
496 if (std::error_code EC
= readFuncOffsetTable())
502 return sampleprof_error::success
;
505 void SampleProfileReaderExtBinary::collectFuncsFrom(const Module
&M
) {
509 FuncsToUse
.insert(FunctionSamples::getCanonicalFnName(F
));
512 std::error_code
SampleProfileReaderExtBinary::readFuncOffsetTable() {
513 auto Size
= readNumber
<uint64_t>();
514 if (std::error_code EC
= Size
.getError())
517 FuncOffsetTable
.reserve(*Size
);
518 for (uint32_t I
= 0; I
< *Size
; ++I
) {
519 auto FName(readStringFromTable());
520 if (std::error_code EC
= FName
.getError())
523 auto Offset
= readNumber
<uint64_t>();
524 if (std::error_code EC
= Offset
.getError())
527 FuncOffsetTable
[*FName
] = *Offset
;
529 return sampleprof_error::success
;
532 std::error_code
SampleProfileReaderExtBinary::readFuncProfiles() {
533 const uint8_t *Start
= Data
;
536 if (std::error_code EC
= readFuncProfile(Data
))
539 assert(Data
== End
&& "More data is read than expected");
540 return sampleprof_error::success
;
544 for (auto Name
: FuncsToUse
) {
545 Remapper
->insert(Name
);
549 for (auto NameOffset
: FuncOffsetTable
) {
550 auto FuncName
= NameOffset
.first
;
551 if (!FuncsToUse
.count(FuncName
) &&
552 (!Remapper
|| !Remapper
->exist(FuncName
)))
554 const uint8_t *FuncProfileAddr
= Start
+ NameOffset
.second
;
555 assert(FuncProfileAddr
< End
&& "out of LBRProfile section");
556 if (std::error_code EC
= readFuncProfile(FuncProfileAddr
))
561 return sampleprof_error::success
;
564 std::error_code
SampleProfileReaderExtBinary::readProfileSymbolList() {
566 ProfSymList
= std::make_unique
<ProfileSymbolList
>();
568 if (std::error_code EC
= ProfSymList
->read(Data
, End
- Data
))
572 return sampleprof_error::success
;
575 std::error_code
SampleProfileReaderExtBinaryBase::decompressSection(
576 const uint8_t *SecStart
, const uint64_t SecSize
,
577 const uint8_t *&DecompressBuf
, uint64_t &DecompressBufSize
) {
579 End
= SecStart
+ SecSize
;
580 auto DecompressSize
= readNumber
<uint64_t>();
581 if (std::error_code EC
= DecompressSize
.getError())
583 DecompressBufSize
= *DecompressSize
;
585 auto CompressSize
= readNumber
<uint64_t>();
586 if (std::error_code EC
= CompressSize
.getError())
589 if (!llvm::zlib::isAvailable())
590 return sampleprof_error::zlib_unavailable
;
592 StringRef
CompressedStrings(reinterpret_cast<const char *>(Data
),
594 char *Buffer
= Allocator
.Allocate
<char>(DecompressBufSize
);
595 size_t UCSize
= DecompressBufSize
;
597 zlib::uncompress(CompressedStrings
, Buffer
, UCSize
);
599 return sampleprof_error::uncompress_failed
;
600 DecompressBuf
= reinterpret_cast<const uint8_t *>(Buffer
);
601 return sampleprof_error::success
;
604 std::error_code
SampleProfileReaderExtBinaryBase::readImpl() {
605 const uint8_t *BufStart
=
606 reinterpret_cast<const uint8_t *>(Buffer
->getBufferStart());
608 for (auto &Entry
: SecHdrTable
) {
609 // Skip empty section.
613 const uint8_t *SecStart
= BufStart
+ Entry
.Offset
;
614 uint64_t SecSize
= Entry
.Size
;
616 // If the section is compressed, decompress it into a buffer
617 // DecompressBuf before reading the actual data. The pointee of
618 // 'Data' will be changed to buffer hold by DecompressBuf
619 // temporarily when reading the actual data.
620 bool isCompressed
= hasSecFlag(Entry
, SecFlagCompress
);
622 const uint8_t *DecompressBuf
;
623 uint64_t DecompressBufSize
;
624 if (std::error_code EC
= decompressSection(
625 SecStart
, SecSize
, DecompressBuf
, DecompressBufSize
))
627 SecStart
= DecompressBuf
;
628 SecSize
= DecompressBufSize
;
631 if (std::error_code EC
= readOneSection(SecStart
, SecSize
, Entry
.Type
))
633 if (Data
!= SecStart
+ SecSize
)
634 return sampleprof_error::malformed
;
636 // Change the pointee of 'Data' from DecompressBuf to original Buffer.
638 Data
= BufStart
+ Entry
.Offset
;
639 End
= BufStart
+ Buffer
->getBufferSize();
643 return sampleprof_error::success
;
646 std::error_code
SampleProfileReaderCompactBinary::readImpl() {
647 std::vector
<uint64_t> OffsetsToUse
;
649 for (auto FuncEntry
: FuncOffsetTable
) {
650 OffsetsToUse
.push_back(FuncEntry
.second
);
654 for (auto Name
: FuncsToUse
) {
655 auto GUID
= std::to_string(MD5Hash(Name
));
656 auto iter
= FuncOffsetTable
.find(StringRef(GUID
));
657 if (iter
== FuncOffsetTable
.end())
659 OffsetsToUse
.push_back(iter
->second
);
663 for (auto Offset
: OffsetsToUse
) {
664 const uint8_t *SavedData
= Data
;
665 if (std::error_code EC
= readFuncProfile(
666 reinterpret_cast<const uint8_t *>(Buffer
->getBufferStart()) +
671 return sampleprof_error::success
;
674 std::error_code
SampleProfileReaderRawBinary::verifySPMagic(uint64_t Magic
) {
675 if (Magic
== SPMagic())
676 return sampleprof_error::success
;
677 return sampleprof_error::bad_magic
;
680 std::error_code
SampleProfileReaderExtBinary::verifySPMagic(uint64_t Magic
) {
681 if (Magic
== SPMagic(SPF_Ext_Binary
))
682 return sampleprof_error::success
;
683 return sampleprof_error::bad_magic
;
687 SampleProfileReaderCompactBinary::verifySPMagic(uint64_t Magic
) {
688 if (Magic
== SPMagic(SPF_Compact_Binary
))
689 return sampleprof_error::success
;
690 return sampleprof_error::bad_magic
;
693 std::error_code
SampleProfileReaderBinary::readNameTable() {
694 auto Size
= readNumber
<uint32_t>();
695 if (std::error_code EC
= Size
.getError())
697 NameTable
.reserve(*Size
);
698 for (uint32_t I
= 0; I
< *Size
; ++I
) {
699 auto Name(readString());
700 if (std::error_code EC
= Name
.getError())
702 NameTable
.push_back(*Name
);
705 return sampleprof_error::success
;
708 std::error_code
SampleProfileReaderCompactBinary::readNameTable() {
709 auto Size
= readNumber
<uint64_t>();
710 if (std::error_code EC
= Size
.getError())
712 NameTable
.reserve(*Size
);
713 for (uint32_t I
= 0; I
< *Size
; ++I
) {
714 auto FID
= readNumber
<uint64_t>();
715 if (std::error_code EC
= FID
.getError())
717 NameTable
.push_back(std::to_string(*FID
));
719 return sampleprof_error::success
;
722 std::error_code
SampleProfileReaderExtBinaryBase::readSecHdrTableEntry() {
723 SecHdrTableEntry Entry
;
724 auto Type
= readUnencodedNumber
<uint64_t>();
725 if (std::error_code EC
= Type
.getError())
727 Entry
.Type
= static_cast<SecType
>(*Type
);
729 auto Flags
= readUnencodedNumber
<uint64_t>();
730 if (std::error_code EC
= Flags
.getError())
732 Entry
.Flags
= *Flags
;
734 auto Offset
= readUnencodedNumber
<uint64_t>();
735 if (std::error_code EC
= Offset
.getError())
737 Entry
.Offset
= *Offset
;
739 auto Size
= readUnencodedNumber
<uint64_t>();
740 if (std::error_code EC
= Size
.getError())
744 SecHdrTable
.push_back(std::move(Entry
));
745 return sampleprof_error::success
;
748 std::error_code
SampleProfileReaderExtBinaryBase::readSecHdrTable() {
749 auto EntryNum
= readUnencodedNumber
<uint64_t>();
750 if (std::error_code EC
= EntryNum
.getError())
753 for (uint32_t i
= 0; i
< (*EntryNum
); i
++)
754 if (std::error_code EC
= readSecHdrTableEntry())
757 return sampleprof_error::success
;
760 std::error_code
SampleProfileReaderExtBinaryBase::readHeader() {
761 const uint8_t *BufStart
=
762 reinterpret_cast<const uint8_t *>(Buffer
->getBufferStart());
764 End
= BufStart
+ Buffer
->getBufferSize();
766 if (std::error_code EC
= readMagicIdent())
769 if (std::error_code EC
= readSecHdrTable())
772 return sampleprof_error::success
;
775 uint64_t SampleProfileReaderExtBinaryBase::getSectionSize(SecType Type
) {
776 for (auto &Entry
: SecHdrTable
) {
777 if (Entry
.Type
== Type
)
783 uint64_t SampleProfileReaderExtBinaryBase::getFileSize() {
784 // Sections in SecHdrTable is not necessarily in the same order as
785 // sections in the profile because section like FuncOffsetTable needs
786 // to be written after section LBRProfile but needs to be read before
787 // section LBRProfile, so we cannot simply use the last entry in
788 // SecHdrTable to calculate the file size.
789 uint64_t FileSize
= 0;
790 for (auto &Entry
: SecHdrTable
) {
791 FileSize
= std::max(Entry
.Offset
+ Entry
.Size
, FileSize
);
796 bool SampleProfileReaderExtBinaryBase::dumpSectionInfo(raw_ostream
&OS
) {
797 uint64_t TotalSecsSize
= 0;
798 for (auto &Entry
: SecHdrTable
) {
799 OS
<< getSecName(Entry
.Type
) << " - Offset: " << Entry
.Offset
800 << ", Size: " << Entry
.Size
<< "\n";
801 TotalSecsSize
+= getSectionSize(Entry
.Type
);
803 uint64_t HeaderSize
= SecHdrTable
.front().Offset
;
804 assert(HeaderSize
+ TotalSecsSize
== getFileSize() &&
805 "Size of 'header + sections' doesn't match the total size of profile");
807 OS
<< "Header Size: " << HeaderSize
<< "\n";
808 OS
<< "Total Sections Size: " << TotalSecsSize
<< "\n";
809 OS
<< "File Size: " << getFileSize() << "\n";
813 std::error_code
SampleProfileReaderBinary::readMagicIdent() {
814 // Read and check the magic identifier.
815 auto Magic
= readNumber
<uint64_t>();
816 if (std::error_code EC
= Magic
.getError())
818 else if (std::error_code EC
= verifySPMagic(*Magic
))
821 // Read the version number.
822 auto Version
= readNumber
<uint64_t>();
823 if (std::error_code EC
= Version
.getError())
825 else if (*Version
!= SPVersion())
826 return sampleprof_error::unsupported_version
;
828 return sampleprof_error::success
;
831 std::error_code
SampleProfileReaderBinary::readHeader() {
832 Data
= reinterpret_cast<const uint8_t *>(Buffer
->getBufferStart());
833 End
= Data
+ Buffer
->getBufferSize();
835 if (std::error_code EC
= readMagicIdent())
838 if (std::error_code EC
= readSummary())
841 if (std::error_code EC
= readNameTable())
843 return sampleprof_error::success
;
846 std::error_code
SampleProfileReaderCompactBinary::readHeader() {
847 SampleProfileReaderBinary::readHeader();
848 if (std::error_code EC
= readFuncOffsetTable())
850 return sampleprof_error::success
;
853 std::error_code
SampleProfileReaderCompactBinary::readFuncOffsetTable() {
854 auto TableOffset
= readUnencodedNumber
<uint64_t>();
855 if (std::error_code EC
= TableOffset
.getError())
858 const uint8_t *SavedData
= Data
;
859 const uint8_t *TableStart
=
860 reinterpret_cast<const uint8_t *>(Buffer
->getBufferStart()) +
864 auto Size
= readNumber
<uint64_t>();
865 if (std::error_code EC
= Size
.getError())
868 FuncOffsetTable
.reserve(*Size
);
869 for (uint32_t I
= 0; I
< *Size
; ++I
) {
870 auto FName(readStringFromTable());
871 if (std::error_code EC
= FName
.getError())
874 auto Offset
= readNumber
<uint64_t>();
875 if (std::error_code EC
= Offset
.getError())
878 FuncOffsetTable
[*FName
] = *Offset
;
882 return sampleprof_error::success
;
885 void SampleProfileReaderCompactBinary::collectFuncsFrom(const Module
&M
) {
889 FuncsToUse
.insert(FunctionSamples::getCanonicalFnName(F
));
892 std::error_code
SampleProfileReaderBinary::readSummaryEntry(
893 std::vector
<ProfileSummaryEntry
> &Entries
) {
894 auto Cutoff
= readNumber
<uint64_t>();
895 if (std::error_code EC
= Cutoff
.getError())
898 auto MinBlockCount
= readNumber
<uint64_t>();
899 if (std::error_code EC
= MinBlockCount
.getError())
902 auto NumBlocks
= readNumber
<uint64_t>();
903 if (std::error_code EC
= NumBlocks
.getError())
906 Entries
.emplace_back(*Cutoff
, *MinBlockCount
, *NumBlocks
);
907 return sampleprof_error::success
;
910 std::error_code
SampleProfileReaderBinary::readSummary() {
911 auto TotalCount
= readNumber
<uint64_t>();
912 if (std::error_code EC
= TotalCount
.getError())
915 auto MaxBlockCount
= readNumber
<uint64_t>();
916 if (std::error_code EC
= MaxBlockCount
.getError())
919 auto MaxFunctionCount
= readNumber
<uint64_t>();
920 if (std::error_code EC
= MaxFunctionCount
.getError())
923 auto NumBlocks
= readNumber
<uint64_t>();
924 if (std::error_code EC
= NumBlocks
.getError())
927 auto NumFunctions
= readNumber
<uint64_t>();
928 if (std::error_code EC
= NumFunctions
.getError())
931 auto NumSummaryEntries
= readNumber
<uint64_t>();
932 if (std::error_code EC
= NumSummaryEntries
.getError())
935 std::vector
<ProfileSummaryEntry
> Entries
;
936 for (unsigned i
= 0; i
< *NumSummaryEntries
; i
++) {
937 std::error_code EC
= readSummaryEntry(Entries
);
938 if (EC
!= sampleprof_error::success
)
941 Summary
= std::make_unique
<ProfileSummary
>(
942 ProfileSummary::PSK_Sample
, Entries
, *TotalCount
, *MaxBlockCount
, 0,
943 *MaxFunctionCount
, *NumBlocks
, *NumFunctions
);
945 return sampleprof_error::success
;
948 bool SampleProfileReaderRawBinary::hasFormat(const MemoryBuffer
&Buffer
) {
949 const uint8_t *Data
=
950 reinterpret_cast<const uint8_t *>(Buffer
.getBufferStart());
951 uint64_t Magic
= decodeULEB128(Data
);
952 return Magic
== SPMagic();
955 bool SampleProfileReaderExtBinary::hasFormat(const MemoryBuffer
&Buffer
) {
956 const uint8_t *Data
=
957 reinterpret_cast<const uint8_t *>(Buffer
.getBufferStart());
958 uint64_t Magic
= decodeULEB128(Data
);
959 return Magic
== SPMagic(SPF_Ext_Binary
);
962 bool SampleProfileReaderCompactBinary::hasFormat(const MemoryBuffer
&Buffer
) {
963 const uint8_t *Data
=
964 reinterpret_cast<const uint8_t *>(Buffer
.getBufferStart());
965 uint64_t Magic
= decodeULEB128(Data
);
966 return Magic
== SPMagic(SPF_Compact_Binary
);
969 std::error_code
SampleProfileReaderGCC::skipNextWord() {
971 if (!GcovBuffer
.readInt(dummy
))
972 return sampleprof_error::truncated
;
973 return sampleprof_error::success
;
976 template <typename T
> ErrorOr
<T
> SampleProfileReaderGCC::readNumber() {
977 if (sizeof(T
) <= sizeof(uint32_t)) {
979 if (GcovBuffer
.readInt(Val
) && Val
<= std::numeric_limits
<T
>::max())
980 return static_cast<T
>(Val
);
981 } else if (sizeof(T
) <= sizeof(uint64_t)) {
983 if (GcovBuffer
.readInt64(Val
) && Val
<= std::numeric_limits
<T
>::max())
984 return static_cast<T
>(Val
);
987 std::error_code EC
= sampleprof_error::malformed
;
988 reportError(0, EC
.message());
992 ErrorOr
<StringRef
> SampleProfileReaderGCC::readString() {
994 if (!GcovBuffer
.readString(Str
))
995 return sampleprof_error::truncated
;
999 std::error_code
SampleProfileReaderGCC::readHeader() {
1000 // Read the magic identifier.
1001 if (!GcovBuffer
.readGCDAFormat())
1002 return sampleprof_error::unrecognized_format
;
1004 // Read the version number. Note - the GCC reader does not validate this
1005 // version, but the profile creator generates v704.
1006 GCOV::GCOVVersion version
;
1007 if (!GcovBuffer
.readGCOVVersion(version
))
1008 return sampleprof_error::unrecognized_format
;
1010 if (version
!= GCOV::V704
)
1011 return sampleprof_error::unsupported_version
;
1013 // Skip the empty integer.
1014 if (std::error_code EC
= skipNextWord())
1017 return sampleprof_error::success
;
1020 std::error_code
SampleProfileReaderGCC::readSectionTag(uint32_t Expected
) {
1022 if (!GcovBuffer
.readInt(Tag
))
1023 return sampleprof_error::truncated
;
1025 if (Tag
!= Expected
)
1026 return sampleprof_error::malformed
;
1028 if (std::error_code EC
= skipNextWord())
1031 return sampleprof_error::success
;
1034 std::error_code
SampleProfileReaderGCC::readNameTable() {
1035 if (std::error_code EC
= readSectionTag(GCOVTagAFDOFileNames
))
1039 if (!GcovBuffer
.readInt(Size
))
1040 return sampleprof_error::truncated
;
1042 for (uint32_t I
= 0; I
< Size
; ++I
) {
1044 if (!GcovBuffer
.readString(Str
))
1045 return sampleprof_error::truncated
;
1046 Names
.push_back(Str
);
1049 return sampleprof_error::success
;
1052 std::error_code
SampleProfileReaderGCC::readFunctionProfiles() {
1053 if (std::error_code EC
= readSectionTag(GCOVTagAFDOFunction
))
1056 uint32_t NumFunctions
;
1057 if (!GcovBuffer
.readInt(NumFunctions
))
1058 return sampleprof_error::truncated
;
1060 InlineCallStack Stack
;
1061 for (uint32_t I
= 0; I
< NumFunctions
; ++I
)
1062 if (std::error_code EC
= readOneFunctionProfile(Stack
, true, 0))
1066 return sampleprof_error::success
;
1069 std::error_code
SampleProfileReaderGCC::readOneFunctionProfile(
1070 const InlineCallStack
&InlineStack
, bool Update
, uint32_t Offset
) {
1071 uint64_t HeadCount
= 0;
1072 if (InlineStack
.size() == 0)
1073 if (!GcovBuffer
.readInt64(HeadCount
))
1074 return sampleprof_error::truncated
;
1077 if (!GcovBuffer
.readInt(NameIdx
))
1078 return sampleprof_error::truncated
;
1080 StringRef
Name(Names
[NameIdx
]);
1082 uint32_t NumPosCounts
;
1083 if (!GcovBuffer
.readInt(NumPosCounts
))
1084 return sampleprof_error::truncated
;
1086 uint32_t NumCallsites
;
1087 if (!GcovBuffer
.readInt(NumCallsites
))
1088 return sampleprof_error::truncated
;
1090 FunctionSamples
*FProfile
= nullptr;
1091 if (InlineStack
.size() == 0) {
1092 // If this is a top function that we have already processed, do not
1093 // update its profile again. This happens in the presence of
1094 // function aliases. Since these aliases share the same function
1095 // body, there will be identical replicated profiles for the
1096 // original function. In this case, we simply not bother updating
1097 // the profile of the original function.
1098 FProfile
= &Profiles
[Name
];
1099 FProfile
->addHeadSamples(HeadCount
);
1100 if (FProfile
->getTotalSamples() > 0)
1103 // Otherwise, we are reading an inlined instance. The top of the
1104 // inline stack contains the profile of the caller. Insert this
1105 // callee in the caller's CallsiteMap.
1106 FunctionSamples
*CallerProfile
= InlineStack
.front();
1107 uint32_t LineOffset
= Offset
>> 16;
1108 uint32_t Discriminator
= Offset
& 0xffff;
1109 FProfile
= &CallerProfile
->functionSamplesAt(
1110 LineLocation(LineOffset
, Discriminator
))[Name
];
1112 FProfile
->setName(Name
);
1114 for (uint32_t I
= 0; I
< NumPosCounts
; ++I
) {
1116 if (!GcovBuffer
.readInt(Offset
))
1117 return sampleprof_error::truncated
;
1119 uint32_t NumTargets
;
1120 if (!GcovBuffer
.readInt(NumTargets
))
1121 return sampleprof_error::truncated
;
1124 if (!GcovBuffer
.readInt64(Count
))
1125 return sampleprof_error::truncated
;
1127 // The line location is encoded in the offset as:
1128 // high 16 bits: line offset to the start of the function.
1129 // low 16 bits: discriminator.
1130 uint32_t LineOffset
= Offset
>> 16;
1131 uint32_t Discriminator
= Offset
& 0xffff;
1133 InlineCallStack NewStack
;
1134 NewStack
.push_back(FProfile
);
1135 NewStack
.insert(NewStack
.end(), InlineStack
.begin(), InlineStack
.end());
1137 // Walk up the inline stack, adding the samples on this line to
1138 // the total sample count of the callers in the chain.
1139 for (auto CallerProfile
: NewStack
)
1140 CallerProfile
->addTotalSamples(Count
);
1142 // Update the body samples for the current profile.
1143 FProfile
->addBodySamples(LineOffset
, Discriminator
, Count
);
1146 // Process the list of functions called at an indirect call site.
1147 // These are all the targets that a function pointer (or virtual
1148 // function) resolved at runtime.
1149 for (uint32_t J
= 0; J
< NumTargets
; J
++) {
1151 if (!GcovBuffer
.readInt(HistVal
))
1152 return sampleprof_error::truncated
;
1154 if (HistVal
!= HIST_TYPE_INDIR_CALL_TOPN
)
1155 return sampleprof_error::malformed
;
1158 if (!GcovBuffer
.readInt64(TargetIdx
))
1159 return sampleprof_error::truncated
;
1160 StringRef
TargetName(Names
[TargetIdx
]);
1162 uint64_t TargetCount
;
1163 if (!GcovBuffer
.readInt64(TargetCount
))
1164 return sampleprof_error::truncated
;
1167 FProfile
->addCalledTargetSamples(LineOffset
, Discriminator
,
1168 TargetName
, TargetCount
);
1172 // Process all the inlined callers into the current function. These
1173 // are all the callsites that were inlined into this function.
1174 for (uint32_t I
= 0; I
< NumCallsites
; I
++) {
1175 // The offset is encoded as:
1176 // high 16 bits: line offset to the start of the function.
1177 // low 16 bits: discriminator.
1179 if (!GcovBuffer
.readInt(Offset
))
1180 return sampleprof_error::truncated
;
1181 InlineCallStack NewStack
;
1182 NewStack
.push_back(FProfile
);
1183 NewStack
.insert(NewStack
.end(), InlineStack
.begin(), InlineStack
.end());
1184 if (std::error_code EC
= readOneFunctionProfile(NewStack
, Update
, Offset
))
1188 return sampleprof_error::success
;
1191 /// Read a GCC AutoFDO profile.
1193 /// This format is generated by the Linux Perf conversion tool at
1194 /// https://github.com/google/autofdo.
1195 std::error_code
SampleProfileReaderGCC::readImpl() {
1196 // Read the string table.
1197 if (std::error_code EC
= readNameTable())
1200 // Read the source profile.
1201 if (std::error_code EC
= readFunctionProfiles())
1204 return sampleprof_error::success
;
1207 bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer
&Buffer
) {
1208 StringRef
Magic(reinterpret_cast<const char *>(Buffer
.getBufferStart()));
1209 return Magic
== "adcg*704";
1212 void SampleProfileReaderItaniumRemapper::applyRemapping(LLVMContext
&Ctx
) {
1213 // If the reader is in compact format, we can't remap it because
1214 // we don't know what the original function names were.
1215 if (Reader
.getFormat() == SPF_Compact_Binary
) {
1216 Ctx
.diagnose(DiagnosticInfoSampleProfile(
1217 Reader
.getBuffer()->getBufferIdentifier(),
1218 "Profile data remapping cannot be applied to profile data "
1219 "in compact format (original mangled names are not available).",
1224 assert(Remappings
&& "should be initialized while creating remapper");
1225 for (auto &Sample
: Reader
.getProfiles())
1226 if (auto Key
= Remappings
->insert(Sample
.first()))
1227 SampleMap
.insert({Key
, &Sample
.second
});
1229 RemappingApplied
= true;
1233 SampleProfileReaderItaniumRemapper::getSamplesFor(StringRef Fname
) {
1234 if (auto Key
= Remappings
->lookup(Fname
))
1235 return SampleMap
.lookup(Key
);
1239 /// Prepare a memory buffer for the contents of \p Filename.
1241 /// \returns an error code indicating the status of the buffer.
1242 static ErrorOr
<std::unique_ptr
<MemoryBuffer
>>
1243 setupMemoryBuffer(const Twine
&Filename
) {
1244 auto BufferOrErr
= MemoryBuffer::getFileOrSTDIN(Filename
);
1245 if (std::error_code EC
= BufferOrErr
.getError())
1247 auto Buffer
= std::move(BufferOrErr
.get());
1249 // Sanity check the file.
1250 if (uint64_t(Buffer
->getBufferSize()) > std::numeric_limits
<uint32_t>::max())
1251 return sampleprof_error::too_large
;
1253 return std::move(Buffer
);
1256 /// Create a sample profile reader based on the format of the input file.
1258 /// \param Filename The file to open.
1260 /// \param C The LLVM context to use to emit diagnostics.
1262 /// \param RemapFilename The file used for profile remapping.
1264 /// \returns an error code indicating the status of the created reader.
1265 ErrorOr
<std::unique_ptr
<SampleProfileReader
>>
1266 SampleProfileReader::create(const std::string Filename
, LLVMContext
&C
,
1267 const std::string RemapFilename
) {
1268 auto BufferOrError
= setupMemoryBuffer(Filename
);
1269 if (std::error_code EC
= BufferOrError
.getError())
1271 return create(BufferOrError
.get(), C
, RemapFilename
);
1274 /// Create a sample profile remapper from the given input, to remap the
1275 /// function names in the given profile data.
1277 /// \param Filename The file to open.
1279 /// \param Reader The profile reader the remapper is going to be applied to.
1281 /// \param C The LLVM context to use to emit diagnostics.
1283 /// \returns an error code indicating the status of the created reader.
1284 ErrorOr
<std::unique_ptr
<SampleProfileReaderItaniumRemapper
>>
1285 SampleProfileReaderItaniumRemapper::create(const std::string Filename
,
1286 SampleProfileReader
&Reader
,
1288 auto BufferOrError
= setupMemoryBuffer(Filename
);
1289 if (std::error_code EC
= BufferOrError
.getError())
1291 return create(BufferOrError
.get(), Reader
, C
);
1294 /// Create a sample profile remapper from the given input, to remap the
1295 /// function names in the given profile data.
1297 /// \param B The memory buffer to create the reader from (assumes ownership).
1299 /// \param C The LLVM context to use to emit diagnostics.
1301 /// \param Reader The profile reader the remapper is going to be applied to.
1303 /// \returns an error code indicating the status of the created reader.
1304 ErrorOr
<std::unique_ptr
<SampleProfileReaderItaniumRemapper
>>
1305 SampleProfileReaderItaniumRemapper::create(std::unique_ptr
<MemoryBuffer
> &B
,
1306 SampleProfileReader
&Reader
,
1308 auto Remappings
= std::make_unique
<SymbolRemappingReader
>();
1309 if (Error E
= Remappings
->read(*B
.get())) {
1311 std::move(E
), [&](const SymbolRemappingParseError
&ParseError
) {
1312 C
.diagnose(DiagnosticInfoSampleProfile(B
->getBufferIdentifier(),
1313 ParseError
.getLineNum(),
1314 ParseError
.getMessage()));
1316 return sampleprof_error::malformed
;
1319 return std::make_unique
<SampleProfileReaderItaniumRemapper
>(
1320 std::move(B
), std::move(Remappings
), Reader
);
1323 /// Create a sample profile reader based on the format of the input data.
1325 /// \param B The memory buffer to create the reader from (assumes ownership).
1327 /// \param C The LLVM context to use to emit diagnostics.
1329 /// \param RemapFilename The file used for profile remapping.
1331 /// \returns an error code indicating the status of the created reader.
1332 ErrorOr
<std::unique_ptr
<SampleProfileReader
>>
1333 SampleProfileReader::create(std::unique_ptr
<MemoryBuffer
> &B
, LLVMContext
&C
,
1334 const std::string RemapFilename
) {
1335 std::unique_ptr
<SampleProfileReader
> Reader
;
1336 if (SampleProfileReaderRawBinary::hasFormat(*B
))
1337 Reader
.reset(new SampleProfileReaderRawBinary(std::move(B
), C
));
1338 else if (SampleProfileReaderExtBinary::hasFormat(*B
))
1339 Reader
.reset(new SampleProfileReaderExtBinary(std::move(B
), C
));
1340 else if (SampleProfileReaderCompactBinary::hasFormat(*B
))
1341 Reader
.reset(new SampleProfileReaderCompactBinary(std::move(B
), C
));
1342 else if (SampleProfileReaderGCC::hasFormat(*B
))
1343 Reader
.reset(new SampleProfileReaderGCC(std::move(B
), C
));
1344 else if (SampleProfileReaderText::hasFormat(*B
))
1345 Reader
.reset(new SampleProfileReaderText(std::move(B
), C
));
1347 return sampleprof_error::unrecognized_format
;
1349 if (!RemapFilename
.empty()) {
1351 SampleProfileReaderItaniumRemapper::create(RemapFilename
, *Reader
, C
);
1352 if (std::error_code EC
= ReaderOrErr
.getError()) {
1353 std::string Msg
= "Could not create remapper: " + EC
.message();
1354 C
.diagnose(DiagnosticInfoSampleProfile(RemapFilename
, Msg
));
1357 Reader
->Remapper
= std::move(ReaderOrErr
.get());
1360 FunctionSamples::Format
= Reader
->getFormat();
1361 if (std::error_code EC
= Reader
->readHeader()) {
1365 return std::move(Reader
);
1368 // For text and GCC file formats, we compute the summary after reading the
1369 // profile. Binary format has the profile summary in its header.
1370 void SampleProfileReader::computeSummary() {
1371 SampleProfileSummaryBuilder
Builder(ProfileSummaryBuilder::DefaultCutoffs
);
1372 for (const auto &I
: Profiles
) {
1373 const FunctionSamples
&Profile
= I
.second
;
1374 Builder
.addRecord(Profile
);
1376 Summary
= Builder
.getSummary();