1 //===- SampleProfReader.cpp - Read LLVM sample profile data ---------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the class that reads LLVM sample profiles. It
10 // supports three file formats: text, binary and gcov.
12 // The textual representation is useful for debugging and testing purposes. The
13 // binary representation is more compact, resulting in smaller file sizes.
15 // The gcov encoding is the one generated by GCC's AutoFDO profile creation
16 // tool (https://github.com/google/autofdo)
18 // All three encodings can be used interchangeably as an input sample profile.
20 //===----------------------------------------------------------------------===//
22 #include "llvm/ProfileData/SampleProfReader.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/IR/ProfileSummary.h"
27 #include "llvm/ProfileData/ProfileCommon.h"
28 #include "llvm/ProfileData/SampleProf.h"
29 #include "llvm/Support/ErrorOr.h"
30 #include "llvm/Support/LEB128.h"
31 #include "llvm/Support/LineIterator.h"
32 #include "llvm/Support/MD5.h"
33 #include "llvm/Support/MemoryBuffer.h"
34 #include "llvm/Support/raw_ostream.h"
40 #include <system_error>
44 using namespace sampleprof
;
46 /// Dump the function profile for \p FName.
48 /// \param FName Name of the function to print.
49 /// \param OS Stream to emit the output to.
50 void SampleProfileReader::dumpFunctionProfile(StringRef FName
,
52 OS
<< "Function: " << FName
<< ": " << Profiles
[FName
];
55 /// Dump all the function profiles found on stream \p OS.
56 void SampleProfileReader::dump(raw_ostream
&OS
) {
57 for (const auto &I
: Profiles
)
58 dumpFunctionProfile(I
.getKey(), OS
);
61 /// Parse \p Input as function head.
63 /// Parse one line of \p Input, and update function name in \p FName,
64 /// function's total sample count in \p NumSamples, function's entry
65 /// count in \p NumHeadSamples.
67 /// \returns true if parsing is successful.
68 static bool ParseHead(const StringRef
&Input
, StringRef
&FName
,
69 uint64_t &NumSamples
, uint64_t &NumHeadSamples
) {
72 size_t n2
= Input
.rfind(':');
73 size_t n1
= Input
.rfind(':', n2
- 1);
74 FName
= Input
.substr(0, n1
);
75 if (Input
.substr(n1
+ 1, n2
- n1
- 1).getAsInteger(10, NumSamples
))
77 if (Input
.substr(n2
+ 1).getAsInteger(10, NumHeadSamples
))
/// Returns true if line offset \p L is legal (only has 16 bits).
///
/// The parameter is 64 bits wide so that offsets decoded as uint64_t are
/// tested in full instead of being truncated before the comparison.
static bool isOffsetLegal(uint64_t L) { return (L & 0xffff) == L; }
85 /// Parse \p Input as line sample.
87 /// \param Input input line.
88 /// \param IsCallsite true if the line represents an inlined callsite.
89 /// \param Depth the depth of the inline stack.
90 /// \param NumSamples total samples of the line/inlined callsite.
91 /// \param LineOffset line offset to the start of the function.
92 /// \param Discriminator discriminator of the line.
93 /// \param TargetCountMap map from indirect call target to count.
95 /// returns true if parsing is successful.
96 static bool ParseLine(const StringRef
&Input
, bool &IsCallsite
, uint32_t &Depth
,
97 uint64_t &NumSamples
, uint32_t &LineOffset
,
98 uint32_t &Discriminator
, StringRef
&CalleeName
,
99 DenseMap
<StringRef
, uint64_t> &TargetCountMap
) {
100 for (Depth
= 0; Input
[Depth
] == ' '; Depth
++)
105 size_t n1
= Input
.find(':');
106 StringRef Loc
= Input
.substr(Depth
, n1
- Depth
);
107 size_t n2
= Loc
.find('.');
108 if (n2
== StringRef::npos
) {
109 if (Loc
.getAsInteger(10, LineOffset
) || !isOffsetLegal(LineOffset
))
113 if (Loc
.substr(0, n2
).getAsInteger(10, LineOffset
))
115 if (Loc
.substr(n2
+ 1).getAsInteger(10, Discriminator
))
119 StringRef Rest
= Input
.substr(n1
+ 2);
120 if (Rest
[0] >= '0' && Rest
[0] <= '9') {
122 size_t n3
= Rest
.find(' ');
123 if (n3
== StringRef::npos
) {
124 if (Rest
.getAsInteger(10, NumSamples
))
127 if (Rest
.substr(0, n3
).getAsInteger(10, NumSamples
))
130 // Find call targets and their sample counts.
131 // Note: In some cases, there are symbols in the profile which are not
132 // mangled. To accommodate such cases, use colon + integer pairs as the
135 // _M_construct<char *>:1000 string_view<std::allocator<char> >:437
136 // ":1000" and ":437" are used as anchor points so the string above will
138 // target: _M_construct<char *>
140 // target: string_view<std::allocator<char> >
142 while (n3
!= StringRef::npos
) {
143 n3
+= Rest
.substr(n3
).find_first_not_of(' ');
144 Rest
= Rest
.substr(n3
);
145 n3
= Rest
.find_first_of(':');
146 if (n3
== StringRef::npos
|| n3
== 0)
152 // Get the segment after the current colon.
153 StringRef AfterColon
= Rest
.substr(n3
+ 1);
154 // Get the target symbol before the current colon.
155 Target
= Rest
.substr(0, n3
);
156 // Check if the word after the current colon is an integer.
157 n4
= AfterColon
.find_first_of(' ');
158 n4
= (n4
!= StringRef::npos
) ? n3
+ n4
+ 1 : Rest
.size();
159 StringRef WordAfterColon
= Rest
.substr(n3
+ 1, n4
- n3
- 1);
160 if (!WordAfterColon
.getAsInteger(10, count
))
163 // Try to find the next colon.
164 uint64_t n5
= AfterColon
.find_first_of(':');
165 if (n5
== StringRef::npos
)
170 // An anchor point is found. Save the {target, count} pair
171 TargetCountMap
[Target
] = count
;
172 if (n4
== Rest
.size())
174 // Change n3 to the next blank space after colon + integer pair.
179 size_t n3
= Rest
.find_last_of(':');
180 CalleeName
= Rest
.substr(0, n3
);
181 if (Rest
.substr(n3
+ 1).getAsInteger(10, NumSamples
))
187 /// Load samples from a text file.
189 /// See the documentation at the top of the file for an explanation of
190 /// the expected format.
192 /// \returns true if the file was loaded successfully, false otherwise.
193 std::error_code
SampleProfileReaderText::read() {
194 line_iterator
LineIt(*Buffer
, /*SkipBlanks=*/true, '#');
195 sampleprof_error Result
= sampleprof_error::success
;
197 InlineCallStack InlineStack
;
199 for (; !LineIt
.is_at_eof(); ++LineIt
) {
200 if ((*LineIt
)[(*LineIt
).find_first_not_of(' ')] == '#')
202 // Read the header of each function.
204 // Note that for function identifiers we are actually expecting
205 // mangled names, but we may not always get them. This happens when
206 // the compiler decides not to emit the function (e.g., it was inlined
207 // and removed). In this case, the binary will not have the linkage
208 // name for the function, so the profiler will emit the function's
209 // unmangled name, which may contain characters like ':' and '>' in its
210 // name (member functions, templates, etc).
212 // The only requirement we place on the identifier, then, is that it
213 // should not begin with a number.
214 if ((*LineIt
)[0] != ' ') {
215 uint64_t NumSamples
, NumHeadSamples
;
217 if (!ParseHead(*LineIt
, FName
, NumSamples
, NumHeadSamples
)) {
218 reportError(LineIt
.line_number(),
219 "Expected 'mangled_name:NUM:NUM', found " + *LineIt
);
220 return sampleprof_error::malformed
;
222 Profiles
[FName
] = FunctionSamples();
223 FunctionSamples
&FProfile
= Profiles
[FName
];
224 FProfile
.setName(FName
);
225 MergeResult(Result
, FProfile
.addTotalSamples(NumSamples
));
226 MergeResult(Result
, FProfile
.addHeadSamples(NumHeadSamples
));
228 InlineStack
.push_back(&FProfile
);
232 DenseMap
<StringRef
, uint64_t> TargetCountMap
;
234 uint32_t Depth
, LineOffset
, Discriminator
;
235 if (!ParseLine(*LineIt
, IsCallsite
, Depth
, NumSamples
, LineOffset
,
236 Discriminator
, FName
, TargetCountMap
)) {
237 reportError(LineIt
.line_number(),
238 "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " +
240 return sampleprof_error::malformed
;
243 while (InlineStack
.size() > Depth
) {
244 InlineStack
.pop_back();
246 FunctionSamples
&FSamples
= InlineStack
.back()->functionSamplesAt(
247 LineLocation(LineOffset
, Discriminator
))[FName
];
248 FSamples
.setName(FName
);
249 MergeResult(Result
, FSamples
.addTotalSamples(NumSamples
));
250 InlineStack
.push_back(&FSamples
);
252 while (InlineStack
.size() > Depth
) {
253 InlineStack
.pop_back();
255 FunctionSamples
&FProfile
= *InlineStack
.back();
256 for (const auto &name_count
: TargetCountMap
) {
257 MergeResult(Result
, FProfile
.addCalledTargetSamples(
258 LineOffset
, Discriminator
, name_count
.first
,
261 MergeResult(Result
, FProfile
.addBodySamples(LineOffset
, Discriminator
,
266 if (Result
== sampleprof_error::success
)
272 bool SampleProfileReaderText::hasFormat(const MemoryBuffer
&Buffer
) {
275 // Check that the first non-comment line is a valid function header.
276 line_iterator
LineIt(Buffer
, /*SkipBlanks=*/true, '#');
277 if (!LineIt
.is_at_eof()) {
278 if ((*LineIt
)[0] != ' ') {
279 uint64_t NumSamples
, NumHeadSamples
;
281 result
= ParseHead(*LineIt
, FName
, NumSamples
, NumHeadSamples
);
288 template <typename T
> ErrorOr
<T
> SampleProfileReaderBinary::readNumber() {
289 unsigned NumBytesRead
= 0;
291 uint64_t Val
= decodeULEB128(Data
, &NumBytesRead
);
293 if (Val
> std::numeric_limits
<T
>::max())
294 EC
= sampleprof_error::malformed
;
295 else if (Data
+ NumBytesRead
> End
)
296 EC
= sampleprof_error::truncated
;
298 EC
= sampleprof_error::success
;
301 reportError(0, EC
.message());
305 Data
+= NumBytesRead
;
306 return static_cast<T
>(Val
);
309 ErrorOr
<StringRef
> SampleProfileReaderBinary::readString() {
311 StringRef
Str(reinterpret_cast<const char *>(Data
));
312 if (Data
+ Str
.size() + 1 > End
) {
313 EC
= sampleprof_error::truncated
;
314 reportError(0, EC
.message());
318 Data
+= Str
.size() + 1;
322 template <typename T
>
323 ErrorOr
<T
> SampleProfileReaderBinary::readUnencodedNumber() {
326 if (Data
+ sizeof(T
) > End
) {
327 EC
= sampleprof_error::truncated
;
328 reportError(0, EC
.message());
332 using namespace support
;
333 T Val
= endian::readNext
<T
, little
, unaligned
>(Data
);
337 template <typename T
>
338 inline ErrorOr
<uint32_t> SampleProfileReaderBinary::readStringIndex(T
&Table
) {
340 auto Idx
= readNumber
<uint32_t>();
341 if (std::error_code EC
= Idx
.getError())
343 if (*Idx
>= Table
.size())
344 return sampleprof_error::truncated_name_table
;
348 ErrorOr
<StringRef
> SampleProfileReaderBinary::readStringFromTable() {
349 auto Idx
= readStringIndex(NameTable
);
350 if (std::error_code EC
= Idx
.getError())
353 return NameTable
[*Idx
];
356 ErrorOr
<StringRef
> SampleProfileReaderCompactBinary::readStringFromTable() {
357 auto Idx
= readStringIndex(NameTable
);
358 if (std::error_code EC
= Idx
.getError())
361 return StringRef(NameTable
[*Idx
]);
365 SampleProfileReaderBinary::readProfile(FunctionSamples
&FProfile
) {
366 auto NumSamples
= readNumber
<uint64_t>();
367 if (std::error_code EC
= NumSamples
.getError())
369 FProfile
.addTotalSamples(*NumSamples
);
371 // Read the samples in the body.
372 auto NumRecords
= readNumber
<uint32_t>();
373 if (std::error_code EC
= NumRecords
.getError())
376 for (uint32_t I
= 0; I
< *NumRecords
; ++I
) {
377 auto LineOffset
= readNumber
<uint64_t>();
378 if (std::error_code EC
= LineOffset
.getError())
381 if (!isOffsetLegal(*LineOffset
)) {
382 return std::error_code();
385 auto Discriminator
= readNumber
<uint64_t>();
386 if (std::error_code EC
= Discriminator
.getError())
389 auto NumSamples
= readNumber
<uint64_t>();
390 if (std::error_code EC
= NumSamples
.getError())
393 auto NumCalls
= readNumber
<uint32_t>();
394 if (std::error_code EC
= NumCalls
.getError())
397 for (uint32_t J
= 0; J
< *NumCalls
; ++J
) {
398 auto CalledFunction(readStringFromTable());
399 if (std::error_code EC
= CalledFunction
.getError())
402 auto CalledFunctionSamples
= readNumber
<uint64_t>();
403 if (std::error_code EC
= CalledFunctionSamples
.getError())
406 FProfile
.addCalledTargetSamples(*LineOffset
, *Discriminator
,
407 *CalledFunction
, *CalledFunctionSamples
);
410 FProfile
.addBodySamples(*LineOffset
, *Discriminator
, *NumSamples
);
413 // Read all the samples for inlined function calls.
414 auto NumCallsites
= readNumber
<uint32_t>();
415 if (std::error_code EC
= NumCallsites
.getError())
418 for (uint32_t J
= 0; J
< *NumCallsites
; ++J
) {
419 auto LineOffset
= readNumber
<uint64_t>();
420 if (std::error_code EC
= LineOffset
.getError())
423 auto Discriminator
= readNumber
<uint64_t>();
424 if (std::error_code EC
= Discriminator
.getError())
427 auto FName(readStringFromTable());
428 if (std::error_code EC
= FName
.getError())
431 FunctionSamples
&CalleeProfile
= FProfile
.functionSamplesAt(
432 LineLocation(*LineOffset
, *Discriminator
))[*FName
];
433 CalleeProfile
.setName(*FName
);
434 if (std::error_code EC
= readProfile(CalleeProfile
))
438 return sampleprof_error::success
;
441 std::error_code
SampleProfileReaderBinary::readFuncProfile() {
442 auto NumHeadSamples
= readNumber
<uint64_t>();
443 if (std::error_code EC
= NumHeadSamples
.getError())
446 auto FName(readStringFromTable());
447 if (std::error_code EC
= FName
.getError())
450 Profiles
[*FName
] = FunctionSamples();
451 FunctionSamples
&FProfile
= Profiles
[*FName
];
452 FProfile
.setName(*FName
);
454 FProfile
.addHeadSamples(*NumHeadSamples
);
456 if (std::error_code EC
= readProfile(FProfile
))
458 return sampleprof_error::success
;
461 std::error_code
SampleProfileReaderBinary::read() {
463 if (std::error_code EC
= readFuncProfile())
467 return sampleprof_error::success
;
471 SampleProfileReaderExtBinary::readOneSection(const uint8_t *Start
,
472 uint64_t Size
, SecType Type
) {
476 if (std::error_code EC
= readSummary())
480 if (std::error_code EC
= readNameTable())
484 while (Data
< Start
+ Size
) {
485 if (std::error_code EC
= readFuncProfile())
489 case SecProfileSymbolList
:
490 if (std::error_code EC
= readProfileSymbolList())
496 return sampleprof_error::success
;
499 std::error_code
SampleProfileReaderExtBinary::readProfileSymbolList() {
500 auto UncompressSize
= readNumber
<uint64_t>();
501 if (std::error_code EC
= UncompressSize
.getError())
504 auto CompressSize
= readNumber
<uint64_t>();
505 if (std::error_code EC
= CompressSize
.getError())
509 ProfSymList
= std::make_unique
<ProfileSymbolList
>();
511 if (std::error_code EC
=
512 ProfSymList
->read(*CompressSize
, *UncompressSize
, Data
))
515 // CompressSize is zero only when ProfileSymbolList is not compressed.
516 if (*CompressSize
== 0)
517 Data
= Data
+ *UncompressSize
;
519 Data
= Data
+ *CompressSize
;
520 return sampleprof_error::success
;
523 std::error_code
SampleProfileReaderExtBinaryBase::read() {
524 const uint8_t *BufStart
=
525 reinterpret_cast<const uint8_t *>(Buffer
->getBufferStart());
527 for (auto &Entry
: SecHdrTable
) {
528 // Skip empty section.
531 const uint8_t *SecStart
= BufStart
+ Entry
.Offset
;
532 if (std::error_code EC
= readOneSection(SecStart
, Entry
.Size
, Entry
.Type
))
534 if (Data
!= SecStart
+ Entry
.Size
)
535 return sampleprof_error::malformed
;
538 return sampleprof_error::success
;
541 std::error_code
SampleProfileReaderCompactBinary::read() {
542 std::vector
<uint64_t> OffsetsToUse
;
544 for (auto FuncEntry
: FuncOffsetTable
) {
545 OffsetsToUse
.push_back(FuncEntry
.second
);
549 for (auto Name
: FuncsToUse
) {
550 auto GUID
= std::to_string(MD5Hash(Name
));
551 auto iter
= FuncOffsetTable
.find(StringRef(GUID
));
552 if (iter
== FuncOffsetTable
.end())
554 OffsetsToUse
.push_back(iter
->second
);
558 for (auto Offset
: OffsetsToUse
) {
559 const uint8_t *SavedData
= Data
;
560 Data
= reinterpret_cast<const uint8_t *>(Buffer
->getBufferStart()) +
562 if (std::error_code EC
= readFuncProfile())
566 return sampleprof_error::success
;
569 std::error_code
SampleProfileReaderRawBinary::verifySPMagic(uint64_t Magic
) {
570 if (Magic
== SPMagic())
571 return sampleprof_error::success
;
572 return sampleprof_error::bad_magic
;
575 std::error_code
SampleProfileReaderExtBinary::verifySPMagic(uint64_t Magic
) {
576 if (Magic
== SPMagic(SPF_Ext_Binary
))
577 return sampleprof_error::success
;
578 return sampleprof_error::bad_magic
;
582 SampleProfileReaderCompactBinary::verifySPMagic(uint64_t Magic
) {
583 if (Magic
== SPMagic(SPF_Compact_Binary
))
584 return sampleprof_error::success
;
585 return sampleprof_error::bad_magic
;
588 std::error_code
SampleProfileReaderBinary::readNameTable() {
589 auto Size
= readNumber
<uint32_t>();
590 if (std::error_code EC
= Size
.getError())
592 NameTable
.reserve(*Size
);
593 for (uint32_t I
= 0; I
< *Size
; ++I
) {
594 auto Name(readString());
595 if (std::error_code EC
= Name
.getError())
597 NameTable
.push_back(*Name
);
600 return sampleprof_error::success
;
603 std::error_code
SampleProfileReaderCompactBinary::readNameTable() {
604 auto Size
= readNumber
<uint64_t>();
605 if (std::error_code EC
= Size
.getError())
607 NameTable
.reserve(*Size
);
608 for (uint32_t I
= 0; I
< *Size
; ++I
) {
609 auto FID
= readNumber
<uint64_t>();
610 if (std::error_code EC
= FID
.getError())
612 NameTable
.push_back(std::to_string(*FID
));
614 return sampleprof_error::success
;
617 std::error_code
SampleProfileReaderExtBinaryBase::readSecHdrTableEntry() {
618 SecHdrTableEntry Entry
;
619 auto Type
= readUnencodedNumber
<uint64_t>();
620 if (std::error_code EC
= Type
.getError())
622 Entry
.Type
= static_cast<SecType
>(*Type
);
624 auto Flag
= readUnencodedNumber
<uint64_t>();
625 if (std::error_code EC
= Flag
.getError())
629 auto Offset
= readUnencodedNumber
<uint64_t>();
630 if (std::error_code EC
= Offset
.getError())
632 Entry
.Offset
= *Offset
;
634 auto Size
= readUnencodedNumber
<uint64_t>();
635 if (std::error_code EC
= Size
.getError())
639 SecHdrTable
.push_back(std::move(Entry
));
640 return sampleprof_error::success
;
643 std::error_code
SampleProfileReaderExtBinaryBase::readSecHdrTable() {
644 auto EntryNum
= readUnencodedNumber
<uint64_t>();
645 if (std::error_code EC
= EntryNum
.getError())
648 for (uint32_t i
= 0; i
< (*EntryNum
); i
++)
649 if (std::error_code EC
= readSecHdrTableEntry())
652 return sampleprof_error::success
;
655 std::error_code
SampleProfileReaderExtBinaryBase::readHeader() {
656 const uint8_t *BufStart
=
657 reinterpret_cast<const uint8_t *>(Buffer
->getBufferStart());
659 End
= BufStart
+ Buffer
->getBufferSize();
661 if (std::error_code EC
= readMagicIdent())
664 if (std::error_code EC
= readSecHdrTable())
667 return sampleprof_error::success
;
670 uint64_t SampleProfileReaderExtBinaryBase::getSectionSize(SecType Type
) {
671 for (auto &Entry
: SecHdrTable
) {
672 if (Entry
.Type
== Type
)
678 uint64_t SampleProfileReaderExtBinaryBase::getFileSize() {
679 auto &LastEntry
= SecHdrTable
.back();
680 return LastEntry
.Offset
+ LastEntry
.Size
;
683 bool SampleProfileReaderExtBinaryBase::dumpSectionInfo(raw_ostream
&OS
) {
684 uint64_t TotalSecsSize
= 0;
685 for (auto &Entry
: SecHdrTable
) {
686 OS
<< getSecName(Entry
.Type
) << " - Offset: " << Entry
.Offset
687 << ", Size: " << Entry
.Size
<< "\n";
688 TotalSecsSize
+= getSectionSize(Entry
.Type
);
690 uint64_t HeaderSize
= SecHdrTable
.front().Offset
;
691 assert(HeaderSize
+ TotalSecsSize
== getFileSize() &&
692 "Size of 'header + sections' doesn't match the total size of profile");
694 OS
<< "Header Size: " << HeaderSize
<< "\n";
695 OS
<< "Total Sections Size: " << TotalSecsSize
<< "\n";
696 OS
<< "File Size: " << getFileSize() << "\n";
700 std::error_code
SampleProfileReaderBinary::readMagicIdent() {
701 // Read and check the magic identifier.
702 auto Magic
= readNumber
<uint64_t>();
703 if (std::error_code EC
= Magic
.getError())
705 else if (std::error_code EC
= verifySPMagic(*Magic
))
708 // Read the version number.
709 auto Version
= readNumber
<uint64_t>();
710 if (std::error_code EC
= Version
.getError())
712 else if (*Version
!= SPVersion())
713 return sampleprof_error::unsupported_version
;
715 return sampleprof_error::success
;
718 std::error_code
SampleProfileReaderBinary::readHeader() {
719 Data
= reinterpret_cast<const uint8_t *>(Buffer
->getBufferStart());
720 End
= Data
+ Buffer
->getBufferSize();
722 if (std::error_code EC
= readMagicIdent())
725 if (std::error_code EC
= readSummary())
728 if (std::error_code EC
= readNameTable())
730 return sampleprof_error::success
;
733 std::error_code
SampleProfileReaderCompactBinary::readHeader() {
734 SampleProfileReaderBinary::readHeader();
735 if (std::error_code EC
= readFuncOffsetTable())
737 return sampleprof_error::success
;
740 std::error_code
SampleProfileReaderCompactBinary::readFuncOffsetTable() {
741 auto TableOffset
= readUnencodedNumber
<uint64_t>();
742 if (std::error_code EC
= TableOffset
.getError())
745 const uint8_t *SavedData
= Data
;
746 const uint8_t *TableStart
=
747 reinterpret_cast<const uint8_t *>(Buffer
->getBufferStart()) +
751 auto Size
= readNumber
<uint64_t>();
752 if (std::error_code EC
= Size
.getError())
755 FuncOffsetTable
.reserve(*Size
);
756 for (uint32_t I
= 0; I
< *Size
; ++I
) {
757 auto FName(readStringFromTable());
758 if (std::error_code EC
= FName
.getError())
761 auto Offset
= readNumber
<uint64_t>();
762 if (std::error_code EC
= Offset
.getError())
765 FuncOffsetTable
[*FName
] = *Offset
;
769 return sampleprof_error::success
;
772 void SampleProfileReaderCompactBinary::collectFuncsToUse(const Module
&M
) {
776 StringRef CanonName
= FunctionSamples::getCanonicalFnName(F
);
777 FuncsToUse
.insert(CanonName
);
781 std::error_code
SampleProfileReaderBinary::readSummaryEntry(
782 std::vector
<ProfileSummaryEntry
> &Entries
) {
783 auto Cutoff
= readNumber
<uint64_t>();
784 if (std::error_code EC
= Cutoff
.getError())
787 auto MinBlockCount
= readNumber
<uint64_t>();
788 if (std::error_code EC
= MinBlockCount
.getError())
791 auto NumBlocks
= readNumber
<uint64_t>();
792 if (std::error_code EC
= NumBlocks
.getError())
795 Entries
.emplace_back(*Cutoff
, *MinBlockCount
, *NumBlocks
);
796 return sampleprof_error::success
;
799 std::error_code
SampleProfileReaderBinary::readSummary() {
800 auto TotalCount
= readNumber
<uint64_t>();
801 if (std::error_code EC
= TotalCount
.getError())
804 auto MaxBlockCount
= readNumber
<uint64_t>();
805 if (std::error_code EC
= MaxBlockCount
.getError())
808 auto MaxFunctionCount
= readNumber
<uint64_t>();
809 if (std::error_code EC
= MaxFunctionCount
.getError())
812 auto NumBlocks
= readNumber
<uint64_t>();
813 if (std::error_code EC
= NumBlocks
.getError())
816 auto NumFunctions
= readNumber
<uint64_t>();
817 if (std::error_code EC
= NumFunctions
.getError())
820 auto NumSummaryEntries
= readNumber
<uint64_t>();
821 if (std::error_code EC
= NumSummaryEntries
.getError())
824 std::vector
<ProfileSummaryEntry
> Entries
;
825 for (unsigned i
= 0; i
< *NumSummaryEntries
; i
++) {
826 std::error_code EC
= readSummaryEntry(Entries
);
827 if (EC
!= sampleprof_error::success
)
830 Summary
= std::make_unique
<ProfileSummary
>(
831 ProfileSummary::PSK_Sample
, Entries
, *TotalCount
, *MaxBlockCount
, 0,
832 *MaxFunctionCount
, *NumBlocks
, *NumFunctions
);
834 return sampleprof_error::success
;
837 bool SampleProfileReaderRawBinary::hasFormat(const MemoryBuffer
&Buffer
) {
838 const uint8_t *Data
=
839 reinterpret_cast<const uint8_t *>(Buffer
.getBufferStart());
840 uint64_t Magic
= decodeULEB128(Data
);
841 return Magic
== SPMagic();
844 bool SampleProfileReaderExtBinary::hasFormat(const MemoryBuffer
&Buffer
) {
845 const uint8_t *Data
=
846 reinterpret_cast<const uint8_t *>(Buffer
.getBufferStart());
847 uint64_t Magic
= decodeULEB128(Data
);
848 return Magic
== SPMagic(SPF_Ext_Binary
);
851 bool SampleProfileReaderCompactBinary::hasFormat(const MemoryBuffer
&Buffer
) {
852 const uint8_t *Data
=
853 reinterpret_cast<const uint8_t *>(Buffer
.getBufferStart());
854 uint64_t Magic
= decodeULEB128(Data
);
855 return Magic
== SPMagic(SPF_Compact_Binary
);
858 std::error_code
SampleProfileReaderGCC::skipNextWord() {
860 if (!GcovBuffer
.readInt(dummy
))
861 return sampleprof_error::truncated
;
862 return sampleprof_error::success
;
865 template <typename T
> ErrorOr
<T
> SampleProfileReaderGCC::readNumber() {
866 if (sizeof(T
) <= sizeof(uint32_t)) {
868 if (GcovBuffer
.readInt(Val
) && Val
<= std::numeric_limits
<T
>::max())
869 return static_cast<T
>(Val
);
870 } else if (sizeof(T
) <= sizeof(uint64_t)) {
872 if (GcovBuffer
.readInt64(Val
) && Val
<= std::numeric_limits
<T
>::max())
873 return static_cast<T
>(Val
);
876 std::error_code EC
= sampleprof_error::malformed
;
877 reportError(0, EC
.message());
881 ErrorOr
<StringRef
> SampleProfileReaderGCC::readString() {
883 if (!GcovBuffer
.readString(Str
))
884 return sampleprof_error::truncated
;
888 std::error_code
SampleProfileReaderGCC::readHeader() {
889 // Read the magic identifier.
890 if (!GcovBuffer
.readGCDAFormat())
891 return sampleprof_error::unrecognized_format
;
893 // Read the version number. Note - the GCC reader does not validate this
894 // version, but the profile creator generates v704.
895 GCOV::GCOVVersion version
;
896 if (!GcovBuffer
.readGCOVVersion(version
))
897 return sampleprof_error::unrecognized_format
;
899 if (version
!= GCOV::V704
)
900 return sampleprof_error::unsupported_version
;
902 // Skip the empty integer.
903 if (std::error_code EC
= skipNextWord())
906 return sampleprof_error::success
;
909 std::error_code
SampleProfileReaderGCC::readSectionTag(uint32_t Expected
) {
911 if (!GcovBuffer
.readInt(Tag
))
912 return sampleprof_error::truncated
;
915 return sampleprof_error::malformed
;
917 if (std::error_code EC
= skipNextWord())
920 return sampleprof_error::success
;
923 std::error_code
SampleProfileReaderGCC::readNameTable() {
924 if (std::error_code EC
= readSectionTag(GCOVTagAFDOFileNames
))
928 if (!GcovBuffer
.readInt(Size
))
929 return sampleprof_error::truncated
;
931 for (uint32_t I
= 0; I
< Size
; ++I
) {
933 if (!GcovBuffer
.readString(Str
))
934 return sampleprof_error::truncated
;
935 Names
.push_back(Str
);
938 return sampleprof_error::success
;
941 std::error_code
SampleProfileReaderGCC::readFunctionProfiles() {
942 if (std::error_code EC
= readSectionTag(GCOVTagAFDOFunction
))
945 uint32_t NumFunctions
;
946 if (!GcovBuffer
.readInt(NumFunctions
))
947 return sampleprof_error::truncated
;
949 InlineCallStack Stack
;
950 for (uint32_t I
= 0; I
< NumFunctions
; ++I
)
951 if (std::error_code EC
= readOneFunctionProfile(Stack
, true, 0))
955 return sampleprof_error::success
;
958 std::error_code
SampleProfileReaderGCC::readOneFunctionProfile(
959 const InlineCallStack
&InlineStack
, bool Update
, uint32_t Offset
) {
960 uint64_t HeadCount
= 0;
961 if (InlineStack
.size() == 0)
962 if (!GcovBuffer
.readInt64(HeadCount
))
963 return sampleprof_error::truncated
;
966 if (!GcovBuffer
.readInt(NameIdx
))
967 return sampleprof_error::truncated
;
969 StringRef
Name(Names
[NameIdx
]);
971 uint32_t NumPosCounts
;
972 if (!GcovBuffer
.readInt(NumPosCounts
))
973 return sampleprof_error::truncated
;
975 uint32_t NumCallsites
;
976 if (!GcovBuffer
.readInt(NumCallsites
))
977 return sampleprof_error::truncated
;
979 FunctionSamples
*FProfile
= nullptr;
980 if (InlineStack
.size() == 0) {
981 // If this is a top function that we have already processed, do not
982 // update its profile again. This happens in the presence of
983 // function aliases. Since these aliases share the same function
984 // body, there will be identical replicated profiles for the
985 // original function. In this case, we simply not bother updating
986 // the profile of the original function.
987 FProfile
= &Profiles
[Name
];
988 FProfile
->addHeadSamples(HeadCount
);
989 if (FProfile
->getTotalSamples() > 0)
992 // Otherwise, we are reading an inlined instance. The top of the
993 // inline stack contains the profile of the caller. Insert this
994 // callee in the caller's CallsiteMap.
995 FunctionSamples
*CallerProfile
= InlineStack
.front();
996 uint32_t LineOffset
= Offset
>> 16;
997 uint32_t Discriminator
= Offset
& 0xffff;
998 FProfile
= &CallerProfile
->functionSamplesAt(
999 LineLocation(LineOffset
, Discriminator
))[Name
];
1001 FProfile
->setName(Name
);
1003 for (uint32_t I
= 0; I
< NumPosCounts
; ++I
) {
1005 if (!GcovBuffer
.readInt(Offset
))
1006 return sampleprof_error::truncated
;
1008 uint32_t NumTargets
;
1009 if (!GcovBuffer
.readInt(NumTargets
))
1010 return sampleprof_error::truncated
;
1013 if (!GcovBuffer
.readInt64(Count
))
1014 return sampleprof_error::truncated
;
1016 // The line location is encoded in the offset as:
1017 // high 16 bits: line offset to the start of the function.
1018 // low 16 bits: discriminator.
1019 uint32_t LineOffset
= Offset
>> 16;
1020 uint32_t Discriminator
= Offset
& 0xffff;
1022 InlineCallStack NewStack
;
1023 NewStack
.push_back(FProfile
);
1024 NewStack
.insert(NewStack
.end(), InlineStack
.begin(), InlineStack
.end());
1026 // Walk up the inline stack, adding the samples on this line to
1027 // the total sample count of the callers in the chain.
1028 for (auto CallerProfile
: NewStack
)
1029 CallerProfile
->addTotalSamples(Count
);
1031 // Update the body samples for the current profile.
1032 FProfile
->addBodySamples(LineOffset
, Discriminator
, Count
);
1035 // Process the list of functions called at an indirect call site.
1036 // These are all the targets that a function pointer (or virtual
1037 // function) resolved at runtime.
1038 for (uint32_t J
= 0; J
< NumTargets
; J
++) {
1040 if (!GcovBuffer
.readInt(HistVal
))
1041 return sampleprof_error::truncated
;
1043 if (HistVal
!= HIST_TYPE_INDIR_CALL_TOPN
)
1044 return sampleprof_error::malformed
;
1047 if (!GcovBuffer
.readInt64(TargetIdx
))
1048 return sampleprof_error::truncated
;
1049 StringRef
TargetName(Names
[TargetIdx
]);
1051 uint64_t TargetCount
;
1052 if (!GcovBuffer
.readInt64(TargetCount
))
1053 return sampleprof_error::truncated
;
1056 FProfile
->addCalledTargetSamples(LineOffset
, Discriminator
,
1057 TargetName
, TargetCount
);
1061 // Process all the inlined callers into the current function. These
1062 // are all the callsites that were inlined into this function.
1063 for (uint32_t I
= 0; I
< NumCallsites
; I
++) {
1064 // The offset is encoded as:
1065 // high 16 bits: line offset to the start of the function.
1066 // low 16 bits: discriminator.
1068 if (!GcovBuffer
.readInt(Offset
))
1069 return sampleprof_error::truncated
;
1070 InlineCallStack NewStack
;
1071 NewStack
.push_back(FProfile
);
1072 NewStack
.insert(NewStack
.end(), InlineStack
.begin(), InlineStack
.end());
1073 if (std::error_code EC
= readOneFunctionProfile(NewStack
, Update
, Offset
))
1077 return sampleprof_error::success
;
1080 /// Read a GCC AutoFDO profile.
1082 /// This format is generated by the Linux Perf conversion tool at
1083 /// https://github.com/google/autofdo.
1084 std::error_code
SampleProfileReaderGCC::read() {
1085 // Read the string table.
1086 if (std::error_code EC
= readNameTable())
1089 // Read the source profile.
1090 if (std::error_code EC
= readFunctionProfiles())
1093 return sampleprof_error::success
;
1096 bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer
&Buffer
) {
1097 StringRef
Magic(reinterpret_cast<const char *>(Buffer
.getBufferStart()));
1098 return Magic
== "adcg*704";
1101 std::error_code
SampleProfileReaderItaniumRemapper::read() {
1102 // If the underlying data is in compact format, we can't remap it because
1103 // we don't know what the original function names were.
1104 if (getFormat() == SPF_Compact_Binary
) {
1105 Ctx
.diagnose(DiagnosticInfoSampleProfile(
1106 Buffer
->getBufferIdentifier(),
1107 "Profile data remapping cannot be applied to profile data "
1108 "in compact format (original mangled names are not available).",
1110 return sampleprof_error::success
;
1113 if (Error E
= Remappings
.read(*Buffer
)) {
1115 std::move(E
), [&](const SymbolRemappingParseError
&ParseError
) {
1116 reportError(ParseError
.getLineNum(), ParseError
.getMessage());
1118 return sampleprof_error::malformed
;
1121 for (auto &Sample
: getProfiles())
1122 if (auto Key
= Remappings
.insert(Sample
.first()))
1123 SampleMap
.insert({Key
, &Sample
.second
});
1125 return sampleprof_error::success
;
1129 SampleProfileReaderItaniumRemapper::getSamplesFor(StringRef Fname
) {
1130 if (auto Key
= Remappings
.lookup(Fname
))
1131 return SampleMap
.lookup(Key
);
1132 return SampleProfileReader::getSamplesFor(Fname
);
1135 /// Prepare a memory buffer for the contents of \p Filename.
1137 /// \returns an error code indicating the status of the buffer.
1138 static ErrorOr
<std::unique_ptr
<MemoryBuffer
>>
1139 setupMemoryBuffer(const Twine
&Filename
) {
1140 auto BufferOrErr
= MemoryBuffer::getFileOrSTDIN(Filename
);
1141 if (std::error_code EC
= BufferOrErr
.getError())
1143 auto Buffer
= std::move(BufferOrErr
.get());
1145 // Sanity check the file.
1146 if (uint64_t(Buffer
->getBufferSize()) > std::numeric_limits
<uint32_t>::max())
1147 return sampleprof_error::too_large
;
1149 return std::move(Buffer
);
1152 /// Create a sample profile reader based on the format of the input file.
1154 /// \param Filename The file to open.
1156 /// \param C The LLVM context to use to emit diagnostics.
1158 /// \returns an error code indicating the status of the created reader.
1159 ErrorOr
<std::unique_ptr
<SampleProfileReader
>>
1160 SampleProfileReader::create(const Twine
&Filename
, LLVMContext
&C
) {
1161 auto BufferOrError
= setupMemoryBuffer(Filename
);
1162 if (std::error_code EC
= BufferOrError
.getError())
1164 return create(BufferOrError
.get(), C
);
1167 /// Create a sample profile remapper from the given input, to remap the
1168 /// function names in the given profile data.
1170 /// \param Filename The file to open.
1172 /// \param C The LLVM context to use to emit diagnostics.
1174 /// \param Underlying The underlying profile data reader to remap.
1176 /// \returns an error code indicating the status of the created reader.
1177 ErrorOr
<std::unique_ptr
<SampleProfileReader
>>
1178 SampleProfileReaderItaniumRemapper::create(
1179 const Twine
&Filename
, LLVMContext
&C
,
1180 std::unique_ptr
<SampleProfileReader
> Underlying
) {
1181 auto BufferOrError
= setupMemoryBuffer(Filename
);
1182 if (std::error_code EC
= BufferOrError
.getError())
1184 return std::make_unique
<SampleProfileReaderItaniumRemapper
>(
1185 std::move(BufferOrError
.get()), C
, std::move(Underlying
));
1188 /// Create a sample profile reader based on the format of the input data.
1190 /// \param B The memory buffer to create the reader from (assumes ownership).
1192 /// \param C The LLVM context to use to emit diagnostics.
1194 /// \returns an error code indicating the status of the created reader.
1195 ErrorOr
<std::unique_ptr
<SampleProfileReader
>>
1196 SampleProfileReader::create(std::unique_ptr
<MemoryBuffer
> &B
, LLVMContext
&C
) {
1197 std::unique_ptr
<SampleProfileReader
> Reader
;
1198 if (SampleProfileReaderRawBinary::hasFormat(*B
))
1199 Reader
.reset(new SampleProfileReaderRawBinary(std::move(B
), C
));
1200 else if (SampleProfileReaderExtBinary::hasFormat(*B
))
1201 Reader
.reset(new SampleProfileReaderExtBinary(std::move(B
), C
));
1202 else if (SampleProfileReaderCompactBinary::hasFormat(*B
))
1203 Reader
.reset(new SampleProfileReaderCompactBinary(std::move(B
), C
));
1204 else if (SampleProfileReaderGCC::hasFormat(*B
))
1205 Reader
.reset(new SampleProfileReaderGCC(std::move(B
), C
));
1206 else if (SampleProfileReaderText::hasFormat(*B
))
1207 Reader
.reset(new SampleProfileReaderText(std::move(B
), C
));
1209 return sampleprof_error::unrecognized_format
;
1211 FunctionSamples::Format
= Reader
->getFormat();
1212 if (std::error_code EC
= Reader
->readHeader()) {
1216 return std::move(Reader
);
1219 // For text and GCC file formats, we compute the summary after reading the
1220 // profile. Binary format has the profile summary in its header.
1221 void SampleProfileReader::computeSummary() {
1222 SampleProfileSummaryBuilder
Builder(ProfileSummaryBuilder::DefaultCutoffs
);
1223 for (const auto &I
: Profiles
) {
1224 const FunctionSamples
&Profile
= I
.second
;
1225 Builder
.addRecord(Profile
);
1227 Summary
= Builder
.getSummary();