1 //===- SampleProfReader.cpp - Read LLVM sample profile data ---------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the class that reads LLVM sample profiles. It
10 // supports three file formats: text, binary and gcov.
12 // The textual representation is useful for debugging and testing purposes. The
13 // binary representation is more compact, resulting in smaller file sizes.
15 // The gcov encoding is the one generated by GCC's AutoFDO profile creation
16 // tool (https://github.com/google/autofdo)
18 // All three encodings can be used interchangeably as an input sample profile.
20 //===----------------------------------------------------------------------===//
22 #include "llvm/ProfileData/SampleProfReader.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/IR/Module.h"
27 #include "llvm/IR/ProfileSummary.h"
28 #include "llvm/ProfileData/ProfileCommon.h"
29 #include "llvm/ProfileData/SampleProf.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Support/Compression.h"
32 #include "llvm/Support/ErrorOr.h"
33 #include "llvm/Support/LEB128.h"
34 #include "llvm/Support/LineIterator.h"
35 #include "llvm/Support/MD5.h"
36 #include "llvm/Support/MemoryBuffer.h"
37 #include "llvm/Support/raw_ostream.h"
43 #include <system_error>
47 using namespace sampleprof
;
49 #define DEBUG_TYPE "samplepgo-reader"
51 // This internal option specifies if the profile uses FS discriminators.
52 // It only applies to text, binary and compact binary format profiles.
53 // For ext-binary format profiles, the flag is set in the summary.
54 static cl::opt
<bool> ProfileIsFSDisciminator(
55 "profile-isfs", cl::Hidden
, cl::init(false),
56 cl::desc("Profile uses flow sensitive discriminators"));
58 /// Dump the function profile for \p FContext.
60 /// \param FContext Name + context of the function to print.
61 /// \param OS Stream to emit the output to.
62 void SampleProfileReader::dumpFunctionProfile(SampleContext FContext
,
64 OS
<< "Function: " << FContext
.toString() << ": " << Profiles
[FContext
];
67 /// Dump all the function profiles found on stream \p OS.
68 void SampleProfileReader::dump(raw_ostream
&OS
) {
69 std::vector
<NameFunctionSamples
> V
;
70 sortFuncProfiles(Profiles
, V
);
71 for (const auto &I
: V
)
72 dumpFunctionProfile(I
.first
, OS
);
75 /// Parse \p Input as function head.
77 /// Parse one line of \p Input, and update function name in \p FName,
78 /// function's total sample count in \p NumSamples, function's entry
79 /// count in \p NumHeadSamples.
81 /// \returns true if parsing is successful.
82 static bool ParseHead(const StringRef
&Input
, StringRef
&FName
,
83 uint64_t &NumSamples
, uint64_t &NumHeadSamples
) {
86 size_t n2
= Input
.rfind(':');
87 size_t n1
= Input
.rfind(':', n2
- 1);
88 FName
= Input
.substr(0, n1
);
89 if (Input
.substr(n1
+ 1, n2
- n1
- 1).getAsInteger(10, NumSamples
))
91 if (Input
.substr(n2
+ 1).getAsInteger(10, NumHeadSamples
))
/// Returns true if line offset \p L is legal (only has 16 bits).
///
/// The parameter is deliberately 64 bits wide: the binary readers in this
/// file decode offsets with readNumber<uint64_t>() and pass the result here.
/// With a narrower parameter the high bits would be discarded by the implicit
/// conversion before the range check, so an out-of-range offset whose low
/// bits happen to fit in 16 bits would be reported as legal.
///
/// \param L line offset to validate.
/// \returns true iff no bit above bit 15 is set in \p L.
static bool isOffsetLegal(uint64_t L) { return (L & 0xffff) == L; }
99 /// Parse \p Input that contains metadata.
100 /// Possible metadata:
101 /// - CFG Checksum information:
102 /// !CFGChecksum: 12345
103 /// - Attributes information (!Attributes: followed by a decimal integer).
105 /// Stores the FunctionHash (a.k.a. CFG Checksum) into \p FunctionHash, or the attribute value into \p Attributes.
106 static bool parseMetadata(const StringRef
&Input
, uint64_t &FunctionHash
,
107 uint32_t &Attributes
) {
108 if (Input
.startswith("!CFGChecksum:")) {
109 StringRef CFGInfo
= Input
.substr(strlen("!CFGChecksum:")).trim();
110 return !CFGInfo
.getAsInteger(10, FunctionHash
);
113 if (Input
.startswith("!Attributes:")) {
114 StringRef Attrib
= Input
.substr(strlen("!Attributes:")).trim();
115 return !Attrib
.getAsInteger(10, Attributes
);
121 enum class LineType
{
127 /// Parse \p Input as line sample.
129 /// \param Input input line.
130 /// \param LineTy Type of this line.
131 /// \param Depth the depth of the inline stack.
132 /// \param NumSamples total samples of the line/inlined callsite.
133 /// \param LineOffset line offset to the start of the function.
134 /// \param Discriminator discriminator of the line.
135 /// \param TargetCountMap map from indirect call target to count.
136 /// \param FunctionHash the function's CFG hash, used by pseudo probe.
138 /// \returns true if parsing is successful.
139 static bool ParseLine(const StringRef
&Input
, LineType
&LineTy
, uint32_t &Depth
,
140 uint64_t &NumSamples
, uint32_t &LineOffset
,
141 uint32_t &Discriminator
, StringRef
&CalleeName
,
142 DenseMap
<StringRef
, uint64_t> &TargetCountMap
,
143 uint64_t &FunctionHash
, uint32_t &Attributes
) {
144 for (Depth
= 0; Input
[Depth
] == ' '; Depth
++)
149 if (Input
[Depth
] == '!') {
150 LineTy
= LineType::Metadata
;
151 return parseMetadata(Input
.substr(Depth
), FunctionHash
, Attributes
);
154 size_t n1
= Input
.find(':');
155 StringRef Loc
= Input
.substr(Depth
, n1
- Depth
);
156 size_t n2
= Loc
.find('.');
157 if (n2
== StringRef::npos
) {
158 if (Loc
.getAsInteger(10, LineOffset
) || !isOffsetLegal(LineOffset
))
162 if (Loc
.substr(0, n2
).getAsInteger(10, LineOffset
))
164 if (Loc
.substr(n2
+ 1).getAsInteger(10, Discriminator
))
168 StringRef Rest
= Input
.substr(n1
+ 2);
169 if (isDigit(Rest
[0])) {
170 LineTy
= LineType::BodyProfile
;
171 size_t n3
= Rest
.find(' ');
172 if (n3
== StringRef::npos
) {
173 if (Rest
.getAsInteger(10, NumSamples
))
176 if (Rest
.substr(0, n3
).getAsInteger(10, NumSamples
))
179 // Find call targets and their sample counts.
180 // Note: In some cases, there are symbols in the profile which are not
181 // mangled. To accommodate such cases, use colon + integer pairs as the
184 // _M_construct<char *>:1000 string_view<std::allocator<char> >:437
185 // ":1000" and ":437" are used as anchor points so the string above will
187 // target: _M_construct<char *>
189 // target: string_view<std::allocator<char> >
191 while (n3
!= StringRef::npos
) {
192 n3
+= Rest
.substr(n3
).find_first_not_of(' ');
193 Rest
= Rest
.substr(n3
);
194 n3
= Rest
.find_first_of(':');
195 if (n3
== StringRef::npos
|| n3
== 0)
201 // Get the segment after the current colon.
202 StringRef AfterColon
= Rest
.substr(n3
+ 1);
203 // Get the target symbol before the current colon.
204 Target
= Rest
.substr(0, n3
);
205 // Check if the word after the current colon is an integer.
206 n4
= AfterColon
.find_first_of(' ');
207 n4
= (n4
!= StringRef::npos
) ? n3
+ n4
+ 1 : Rest
.size();
208 StringRef WordAfterColon
= Rest
.substr(n3
+ 1, n4
- n3
- 1);
209 if (!WordAfterColon
.getAsInteger(10, count
))
212 // Try to find the next colon.
213 uint64_t n5
= AfterColon
.find_first_of(':');
214 if (n5
== StringRef::npos
)
219 // An anchor point is found. Save the {target, count} pair
220 TargetCountMap
[Target
] = count
;
221 if (n4
== Rest
.size())
223 // Change n3 to the next blank space after colon + integer pair.
227 LineTy
= LineType::CallSiteProfile
;
228 size_t n3
= Rest
.find_last_of(':');
229 CalleeName
= Rest
.substr(0, n3
);
230 if (Rest
.substr(n3
+ 1).getAsInteger(10, NumSamples
))
236 /// Load samples from a text file.
238 /// See the documentation at the top of the file for an explanation of
239 /// the expected format.
241 /// \returns a std::error_code describing the outcome (e.g. sampleprof_error::malformed when a line cannot be parsed).
242 std::error_code
SampleProfileReaderText::readImpl() {
243 line_iterator
LineIt(*Buffer
, /*SkipBlanks=*/true, '#');
244 sampleprof_error Result
= sampleprof_error::success
;
246 InlineCallStack InlineStack
;
247 uint32_t TopLevelProbeProfileCount
= 0;
249 // DepthMetadata tracks whether we have processed metadata for the current
250 // top-level or nested function profile.
251 uint32_t DepthMetadata
= 0;
253 ProfileIsFS
= ProfileIsFSDisciminator
;
254 FunctionSamples::ProfileIsFS
= ProfileIsFS
;
255 for (; !LineIt
.is_at_eof(); ++LineIt
) {
256 if ((*LineIt
)[(*LineIt
).find_first_not_of(' ')] == '#')
258 // Read the header of each function.
260 // Note that for function identifiers we are actually expecting
261 // mangled names, but we may not always get them. This happens when
262 // the compiler decides not to emit the function (e.g., it was inlined
263 // and removed). In this case, the binary will not have the linkage
264 // name for the function, so the profiler will emit the function's
265 // unmangled name, which may contain characters like ':' and '>' in its
266 // name (member functions, templates, etc).
268 // The only requirement we place on the identifier, then, is that it
269 // should not begin with a number.
270 if ((*LineIt
)[0] != ' ') {
271 uint64_t NumSamples
, NumHeadSamples
;
273 if (!ParseHead(*LineIt
, FName
, NumSamples
, NumHeadSamples
)) {
274 reportError(LineIt
.line_number(),
275 "Expected 'mangled_name:NUM:NUM', found " + *LineIt
);
276 return sampleprof_error::malformed
;
279 SampleContext
FContext(FName
, CSNameTable
);
280 if (FContext
.hasContext())
282 Profiles
[FContext
] = FunctionSamples();
283 FunctionSamples
&FProfile
= Profiles
[FContext
];
284 FProfile
.setContext(FContext
);
285 MergeResult(Result
, FProfile
.addTotalSamples(NumSamples
));
286 MergeResult(Result
, FProfile
.addHeadSamples(NumHeadSamples
));
288 InlineStack
.push_back(&FProfile
);
292 DenseMap
<StringRef
, uint64_t> TargetCountMap
;
293 uint32_t Depth
, LineOffset
, Discriminator
;
295 uint64_t FunctionHash
= 0;
296 uint32_t Attributes
= 0;
297 if (!ParseLine(*LineIt
, LineTy
, Depth
, NumSamples
, LineOffset
,
298 Discriminator
, FName
, TargetCountMap
, FunctionHash
,
300 reportError(LineIt
.line_number(),
301 "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " +
303 return sampleprof_error::malformed
;
305 if (LineTy
!= LineType::Metadata
&& Depth
== DepthMetadata
) {
306 // Metadata must be put at the end of a function profile.
307 reportError(LineIt
.line_number(),
308 "Found non-metadata after metadata: " + *LineIt
);
309 return sampleprof_error::malformed
;
312 // Here we handle FS discriminators.
313 Discriminator
&= getDiscriminatorMask();
315 while (InlineStack
.size() > Depth
) {
316 InlineStack
.pop_back();
319 case LineType::CallSiteProfile
: {
320 FunctionSamples
&FSamples
= InlineStack
.back()->functionSamplesAt(
321 LineLocation(LineOffset
, Discriminator
))[std::string(FName
)];
322 FSamples
.setName(FName
);
323 MergeResult(Result
, FSamples
.addTotalSamples(NumSamples
));
324 InlineStack
.push_back(&FSamples
);
328 case LineType::BodyProfile
: {
329 while (InlineStack
.size() > Depth
) {
330 InlineStack
.pop_back();
332 FunctionSamples
&FProfile
= *InlineStack
.back();
333 for (const auto &name_count
: TargetCountMap
) {
334 MergeResult(Result
, FProfile
.addCalledTargetSamples(
335 LineOffset
, Discriminator
, name_count
.first
,
338 MergeResult(Result
, FProfile
.addBodySamples(LineOffset
, Discriminator
,
342 case LineType::Metadata
: {
343 FunctionSamples
&FProfile
= *InlineStack
.back();
345 FProfile
.setFunctionHash(FunctionHash
);
347 ++TopLevelProbeProfileCount
;
349 FProfile
.getContext().setAllAttributes(Attributes
);
350 if (Attributes
& (uint32_t)ContextShouldBeInlined
)
351 ProfileIsCSNested
= true;
352 DepthMetadata
= Depth
;
359 assert((CSProfileCount
== 0 || CSProfileCount
== Profiles
.size()) &&
360 "Cannot have both context-sensitive and regular profile");
361 ProfileIsCSFlat
= (CSProfileCount
> 0);
362 assert((TopLevelProbeProfileCount
== 0 ||
363 TopLevelProbeProfileCount
== Profiles
.size()) &&
364 "Cannot have both probe-based profiles and regular profiles");
365 ProfileIsProbeBased
= (TopLevelProbeProfileCount
> 0);
366 FunctionSamples::ProfileIsProbeBased
= ProfileIsProbeBased
;
367 FunctionSamples::ProfileIsCSFlat
= ProfileIsCSFlat
;
368 FunctionSamples::ProfileIsCSNested
= ProfileIsCSNested
;
370 if (Result
== sampleprof_error::success
)
376 bool SampleProfileReaderText::hasFormat(const MemoryBuffer
&Buffer
) {
379 // Check that the first non-comment line is a valid function header.
380 line_iterator
LineIt(Buffer
, /*SkipBlanks=*/true, '#');
381 if (!LineIt
.is_at_eof()) {
382 if ((*LineIt
)[0] != ' ') {
383 uint64_t NumSamples
, NumHeadSamples
;
385 result
= ParseHead(*LineIt
, FName
, NumSamples
, NumHeadSamples
);
392 template <typename T
> ErrorOr
<T
> SampleProfileReaderBinary::readNumber() {
393 unsigned NumBytesRead
= 0;
395 uint64_t Val
= decodeULEB128(Data
, &NumBytesRead
);
397 if (Val
> std::numeric_limits
<T
>::max())
398 EC
= sampleprof_error::malformed
;
399 else if (Data
+ NumBytesRead
> End
)
400 EC
= sampleprof_error::truncated
;
402 EC
= sampleprof_error::success
;
405 reportError(0, EC
.message());
409 Data
+= NumBytesRead
;
410 return static_cast<T
>(Val
);
413 ErrorOr
<StringRef
> SampleProfileReaderBinary::readString() {
415 StringRef
Str(reinterpret_cast<const char *>(Data
));
416 if (Data
+ Str
.size() + 1 > End
) {
417 EC
= sampleprof_error::truncated
;
418 reportError(0, EC
.message());
422 Data
+= Str
.size() + 1;
426 template <typename T
>
427 ErrorOr
<T
> SampleProfileReaderBinary::readUnencodedNumber() {
430 if (Data
+ sizeof(T
) > End
) {
431 EC
= sampleprof_error::truncated
;
432 reportError(0, EC
.message());
436 using namespace support
;
437 T Val
= endian::readNext
<T
, little
, unaligned
>(Data
);
441 template <typename T
>
442 inline ErrorOr
<uint32_t> SampleProfileReaderBinary::readStringIndex(T
&Table
) {
444 auto Idx
= readNumber
<uint32_t>();
445 if (std::error_code EC
= Idx
.getError())
447 if (*Idx
>= Table
.size())
448 return sampleprof_error::truncated_name_table
;
452 ErrorOr
<StringRef
> SampleProfileReaderBinary::readStringFromTable() {
453 auto Idx
= readStringIndex(NameTable
);
454 if (std::error_code EC
= Idx
.getError())
457 return NameTable
[*Idx
];
460 ErrorOr
<SampleContext
> SampleProfileReaderBinary::readSampleContextFromTable() {
461 auto FName(readStringFromTable());
462 if (std::error_code EC
= FName
.getError())
464 return SampleContext(*FName
);
467 ErrorOr
<StringRef
> SampleProfileReaderExtBinaryBase::readStringFromTable() {
469 return SampleProfileReaderBinary::readStringFromTable();
471 // read NameTable index.
472 auto Idx
= readStringIndex(NameTable
);
473 if (std::error_code EC
= Idx
.getError())
476 // Check whether the name to be accessed has been accessed before,
477 // if not, read it from memory directly.
478 StringRef
&SR
= NameTable
[*Idx
];
480 const uint8_t *SavedData
= Data
;
481 Data
= MD5NameMemStart
+ ((*Idx
) * sizeof(uint64_t));
482 auto FID
= readUnencodedNumber
<uint64_t>();
483 if (std::error_code EC
= FID
.getError())
485 // Save the string converted from uint64_t in MD5StringBuf. All the
486 // references to the name are StringRefs referring to the string
488 MD5StringBuf
->push_back(std::to_string(*FID
));
489 SR
= MD5StringBuf
->back();
495 ErrorOr
<StringRef
> SampleProfileReaderCompactBinary::readStringFromTable() {
496 auto Idx
= readStringIndex(NameTable
);
497 if (std::error_code EC
= Idx
.getError())
500 return StringRef(NameTable
[*Idx
]);
504 SampleProfileReaderBinary::readProfile(FunctionSamples
&FProfile
) {
505 auto NumSamples
= readNumber
<uint64_t>();
506 if (std::error_code EC
= NumSamples
.getError())
508 FProfile
.addTotalSamples(*NumSamples
);
510 // Read the samples in the body.
511 auto NumRecords
= readNumber
<uint32_t>();
512 if (std::error_code EC
= NumRecords
.getError())
515 for (uint32_t I
= 0; I
< *NumRecords
; ++I
) {
516 auto LineOffset
= readNumber
<uint64_t>();
517 if (std::error_code EC
= LineOffset
.getError())
520 if (!isOffsetLegal(*LineOffset
)) {
521 return std::error_code();
524 auto Discriminator
= readNumber
<uint64_t>();
525 if (std::error_code EC
= Discriminator
.getError())
528 auto NumSamples
= readNumber
<uint64_t>();
529 if (std::error_code EC
= NumSamples
.getError())
532 auto NumCalls
= readNumber
<uint32_t>();
533 if (std::error_code EC
= NumCalls
.getError())
536 // Here we handle FS discriminators:
537 uint32_t DiscriminatorVal
= (*Discriminator
) & getDiscriminatorMask();
539 for (uint32_t J
= 0; J
< *NumCalls
; ++J
) {
540 auto CalledFunction(readStringFromTable());
541 if (std::error_code EC
= CalledFunction
.getError())
544 auto CalledFunctionSamples
= readNumber
<uint64_t>();
545 if (std::error_code EC
= CalledFunctionSamples
.getError())
548 FProfile
.addCalledTargetSamples(*LineOffset
, DiscriminatorVal
,
549 *CalledFunction
, *CalledFunctionSamples
);
552 FProfile
.addBodySamples(*LineOffset
, DiscriminatorVal
, *NumSamples
);
555 // Read all the samples for inlined function calls.
556 auto NumCallsites
= readNumber
<uint32_t>();
557 if (std::error_code EC
= NumCallsites
.getError())
560 for (uint32_t J
= 0; J
< *NumCallsites
; ++J
) {
561 auto LineOffset
= readNumber
<uint64_t>();
562 if (std::error_code EC
= LineOffset
.getError())
565 auto Discriminator
= readNumber
<uint64_t>();
566 if (std::error_code EC
= Discriminator
.getError())
569 auto FName(readStringFromTable());
570 if (std::error_code EC
= FName
.getError())
573 // Here we handle FS discriminators:
574 uint32_t DiscriminatorVal
= (*Discriminator
) & getDiscriminatorMask();
576 FunctionSamples
&CalleeProfile
= FProfile
.functionSamplesAt(
577 LineLocation(*LineOffset
, DiscriminatorVal
))[std::string(*FName
)];
578 CalleeProfile
.setName(*FName
);
579 if (std::error_code EC
= readProfile(CalleeProfile
))
583 return sampleprof_error::success
;
587 SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start
) {
589 auto NumHeadSamples
= readNumber
<uint64_t>();
590 if (std::error_code EC
= NumHeadSamples
.getError())
593 ErrorOr
<SampleContext
> FContext(readSampleContextFromTable());
594 if (std::error_code EC
= FContext
.getError())
597 Profiles
[*FContext
] = FunctionSamples();
598 FunctionSamples
&FProfile
= Profiles
[*FContext
];
599 FProfile
.setContext(*FContext
);
600 FProfile
.addHeadSamples(*NumHeadSamples
);
602 if (FContext
->hasContext())
605 if (std::error_code EC
= readProfile(FProfile
))
607 return sampleprof_error::success
;
610 std::error_code
SampleProfileReaderBinary::readImpl() {
611 ProfileIsFS
= ProfileIsFSDisciminator
;
612 FunctionSamples::ProfileIsFS
= ProfileIsFS
;
614 if (std::error_code EC
= readFuncProfile(Data
))
618 return sampleprof_error::success
;
621 ErrorOr
<SampleContextFrames
>
622 SampleProfileReaderExtBinaryBase::readContextFromTable() {
623 auto ContextIdx
= readNumber
<uint32_t>();
624 if (std::error_code EC
= ContextIdx
.getError())
626 if (*ContextIdx
>= CSNameTable
->size())
627 return sampleprof_error::truncated_name_table
;
628 return (*CSNameTable
)[*ContextIdx
];
631 ErrorOr
<SampleContext
>
632 SampleProfileReaderExtBinaryBase::readSampleContextFromTable() {
633 if (ProfileIsCSFlat
) {
634 auto FContext(readContextFromTable());
635 if (std::error_code EC
= FContext
.getError())
637 return SampleContext(*FContext
);
639 auto FName(readStringFromTable());
640 if (std::error_code EC
= FName
.getError())
642 return SampleContext(*FName
);
646 std::error_code
SampleProfileReaderExtBinaryBase::readOneSection(
647 const uint8_t *Start
, uint64_t Size
, const SecHdrTableEntry
&Entry
) {
650 switch (Entry
.Type
) {
652 if (std::error_code EC
= readSummary())
654 if (hasSecFlag(Entry
, SecProfSummaryFlags::SecFlagPartial
))
655 Summary
->setPartialProfile(true);
656 if (hasSecFlag(Entry
, SecProfSummaryFlags::SecFlagFullContext
))
657 FunctionSamples::ProfileIsCSFlat
= ProfileIsCSFlat
= true;
658 if (hasSecFlag(Entry
, SecProfSummaryFlags::SecFlagIsCSNested
))
659 FunctionSamples::ProfileIsCSNested
= ProfileIsCSNested
= true;
660 if (hasSecFlag(Entry
, SecProfSummaryFlags::SecFlagFSDiscriminator
))
661 FunctionSamples::ProfileIsFS
= ProfileIsFS
= true;
665 hasSecFlag(Entry
, SecNameTableFlags::SecFlagFixedLengthMD5
);
666 bool UseMD5
= hasSecFlag(Entry
, SecNameTableFlags::SecFlagMD5Name
);
667 assert((!FixedLengthMD5
|| UseMD5
) &&
668 "If FixedLengthMD5 is true, UseMD5 has to be true");
669 FunctionSamples::HasUniqSuffix
=
670 hasSecFlag(Entry
, SecNameTableFlags::SecFlagUniqSuffix
);
671 if (std::error_code EC
= readNameTableSec(UseMD5
))
675 case SecCSNameTable
: {
676 if (std::error_code EC
= readCSNameTableSec())
681 if (std::error_code EC
= readFuncProfiles())
684 case SecFuncOffsetTable
:
685 FuncOffsetsOrdered
= hasSecFlag(Entry
, SecFuncOffsetFlags::SecFlagOrdered
);
686 if (std::error_code EC
= readFuncOffsetTable())
689 case SecFuncMetadata
: {
690 ProfileIsProbeBased
=
691 hasSecFlag(Entry
, SecFuncMetadataFlags::SecFlagIsProbeBased
);
692 FunctionSamples::ProfileIsProbeBased
= ProfileIsProbeBased
;
694 hasSecFlag(Entry
, SecFuncMetadataFlags::SecFlagHasAttribute
);
695 if (std::error_code EC
= readFuncMetadata(HasAttribute
))
699 case SecProfileSymbolList
:
700 if (std::error_code EC
= readProfileSymbolList())
704 if (std::error_code EC
= readCustomSection(Entry
))
708 return sampleprof_error::success
;
711 bool SampleProfileReaderExtBinaryBase::collectFuncsFromModule() {
716 FuncsToUse
.insert(FunctionSamples::getCanonicalFnName(F
));
720 std::error_code
SampleProfileReaderExtBinaryBase::readFuncOffsetTable() {
721 // If there is more than one FuncOffsetTable, the profile read associated
722 // with previous FuncOffsetTable has to be done before next FuncOffsetTable
724 FuncOffsetTable
.clear();
726 auto Size
= readNumber
<uint64_t>();
727 if (std::error_code EC
= Size
.getError())
730 FuncOffsetTable
.reserve(*Size
);
732 if (FuncOffsetsOrdered
) {
734 std::make_unique
<std::vector
<std::pair
<SampleContext
, uint64_t>>>();
735 OrderedFuncOffsets
->reserve(*Size
);
738 for (uint32_t I
= 0; I
< *Size
; ++I
) {
739 auto FContext(readSampleContextFromTable());
740 if (std::error_code EC
= FContext
.getError())
743 auto Offset
= readNumber
<uint64_t>();
744 if (std::error_code EC
= Offset
.getError())
747 FuncOffsetTable
[*FContext
] = *Offset
;
748 if (FuncOffsetsOrdered
)
749 OrderedFuncOffsets
->emplace_back(*FContext
, *Offset
);
752 return sampleprof_error::success
;
755 std::error_code
SampleProfileReaderExtBinaryBase::readFuncProfiles() {
756 // Collect functions used by current module if the Reader has been
758 // collectFuncsFromModule uses FunctionSamples::getCanonicalFnName
759 // which will query FunctionSamples::HasUniqSuffix, so it has to be
760 // called after FunctionSamples::HasUniqSuffix is set, i.e. after
761 // NameTable section is read.
762 bool LoadFuncsToBeUsed
= collectFuncsFromModule();
764 // When LoadFuncsToBeUsed is false, load all the function profiles.
765 const uint8_t *Start
= Data
;
766 if (!LoadFuncsToBeUsed
) {
768 if (std::error_code EC
= readFuncProfile(Data
))
771 assert(Data
== End
&& "More data is read than expected");
773 // Load function profiles on demand.
775 for (auto Name
: FuncsToUse
) {
776 Remapper
->insert(Name
);
780 if (ProfileIsCSFlat
) {
781 DenseSet
<uint64_t> FuncGuidsToUse
;
783 for (auto Name
: FuncsToUse
)
784 FuncGuidsToUse
.insert(Function::getGUID(Name
));
787 // For each function in current module, load all context profiles for
788 // the function as well as their callee contexts which can help profile
789 // guided importing for ThinLTO. This can be achieved by walking
790 // through an ordered context container, where contexts are laid out
791 // as if they were walked in preorder of a context trie. While
792 // traversing the trie, a link to the highest common ancestor node is
793 // kept so that all of its descendants will be loaded.
794 assert(OrderedFuncOffsets
.get() &&
795 "func offset table should always be sorted in CS profile");
796 const SampleContext
*CommonContext
= nullptr;
797 for (const auto &NameOffset
: *OrderedFuncOffsets
) {
798 const auto &FContext
= NameOffset
.first
;
799 auto FName
= FContext
.getName();
800 // For function in the current module, keep its farthest ancestor
801 // context. This can be used to load itself and its child and
803 if ((useMD5() && FuncGuidsToUse
.count(std::stoull(FName
.data()))) ||
804 (!useMD5() && (FuncsToUse
.count(FName
) ||
805 (Remapper
&& Remapper
->exist(FName
))))) {
806 if (!CommonContext
|| !CommonContext
->IsPrefixOf(FContext
))
807 CommonContext
= &FContext
;
810 if (CommonContext
== &FContext
||
811 (CommonContext
&& CommonContext
->IsPrefixOf(FContext
))) {
812 // Load profile for the current context which originated from
813 // the common ancestor.
814 const uint8_t *FuncProfileAddr
= Start
+ NameOffset
.second
;
815 assert(FuncProfileAddr
< End
&& "out of LBRProfile section");
816 if (std::error_code EC
= readFuncProfile(FuncProfileAddr
))
822 for (auto Name
: FuncsToUse
) {
823 auto GUID
= std::to_string(MD5Hash(Name
));
824 auto iter
= FuncOffsetTable
.find(StringRef(GUID
));
825 if (iter
== FuncOffsetTable
.end())
827 const uint8_t *FuncProfileAddr
= Start
+ iter
->second
;
828 assert(FuncProfileAddr
< End
&& "out of LBRProfile section");
829 if (std::error_code EC
= readFuncProfile(FuncProfileAddr
))
833 for (auto NameOffset
: FuncOffsetTable
) {
834 SampleContext
FContext(NameOffset
.first
);
835 auto FuncName
= FContext
.getName();
836 if (!FuncsToUse
.count(FuncName
) &&
837 (!Remapper
|| !Remapper
->exist(FuncName
)))
839 const uint8_t *FuncProfileAddr
= Start
+ NameOffset
.second
;
840 assert(FuncProfileAddr
< End
&& "out of LBRProfile section");
841 if (std::error_code EC
= readFuncProfile(FuncProfileAddr
))
848 assert((CSProfileCount
== 0 || CSProfileCount
== Profiles
.size()) &&
849 "Cannot have both context-sensitive and regular profile");
850 assert((!CSProfileCount
|| ProfileIsCSFlat
) &&
851 "Section flag should be consistent with actual profile");
852 return sampleprof_error::success
;
855 std::error_code
SampleProfileReaderExtBinaryBase::readProfileSymbolList() {
857 ProfSymList
= std::make_unique
<ProfileSymbolList
>();
859 if (std::error_code EC
= ProfSymList
->read(Data
, End
- Data
))
863 return sampleprof_error::success
;
866 std::error_code
SampleProfileReaderExtBinaryBase::decompressSection(
867 const uint8_t *SecStart
, const uint64_t SecSize
,
868 const uint8_t *&DecompressBuf
, uint64_t &DecompressBufSize
) {
870 End
= SecStart
+ SecSize
;
871 auto DecompressSize
= readNumber
<uint64_t>();
872 if (std::error_code EC
= DecompressSize
.getError())
874 DecompressBufSize
= *DecompressSize
;
876 auto CompressSize
= readNumber
<uint64_t>();
877 if (std::error_code EC
= CompressSize
.getError())
880 if (!llvm::zlib::isAvailable())
881 return sampleprof_error::zlib_unavailable
;
883 StringRef
CompressedStrings(reinterpret_cast<const char *>(Data
),
885 char *Buffer
= Allocator
.Allocate
<char>(DecompressBufSize
);
886 size_t UCSize
= DecompressBufSize
;
888 zlib::uncompress(CompressedStrings
, Buffer
, UCSize
);
890 return sampleprof_error::uncompress_failed
;
891 DecompressBuf
= reinterpret_cast<const uint8_t *>(Buffer
);
892 return sampleprof_error::success
;
895 std::error_code
SampleProfileReaderExtBinaryBase::readImpl() {
896 const uint8_t *BufStart
=
897 reinterpret_cast<const uint8_t *>(Buffer
->getBufferStart());
899 for (auto &Entry
: SecHdrTable
) {
900 // Skip empty section.
904 // Skip sections without context when SkipFlatProf is true.
905 if (SkipFlatProf
&& hasSecFlag(Entry
, SecCommonFlags::SecFlagFlat
))
908 const uint8_t *SecStart
= BufStart
+ Entry
.Offset
;
909 uint64_t SecSize
= Entry
.Size
;
911 // If the section is compressed, decompress it into a buffer
912 // DecompressBuf before reading the actual data. The pointee of
913 // 'Data' will be changed to the buffer held by DecompressBuf
914 // temporarily when reading the actual data.
915 bool isCompressed
= hasSecFlag(Entry
, SecCommonFlags::SecFlagCompress
);
917 const uint8_t *DecompressBuf
;
918 uint64_t DecompressBufSize
;
919 if (std::error_code EC
= decompressSection(
920 SecStart
, SecSize
, DecompressBuf
, DecompressBufSize
))
922 SecStart
= DecompressBuf
;
923 SecSize
= DecompressBufSize
;
926 if (std::error_code EC
= readOneSection(SecStart
, SecSize
, Entry
))
928 if (Data
!= SecStart
+ SecSize
)
929 return sampleprof_error::malformed
;
931 // Change the pointee of 'Data' from DecompressBuf to original Buffer.
933 Data
= BufStart
+ Entry
.Offset
;
934 End
= BufStart
+ Buffer
->getBufferSize();
938 return sampleprof_error::success
;
941 std::error_code
SampleProfileReaderCompactBinary::readImpl() {
942 // Collect functions used by current module if the Reader has been
944 bool LoadFuncsToBeUsed
= collectFuncsFromModule();
945 ProfileIsFS
= ProfileIsFSDisciminator
;
946 FunctionSamples::ProfileIsFS
= ProfileIsFS
;
947 std::vector
<uint64_t> OffsetsToUse
;
948 if (!LoadFuncsToBeUsed
) {
949 // load all the function profiles.
950 for (auto FuncEntry
: FuncOffsetTable
) {
951 OffsetsToUse
.push_back(FuncEntry
.second
);
954 // load function profiles on demand.
955 for (auto Name
: FuncsToUse
) {
956 auto GUID
= std::to_string(MD5Hash(Name
));
957 auto iter
= FuncOffsetTable
.find(StringRef(GUID
));
958 if (iter
== FuncOffsetTable
.end())
960 OffsetsToUse
.push_back(iter
->second
);
964 for (auto Offset
: OffsetsToUse
) {
965 const uint8_t *SavedData
= Data
;
966 if (std::error_code EC
= readFuncProfile(
967 reinterpret_cast<const uint8_t *>(Buffer
->getBufferStart()) +
972 return sampleprof_error::success
;
975 std::error_code
SampleProfileReaderRawBinary::verifySPMagic(uint64_t Magic
) {
976 if (Magic
== SPMagic())
977 return sampleprof_error::success
;
978 return sampleprof_error::bad_magic
;
981 std::error_code
SampleProfileReaderExtBinary::verifySPMagic(uint64_t Magic
) {
982 if (Magic
== SPMagic(SPF_Ext_Binary
))
983 return sampleprof_error::success
;
984 return sampleprof_error::bad_magic
;
988 SampleProfileReaderCompactBinary::verifySPMagic(uint64_t Magic
) {
989 if (Magic
== SPMagic(SPF_Compact_Binary
))
990 return sampleprof_error::success
;
991 return sampleprof_error::bad_magic
;
994 std::error_code
SampleProfileReaderBinary::readNameTable() {
995 auto Size
= readNumber
<uint32_t>();
996 if (std::error_code EC
= Size
.getError())
998 NameTable
.reserve(*Size
+ NameTable
.size());
999 for (uint32_t I
= 0; I
< *Size
; ++I
) {
1000 auto Name(readString());
1001 if (std::error_code EC
= Name
.getError())
1003 NameTable
.push_back(*Name
);
1006 return sampleprof_error::success
;
1009 std::error_code
SampleProfileReaderExtBinaryBase::readMD5NameTable() {
1010 auto Size
= readNumber
<uint64_t>();
1011 if (std::error_code EC
= Size
.getError())
1013 MD5StringBuf
= std::make_unique
<std::vector
<std::string
>>();
1014 MD5StringBuf
->reserve(*Size
);
1015 if (FixedLengthMD5
) {
1016 // Preallocate and initialize NameTable so we can check whether a name
1017 // index has been read before by checking whether the element in the
1018 // NameTable is empty, meanwhile readStringIndex can do the boundary
1019 // check using the size of NameTable.
1020 NameTable
.resize(*Size
+ NameTable
.size());
1022 MD5NameMemStart
= Data
;
1023 Data
= Data
+ (*Size
) * sizeof(uint64_t);
1024 return sampleprof_error::success
;
1026 NameTable
.reserve(*Size
);
1027 for (uint32_t I
= 0; I
< *Size
; ++I
) {
1028 auto FID
= readNumber
<uint64_t>();
1029 if (std::error_code EC
= FID
.getError())
1031 MD5StringBuf
->push_back(std::to_string(*FID
));
1032 // NameTable is a vector of StringRef. Here it is pushing back a
1033 // StringRef initialized with the last string in MD5stringBuf.
1034 NameTable
.push_back(MD5StringBuf
->back());
1036 return sampleprof_error::success
;
1039 std::error_code
SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5
) {
1041 return readMD5NameTable();
1042 return SampleProfileReaderBinary::readNameTable();
1045 // Read in the CS name table section, which basically contains a list of context
1046 // vectors. Each element of a context vector, aka a frame, refers to the
1047 // underlying raw function names that are stored in the name table, as well as
1048 // a callsite identifier that only makes sense for non-leaf frames.
1049 std::error_code
SampleProfileReaderExtBinaryBase::readCSNameTableSec() {
1050 auto Size
= readNumber
<uint32_t>();
1051 if (std::error_code EC
= Size
.getError())
1054 std::vector
<SampleContextFrameVector
> *PNameVec
=
1055 new std::vector
<SampleContextFrameVector
>();
1056 PNameVec
->reserve(*Size
);
1057 for (uint32_t I
= 0; I
< *Size
; ++I
) {
1058 PNameVec
->emplace_back(SampleContextFrameVector());
1059 auto ContextSize
= readNumber
<uint32_t>();
1060 if (std::error_code EC
= ContextSize
.getError())
1062 for (uint32_t J
= 0; J
< *ContextSize
; ++J
) {
1063 auto FName(readStringFromTable());
1064 if (std::error_code EC
= FName
.getError())
1066 auto LineOffset
= readNumber
<uint64_t>();
1067 if (std::error_code EC
= LineOffset
.getError())
1070 if (!isOffsetLegal(*LineOffset
))
1071 return std::error_code();
1073 auto Discriminator
= readNumber
<uint64_t>();
1074 if (std::error_code EC
= Discriminator
.getError())
1077 PNameVec
->back().emplace_back(
1078 FName
.get(), LineLocation(LineOffset
.get(), Discriminator
.get()));
1082 // From this point the underlying object of CSNameTable should be immutable.
1083 CSNameTable
.reset(PNameVec
);
1084 return sampleprof_error::success
;
1089 SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute
,
1090 FunctionSamples
*FProfile
) {
1092 if (ProfileIsProbeBased
) {
1093 auto Checksum
= readNumber
<uint64_t>();
1094 if (std::error_code EC
= Checksum
.getError())
1097 FProfile
->setFunctionHash(*Checksum
);
1100 if (ProfileHasAttribute
) {
1101 auto Attributes
= readNumber
<uint32_t>();
1102 if (std::error_code EC
= Attributes
.getError())
1105 FProfile
->getContext().setAllAttributes(*Attributes
);
1108 if (!ProfileIsCSFlat
) {
1109 // Read all the attributes for inlined function calls.
1110 auto NumCallsites
= readNumber
<uint32_t>();
1111 if (std::error_code EC
= NumCallsites
.getError())
1114 for (uint32_t J
= 0; J
< *NumCallsites
; ++J
) {
1115 auto LineOffset
= readNumber
<uint64_t>();
1116 if (std::error_code EC
= LineOffset
.getError())
1119 auto Discriminator
= readNumber
<uint64_t>();
1120 if (std::error_code EC
= Discriminator
.getError())
1123 auto FContext(readSampleContextFromTable());
1124 if (std::error_code EC
= FContext
.getError())
1127 FunctionSamples
*CalleeProfile
= nullptr;
1129 CalleeProfile
= const_cast<FunctionSamples
*>(
1130 &FProfile
->functionSamplesAt(LineLocation(
1132 *Discriminator
))[std::string(FContext
.get().getName())]);
1134 if (std::error_code EC
=
1135 readFuncMetadata(ProfileHasAttribute
, CalleeProfile
))
1141 return sampleprof_error::success
;
1145 SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute
) {
1146 while (Data
< End
) {
1147 auto FContext(readSampleContextFromTable());
1148 if (std::error_code EC
= FContext
.getError())
1150 FunctionSamples
*FProfile
= nullptr;
1151 auto It
= Profiles
.find(*FContext
);
1152 if (It
!= Profiles
.end())
1153 FProfile
= &It
->second
;
1155 if (std::error_code EC
= readFuncMetadata(ProfileHasAttribute
, FProfile
))
1159 assert(Data
== End
&& "More data is read than expected");
1160 return sampleprof_error::success
;
1163 std::error_code
SampleProfileReaderCompactBinary::readNameTable() {
1164 auto Size
= readNumber
<uint64_t>();
1165 if (std::error_code EC
= Size
.getError())
1167 NameTable
.reserve(*Size
);
1168 for (uint32_t I
= 0; I
< *Size
; ++I
) {
1169 auto FID
= readNumber
<uint64_t>();
1170 if (std::error_code EC
= FID
.getError())
1172 NameTable
.push_back(std::to_string(*FID
));
1174 return sampleprof_error::success
;
1178 SampleProfileReaderExtBinaryBase::readSecHdrTableEntry(uint32_t Idx
) {
1179 SecHdrTableEntry Entry
;
1180 auto Type
= readUnencodedNumber
<uint64_t>();
1181 if (std::error_code EC
= Type
.getError())
1183 Entry
.Type
= static_cast<SecType
>(*Type
);
1185 auto Flags
= readUnencodedNumber
<uint64_t>();
1186 if (std::error_code EC
= Flags
.getError())
1188 Entry
.Flags
= *Flags
;
1190 auto Offset
= readUnencodedNumber
<uint64_t>();
1191 if (std::error_code EC
= Offset
.getError())
1193 Entry
.Offset
= *Offset
;
1195 auto Size
= readUnencodedNumber
<uint64_t>();
1196 if (std::error_code EC
= Size
.getError())
1200 Entry
.LayoutIndex
= Idx
;
1201 SecHdrTable
.push_back(std::move(Entry
));
1202 return sampleprof_error::success
;
1205 std::error_code
SampleProfileReaderExtBinaryBase::readSecHdrTable() {
1206 auto EntryNum
= readUnencodedNumber
<uint64_t>();
1207 if (std::error_code EC
= EntryNum
.getError())
1210 for (uint32_t i
= 0; i
< (*EntryNum
); i
++)
1211 if (std::error_code EC
= readSecHdrTableEntry(i
))
1214 return sampleprof_error::success
;
1217 std::error_code
SampleProfileReaderExtBinaryBase::readHeader() {
1218 const uint8_t *BufStart
=
1219 reinterpret_cast<const uint8_t *>(Buffer
->getBufferStart());
1221 End
= BufStart
+ Buffer
->getBufferSize();
1223 if (std::error_code EC
= readMagicIdent())
1226 if (std::error_code EC
= readSecHdrTable())
1229 return sampleprof_error::success
;
1232 uint64_t SampleProfileReaderExtBinaryBase::getSectionSize(SecType Type
) {
1234 for (auto &Entry
: SecHdrTable
) {
1235 if (Entry
.Type
== Type
)
1241 uint64_t SampleProfileReaderExtBinaryBase::getFileSize() {
1242 // Sections in SecHdrTable is not necessarily in the same order as
1243 // sections in the profile because section like FuncOffsetTable needs
1244 // to be written after section LBRProfile but needs to be read before
1245 // section LBRProfile, so we cannot simply use the last entry in
1246 // SecHdrTable to calculate the file size.
1247 uint64_t FileSize
= 0;
1248 for (auto &Entry
: SecHdrTable
) {
1249 FileSize
= std::max(Entry
.Offset
+ Entry
.Size
, FileSize
);
1254 static std::string
getSecFlagsStr(const SecHdrTableEntry
&Entry
) {
1256 if (hasSecFlag(Entry
, SecCommonFlags::SecFlagCompress
))
1257 Flags
.append("{compressed,");
1261 if (hasSecFlag(Entry
, SecCommonFlags::SecFlagFlat
))
1262 Flags
.append("flat,");
1264 switch (Entry
.Type
) {
1266 if (hasSecFlag(Entry
, SecNameTableFlags::SecFlagFixedLengthMD5
))
1267 Flags
.append("fixlenmd5,");
1268 else if (hasSecFlag(Entry
, SecNameTableFlags::SecFlagMD5Name
))
1269 Flags
.append("md5,");
1270 if (hasSecFlag(Entry
, SecNameTableFlags::SecFlagUniqSuffix
))
1271 Flags
.append("uniq,");
1273 case SecProfSummary
:
1274 if (hasSecFlag(Entry
, SecProfSummaryFlags::SecFlagPartial
))
1275 Flags
.append("partial,");
1276 if (hasSecFlag(Entry
, SecProfSummaryFlags::SecFlagFullContext
))
1277 Flags
.append("context,");
1278 if (hasSecFlag(Entry
, SecProfSummaryFlags::SecFlagIsCSNested
))
1279 Flags
.append("context-nested,");
1280 if (hasSecFlag(Entry
, SecProfSummaryFlags::SecFlagFSDiscriminator
))
1281 Flags
.append("fs-discriminator,");
1283 case SecFuncOffsetTable
:
1284 if (hasSecFlag(Entry
, SecFuncOffsetFlags::SecFlagOrdered
))
1285 Flags
.append("ordered,");
1287 case SecFuncMetadata
:
1288 if (hasSecFlag(Entry
, SecFuncMetadataFlags::SecFlagIsProbeBased
))
1289 Flags
.append("probe,");
1290 if (hasSecFlag(Entry
, SecFuncMetadataFlags::SecFlagHasAttribute
))
1291 Flags
.append("attr,");
1296 char &last
= Flags
.back();
1304 bool SampleProfileReaderExtBinaryBase::dumpSectionInfo(raw_ostream
&OS
) {
1305 uint64_t TotalSecsSize
= 0;
1306 for (auto &Entry
: SecHdrTable
) {
1307 OS
<< getSecName(Entry
.Type
) << " - Offset: " << Entry
.Offset
1308 << ", Size: " << Entry
.Size
<< ", Flags: " << getSecFlagsStr(Entry
)
1311 TotalSecsSize
+= Entry
.Size
;
1313 uint64_t HeaderSize
= SecHdrTable
.front().Offset
;
1314 assert(HeaderSize
+ TotalSecsSize
== getFileSize() &&
1315 "Size of 'header + sections' doesn't match the total size of profile");
1317 OS
<< "Header Size: " << HeaderSize
<< "\n";
1318 OS
<< "Total Sections Size: " << TotalSecsSize
<< "\n";
1319 OS
<< "File Size: " << getFileSize() << "\n";
1323 std::error_code
SampleProfileReaderBinary::readMagicIdent() {
1324 // Read and check the magic identifier.
1325 auto Magic
= readNumber
<uint64_t>();
1326 if (std::error_code EC
= Magic
.getError())
1328 else if (std::error_code EC
= verifySPMagic(*Magic
))
1331 // Read the version number.
1332 auto Version
= readNumber
<uint64_t>();
1333 if (std::error_code EC
= Version
.getError())
1335 else if (*Version
!= SPVersion())
1336 return sampleprof_error::unsupported_version
;
1338 return sampleprof_error::success
;
1341 std::error_code
SampleProfileReaderBinary::readHeader() {
1342 Data
= reinterpret_cast<const uint8_t *>(Buffer
->getBufferStart());
1343 End
= Data
+ Buffer
->getBufferSize();
1345 if (std::error_code EC
= readMagicIdent())
1348 if (std::error_code EC
= readSummary())
1351 if (std::error_code EC
= readNameTable())
1353 return sampleprof_error::success
;
1356 std::error_code
SampleProfileReaderCompactBinary::readHeader() {
1357 SampleProfileReaderBinary::readHeader();
1358 if (std::error_code EC
= readFuncOffsetTable())
1360 return sampleprof_error::success
;
1363 std::error_code
SampleProfileReaderCompactBinary::readFuncOffsetTable() {
1364 auto TableOffset
= readUnencodedNumber
<uint64_t>();
1365 if (std::error_code EC
= TableOffset
.getError())
1368 const uint8_t *SavedData
= Data
;
1369 const uint8_t *TableStart
=
1370 reinterpret_cast<const uint8_t *>(Buffer
->getBufferStart()) +
1374 auto Size
= readNumber
<uint64_t>();
1375 if (std::error_code EC
= Size
.getError())
1378 FuncOffsetTable
.reserve(*Size
);
1379 for (uint32_t I
= 0; I
< *Size
; ++I
) {
1380 auto FName(readStringFromTable());
1381 if (std::error_code EC
= FName
.getError())
1384 auto Offset
= readNumber
<uint64_t>();
1385 if (std::error_code EC
= Offset
.getError())
1388 FuncOffsetTable
[*FName
] = *Offset
;
1392 return sampleprof_error::success
;
// Adds the canonical function name of F (as computed by
// FunctionSamples::getCanonicalFnName) to FuncsToUse.
// NOTE(review): the statements that introduce F and produce the bool result
// are not visible in this excerpt; presumably F ranges over the functions of
// the current module -- confirm against the full source.
1395 bool SampleProfileReaderCompactBinary::collectFuncsFromModule() {
1400 FuncsToUse
.insert(FunctionSamples::getCanonicalFnName(F
));
1404 std::error_code
SampleProfileReaderBinary::readSummaryEntry(
1405 std::vector
<ProfileSummaryEntry
> &Entries
) {
1406 auto Cutoff
= readNumber
<uint64_t>();
1407 if (std::error_code EC
= Cutoff
.getError())
1410 auto MinBlockCount
= readNumber
<uint64_t>();
1411 if (std::error_code EC
= MinBlockCount
.getError())
1414 auto NumBlocks
= readNumber
<uint64_t>();
1415 if (std::error_code EC
= NumBlocks
.getError())
1418 Entries
.emplace_back(*Cutoff
, *MinBlockCount
, *NumBlocks
);
1419 return sampleprof_error::success
;
1422 std::error_code
SampleProfileReaderBinary::readSummary() {
1423 auto TotalCount
= readNumber
<uint64_t>();
1424 if (std::error_code EC
= TotalCount
.getError())
1427 auto MaxBlockCount
= readNumber
<uint64_t>();
1428 if (std::error_code EC
= MaxBlockCount
.getError())
1431 auto MaxFunctionCount
= readNumber
<uint64_t>();
1432 if (std::error_code EC
= MaxFunctionCount
.getError())
1435 auto NumBlocks
= readNumber
<uint64_t>();
1436 if (std::error_code EC
= NumBlocks
.getError())
1439 auto NumFunctions
= readNumber
<uint64_t>();
1440 if (std::error_code EC
= NumFunctions
.getError())
1443 auto NumSummaryEntries
= readNumber
<uint64_t>();
1444 if (std::error_code EC
= NumSummaryEntries
.getError())
1447 std::vector
<ProfileSummaryEntry
> Entries
;
1448 for (unsigned i
= 0; i
< *NumSummaryEntries
; i
++) {
1449 std::error_code EC
= readSummaryEntry(Entries
);
1450 if (EC
!= sampleprof_error::success
)
1453 Summary
= std::make_unique
<ProfileSummary
>(
1454 ProfileSummary::PSK_Sample
, Entries
, *TotalCount
, *MaxBlockCount
, 0,
1455 *MaxFunctionCount
, *NumBlocks
, *NumFunctions
);
1457 return sampleprof_error::success
;
1460 bool SampleProfileReaderRawBinary::hasFormat(const MemoryBuffer
&Buffer
) {
1461 const uint8_t *Data
=
1462 reinterpret_cast<const uint8_t *>(Buffer
.getBufferStart());
1463 uint64_t Magic
= decodeULEB128(Data
);
1464 return Magic
== SPMagic();
1467 bool SampleProfileReaderExtBinary::hasFormat(const MemoryBuffer
&Buffer
) {
1468 const uint8_t *Data
=
1469 reinterpret_cast<const uint8_t *>(Buffer
.getBufferStart());
1470 uint64_t Magic
= decodeULEB128(Data
);
1471 return Magic
== SPMagic(SPF_Ext_Binary
);
1474 bool SampleProfileReaderCompactBinary::hasFormat(const MemoryBuffer
&Buffer
) {
1475 const uint8_t *Data
=
1476 reinterpret_cast<const uint8_t *>(Buffer
.getBufferStart());
1477 uint64_t Magic
= decodeULEB128(Data
);
1478 return Magic
== SPMagic(SPF_Compact_Binary
);
1481 std::error_code
SampleProfileReaderGCC::skipNextWord() {
1483 if (!GcovBuffer
.readInt(dummy
))
1484 return sampleprof_error::truncated
;
1485 return sampleprof_error::success
;
1488 template <typename T
> ErrorOr
<T
> SampleProfileReaderGCC::readNumber() {
1489 if (sizeof(T
) <= sizeof(uint32_t)) {
1491 if (GcovBuffer
.readInt(Val
) && Val
<= std::numeric_limits
<T
>::max())
1492 return static_cast<T
>(Val
);
1493 } else if (sizeof(T
) <= sizeof(uint64_t)) {
1495 if (GcovBuffer
.readInt64(Val
) && Val
<= std::numeric_limits
<T
>::max())
1496 return static_cast<T
>(Val
);
1499 std::error_code EC
= sampleprof_error::malformed
;
1500 reportError(0, EC
.message());
1504 ErrorOr
<StringRef
> SampleProfileReaderGCC::readString() {
1506 if (!GcovBuffer
.readString(Str
))
1507 return sampleprof_error::truncated
;
1511 std::error_code
SampleProfileReaderGCC::readHeader() {
1512 // Read the magic identifier.
1513 if (!GcovBuffer
.readGCDAFormat())
1514 return sampleprof_error::unrecognized_format
;
1516 // Read the version number. Note - the GCC reader does not validate this
1517 // version, but the profile creator generates v704.
1518 GCOV::GCOVVersion version
;
1519 if (!GcovBuffer
.readGCOVVersion(version
))
1520 return sampleprof_error::unrecognized_format
;
1522 if (version
!= GCOV::V407
)
1523 return sampleprof_error::unsupported_version
;
1525 // Skip the empty integer.
1526 if (std::error_code EC
= skipNextWord())
1529 return sampleprof_error::success
;
1532 std::error_code
SampleProfileReaderGCC::readSectionTag(uint32_t Expected
) {
1534 if (!GcovBuffer
.readInt(Tag
))
1535 return sampleprof_error::truncated
;
1537 if (Tag
!= Expected
)
1538 return sampleprof_error::malformed
;
1540 if (std::error_code EC
= skipNextWord())
1543 return sampleprof_error::success
;
1546 std::error_code
SampleProfileReaderGCC::readNameTable() {
1547 if (std::error_code EC
= readSectionTag(GCOVTagAFDOFileNames
))
1551 if (!GcovBuffer
.readInt(Size
))
1552 return sampleprof_error::truncated
;
1554 for (uint32_t I
= 0; I
< Size
; ++I
) {
1556 if (!GcovBuffer
.readString(Str
))
1557 return sampleprof_error::truncated
;
1558 Names
.push_back(std::string(Str
));
1561 return sampleprof_error::success
;
1564 std::error_code
SampleProfileReaderGCC::readFunctionProfiles() {
1565 if (std::error_code EC
= readSectionTag(GCOVTagAFDOFunction
))
1568 uint32_t NumFunctions
;
1569 if (!GcovBuffer
.readInt(NumFunctions
))
1570 return sampleprof_error::truncated
;
1572 InlineCallStack Stack
;
1573 for (uint32_t I
= 0; I
< NumFunctions
; ++I
)
1574 if (std::error_code EC
= readOneFunctionProfile(Stack
, true, 0))
1578 return sampleprof_error::success
;
1581 std::error_code
SampleProfileReaderGCC::readOneFunctionProfile(
1582 const InlineCallStack
&InlineStack
, bool Update
, uint32_t Offset
) {
1583 uint64_t HeadCount
= 0;
1584 if (InlineStack
.size() == 0)
1585 if (!GcovBuffer
.readInt64(HeadCount
))
1586 return sampleprof_error::truncated
;
1589 if (!GcovBuffer
.readInt(NameIdx
))
1590 return sampleprof_error::truncated
;
1592 StringRef
Name(Names
[NameIdx
]);
1594 uint32_t NumPosCounts
;
1595 if (!GcovBuffer
.readInt(NumPosCounts
))
1596 return sampleprof_error::truncated
;
1598 uint32_t NumCallsites
;
1599 if (!GcovBuffer
.readInt(NumCallsites
))
1600 return sampleprof_error::truncated
;
1602 FunctionSamples
*FProfile
= nullptr;
1603 if (InlineStack
.size() == 0) {
1604 // If this is a top function that we have already processed, do not
1605 // update its profile again. This happens in the presence of
1606 // function aliases. Since these aliases share the same function
1607 // body, there will be identical replicated profiles for the
1608 // original function. In this case, we simply not bother updating
1609 // the profile of the original function.
1610 FProfile
= &Profiles
[Name
];
1611 FProfile
->addHeadSamples(HeadCount
);
1612 if (FProfile
->getTotalSamples() > 0)
1615 // Otherwise, we are reading an inlined instance. The top of the
1616 // inline stack contains the profile of the caller. Insert this
1617 // callee in the caller's CallsiteMap.
1618 FunctionSamples
*CallerProfile
= InlineStack
.front();
1619 uint32_t LineOffset
= Offset
>> 16;
1620 uint32_t Discriminator
= Offset
& 0xffff;
1621 FProfile
= &CallerProfile
->functionSamplesAt(
1622 LineLocation(LineOffset
, Discriminator
))[std::string(Name
)];
1624 FProfile
->setName(Name
);
1626 for (uint32_t I
= 0; I
< NumPosCounts
; ++I
) {
1628 if (!GcovBuffer
.readInt(Offset
))
1629 return sampleprof_error::truncated
;
1631 uint32_t NumTargets
;
1632 if (!GcovBuffer
.readInt(NumTargets
))
1633 return sampleprof_error::truncated
;
1636 if (!GcovBuffer
.readInt64(Count
))
1637 return sampleprof_error::truncated
;
1639 // The line location is encoded in the offset as:
1640 // high 16 bits: line offset to the start of the function.
1641 // low 16 bits: discriminator.
1642 uint32_t LineOffset
= Offset
>> 16;
1643 uint32_t Discriminator
= Offset
& 0xffff;
1645 InlineCallStack NewStack
;
1646 NewStack
.push_back(FProfile
);
1647 llvm::append_range(NewStack
, InlineStack
);
1649 // Walk up the inline stack, adding the samples on this line to
1650 // the total sample count of the callers in the chain.
1651 for (auto CallerProfile
: NewStack
)
1652 CallerProfile
->addTotalSamples(Count
);
1654 // Update the body samples for the current profile.
1655 FProfile
->addBodySamples(LineOffset
, Discriminator
, Count
);
1658 // Process the list of functions called at an indirect call site.
1659 // These are all the targets that a function pointer (or virtual
1660 // function) resolved at runtime.
1661 for (uint32_t J
= 0; J
< NumTargets
; J
++) {
1663 if (!GcovBuffer
.readInt(HistVal
))
1664 return sampleprof_error::truncated
;
1666 if (HistVal
!= HIST_TYPE_INDIR_CALL_TOPN
)
1667 return sampleprof_error::malformed
;
1670 if (!GcovBuffer
.readInt64(TargetIdx
))
1671 return sampleprof_error::truncated
;
1672 StringRef
TargetName(Names
[TargetIdx
]);
1674 uint64_t TargetCount
;
1675 if (!GcovBuffer
.readInt64(TargetCount
))
1676 return sampleprof_error::truncated
;
1679 FProfile
->addCalledTargetSamples(LineOffset
, Discriminator
,
1680 TargetName
, TargetCount
);
1684 // Process all the inlined callers into the current function. These
1685 // are all the callsites that were inlined into this function.
1686 for (uint32_t I
= 0; I
< NumCallsites
; I
++) {
1687 // The offset is encoded as:
1688 // high 16 bits: line offset to the start of the function.
1689 // low 16 bits: discriminator.
1691 if (!GcovBuffer
.readInt(Offset
))
1692 return sampleprof_error::truncated
;
1693 InlineCallStack NewStack
;
1694 NewStack
.push_back(FProfile
);
1695 llvm::append_range(NewStack
, InlineStack
);
1696 if (std::error_code EC
= readOneFunctionProfile(NewStack
, Update
, Offset
))
1700 return sampleprof_error::success
;
1703 /// Read a GCC AutoFDO profile.
1705 /// This format is generated by the Linux Perf conversion tool at
1706 /// https://github.com/google/autofdo.
1707 std::error_code
SampleProfileReaderGCC::readImpl() {
1708 assert(!ProfileIsFSDisciminator
&& "Gcc profiles not support FSDisciminator");
1709 // Read the string table.
1710 if (std::error_code EC
= readNameTable())
1713 // Read the source profile.
1714 if (std::error_code EC
= readFunctionProfiles())
1717 return sampleprof_error::success
;
1720 bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer
&Buffer
) {
1721 StringRef
Magic(reinterpret_cast<const char *>(Buffer
.getBufferStart()));
1722 return Magic
== "adcg*704";
1725 void SampleProfileReaderItaniumRemapper::applyRemapping(LLVMContext
&Ctx
) {
1726 // If the reader uses MD5 to represent string, we can't remap it because
1727 // we don't know what the original function names were.
1728 if (Reader
.useMD5()) {
1729 Ctx
.diagnose(DiagnosticInfoSampleProfile(
1730 Reader
.getBuffer()->getBufferIdentifier(),
1731 "Profile data remapping cannot be applied to profile data "
1732 "in compact format (original mangled names are not available).",
1737 // CSSPGO-TODO: Remapper is not yet supported.
1738 // We will need to remap the entire context string.
1739 assert(Remappings
&& "should be initialized while creating remapper");
1740 for (auto &Sample
: Reader
.getProfiles()) {
1741 DenseSet
<StringRef
> NamesInSample
;
1742 Sample
.second
.findAllNames(NamesInSample
);
1743 for (auto &Name
: NamesInSample
)
1744 if (auto Key
= Remappings
->insert(Name
))
1745 NameMap
.insert({Key
, Name
});
1748 RemappingApplied
= true;
// Looks up Fname in Remappings; when that yields a key, returns the name
// recorded for the key in NameMap.
// NOTE(review): the return type and the value returned when no key is found
// are not visible in this excerpt -- confirm against the full source.
1752 SampleProfileReaderItaniumRemapper::lookUpNameInProfile(StringRef Fname
) {
1753 if (auto Key
= Remappings
->lookup(Fname
))
1754 return NameMap
.lookup(Key
);
1758 /// Prepare a memory buffer for the contents of \p Filename.
1760 /// \returns an error code indicating the status of the buffer.
1761 static ErrorOr
<std::unique_ptr
<MemoryBuffer
>>
1762 setupMemoryBuffer(const Twine
&Filename
) {
1763 auto BufferOrErr
= MemoryBuffer::getFileOrSTDIN(Filename
, /*IsText=*/true);
1764 if (std::error_code EC
= BufferOrErr
.getError())
1766 auto Buffer
= std::move(BufferOrErr
.get());
1769 if (uint64_t(Buffer
->getBufferSize()) > std::numeric_limits
<uint32_t>::max())
1770 return sampleprof_error::too_large
;
1772 return std::move(Buffer
);
1775 /// Create a sample profile reader based on the format of the input file.
1777 /// \param Filename The file to open.
1779 /// \param C The LLVM context to use to emit diagnostics.
1781 /// \param P The FSDiscriminatorPass.
1783 /// \param RemapFilename The file used for profile remapping.
1785 /// \returns an error code indicating the status of the created reader.
1786 ErrorOr
<std::unique_ptr
<SampleProfileReader
>>
1787 SampleProfileReader::create(const std::string Filename
, LLVMContext
&C
,
1788 FSDiscriminatorPass P
,
1789 const std::string RemapFilename
) {
1790 auto BufferOrError
= setupMemoryBuffer(Filename
);
1791 if (std::error_code EC
= BufferOrError
.getError())
1793 return create(BufferOrError
.get(), C
, P
, RemapFilename
);
1796 /// Create a sample profile remapper from the given input, to remap the
1797 /// function names in the given profile data.
1799 /// \param Filename The file to open.
1801 /// \param Reader The profile reader the remapper is going to be applied to.
1803 /// \param C The LLVM context to use to emit diagnostics.
1805 /// \returns an error code indicating the status of the created reader.
1806 ErrorOr
<std::unique_ptr
<SampleProfileReaderItaniumRemapper
>>
1807 SampleProfileReaderItaniumRemapper::create(const std::string Filename
,
1808 SampleProfileReader
&Reader
,
1810 auto BufferOrError
= setupMemoryBuffer(Filename
);
1811 if (std::error_code EC
= BufferOrError
.getError())
1813 return create(BufferOrError
.get(), Reader
, C
);
1816 /// Create a sample profile remapper from the given input, to remap the
1817 /// function names in the given profile data.
1819 /// \param B The memory buffer to create the reader from (assumes ownership).
1821 /// \param C The LLVM context to use to emit diagnostics.
1823 /// \param Reader The profile reader the remapper is going to be applied to.
1825 /// \returns an error code indicating the status of the created reader.
1826 ErrorOr
<std::unique_ptr
<SampleProfileReaderItaniumRemapper
>>
1827 SampleProfileReaderItaniumRemapper::create(std::unique_ptr
<MemoryBuffer
> &B
,
1828 SampleProfileReader
&Reader
,
1830 auto Remappings
= std::make_unique
<SymbolRemappingReader
>();
1831 if (Error E
= Remappings
->read(*B
)) {
1833 std::move(E
), [&](const SymbolRemappingParseError
&ParseError
) {
1834 C
.diagnose(DiagnosticInfoSampleProfile(B
->getBufferIdentifier(),
1835 ParseError
.getLineNum(),
1836 ParseError
.getMessage()));
1838 return sampleprof_error::malformed
;
1841 return std::make_unique
<SampleProfileReaderItaniumRemapper
>(
1842 std::move(B
), std::move(Remappings
), Reader
);
1845 /// Create a sample profile reader based on the format of the input data.
1847 /// \param B The memory buffer to create the reader from (assumes ownership).
1849 /// \param C The LLVM context to use to emit diagnostics.
1851 /// \param P The FSDiscriminatorPass.
1853 /// \param RemapFilename The file used for profile remapping.
1855 /// \returns an error code indicating the status of the created reader.
1856 ErrorOr
<std::unique_ptr
<SampleProfileReader
>>
1857 SampleProfileReader::create(std::unique_ptr
<MemoryBuffer
> &B
, LLVMContext
&C
,
1858 FSDiscriminatorPass P
,
1859 const std::string RemapFilename
) {
1860 std::unique_ptr
<SampleProfileReader
> Reader
;
1861 if (SampleProfileReaderRawBinary::hasFormat(*B
))
1862 Reader
.reset(new SampleProfileReaderRawBinary(std::move(B
), C
));
1863 else if (SampleProfileReaderExtBinary::hasFormat(*B
))
1864 Reader
.reset(new SampleProfileReaderExtBinary(std::move(B
), C
));
1865 else if (SampleProfileReaderCompactBinary::hasFormat(*B
))
1866 Reader
.reset(new SampleProfileReaderCompactBinary(std::move(B
), C
));
1867 else if (SampleProfileReaderGCC::hasFormat(*B
))
1868 Reader
.reset(new SampleProfileReaderGCC(std::move(B
), C
));
1869 else if (SampleProfileReaderText::hasFormat(*B
))
1870 Reader
.reset(new SampleProfileReaderText(std::move(B
), C
));
1872 return sampleprof_error::unrecognized_format
;
1874 if (!RemapFilename
.empty()) {
1876 SampleProfileReaderItaniumRemapper::create(RemapFilename
, *Reader
, C
);
1877 if (std::error_code EC
= ReaderOrErr
.getError()) {
1878 std::string Msg
= "Could not create remapper: " + EC
.message();
1879 C
.diagnose(DiagnosticInfoSampleProfile(RemapFilename
, Msg
));
1882 Reader
->Remapper
= std::move(ReaderOrErr
.get());
1885 if (std::error_code EC
= Reader
->readHeader()) {
1889 Reader
->setDiscriminatorMaskedBitFrom(P
);
1891 return std::move(Reader
);
1894 // For text and GCC file formats, we compute the summary after reading the
1895 // profile. Binary format has the profile summary in its header.
1896 void SampleProfileReader::computeSummary() {
1897 SampleProfileSummaryBuilder
Builder(ProfileSummaryBuilder::DefaultCutoffs
);
1898 Summary
= Builder
.computeSummaryForProfiles(Profiles
);