[clang][extract-api] Emit "navigator" property of "name" in SymbolGraph
[llvm-project.git] / llvm / lib / ProfileData / SampleProfReader.cpp
blobde1cdc6bce6ae370459040113da67bee61c365f4
1 //===- SampleProfReader.cpp - Read LLVM sample profile data ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the class that reads LLVM sample profiles. It
10 // supports three file formats: text, binary and gcov.
12 // The textual representation is useful for debugging and testing purposes. The
13 // binary representation is more compact, resulting in smaller file sizes.
15 // The gcov encoding is the one generated by GCC's AutoFDO profile creation
16 // tool (https://github.com/google/autofdo)
18 // All three encodings can be used interchangeably as an input sample profile.
20 //===----------------------------------------------------------------------===//
22 #include "llvm/ProfileData/SampleProfReader.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/IR/Module.h"
27 #include "llvm/IR/ProfileSummary.h"
28 #include "llvm/ProfileData/ProfileCommon.h"
29 #include "llvm/ProfileData/SampleProf.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Support/Compression.h"
32 #include "llvm/Support/ErrorOr.h"
33 #include "llvm/Support/LEB128.h"
34 #include "llvm/Support/LineIterator.h"
35 #include "llvm/Support/MD5.h"
36 #include "llvm/Support/MemoryBuffer.h"
37 #include "llvm/Support/raw_ostream.h"
38 #include <algorithm>
39 #include <cstddef>
40 #include <cstdint>
41 #include <limits>
42 #include <memory>
43 #include <system_error>
44 #include <vector>
46 using namespace llvm;
47 using namespace sampleprof;
49 #define DEBUG_TYPE "samplepgo-reader"
51 // This internal option specifies if the profile uses FS discriminators.
52 // It only applies to text, binary and compact binary format profiles.
53 // For ext-binary format profiles, the flag is set in the summary.
54 static cl::opt<bool> ProfileIsFSDisciminator(
55 "profile-isfs", cl::Hidden, cl::init(false),
56 cl::desc("Profile uses flow sensitive discriminators"));
58 /// Dump the function profile for \p FName.
59 ///
60 /// \param FContext Name + context of the function to print.
61 /// \param OS Stream to emit the output to.
62 void SampleProfileReader::dumpFunctionProfile(SampleContext FContext,
63 raw_ostream &OS) {
64 OS << "Function: " << FContext.toString() << ": " << Profiles[FContext];
67 /// Dump all the function profiles found on stream \p OS.
68 void SampleProfileReader::dump(raw_ostream &OS) {
69 std::vector<NameFunctionSamples> V;
70 sortFuncProfiles(Profiles, V);
71 for (const auto &I : V)
72 dumpFunctionProfile(I.first, OS);
75 /// Parse \p Input as function head.
76 ///
77 /// Parse one line of \p Input, and update function name in \p FName,
78 /// function's total sample count in \p NumSamples, function's entry
79 /// count in \p NumHeadSamples.
80 ///
81 /// \returns true if parsing is successful.
82 static bool ParseHead(const StringRef &Input, StringRef &FName,
83 uint64_t &NumSamples, uint64_t &NumHeadSamples) {
84 if (Input[0] == ' ')
85 return false;
86 size_t n2 = Input.rfind(':');
87 size_t n1 = Input.rfind(':', n2 - 1);
88 FName = Input.substr(0, n1);
89 if (Input.substr(n1 + 1, n2 - n1 - 1).getAsInteger(10, NumSamples))
90 return false;
91 if (Input.substr(n2 + 1).getAsInteger(10, NumHeadSamples))
92 return false;
93 return true;
/// Returns true if line offset \p L is legal, i.e. it fits in 16 bits.
static bool isOffsetLegal(unsigned L) {
  constexpr unsigned MaxLegalOffset = 0xffff;
  return L <= MaxLegalOffset;
}
99 /// Parse \p Input that contains metadata.
100 /// Possible metadata:
101 /// - CFG Checksum information:
102 /// !CFGChecksum: 12345
103 /// - CFG Checksum information:
104 /// !Attributes: 1
105 /// Stores the FunctionHash (a.k.a. CFG Checksum) into \p FunctionHash.
106 static bool parseMetadata(const StringRef &Input, uint64_t &FunctionHash,
107 uint32_t &Attributes) {
108 if (Input.startswith("!CFGChecksum:")) {
109 StringRef CFGInfo = Input.substr(strlen("!CFGChecksum:")).trim();
110 return !CFGInfo.getAsInteger(10, FunctionHash);
113 if (Input.startswith("!Attributes:")) {
114 StringRef Attrib = Input.substr(strlen("!Attributes:")).trim();
115 return !Attrib.getAsInteger(10, Attributes);
118 return false;
/// Kind of an indented line in a text profile, as classified by ParseLine.
enum class LineType {
  /// `offset[.disc]: callee:NUM` - the text after the location does not
  /// start with a digit.
  CallSiteProfile,
  /// `offset[.disc]: NUM [target:NUM]*` - the text after the location
  /// starts with a digit.
  BodyProfile,
  /// A line whose first non-space character is '!'.
  Metadata,
};
127 /// Parse \p Input as line sample.
129 /// \param Input input line.
130 /// \param LineTy Type of this line.
131 /// \param Depth the depth of the inline stack.
132 /// \param NumSamples total samples of the line/inlined callsite.
133 /// \param LineOffset line offset to the start of the function.
134 /// \param Discriminator discriminator of the line.
135 /// \param TargetCountMap map from indirect call target to count.
136 /// \param FunctionHash the function's CFG hash, used by pseudo probe.
138 /// returns true if parsing is successful.
139 static bool ParseLine(const StringRef &Input, LineType &LineTy, uint32_t &Depth,
140 uint64_t &NumSamples, uint32_t &LineOffset,
141 uint32_t &Discriminator, StringRef &CalleeName,
142 DenseMap<StringRef, uint64_t> &TargetCountMap,
143 uint64_t &FunctionHash, uint32_t &Attributes) {
144 for (Depth = 0; Input[Depth] == ' '; Depth++)
146 if (Depth == 0)
147 return false;
149 if (Input[Depth] == '!') {
150 LineTy = LineType::Metadata;
151 return parseMetadata(Input.substr(Depth), FunctionHash, Attributes);
154 size_t n1 = Input.find(':');
155 StringRef Loc = Input.substr(Depth, n1 - Depth);
156 size_t n2 = Loc.find('.');
157 if (n2 == StringRef::npos) {
158 if (Loc.getAsInteger(10, LineOffset) || !isOffsetLegal(LineOffset))
159 return false;
160 Discriminator = 0;
161 } else {
162 if (Loc.substr(0, n2).getAsInteger(10, LineOffset))
163 return false;
164 if (Loc.substr(n2 + 1).getAsInteger(10, Discriminator))
165 return false;
168 StringRef Rest = Input.substr(n1 + 2);
169 if (isDigit(Rest[0])) {
170 LineTy = LineType::BodyProfile;
171 size_t n3 = Rest.find(' ');
172 if (n3 == StringRef::npos) {
173 if (Rest.getAsInteger(10, NumSamples))
174 return false;
175 } else {
176 if (Rest.substr(0, n3).getAsInteger(10, NumSamples))
177 return false;
179 // Find call targets and their sample counts.
180 // Note: In some cases, there are symbols in the profile which are not
181 // mangled. To accommodate such cases, use colon + integer pairs as the
182 // anchor points.
183 // An example:
184 // _M_construct<char *>:1000 string_view<std::allocator<char> >:437
185 // ":1000" and ":437" are used as anchor points so the string above will
186 // be interpreted as
187 // target: _M_construct<char *>
188 // count: 1000
189 // target: string_view<std::allocator<char> >
190 // count: 437
191 while (n3 != StringRef::npos) {
192 n3 += Rest.substr(n3).find_first_not_of(' ');
193 Rest = Rest.substr(n3);
194 n3 = Rest.find_first_of(':');
195 if (n3 == StringRef::npos || n3 == 0)
196 return false;
198 StringRef Target;
199 uint64_t count, n4;
200 while (true) {
201 // Get the segment after the current colon.
202 StringRef AfterColon = Rest.substr(n3 + 1);
203 // Get the target symbol before the current colon.
204 Target = Rest.substr(0, n3);
205 // Check if the word after the current colon is an integer.
206 n4 = AfterColon.find_first_of(' ');
207 n4 = (n4 != StringRef::npos) ? n3 + n4 + 1 : Rest.size();
208 StringRef WordAfterColon = Rest.substr(n3 + 1, n4 - n3 - 1);
209 if (!WordAfterColon.getAsInteger(10, count))
210 break;
212 // Try to find the next colon.
213 uint64_t n5 = AfterColon.find_first_of(':');
214 if (n5 == StringRef::npos)
215 return false;
216 n3 += n5 + 1;
219 // An anchor point is found. Save the {target, count} pair
220 TargetCountMap[Target] = count;
221 if (n4 == Rest.size())
222 break;
223 // Change n3 to the next blank space after colon + integer pair.
224 n3 = n4;
226 } else {
227 LineTy = LineType::CallSiteProfile;
228 size_t n3 = Rest.find_last_of(':');
229 CalleeName = Rest.substr(0, n3);
230 if (Rest.substr(n3 + 1).getAsInteger(10, NumSamples))
231 return false;
233 return true;
236 /// Load samples from a text file.
238 /// See the documentation at the top of the file for an explanation of
239 /// the expected format.
241 /// \returns true if the file was loaded successfully, false otherwise.
242 std::error_code SampleProfileReaderText::readImpl() {
243 line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#');
244 sampleprof_error Result = sampleprof_error::success;
246 InlineCallStack InlineStack;
247 uint32_t TopLevelProbeProfileCount = 0;
249 // DepthMetadata tracks whether we have processed metadata for the current
250 // top-level or nested function profile.
251 uint32_t DepthMetadata = 0;
253 ProfileIsFS = ProfileIsFSDisciminator;
254 FunctionSamples::ProfileIsFS = ProfileIsFS;
255 for (; !LineIt.is_at_eof(); ++LineIt) {
256 if ((*LineIt)[(*LineIt).find_first_not_of(' ')] == '#')
257 continue;
258 // Read the header of each function.
260 // Note that for function identifiers we are actually expecting
261 // mangled names, but we may not always get them. This happens when
262 // the compiler decides not to emit the function (e.g., it was inlined
263 // and removed). In this case, the binary will not have the linkage
264 // name for the function, so the profiler will emit the function's
265 // unmangled name, which may contain characters like ':' and '>' in its
266 // name (member functions, templates, etc).
268 // The only requirement we place on the identifier, then, is that it
269 // should not begin with a number.
270 if ((*LineIt)[0] != ' ') {
271 uint64_t NumSamples, NumHeadSamples;
272 StringRef FName;
273 if (!ParseHead(*LineIt, FName, NumSamples, NumHeadSamples)) {
274 reportError(LineIt.line_number(),
275 "Expected 'mangled_name:NUM:NUM', found " + *LineIt);
276 return sampleprof_error::malformed;
278 DepthMetadata = 0;
279 SampleContext FContext(FName, CSNameTable);
280 if (FContext.hasContext())
281 ++CSProfileCount;
282 Profiles[FContext] = FunctionSamples();
283 FunctionSamples &FProfile = Profiles[FContext];
284 FProfile.setContext(FContext);
285 MergeResult(Result, FProfile.addTotalSamples(NumSamples));
286 MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples));
287 InlineStack.clear();
288 InlineStack.push_back(&FProfile);
289 } else {
290 uint64_t NumSamples;
291 StringRef FName;
292 DenseMap<StringRef, uint64_t> TargetCountMap;
293 uint32_t Depth, LineOffset, Discriminator;
294 LineType LineTy;
295 uint64_t FunctionHash = 0;
296 uint32_t Attributes = 0;
297 if (!ParseLine(*LineIt, LineTy, Depth, NumSamples, LineOffset,
298 Discriminator, FName, TargetCountMap, FunctionHash,
299 Attributes)) {
300 reportError(LineIt.line_number(),
301 "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " +
302 *LineIt);
303 return sampleprof_error::malformed;
305 if (LineTy != LineType::Metadata && Depth == DepthMetadata) {
306 // Metadata must be put at the end of a function profile.
307 reportError(LineIt.line_number(),
308 "Found non-metadata after metadata: " + *LineIt);
309 return sampleprof_error::malformed;
312 // Here we handle FS discriminators.
313 Discriminator &= getDiscriminatorMask();
315 while (InlineStack.size() > Depth) {
316 InlineStack.pop_back();
318 switch (LineTy) {
319 case LineType::CallSiteProfile: {
320 FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt(
321 LineLocation(LineOffset, Discriminator))[std::string(FName)];
322 FSamples.setName(FName);
323 MergeResult(Result, FSamples.addTotalSamples(NumSamples));
324 InlineStack.push_back(&FSamples);
325 DepthMetadata = 0;
326 break;
328 case LineType::BodyProfile: {
329 while (InlineStack.size() > Depth) {
330 InlineStack.pop_back();
332 FunctionSamples &FProfile = *InlineStack.back();
333 for (const auto &name_count : TargetCountMap) {
334 MergeResult(Result, FProfile.addCalledTargetSamples(
335 LineOffset, Discriminator, name_count.first,
336 name_count.second));
338 MergeResult(Result, FProfile.addBodySamples(LineOffset, Discriminator,
339 NumSamples));
340 break;
342 case LineType::Metadata: {
343 FunctionSamples &FProfile = *InlineStack.back();
344 if (FunctionHash) {
345 FProfile.setFunctionHash(FunctionHash);
346 if (Depth == 1)
347 ++TopLevelProbeProfileCount;
349 FProfile.getContext().setAllAttributes(Attributes);
350 if (Attributes & (uint32_t)ContextShouldBeInlined)
351 ProfileIsCSNested = true;
352 DepthMetadata = Depth;
353 break;
359 assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
360 "Cannot have both context-sensitive and regular profile");
361 ProfileIsCSFlat = (CSProfileCount > 0);
362 assert((TopLevelProbeProfileCount == 0 ||
363 TopLevelProbeProfileCount == Profiles.size()) &&
364 "Cannot have both probe-based profiles and regular profiles");
365 ProfileIsProbeBased = (TopLevelProbeProfileCount > 0);
366 FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
367 FunctionSamples::ProfileIsCSFlat = ProfileIsCSFlat;
368 FunctionSamples::ProfileIsCSNested = ProfileIsCSNested;
370 if (Result == sampleprof_error::success)
371 computeSummary();
373 return Result;
376 bool SampleProfileReaderText::hasFormat(const MemoryBuffer &Buffer) {
377 bool result = false;
379 // Check that the first non-comment line is a valid function header.
380 line_iterator LineIt(Buffer, /*SkipBlanks=*/true, '#');
381 if (!LineIt.is_at_eof()) {
382 if ((*LineIt)[0] != ' ') {
383 uint64_t NumSamples, NumHeadSamples;
384 StringRef FName;
385 result = ParseHead(*LineIt, FName, NumSamples, NumHeadSamples);
389 return result;
392 template <typename T> ErrorOr<T> SampleProfileReaderBinary::readNumber() {
393 unsigned NumBytesRead = 0;
394 std::error_code EC;
395 uint64_t Val = decodeULEB128(Data, &NumBytesRead);
397 if (Val > std::numeric_limits<T>::max())
398 EC = sampleprof_error::malformed;
399 else if (Data + NumBytesRead > End)
400 EC = sampleprof_error::truncated;
401 else
402 EC = sampleprof_error::success;
404 if (EC) {
405 reportError(0, EC.message());
406 return EC;
409 Data += NumBytesRead;
410 return static_cast<T>(Val);
413 ErrorOr<StringRef> SampleProfileReaderBinary::readString() {
414 std::error_code EC;
415 StringRef Str(reinterpret_cast<const char *>(Data));
416 if (Data + Str.size() + 1 > End) {
417 EC = sampleprof_error::truncated;
418 reportError(0, EC.message());
419 return EC;
422 Data += Str.size() + 1;
423 return Str;
426 template <typename T>
427 ErrorOr<T> SampleProfileReaderBinary::readUnencodedNumber() {
428 std::error_code EC;
430 if (Data + sizeof(T) > End) {
431 EC = sampleprof_error::truncated;
432 reportError(0, EC.message());
433 return EC;
436 using namespace support;
437 T Val = endian::readNext<T, little, unaligned>(Data);
438 return Val;
441 template <typename T>
442 inline ErrorOr<uint32_t> SampleProfileReaderBinary::readStringIndex(T &Table) {
443 std::error_code EC;
444 auto Idx = readNumber<uint32_t>();
445 if (std::error_code EC = Idx.getError())
446 return EC;
447 if (*Idx >= Table.size())
448 return sampleprof_error::truncated_name_table;
449 return *Idx;
452 ErrorOr<StringRef> SampleProfileReaderBinary::readStringFromTable() {
453 auto Idx = readStringIndex(NameTable);
454 if (std::error_code EC = Idx.getError())
455 return EC;
457 return NameTable[*Idx];
460 ErrorOr<SampleContext> SampleProfileReaderBinary::readSampleContextFromTable() {
461 auto FName(readStringFromTable());
462 if (std::error_code EC = FName.getError())
463 return EC;
464 return SampleContext(*FName);
467 ErrorOr<StringRef> SampleProfileReaderExtBinaryBase::readStringFromTable() {
468 if (!FixedLengthMD5)
469 return SampleProfileReaderBinary::readStringFromTable();
471 // read NameTable index.
472 auto Idx = readStringIndex(NameTable);
473 if (std::error_code EC = Idx.getError())
474 return EC;
476 // Check whether the name to be accessed has been accessed before,
477 // if not, read it from memory directly.
478 StringRef &SR = NameTable[*Idx];
479 if (SR.empty()) {
480 const uint8_t *SavedData = Data;
481 Data = MD5NameMemStart + ((*Idx) * sizeof(uint64_t));
482 auto FID = readUnencodedNumber<uint64_t>();
483 if (std::error_code EC = FID.getError())
484 return EC;
485 // Save the string converted from uint64_t in MD5StringBuf. All the
486 // references to the name are all StringRefs refering to the string
487 // in MD5StringBuf.
488 MD5StringBuf->push_back(std::to_string(*FID));
489 SR = MD5StringBuf->back();
490 Data = SavedData;
492 return SR;
495 ErrorOr<StringRef> SampleProfileReaderCompactBinary::readStringFromTable() {
496 auto Idx = readStringIndex(NameTable);
497 if (std::error_code EC = Idx.getError())
498 return EC;
500 return StringRef(NameTable[*Idx]);
503 std::error_code
504 SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
505 auto NumSamples = readNumber<uint64_t>();
506 if (std::error_code EC = NumSamples.getError())
507 return EC;
508 FProfile.addTotalSamples(*NumSamples);
510 // Read the samples in the body.
511 auto NumRecords = readNumber<uint32_t>();
512 if (std::error_code EC = NumRecords.getError())
513 return EC;
515 for (uint32_t I = 0; I < *NumRecords; ++I) {
516 auto LineOffset = readNumber<uint64_t>();
517 if (std::error_code EC = LineOffset.getError())
518 return EC;
520 if (!isOffsetLegal(*LineOffset)) {
521 return std::error_code();
524 auto Discriminator = readNumber<uint64_t>();
525 if (std::error_code EC = Discriminator.getError())
526 return EC;
528 auto NumSamples = readNumber<uint64_t>();
529 if (std::error_code EC = NumSamples.getError())
530 return EC;
532 auto NumCalls = readNumber<uint32_t>();
533 if (std::error_code EC = NumCalls.getError())
534 return EC;
536 // Here we handle FS discriminators:
537 uint32_t DiscriminatorVal = (*Discriminator) & getDiscriminatorMask();
539 for (uint32_t J = 0; J < *NumCalls; ++J) {
540 auto CalledFunction(readStringFromTable());
541 if (std::error_code EC = CalledFunction.getError())
542 return EC;
544 auto CalledFunctionSamples = readNumber<uint64_t>();
545 if (std::error_code EC = CalledFunctionSamples.getError())
546 return EC;
548 FProfile.addCalledTargetSamples(*LineOffset, DiscriminatorVal,
549 *CalledFunction, *CalledFunctionSamples);
552 FProfile.addBodySamples(*LineOffset, DiscriminatorVal, *NumSamples);
555 // Read all the samples for inlined function calls.
556 auto NumCallsites = readNumber<uint32_t>();
557 if (std::error_code EC = NumCallsites.getError())
558 return EC;
560 for (uint32_t J = 0; J < *NumCallsites; ++J) {
561 auto LineOffset = readNumber<uint64_t>();
562 if (std::error_code EC = LineOffset.getError())
563 return EC;
565 auto Discriminator = readNumber<uint64_t>();
566 if (std::error_code EC = Discriminator.getError())
567 return EC;
569 auto FName(readStringFromTable());
570 if (std::error_code EC = FName.getError())
571 return EC;
573 // Here we handle FS discriminators:
574 uint32_t DiscriminatorVal = (*Discriminator) & getDiscriminatorMask();
576 FunctionSamples &CalleeProfile = FProfile.functionSamplesAt(
577 LineLocation(*LineOffset, DiscriminatorVal))[std::string(*FName)];
578 CalleeProfile.setName(*FName);
579 if (std::error_code EC = readProfile(CalleeProfile))
580 return EC;
583 return sampleprof_error::success;
586 std::error_code
587 SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
588 Data = Start;
589 auto NumHeadSamples = readNumber<uint64_t>();
590 if (std::error_code EC = NumHeadSamples.getError())
591 return EC;
593 ErrorOr<SampleContext> FContext(readSampleContextFromTable());
594 if (std::error_code EC = FContext.getError())
595 return EC;
597 Profiles[*FContext] = FunctionSamples();
598 FunctionSamples &FProfile = Profiles[*FContext];
599 FProfile.setContext(*FContext);
600 FProfile.addHeadSamples(*NumHeadSamples);
602 if (FContext->hasContext())
603 CSProfileCount++;
605 if (std::error_code EC = readProfile(FProfile))
606 return EC;
607 return sampleprof_error::success;
610 std::error_code SampleProfileReaderBinary::readImpl() {
611 ProfileIsFS = ProfileIsFSDisciminator;
612 FunctionSamples::ProfileIsFS = ProfileIsFS;
613 while (!at_eof()) {
614 if (std::error_code EC = readFuncProfile(Data))
615 return EC;
618 return sampleprof_error::success;
621 ErrorOr<SampleContextFrames>
622 SampleProfileReaderExtBinaryBase::readContextFromTable() {
623 auto ContextIdx = readNumber<uint32_t>();
624 if (std::error_code EC = ContextIdx.getError())
625 return EC;
626 if (*ContextIdx >= CSNameTable->size())
627 return sampleprof_error::truncated_name_table;
628 return (*CSNameTable)[*ContextIdx];
631 ErrorOr<SampleContext>
632 SampleProfileReaderExtBinaryBase::readSampleContextFromTable() {
633 if (ProfileIsCSFlat) {
634 auto FContext(readContextFromTable());
635 if (std::error_code EC = FContext.getError())
636 return EC;
637 return SampleContext(*FContext);
638 } else {
639 auto FName(readStringFromTable());
640 if (std::error_code EC = FName.getError())
641 return EC;
642 return SampleContext(*FName);
646 std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
647 const uint8_t *Start, uint64_t Size, const SecHdrTableEntry &Entry) {
648 Data = Start;
649 End = Start + Size;
650 switch (Entry.Type) {
651 case SecProfSummary:
652 if (std::error_code EC = readSummary())
653 return EC;
654 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
655 Summary->setPartialProfile(true);
656 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext))
657 FunctionSamples::ProfileIsCSFlat = ProfileIsCSFlat = true;
658 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsCSNested))
659 FunctionSamples::ProfileIsCSNested = ProfileIsCSNested = true;
660 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator))
661 FunctionSamples::ProfileIsFS = ProfileIsFS = true;
662 break;
663 case SecNameTable: {
664 FixedLengthMD5 =
665 hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5);
666 bool UseMD5 = hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name);
667 assert((!FixedLengthMD5 || UseMD5) &&
668 "If FixedLengthMD5 is true, UseMD5 has to be true");
669 FunctionSamples::HasUniqSuffix =
670 hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix);
671 if (std::error_code EC = readNameTableSec(UseMD5))
672 return EC;
673 break;
675 case SecCSNameTable: {
676 if (std::error_code EC = readCSNameTableSec())
677 return EC;
678 break;
680 case SecLBRProfile:
681 if (std::error_code EC = readFuncProfiles())
682 return EC;
683 break;
684 case SecFuncOffsetTable:
685 FuncOffsetsOrdered = hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered);
686 if (std::error_code EC = readFuncOffsetTable())
687 return EC;
688 break;
689 case SecFuncMetadata: {
690 ProfileIsProbeBased =
691 hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased);
692 FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
693 bool HasAttribute =
694 hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute);
695 if (std::error_code EC = readFuncMetadata(HasAttribute))
696 return EC;
697 break;
699 case SecProfileSymbolList:
700 if (std::error_code EC = readProfileSymbolList())
701 return EC;
702 break;
703 default:
704 if (std::error_code EC = readCustomSection(Entry))
705 return EC;
706 break;
708 return sampleprof_error::success;
711 bool SampleProfileReaderExtBinaryBase::collectFuncsFromModule() {
712 if (!M)
713 return false;
714 FuncsToUse.clear();
715 for (auto &F : *M)
716 FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F));
717 return true;
720 std::error_code SampleProfileReaderExtBinaryBase::readFuncOffsetTable() {
721 // If there are more than one FuncOffsetTable, the profile read associated
722 // with previous FuncOffsetTable has to be done before next FuncOffsetTable
723 // is read.
724 FuncOffsetTable.clear();
726 auto Size = readNumber<uint64_t>();
727 if (std::error_code EC = Size.getError())
728 return EC;
730 FuncOffsetTable.reserve(*Size);
732 if (FuncOffsetsOrdered) {
733 OrderedFuncOffsets =
734 std::make_unique<std::vector<std::pair<SampleContext, uint64_t>>>();
735 OrderedFuncOffsets->reserve(*Size);
738 for (uint32_t I = 0; I < *Size; ++I) {
739 auto FContext(readSampleContextFromTable());
740 if (std::error_code EC = FContext.getError())
741 return EC;
743 auto Offset = readNumber<uint64_t>();
744 if (std::error_code EC = Offset.getError())
745 return EC;
747 FuncOffsetTable[*FContext] = *Offset;
748 if (FuncOffsetsOrdered)
749 OrderedFuncOffsets->emplace_back(*FContext, *Offset);
752 return sampleprof_error::success;
755 std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
756 // Collect functions used by current module if the Reader has been
757 // given a module.
758 // collectFuncsFromModule uses FunctionSamples::getCanonicalFnName
759 // which will query FunctionSamples::HasUniqSuffix, so it has to be
760 // called after FunctionSamples::HasUniqSuffix is set, i.e. after
761 // NameTable section is read.
762 bool LoadFuncsToBeUsed = collectFuncsFromModule();
764 // When LoadFuncsToBeUsed is false, load all the function profiles.
765 const uint8_t *Start = Data;
766 if (!LoadFuncsToBeUsed) {
767 while (Data < End) {
768 if (std::error_code EC = readFuncProfile(Data))
769 return EC;
771 assert(Data == End && "More data is read than expected");
772 } else {
773 // Load function profiles on demand.
774 if (Remapper) {
775 for (auto Name : FuncsToUse) {
776 Remapper->insert(Name);
780 if (ProfileIsCSFlat) {
781 DenseSet<uint64_t> FuncGuidsToUse;
782 if (useMD5()) {
783 for (auto Name : FuncsToUse)
784 FuncGuidsToUse.insert(Function::getGUID(Name));
787 // For each function in current module, load all context profiles for
788 // the function as well as their callee contexts which can help profile
789 // guided importing for ThinLTO. This can be achieved by walking
790 // through an ordered context container, where contexts are laid out
791 // as if they were walked in preorder of a context trie. While
792 // traversing the trie, a link to the highest common ancestor node is
793 // kept so that all of its decendants will be loaded.
794 assert(OrderedFuncOffsets.get() &&
795 "func offset table should always be sorted in CS profile");
796 const SampleContext *CommonContext = nullptr;
797 for (const auto &NameOffset : *OrderedFuncOffsets) {
798 const auto &FContext = NameOffset.first;
799 auto FName = FContext.getName();
800 // For function in the current module, keep its farthest ancestor
801 // context. This can be used to load itself and its child and
802 // sibling contexts.
803 if ((useMD5() && FuncGuidsToUse.count(std::stoull(FName.data()))) ||
804 (!useMD5() && (FuncsToUse.count(FName) ||
805 (Remapper && Remapper->exist(FName))))) {
806 if (!CommonContext || !CommonContext->IsPrefixOf(FContext))
807 CommonContext = &FContext;
810 if (CommonContext == &FContext ||
811 (CommonContext && CommonContext->IsPrefixOf(FContext))) {
812 // Load profile for the current context which originated from
813 // the common ancestor.
814 const uint8_t *FuncProfileAddr = Start + NameOffset.second;
815 assert(FuncProfileAddr < End && "out of LBRProfile section");
816 if (std::error_code EC = readFuncProfile(FuncProfileAddr))
817 return EC;
820 } else {
821 if (useMD5()) {
822 for (auto Name : FuncsToUse) {
823 auto GUID = std::to_string(MD5Hash(Name));
824 auto iter = FuncOffsetTable.find(StringRef(GUID));
825 if (iter == FuncOffsetTable.end())
826 continue;
827 const uint8_t *FuncProfileAddr = Start + iter->second;
828 assert(FuncProfileAddr < End && "out of LBRProfile section");
829 if (std::error_code EC = readFuncProfile(FuncProfileAddr))
830 return EC;
832 } else {
833 for (auto NameOffset : FuncOffsetTable) {
834 SampleContext FContext(NameOffset.first);
835 auto FuncName = FContext.getName();
836 if (!FuncsToUse.count(FuncName) &&
837 (!Remapper || !Remapper->exist(FuncName)))
838 continue;
839 const uint8_t *FuncProfileAddr = Start + NameOffset.second;
840 assert(FuncProfileAddr < End && "out of LBRProfile section");
841 if (std::error_code EC = readFuncProfile(FuncProfileAddr))
842 return EC;
846 Data = End;
848 assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
849 "Cannot have both context-sensitive and regular profile");
850 assert((!CSProfileCount || ProfileIsCSFlat) &&
851 "Section flag should be consistent with actual profile");
852 return sampleprof_error::success;
855 std::error_code SampleProfileReaderExtBinaryBase::readProfileSymbolList() {
856 if (!ProfSymList)
857 ProfSymList = std::make_unique<ProfileSymbolList>();
859 if (std::error_code EC = ProfSymList->read(Data, End - Data))
860 return EC;
862 Data = End;
863 return sampleprof_error::success;
866 std::error_code SampleProfileReaderExtBinaryBase::decompressSection(
867 const uint8_t *SecStart, const uint64_t SecSize,
868 const uint8_t *&DecompressBuf, uint64_t &DecompressBufSize) {
869 Data = SecStart;
870 End = SecStart + SecSize;
871 auto DecompressSize = readNumber<uint64_t>();
872 if (std::error_code EC = DecompressSize.getError())
873 return EC;
874 DecompressBufSize = *DecompressSize;
876 auto CompressSize = readNumber<uint64_t>();
877 if (std::error_code EC = CompressSize.getError())
878 return EC;
880 if (!llvm::zlib::isAvailable())
881 return sampleprof_error::zlib_unavailable;
883 StringRef CompressedStrings(reinterpret_cast<const char *>(Data),
884 *CompressSize);
885 char *Buffer = Allocator.Allocate<char>(DecompressBufSize);
886 size_t UCSize = DecompressBufSize;
887 llvm::Error E =
888 zlib::uncompress(CompressedStrings, Buffer, UCSize);
889 if (E)
890 return sampleprof_error::uncompress_failed;
891 DecompressBuf = reinterpret_cast<const uint8_t *>(Buffer);
892 return sampleprof_error::success;
895 std::error_code SampleProfileReaderExtBinaryBase::readImpl() {
896 const uint8_t *BufStart =
897 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
899 for (auto &Entry : SecHdrTable) {
900 // Skip empty section.
901 if (!Entry.Size)
902 continue;
904 // Skip sections without context when SkipFlatProf is true.
905 if (SkipFlatProf && hasSecFlag(Entry, SecCommonFlags::SecFlagFlat))
906 continue;
908 const uint8_t *SecStart = BufStart + Entry.Offset;
909 uint64_t SecSize = Entry.Size;
911 // If the section is compressed, decompress it into a buffer
912 // DecompressBuf before reading the actual data. The pointee of
913 // 'Data' will be changed to buffer hold by DecompressBuf
914 // temporarily when reading the actual data.
915 bool isCompressed = hasSecFlag(Entry, SecCommonFlags::SecFlagCompress);
916 if (isCompressed) {
917 const uint8_t *DecompressBuf;
918 uint64_t DecompressBufSize;
919 if (std::error_code EC = decompressSection(
920 SecStart, SecSize, DecompressBuf, DecompressBufSize))
921 return EC;
922 SecStart = DecompressBuf;
923 SecSize = DecompressBufSize;
926 if (std::error_code EC = readOneSection(SecStart, SecSize, Entry))
927 return EC;
928 if (Data != SecStart + SecSize)
929 return sampleprof_error::malformed;
931 // Change the pointee of 'Data' from DecompressBuf to original Buffer.
932 if (isCompressed) {
933 Data = BufStart + Entry.Offset;
934 End = BufStart + Buffer->getBufferSize();
938 return sampleprof_error::success;
941 std::error_code SampleProfileReaderCompactBinary::readImpl() {
942 // Collect functions used by current module if the Reader has been
943 // given a module.
944 bool LoadFuncsToBeUsed = collectFuncsFromModule();
945 ProfileIsFS = ProfileIsFSDisciminator;
946 FunctionSamples::ProfileIsFS = ProfileIsFS;
947 std::vector<uint64_t> OffsetsToUse;
948 if (!LoadFuncsToBeUsed) {
949 // load all the function profiles.
950 for (auto FuncEntry : FuncOffsetTable) {
951 OffsetsToUse.push_back(FuncEntry.second);
953 } else {
954 // load function profiles on demand.
955 for (auto Name : FuncsToUse) {
956 auto GUID = std::to_string(MD5Hash(Name));
957 auto iter = FuncOffsetTable.find(StringRef(GUID));
958 if (iter == FuncOffsetTable.end())
959 continue;
960 OffsetsToUse.push_back(iter->second);
964 for (auto Offset : OffsetsToUse) {
965 const uint8_t *SavedData = Data;
966 if (std::error_code EC = readFuncProfile(
967 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
968 Offset))
969 return EC;
970 Data = SavedData;
972 return sampleprof_error::success;
975 std::error_code SampleProfileReaderRawBinary::verifySPMagic(uint64_t Magic) {
976 if (Magic == SPMagic())
977 return sampleprof_error::success;
978 return sampleprof_error::bad_magic;
981 std::error_code SampleProfileReaderExtBinary::verifySPMagic(uint64_t Magic) {
982 if (Magic == SPMagic(SPF_Ext_Binary))
983 return sampleprof_error::success;
984 return sampleprof_error::bad_magic;
987 std::error_code
988 SampleProfileReaderCompactBinary::verifySPMagic(uint64_t Magic) {
989 if (Magic == SPMagic(SPF_Compact_Binary))
990 return sampleprof_error::success;
991 return sampleprof_error::bad_magic;
994 std::error_code SampleProfileReaderBinary::readNameTable() {
995 auto Size = readNumber<uint32_t>();
996 if (std::error_code EC = Size.getError())
997 return EC;
998 NameTable.reserve(*Size + NameTable.size());
999 for (uint32_t I = 0; I < *Size; ++I) {
1000 auto Name(readString());
1001 if (std::error_code EC = Name.getError())
1002 return EC;
1003 NameTable.push_back(*Name);
1006 return sampleprof_error::success;
1009 std::error_code SampleProfileReaderExtBinaryBase::readMD5NameTable() {
1010 auto Size = readNumber<uint64_t>();
1011 if (std::error_code EC = Size.getError())
1012 return EC;
1013 MD5StringBuf = std::make_unique<std::vector<std::string>>();
1014 MD5StringBuf->reserve(*Size);
1015 if (FixedLengthMD5) {
1016 // Preallocate and initialize NameTable so we can check whether a name
1017 // index has been read before by checking whether the element in the
1018 // NameTable is empty, meanwhile readStringIndex can do the boundary
1019 // check using the size of NameTable.
1020 NameTable.resize(*Size + NameTable.size());
1022 MD5NameMemStart = Data;
1023 Data = Data + (*Size) * sizeof(uint64_t);
1024 return sampleprof_error::success;
1026 NameTable.reserve(*Size);
1027 for (uint32_t I = 0; I < *Size; ++I) {
1028 auto FID = readNumber<uint64_t>();
1029 if (std::error_code EC = FID.getError())
1030 return EC;
1031 MD5StringBuf->push_back(std::to_string(*FID));
1032 // NameTable is a vector of StringRef. Here it is pushing back a
1033 // StringRef initialized with the last string in MD5stringBuf.
1034 NameTable.push_back(MD5StringBuf->back());
1036 return sampleprof_error::success;
1039 std::error_code SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5) {
1040 if (IsMD5)
1041 return readMD5NameTable();
1042 return SampleProfileReaderBinary::readNameTable();
1045 // Read in the CS name table section, which basically contains a list of context
1046 // vectors. Each element of a context vector, aka a frame, refers to the
1047 // underlying raw function names that are stored in the name table, as well as
1048 // a callsite identifier that only makes sense for non-leaf frames.
1049 std::error_code SampleProfileReaderExtBinaryBase::readCSNameTableSec() {
1050 auto Size = readNumber<uint32_t>();
1051 if (std::error_code EC = Size.getError())
1052 return EC;
1054 std::vector<SampleContextFrameVector> *PNameVec =
1055 new std::vector<SampleContextFrameVector>();
1056 PNameVec->reserve(*Size);
1057 for (uint32_t I = 0; I < *Size; ++I) {
1058 PNameVec->emplace_back(SampleContextFrameVector());
1059 auto ContextSize = readNumber<uint32_t>();
1060 if (std::error_code EC = ContextSize.getError())
1061 return EC;
1062 for (uint32_t J = 0; J < *ContextSize; ++J) {
1063 auto FName(readStringFromTable());
1064 if (std::error_code EC = FName.getError())
1065 return EC;
1066 auto LineOffset = readNumber<uint64_t>();
1067 if (std::error_code EC = LineOffset.getError())
1068 return EC;
1070 if (!isOffsetLegal(*LineOffset))
1071 return std::error_code();
1073 auto Discriminator = readNumber<uint64_t>();
1074 if (std::error_code EC = Discriminator.getError())
1075 return EC;
1077 PNameVec->back().emplace_back(
1078 FName.get(), LineLocation(LineOffset.get(), Discriminator.get()));
1082 // From this point the underlying object of CSNameTable should be immutable.
1083 CSNameTable.reset(PNameVec);
1084 return sampleprof_error::success;
1087 std::error_code
1089 SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute,
1090 FunctionSamples *FProfile) {
1091 if (Data < End) {
1092 if (ProfileIsProbeBased) {
1093 auto Checksum = readNumber<uint64_t>();
1094 if (std::error_code EC = Checksum.getError())
1095 return EC;
1096 if (FProfile)
1097 FProfile->setFunctionHash(*Checksum);
1100 if (ProfileHasAttribute) {
1101 auto Attributes = readNumber<uint32_t>();
1102 if (std::error_code EC = Attributes.getError())
1103 return EC;
1104 if (FProfile)
1105 FProfile->getContext().setAllAttributes(*Attributes);
1108 if (!ProfileIsCSFlat) {
1109 // Read all the attributes for inlined function calls.
1110 auto NumCallsites = readNumber<uint32_t>();
1111 if (std::error_code EC = NumCallsites.getError())
1112 return EC;
1114 for (uint32_t J = 0; J < *NumCallsites; ++J) {
1115 auto LineOffset = readNumber<uint64_t>();
1116 if (std::error_code EC = LineOffset.getError())
1117 return EC;
1119 auto Discriminator = readNumber<uint64_t>();
1120 if (std::error_code EC = Discriminator.getError())
1121 return EC;
1123 auto FContext(readSampleContextFromTable());
1124 if (std::error_code EC = FContext.getError())
1125 return EC;
1127 FunctionSamples *CalleeProfile = nullptr;
1128 if (FProfile) {
1129 CalleeProfile = const_cast<FunctionSamples *>(
1130 &FProfile->functionSamplesAt(LineLocation(
1131 *LineOffset,
1132 *Discriminator))[std::string(FContext.get().getName())]);
1134 if (std::error_code EC =
1135 readFuncMetadata(ProfileHasAttribute, CalleeProfile))
1136 return EC;
1141 return sampleprof_error::success;
1144 std::error_code
1145 SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) {
1146 while (Data < End) {
1147 auto FContext(readSampleContextFromTable());
1148 if (std::error_code EC = FContext.getError())
1149 return EC;
1150 FunctionSamples *FProfile = nullptr;
1151 auto It = Profiles.find(*FContext);
1152 if (It != Profiles.end())
1153 FProfile = &It->second;
1155 if (std::error_code EC = readFuncMetadata(ProfileHasAttribute, FProfile))
1156 return EC;
1159 assert(Data == End && "More data is read than expected");
1160 return sampleprof_error::success;
1163 std::error_code SampleProfileReaderCompactBinary::readNameTable() {
1164 auto Size = readNumber<uint64_t>();
1165 if (std::error_code EC = Size.getError())
1166 return EC;
1167 NameTable.reserve(*Size);
1168 for (uint32_t I = 0; I < *Size; ++I) {
1169 auto FID = readNumber<uint64_t>();
1170 if (std::error_code EC = FID.getError())
1171 return EC;
1172 NameTable.push_back(std::to_string(*FID));
1174 return sampleprof_error::success;
1177 std::error_code
1178 SampleProfileReaderExtBinaryBase::readSecHdrTableEntry(uint32_t Idx) {
1179 SecHdrTableEntry Entry;
1180 auto Type = readUnencodedNumber<uint64_t>();
1181 if (std::error_code EC = Type.getError())
1182 return EC;
1183 Entry.Type = static_cast<SecType>(*Type);
1185 auto Flags = readUnencodedNumber<uint64_t>();
1186 if (std::error_code EC = Flags.getError())
1187 return EC;
1188 Entry.Flags = *Flags;
1190 auto Offset = readUnencodedNumber<uint64_t>();
1191 if (std::error_code EC = Offset.getError())
1192 return EC;
1193 Entry.Offset = *Offset;
1195 auto Size = readUnencodedNumber<uint64_t>();
1196 if (std::error_code EC = Size.getError())
1197 return EC;
1198 Entry.Size = *Size;
1200 Entry.LayoutIndex = Idx;
1201 SecHdrTable.push_back(std::move(Entry));
1202 return sampleprof_error::success;
1205 std::error_code SampleProfileReaderExtBinaryBase::readSecHdrTable() {
1206 auto EntryNum = readUnencodedNumber<uint64_t>();
1207 if (std::error_code EC = EntryNum.getError())
1208 return EC;
1210 for (uint32_t i = 0; i < (*EntryNum); i++)
1211 if (std::error_code EC = readSecHdrTableEntry(i))
1212 return EC;
1214 return sampleprof_error::success;
1217 std::error_code SampleProfileReaderExtBinaryBase::readHeader() {
1218 const uint8_t *BufStart =
1219 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
1220 Data = BufStart;
1221 End = BufStart + Buffer->getBufferSize();
1223 if (std::error_code EC = readMagicIdent())
1224 return EC;
1226 if (std::error_code EC = readSecHdrTable())
1227 return EC;
1229 return sampleprof_error::success;
1232 uint64_t SampleProfileReaderExtBinaryBase::getSectionSize(SecType Type) {
1233 uint64_t Size = 0;
1234 for (auto &Entry : SecHdrTable) {
1235 if (Entry.Type == Type)
1236 Size += Entry.Size;
1238 return Size;
1241 uint64_t SampleProfileReaderExtBinaryBase::getFileSize() {
1242 // Sections in SecHdrTable is not necessarily in the same order as
1243 // sections in the profile because section like FuncOffsetTable needs
1244 // to be written after section LBRProfile but needs to be read before
1245 // section LBRProfile, so we cannot simply use the last entry in
1246 // SecHdrTable to calculate the file size.
1247 uint64_t FileSize = 0;
1248 for (auto &Entry : SecHdrTable) {
1249 FileSize = std::max(Entry.Offset + Entry.Size, FileSize);
1251 return FileSize;
1254 static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) {
1255 std::string Flags;
1256 if (hasSecFlag(Entry, SecCommonFlags::SecFlagCompress))
1257 Flags.append("{compressed,");
1258 else
1259 Flags.append("{");
1261 if (hasSecFlag(Entry, SecCommonFlags::SecFlagFlat))
1262 Flags.append("flat,");
1264 switch (Entry.Type) {
1265 case SecNameTable:
1266 if (hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5))
1267 Flags.append("fixlenmd5,");
1268 else if (hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name))
1269 Flags.append("md5,");
1270 if (hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix))
1271 Flags.append("uniq,");
1272 break;
1273 case SecProfSummary:
1274 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
1275 Flags.append("partial,");
1276 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext))
1277 Flags.append("context,");
1278 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsCSNested))
1279 Flags.append("context-nested,");
1280 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator))
1281 Flags.append("fs-discriminator,");
1282 break;
1283 case SecFuncOffsetTable:
1284 if (hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered))
1285 Flags.append("ordered,");
1286 break;
1287 case SecFuncMetadata:
1288 if (hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased))
1289 Flags.append("probe,");
1290 if (hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute))
1291 Flags.append("attr,");
1292 break;
1293 default:
1294 break;
1296 char &last = Flags.back();
1297 if (last == ',')
1298 last = '}';
1299 else
1300 Flags.append("}");
1301 return Flags;
1304 bool SampleProfileReaderExtBinaryBase::dumpSectionInfo(raw_ostream &OS) {
1305 uint64_t TotalSecsSize = 0;
1306 for (auto &Entry : SecHdrTable) {
1307 OS << getSecName(Entry.Type) << " - Offset: " << Entry.Offset
1308 << ", Size: " << Entry.Size << ", Flags: " << getSecFlagsStr(Entry)
1309 << "\n";
1311 TotalSecsSize += Entry.Size;
1313 uint64_t HeaderSize = SecHdrTable.front().Offset;
1314 assert(HeaderSize + TotalSecsSize == getFileSize() &&
1315 "Size of 'header + sections' doesn't match the total size of profile");
1317 OS << "Header Size: " << HeaderSize << "\n";
1318 OS << "Total Sections Size: " << TotalSecsSize << "\n";
1319 OS << "File Size: " << getFileSize() << "\n";
1320 return true;
1323 std::error_code SampleProfileReaderBinary::readMagicIdent() {
1324 // Read and check the magic identifier.
1325 auto Magic = readNumber<uint64_t>();
1326 if (std::error_code EC = Magic.getError())
1327 return EC;
1328 else if (std::error_code EC = verifySPMagic(*Magic))
1329 return EC;
1331 // Read the version number.
1332 auto Version = readNumber<uint64_t>();
1333 if (std::error_code EC = Version.getError())
1334 return EC;
1335 else if (*Version != SPVersion())
1336 return sampleprof_error::unsupported_version;
1338 return sampleprof_error::success;
1341 std::error_code SampleProfileReaderBinary::readHeader() {
1342 Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
1343 End = Data + Buffer->getBufferSize();
1345 if (std::error_code EC = readMagicIdent())
1346 return EC;
1348 if (std::error_code EC = readSummary())
1349 return EC;
1351 if (std::error_code EC = readNameTable())
1352 return EC;
1353 return sampleprof_error::success;
1356 std::error_code SampleProfileReaderCompactBinary::readHeader() {
1357 SampleProfileReaderBinary::readHeader();
1358 if (std::error_code EC = readFuncOffsetTable())
1359 return EC;
1360 return sampleprof_error::success;
1363 std::error_code SampleProfileReaderCompactBinary::readFuncOffsetTable() {
1364 auto TableOffset = readUnencodedNumber<uint64_t>();
1365 if (std::error_code EC = TableOffset.getError())
1366 return EC;
1368 const uint8_t *SavedData = Data;
1369 const uint8_t *TableStart =
1370 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
1371 *TableOffset;
1372 Data = TableStart;
1374 auto Size = readNumber<uint64_t>();
1375 if (std::error_code EC = Size.getError())
1376 return EC;
1378 FuncOffsetTable.reserve(*Size);
1379 for (uint32_t I = 0; I < *Size; ++I) {
1380 auto FName(readStringFromTable());
1381 if (std::error_code EC = FName.getError())
1382 return EC;
1384 auto Offset = readNumber<uint64_t>();
1385 if (std::error_code EC = Offset.getError())
1386 return EC;
1388 FuncOffsetTable[*FName] = *Offset;
1390 End = TableStart;
1391 Data = SavedData;
1392 return sampleprof_error::success;
1395 bool SampleProfileReaderCompactBinary::collectFuncsFromModule() {
1396 if (!M)
1397 return false;
1398 FuncsToUse.clear();
1399 for (auto &F : *M)
1400 FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F));
1401 return true;
1404 std::error_code SampleProfileReaderBinary::readSummaryEntry(
1405 std::vector<ProfileSummaryEntry> &Entries) {
1406 auto Cutoff = readNumber<uint64_t>();
1407 if (std::error_code EC = Cutoff.getError())
1408 return EC;
1410 auto MinBlockCount = readNumber<uint64_t>();
1411 if (std::error_code EC = MinBlockCount.getError())
1412 return EC;
1414 auto NumBlocks = readNumber<uint64_t>();
1415 if (std::error_code EC = NumBlocks.getError())
1416 return EC;
1418 Entries.emplace_back(*Cutoff, *MinBlockCount, *NumBlocks);
1419 return sampleprof_error::success;
1422 std::error_code SampleProfileReaderBinary::readSummary() {
1423 auto TotalCount = readNumber<uint64_t>();
1424 if (std::error_code EC = TotalCount.getError())
1425 return EC;
1427 auto MaxBlockCount = readNumber<uint64_t>();
1428 if (std::error_code EC = MaxBlockCount.getError())
1429 return EC;
1431 auto MaxFunctionCount = readNumber<uint64_t>();
1432 if (std::error_code EC = MaxFunctionCount.getError())
1433 return EC;
1435 auto NumBlocks = readNumber<uint64_t>();
1436 if (std::error_code EC = NumBlocks.getError())
1437 return EC;
1439 auto NumFunctions = readNumber<uint64_t>();
1440 if (std::error_code EC = NumFunctions.getError())
1441 return EC;
1443 auto NumSummaryEntries = readNumber<uint64_t>();
1444 if (std::error_code EC = NumSummaryEntries.getError())
1445 return EC;
1447 std::vector<ProfileSummaryEntry> Entries;
1448 for (unsigned i = 0; i < *NumSummaryEntries; i++) {
1449 std::error_code EC = readSummaryEntry(Entries);
1450 if (EC != sampleprof_error::success)
1451 return EC;
1453 Summary = std::make_unique<ProfileSummary>(
1454 ProfileSummary::PSK_Sample, Entries, *TotalCount, *MaxBlockCount, 0,
1455 *MaxFunctionCount, *NumBlocks, *NumFunctions);
1457 return sampleprof_error::success;
1460 bool SampleProfileReaderRawBinary::hasFormat(const MemoryBuffer &Buffer) {
1461 const uint8_t *Data =
1462 reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1463 uint64_t Magic = decodeULEB128(Data);
1464 return Magic == SPMagic();
1467 bool SampleProfileReaderExtBinary::hasFormat(const MemoryBuffer &Buffer) {
1468 const uint8_t *Data =
1469 reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1470 uint64_t Magic = decodeULEB128(Data);
1471 return Magic == SPMagic(SPF_Ext_Binary);
1474 bool SampleProfileReaderCompactBinary::hasFormat(const MemoryBuffer &Buffer) {
1475 const uint8_t *Data =
1476 reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1477 uint64_t Magic = decodeULEB128(Data);
1478 return Magic == SPMagic(SPF_Compact_Binary);
1481 std::error_code SampleProfileReaderGCC::skipNextWord() {
1482 uint32_t dummy;
1483 if (!GcovBuffer.readInt(dummy))
1484 return sampleprof_error::truncated;
1485 return sampleprof_error::success;
1488 template <typename T> ErrorOr<T> SampleProfileReaderGCC::readNumber() {
1489 if (sizeof(T) <= sizeof(uint32_t)) {
1490 uint32_t Val;
1491 if (GcovBuffer.readInt(Val) && Val <= std::numeric_limits<T>::max())
1492 return static_cast<T>(Val);
1493 } else if (sizeof(T) <= sizeof(uint64_t)) {
1494 uint64_t Val;
1495 if (GcovBuffer.readInt64(Val) && Val <= std::numeric_limits<T>::max())
1496 return static_cast<T>(Val);
1499 std::error_code EC = sampleprof_error::malformed;
1500 reportError(0, EC.message());
1501 return EC;
1504 ErrorOr<StringRef> SampleProfileReaderGCC::readString() {
1505 StringRef Str;
1506 if (!GcovBuffer.readString(Str))
1507 return sampleprof_error::truncated;
1508 return Str;
1511 std::error_code SampleProfileReaderGCC::readHeader() {
1512 // Read the magic identifier.
1513 if (!GcovBuffer.readGCDAFormat())
1514 return sampleprof_error::unrecognized_format;
1516 // Read the version number. Note - the GCC reader does not validate this
1517 // version, but the profile creator generates v704.
1518 GCOV::GCOVVersion version;
1519 if (!GcovBuffer.readGCOVVersion(version))
1520 return sampleprof_error::unrecognized_format;
1522 if (version != GCOV::V407)
1523 return sampleprof_error::unsupported_version;
1525 // Skip the empty integer.
1526 if (std::error_code EC = skipNextWord())
1527 return EC;
1529 return sampleprof_error::success;
1532 std::error_code SampleProfileReaderGCC::readSectionTag(uint32_t Expected) {
1533 uint32_t Tag;
1534 if (!GcovBuffer.readInt(Tag))
1535 return sampleprof_error::truncated;
1537 if (Tag != Expected)
1538 return sampleprof_error::malformed;
1540 if (std::error_code EC = skipNextWord())
1541 return EC;
1543 return sampleprof_error::success;
1546 std::error_code SampleProfileReaderGCC::readNameTable() {
1547 if (std::error_code EC = readSectionTag(GCOVTagAFDOFileNames))
1548 return EC;
1550 uint32_t Size;
1551 if (!GcovBuffer.readInt(Size))
1552 return sampleprof_error::truncated;
1554 for (uint32_t I = 0; I < Size; ++I) {
1555 StringRef Str;
1556 if (!GcovBuffer.readString(Str))
1557 return sampleprof_error::truncated;
1558 Names.push_back(std::string(Str));
1561 return sampleprof_error::success;
1564 std::error_code SampleProfileReaderGCC::readFunctionProfiles() {
1565 if (std::error_code EC = readSectionTag(GCOVTagAFDOFunction))
1566 return EC;
1568 uint32_t NumFunctions;
1569 if (!GcovBuffer.readInt(NumFunctions))
1570 return sampleprof_error::truncated;
1572 InlineCallStack Stack;
1573 for (uint32_t I = 0; I < NumFunctions; ++I)
1574 if (std::error_code EC = readOneFunctionProfile(Stack, true, 0))
1575 return EC;
1577 computeSummary();
1578 return sampleprof_error::success;
1581 std::error_code SampleProfileReaderGCC::readOneFunctionProfile(
1582 const InlineCallStack &InlineStack, bool Update, uint32_t Offset) {
1583 uint64_t HeadCount = 0;
1584 if (InlineStack.size() == 0)
1585 if (!GcovBuffer.readInt64(HeadCount))
1586 return sampleprof_error::truncated;
1588 uint32_t NameIdx;
1589 if (!GcovBuffer.readInt(NameIdx))
1590 return sampleprof_error::truncated;
1592 StringRef Name(Names[NameIdx]);
1594 uint32_t NumPosCounts;
1595 if (!GcovBuffer.readInt(NumPosCounts))
1596 return sampleprof_error::truncated;
1598 uint32_t NumCallsites;
1599 if (!GcovBuffer.readInt(NumCallsites))
1600 return sampleprof_error::truncated;
1602 FunctionSamples *FProfile = nullptr;
1603 if (InlineStack.size() == 0) {
1604 // If this is a top function that we have already processed, do not
1605 // update its profile again. This happens in the presence of
1606 // function aliases. Since these aliases share the same function
1607 // body, there will be identical replicated profiles for the
1608 // original function. In this case, we simply not bother updating
1609 // the profile of the original function.
1610 FProfile = &Profiles[Name];
1611 FProfile->addHeadSamples(HeadCount);
1612 if (FProfile->getTotalSamples() > 0)
1613 Update = false;
1614 } else {
1615 // Otherwise, we are reading an inlined instance. The top of the
1616 // inline stack contains the profile of the caller. Insert this
1617 // callee in the caller's CallsiteMap.
1618 FunctionSamples *CallerProfile = InlineStack.front();
1619 uint32_t LineOffset = Offset >> 16;
1620 uint32_t Discriminator = Offset & 0xffff;
1621 FProfile = &CallerProfile->functionSamplesAt(
1622 LineLocation(LineOffset, Discriminator))[std::string(Name)];
1624 FProfile->setName(Name);
1626 for (uint32_t I = 0; I < NumPosCounts; ++I) {
1627 uint32_t Offset;
1628 if (!GcovBuffer.readInt(Offset))
1629 return sampleprof_error::truncated;
1631 uint32_t NumTargets;
1632 if (!GcovBuffer.readInt(NumTargets))
1633 return sampleprof_error::truncated;
1635 uint64_t Count;
1636 if (!GcovBuffer.readInt64(Count))
1637 return sampleprof_error::truncated;
1639 // The line location is encoded in the offset as:
1640 // high 16 bits: line offset to the start of the function.
1641 // low 16 bits: discriminator.
1642 uint32_t LineOffset = Offset >> 16;
1643 uint32_t Discriminator = Offset & 0xffff;
1645 InlineCallStack NewStack;
1646 NewStack.push_back(FProfile);
1647 llvm::append_range(NewStack, InlineStack);
1648 if (Update) {
1649 // Walk up the inline stack, adding the samples on this line to
1650 // the total sample count of the callers in the chain.
1651 for (auto CallerProfile : NewStack)
1652 CallerProfile->addTotalSamples(Count);
1654 // Update the body samples for the current profile.
1655 FProfile->addBodySamples(LineOffset, Discriminator, Count);
1658 // Process the list of functions called at an indirect call site.
1659 // These are all the targets that a function pointer (or virtual
1660 // function) resolved at runtime.
1661 for (uint32_t J = 0; J < NumTargets; J++) {
1662 uint32_t HistVal;
1663 if (!GcovBuffer.readInt(HistVal))
1664 return sampleprof_error::truncated;
1666 if (HistVal != HIST_TYPE_INDIR_CALL_TOPN)
1667 return sampleprof_error::malformed;
1669 uint64_t TargetIdx;
1670 if (!GcovBuffer.readInt64(TargetIdx))
1671 return sampleprof_error::truncated;
1672 StringRef TargetName(Names[TargetIdx]);
1674 uint64_t TargetCount;
1675 if (!GcovBuffer.readInt64(TargetCount))
1676 return sampleprof_error::truncated;
1678 if (Update)
1679 FProfile->addCalledTargetSamples(LineOffset, Discriminator,
1680 TargetName, TargetCount);
1684 // Process all the inlined callers into the current function. These
1685 // are all the callsites that were inlined into this function.
1686 for (uint32_t I = 0; I < NumCallsites; I++) {
1687 // The offset is encoded as:
1688 // high 16 bits: line offset to the start of the function.
1689 // low 16 bits: discriminator.
1690 uint32_t Offset;
1691 if (!GcovBuffer.readInt(Offset))
1692 return sampleprof_error::truncated;
1693 InlineCallStack NewStack;
1694 NewStack.push_back(FProfile);
1695 llvm::append_range(NewStack, InlineStack);
1696 if (std::error_code EC = readOneFunctionProfile(NewStack, Update, Offset))
1697 return EC;
1700 return sampleprof_error::success;
1703 /// Read a GCC AutoFDO profile.
1705 /// This format is generated by the Linux Perf conversion tool at
1706 /// https://github.com/google/autofdo.
1707 std::error_code SampleProfileReaderGCC::readImpl() {
1708 assert(!ProfileIsFSDisciminator && "Gcc profiles not support FSDisciminator");
1709 // Read the string table.
1710 if (std::error_code EC = readNameTable())
1711 return EC;
1713 // Read the source profile.
1714 if (std::error_code EC = readFunctionProfiles())
1715 return EC;
1717 return sampleprof_error::success;
1720 bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer &Buffer) {
1721 StringRef Magic(reinterpret_cast<const char *>(Buffer.getBufferStart()));
1722 return Magic == "adcg*704";
1725 void SampleProfileReaderItaniumRemapper::applyRemapping(LLVMContext &Ctx) {
1726 // If the reader uses MD5 to represent string, we can't remap it because
1727 // we don't know what the original function names were.
1728 if (Reader.useMD5()) {
1729 Ctx.diagnose(DiagnosticInfoSampleProfile(
1730 Reader.getBuffer()->getBufferIdentifier(),
1731 "Profile data remapping cannot be applied to profile data "
1732 "in compact format (original mangled names are not available).",
1733 DS_Warning));
1734 return;
1737 // CSSPGO-TODO: Remapper is not yet supported.
1738 // We will need to remap the entire context string.
1739 assert(Remappings && "should be initialized while creating remapper");
1740 for (auto &Sample : Reader.getProfiles()) {
1741 DenseSet<StringRef> NamesInSample;
1742 Sample.second.findAllNames(NamesInSample);
1743 for (auto &Name : NamesInSample)
1744 if (auto Key = Remappings->insert(Name))
1745 NameMap.insert({Key, Name});
1748 RemappingApplied = true;
1751 Optional<StringRef>
1752 SampleProfileReaderItaniumRemapper::lookUpNameInProfile(StringRef Fname) {
1753 if (auto Key = Remappings->lookup(Fname))
1754 return NameMap.lookup(Key);
1755 return None;
1758 /// Prepare a memory buffer for the contents of \p Filename.
1760 /// \returns an error code indicating the status of the buffer.
1761 static ErrorOr<std::unique_ptr<MemoryBuffer>>
1762 setupMemoryBuffer(const Twine &Filename) {
1763 auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(Filename, /*IsText=*/true);
1764 if (std::error_code EC = BufferOrErr.getError())
1765 return EC;
1766 auto Buffer = std::move(BufferOrErr.get());
1768 // Check the file.
1769 if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint32_t>::max())
1770 return sampleprof_error::too_large;
1772 return std::move(Buffer);
1775 /// Create a sample profile reader based on the format of the input file.
1777 /// \param Filename The file to open.
1779 /// \param C The LLVM context to use to emit diagnostics.
1781 /// \param P The FSDiscriminatorPass.
1783 /// \param RemapFilename The file used for profile remapping.
1785 /// \returns an error code indicating the status of the created reader.
1786 ErrorOr<std::unique_ptr<SampleProfileReader>>
1787 SampleProfileReader::create(const std::string Filename, LLVMContext &C,
1788 FSDiscriminatorPass P,
1789 const std::string RemapFilename) {
1790 auto BufferOrError = setupMemoryBuffer(Filename);
1791 if (std::error_code EC = BufferOrError.getError())
1792 return EC;
1793 return create(BufferOrError.get(), C, P, RemapFilename);
1796 /// Create a sample profile remapper from the given input, to remap the
1797 /// function names in the given profile data.
1799 /// \param Filename The file to open.
1801 /// \param Reader The profile reader the remapper is going to be applied to.
1803 /// \param C The LLVM context to use to emit diagnostics.
1805 /// \returns an error code indicating the status of the created reader.
1806 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
1807 SampleProfileReaderItaniumRemapper::create(const std::string Filename,
1808 SampleProfileReader &Reader,
1809 LLVMContext &C) {
1810 auto BufferOrError = setupMemoryBuffer(Filename);
1811 if (std::error_code EC = BufferOrError.getError())
1812 return EC;
1813 return create(BufferOrError.get(), Reader, C);
1816 /// Create a sample profile remapper from the given input, to remap the
1817 /// function names in the given profile data.
1819 /// \param B The memory buffer to create the reader from (assumes ownership).
1821 /// \param C The LLVM context to use to emit diagnostics.
1823 /// \param Reader The profile reader the remapper is going to be applied to.
1825 /// \returns an error code indicating the status of the created reader.
1826 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
1827 SampleProfileReaderItaniumRemapper::create(std::unique_ptr<MemoryBuffer> &B,
1828 SampleProfileReader &Reader,
1829 LLVMContext &C) {
1830 auto Remappings = std::make_unique<SymbolRemappingReader>();
1831 if (Error E = Remappings->read(*B)) {
1832 handleAllErrors(
1833 std::move(E), [&](const SymbolRemappingParseError &ParseError) {
1834 C.diagnose(DiagnosticInfoSampleProfile(B->getBufferIdentifier(),
1835 ParseError.getLineNum(),
1836 ParseError.getMessage()));
1838 return sampleprof_error::malformed;
1841 return std::make_unique<SampleProfileReaderItaniumRemapper>(
1842 std::move(B), std::move(Remappings), Reader);
1845 /// Create a sample profile reader based on the format of the input data.
1847 /// \param B The memory buffer to create the reader from (assumes ownership).
1849 /// \param C The LLVM context to use to emit diagnostics.
1851 /// \param P The FSDiscriminatorPass.
1853 /// \param RemapFilename The file used for profile remapping.
1855 /// \returns an error code indicating the status of the created reader.
1856 ErrorOr<std::unique_ptr<SampleProfileReader>>
1857 SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C,
1858 FSDiscriminatorPass P,
1859 const std::string RemapFilename) {
1860 std::unique_ptr<SampleProfileReader> Reader;
1861 if (SampleProfileReaderRawBinary::hasFormat(*B))
1862 Reader.reset(new SampleProfileReaderRawBinary(std::move(B), C));
1863 else if (SampleProfileReaderExtBinary::hasFormat(*B))
1864 Reader.reset(new SampleProfileReaderExtBinary(std::move(B), C));
1865 else if (SampleProfileReaderCompactBinary::hasFormat(*B))
1866 Reader.reset(new SampleProfileReaderCompactBinary(std::move(B), C));
1867 else if (SampleProfileReaderGCC::hasFormat(*B))
1868 Reader.reset(new SampleProfileReaderGCC(std::move(B), C));
1869 else if (SampleProfileReaderText::hasFormat(*B))
1870 Reader.reset(new SampleProfileReaderText(std::move(B), C));
1871 else
1872 return sampleprof_error::unrecognized_format;
1874 if (!RemapFilename.empty()) {
1875 auto ReaderOrErr =
1876 SampleProfileReaderItaniumRemapper::create(RemapFilename, *Reader, C);
1877 if (std::error_code EC = ReaderOrErr.getError()) {
1878 std::string Msg = "Could not create remapper: " + EC.message();
1879 C.diagnose(DiagnosticInfoSampleProfile(RemapFilename, Msg));
1880 return EC;
1882 Reader->Remapper = std::move(ReaderOrErr.get());
1885 if (std::error_code EC = Reader->readHeader()) {
1886 return EC;
1889 Reader->setDiscriminatorMaskedBitFrom(P);
1891 return std::move(Reader);
1894 // For text and GCC file formats, we compute the summary after reading the
1895 // profile. Binary format has the profile summary in its header.
1896 void SampleProfileReader::computeSummary() {
1897 SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
1898 Summary = Builder.computeSummaryForProfiles(Profiles);