[ORC] Add std::tuple support to SimplePackedSerialization.
[llvm-project.git] / llvm / lib / ProfileData / SampleProfReader.cpp
blob7fc95520951fbed981a793dc6411900a5d023d73
1 //===- SampleProfReader.cpp - Read LLVM sample profile data ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the class that reads LLVM sample profiles. It
10 // supports three file formats: text, binary and gcov.
12 // The textual representation is useful for debugging and testing purposes. The
13 // binary representation is more compact, resulting in smaller file sizes.
15 // The gcov encoding is the one generated by GCC's AutoFDO profile creation
16 // tool (https://github.com/google/autofdo)
18 // All three encodings can be used interchangeably as an input sample profile.
20 //===----------------------------------------------------------------------===//
22 #include "llvm/ProfileData/SampleProfReader.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/IR/ProfileSummary.h"
27 #include "llvm/ProfileData/ProfileCommon.h"
28 #include "llvm/ProfileData/SampleProf.h"
29 #include "llvm/Support/CommandLine.h"
30 #include "llvm/Support/Compression.h"
31 #include "llvm/Support/ErrorOr.h"
32 #include "llvm/Support/LEB128.h"
33 #include "llvm/Support/LineIterator.h"
34 #include "llvm/Support/MD5.h"
35 #include "llvm/Support/MemoryBuffer.h"
36 #include "llvm/Support/raw_ostream.h"
37 #include <algorithm>
38 #include <cstddef>
39 #include <cstdint>
40 #include <limits>
41 #include <memory>
42 #include <set>
43 #include <system_error>
44 #include <vector>
46 using namespace llvm;
47 using namespace sampleprof;
49 #define DEBUG_TYPE "samplepgo-reader"
51 // This internal option specifies if the profile uses FS discriminators.
52 // It only applies to text, binary and compact binary format profiles.
53 // For ext-binary format profiles, the flag is set in the summary.
54 static cl::opt<bool> ProfileIsFSDisciminator(
55 "profile-isfs", cl::Hidden, cl::init(false),
56 cl::desc("Profile uses flow sensitive discriminators"));
58 /// Dump the function profile for \p FName.
59 ///
60 /// \param FName Name of the function to print.
61 /// \param OS Stream to emit the output to.
62 void SampleProfileReader::dumpFunctionProfile(StringRef FName,
63 raw_ostream &OS) {
64 OS << "Function: " << FName << ": " << Profiles[FName];
67 /// Dump all the function profiles found on stream \p OS.
68 void SampleProfileReader::dump(raw_ostream &OS) {
69 std::vector<NameFunctionSamples> V;
70 sortFuncProfiles(Profiles, V);
71 for (const auto &I : V)
72 dumpFunctionProfile(I.first, OS);
75 /// Parse \p Input as function head.
76 ///
77 /// Parse one line of \p Input, and update function name in \p FName,
78 /// function's total sample count in \p NumSamples, function's entry
79 /// count in \p NumHeadSamples.
80 ///
81 /// \returns true if parsing is successful.
82 static bool ParseHead(const StringRef &Input, StringRef &FName,
83 uint64_t &NumSamples, uint64_t &NumHeadSamples) {
84 if (Input[0] == ' ')
85 return false;
86 size_t n2 = Input.rfind(':');
87 size_t n1 = Input.rfind(':', n2 - 1);
88 FName = Input.substr(0, n1);
89 if (Input.substr(n1 + 1, n2 - n1 - 1).getAsInteger(10, NumSamples))
90 return false;
91 if (Input.substr(n2 + 1).getAsInteger(10, NumHeadSamples))
92 return false;
93 return true;
/// Returns true if line offset \p L is legal, i.e. representable in 16 bits.
static bool isOffsetLegal(unsigned L) {
  constexpr unsigned MaxLegalOffset = 0xffff;
  return L <= MaxLegalOffset;
}
99 /// Parse \p Input that contains metadata.
100 /// Possible metadata:
101 /// - CFG Checksum information:
102 /// !CFGChecksum: 12345
103 /// - CFG Checksum information:
104 /// !Attributes: 1
105 /// Stores the FunctionHash (a.k.a. CFG Checksum) into \p FunctionHash.
106 static bool parseMetadata(const StringRef &Input, uint64_t &FunctionHash,
107 uint32_t &Attributes) {
108 if (Input.startswith("!CFGChecksum:")) {
109 StringRef CFGInfo = Input.substr(strlen("!CFGChecksum:")).trim();
110 return !CFGInfo.getAsInteger(10, FunctionHash);
113 if (Input.startswith("!Attributes:")) {
114 StringRef Attrib = Input.substr(strlen("!Attributes:")).trim();
115 return !Attrib.getAsInteger(10, Attributes);
118 return false;
/// Kind of an indented (non-header) line in a text profile, as classified by
/// ParseLine.
enum class LineType {
  /// 'offset[.discriminator]: callee:count' - opens an inlined callsite.
  CallSiteProfile,
  /// 'offset[.discriminator]: count [target:count]*' - samples for one line.
  BodyProfile,
  /// '!Tag: value' - per-function metadata.
  Metadata,
};
127 /// Parse \p Input as line sample.
129 /// \param Input input line.
130 /// \param LineTy Type of this line.
131 /// \param Depth the depth of the inline stack.
132 /// \param NumSamples total samples of the line/inlined callsite.
133 /// \param LineOffset line offset to the start of the function.
134 /// \param Discriminator discriminator of the line.
135 /// \param TargetCountMap map from indirect call target to count.
136 /// \param FunctionHash the function's CFG hash, used by pseudo probe.
138 /// returns true if parsing is successful.
139 static bool ParseLine(const StringRef &Input, LineType &LineTy, uint32_t &Depth,
140 uint64_t &NumSamples, uint32_t &LineOffset,
141 uint32_t &Discriminator, StringRef &CalleeName,
142 DenseMap<StringRef, uint64_t> &TargetCountMap,
143 uint64_t &FunctionHash, uint32_t &Attributes) {
144 for (Depth = 0; Input[Depth] == ' '; Depth++)
146 if (Depth == 0)
147 return false;
149 if (Depth == 1 && Input[Depth] == '!') {
150 LineTy = LineType::Metadata;
151 return parseMetadata(Input.substr(Depth), FunctionHash, Attributes);
154 size_t n1 = Input.find(':');
155 StringRef Loc = Input.substr(Depth, n1 - Depth);
156 size_t n2 = Loc.find('.');
157 if (n2 == StringRef::npos) {
158 if (Loc.getAsInteger(10, LineOffset) || !isOffsetLegal(LineOffset))
159 return false;
160 Discriminator = 0;
161 } else {
162 if (Loc.substr(0, n2).getAsInteger(10, LineOffset))
163 return false;
164 if (Loc.substr(n2 + 1).getAsInteger(10, Discriminator))
165 return false;
168 StringRef Rest = Input.substr(n1 + 2);
169 if (isDigit(Rest[0])) {
170 LineTy = LineType::BodyProfile;
171 size_t n3 = Rest.find(' ');
172 if (n3 == StringRef::npos) {
173 if (Rest.getAsInteger(10, NumSamples))
174 return false;
175 } else {
176 if (Rest.substr(0, n3).getAsInteger(10, NumSamples))
177 return false;
179 // Find call targets and their sample counts.
180 // Note: In some cases, there are symbols in the profile which are not
181 // mangled. To accommodate such cases, use colon + integer pairs as the
182 // anchor points.
183 // An example:
184 // _M_construct<char *>:1000 string_view<std::allocator<char> >:437
185 // ":1000" and ":437" are used as anchor points so the string above will
186 // be interpreted as
187 // target: _M_construct<char *>
188 // count: 1000
189 // target: string_view<std::allocator<char> >
190 // count: 437
191 while (n3 != StringRef::npos) {
192 n3 += Rest.substr(n3).find_first_not_of(' ');
193 Rest = Rest.substr(n3);
194 n3 = Rest.find_first_of(':');
195 if (n3 == StringRef::npos || n3 == 0)
196 return false;
198 StringRef Target;
199 uint64_t count, n4;
200 while (true) {
201 // Get the segment after the current colon.
202 StringRef AfterColon = Rest.substr(n3 + 1);
203 // Get the target symbol before the current colon.
204 Target = Rest.substr(0, n3);
205 // Check if the word after the current colon is an integer.
206 n4 = AfterColon.find_first_of(' ');
207 n4 = (n4 != StringRef::npos) ? n3 + n4 + 1 : Rest.size();
208 StringRef WordAfterColon = Rest.substr(n3 + 1, n4 - n3 - 1);
209 if (!WordAfterColon.getAsInteger(10, count))
210 break;
212 // Try to find the next colon.
213 uint64_t n5 = AfterColon.find_first_of(':');
214 if (n5 == StringRef::npos)
215 return false;
216 n3 += n5 + 1;
219 // An anchor point is found. Save the {target, count} pair
220 TargetCountMap[Target] = count;
221 if (n4 == Rest.size())
222 break;
223 // Change n3 to the next blank space after colon + integer pair.
224 n3 = n4;
226 } else {
227 LineTy = LineType::CallSiteProfile;
228 size_t n3 = Rest.find_last_of(':');
229 CalleeName = Rest.substr(0, n3);
230 if (Rest.substr(n3 + 1).getAsInteger(10, NumSamples))
231 return false;
233 return true;
236 /// Load samples from a text file.
238 /// See the documentation at the top of the file for an explanation of
239 /// the expected format.
241 /// \returns true if the file was loaded successfully, false otherwise.
242 std::error_code SampleProfileReaderText::readImpl() {
243 line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#');
244 sampleprof_error Result = sampleprof_error::success;
246 InlineCallStack InlineStack;
247 uint32_t ProbeProfileCount = 0;
249 // SeenMetadata tracks whether we have processed metadata for the current
250 // top-level function profile.
251 bool SeenMetadata = false;
253 ProfileIsFS = ProfileIsFSDisciminator;
254 for (; !LineIt.is_at_eof(); ++LineIt) {
255 if ((*LineIt)[(*LineIt).find_first_not_of(' ')] == '#')
256 continue;
257 // Read the header of each function.
259 // Note that for function identifiers we are actually expecting
260 // mangled names, but we may not always get them. This happens when
261 // the compiler decides not to emit the function (e.g., it was inlined
262 // and removed). In this case, the binary will not have the linkage
263 // name for the function, so the profiler will emit the function's
264 // unmangled name, which may contain characters like ':' and '>' in its
265 // name (member functions, templates, etc).
267 // The only requirement we place on the identifier, then, is that it
268 // should not begin with a number.
269 if ((*LineIt)[0] != ' ') {
270 uint64_t NumSamples, NumHeadSamples;
271 StringRef FName;
272 if (!ParseHead(*LineIt, FName, NumSamples, NumHeadSamples)) {
273 reportError(LineIt.line_number(),
274 "Expected 'mangled_name:NUM:NUM', found " + *LineIt);
275 return sampleprof_error::malformed;
277 SeenMetadata = false;
278 SampleContext FContext(FName);
279 if (FContext.hasContext())
280 ++CSProfileCount;
281 Profiles[FContext] = FunctionSamples();
282 FunctionSamples &FProfile = Profiles[FContext];
283 FProfile.setName(FContext.getNameWithoutContext());
284 FProfile.setContext(FContext);
285 MergeResult(Result, FProfile.addTotalSamples(NumSamples));
286 MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples));
287 InlineStack.clear();
288 InlineStack.push_back(&FProfile);
289 } else {
290 uint64_t NumSamples;
291 StringRef FName;
292 DenseMap<StringRef, uint64_t> TargetCountMap;
293 uint32_t Depth, LineOffset, Discriminator;
294 LineType LineTy;
295 uint64_t FunctionHash = 0;
296 uint32_t Attributes = 0;
297 if (!ParseLine(*LineIt, LineTy, Depth, NumSamples, LineOffset,
298 Discriminator, FName, TargetCountMap, FunctionHash,
299 Attributes)) {
300 reportError(LineIt.line_number(),
301 "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " +
302 *LineIt);
303 return sampleprof_error::malformed;
305 if (SeenMetadata && LineTy != LineType::Metadata) {
306 // Metadata must be put at the end of a function profile.
307 reportError(LineIt.line_number(),
308 "Found non-metadata after metadata: " + *LineIt);
309 return sampleprof_error::malformed;
312 // Here we handle FS discriminators.
313 Discriminator &= getDiscriminatorMask();
315 while (InlineStack.size() > Depth) {
316 InlineStack.pop_back();
318 switch (LineTy) {
319 case LineType::CallSiteProfile: {
320 FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt(
321 LineLocation(LineOffset, Discriminator))[std::string(FName)];
322 FSamples.setName(FName);
323 MergeResult(Result, FSamples.addTotalSamples(NumSamples));
324 InlineStack.push_back(&FSamples);
325 break;
327 case LineType::BodyProfile: {
328 while (InlineStack.size() > Depth) {
329 InlineStack.pop_back();
331 FunctionSamples &FProfile = *InlineStack.back();
332 for (const auto &name_count : TargetCountMap) {
333 MergeResult(Result, FProfile.addCalledTargetSamples(
334 LineOffset, Discriminator, name_count.first,
335 name_count.second));
337 MergeResult(Result, FProfile.addBodySamples(LineOffset, Discriminator,
338 NumSamples));
339 break;
341 case LineType::Metadata: {
342 FunctionSamples &FProfile = *InlineStack.back();
343 if (FunctionHash) {
344 FProfile.setFunctionHash(FunctionHash);
345 ++ProbeProfileCount;
347 if (Attributes)
348 FProfile.getContext().setAllAttributes(Attributes);
349 SeenMetadata = true;
350 break;
356 assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
357 "Cannot have both context-sensitive and regular profile");
358 ProfileIsCS = (CSProfileCount > 0);
359 assert((ProbeProfileCount == 0 || ProbeProfileCount == Profiles.size()) &&
360 "Cannot have both probe-based profiles and regular profiles");
361 ProfileIsProbeBased = (ProbeProfileCount > 0);
362 FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
363 FunctionSamples::ProfileIsCS = ProfileIsCS;
365 if (Result == sampleprof_error::success)
366 computeSummary();
368 return Result;
371 bool SampleProfileReaderText::hasFormat(const MemoryBuffer &Buffer) {
372 bool result = false;
374 // Check that the first non-comment line is a valid function header.
375 line_iterator LineIt(Buffer, /*SkipBlanks=*/true, '#');
376 if (!LineIt.is_at_eof()) {
377 if ((*LineIt)[0] != ' ') {
378 uint64_t NumSamples, NumHeadSamples;
379 StringRef FName;
380 result = ParseHead(*LineIt, FName, NumSamples, NumHeadSamples);
384 return result;
387 template <typename T> ErrorOr<T> SampleProfileReaderBinary::readNumber() {
388 unsigned NumBytesRead = 0;
389 std::error_code EC;
390 uint64_t Val = decodeULEB128(Data, &NumBytesRead);
392 if (Val > std::numeric_limits<T>::max())
393 EC = sampleprof_error::malformed;
394 else if (Data + NumBytesRead > End)
395 EC = sampleprof_error::truncated;
396 else
397 EC = sampleprof_error::success;
399 if (EC) {
400 reportError(0, EC.message());
401 return EC;
404 Data += NumBytesRead;
405 return static_cast<T>(Val);
408 ErrorOr<StringRef> SampleProfileReaderBinary::readString() {
409 std::error_code EC;
410 StringRef Str(reinterpret_cast<const char *>(Data));
411 if (Data + Str.size() + 1 > End) {
412 EC = sampleprof_error::truncated;
413 reportError(0, EC.message());
414 return EC;
417 Data += Str.size() + 1;
418 return Str;
421 template <typename T>
422 ErrorOr<T> SampleProfileReaderBinary::readUnencodedNumber() {
423 std::error_code EC;
425 if (Data + sizeof(T) > End) {
426 EC = sampleprof_error::truncated;
427 reportError(0, EC.message());
428 return EC;
431 using namespace support;
432 T Val = endian::readNext<T, little, unaligned>(Data);
433 return Val;
436 template <typename T>
437 inline ErrorOr<uint32_t> SampleProfileReaderBinary::readStringIndex(T &Table) {
438 std::error_code EC;
439 auto Idx = readNumber<uint32_t>();
440 if (std::error_code EC = Idx.getError())
441 return EC;
442 if (*Idx >= Table.size())
443 return sampleprof_error::truncated_name_table;
444 return *Idx;
447 ErrorOr<StringRef> SampleProfileReaderBinary::readStringFromTable() {
448 auto Idx = readStringIndex(NameTable);
449 if (std::error_code EC = Idx.getError())
450 return EC;
452 return NameTable[*Idx];
455 ErrorOr<StringRef> SampleProfileReaderExtBinaryBase::readStringFromTable() {
456 if (!FixedLengthMD5)
457 return SampleProfileReaderBinary::readStringFromTable();
459 // read NameTable index.
460 auto Idx = readStringIndex(NameTable);
461 if (std::error_code EC = Idx.getError())
462 return EC;
464 // Check whether the name to be accessed has been accessed before,
465 // if not, read it from memory directly.
466 StringRef &SR = NameTable[*Idx];
467 if (SR.empty()) {
468 const uint8_t *SavedData = Data;
469 Data = MD5NameMemStart + ((*Idx) * sizeof(uint64_t));
470 auto FID = readUnencodedNumber<uint64_t>();
471 if (std::error_code EC = FID.getError())
472 return EC;
473 // Save the string converted from uint64_t in MD5StringBuf. All the
474 // references to the name are all StringRefs refering to the string
475 // in MD5StringBuf.
476 MD5StringBuf->push_back(std::to_string(*FID));
477 SR = MD5StringBuf->back();
478 Data = SavedData;
480 return SR;
483 ErrorOr<StringRef> SampleProfileReaderCompactBinary::readStringFromTable() {
484 auto Idx = readStringIndex(NameTable);
485 if (std::error_code EC = Idx.getError())
486 return EC;
488 return StringRef(NameTable[*Idx]);
491 std::error_code
492 SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
493 auto NumSamples = readNumber<uint64_t>();
494 if (std::error_code EC = NumSamples.getError())
495 return EC;
496 FProfile.addTotalSamples(*NumSamples);
498 // Read the samples in the body.
499 auto NumRecords = readNumber<uint32_t>();
500 if (std::error_code EC = NumRecords.getError())
501 return EC;
503 for (uint32_t I = 0; I < *NumRecords; ++I) {
504 auto LineOffset = readNumber<uint64_t>();
505 if (std::error_code EC = LineOffset.getError())
506 return EC;
508 if (!isOffsetLegal(*LineOffset)) {
509 return std::error_code();
512 auto Discriminator = readNumber<uint64_t>();
513 if (std::error_code EC = Discriminator.getError())
514 return EC;
516 auto NumSamples = readNumber<uint64_t>();
517 if (std::error_code EC = NumSamples.getError())
518 return EC;
520 auto NumCalls = readNumber<uint32_t>();
521 if (std::error_code EC = NumCalls.getError())
522 return EC;
524 // Here we handle FS discriminators:
525 uint32_t DiscriminatorVal = (*Discriminator) & getDiscriminatorMask();
527 for (uint32_t J = 0; J < *NumCalls; ++J) {
528 auto CalledFunction(readStringFromTable());
529 if (std::error_code EC = CalledFunction.getError())
530 return EC;
532 auto CalledFunctionSamples = readNumber<uint64_t>();
533 if (std::error_code EC = CalledFunctionSamples.getError())
534 return EC;
536 FProfile.addCalledTargetSamples(*LineOffset, DiscriminatorVal,
537 *CalledFunction, *CalledFunctionSamples);
540 FProfile.addBodySamples(*LineOffset, DiscriminatorVal, *NumSamples);
543 // Read all the samples for inlined function calls.
544 auto NumCallsites = readNumber<uint32_t>();
545 if (std::error_code EC = NumCallsites.getError())
546 return EC;
548 for (uint32_t J = 0; J < *NumCallsites; ++J) {
549 auto LineOffset = readNumber<uint64_t>();
550 if (std::error_code EC = LineOffset.getError())
551 return EC;
553 auto Discriminator = readNumber<uint64_t>();
554 if (std::error_code EC = Discriminator.getError())
555 return EC;
557 auto FName(readStringFromTable());
558 if (std::error_code EC = FName.getError())
559 return EC;
561 // Here we handle FS discriminators:
562 uint32_t DiscriminatorVal = (*Discriminator) & getDiscriminatorMask();
564 FunctionSamples &CalleeProfile = FProfile.functionSamplesAt(
565 LineLocation(*LineOffset, DiscriminatorVal))[std::string(*FName)];
566 CalleeProfile.setName(*FName);
567 if (std::error_code EC = readProfile(CalleeProfile))
568 return EC;
571 return sampleprof_error::success;
574 std::error_code
575 SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
576 Data = Start;
577 auto NumHeadSamples = readNumber<uint64_t>();
578 if (std::error_code EC = NumHeadSamples.getError())
579 return EC;
581 auto FName(readStringFromTable());
582 if (std::error_code EC = FName.getError())
583 return EC;
585 SampleContext FContext(*FName);
586 Profiles[FContext] = FunctionSamples();
587 FunctionSamples &FProfile = Profiles[FContext];
588 FProfile.setName(FContext.getNameWithoutContext());
589 FProfile.setContext(FContext);
590 FProfile.addHeadSamples(*NumHeadSamples);
592 if (FContext.hasContext())
593 CSProfileCount++;
595 if (std::error_code EC = readProfile(FProfile))
596 return EC;
597 return sampleprof_error::success;
600 std::error_code SampleProfileReaderBinary::readImpl() {
601 ProfileIsFS = ProfileIsFSDisciminator;
602 while (!at_eof()) {
603 if (std::error_code EC = readFuncProfile(Data))
604 return EC;
607 return sampleprof_error::success;
610 std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
611 const uint8_t *Start, uint64_t Size, const SecHdrTableEntry &Entry) {
612 Data = Start;
613 End = Start + Size;
614 switch (Entry.Type) {
615 case SecProfSummary:
616 if (std::error_code EC = readSummary())
617 return EC;
618 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
619 Summary->setPartialProfile(true);
620 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext))
621 FunctionSamples::ProfileIsCS = ProfileIsCS = true;
622 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator))
623 FunctionSamples::ProfileIsFS = ProfileIsFS = true;
624 break;
625 case SecNameTable: {
626 FixedLengthMD5 =
627 hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5);
628 bool UseMD5 = hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name);
629 assert((!FixedLengthMD5 || UseMD5) &&
630 "If FixedLengthMD5 is true, UseMD5 has to be true");
631 FunctionSamples::HasUniqSuffix =
632 hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix);
633 if (std::error_code EC = readNameTableSec(UseMD5))
634 return EC;
635 break;
637 case SecLBRProfile:
638 if (std::error_code EC = readFuncProfiles())
639 return EC;
640 break;
641 case SecFuncOffsetTable:
642 if (std::error_code EC = readFuncOffsetTable())
643 return EC;
644 break;
645 case SecFuncMetadata: {
646 ProfileIsProbeBased =
647 hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased);
648 FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
649 bool HasAttribute =
650 hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute);
651 if (std::error_code EC = readFuncMetadata(HasAttribute))
652 return EC;
653 break;
655 case SecProfileSymbolList:
656 if (std::error_code EC = readProfileSymbolList())
657 return EC;
658 break;
659 default:
660 if (std::error_code EC = readCustomSection(Entry))
661 return EC;
662 break;
664 return sampleprof_error::success;
667 bool SampleProfileReaderExtBinaryBase::collectFuncsFromModule() {
668 if (!M)
669 return false;
670 FuncsToUse.clear();
671 for (auto &F : *M)
672 FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F));
673 return true;
676 std::error_code SampleProfileReaderExtBinaryBase::readFuncOffsetTable() {
677 // If there are more than one FuncOffsetTable, the profile read associated
678 // with previous FuncOffsetTable has to be done before next FuncOffsetTable
679 // is read.
680 FuncOffsetTable.clear();
682 auto Size = readNumber<uint64_t>();
683 if (std::error_code EC = Size.getError())
684 return EC;
686 FuncOffsetTable.reserve(*Size);
687 for (uint32_t I = 0; I < *Size; ++I) {
688 auto FName(readStringFromTable());
689 if (std::error_code EC = FName.getError())
690 return EC;
692 auto Offset = readNumber<uint64_t>();
693 if (std::error_code EC = Offset.getError())
694 return EC;
696 FuncOffsetTable[*FName] = *Offset;
698 return sampleprof_error::success;
701 std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
702 // Collect functions used by current module if the Reader has been
703 // given a module.
704 // collectFuncsFromModule uses FunctionSamples::getCanonicalFnName
705 // which will query FunctionSamples::HasUniqSuffix, so it has to be
706 // called after FunctionSamples::HasUniqSuffix is set, i.e. after
707 // NameTable section is read.
708 bool LoadFuncsToBeUsed = collectFuncsFromModule();
710 // When LoadFuncsToBeUsed is false, load all the function profiles.
711 const uint8_t *Start = Data;
712 if (!LoadFuncsToBeUsed) {
713 while (Data < End) {
714 if (std::error_code EC = readFuncProfile(Data))
715 return EC;
717 assert(Data == End && "More data is read than expected");
718 } else {
719 // Load function profiles on demand.
720 if (Remapper) {
721 for (auto Name : FuncsToUse) {
722 Remapper->insert(Name);
726 if (useMD5()) {
727 for (auto Name : FuncsToUse) {
728 auto GUID = std::to_string(MD5Hash(Name));
729 auto iter = FuncOffsetTable.find(StringRef(GUID));
730 if (iter == FuncOffsetTable.end())
731 continue;
732 const uint8_t *FuncProfileAddr = Start + iter->second;
733 assert(FuncProfileAddr < End && "out of LBRProfile section");
734 if (std::error_code EC = readFuncProfile(FuncProfileAddr))
735 return EC;
737 } else if (FunctionSamples::ProfileIsCS) {
738 // Compute the ordered set of names, so we can
739 // get all context profiles under a subtree by
740 // iterating through the ordered names.
741 struct Comparer {
742 // Ignore the closing ']' when ordering context
743 bool operator()(const StringRef &L, const StringRef &R) const {
744 return L.substr(0, L.size() - 1) < R.substr(0, R.size() - 1);
747 std::set<StringRef, Comparer> OrderedNames;
748 for (auto Name : FuncOffsetTable) {
749 OrderedNames.insert(Name.first);
752 // For each function in current module, load all
753 // context profiles for the function.
754 for (auto NameOffset : FuncOffsetTable) {
755 StringRef ContextName = NameOffset.first;
756 SampleContext FContext(ContextName);
757 auto FuncName = FContext.getNameWithoutContext();
758 if (!FuncsToUse.count(FuncName) &&
759 (!Remapper || !Remapper->exist(FuncName)))
760 continue;
762 // For each context profile we need, try to load
763 // all context profile in the subtree. This can
764 // help profile guided importing for ThinLTO.
765 auto It = OrderedNames.find(ContextName);
766 while (It != OrderedNames.end() &&
767 It->startswith(ContextName.substr(0, ContextName.size() - 1))) {
768 const uint8_t *FuncProfileAddr = Start + FuncOffsetTable[*It];
769 assert(FuncProfileAddr < End && "out of LBRProfile section");
770 if (std::error_code EC = readFuncProfile(FuncProfileAddr))
771 return EC;
772 // Remove loaded context profile so we won't
773 // load it repeatedly.
774 It = OrderedNames.erase(It);
777 } else {
778 for (auto NameOffset : FuncOffsetTable) {
779 SampleContext FContext(NameOffset.first);
780 auto FuncName = FContext.getNameWithoutContext();
781 if (!FuncsToUse.count(FuncName) &&
782 (!Remapper || !Remapper->exist(FuncName)))
783 continue;
784 const uint8_t *FuncProfileAddr = Start + NameOffset.second;
785 assert(FuncProfileAddr < End && "out of LBRProfile section");
786 if (std::error_code EC = readFuncProfile(FuncProfileAddr))
787 return EC;
790 Data = End;
792 assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
793 "Cannot have both context-sensitive and regular profile");
794 assert(ProfileIsCS == (CSProfileCount > 0) &&
795 "Section flag should be consistent with actual profile");
796 return sampleprof_error::success;
799 std::error_code SampleProfileReaderExtBinaryBase::readProfileSymbolList() {
800 if (!ProfSymList)
801 ProfSymList = std::make_unique<ProfileSymbolList>();
803 if (std::error_code EC = ProfSymList->read(Data, End - Data))
804 return EC;
806 Data = End;
807 return sampleprof_error::success;
810 std::error_code SampleProfileReaderExtBinaryBase::decompressSection(
811 const uint8_t *SecStart, const uint64_t SecSize,
812 const uint8_t *&DecompressBuf, uint64_t &DecompressBufSize) {
813 Data = SecStart;
814 End = SecStart + SecSize;
815 auto DecompressSize = readNumber<uint64_t>();
816 if (std::error_code EC = DecompressSize.getError())
817 return EC;
818 DecompressBufSize = *DecompressSize;
820 auto CompressSize = readNumber<uint64_t>();
821 if (std::error_code EC = CompressSize.getError())
822 return EC;
824 if (!llvm::zlib::isAvailable())
825 return sampleprof_error::zlib_unavailable;
827 StringRef CompressedStrings(reinterpret_cast<const char *>(Data),
828 *CompressSize);
829 char *Buffer = Allocator.Allocate<char>(DecompressBufSize);
830 size_t UCSize = DecompressBufSize;
831 llvm::Error E =
832 zlib::uncompress(CompressedStrings, Buffer, UCSize);
833 if (E)
834 return sampleprof_error::uncompress_failed;
835 DecompressBuf = reinterpret_cast<const uint8_t *>(Buffer);
836 return sampleprof_error::success;
839 std::error_code SampleProfileReaderExtBinaryBase::readImpl() {
840 const uint8_t *BufStart =
841 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
843 for (auto &Entry : SecHdrTable) {
844 // Skip empty section.
845 if (!Entry.Size)
846 continue;
848 // Skip sections without context when SkipFlatProf is true.
849 if (SkipFlatProf && hasSecFlag(Entry, SecCommonFlags::SecFlagFlat))
850 continue;
852 const uint8_t *SecStart = BufStart + Entry.Offset;
853 uint64_t SecSize = Entry.Size;
855 // If the section is compressed, decompress it into a buffer
856 // DecompressBuf before reading the actual data. The pointee of
857 // 'Data' will be changed to buffer hold by DecompressBuf
858 // temporarily when reading the actual data.
859 bool isCompressed = hasSecFlag(Entry, SecCommonFlags::SecFlagCompress);
860 if (isCompressed) {
861 const uint8_t *DecompressBuf;
862 uint64_t DecompressBufSize;
863 if (std::error_code EC = decompressSection(
864 SecStart, SecSize, DecompressBuf, DecompressBufSize))
865 return EC;
866 SecStart = DecompressBuf;
867 SecSize = DecompressBufSize;
870 if (std::error_code EC = readOneSection(SecStart, SecSize, Entry))
871 return EC;
872 if (Data != SecStart + SecSize)
873 return sampleprof_error::malformed;
875 // Change the pointee of 'Data' from DecompressBuf to original Buffer.
876 if (isCompressed) {
877 Data = BufStart + Entry.Offset;
878 End = BufStart + Buffer->getBufferSize();
882 return sampleprof_error::success;
885 std::error_code SampleProfileReaderCompactBinary::readImpl() {
886 // Collect functions used by current module if the Reader has been
887 // given a module.
888 bool LoadFuncsToBeUsed = collectFuncsFromModule();
889 ProfileIsFS = ProfileIsFSDisciminator;
890 std::vector<uint64_t> OffsetsToUse;
891 if (!LoadFuncsToBeUsed) {
892 // load all the function profiles.
893 for (auto FuncEntry : FuncOffsetTable) {
894 OffsetsToUse.push_back(FuncEntry.second);
896 } else {
897 // load function profiles on demand.
898 for (auto Name : FuncsToUse) {
899 auto GUID = std::to_string(MD5Hash(Name));
900 auto iter = FuncOffsetTable.find(StringRef(GUID));
901 if (iter == FuncOffsetTable.end())
902 continue;
903 OffsetsToUse.push_back(iter->second);
907 for (auto Offset : OffsetsToUse) {
908 const uint8_t *SavedData = Data;
909 if (std::error_code EC = readFuncProfile(
910 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
911 Offset))
912 return EC;
913 Data = SavedData;
915 return sampleprof_error::success;
918 std::error_code SampleProfileReaderRawBinary::verifySPMagic(uint64_t Magic) {
919 if (Magic == SPMagic())
920 return sampleprof_error::success;
921 return sampleprof_error::bad_magic;
924 std::error_code SampleProfileReaderExtBinary::verifySPMagic(uint64_t Magic) {
925 if (Magic == SPMagic(SPF_Ext_Binary))
926 return sampleprof_error::success;
927 return sampleprof_error::bad_magic;
930 std::error_code
931 SampleProfileReaderCompactBinary::verifySPMagic(uint64_t Magic) {
932 if (Magic == SPMagic(SPF_Compact_Binary))
933 return sampleprof_error::success;
934 return sampleprof_error::bad_magic;
937 std::error_code SampleProfileReaderBinary::readNameTable() {
938 auto Size = readNumber<uint32_t>();
939 if (std::error_code EC = Size.getError())
940 return EC;
941 NameTable.reserve(*Size + NameTable.size());
942 for (uint32_t I = 0; I < *Size; ++I) {
943 auto Name(readString());
944 if (std::error_code EC = Name.getError())
945 return EC;
946 NameTable.push_back(*Name);
949 return sampleprof_error::success;
952 std::error_code SampleProfileReaderExtBinaryBase::readMD5NameTable() {
953 auto Size = readNumber<uint64_t>();
954 if (std::error_code EC = Size.getError())
955 return EC;
956 MD5StringBuf = std::make_unique<std::vector<std::string>>();
957 MD5StringBuf->reserve(*Size);
958 if (FixedLengthMD5) {
959 // Preallocate and initialize NameTable so we can check whether a name
960 // index has been read before by checking whether the element in the
961 // NameTable is empty, meanwhile readStringIndex can do the boundary
962 // check using the size of NameTable.
963 NameTable.resize(*Size + NameTable.size());
965 MD5NameMemStart = Data;
966 Data = Data + (*Size) * sizeof(uint64_t);
967 return sampleprof_error::success;
969 NameTable.reserve(*Size);
970 for (uint32_t I = 0; I < *Size; ++I) {
971 auto FID = readNumber<uint64_t>();
972 if (std::error_code EC = FID.getError())
973 return EC;
974 MD5StringBuf->push_back(std::to_string(*FID));
975 // NameTable is a vector of StringRef. Here it is pushing back a
976 // StringRef initialized with the last string in MD5stringBuf.
977 NameTable.push_back(MD5StringBuf->back());
979 return sampleprof_error::success;
982 std::error_code SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5) {
983 if (IsMD5)
984 return readMD5NameTable();
985 return SampleProfileReaderBinary::readNameTable();
988 std::error_code
989 SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) {
990 while (Data < End) {
991 auto FName(readStringFromTable());
992 if (std::error_code EC = FName.getError())
993 return EC;
995 SampleContext FContext(*FName);
996 bool ProfileInMap = Profiles.count(FContext);
998 if (ProfileIsProbeBased) {
999 auto Checksum = readNumber<uint64_t>();
1000 if (std::error_code EC = Checksum.getError())
1001 return EC;
1002 if (ProfileInMap)
1003 Profiles[FContext].setFunctionHash(*Checksum);
1006 if (ProfileHasAttribute) {
1007 auto Attributes = readNumber<uint32_t>();
1008 if (std::error_code EC = Attributes.getError())
1009 return EC;
1010 if (ProfileInMap)
1011 Profiles[FContext].getContext().setAllAttributes(*Attributes);
1015 assert(Data == End && "More data is read than expected");
1016 return sampleprof_error::success;
1019 std::error_code SampleProfileReaderCompactBinary::readNameTable() {
1020 auto Size = readNumber<uint64_t>();
1021 if (std::error_code EC = Size.getError())
1022 return EC;
1023 NameTable.reserve(*Size);
1024 for (uint32_t I = 0; I < *Size; ++I) {
1025 auto FID = readNumber<uint64_t>();
1026 if (std::error_code EC = FID.getError())
1027 return EC;
1028 NameTable.push_back(std::to_string(*FID));
1030 return sampleprof_error::success;
1033 std::error_code
1034 SampleProfileReaderExtBinaryBase::readSecHdrTableEntry(uint32_t Idx) {
1035 SecHdrTableEntry Entry;
1036 auto Type = readUnencodedNumber<uint64_t>();
1037 if (std::error_code EC = Type.getError())
1038 return EC;
1039 Entry.Type = static_cast<SecType>(*Type);
1041 auto Flags = readUnencodedNumber<uint64_t>();
1042 if (std::error_code EC = Flags.getError())
1043 return EC;
1044 Entry.Flags = *Flags;
1046 auto Offset = readUnencodedNumber<uint64_t>();
1047 if (std::error_code EC = Offset.getError())
1048 return EC;
1049 Entry.Offset = *Offset;
1051 auto Size = readUnencodedNumber<uint64_t>();
1052 if (std::error_code EC = Size.getError())
1053 return EC;
1054 Entry.Size = *Size;
1056 Entry.LayoutIndex = Idx;
1057 SecHdrTable.push_back(std::move(Entry));
1058 return sampleprof_error::success;
1061 std::error_code SampleProfileReaderExtBinaryBase::readSecHdrTable() {
1062 auto EntryNum = readUnencodedNumber<uint64_t>();
1063 if (std::error_code EC = EntryNum.getError())
1064 return EC;
1066 for (uint32_t i = 0; i < (*EntryNum); i++)
1067 if (std::error_code EC = readSecHdrTableEntry(i))
1068 return EC;
1070 return sampleprof_error::success;
1073 std::error_code SampleProfileReaderExtBinaryBase::readHeader() {
1074 const uint8_t *BufStart =
1075 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
1076 Data = BufStart;
1077 End = BufStart + Buffer->getBufferSize();
1079 if (std::error_code EC = readMagicIdent())
1080 return EC;
1082 if (std::error_code EC = readSecHdrTable())
1083 return EC;
1085 return sampleprof_error::success;
1088 uint64_t SampleProfileReaderExtBinaryBase::getSectionSize(SecType Type) {
1089 uint64_t Size = 0;
1090 for (auto &Entry : SecHdrTable) {
1091 if (Entry.Type == Type)
1092 Size += Entry.Size;
1094 return Size;
1097 uint64_t SampleProfileReaderExtBinaryBase::getFileSize() {
1098 // Sections in SecHdrTable is not necessarily in the same order as
1099 // sections in the profile because section like FuncOffsetTable needs
1100 // to be written after section LBRProfile but needs to be read before
1101 // section LBRProfile, so we cannot simply use the last entry in
1102 // SecHdrTable to calculate the file size.
1103 uint64_t FileSize = 0;
1104 for (auto &Entry : SecHdrTable) {
1105 FileSize = std::max(Entry.Offset + Entry.Size, FileSize);
1107 return FileSize;
1110 static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) {
1111 std::string Flags;
1112 if (hasSecFlag(Entry, SecCommonFlags::SecFlagCompress))
1113 Flags.append("{compressed,");
1114 else
1115 Flags.append("{");
1117 if (hasSecFlag(Entry, SecCommonFlags::SecFlagFlat))
1118 Flags.append("flat,");
1120 switch (Entry.Type) {
1121 case SecNameTable:
1122 if (hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5))
1123 Flags.append("fixlenmd5,");
1124 else if (hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name))
1125 Flags.append("md5,");
1126 if (hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix))
1127 Flags.append("uniq,");
1128 break;
1129 case SecProfSummary:
1130 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
1131 Flags.append("partial,");
1132 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFullContext))
1133 Flags.append("context,");
1134 if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator))
1135 Flags.append("fs-discriminator,");
1136 break;
1137 default:
1138 break;
1140 char &last = Flags.back();
1141 if (last == ',')
1142 last = '}';
1143 else
1144 Flags.append("}");
1145 return Flags;
1148 bool SampleProfileReaderExtBinaryBase::dumpSectionInfo(raw_ostream &OS) {
1149 uint64_t TotalSecsSize = 0;
1150 for (auto &Entry : SecHdrTable) {
1151 OS << getSecName(Entry.Type) << " - Offset: " << Entry.Offset
1152 << ", Size: " << Entry.Size << ", Flags: " << getSecFlagsStr(Entry)
1153 << "\n";
1155 TotalSecsSize += Entry.Size;
1157 uint64_t HeaderSize = SecHdrTable.front().Offset;
1158 assert(HeaderSize + TotalSecsSize == getFileSize() &&
1159 "Size of 'header + sections' doesn't match the total size of profile");
1161 OS << "Header Size: " << HeaderSize << "\n";
1162 OS << "Total Sections Size: " << TotalSecsSize << "\n";
1163 OS << "File Size: " << getFileSize() << "\n";
1164 return true;
1167 std::error_code SampleProfileReaderBinary::readMagicIdent() {
1168 // Read and check the magic identifier.
1169 auto Magic = readNumber<uint64_t>();
1170 if (std::error_code EC = Magic.getError())
1171 return EC;
1172 else if (std::error_code EC = verifySPMagic(*Magic))
1173 return EC;
1175 // Read the version number.
1176 auto Version = readNumber<uint64_t>();
1177 if (std::error_code EC = Version.getError())
1178 return EC;
1179 else if (*Version != SPVersion())
1180 return sampleprof_error::unsupported_version;
1182 return sampleprof_error::success;
1185 std::error_code SampleProfileReaderBinary::readHeader() {
1186 Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
1187 End = Data + Buffer->getBufferSize();
1189 if (std::error_code EC = readMagicIdent())
1190 return EC;
1192 if (std::error_code EC = readSummary())
1193 return EC;
1195 if (std::error_code EC = readNameTable())
1196 return EC;
1197 return sampleprof_error::success;
1200 std::error_code SampleProfileReaderCompactBinary::readHeader() {
1201 SampleProfileReaderBinary::readHeader();
1202 if (std::error_code EC = readFuncOffsetTable())
1203 return EC;
1204 return sampleprof_error::success;
1207 std::error_code SampleProfileReaderCompactBinary::readFuncOffsetTable() {
1208 auto TableOffset = readUnencodedNumber<uint64_t>();
1209 if (std::error_code EC = TableOffset.getError())
1210 return EC;
1212 const uint8_t *SavedData = Data;
1213 const uint8_t *TableStart =
1214 reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
1215 *TableOffset;
1216 Data = TableStart;
1218 auto Size = readNumber<uint64_t>();
1219 if (std::error_code EC = Size.getError())
1220 return EC;
1222 FuncOffsetTable.reserve(*Size);
1223 for (uint32_t I = 0; I < *Size; ++I) {
1224 auto FName(readStringFromTable());
1225 if (std::error_code EC = FName.getError())
1226 return EC;
1228 auto Offset = readNumber<uint64_t>();
1229 if (std::error_code EC = Offset.getError())
1230 return EC;
1232 FuncOffsetTable[*FName] = *Offset;
1234 End = TableStart;
1235 Data = SavedData;
1236 return sampleprof_error::success;
1239 bool SampleProfileReaderCompactBinary::collectFuncsFromModule() {
1240 if (!M)
1241 return false;
1242 FuncsToUse.clear();
1243 for (auto &F : *M)
1244 FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F));
1245 return true;
1248 std::error_code SampleProfileReaderBinary::readSummaryEntry(
1249 std::vector<ProfileSummaryEntry> &Entries) {
1250 auto Cutoff = readNumber<uint64_t>();
1251 if (std::error_code EC = Cutoff.getError())
1252 return EC;
1254 auto MinBlockCount = readNumber<uint64_t>();
1255 if (std::error_code EC = MinBlockCount.getError())
1256 return EC;
1258 auto NumBlocks = readNumber<uint64_t>();
1259 if (std::error_code EC = NumBlocks.getError())
1260 return EC;
1262 Entries.emplace_back(*Cutoff, *MinBlockCount, *NumBlocks);
1263 return sampleprof_error::success;
1266 std::error_code SampleProfileReaderBinary::readSummary() {
1267 auto TotalCount = readNumber<uint64_t>();
1268 if (std::error_code EC = TotalCount.getError())
1269 return EC;
1271 auto MaxBlockCount = readNumber<uint64_t>();
1272 if (std::error_code EC = MaxBlockCount.getError())
1273 return EC;
1275 auto MaxFunctionCount = readNumber<uint64_t>();
1276 if (std::error_code EC = MaxFunctionCount.getError())
1277 return EC;
1279 auto NumBlocks = readNumber<uint64_t>();
1280 if (std::error_code EC = NumBlocks.getError())
1281 return EC;
1283 auto NumFunctions = readNumber<uint64_t>();
1284 if (std::error_code EC = NumFunctions.getError())
1285 return EC;
1287 auto NumSummaryEntries = readNumber<uint64_t>();
1288 if (std::error_code EC = NumSummaryEntries.getError())
1289 return EC;
1291 std::vector<ProfileSummaryEntry> Entries;
1292 for (unsigned i = 0; i < *NumSummaryEntries; i++) {
1293 std::error_code EC = readSummaryEntry(Entries);
1294 if (EC != sampleprof_error::success)
1295 return EC;
1297 Summary = std::make_unique<ProfileSummary>(
1298 ProfileSummary::PSK_Sample, Entries, *TotalCount, *MaxBlockCount, 0,
1299 *MaxFunctionCount, *NumBlocks, *NumFunctions);
1301 return sampleprof_error::success;
1304 bool SampleProfileReaderRawBinary::hasFormat(const MemoryBuffer &Buffer) {
1305 const uint8_t *Data =
1306 reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1307 uint64_t Magic = decodeULEB128(Data);
1308 return Magic == SPMagic();
1311 bool SampleProfileReaderExtBinary::hasFormat(const MemoryBuffer &Buffer) {
1312 const uint8_t *Data =
1313 reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1314 uint64_t Magic = decodeULEB128(Data);
1315 return Magic == SPMagic(SPF_Ext_Binary);
1318 bool SampleProfileReaderCompactBinary::hasFormat(const MemoryBuffer &Buffer) {
1319 const uint8_t *Data =
1320 reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
1321 uint64_t Magic = decodeULEB128(Data);
1322 return Magic == SPMagic(SPF_Compact_Binary);
1325 std::error_code SampleProfileReaderGCC::skipNextWord() {
1326 uint32_t dummy;
1327 if (!GcovBuffer.readInt(dummy))
1328 return sampleprof_error::truncated;
1329 return sampleprof_error::success;
1332 template <typename T> ErrorOr<T> SampleProfileReaderGCC::readNumber() {
1333 if (sizeof(T) <= sizeof(uint32_t)) {
1334 uint32_t Val;
1335 if (GcovBuffer.readInt(Val) && Val <= std::numeric_limits<T>::max())
1336 return static_cast<T>(Val);
1337 } else if (sizeof(T) <= sizeof(uint64_t)) {
1338 uint64_t Val;
1339 if (GcovBuffer.readInt64(Val) && Val <= std::numeric_limits<T>::max())
1340 return static_cast<T>(Val);
1343 std::error_code EC = sampleprof_error::malformed;
1344 reportError(0, EC.message());
1345 return EC;
1348 ErrorOr<StringRef> SampleProfileReaderGCC::readString() {
1349 StringRef Str;
1350 if (!GcovBuffer.readString(Str))
1351 return sampleprof_error::truncated;
1352 return Str;
1355 std::error_code SampleProfileReaderGCC::readHeader() {
1356 // Read the magic identifier.
1357 if (!GcovBuffer.readGCDAFormat())
1358 return sampleprof_error::unrecognized_format;
1360 // Read the version number. Note - the GCC reader does not validate this
1361 // version, but the profile creator generates v704.
1362 GCOV::GCOVVersion version;
1363 if (!GcovBuffer.readGCOVVersion(version))
1364 return sampleprof_error::unrecognized_format;
1366 if (version != GCOV::V407)
1367 return sampleprof_error::unsupported_version;
1369 // Skip the empty integer.
1370 if (std::error_code EC = skipNextWord())
1371 return EC;
1373 return sampleprof_error::success;
1376 std::error_code SampleProfileReaderGCC::readSectionTag(uint32_t Expected) {
1377 uint32_t Tag;
1378 if (!GcovBuffer.readInt(Tag))
1379 return sampleprof_error::truncated;
1381 if (Tag != Expected)
1382 return sampleprof_error::malformed;
1384 if (std::error_code EC = skipNextWord())
1385 return EC;
1387 return sampleprof_error::success;
1390 std::error_code SampleProfileReaderGCC::readNameTable() {
1391 if (std::error_code EC = readSectionTag(GCOVTagAFDOFileNames))
1392 return EC;
1394 uint32_t Size;
1395 if (!GcovBuffer.readInt(Size))
1396 return sampleprof_error::truncated;
1398 for (uint32_t I = 0; I < Size; ++I) {
1399 StringRef Str;
1400 if (!GcovBuffer.readString(Str))
1401 return sampleprof_error::truncated;
1402 Names.push_back(std::string(Str));
1405 return sampleprof_error::success;
1408 std::error_code SampleProfileReaderGCC::readFunctionProfiles() {
1409 if (std::error_code EC = readSectionTag(GCOVTagAFDOFunction))
1410 return EC;
1412 uint32_t NumFunctions;
1413 if (!GcovBuffer.readInt(NumFunctions))
1414 return sampleprof_error::truncated;
1416 InlineCallStack Stack;
1417 for (uint32_t I = 0; I < NumFunctions; ++I)
1418 if (std::error_code EC = readOneFunctionProfile(Stack, true, 0))
1419 return EC;
1421 computeSummary();
1422 return sampleprof_error::success;
1425 std::error_code SampleProfileReaderGCC::readOneFunctionProfile(
1426 const InlineCallStack &InlineStack, bool Update, uint32_t Offset) {
1427 uint64_t HeadCount = 0;
1428 if (InlineStack.size() == 0)
1429 if (!GcovBuffer.readInt64(HeadCount))
1430 return sampleprof_error::truncated;
1432 uint32_t NameIdx;
1433 if (!GcovBuffer.readInt(NameIdx))
1434 return sampleprof_error::truncated;
1436 StringRef Name(Names[NameIdx]);
1438 uint32_t NumPosCounts;
1439 if (!GcovBuffer.readInt(NumPosCounts))
1440 return sampleprof_error::truncated;
1442 uint32_t NumCallsites;
1443 if (!GcovBuffer.readInt(NumCallsites))
1444 return sampleprof_error::truncated;
1446 FunctionSamples *FProfile = nullptr;
1447 if (InlineStack.size() == 0) {
1448 // If this is a top function that we have already processed, do not
1449 // update its profile again. This happens in the presence of
1450 // function aliases. Since these aliases share the same function
1451 // body, there will be identical replicated profiles for the
1452 // original function. In this case, we simply not bother updating
1453 // the profile of the original function.
1454 FProfile = &Profiles[Name];
1455 FProfile->addHeadSamples(HeadCount);
1456 if (FProfile->getTotalSamples() > 0)
1457 Update = false;
1458 } else {
1459 // Otherwise, we are reading an inlined instance. The top of the
1460 // inline stack contains the profile of the caller. Insert this
1461 // callee in the caller's CallsiteMap.
1462 FunctionSamples *CallerProfile = InlineStack.front();
1463 uint32_t LineOffset = Offset >> 16;
1464 uint32_t Discriminator = Offset & 0xffff;
1465 FProfile = &CallerProfile->functionSamplesAt(
1466 LineLocation(LineOffset, Discriminator))[std::string(Name)];
1468 FProfile->setName(Name);
1470 for (uint32_t I = 0; I < NumPosCounts; ++I) {
1471 uint32_t Offset;
1472 if (!GcovBuffer.readInt(Offset))
1473 return sampleprof_error::truncated;
1475 uint32_t NumTargets;
1476 if (!GcovBuffer.readInt(NumTargets))
1477 return sampleprof_error::truncated;
1479 uint64_t Count;
1480 if (!GcovBuffer.readInt64(Count))
1481 return sampleprof_error::truncated;
1483 // The line location is encoded in the offset as:
1484 // high 16 bits: line offset to the start of the function.
1485 // low 16 bits: discriminator.
1486 uint32_t LineOffset = Offset >> 16;
1487 uint32_t Discriminator = Offset & 0xffff;
1489 InlineCallStack NewStack;
1490 NewStack.push_back(FProfile);
1491 llvm::append_range(NewStack, InlineStack);
1492 if (Update) {
1493 // Walk up the inline stack, adding the samples on this line to
1494 // the total sample count of the callers in the chain.
1495 for (auto CallerProfile : NewStack)
1496 CallerProfile->addTotalSamples(Count);
1498 // Update the body samples for the current profile.
1499 FProfile->addBodySamples(LineOffset, Discriminator, Count);
1502 // Process the list of functions called at an indirect call site.
1503 // These are all the targets that a function pointer (or virtual
1504 // function) resolved at runtime.
1505 for (uint32_t J = 0; J < NumTargets; J++) {
1506 uint32_t HistVal;
1507 if (!GcovBuffer.readInt(HistVal))
1508 return sampleprof_error::truncated;
1510 if (HistVal != HIST_TYPE_INDIR_CALL_TOPN)
1511 return sampleprof_error::malformed;
1513 uint64_t TargetIdx;
1514 if (!GcovBuffer.readInt64(TargetIdx))
1515 return sampleprof_error::truncated;
1516 StringRef TargetName(Names[TargetIdx]);
1518 uint64_t TargetCount;
1519 if (!GcovBuffer.readInt64(TargetCount))
1520 return sampleprof_error::truncated;
1522 if (Update)
1523 FProfile->addCalledTargetSamples(LineOffset, Discriminator,
1524 TargetName, TargetCount);
1528 // Process all the inlined callers into the current function. These
1529 // are all the callsites that were inlined into this function.
1530 for (uint32_t I = 0; I < NumCallsites; I++) {
1531 // The offset is encoded as:
1532 // high 16 bits: line offset to the start of the function.
1533 // low 16 bits: discriminator.
1534 uint32_t Offset;
1535 if (!GcovBuffer.readInt(Offset))
1536 return sampleprof_error::truncated;
1537 InlineCallStack NewStack;
1538 NewStack.push_back(FProfile);
1539 llvm::append_range(NewStack, InlineStack);
1540 if (std::error_code EC = readOneFunctionProfile(NewStack, Update, Offset))
1541 return EC;
1544 return sampleprof_error::success;
1547 /// Read a GCC AutoFDO profile.
1549 /// This format is generated by the Linux Perf conversion tool at
1550 /// https://github.com/google/autofdo.
1551 std::error_code SampleProfileReaderGCC::readImpl() {
1552 assert(!ProfileIsFSDisciminator && "Gcc profiles not support FSDisciminator");
1553 // Read the string table.
1554 if (std::error_code EC = readNameTable())
1555 return EC;
1557 // Read the source profile.
1558 if (std::error_code EC = readFunctionProfiles())
1559 return EC;
1561 return sampleprof_error::success;
1564 bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer &Buffer) {
1565 StringRef Magic(reinterpret_cast<const char *>(Buffer.getBufferStart()));
1566 return Magic == "adcg*704";
1569 void SampleProfileReaderItaniumRemapper::applyRemapping(LLVMContext &Ctx) {
1570 // If the reader uses MD5 to represent string, we can't remap it because
1571 // we don't know what the original function names were.
1572 if (Reader.useMD5()) {
1573 Ctx.diagnose(DiagnosticInfoSampleProfile(
1574 Reader.getBuffer()->getBufferIdentifier(),
1575 "Profile data remapping cannot be applied to profile data "
1576 "in compact format (original mangled names are not available).",
1577 DS_Warning));
1578 return;
1581 // CSSPGO-TODO: Remapper is not yet supported.
1582 // We will need to remap the entire context string.
1583 assert(Remappings && "should be initialized while creating remapper");
1584 for (auto &Sample : Reader.getProfiles()) {
1585 DenseSet<StringRef> NamesInSample;
1586 Sample.second.findAllNames(NamesInSample);
1587 for (auto &Name : NamesInSample)
1588 if (auto Key = Remappings->insert(Name))
1589 NameMap.insert({Key, Name});
1592 RemappingApplied = true;
1595 Optional<StringRef>
1596 SampleProfileReaderItaniumRemapper::lookUpNameInProfile(StringRef Fname) {
1597 if (auto Key = Remappings->lookup(Fname))
1598 return NameMap.lookup(Key);
1599 return None;
1602 /// Prepare a memory buffer for the contents of \p Filename.
1604 /// \returns an error code indicating the status of the buffer.
1605 static ErrorOr<std::unique_ptr<MemoryBuffer>>
1606 setupMemoryBuffer(const Twine &Filename) {
1607 auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(Filename, /*IsText=*/true);
1608 if (std::error_code EC = BufferOrErr.getError())
1609 return EC;
1610 auto Buffer = std::move(BufferOrErr.get());
1612 // Sanity check the file.
1613 if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint32_t>::max())
1614 return sampleprof_error::too_large;
1616 return std::move(Buffer);
1619 /// Create a sample profile reader based on the format of the input file.
1621 /// \param Filename The file to open.
1623 /// \param C The LLVM context to use to emit diagnostics.
1625 /// \param P The FSDiscriminatorPass.
1627 /// \param RemapFilename The file used for profile remapping.
1629 /// \returns an error code indicating the status of the created reader.
1630 ErrorOr<std::unique_ptr<SampleProfileReader>>
1631 SampleProfileReader::create(const std::string Filename, LLVMContext &C,
1632 FSDiscriminatorPass P,
1633 const std::string RemapFilename) {
1634 auto BufferOrError = setupMemoryBuffer(Filename);
1635 if (std::error_code EC = BufferOrError.getError())
1636 return EC;
1637 return create(BufferOrError.get(), C, P, RemapFilename);
1640 /// Create a sample profile remapper from the given input, to remap the
1641 /// function names in the given profile data.
1643 /// \param Filename The file to open.
1645 /// \param Reader The profile reader the remapper is going to be applied to.
1647 /// \param C The LLVM context to use to emit diagnostics.
1649 /// \returns an error code indicating the status of the created reader.
1650 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
1651 SampleProfileReaderItaniumRemapper::create(const std::string Filename,
1652 SampleProfileReader &Reader,
1653 LLVMContext &C) {
1654 auto BufferOrError = setupMemoryBuffer(Filename);
1655 if (std::error_code EC = BufferOrError.getError())
1656 return EC;
1657 return create(BufferOrError.get(), Reader, C);
1660 /// Create a sample profile remapper from the given input, to remap the
1661 /// function names in the given profile data.
1663 /// \param B The memory buffer to create the reader from (assumes ownership).
1665 /// \param C The LLVM context to use to emit diagnostics.
1667 /// \param Reader The profile reader the remapper is going to be applied to.
1669 /// \returns an error code indicating the status of the created reader.
1670 ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
1671 SampleProfileReaderItaniumRemapper::create(std::unique_ptr<MemoryBuffer> &B,
1672 SampleProfileReader &Reader,
1673 LLVMContext &C) {
1674 auto Remappings = std::make_unique<SymbolRemappingReader>();
1675 if (Error E = Remappings->read(*B.get())) {
1676 handleAllErrors(
1677 std::move(E), [&](const SymbolRemappingParseError &ParseError) {
1678 C.diagnose(DiagnosticInfoSampleProfile(B->getBufferIdentifier(),
1679 ParseError.getLineNum(),
1680 ParseError.getMessage()));
1682 return sampleprof_error::malformed;
1685 return std::make_unique<SampleProfileReaderItaniumRemapper>(
1686 std::move(B), std::move(Remappings), Reader);
1689 /// Create a sample profile reader based on the format of the input data.
1691 /// \param B The memory buffer to create the reader from (assumes ownership).
1693 /// \param C The LLVM context to use to emit diagnostics.
1695 /// \param P The FSDiscriminatorPass.
1697 /// \param RemapFilename The file used for profile remapping.
1699 /// \returns an error code indicating the status of the created reader.
1700 ErrorOr<std::unique_ptr<SampleProfileReader>>
1701 SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C,
1702 FSDiscriminatorPass P,
1703 const std::string RemapFilename) {
1704 std::unique_ptr<SampleProfileReader> Reader;
1705 if (SampleProfileReaderRawBinary::hasFormat(*B))
1706 Reader.reset(new SampleProfileReaderRawBinary(std::move(B), C));
1707 else if (SampleProfileReaderExtBinary::hasFormat(*B))
1708 Reader.reset(new SampleProfileReaderExtBinary(std::move(B), C));
1709 else if (SampleProfileReaderCompactBinary::hasFormat(*B))
1710 Reader.reset(new SampleProfileReaderCompactBinary(std::move(B), C));
1711 else if (SampleProfileReaderGCC::hasFormat(*B))
1712 Reader.reset(new SampleProfileReaderGCC(std::move(B), C));
1713 else if (SampleProfileReaderText::hasFormat(*B))
1714 Reader.reset(new SampleProfileReaderText(std::move(B), C));
1715 else
1716 return sampleprof_error::unrecognized_format;
1718 if (!RemapFilename.empty()) {
1719 auto ReaderOrErr =
1720 SampleProfileReaderItaniumRemapper::create(RemapFilename, *Reader, C);
1721 if (std::error_code EC = ReaderOrErr.getError()) {
1722 std::string Msg = "Could not create remapper: " + EC.message();
1723 C.diagnose(DiagnosticInfoSampleProfile(RemapFilename, Msg));
1724 return EC;
1726 Reader->Remapper = std::move(ReaderOrErr.get());
1729 FunctionSamples::Format = Reader->getFormat();
1730 if (std::error_code EC = Reader->readHeader()) {
1731 return EC;
1734 Reader->setDiscriminatorMaskedBitFrom(P);
1736 return std::move(Reader);
1739 // For text and GCC file formats, we compute the summary after reading the
1740 // profile. Binary format has the profile summary in its header.
1741 void SampleProfileReader::computeSummary() {
1742 SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
1743 Summary = Builder.computeSummaryForProfiles(Profiles);