1 //===- InstrProf.cpp - Instrumented profiling format support --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains support for clang's instrumentation-based PGO and
10 // coverage.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/ProfileData/InstrProf.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/SetVector.h"
17 #include "llvm/ADT/SmallString.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Config/config.h"
22 #include "llvm/IR/Constant.h"
23 #include "llvm/IR/Constants.h"
24 #include "llvm/IR/Function.h"
25 #include "llvm/IR/GlobalValue.h"
26 #include "llvm/IR/GlobalVariable.h"
27 #include "llvm/IR/Instruction.h"
28 #include "llvm/IR/LLVMContext.h"
29 #include "llvm/IR/MDBuilder.h"
30 #include "llvm/IR/Metadata.h"
31 #include "llvm/IR/Module.h"
32 #include "llvm/IR/Type.h"
33 #include "llvm/ProfileData/InstrProfReader.h"
34 #include "llvm/Support/Casting.h"
35 #include "llvm/Support/CommandLine.h"
36 #include "llvm/Support/Compiler.h"
37 #include "llvm/Support/Compression.h"
38 #include "llvm/Support/Endian.h"
39 #include "llvm/Support/Error.h"
40 #include "llvm/Support/ErrorHandling.h"
41 #include "llvm/Support/LEB128.h"
42 #include "llvm/Support/MathExtras.h"
43 #include "llvm/Support/Path.h"
44 #include "llvm/Support/SwapByteOrder.h"
45 #include "llvm/Support/VirtualFileSystem.h"
46 #include "llvm/TargetParser/Triple.h"
47 #include <algorithm>
48 #include <cassert>
49 #include <cstddef>
50 #include <cstdint>
51 #include <cstring>
52 #include <memory>
53 #include <string>
54 #include <system_error>
55 #include <type_traits>
56 #include <utility>
57 #include <vector>
59 using namespace llvm;
61 static cl::opt<bool> StaticFuncFullModulePrefix(
62 "static-func-full-module-prefix", cl::init(true), cl::Hidden,
63 cl::desc("Use full module build paths in the profile counter names for "
64 "static functions."));
66 // This option is tailored to users whose top-level directories differ between
67 // the profile-gen and profile-use compilations. Users need to specify the
68 // number of levels to strip. A value larger than the number of directories in
69 // the source file path will strip all the directory names and leave only the basename.
71 // Note that current ThinLTO module importing for indirect calls assumes
72 // the source directory name is not stripped. A non-zero option value here
73 // can potentially prevent some inter-module indirect-call promotions.
74 static cl::opt<unsigned> StaticFuncStripDirNamePrefix(
75 "static-func-strip-dirname-prefix", cl::init(0), cl::Hidden,
76 cl::desc("Strip specified level of directory name from source path in "
77 "the profile counter name for static functions."));
79 static std::string getInstrProfErrString(instrprof_error Err,
80 const std::string &ErrMsg = "") {
81 std::string Msg;
82 raw_string_ostream OS(Msg);
84 switch (Err) {
85 case instrprof_error::success:
86 OS << "success";
87 break;
88 case instrprof_error::eof:
89 OS << "end of File";
90 break;
91 case instrprof_error::unrecognized_format:
92 OS << "unrecognized instrumentation profile encoding format";
93 break;
94 case instrprof_error::bad_magic:
95 OS << "invalid instrumentation profile data (bad magic)";
96 break;
97 case instrprof_error::bad_header:
98 OS << "invalid instrumentation profile data (file header is corrupt)";
99 break;
100 case instrprof_error::unsupported_version:
101 OS << "unsupported instrumentation profile format version";
102 break;
103 case instrprof_error::unsupported_hash_type:
104 OS << "unsupported instrumentation profile hash type";
105 break;
106 case instrprof_error::too_large:
107 OS << "too much profile data";
108 break;
109 case instrprof_error::truncated:
110 OS << "truncated profile data";
111 break;
112 case instrprof_error::malformed:
113 OS << "malformed instrumentation profile data";
114 break;
115 case instrprof_error::missing_debug_info_for_correlation:
116 OS << "debug info for correlation is required";
117 break;
118 case instrprof_error::unexpected_debug_info_for_correlation:
119 OS << "debug info for correlation is not necessary";
120 break;
121 case instrprof_error::unable_to_correlate_profile:
122 OS << "unable to correlate profile";
123 break;
124 case instrprof_error::invalid_prof:
125 OS << "invalid profile created. Please file a bug "
126 "at: " BUG_REPORT_URL
127 " and include the profraw files that caused this error.";
128 break;
129 case instrprof_error::unknown_function:
130 OS << "no profile data available for function";
131 break;
132 case instrprof_error::hash_mismatch:
133 OS << "function control flow change detected (hash mismatch)";
134 break;
135 case instrprof_error::count_mismatch:
136 OS << "function basic block count change detected (counter mismatch)";
137 break;
138 case instrprof_error::counter_overflow:
139 OS << "counter overflow";
140 break;
141 case instrprof_error::value_site_count_mismatch:
142 OS << "function value site count change detected (counter mismatch)";
143 break;
144 case instrprof_error::compress_failed:
145 OS << "failed to compress data (zlib)";
146 break;
147 case instrprof_error::uncompress_failed:
148 OS << "failed to uncompress data (zlib)";
149 break;
150 case instrprof_error::empty_raw_profile:
151 OS << "empty raw profile file";
152 break;
153 case instrprof_error::zlib_unavailable:
154 OS << "profile uses zlib compression but the profile reader was built "
155 "without zlib support";
156 break;
157 case instrprof_error::raw_profile_version_mismatch:
158 OS << "raw profile version mismatch";
159 break;
162 // If the optional error message is not empty, append it to the message.
163 if (!ErrMsg.empty())
164 OS << ": " << ErrMsg;
166 return OS.str();
169 namespace {
171 // FIXME: This class is only here to support the transition to llvm::Error. It
172 // will be removed once this transition is complete. Clients should prefer to
173 // deal with the Error value directly, rather than converting to error_code.
174 class InstrProfErrorCategoryType : public std::error_category {
175 const char *name() const noexcept override { return "llvm.instrprof"; }
177 std::string message(int IE) const override {
178 return getInstrProfErrString(static_cast<instrprof_error>(IE));
182 } // end anonymous namespace
184 const std::error_category &llvm::instrprof_category() {
185 static InstrProfErrorCategoryType ErrorCategory;
186 return ErrorCategory;
189 namespace {
191 const char *InstrProfSectNameCommon[] = {
192 #define INSTR_PROF_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) \
193 SectNameCommon,
194 #include "llvm/ProfileData/InstrProfData.inc"
197 const char *InstrProfSectNameCoff[] = {
198 #define INSTR_PROF_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) \
199 SectNameCoff,
200 #include "llvm/ProfileData/InstrProfData.inc"
203 const char *InstrProfSectNamePrefix[] = {
204 #define INSTR_PROF_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) \
205 Prefix,
206 #include "llvm/ProfileData/InstrProfData.inc"
209 } // namespace
211 namespace llvm {
213 cl::opt<bool> DoInstrProfNameCompression(
214 "enable-name-compression",
215 cl::desc("Enable name/filename string compression"), cl::init(true));
217 std::string getInstrProfSectionName(InstrProfSectKind IPSK,
218 Triple::ObjectFormatType OF,
219 bool AddSegmentInfo) {
220 std::string SectName;
222 if (OF == Triple::MachO && AddSegmentInfo)
223 SectName = InstrProfSectNamePrefix[IPSK];
225 if (OF == Triple::COFF)
226 SectName += InstrProfSectNameCoff[IPSK];
227 else
228 SectName += InstrProfSectNameCommon[IPSK];
230 if (OF == Triple::MachO && IPSK == IPSK_data && AddSegmentInfo)
231 SectName += ",regular,live_support";
233 return SectName;
236 std::string InstrProfError::message() const {
237 return getInstrProfErrString(Err, Msg);
240 char InstrProfError::ID = 0;
242 std::string getPGOFuncName(StringRef RawFuncName,
243 GlobalValue::LinkageTypes Linkage,
244 StringRef FileName,
245 uint64_t Version LLVM_ATTRIBUTE_UNUSED) {
246 return GlobalValue::getGlobalIdentifier(RawFuncName, Linkage, FileName);
249 // Strip NumPrefix levels of directory names from PathNameStr. If the number of
250 // directory separators is less than NumPrefix, strip all the directories and
251 // leave the base file name only.
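// For example (illustrative): stripDirPrefix("a/b/c.cpp", 1) yields "b/c.cpp",
// and stripDirPrefix("a/b/c.cpp", 5) yields "c.cpp".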
252 static StringRef stripDirPrefix(StringRef PathNameStr, uint32_t NumPrefix) {
253 uint32_t Count = NumPrefix;
254 uint32_t Pos = 0, LastPos = 0;
255 for (auto & CI : PathNameStr) {
256 ++Pos;
257 if (llvm::sys::path::is_separator(CI)) {
258 LastPos = Pos;
259 --Count;
261 if (Count == 0)
262 break;
264 return PathNameStr.substr(LastPos);
267 // Return the PGOFuncName. This function has some special handling when called
268 // during LTO optimization. The following applies only when called from LTO
269 // passes (when \c InLTO is true): LTO's internalization privatizes many global-linkage
270 // symbols. This happens after value profile annotation, but those internal
271 // linkage functions should not have a source prefix.
272 // Additionally, for ThinLTO mode, exported internal functions are promoted
273 // and renamed. We need to ensure that the original internal PGO name is
274 // used when computing the GUID that is compared against the profiled GUIDs.
275 // To differentiate compiler generated internal symbols from original ones,
276 // PGOFuncName metadata is created and attached to the original internal
277 // symbols in the value profile annotation step
278 // (PGOUseFunc::annotateIndirectCallSites). If a symbol does not have the meta
279 // data, its original linkage must be non-internal.
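// As an illustrative example, a static function "foo" defined in "lib/bar.cpp"
// would normally get the PGO name "lib/bar.cpp:foo" (subject to the
// -static-func-* options above), while an external function keeps its plain name.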
280 std::string getPGOFuncName(const Function &F, bool InLTO, uint64_t Version) {
281 if (!InLTO) {
282 StringRef FileName(F.getParent()->getSourceFileName());
283 uint32_t StripLevel = StaticFuncFullModulePrefix ? 0 : (uint32_t)-1;
284 if (StripLevel < StaticFuncStripDirNamePrefix)
285 StripLevel = StaticFuncStripDirNamePrefix;
286 if (StripLevel)
287 FileName = stripDirPrefix(FileName, StripLevel);
288 return getPGOFuncName(F.getName(), F.getLinkage(), FileName, Version);
291 // In LTO mode (when InLTO is true), first check if there is PGOFuncName metadata.
292 if (MDNode *MD = getPGOFuncNameMetadata(F)) {
293 StringRef S = cast<MDString>(MD->getOperand(0))->getString();
294 return S.str();
297 // If there is no metadata, the function must have had non-internal linkage
298 // before the value profile annotation pass. Its current linkage may be
299 // internal if it was internalized in LTO mode.
300 return getPGOFuncName(F.getName(), GlobalValue::ExternalLinkage, "");
303 StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName, StringRef FileName) {
304 if (FileName.empty())
305 return PGOFuncName;
306 // Drop the file name including ':'. See also getPGOFuncName.
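// For example (illustrative): getFuncNameWithoutPrefix("lib/bar.cpp:foo",
// "lib/bar.cpp") returns "foo".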
307 if (PGOFuncName.startswith(FileName))
308 PGOFuncName = PGOFuncName.drop_front(FileName.size() + 1);
309 return PGOFuncName;
312 // \p FuncName is the string used as the profile lookup key for the function.
313 // A symbol is created to hold the name. Return the legalized symbol name.
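// For example (illustrative, assuming getInstrProfNameVarPrefix() is
// "__profn_"): the local-linkage key "lib/bar.cpp:foo" becomes the symbol name
// "__profn_lib_bar.cpp_foo" once '/' and ':' are legalized below.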
314 std::string getPGOFuncNameVarName(StringRef FuncName,
315 GlobalValue::LinkageTypes Linkage) {
316 std::string VarName = std::string(getInstrProfNameVarPrefix());
317 VarName += FuncName;
319 if (!GlobalValue::isLocalLinkage(Linkage))
320 return VarName;
322 // Now fix up illegal chars in local VarName that may upset the assembler.
323 const char *InvalidChars = "-:<>/\"'";
324 size_t found = VarName.find_first_of(InvalidChars);
325 while (found != std::string::npos) {
326 VarName[found] = '_';
327 found = VarName.find_first_of(InvalidChars, found + 1);
329 return VarName;
332 GlobalVariable *createPGOFuncNameVar(Module &M,
333 GlobalValue::LinkageTypes Linkage,
334 StringRef PGOFuncName) {
335 // We generally want to match the function's linkage, but available_externally
336 // and extern_weak both have the wrong semantics, and anything that doesn't
337 // need to link across compilation units doesn't need to be visible at all.
338 if (Linkage == GlobalValue::ExternalWeakLinkage)
339 Linkage = GlobalValue::LinkOnceAnyLinkage;
340 else if (Linkage == GlobalValue::AvailableExternallyLinkage)
341 Linkage = GlobalValue::LinkOnceODRLinkage;
342 else if (Linkage == GlobalValue::InternalLinkage ||
343 Linkage == GlobalValue::ExternalLinkage)
344 Linkage = GlobalValue::PrivateLinkage;
346 auto *Value =
347 ConstantDataArray::getString(M.getContext(), PGOFuncName, false);
348 auto FuncNameVar =
349 new GlobalVariable(M, Value->getType(), true, Linkage, Value,
350 getPGOFuncNameVarName(PGOFuncName, Linkage));
352 // Hide the symbol so that we correctly get a copy for each executable.
353 if (!GlobalValue::isLocalLinkage(FuncNameVar->getLinkage()))
354 FuncNameVar->setVisibility(GlobalValue::HiddenVisibility);
356 return FuncNameVar;
359 GlobalVariable *createPGOFuncNameVar(Function &F, StringRef PGOFuncName) {
360 return createPGOFuncNameVar(*F.getParent(), F.getLinkage(), PGOFuncName);
363 Error InstrProfSymtab::create(Module &M, bool InLTO) {
364 for (Function &F : M) {
365 // A function may not have a name (e.g., when asm("") is used to override
366 // the symbol name). Ignore such functions.
367 if (!F.hasName())
368 continue;
369 const std::string &PGOFuncName = getPGOFuncName(F, InLTO);
370 if (Error E = addFuncName(PGOFuncName))
371 return E;
372 MD5FuncMap.emplace_back(Function::getGUID(PGOFuncName), &F);
373 // In ThinLTO, a local function may have been promoted to global and had the
374 // ".llvm." suffix added to its name. We need to add the stripped function
375 // name to the symbol table so that we can find a match from the
376 // profile.
378 // We may have other suffixes similar to ".llvm." that need to be stripped
379 // before matching, but the ".__uniq." suffix, which is used to
380 // differentiate internal-linkage functions in different modules,
381 // should be kept. Currently this is the only suffix of the form ".xxx"
382 // that is kept before matching.
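// For example (illustrative): a ThinLTO-promoted name such as
// "foo.__uniq.123.llvm.456" is stripped to "foo.__uniq.123", while a plain
// "foo.llvm.456" is stripped to "foo".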
383 const std::string UniqSuffix = ".__uniq.";
384 auto pos = PGOFuncName.find(UniqSuffix);
385 // Search '.' after ".__uniq." if ".__uniq." exists, otherwise
386 // search '.' from the beginning.
387 if (pos != std::string::npos)
388 pos += UniqSuffix.length();
389 else
390 pos = 0;
391 pos = PGOFuncName.find('.', pos);
392 if (pos != std::string::npos && pos != 0) {
393 const std::string &OtherFuncName = PGOFuncName.substr(0, pos);
394 if (Error E = addFuncName(OtherFuncName))
395 return E;
396 MD5FuncMap.emplace_back(Function::getGUID(OtherFuncName), &F);
399 Sorted = false;
400 finalizeSymtab();
401 return Error::success();
404 uint64_t InstrProfSymtab::getFunctionHashFromAddress(uint64_t Address) {
405 finalizeSymtab();
406 auto It = partition_point(AddrToMD5Map, [=](std::pair<uint64_t, uint64_t> A) {
407 return A.first < Address;
409 // A raw function pointer collected by the value profiler may come from
410 // an external function that is not instrumented. Such pointers won't have
411 // mapping data to be used by the deserializer. Force the value to
412 // be 0 in this case.
413 if (It != AddrToMD5Map.end() && It->first == Address)
414 return (uint64_t)It->second;
415 return 0;
418 Error collectPGOFuncNameStrings(ArrayRef<std::string> NameStrs,
419 bool doCompression, std::string &Result) {
420 assert(!NameStrs.empty() && "No name data to emit");
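// The emitted layout (parsed back by readPGOFuncNameStrings below) is:
//   ULEB128 uncompressed-size, ULEB128 compressed-size (0 if uncompressed),
//   followed by the payload: the names joined by the name separator, either
//   as-is or zlib-compressed.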
422 uint8_t Header[16], *P = Header;
423 std::string UncompressedNameStrings =
424 join(NameStrs.begin(), NameStrs.end(), getInstrProfNameSeparator());
426 assert(StringRef(UncompressedNameStrings)
427 .count(getInstrProfNameSeparator()) == (NameStrs.size() - 1) &&
428 "PGO name is invalid (contains separator token)");
430 unsigned EncLen = encodeULEB128(UncompressedNameStrings.length(), P);
431 P += EncLen;
433 auto WriteStringToResult = [&](size_t CompressedLen, StringRef InputStr) {
434 EncLen = encodeULEB128(CompressedLen, P);
435 P += EncLen;
436 char *HeaderStr = reinterpret_cast<char *>(&Header[0]);
437 unsigned HeaderLen = P - &Header[0];
438 Result.append(HeaderStr, HeaderLen);
439 Result += InputStr;
440 return Error::success();
443 if (!doCompression) {
444 return WriteStringToResult(0, UncompressedNameStrings);
447 SmallVector<uint8_t, 128> CompressedNameStrings;
448 compression::zlib::compress(arrayRefFromStringRef(UncompressedNameStrings),
449 CompressedNameStrings,
450 compression::zlib::BestSizeCompression);
452 return WriteStringToResult(CompressedNameStrings.size(),
453 toStringRef(CompressedNameStrings));
456 StringRef getPGOFuncNameVarInitializer(GlobalVariable *NameVar) {
457 auto *Arr = cast<ConstantDataArray>(NameVar->getInitializer());
458 StringRef NameStr =
459 Arr->isCString() ? Arr->getAsCString() : Arr->getAsString();
460 return NameStr;
463 Error collectPGOFuncNameStrings(ArrayRef<GlobalVariable *> NameVars,
464 std::string &Result, bool doCompression) {
465 std::vector<std::string> NameStrs;
466 for (auto *NameVar : NameVars) {
467 NameStrs.push_back(std::string(getPGOFuncNameVarInitializer(NameVar)));
469 return collectPGOFuncNameStrings(
470 NameStrs, compression::zlib::isAvailable() && doCompression, Result);
473 Error readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab) {
474 const uint8_t *P = NameStrings.bytes_begin();
475 const uint8_t *EndP = NameStrings.bytes_end();
476 while (P < EndP) {
477 uint32_t N;
478 uint64_t UncompressedSize = decodeULEB128(P, &N);
479 P += N;
480 uint64_t CompressedSize = decodeULEB128(P, &N);
481 P += N;
482 bool isCompressed = (CompressedSize != 0);
483 SmallVector<uint8_t, 128> UncompressedNameStrings;
484 StringRef NameStrings;
485 if (isCompressed) {
486 if (!llvm::compression::zlib::isAvailable())
487 return make_error<InstrProfError>(instrprof_error::zlib_unavailable);
489 if (Error E = compression::zlib::decompress(ArrayRef(P, CompressedSize),
490 UncompressedNameStrings,
491 UncompressedSize)) {
492 consumeError(std::move(E));
493 return make_error<InstrProfError>(instrprof_error::uncompress_failed);
495 P += CompressedSize;
496 NameStrings = toStringRef(UncompressedNameStrings);
497 } else {
498 NameStrings =
499 StringRef(reinterpret_cast<const char *>(P), UncompressedSize);
500 P += UncompressedSize;
502 // Now parse the name strings.
503 SmallVector<StringRef, 0> Names;
504 NameStrings.split(Names, getInstrProfNameSeparator());
505 for (StringRef &Name : Names)
506 if (Error E = Symtab.addFuncName(Name))
507 return E;
509 while (P < EndP && *P == 0)
510 P++;
512 return Error::success();
515 void InstrProfRecord::accumulateCounts(CountSumOrPercent &Sum) const {
516 uint64_t FuncSum = 0;
517 Sum.NumEntries += Counts.size();
518 for (uint64_t Count : Counts)
519 FuncSum += Count;
520 Sum.CountSum += FuncSum;
522 for (uint32_t VK = IPVK_First; VK <= IPVK_Last; ++VK) {
523 uint64_t KindSum = 0;
524 uint32_t NumValueSites = getNumValueSites(VK);
525 for (size_t I = 0; I < NumValueSites; ++I) {
526 uint32_t NV = getNumValueDataForSite(VK, I);
527 std::unique_ptr<InstrProfValueData[]> VD = getValueForSite(VK, I);
528 for (uint32_t V = 0; V < NV; V++)
529 KindSum += VD[V].Count;
531 Sum.ValueCounts[VK] += KindSum;
535 void InstrProfValueSiteRecord::overlap(InstrProfValueSiteRecord &Input,
536 uint32_t ValueKind,
537 OverlapStats &Overlap,
538 OverlapStats &FuncLevelOverlap) {
539 this->sortByTargetValues();
540 Input.sortByTargetValues();
541 double Score = 0.0f, FuncLevelScore = 0.0f;
542 auto I = ValueData.begin();
543 auto IE = ValueData.end();
544 auto J = Input.ValueData.begin();
545 auto JE = Input.ValueData.end();
546 while (I != IE && J != JE) {
547 if (I->Value == J->Value) {
548 Score += OverlapStats::score(I->Count, J->Count,
549 Overlap.Base.ValueCounts[ValueKind],
550 Overlap.Test.ValueCounts[ValueKind]);
551 FuncLevelScore += OverlapStats::score(
552 I->Count, J->Count, FuncLevelOverlap.Base.ValueCounts[ValueKind],
553 FuncLevelOverlap.Test.ValueCounts[ValueKind]);
554 ++I;
555 } else if (I->Value < J->Value) {
556 ++I;
557 continue;
559 ++J;
561 Overlap.Overlap.ValueCounts[ValueKind] += Score;
562 FuncLevelOverlap.Overlap.ValueCounts[ValueKind] += FuncLevelScore;
565 // Compute the overlap of value profile data of kind \p ValueKind between this record and \p Other.
566 void InstrProfRecord::overlapValueProfData(uint32_t ValueKind,
567 InstrProfRecord &Other,
568 OverlapStats &Overlap,
569 OverlapStats &FuncLevelOverlap) {
570 uint32_t ThisNumValueSites = getNumValueSites(ValueKind);
571 assert(ThisNumValueSites == Other.getNumValueSites(ValueKind));
572 if (!ThisNumValueSites)
573 return;
575 std::vector<InstrProfValueSiteRecord> &ThisSiteRecords =
576 getOrCreateValueSitesForKind(ValueKind);
577 MutableArrayRef<InstrProfValueSiteRecord> OtherSiteRecords =
578 Other.getValueSitesForKind(ValueKind);
579 for (uint32_t I = 0; I < ThisNumValueSites; I++)
580 ThisSiteRecords[I].overlap(OtherSiteRecords[I], ValueKind, Overlap,
581 FuncLevelOverlap);
584 void InstrProfRecord::overlap(InstrProfRecord &Other, OverlapStats &Overlap,
585 OverlapStats &FuncLevelOverlap,
586 uint64_t ValueCutoff) {
587 // The function-level CountSum for Other should already be computed and nonzero.
588 assert(FuncLevelOverlap.Test.CountSum >= 1.0f);
589 accumulateCounts(FuncLevelOverlap.Base);
590 bool Mismatch = (Counts.size() != Other.Counts.size());
592 // Check if the value profiles mismatch.
593 if (!Mismatch) {
594 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {
595 uint32_t ThisNumValueSites = getNumValueSites(Kind);
596 uint32_t OtherNumValueSites = Other.getNumValueSites(Kind);
597 if (ThisNumValueSites != OtherNumValueSites) {
598 Mismatch = true;
599 break;
603 if (Mismatch) {
604 Overlap.addOneMismatch(FuncLevelOverlap.Test);
605 return;
608 // Compute overlap for value counts.
609 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
610 overlapValueProfData(Kind, Other, Overlap, FuncLevelOverlap);
612 double Score = 0.0;
613 uint64_t MaxCount = 0;
614 // Compute overlap for edge counts.
615 for (size_t I = 0, E = Other.Counts.size(); I < E; ++I) {
616 Score += OverlapStats::score(Counts[I], Other.Counts[I],
617 Overlap.Base.CountSum, Overlap.Test.CountSum);
618 MaxCount = std::max(Other.Counts[I], MaxCount);
620 Overlap.Overlap.CountSum += Score;
621 Overlap.Overlap.NumEntries += 1;
623 if (MaxCount >= ValueCutoff) {
624 double FuncScore = 0.0;
625 for (size_t I = 0, E = Other.Counts.size(); I < E; ++I)
626 FuncScore += OverlapStats::score(Counts[I], Other.Counts[I],
627 FuncLevelOverlap.Base.CountSum,
628 FuncLevelOverlap.Test.CountSum);
629 FuncLevelOverlap.Overlap.CountSum = FuncScore;
630 FuncLevelOverlap.Overlap.NumEntries = Other.Counts.size();
631 FuncLevelOverlap.Valid = true;
635 void InstrProfValueSiteRecord::merge(InstrProfValueSiteRecord &Input,
636 uint64_t Weight,
637 function_ref<void(instrprof_error)> Warn) {
638 this->sortByTargetValues();
639 Input.sortByTargetValues();
640 auto I = ValueData.begin();
641 auto IE = ValueData.end();
642 for (const InstrProfValueData &J : Input.ValueData) {
643 while (I != IE && I->Value < J.Value)
644 ++I;
645 if (I != IE && I->Value == J.Value) {
646 bool Overflowed;
647 I->Count = SaturatingMultiplyAdd(J.Count, Weight, I->Count, &Overflowed);
648 if (Overflowed)
649 Warn(instrprof_error::counter_overflow);
650 ++I;
651 continue;
653 ValueData.insert(I, J);
657 void InstrProfValueSiteRecord::scale(uint64_t N, uint64_t D,
658 function_ref<void(instrprof_error)> Warn) {
659 for (InstrProfValueData &I : ValueData) {
660 bool Overflowed;
661 I.Count = SaturatingMultiply(I.Count, N, &Overflowed) / D;
662 if (Overflowed)
663 Warn(instrprof_error::counter_overflow);
667 // Merge Value Profile data from Src record to this record for ValueKind.
668 // Scale merged value counts by \p Weight.
669 void InstrProfRecord::mergeValueProfData(
670 uint32_t ValueKind, InstrProfRecord &Src, uint64_t Weight,
671 function_ref<void(instrprof_error)> Warn) {
672 uint32_t ThisNumValueSites = getNumValueSites(ValueKind);
673 uint32_t OtherNumValueSites = Src.getNumValueSites(ValueKind);
674 if (ThisNumValueSites != OtherNumValueSites) {
675 Warn(instrprof_error::value_site_count_mismatch);
676 return;
678 if (!ThisNumValueSites)
679 return;
680 std::vector<InstrProfValueSiteRecord> &ThisSiteRecords =
681 getOrCreateValueSitesForKind(ValueKind);
682 MutableArrayRef<InstrProfValueSiteRecord> OtherSiteRecords =
683 Src.getValueSitesForKind(ValueKind);
684 for (uint32_t I = 0; I < ThisNumValueSites; I++)
685 ThisSiteRecords[I].merge(OtherSiteRecords[I], Weight, Warn);
688 void InstrProfRecord::merge(InstrProfRecord &Other, uint64_t Weight,
689 function_ref<void(instrprof_error)> Warn) {
690 // If the number of counters doesn't match we either have bad data
691 // or a hash collision.
692 if (Counts.size() != Other.Counts.size()) {
693 Warn(instrprof_error::count_mismatch);
694 return;
697 // Special handling of the first count as the PseudoCount.
698 CountPseudoKind OtherKind = Other.getCountPseudoKind();
699 CountPseudoKind ThisKind = getCountPseudoKind();
700 if (OtherKind != NotPseudo || ThisKind != NotPseudo) {
701 // We don't allow merging a profile with pseudo counts and
702 // a normal profile (i.e. one without pseudo counts).
703 // Profile supplementation should be done after the profile merge.
704 if (OtherKind == NotPseudo || ThisKind == NotPseudo) {
705 Warn(instrprof_error::count_mismatch);
706 return;
708 if (OtherKind == PseudoHot || ThisKind == PseudoHot)
709 setPseudoCount(PseudoHot);
710 else
711 setPseudoCount(PseudoWarm);
712 return;
715 for (size_t I = 0, E = Other.Counts.size(); I < E; ++I) {
716 bool Overflowed;
717 uint64_t Value =
718 SaturatingMultiplyAdd(Other.Counts[I], Weight, Counts[I], &Overflowed);
719 if (Value > getInstrMaxCountValue()) {
720 Value = getInstrMaxCountValue();
721 Overflowed = true;
723 Counts[I] = Value;
724 if (Overflowed)
725 Warn(instrprof_error::counter_overflow);
728 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
729 mergeValueProfData(Kind, Other, Weight, Warn);
732 void InstrProfRecord::scaleValueProfData(
733 uint32_t ValueKind, uint64_t N, uint64_t D,
734 function_ref<void(instrprof_error)> Warn) {
735 for (auto &R : getValueSitesForKind(ValueKind))
736 R.scale(N, D, Warn);
739 void InstrProfRecord::scale(uint64_t N, uint64_t D,
740 function_ref<void(instrprof_error)> Warn) {
741 assert(D != 0 && "D cannot be 0");
742 for (auto &Count : this->Counts) {
743 bool Overflowed;
744 Count = SaturatingMultiply(Count, N, &Overflowed) / D;
745 if (Count > getInstrMaxCountValue()) {
746 Count = getInstrMaxCountValue();
747 Overflowed = true;
749 if (Overflowed)
750 Warn(instrprof_error::counter_overflow);
752 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
753 scaleValueProfData(Kind, N, D, Warn);
756 // Map an indirect call target address to the MD5 hash of the target function name.
757 uint64_t InstrProfRecord::remapValue(uint64_t Value, uint32_t ValueKind,
758 InstrProfSymtab *SymTab) {
759 if (!SymTab)
760 return Value;
762 if (ValueKind == IPVK_IndirectCallTarget)
763 return SymTab->getFunctionHashFromAddress(Value);
765 return Value;
768 void InstrProfRecord::addValueData(uint32_t ValueKind, uint32_t Site,
769 InstrProfValueData *VData, uint32_t N,
770 InstrProfSymtab *ValueMap) {
771 for (uint32_t I = 0; I < N; I++) {
772 VData[I].Value = remapValue(VData[I].Value, ValueKind, ValueMap);
774 std::vector<InstrProfValueSiteRecord> &ValueSites =
775 getOrCreateValueSitesForKind(ValueKind);
776 if (N == 0)
777 ValueSites.emplace_back();
778 else
779 ValueSites.emplace_back(VData, VData + N);
782 std::vector<BPFunctionNode> TemporalProfTraceTy::createBPFunctionNodes(
783 ArrayRef<TemporalProfTraceTy> Traces) {
784 using IDT = BPFunctionNode::IDT;
785 using UtilityNodeT = BPFunctionNode::UtilityNodeT;
786 // Collect all function IDs ordered by their smallest timestamp. This will be
787 // used as the initial FunctionNode order.
788 SetVector<IDT> FunctionIds;
789 size_t LargestTraceSize = 0;
790 for (auto &Trace : Traces)
791 LargestTraceSize =
792 std::max(LargestTraceSize, Trace.FunctionNameRefs.size());
793 for (size_t Timestamp = 0; Timestamp < LargestTraceSize; Timestamp++)
794 for (auto &Trace : Traces)
795 if (Timestamp < Trace.FunctionNameRefs.size())
796 FunctionIds.insert(Trace.FunctionNameRefs[Timestamp]);
798 int N = std::ceil(std::log2(LargestTraceSize));
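// Each trace contributes N utility nodes. Utility node I of a trace groups the
// functions that appear within roughly the first 2^(I+1) timestamps of that
// trace, so the groups form nested, exponentially growing prefixes (a sketch of
// the intent; see the loop below for the exact assignment).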
800 // TODO: We need to use the Trace.Weight field to give more weight to more
801 // important utilities
802 DenseMap<IDT, SmallVector<UtilityNodeT, 4>> FuncGroups;
803 for (size_t TraceIdx = 0; TraceIdx < Traces.size(); TraceIdx++) {
804 auto &Trace = Traces[TraceIdx].FunctionNameRefs;
805 for (size_t Timestamp = 0; Timestamp < Trace.size(); Timestamp++) {
806 for (int I = std::floor(std::log2(Timestamp + 1)); I < N; I++) {
807 auto &FunctionId = Trace[Timestamp];
808 UtilityNodeT GroupId = TraceIdx * N + I;
809 FuncGroups[FunctionId].push_back(GroupId);
814 std::vector<BPFunctionNode> Nodes;
815 for (auto &Id : FunctionIds) {
816 auto &UNs = FuncGroups[Id];
817 llvm::sort(UNs);
818 UNs.erase(std::unique(UNs.begin(), UNs.end()), UNs.end());
819 Nodes.emplace_back(Id, UNs);
821 return Nodes;
824 #define INSTR_PROF_COMMON_API_IMPL
825 #include "llvm/ProfileData/InstrProfData.inc"
827 /*
828  * ValueProfRecordClosure Interface implementation for InstrProfRecord
829  * class. These C wrappers are used as adaptors so that C++ code can be
830  * invoked as callbacks.
831  */
832 uint32_t getNumValueKindsInstrProf(const void *Record) {
833 return reinterpret_cast<const InstrProfRecord *>(Record)->getNumValueKinds();
836 uint32_t getNumValueSitesInstrProf(const void *Record, uint32_t VKind) {
837 return reinterpret_cast<const InstrProfRecord *>(Record)
838 ->getNumValueSites(VKind);
841 uint32_t getNumValueDataInstrProf(const void *Record, uint32_t VKind) {
842 return reinterpret_cast<const InstrProfRecord *>(Record)
843 ->getNumValueData(VKind);
846 uint32_t getNumValueDataForSiteInstrProf(const void *R, uint32_t VK,
847 uint32_t S) {
848 return reinterpret_cast<const InstrProfRecord *>(R)
849 ->getNumValueDataForSite(VK, S);
852 void getValueForSiteInstrProf(const void *R, InstrProfValueData *Dst,
853 uint32_t K, uint32_t S) {
854 reinterpret_cast<const InstrProfRecord *>(R)->getValueForSite(Dst, K, S);
857 ValueProfData *allocValueProfDataInstrProf(size_t TotalSizeInBytes) {
858 ValueProfData *VD =
859 (ValueProfData *)(new (::operator new(TotalSizeInBytes)) ValueProfData());
860 memset(VD, 0, TotalSizeInBytes);
861 return VD;
864 static ValueProfRecordClosure InstrProfRecordClosure = {
865 nullptr,
866 getNumValueKindsInstrProf,
867 getNumValueSitesInstrProf,
868 getNumValueDataInstrProf,
869 getNumValueDataForSiteInstrProf,
870 nullptr,
871 getValueForSiteInstrProf,
872 allocValueProfDataInstrProf};
874 // Wrapper implementation using the closure mechanism.
875 uint32_t ValueProfData::getSize(const InstrProfRecord &Record) {
876 auto Closure = InstrProfRecordClosure;
877 Closure.Record = &Record;
878 return getValueProfDataSize(&Closure);
881 // Wrapper implementation using the closure mechanism.
882 std::unique_ptr<ValueProfData>
883 ValueProfData::serializeFrom(const InstrProfRecord &Record) {
884 InstrProfRecordClosure.Record = &Record;
886 std::unique_ptr<ValueProfData> VPD(
887 serializeValueProfDataFrom(&InstrProfRecordClosure, nullptr));
888 return VPD;
891 void ValueProfRecord::deserializeTo(InstrProfRecord &Record,
892 InstrProfSymtab *SymTab) {
893 Record.reserveSites(Kind, NumValueSites);
895 InstrProfValueData *ValueData = getValueProfRecordValueData(this);
896 for (uint64_t VSite = 0; VSite < NumValueSites; ++VSite) {
897 uint8_t ValueDataCount = this->SiteCountArray[VSite];
898 Record.addValueData(Kind, VSite, ValueData, ValueDataCount, SymTab);
899 ValueData += ValueDataCount;
903 // For writing/serializing, Old is the host endianness and New is
904 // the byte order intended on disk. For reading/deserialization, Old
905 // is the on-disk source endianness and New is the host endianness.
906 void ValueProfRecord::swapBytes(support::endianness Old,
907 support::endianness New) {
908 using namespace support;
910 if (Old == New)
911 return;
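// NumValueSites and Kind must be in host byte order before they are used to
// walk the record below, so they are swapped up front when reading (host !=
// Old) and only swapped into the target order at the end when writing.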
913 if (getHostEndianness() != Old) {
914 sys::swapByteOrder<uint32_t>(NumValueSites);
915 sys::swapByteOrder<uint32_t>(Kind);
917 uint32_t ND = getValueProfRecordNumValueData(this);
918 InstrProfValueData *VD = getValueProfRecordValueData(this);
920 // No need to swap the byte array SiteCountArray.
921 for (uint32_t I = 0; I < ND; I++) {
922 sys::swapByteOrder<uint64_t>(VD[I].Value);
923 sys::swapByteOrder<uint64_t>(VD[I].Count);
925 if (getHostEndianness() == Old) {
926 sys::swapByteOrder<uint32_t>(NumValueSites);
927 sys::swapByteOrder<uint32_t>(Kind);
931 void ValueProfData::deserializeTo(InstrProfRecord &Record,
932 InstrProfSymtab *SymTab) {
933 if (NumValueKinds == 0)
934 return;
936 ValueProfRecord *VR = getFirstValueProfRecord(this);
937 for (uint32_t K = 0; K < NumValueKinds; K++) {
938 VR->deserializeTo(Record, SymTab);
939 VR = getValueProfRecordNext(VR);
943 template <class T>
944 static T swapToHostOrder(const unsigned char *&D, support::endianness Orig) {
945 using namespace support;
947 if (Orig == little)
948 return endian::readNext<T, little, unaligned>(D);
949 else
950 return endian::readNext<T, big, unaligned>(D);
953 static std::unique_ptr<ValueProfData> allocValueProfData(uint32_t TotalSize) {
954 return std::unique_ptr<ValueProfData>(new (::operator new(TotalSize))
955 ValueProfData());
958 Error ValueProfData::checkIntegrity() {
959 if (NumValueKinds > IPVK_Last + 1)
960 return make_error<InstrProfError>(
961 instrprof_error::malformed, "number of value profile kinds is invalid");
962 // Total size needs to be a multiple of the quadword size.
963 if (TotalSize % sizeof(uint64_t))
964 return make_error<InstrProfError>(
965 instrprof_error::malformed, "total size is not a multiple of quadword size");
967 ValueProfRecord *VR = getFirstValueProfRecord(this);
968 for (uint32_t K = 0; K < this->NumValueKinds; K++) {
969 if (VR->Kind > IPVK_Last)
970 return make_error<InstrProfError>(instrprof_error::malformed,
971 "value kind is invalid");
972 VR = getValueProfRecordNext(VR);
973 if ((char *)VR - (char *)this > (ptrdiff_t)TotalSize)
974 return make_error<InstrProfError>(
975 instrprof_error::malformed,
976 "value profile address is greater than total size");
978 return Error::success();
981 Expected<std::unique_ptr<ValueProfData>>
982 ValueProfData::getValueProfData(const unsigned char *D,
983 const unsigned char *const BufferEnd,
984 support::endianness Endianness) {
985 using namespace support;
987 if (D + sizeof(ValueProfData) > BufferEnd)
988 return make_error<InstrProfError>(instrprof_error::truncated);
990 const unsigned char *Header = D;
991 uint32_t TotalSize = swapToHostOrder<uint32_t>(Header, Endianness);
992 if (D + TotalSize > BufferEnd)
993 return make_error<InstrProfError>(instrprof_error::too_large);
995 std::unique_ptr<ValueProfData> VPD = allocValueProfData(TotalSize);
996 memcpy(VPD.get(), D, TotalSize);
997 // Byte swap.
998 VPD->swapBytesToHost(Endianness);
1000 Error E = VPD->checkIntegrity();
1001 if (E)
1002 return std::move(E);
1004 return std::move(VPD);
1007 void ValueProfData::swapBytesToHost(support::endianness Endianness) {
1008 using namespace support;
1010 if (Endianness == getHostEndianness())
1011 return;
1013 sys::swapByteOrder<uint32_t>(TotalSize);
1014 sys::swapByteOrder<uint32_t>(NumValueKinds);
1016 ValueProfRecord *VR = getFirstValueProfRecord(this);
1017 for (uint32_t K = 0; K < NumValueKinds; K++) {
1018 VR->swapBytes(Endianness, getHostEndianness());
1019 VR = getValueProfRecordNext(VR);
1023 void ValueProfData::swapBytesFromHost(support::endianness Endianness) {
1024 using namespace support;
1026 if (Endianness == getHostEndianness())
1027 return;
1029 ValueProfRecord *VR = getFirstValueProfRecord(this);
1030 for (uint32_t K = 0; K < NumValueKinds; K++) {
1031 ValueProfRecord *NVR = getValueProfRecordNext(VR);
1032 VR->swapBytes(getHostEndianness(), Endianness);
1033 VR = NVR;
1035 sys::swapByteOrder<uint32_t>(TotalSize);
1036 sys::swapByteOrder<uint32_t>(NumValueKinds);
1039 void annotateValueSite(Module &M, Instruction &Inst,
1040 const InstrProfRecord &InstrProfR,
1041 InstrProfValueKind ValueKind, uint32_t SiteIdx,
1042 uint32_t MaxMDCount) {
1043 uint32_t NV = InstrProfR.getNumValueDataForSite(ValueKind, SiteIdx);
1044 if (!NV)
1045 return;
1047 uint64_t Sum = 0;
1048 std::unique_ptr<InstrProfValueData[]> VD =
1049 InstrProfR.getValueForSite(ValueKind, SiteIdx, &Sum);
1051 ArrayRef<InstrProfValueData> VDs(VD.get(), NV);
1052 annotateValueSite(M, Inst, VDs, Sum, ValueKind, MaxMDCount);
1055 void annotateValueSite(Module &M, Instruction &Inst,
1056 ArrayRef<InstrProfValueData> VDs,
1057 uint64_t Sum, InstrProfValueKind ValueKind,
1058 uint32_t MaxMDCount) {
1059 LLVMContext &Ctx = M.getContext();
1060 MDBuilder MDHelper(Ctx);
1061 SmallVector<Metadata *, 3> Vals;
1062 // Tag
1063 Vals.push_back(MDHelper.createString("VP"));
1064 // Value Kind
1065 Vals.push_back(MDHelper.createConstant(
1066 ConstantInt::get(Type::getInt32Ty(Ctx), ValueKind)));
1067 // Total Count
1068 Vals.push_back(
1069 MDHelper.createConstant(ConstantInt::get(Type::getInt64Ty(Ctx), Sum)));
1071 // Value Profile Data
1072 uint32_t MDCount = MaxMDCount;
1073 for (auto &VD : VDs) {
1074 Vals.push_back(MDHelper.createConstant(
1075 ConstantInt::get(Type::getInt64Ty(Ctx), VD.Value)));
1076 Vals.push_back(MDHelper.createConstant(
1077 ConstantInt::get(Type::getInt64Ty(Ctx), VD.Count)));
1078 if (--MDCount == 0)
1079 break;
1081 Inst.setMetadata(LLVMContext::MD_prof, MDNode::get(Ctx, Vals));
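// The resulting metadata has the form (illustrative):
//   !{!"VP", i32 <kind>, i64 <total count>, i64 <value1>, i64 <count1>, ...}
// e.g. !{!"VP", i32 0, i64 1600, i64 <hash of foo>, i64 1000, i64 <hash of bar>, i64 600}
// getValueProfDataFromInst() below parses this layout.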
1084 bool getValueProfDataFromInst(const Instruction &Inst,
1085 InstrProfValueKind ValueKind,
1086 uint32_t MaxNumValueData,
1087 InstrProfValueData ValueData[],
1088 uint32_t &ActualNumValueData, uint64_t &TotalC,
1089 bool GetNoICPValue) {
1090 MDNode *MD = Inst.getMetadata(LLVMContext::MD_prof);
1091 if (!MD)
1092 return false;
1094 unsigned NOps = MD->getNumOperands();
1096 if (NOps < 5)
1097 return false;
1099 // Operand 0 is a string tag "VP":
1100 MDString *Tag = cast<MDString>(MD->getOperand(0));
1101 if (!Tag)
1102 return false;
1104 if (!Tag->getString().equals("VP"))
1105 return false;
1107 // Now check kind:
1108 ConstantInt *KindInt = mdconst::dyn_extract<ConstantInt>(MD->getOperand(1));
1109 if (!KindInt)
1110 return false;
1111 if (KindInt->getZExtValue() != ValueKind)
1112 return false;
1114 // Get total count
1115 ConstantInt *TotalCInt = mdconst::dyn_extract<ConstantInt>(MD->getOperand(2));
1116 if (!TotalCInt)
1117 return false;
1118 TotalC = TotalCInt->getZExtValue();
1120 ActualNumValueData = 0;
1122 for (unsigned I = 3; I < NOps; I += 2) {
1123 if (ActualNumValueData >= MaxNumValueData)
1124 break;
1125 ConstantInt *Value = mdconst::dyn_extract<ConstantInt>(MD->getOperand(I));
1126 ConstantInt *Count =
1127 mdconst::dyn_extract<ConstantInt>(MD->getOperand(I + 1));
1128 if (!Value || !Count)
1129 return false;
1130 uint64_t CntValue = Count->getZExtValue();
1131 if (!GetNoICPValue && (CntValue == NOMORE_ICP_MAGICNUM))
1132 continue;
1133 ValueData[ActualNumValueData].Value = Value->getZExtValue();
1134 ValueData[ActualNumValueData].Count = CntValue;
1135 ActualNumValueData++;
1137 return true;
1140 MDNode *getPGOFuncNameMetadata(const Function &F) {
1141 return F.getMetadata(getPGOFuncNameMetadataName());
1144 void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName) {
1145 // Only for internal linkage functions.
1146 if (PGOFuncName == F.getName())
1147 return;
1148 // Don't create duplicated metadata.
1149 if (getPGOFuncNameMetadata(F))
1150 return;
1151 LLVMContext &C = F.getContext();
1152 MDNode *N = MDNode::get(C, MDString::get(C, PGOFuncName));
1153 F.setMetadata(getPGOFuncNameMetadataName(), N);
1156 bool needsComdatForCounter(const Function &F, const Module &M) {
1157 if (F.hasComdat())
1158 return true;
1160 if (!Triple(M.getTargetTriple()).supportsCOMDAT())
1161 return false;
1163 // See createPGOFuncNameVar for more details. To avoid link errors, profile
1164 // counters for functions with available_externally linkage need to be changed
1165 // to linkonce linkage. On ELF-based systems, this leads to weak symbols being
1166 // created. Without using comdat, duplicate entries won't be removed by the
1167 // linker, leading to increased data segment size and raw profile size. Even
1168 // worse, since the referenced counter from the per-function profile data object
1169 // will be resolved to the common strong definition, the profile counts for
1170 // available_externally functions will end up being duplicated in the raw profile
1171 // data. This can result in a distorted profile, as the counts of those duplicates
1172 // will be accumulated by the profile merger.
1173 GlobalValue::LinkageTypes Linkage = F.getLinkage();
1174 if (Linkage != GlobalValue::ExternalWeakLinkage &&
1175 Linkage != GlobalValue::AvailableExternallyLinkage)
1176 return false;
1178 return true;
1181 // Check if INSTR_PROF_RAW_VERSION_VAR is defined.
1182 bool isIRPGOFlagSet(const Module *M) {
1183 auto IRInstrVar =
1184 M->getNamedGlobal(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
1185 if (!IRInstrVar || IRInstrVar->hasLocalLinkage())
1186 return false;
1188 // For CSPGO+LTO, this variable might be marked as non-prevailing and we only
1189 // have the decl.
1190 if (IRInstrVar->isDeclaration())
1191 return true;
1193 // Check if the flag is set.
1194 if (!IRInstrVar->hasInitializer())
1195 return false;
1197 auto *InitVal = dyn_cast_or_null<ConstantInt>(IRInstrVar->getInitializer());
1198 if (!InitVal)
1199 return false;
1200 return (InitVal->getZExtValue() & VARIANT_MASK_IR_PROF) != 0;
1203 // Check if we can safely rename this Comdat function.
1204 bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken) {
1205 if (F.getName().empty())
1206 return false;
1207 if (!needsComdatForCounter(F, *(F.getParent())))
1208 return false;
1209 // It is unsafe to rename an address-taken function (whose address may be
1210 // used in function pointer comparisons).
1211 if (CheckAddressTaken && F.hasAddressTaken())
1212 return false;
1213 // Renaming is only safe if this function may be discarded when it is not
1214 // used in the compilation unit.
1215 if (!GlobalValue::isDiscardableIfUnused(F.getLinkage()))
1216 return false;
1218 // For AvailableExternallyLinkage functions.
1219 if (!F.hasComdat()) {
1220 assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage);
1221 return true;
1223 return true;
1226 // Create the variable for the profile file name.
1227 void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput) {
1228 if (InstrProfileOutput.empty())
1229 return;
1230 Constant *ProfileNameConst =
1231 ConstantDataArray::getString(M.getContext(), InstrProfileOutput, true);
1232 GlobalVariable *ProfileNameVar = new GlobalVariable(
1233 M, ProfileNameConst->getType(), true, GlobalValue::WeakAnyLinkage,
1234 ProfileNameConst, INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR));
1235 ProfileNameVar->setVisibility(GlobalValue::HiddenVisibility);
1236 Triple TT(M.getTargetTriple());
1237 if (TT.supportsCOMDAT()) {
1238 ProfileNameVar->setLinkage(GlobalValue::ExternalLinkage);
1239 ProfileNameVar->setComdat(M.getOrInsertComdat(
1240 StringRef(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR))));
1244 Error OverlapStats::accumulateCounts(const std::string &BaseFilename,
1245 const std::string &TestFilename,
1246 bool IsCS) {
1247 auto getProfileSum = [IsCS](const std::string &Filename,
1248 CountSumOrPercent &Sum) -> Error {
1249 // This function is only used from llvm-profdata, which doesn't use any kind
1250 // of VFS. Just create a default RealFileSystem to read the profiles.
1251 auto FS = vfs::getRealFileSystem();
1252 auto ReaderOrErr = InstrProfReader::create(Filename, *FS);
1253 if (Error E = ReaderOrErr.takeError()) {
1254 return E;
1256 auto Reader = std::move(ReaderOrErr.get());
1257 Reader->accumulateCounts(Sum, IsCS);
1258 return Error::success();
1260 auto Ret = getProfileSum(BaseFilename, Base);
1261 if (Ret)
1262 return Ret;
1263 Ret = getProfileSum(TestFilename, Test);
1264 if (Ret)
1265 return Ret;
1266 this->BaseFilename = &BaseFilename;
1267 this->TestFilename = &TestFilename;
1268 Valid = true;
1269 return Error::success();
1272 void OverlapStats::addOneMismatch(const CountSumOrPercent &MismatchFunc) {
1273 Mismatch.NumEntries += 1;
1274 Mismatch.CountSum += MismatchFunc.CountSum / Test.CountSum;
1275 for (unsigned I = 0; I < IPVK_Last - IPVK_First + 1; I++) {
1276 if (Test.ValueCounts[I] >= 1.0f)
1277 Mismatch.ValueCounts[I] +=
1278 MismatchFunc.ValueCounts[I] / Test.ValueCounts[I];
1282 void OverlapStats::addOneUnique(const CountSumOrPercent &UniqueFunc) {
1283 Unique.NumEntries += 1;
1284 Unique.CountSum += UniqueFunc.CountSum / Test.CountSum;
1285 for (unsigned I = 0; I < IPVK_Last - IPVK_First + 1; I++) {
1286 if (Test.ValueCounts[I] >= 1.0f)
1287 Unique.ValueCounts[I] += UniqueFunc.ValueCounts[I] / Test.ValueCounts[I];
1291 void OverlapStats::dump(raw_fd_ostream &OS) const {
1292 if (!Valid)
1293 return;
1295 const char *EntryName =
1296 (Level == ProgramLevel ? "functions" : "edge counters");
1297 if (Level == ProgramLevel) {
1298 OS << "Profile overlap infomation for base_profile: " << *BaseFilename
1299 << " and test_profile: " << *TestFilename << "\nProgram level:\n";
1300 } else {
1301 OS << "Function level:\n"
1302 << " Function: " << FuncName << " (Hash=" << FuncHash << ")\n";
1305 OS << " # of " << EntryName << " overlap: " << Overlap.NumEntries << "\n";
1306 if (Mismatch.NumEntries)
1307 OS << " # of " << EntryName << " mismatch: " << Mismatch.NumEntries
1308 << "\n";
1309 if (Unique.NumEntries)
1310 OS << " # of " << EntryName
1311 << " only in test_profile: " << Unique.NumEntries << "\n";
1313 OS << " Edge profile overlap: " << format("%.3f%%", Overlap.CountSum * 100)
1314 << "\n";
1315 if (Mismatch.NumEntries)
1316 OS << " Mismatched count percentage (Edge): "
1317 << format("%.3f%%", Mismatch.CountSum * 100) << "\n";
1318 if (Unique.NumEntries)
1319 OS << " Percentage of Edge profile only in test_profile: "
1320 << format("%.3f%%", Unique.CountSum * 100) << "\n";
1321 OS << " Edge profile base count sum: " << format("%.0f", Base.CountSum)
1322 << "\n"
1323 << " Edge profile test count sum: " << format("%.0f", Test.CountSum)
1324 << "\n";
1326 for (unsigned I = 0; I < IPVK_Last - IPVK_First + 1; I++) {
1327 if (Base.ValueCounts[I] < 1.0f && Test.ValueCounts[I] < 1.0f)
1328 continue;
1329 char ProfileKindName[20];
1330 switch (I) {
1331 case IPVK_IndirectCallTarget:
1332 strncpy(ProfileKindName, "IndirectCall", 19);
1333 break;
1334 case IPVK_MemOPSize:
1335 strncpy(ProfileKindName, "MemOP", 19);
1336 break;
1337 default:
1338 snprintf(ProfileKindName, 19, "VP[%d]", I);
1339 break;
1341 OS << " " << ProfileKindName
1342 << " profile overlap: " << format("%.3f%%", Overlap.ValueCounts[I] * 100)
1343 << "\n";
1344 if (Mismatch.NumEntries)
1345 OS << " Mismatched count percentage (" << ProfileKindName
1346 << "): " << format("%.3f%%", Mismatch.ValueCounts[I] * 100) << "\n";
1347 if (Unique.NumEntries)
1348 OS << " Percentage of " << ProfileKindName
1349 << " profile only in test_profile: "
1350 << format("%.3f%%", Unique.ValueCounts[I] * 100) << "\n";
1351 OS << " " << ProfileKindName
1352 << " profile base count sum: " << format("%.0f", Base.ValueCounts[I])
1353 << "\n"
1354 << " " << ProfileKindName
1355 << " profile test count sum: " << format("%.0f", Test.ValueCounts[I])
1356 << "\n";
1360 namespace IndexedInstrProf {
1361 // A C++14 compatible version of the offsetof macro.
1362 template <typename T1, typename T2>
1363 inline size_t constexpr offsetOf(T1 T2::*Member) {
1364 constexpr T2 Object{};
1365 return size_t(&(Object.*Member)) - size_t(&Object);
1368 static inline uint64_t read(const unsigned char *Buffer, size_t Offset) {
1369 return *reinterpret_cast<const uint64_t *>(Buffer + Offset);
1372 uint64_t Header::formatVersion() const {
1373 using namespace support;
1374 return endian::byte_swap<uint64_t, little>(Version);
1377 Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
1378 using namespace support;
1379 static_assert(std::is_standard_layout_v<Header>,
1380 "The header should be standard layout type since we use offset "
1381 "of fields to read.");
1382 Header H;
1384 H.Magic = read(Buffer, offsetOf(&Header::Magic));
1385 // Check the magic number.
1386 uint64_t Magic = endian::byte_swap<uint64_t, little>(H.Magic);
1387 if (Magic != IndexedInstrProf::Magic)
1388 return make_error<InstrProfError>(instrprof_error::bad_magic);
1390 // Read the version.
1391 H.Version = read(Buffer, offsetOf(&Header::Version));
1392 if (GET_VERSION(H.formatVersion()) >
1393 IndexedInstrProf::ProfVersion::CurrentVersion)
1394 return make_error<InstrProfError>(instrprof_error::unsupported_version);
1396 switch (GET_VERSION(H.formatVersion())) {
1397 // When a new field is added to the header, add a case statement here to
1398 // populate it.
1399 static_assert(
1400 IndexedInstrProf::ProfVersion::CurrentVersion == Version10,
1401 "Please update the reading code below if a new field has been added, "
1402 "if not add a case statement to fall through to the latest version.");
1403 case 10ull:
1404 H.TemporalProfTracesOffset =
1405 read(Buffer, offsetOf(&Header::TemporalProfTracesOffset));
1406 [[fallthrough]];
1407 case 9ull:
1408 H.BinaryIdOffset = read(Buffer, offsetOf(&Header::BinaryIdOffset));
1409 [[fallthrough]];
1410 case 8ull:
1411 H.MemProfOffset = read(Buffer, offsetOf(&Header::MemProfOffset));
1412 [[fallthrough]];
1413 default: // Version7 (when the backwards compatible header was introduced).
1414 H.HashType = read(Buffer, offsetOf(&Header::HashType));
1415 H.HashOffset = read(Buffer, offsetOf(&Header::HashOffset));
1418 return H;
1421 size_t Header::size() const {
1422 switch (GET_VERSION(formatVersion())) {
1423 // When a new field is added to the header, add a case statement here to
1424 // compute the size as the offset of the new field + the size of the new field.
1425 // This relies on the field being added to the end of the list.
1426 static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version10,
1427 "Please update the size computation below if a new field has "
1428 "been added to the header, if not add a case statement to "
1429 "fall through to the latest version.");
1430 case 10ull:
1431 return offsetOf(&Header::TemporalProfTracesOffset) +
1432 sizeof(Header::TemporalProfTracesOffset);
1433 case 9ull:
1434 return offsetOf(&Header::BinaryIdOffset) + sizeof(Header::BinaryIdOffset);
1435 case 8ull:
1436 return offsetOf(&Header::MemProfOffset) + sizeof(Header::MemProfOffset);
1437 default: // Version7 (when the backwards compatible header was introduced).
1438 return offsetOf(&Header::HashOffset) + sizeof(Header::HashOffset);
1442 } // namespace IndexedInstrProf
1444 } // end namespace llvm