1 //===- InstrProf.cpp - Instrumented profiling format support --------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
// This file contains support for clang's instrumentation based PGO and
// coverage.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/ProfileData/InstrProf.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/SetVector.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/StringExtras.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/Config/config.h"
21 #include "llvm/IR/Constant.h"
22 #include "llvm/IR/Constants.h"
23 #include "llvm/IR/Function.h"
24 #include "llvm/IR/GlobalValue.h"
25 #include "llvm/IR/GlobalVariable.h"
26 #include "llvm/IR/Instruction.h"
27 #include "llvm/IR/LLVMContext.h"
28 #include "llvm/IR/MDBuilder.h"
29 #include "llvm/IR/Metadata.h"
30 #include "llvm/IR/Module.h"
31 #include "llvm/IR/Type.h"
32 #include "llvm/ProfileData/InstrProfReader.h"
33 #include "llvm/Support/Casting.h"
34 #include "llvm/Support/CommandLine.h"
35 #include "llvm/Support/Compiler.h"
36 #include "llvm/Support/Compression.h"
37 #include "llvm/Support/Endian.h"
38 #include "llvm/Support/Error.h"
39 #include "llvm/Support/ErrorHandling.h"
40 #include "llvm/Support/LEB128.h"
41 #include "llvm/Support/MathExtras.h"
42 #include "llvm/Support/Path.h"
43 #include "llvm/Support/SwapByteOrder.h"
44 #include "llvm/Support/VirtualFileSystem.h"
45 #include "llvm/TargetParser/Triple.h"
53 #include <system_error>
54 #include <type_traits>
60 static cl::opt
<bool> StaticFuncFullModulePrefix(
61 "static-func-full-module-prefix", cl::init(true), cl::Hidden
,
62 cl::desc("Use full module build paths in the profile counter names for "
63 "static functions."));
65 // This option is tailored to users that have different top-level directory in
66 // profile-gen and profile-use compilation. Users need to specific the number
67 // of levels to strip. A value larger than the number of directories in the
68 // source file will strip all the directory names and only leave the basename.
70 // Note current ThinLTO module importing for the indirect-calls assumes
71 // the source directory name not being stripped. A non-zero option value here
72 // can potentially prevent some inter-module indirect-call-promotions.
73 static cl::opt
<unsigned> StaticFuncStripDirNamePrefix(
74 "static-func-strip-dirname-prefix", cl::init(0), cl::Hidden
,
75 cl::desc("Strip specified level of directory name from source path in "
76 "the profile counter name for static functions."));
78 static std::string
getInstrProfErrString(instrprof_error Err
,
79 const std::string
&ErrMsg
= "") {
81 raw_string_ostream
OS(Msg
);
84 case instrprof_error::success
:
87 case instrprof_error::eof
:
90 case instrprof_error::unrecognized_format
:
91 OS
<< "unrecognized instrumentation profile encoding format";
93 case instrprof_error::bad_magic
:
94 OS
<< "invalid instrumentation profile data (bad magic)";
96 case instrprof_error::bad_header
:
97 OS
<< "invalid instrumentation profile data (file header is corrupt)";
99 case instrprof_error::unsupported_version
:
100 OS
<< "unsupported instrumentation profile format version";
102 case instrprof_error::unsupported_hash_type
:
103 OS
<< "unsupported instrumentation profile hash type";
105 case instrprof_error::too_large
:
106 OS
<< "too much profile data";
108 case instrprof_error::truncated
:
109 OS
<< "truncated profile data";
111 case instrprof_error::malformed
:
112 OS
<< "malformed instrumentation profile data";
114 case instrprof_error::missing_correlation_info
:
115 OS
<< "debug info/binary for correlation is required";
117 case instrprof_error::unexpected_correlation_info
:
118 OS
<< "debug info/binary for correlation is not necessary";
120 case instrprof_error::unable_to_correlate_profile
:
121 OS
<< "unable to correlate profile";
123 case instrprof_error::invalid_prof
:
124 OS
<< "invalid profile created. Please file a bug "
125 "at: " BUG_REPORT_URL
126 " and include the profraw files that caused this error.";
128 case instrprof_error::unknown_function
:
129 OS
<< "no profile data available for function";
131 case instrprof_error::hash_mismatch
:
132 OS
<< "function control flow change detected (hash mismatch)";
134 case instrprof_error::count_mismatch
:
135 OS
<< "function basic block count change detected (counter mismatch)";
137 case instrprof_error::bitmap_mismatch
:
138 OS
<< "function bitmap size change detected (bitmap size mismatch)";
140 case instrprof_error::counter_overflow
:
141 OS
<< "counter overflow";
143 case instrprof_error::value_site_count_mismatch
:
144 OS
<< "function value site count change detected (counter mismatch)";
146 case instrprof_error::compress_failed
:
147 OS
<< "failed to compress data (zlib)";
149 case instrprof_error::uncompress_failed
:
150 OS
<< "failed to uncompress data (zlib)";
152 case instrprof_error::empty_raw_profile
:
153 OS
<< "empty raw profile file";
155 case instrprof_error::zlib_unavailable
:
156 OS
<< "profile uses zlib compression but the profile reader was built "
157 "without zlib support";
159 case instrprof_error::raw_profile_version_mismatch
:
160 OS
<< "raw profile version mismatch";
162 case instrprof_error::counter_value_too_large
:
163 OS
<< "excessively large counter value suggests corrupted profile data";
167 // If optional error message is not empty, append it to the message.
169 OS
<< ": " << ErrMsg
;
176 // FIXME: This class is only here to support the transition to llvm::Error. It
177 // will be removed once this transition is complete. Clients should prefer to
178 // deal with the Error value directly, rather than converting to error_code.
179 class InstrProfErrorCategoryType
: public std::error_category
{
180 const char *name() const noexcept override
{ return "llvm.instrprof"; }
182 std::string
message(int IE
) const override
{
183 return getInstrProfErrString(static_cast<instrprof_error
>(IE
));
187 } // end anonymous namespace
189 const std::error_category
&llvm::instrprof_category() {
190 static InstrProfErrorCategoryType ErrorCategory
;
191 return ErrorCategory
;
196 const char *InstrProfSectNameCommon
[] = {
197 #define INSTR_PROF_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) \
199 #include "llvm/ProfileData/InstrProfData.inc"
202 const char *InstrProfSectNameCoff
[] = {
203 #define INSTR_PROF_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) \
205 #include "llvm/ProfileData/InstrProfData.inc"
208 const char *InstrProfSectNamePrefix
[] = {
209 #define INSTR_PROF_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) \
211 #include "llvm/ProfileData/InstrProfData.inc"
218 cl::opt
<bool> DoInstrProfNameCompression(
219 "enable-name-compression",
220 cl::desc("Enable name/filename string compression"), cl::init(true));
222 std::string
getInstrProfSectionName(InstrProfSectKind IPSK
,
223 Triple::ObjectFormatType OF
,
224 bool AddSegmentInfo
) {
225 std::string SectName
;
227 if (OF
== Triple::MachO
&& AddSegmentInfo
)
228 SectName
= InstrProfSectNamePrefix
[IPSK
];
230 if (OF
== Triple::COFF
)
231 SectName
+= InstrProfSectNameCoff
[IPSK
];
233 SectName
+= InstrProfSectNameCommon
[IPSK
];
235 if (OF
== Triple::MachO
&& IPSK
== IPSK_data
&& AddSegmentInfo
)
236 SectName
+= ",regular,live_support";
241 std::string
InstrProfError::message() const {
242 return getInstrProfErrString(Err
, Msg
);
245 char InstrProfError::ID
= 0;
247 std::string
getPGOFuncName(StringRef Name
, GlobalValue::LinkageTypes Linkage
,
249 uint64_t Version LLVM_ATTRIBUTE_UNUSED
) {
250 // Value names may be prefixed with a binary '1' to indicate
251 // that the backend should not modify the symbols due to any platform
252 // naming convention. Do not include that '1' in the PGO profile name.
254 Name
= Name
.substr(1);
256 std::string NewName
= std::string(Name
);
257 if (llvm::GlobalValue::isLocalLinkage(Linkage
)) {
258 // For local symbols, prepend the main file name to distinguish them.
259 // Do not include the full path in the file name since there's no guarantee
260 // that it will stay the same, e.g., if the files are checked out from
261 // version control in different locations.
262 if (FileName
.empty())
263 NewName
= NewName
.insert(0, "<unknown>:");
265 NewName
= NewName
.insert(0, FileName
.str() + ":");
270 // Strip NumPrefix level of directory name from PathNameStr. If the number of
271 // directory separators is less than NumPrefix, strip all the directories and
272 // leave base file name only.
273 static StringRef
stripDirPrefix(StringRef PathNameStr
, uint32_t NumPrefix
) {
274 uint32_t Count
= NumPrefix
;
275 uint32_t Pos
= 0, LastPos
= 0;
276 for (auto & CI
: PathNameStr
) {
278 if (llvm::sys::path::is_separator(CI
)) {
285 return PathNameStr
.substr(LastPos
);
288 static StringRef
getStrippedSourceFileName(const GlobalObject
&GO
) {
289 StringRef
FileName(GO
.getParent()->getSourceFileName());
290 uint32_t StripLevel
= StaticFuncFullModulePrefix
? 0 : (uint32_t)-1;
291 if (StripLevel
< StaticFuncStripDirNamePrefix
)
292 StripLevel
= StaticFuncStripDirNamePrefix
;
294 FileName
= stripDirPrefix(FileName
, StripLevel
);
298 // The PGO name has the format [<filepath>;]<mangled-name> where <filepath>; is
299 // provided if linkage is local and is used to discriminate possibly identical
300 // mangled names. ";" is used because it is unlikely to be found in either
301 // <filepath> or <mangled-name>.
303 // Older compilers used getPGOFuncName() which has the format
304 // [<filepath>:]<mangled-name>. This caused trouble for Objective-C functions
305 // which commonly have :'s in their names. We still need to compute this name to
306 // lookup functions from profiles built by older compilers.
308 getIRPGONameForGlobalObject(const GlobalObject
&GO
,
309 GlobalValue::LinkageTypes Linkage
,
310 StringRef FileName
) {
311 return GlobalValue::getGlobalIdentifier(GO
.getName(), Linkage
, FileName
);
314 static std::optional
<std::string
> lookupPGONameFromMetadata(MDNode
*MD
) {
316 StringRef S
= cast
<MDString
>(MD
->getOperand(0))->getString();
322 // Returns the PGO object name. This function has some special handling
323 // when called in LTO optimization. The following only applies when calling in
324 // LTO passes (when \c InLTO is true): LTO's internalization privatizes many
325 // global linkage symbols. This happens after value profile annotation, but
326 // those internal linkage functions should not have a source prefix.
327 // Additionally, for ThinLTO mode, exported internal functions are promoted
328 // and renamed. We need to ensure that the original internal PGO name is
329 // used when computing the GUID that is compared against the profiled GUIDs.
330 // To differentiate compiler generated internal symbols from original ones,
331 // PGOFuncName meta data are created and attached to the original internal
332 // symbols in the value profile annotation step
333 // (PGOUseFunc::annotateIndirectCallSites). If a symbol does not have the meta
334 // data, its original linkage must be non-internal.
335 static std::string
getIRPGOObjectName(const GlobalObject
&GO
, bool InLTO
,
336 MDNode
*PGONameMetadata
) {
338 auto FileName
= getStrippedSourceFileName(GO
);
339 return getIRPGONameForGlobalObject(GO
, GO
.getLinkage(), FileName
);
342 // In LTO mode (when InLTO is true), first check if there is a meta data.
343 if (auto IRPGOFuncName
= lookupPGONameFromMetadata(PGONameMetadata
))
344 return *IRPGOFuncName
;
346 // If there is no meta data, the function must be a global before the value
347 // profile annotation pass. Its current linkage may be internal if it is
348 // internalized in LTO mode.
349 return getIRPGONameForGlobalObject(GO
, GlobalValue::ExternalLinkage
, "");
352 // Returns the IRPGO function name and does special handling when called
353 // in LTO optimization. See the comments of `getIRPGOObjectName` for details.
354 std::string
getIRPGOFuncName(const Function
&F
, bool InLTO
) {
355 return getIRPGOObjectName(F
, InLTO
, getPGOFuncNameMetadata(F
));
358 // Please use getIRPGOFuncName for LLVM IR instrumentation. This function is
359 // for front-end (Clang, etc) instrumentation.
360 // The implementation is kept for profile matching from older profiles.
361 // This is similar to `getIRPGOFuncName` except that this function calls
362 // 'getPGOFuncName' to get a name and `getIRPGOFuncName` calls
363 // 'getIRPGONameForGlobalObject'. See the difference between two callees in the
364 // comments of `getIRPGONameForGlobalObject`.
365 std::string
getPGOFuncName(const Function
&F
, bool InLTO
, uint64_t Version
) {
367 auto FileName
= getStrippedSourceFileName(F
);
368 return getPGOFuncName(F
.getName(), F
.getLinkage(), FileName
, Version
);
371 // In LTO mode (when InLTO is true), first check if there is a meta data.
372 if (auto PGOFuncName
= lookupPGONameFromMetadata(getPGOFuncNameMetadata(F
)))
375 // If there is no meta data, the function must be a global before the value
376 // profile annotation pass. Its current linkage may be internal if it is
377 // internalized in LTO mode.
378 return getPGOFuncName(F
.getName(), GlobalValue::ExternalLinkage
, "");
381 // See getIRPGOFuncName() for a discription of the format.
382 std::pair
<StringRef
, StringRef
>
383 getParsedIRPGOFuncName(StringRef IRPGOFuncName
) {
384 auto [FileName
, FuncName
] = IRPGOFuncName
.split(';');
385 if (FuncName
.empty())
386 return std::make_pair(StringRef(), IRPGOFuncName
);
387 return std::make_pair(FileName
, FuncName
);
390 StringRef
getFuncNameWithoutPrefix(StringRef PGOFuncName
, StringRef FileName
) {
391 if (FileName
.empty())
393 // Drop the file name including ':' or ';'. See getIRPGONameForGlobalObject as
395 if (PGOFuncName
.starts_with(FileName
))
396 PGOFuncName
= PGOFuncName
.drop_front(FileName
.size() + 1);
400 // \p FuncName is the string used as profile lookup key for the function. A
401 // symbol is created to hold the name. Return the legalized symbol name.
402 std::string
getPGOFuncNameVarName(StringRef FuncName
,
403 GlobalValue::LinkageTypes Linkage
) {
404 std::string VarName
= std::string(getInstrProfNameVarPrefix());
407 if (!GlobalValue::isLocalLinkage(Linkage
))
410 // Now fix up illegal chars in local VarName that may upset the assembler.
411 const char InvalidChars
[] = "-:;<>/\"'";
412 size_t found
= VarName
.find_first_of(InvalidChars
);
413 while (found
!= std::string::npos
) {
414 VarName
[found
] = '_';
415 found
= VarName
.find_first_of(InvalidChars
, found
+ 1);
420 GlobalVariable
*createPGOFuncNameVar(Module
&M
,
421 GlobalValue::LinkageTypes Linkage
,
422 StringRef PGOFuncName
) {
423 // We generally want to match the function's linkage, but available_externally
424 // and extern_weak both have the wrong semantics, and anything that doesn't
425 // need to link across compilation units doesn't need to be visible at all.
426 if (Linkage
== GlobalValue::ExternalWeakLinkage
)
427 Linkage
= GlobalValue::LinkOnceAnyLinkage
;
428 else if (Linkage
== GlobalValue::AvailableExternallyLinkage
)
429 Linkage
= GlobalValue::LinkOnceODRLinkage
;
430 else if (Linkage
== GlobalValue::InternalLinkage
||
431 Linkage
== GlobalValue::ExternalLinkage
)
432 Linkage
= GlobalValue::PrivateLinkage
;
435 ConstantDataArray::getString(M
.getContext(), PGOFuncName
, false);
437 new GlobalVariable(M
, Value
->getType(), true, Linkage
, Value
,
438 getPGOFuncNameVarName(PGOFuncName
, Linkage
));
440 // Hide the symbol so that we correctly get a copy for each executable.
441 if (!GlobalValue::isLocalLinkage(FuncNameVar
->getLinkage()))
442 FuncNameVar
->setVisibility(GlobalValue::HiddenVisibility
);
447 GlobalVariable
*createPGOFuncNameVar(Function
&F
, StringRef PGOFuncName
) {
448 return createPGOFuncNameVar(*F
.getParent(), F
.getLinkage(), PGOFuncName
);
451 Error
InstrProfSymtab::create(Module
&M
, bool InLTO
) {
452 for (Function
&F
: M
) {
453 // Function may not have a name: like using asm("") to overwrite the name.
454 // Ignore in this case.
457 if (Error E
= addFuncWithName(F
, getIRPGOFuncName(F
, InLTO
)))
459 // Also use getPGOFuncName() so that we can find records from older profiles
460 if (Error E
= addFuncWithName(F
, getPGOFuncName(F
, InLTO
)))
465 return Error::success();
468 /// \c NameStrings is a string composed of one of more possibly encoded
469 /// sub-strings. The substrings are separated by 0 or more zero bytes. This
470 /// method decodes the string and calls `NameCallback` for each substring.
472 readAndDecodeStrings(StringRef NameStrings
,
473 std::function
<Error(StringRef
)> NameCallback
) {
474 const uint8_t *P
= NameStrings
.bytes_begin();
475 const uint8_t *EndP
= NameStrings
.bytes_end();
478 uint64_t UncompressedSize
= decodeULEB128(P
, &N
);
480 uint64_t CompressedSize
= decodeULEB128(P
, &N
);
482 bool isCompressed
= (CompressedSize
!= 0);
483 SmallVector
<uint8_t, 128> UncompressedNameStrings
;
484 StringRef NameStrings
;
486 if (!llvm::compression::zlib::isAvailable())
487 return make_error
<InstrProfError
>(instrprof_error::zlib_unavailable
);
489 if (Error E
= compression::zlib::decompress(ArrayRef(P
, CompressedSize
),
490 UncompressedNameStrings
,
492 consumeError(std::move(E
));
493 return make_error
<InstrProfError
>(instrprof_error::uncompress_failed
);
496 NameStrings
= toStringRef(UncompressedNameStrings
);
499 StringRef(reinterpret_cast<const char *>(P
), UncompressedSize
);
500 P
+= UncompressedSize
;
502 // Now parse the name strings.
503 SmallVector
<StringRef
, 0> Names
;
504 NameStrings
.split(Names
, getInstrProfNameSeparator());
505 for (StringRef
&Name
: Names
)
506 if (Error E
= NameCallback(Name
))
509 while (P
< EndP
&& *P
== 0)
512 return Error::success();
515 Error
InstrProfSymtab::create(StringRef NameStrings
) {
516 return readAndDecodeStrings(
518 std::bind(&InstrProfSymtab::addFuncName
, this, std::placeholders::_1
));
521 Error
InstrProfSymtab::addFuncWithName(Function
&F
, StringRef PGOFuncName
) {
522 if (Error E
= addFuncName(PGOFuncName
))
524 MD5FuncMap
.emplace_back(Function::getGUID(PGOFuncName
), &F
);
525 // In ThinLTO, local function may have been promoted to global and have
526 // suffix ".llvm." added to the function name. We need to add the
527 // stripped function name to the symbol table so that we can find a match
530 // We may have other suffixes similar as ".llvm." which are needed to
531 // be stripped before the matching, but ".__uniq." suffix which is used
532 // to differentiate internal linkage functions in different modules
533 // should be kept. Now this is the only suffix with the pattern ".xxx"
534 // which is kept before matching.
535 const std::string UniqSuffix
= ".__uniq.";
536 auto pos
= PGOFuncName
.find(UniqSuffix
);
537 // Search '.' after ".__uniq." if ".__uniq." exists, otherwise
538 // search '.' from the beginning.
539 if (pos
!= std::string::npos
)
540 pos
+= UniqSuffix
.length();
543 pos
= PGOFuncName
.find('.', pos
);
544 if (pos
!= std::string::npos
&& pos
!= 0) {
545 StringRef OtherFuncName
= PGOFuncName
.substr(0, pos
);
546 if (Error E
= addFuncName(OtherFuncName
))
548 MD5FuncMap
.emplace_back(Function::getGUID(OtherFuncName
), &F
);
550 return Error::success();
553 uint64_t InstrProfSymtab::getFunctionHashFromAddress(uint64_t Address
) {
555 auto It
= partition_point(AddrToMD5Map
, [=](std::pair
<uint64_t, uint64_t> A
) {
556 return A
.first
< Address
;
558 // Raw function pointer collected by value profiler may be from
559 // external functions that are not instrumented. They won't have
560 // mapping data to be used by the deserializer. Force the value to
561 // be 0 in this case.
562 if (It
!= AddrToMD5Map
.end() && It
->first
== Address
)
563 return (uint64_t)It
->second
;
567 void InstrProfSymtab::dumpNames(raw_ostream
&OS
) const {
568 SmallVector
<StringRef
, 0> Sorted(NameTab
.keys());
570 for (StringRef S
: Sorted
)
574 Error
collectGlobalObjectNameStrings(ArrayRef
<std::string
> NameStrs
,
575 bool doCompression
, std::string
&Result
) {
576 assert(!NameStrs
.empty() && "No name data to emit");
578 uint8_t Header
[20], *P
= Header
;
579 std::string UncompressedNameStrings
=
580 join(NameStrs
.begin(), NameStrs
.end(), getInstrProfNameSeparator());
582 assert(StringRef(UncompressedNameStrings
)
583 .count(getInstrProfNameSeparator()) == (NameStrs
.size() - 1) &&
584 "PGO name is invalid (contains separator token)");
586 unsigned EncLen
= encodeULEB128(UncompressedNameStrings
.length(), P
);
589 auto WriteStringToResult
= [&](size_t CompressedLen
, StringRef InputStr
) {
590 EncLen
= encodeULEB128(CompressedLen
, P
);
592 char *HeaderStr
= reinterpret_cast<char *>(&Header
[0]);
593 unsigned HeaderLen
= P
- &Header
[0];
594 Result
.append(HeaderStr
, HeaderLen
);
596 return Error::success();
599 if (!doCompression
) {
600 return WriteStringToResult(0, UncompressedNameStrings
);
603 SmallVector
<uint8_t, 128> CompressedNameStrings
;
604 compression::zlib::compress(arrayRefFromStringRef(UncompressedNameStrings
),
605 CompressedNameStrings
,
606 compression::zlib::BestSizeCompression
);
608 return WriteStringToResult(CompressedNameStrings
.size(),
609 toStringRef(CompressedNameStrings
));
612 StringRef
getPGOFuncNameVarInitializer(GlobalVariable
*NameVar
) {
613 auto *Arr
= cast
<ConstantDataArray
>(NameVar
->getInitializer());
615 Arr
->isCString() ? Arr
->getAsCString() : Arr
->getAsString();
619 Error
collectPGOFuncNameStrings(ArrayRef
<GlobalVariable
*> NameVars
,
620 std::string
&Result
, bool doCompression
) {
621 std::vector
<std::string
> NameStrs
;
622 for (auto *NameVar
: NameVars
) {
623 NameStrs
.push_back(std::string(getPGOFuncNameVarInitializer(NameVar
)));
625 return collectGlobalObjectNameStrings(
626 NameStrs
, compression::zlib::isAvailable() && doCompression
, Result
);
629 void InstrProfRecord::accumulateCounts(CountSumOrPercent
&Sum
) const {
630 uint64_t FuncSum
= 0;
631 Sum
.NumEntries
+= Counts
.size();
632 for (uint64_t Count
: Counts
)
634 Sum
.CountSum
+= FuncSum
;
636 for (uint32_t VK
= IPVK_First
; VK
<= IPVK_Last
; ++VK
) {
637 uint64_t KindSum
= 0;
638 uint32_t NumValueSites
= getNumValueSites(VK
);
639 for (size_t I
= 0; I
< NumValueSites
; ++I
) {
640 uint32_t NV
= getNumValueDataForSite(VK
, I
);
641 std::unique_ptr
<InstrProfValueData
[]> VD
= getValueForSite(VK
, I
);
642 for (uint32_t V
= 0; V
< NV
; V
++)
643 KindSum
+= VD
[V
].Count
;
645 Sum
.ValueCounts
[VK
] += KindSum
;
649 void InstrProfValueSiteRecord::overlap(InstrProfValueSiteRecord
&Input
,
651 OverlapStats
&Overlap
,
652 OverlapStats
&FuncLevelOverlap
) {
653 this->sortByTargetValues();
654 Input
.sortByTargetValues();
655 double Score
= 0.0f
, FuncLevelScore
= 0.0f
;
656 auto I
= ValueData
.begin();
657 auto IE
= ValueData
.end();
658 auto J
= Input
.ValueData
.begin();
659 auto JE
= Input
.ValueData
.end();
660 while (I
!= IE
&& J
!= JE
) {
661 if (I
->Value
== J
->Value
) {
662 Score
+= OverlapStats::score(I
->Count
, J
->Count
,
663 Overlap
.Base
.ValueCounts
[ValueKind
],
664 Overlap
.Test
.ValueCounts
[ValueKind
]);
665 FuncLevelScore
+= OverlapStats::score(
666 I
->Count
, J
->Count
, FuncLevelOverlap
.Base
.ValueCounts
[ValueKind
],
667 FuncLevelOverlap
.Test
.ValueCounts
[ValueKind
]);
669 } else if (I
->Value
< J
->Value
) {
675 Overlap
.Overlap
.ValueCounts
[ValueKind
] += Score
;
676 FuncLevelOverlap
.Overlap
.ValueCounts
[ValueKind
] += FuncLevelScore
;
679 // Return false on mismatch.
680 void InstrProfRecord::overlapValueProfData(uint32_t ValueKind
,
681 InstrProfRecord
&Other
,
682 OverlapStats
&Overlap
,
683 OverlapStats
&FuncLevelOverlap
) {
684 uint32_t ThisNumValueSites
= getNumValueSites(ValueKind
);
685 assert(ThisNumValueSites
== Other
.getNumValueSites(ValueKind
));
686 if (!ThisNumValueSites
)
689 std::vector
<InstrProfValueSiteRecord
> &ThisSiteRecords
=
690 getOrCreateValueSitesForKind(ValueKind
);
691 MutableArrayRef
<InstrProfValueSiteRecord
> OtherSiteRecords
=
692 Other
.getValueSitesForKind(ValueKind
);
693 for (uint32_t I
= 0; I
< ThisNumValueSites
; I
++)
694 ThisSiteRecords
[I
].overlap(OtherSiteRecords
[I
], ValueKind
, Overlap
,
698 void InstrProfRecord::overlap(InstrProfRecord
&Other
, OverlapStats
&Overlap
,
699 OverlapStats
&FuncLevelOverlap
,
700 uint64_t ValueCutoff
) {
701 // FuncLevel CountSum for other should already computed and nonzero.
702 assert(FuncLevelOverlap
.Test
.CountSum
>= 1.0f
);
703 accumulateCounts(FuncLevelOverlap
.Base
);
704 bool Mismatch
= (Counts
.size() != Other
.Counts
.size());
706 // Check if the value profiles mismatch.
708 for (uint32_t Kind
= IPVK_First
; Kind
<= IPVK_Last
; ++Kind
) {
709 uint32_t ThisNumValueSites
= getNumValueSites(Kind
);
710 uint32_t OtherNumValueSites
= Other
.getNumValueSites(Kind
);
711 if (ThisNumValueSites
!= OtherNumValueSites
) {
718 Overlap
.addOneMismatch(FuncLevelOverlap
.Test
);
722 // Compute overlap for value counts.
723 for (uint32_t Kind
= IPVK_First
; Kind
<= IPVK_Last
; ++Kind
)
724 overlapValueProfData(Kind
, Other
, Overlap
, FuncLevelOverlap
);
727 uint64_t MaxCount
= 0;
728 // Compute overlap for edge counts.
729 for (size_t I
= 0, E
= Other
.Counts
.size(); I
< E
; ++I
) {
730 Score
+= OverlapStats::score(Counts
[I
], Other
.Counts
[I
],
731 Overlap
.Base
.CountSum
, Overlap
.Test
.CountSum
);
732 MaxCount
= std::max(Other
.Counts
[I
], MaxCount
);
734 Overlap
.Overlap
.CountSum
+= Score
;
735 Overlap
.Overlap
.NumEntries
+= 1;
737 if (MaxCount
>= ValueCutoff
) {
738 double FuncScore
= 0.0;
739 for (size_t I
= 0, E
= Other
.Counts
.size(); I
< E
; ++I
)
740 FuncScore
+= OverlapStats::score(Counts
[I
], Other
.Counts
[I
],
741 FuncLevelOverlap
.Base
.CountSum
,
742 FuncLevelOverlap
.Test
.CountSum
);
743 FuncLevelOverlap
.Overlap
.CountSum
= FuncScore
;
744 FuncLevelOverlap
.Overlap
.NumEntries
= Other
.Counts
.size();
745 FuncLevelOverlap
.Valid
= true;
749 void InstrProfValueSiteRecord::merge(InstrProfValueSiteRecord
&Input
,
751 function_ref
<void(instrprof_error
)> Warn
) {
752 this->sortByTargetValues();
753 Input
.sortByTargetValues();
754 auto I
= ValueData
.begin();
755 auto IE
= ValueData
.end();
756 for (const InstrProfValueData
&J
: Input
.ValueData
) {
757 while (I
!= IE
&& I
->Value
< J
.Value
)
759 if (I
!= IE
&& I
->Value
== J
.Value
) {
761 I
->Count
= SaturatingMultiplyAdd(J
.Count
, Weight
, I
->Count
, &Overflowed
);
763 Warn(instrprof_error::counter_overflow
);
767 ValueData
.insert(I
, J
);
771 void InstrProfValueSiteRecord::scale(uint64_t N
, uint64_t D
,
772 function_ref
<void(instrprof_error
)> Warn
) {
773 for (InstrProfValueData
&I
: ValueData
) {
775 I
.Count
= SaturatingMultiply(I
.Count
, N
, &Overflowed
) / D
;
777 Warn(instrprof_error::counter_overflow
);
781 // Merge Value Profile data from Src record to this record for ValueKind.
782 // Scale merged value counts by \p Weight.
783 void InstrProfRecord::mergeValueProfData(
784 uint32_t ValueKind
, InstrProfRecord
&Src
, uint64_t Weight
,
785 function_ref
<void(instrprof_error
)> Warn
) {
786 uint32_t ThisNumValueSites
= getNumValueSites(ValueKind
);
787 uint32_t OtherNumValueSites
= Src
.getNumValueSites(ValueKind
);
788 if (ThisNumValueSites
!= OtherNumValueSites
) {
789 Warn(instrprof_error::value_site_count_mismatch
);
792 if (!ThisNumValueSites
)
794 std::vector
<InstrProfValueSiteRecord
> &ThisSiteRecords
=
795 getOrCreateValueSitesForKind(ValueKind
);
796 MutableArrayRef
<InstrProfValueSiteRecord
> OtherSiteRecords
=
797 Src
.getValueSitesForKind(ValueKind
);
798 for (uint32_t I
= 0; I
< ThisNumValueSites
; I
++)
799 ThisSiteRecords
[I
].merge(OtherSiteRecords
[I
], Weight
, Warn
);
802 void InstrProfRecord::merge(InstrProfRecord
&Other
, uint64_t Weight
,
803 function_ref
<void(instrprof_error
)> Warn
) {
804 // If the number of counters doesn't match we either have bad data
805 // or a hash collision.
806 if (Counts
.size() != Other
.Counts
.size()) {
807 Warn(instrprof_error::count_mismatch
);
811 // Special handling of the first count as the PseudoCount.
812 CountPseudoKind OtherKind
= Other
.getCountPseudoKind();
813 CountPseudoKind ThisKind
= getCountPseudoKind();
814 if (OtherKind
!= NotPseudo
|| ThisKind
!= NotPseudo
) {
815 // We don't allow the merge of a profile with pseudo counts and
816 // a normal profile (i.e. without pesudo counts).
817 // Profile supplimenation should be done after the profile merge.
818 if (OtherKind
== NotPseudo
|| ThisKind
== NotPseudo
) {
819 Warn(instrprof_error::count_mismatch
);
822 if (OtherKind
== PseudoHot
|| ThisKind
== PseudoHot
)
823 setPseudoCount(PseudoHot
);
825 setPseudoCount(PseudoWarm
);
829 for (size_t I
= 0, E
= Other
.Counts
.size(); I
< E
; ++I
) {
832 SaturatingMultiplyAdd(Other
.Counts
[I
], Weight
, Counts
[I
], &Overflowed
);
833 if (Value
> getInstrMaxCountValue()) {
834 Value
= getInstrMaxCountValue();
839 Warn(instrprof_error::counter_overflow
);
842 // If the number of bitmap bytes doesn't match we either have bad data
843 // or a hash collision.
844 if (BitmapBytes
.size() != Other
.BitmapBytes
.size()) {
845 Warn(instrprof_error::bitmap_mismatch
);
849 // Bitmap bytes are merged by simply ORing them together.
850 for (size_t I
= 0, E
= Other
.BitmapBytes
.size(); I
< E
; ++I
) {
851 BitmapBytes
[I
] = Other
.BitmapBytes
[I
] | BitmapBytes
[I
];
854 for (uint32_t Kind
= IPVK_First
; Kind
<= IPVK_Last
; ++Kind
)
855 mergeValueProfData(Kind
, Other
, Weight
, Warn
);
858 void InstrProfRecord::scaleValueProfData(
859 uint32_t ValueKind
, uint64_t N
, uint64_t D
,
860 function_ref
<void(instrprof_error
)> Warn
) {
861 for (auto &R
: getValueSitesForKind(ValueKind
))
865 void InstrProfRecord::scale(uint64_t N
, uint64_t D
,
866 function_ref
<void(instrprof_error
)> Warn
) {
867 assert(D
!= 0 && "D cannot be 0");
868 for (auto &Count
: this->Counts
) {
870 Count
= SaturatingMultiply(Count
, N
, &Overflowed
) / D
;
871 if (Count
> getInstrMaxCountValue()) {
872 Count
= getInstrMaxCountValue();
876 Warn(instrprof_error::counter_overflow
);
878 for (uint32_t Kind
= IPVK_First
; Kind
<= IPVK_Last
; ++Kind
)
879 scaleValueProfData(Kind
, N
, D
, Warn
);
882 // Map indirect call target name hash to name string.
883 uint64_t InstrProfRecord::remapValue(uint64_t Value
, uint32_t ValueKind
,
884 InstrProfSymtab
*SymTab
) {
888 if (ValueKind
== IPVK_IndirectCallTarget
)
889 return SymTab
->getFunctionHashFromAddress(Value
);
894 void InstrProfRecord::addValueData(uint32_t ValueKind
, uint32_t Site
,
895 InstrProfValueData
*VData
, uint32_t N
,
896 InstrProfSymtab
*ValueMap
) {
897 for (uint32_t I
= 0; I
< N
; I
++) {
898 VData
[I
].Value
= remapValue(VData
[I
].Value
, ValueKind
, ValueMap
);
900 std::vector
<InstrProfValueSiteRecord
> &ValueSites
=
901 getOrCreateValueSitesForKind(ValueKind
);
903 ValueSites
.emplace_back();
905 ValueSites
.emplace_back(VData
, VData
+ N
);
908 std::vector
<BPFunctionNode
> TemporalProfTraceTy::createBPFunctionNodes(
909 ArrayRef
<TemporalProfTraceTy
> Traces
) {
910 using IDT
= BPFunctionNode::IDT
;
911 using UtilityNodeT
= BPFunctionNode::UtilityNodeT
;
912 // Collect all function IDs ordered by their smallest timestamp. This will be
913 // used as the initial FunctionNode order.
914 SetVector
<IDT
> FunctionIds
;
915 size_t LargestTraceSize
= 0;
916 for (auto &Trace
: Traces
)
918 std::max(LargestTraceSize
, Trace
.FunctionNameRefs
.size());
919 for (size_t Timestamp
= 0; Timestamp
< LargestTraceSize
; Timestamp
++)
920 for (auto &Trace
: Traces
)
921 if (Timestamp
< Trace
.FunctionNameRefs
.size())
922 FunctionIds
.insert(Trace
.FunctionNameRefs
[Timestamp
]);
924 const int N
= Log2_64(LargestTraceSize
) + 1;
926 // TODO: We need to use the Trace.Weight field to give more weight to more
927 // important utilities
928 DenseMap
<IDT
, SmallVector
<UtilityNodeT
, 4>> FuncGroups
;
929 for (size_t TraceIdx
= 0; TraceIdx
< Traces
.size(); TraceIdx
++) {
930 auto &Trace
= Traces
[TraceIdx
].FunctionNameRefs
;
931 for (size_t Timestamp
= 0; Timestamp
< Trace
.size(); Timestamp
++) {
932 for (int I
= Log2_64(Timestamp
+ 1); I
< N
; I
++) {
933 auto FunctionId
= Trace
[Timestamp
];
934 UtilityNodeT GroupId
= TraceIdx
* N
+ I
;
935 FuncGroups
[FunctionId
].push_back(GroupId
);
940 std::vector
<BPFunctionNode
> Nodes
;
941 for (auto Id
: FunctionIds
) {
942 auto &UNs
= FuncGroups
[Id
];
944 UNs
.erase(std::unique(UNs
.begin(), UNs
.end()), UNs
.end());
945 Nodes
.emplace_back(Id
, UNs
);
950 #define INSTR_PROF_COMMON_API_IMPL
951 #include "llvm/ProfileData/InstrProfData.inc"
/*!
 * ValueProfRecordClosure Interface implementation for InstrProfRecord
 * class. These C wrappers are used as adaptors so that C++ code can be
 * invoked as callbacks.
 */
958 uint32_t getNumValueKindsInstrProf(const void *Record
) {
959 return reinterpret_cast<const InstrProfRecord
*>(Record
)->getNumValueKinds();
962 uint32_t getNumValueSitesInstrProf(const void *Record
, uint32_t VKind
) {
963 return reinterpret_cast<const InstrProfRecord
*>(Record
)
964 ->getNumValueSites(VKind
);
967 uint32_t getNumValueDataInstrProf(const void *Record
, uint32_t VKind
) {
968 return reinterpret_cast<const InstrProfRecord
*>(Record
)
969 ->getNumValueData(VKind
);
972 uint32_t getNumValueDataForSiteInstrProf(const void *R
, uint32_t VK
,
974 return reinterpret_cast<const InstrProfRecord
*>(R
)
975 ->getNumValueDataForSite(VK
, S
);
978 void getValueForSiteInstrProf(const void *R
, InstrProfValueData
*Dst
,
979 uint32_t K
, uint32_t S
) {
980 reinterpret_cast<const InstrProfRecord
*>(R
)->getValueForSite(Dst
, K
, S
);
983 ValueProfData
*allocValueProfDataInstrProf(size_t TotalSizeInBytes
) {
985 (ValueProfData
*)(new (::operator new(TotalSizeInBytes
)) ValueProfData());
986 memset(VD
, 0, TotalSizeInBytes
);
990 static ValueProfRecordClosure InstrProfRecordClosure
= {
992 getNumValueKindsInstrProf
,
993 getNumValueSitesInstrProf
,
994 getNumValueDataInstrProf
,
995 getNumValueDataForSiteInstrProf
,
997 getValueForSiteInstrProf
,
998 allocValueProfDataInstrProf
};
1000 // Wrapper implementation using the closure mechanism.
1001 uint32_t ValueProfData::getSize(const InstrProfRecord
&Record
) {
1002 auto Closure
= InstrProfRecordClosure
;
1003 Closure
.Record
= &Record
;
1004 return getValueProfDataSize(&Closure
);
1007 // Wrapper implementation using the closure mechanism.
1008 std::unique_ptr
<ValueProfData
>
1009 ValueProfData::serializeFrom(const InstrProfRecord
&Record
) {
1010 InstrProfRecordClosure
.Record
= &Record
;
1012 std::unique_ptr
<ValueProfData
> VPD(
1013 serializeValueProfDataFrom(&InstrProfRecordClosure
, nullptr));
1017 void ValueProfRecord::deserializeTo(InstrProfRecord
&Record
,
1018 InstrProfSymtab
*SymTab
) {
1019 Record
.reserveSites(Kind
, NumValueSites
);
1021 InstrProfValueData
*ValueData
= getValueProfRecordValueData(this);
1022 for (uint64_t VSite
= 0; VSite
< NumValueSites
; ++VSite
) {
1023 uint8_t ValueDataCount
= this->SiteCountArray
[VSite
];
1024 Record
.addValueData(Kind
, VSite
, ValueData
, ValueDataCount
, SymTab
);
1025 ValueData
+= ValueDataCount
;
1029 // For writing/serializing, Old is the host endianness, and New is
1030 // byte order intended on disk. For Reading/deserialization, Old
1031 // is the on-disk source endianness, and New is the host endianness.
1032 void ValueProfRecord::swapBytes(llvm::endianness Old
, llvm::endianness New
) {
1033 using namespace support
;
1038 if (llvm::endianness::native
!= Old
) {
1039 sys::swapByteOrder
<uint32_t>(NumValueSites
);
1040 sys::swapByteOrder
<uint32_t>(Kind
);
1042 uint32_t ND
= getValueProfRecordNumValueData(this);
1043 InstrProfValueData
*VD
= getValueProfRecordValueData(this);
1045 // No need to swap byte array: SiteCountArrray.
1046 for (uint32_t I
= 0; I
< ND
; I
++) {
1047 sys::swapByteOrder
<uint64_t>(VD
[I
].Value
);
1048 sys::swapByteOrder
<uint64_t>(VD
[I
].Count
);
1050 if (llvm::endianness::native
== Old
) {
1051 sys::swapByteOrder
<uint32_t>(NumValueSites
);
1052 sys::swapByteOrder
<uint32_t>(Kind
);
1056 void ValueProfData::deserializeTo(InstrProfRecord
&Record
,
1057 InstrProfSymtab
*SymTab
) {
1058 if (NumValueKinds
== 0)
1061 ValueProfRecord
*VR
= getFirstValueProfRecord(this);
1062 for (uint32_t K
= 0; K
< NumValueKinds
; K
++) {
1063 VR
->deserializeTo(Record
, SymTab
);
1064 VR
= getValueProfRecordNext(VR
);
1069 static T
swapToHostOrder(const unsigned char *&D
, llvm::endianness Orig
) {
1070 using namespace support
;
1072 if (Orig
== llvm::endianness::little
)
1073 return endian::readNext
<T
, llvm::endianness::little
, unaligned
>(D
);
1075 return endian::readNext
<T
, llvm::endianness::big
, unaligned
>(D
);
1078 static std::unique_ptr
<ValueProfData
> allocValueProfData(uint32_t TotalSize
) {
1079 return std::unique_ptr
<ValueProfData
>(new (::operator new(TotalSize
))
1083 Error
ValueProfData::checkIntegrity() {
1084 if (NumValueKinds
> IPVK_Last
+ 1)
1085 return make_error
<InstrProfError
>(
1086 instrprof_error::malformed
, "number of value profile kinds is invalid");
1087 // Total size needs to be multiple of quadword size.
1088 if (TotalSize
% sizeof(uint64_t))
1089 return make_error
<InstrProfError
>(
1090 instrprof_error::malformed
, "total size is not multiples of quardword");
1092 ValueProfRecord
*VR
= getFirstValueProfRecord(this);
1093 for (uint32_t K
= 0; K
< this->NumValueKinds
; K
++) {
1094 if (VR
->Kind
> IPVK_Last
)
1095 return make_error
<InstrProfError
>(instrprof_error::malformed
,
1096 "value kind is invalid");
1097 VR
= getValueProfRecordNext(VR
);
1098 if ((char *)VR
- (char *)this > (ptrdiff_t)TotalSize
)
1099 return make_error
<InstrProfError
>(
1100 instrprof_error::malformed
,
1101 "value profile address is greater than total size");
1103 return Error::success();
1106 Expected
<std::unique_ptr
<ValueProfData
>>
1107 ValueProfData::getValueProfData(const unsigned char *D
,
1108 const unsigned char *const BufferEnd
,
1109 llvm::endianness Endianness
) {
1110 using namespace support
;
1112 if (D
+ sizeof(ValueProfData
) > BufferEnd
)
1113 return make_error
<InstrProfError
>(instrprof_error::truncated
);
1115 const unsigned char *Header
= D
;
1116 uint32_t TotalSize
= swapToHostOrder
<uint32_t>(Header
, Endianness
);
1117 if (D
+ TotalSize
> BufferEnd
)
1118 return make_error
<InstrProfError
>(instrprof_error::too_large
);
1120 std::unique_ptr
<ValueProfData
> VPD
= allocValueProfData(TotalSize
);
1121 memcpy(VPD
.get(), D
, TotalSize
);
1123 VPD
->swapBytesToHost(Endianness
);
1125 Error E
= VPD
->checkIntegrity();
1127 return std::move(E
);
1129 return std::move(VPD
);
1132 void ValueProfData::swapBytesToHost(llvm::endianness Endianness
) {
1133 using namespace support
;
1135 if (Endianness
== llvm::endianness::native
)
1138 sys::swapByteOrder
<uint32_t>(TotalSize
);
1139 sys::swapByteOrder
<uint32_t>(NumValueKinds
);
1141 ValueProfRecord
*VR
= getFirstValueProfRecord(this);
1142 for (uint32_t K
= 0; K
< NumValueKinds
; K
++) {
1143 VR
->swapBytes(Endianness
, llvm::endianness::native
);
1144 VR
= getValueProfRecordNext(VR
);
1148 void ValueProfData::swapBytesFromHost(llvm::endianness Endianness
) {
1149 using namespace support
;
1151 if (Endianness
== llvm::endianness::native
)
1154 ValueProfRecord
*VR
= getFirstValueProfRecord(this);
1155 for (uint32_t K
= 0; K
< NumValueKinds
; K
++) {
1156 ValueProfRecord
*NVR
= getValueProfRecordNext(VR
);
1157 VR
->swapBytes(llvm::endianness::native
, Endianness
);
1160 sys::swapByteOrder
<uint32_t>(TotalSize
);
1161 sys::swapByteOrder
<uint32_t>(NumValueKinds
);
1164 void annotateValueSite(Module
&M
, Instruction
&Inst
,
1165 const InstrProfRecord
&InstrProfR
,
1166 InstrProfValueKind ValueKind
, uint32_t SiteIdx
,
1167 uint32_t MaxMDCount
) {
1168 uint32_t NV
= InstrProfR
.getNumValueDataForSite(ValueKind
, SiteIdx
);
1173 std::unique_ptr
<InstrProfValueData
[]> VD
=
1174 InstrProfR
.getValueForSite(ValueKind
, SiteIdx
, &Sum
);
1176 ArrayRef
<InstrProfValueData
> VDs(VD
.get(), NV
);
1177 annotateValueSite(M
, Inst
, VDs
, Sum
, ValueKind
, MaxMDCount
);
1180 void annotateValueSite(Module
&M
, Instruction
&Inst
,
1181 ArrayRef
<InstrProfValueData
> VDs
,
1182 uint64_t Sum
, InstrProfValueKind ValueKind
,
1183 uint32_t MaxMDCount
) {
1184 LLVMContext
&Ctx
= M
.getContext();
1185 MDBuilder
MDHelper(Ctx
);
1186 SmallVector
<Metadata
*, 3> Vals
;
1188 Vals
.push_back(MDHelper
.createString("VP"));
1190 Vals
.push_back(MDHelper
.createConstant(
1191 ConstantInt::get(Type::getInt32Ty(Ctx
), ValueKind
)));
1194 MDHelper
.createConstant(ConstantInt::get(Type::getInt64Ty(Ctx
), Sum
)));
1196 // Value Profile Data
1197 uint32_t MDCount
= MaxMDCount
;
1198 for (auto &VD
: VDs
) {
1199 Vals
.push_back(MDHelper
.createConstant(
1200 ConstantInt::get(Type::getInt64Ty(Ctx
), VD
.Value
)));
1201 Vals
.push_back(MDHelper
.createConstant(
1202 ConstantInt::get(Type::getInt64Ty(Ctx
), VD
.Count
)));
1206 Inst
.setMetadata(LLVMContext::MD_prof
, MDNode::get(Ctx
, Vals
));
1209 bool getValueProfDataFromInst(const Instruction
&Inst
,
1210 InstrProfValueKind ValueKind
,
1211 uint32_t MaxNumValueData
,
1212 InstrProfValueData ValueData
[],
1213 uint32_t &ActualNumValueData
, uint64_t &TotalC
,
1214 bool GetNoICPValue
) {
1215 MDNode
*MD
= Inst
.getMetadata(LLVMContext::MD_prof
);
1219 unsigned NOps
= MD
->getNumOperands();
1224 // Operand 0 is a string tag "VP":
1225 MDString
*Tag
= cast
<MDString
>(MD
->getOperand(0));
1229 if (!Tag
->getString().equals("VP"))
1233 ConstantInt
*KindInt
= mdconst::dyn_extract
<ConstantInt
>(MD
->getOperand(1));
1236 if (KindInt
->getZExtValue() != ValueKind
)
1240 ConstantInt
*TotalCInt
= mdconst::dyn_extract
<ConstantInt
>(MD
->getOperand(2));
1243 TotalC
= TotalCInt
->getZExtValue();
1245 ActualNumValueData
= 0;
1247 for (unsigned I
= 3; I
< NOps
; I
+= 2) {
1248 if (ActualNumValueData
>= MaxNumValueData
)
1250 ConstantInt
*Value
= mdconst::dyn_extract
<ConstantInt
>(MD
->getOperand(I
));
1251 ConstantInt
*Count
=
1252 mdconst::dyn_extract
<ConstantInt
>(MD
->getOperand(I
+ 1));
1253 if (!Value
|| !Count
)
1255 uint64_t CntValue
= Count
->getZExtValue();
1256 if (!GetNoICPValue
&& (CntValue
== NOMORE_ICP_MAGICNUM
))
1258 ValueData
[ActualNumValueData
].Value
= Value
->getZExtValue();
1259 ValueData
[ActualNumValueData
].Count
= CntValue
;
1260 ActualNumValueData
++;
1265 MDNode
*getPGOFuncNameMetadata(const Function
&F
) {
1266 return F
.getMetadata(getPGOFuncNameMetadataName());
1269 void createPGOFuncNameMetadata(Function
&F
, StringRef PGOFuncName
) {
1270 // Only for internal linkage functions.
1271 if (PGOFuncName
== F
.getName())
1273 // Don't create duplicated meta-data.
1274 if (getPGOFuncNameMetadata(F
))
1276 LLVMContext
&C
= F
.getContext();
1277 MDNode
*N
= MDNode::get(C
, MDString::get(C
, PGOFuncName
));
1278 F
.setMetadata(getPGOFuncNameMetadataName(), N
);
1281 bool needsComdatForCounter(const Function
&F
, const Module
&M
) {
1285 if (!Triple(M
.getTargetTriple()).supportsCOMDAT())
1288 // See createPGOFuncNameVar for more details. To avoid link errors, profile
1289 // counters for function with available_externally linkage needs to be changed
1290 // to linkonce linkage. On ELF based systems, this leads to weak symbols to be
1291 // created. Without using comdat, duplicate entries won't be removed by the
1292 // linker leading to increased data segement size and raw profile size. Even
1293 // worse, since the referenced counter from profile per-function data object
1294 // will be resolved to the common strong definition, the profile counts for
1295 // available_externally functions will end up being duplicated in raw profile
1296 // data. This can result in distorted profile as the counts of those dups
1297 // will be accumulated by the profile merger.
1298 GlobalValue::LinkageTypes Linkage
= F
.getLinkage();
1299 if (Linkage
!= GlobalValue::ExternalWeakLinkage
&&
1300 Linkage
!= GlobalValue::AvailableExternallyLinkage
)
1306 // Check if INSTR_PROF_RAW_VERSION_VAR is defined.
1307 bool isIRPGOFlagSet(const Module
*M
) {
1309 M
->getNamedGlobal(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR
));
1310 if (!IRInstrVar
|| IRInstrVar
->hasLocalLinkage())
1313 // For CSPGO+LTO, this variable might be marked as non-prevailing and we only
1315 if (IRInstrVar
->isDeclaration())
1318 // Check if the flag is set.
1319 if (!IRInstrVar
->hasInitializer())
1322 auto *InitVal
= dyn_cast_or_null
<ConstantInt
>(IRInstrVar
->getInitializer());
1325 return (InitVal
->getZExtValue() & VARIANT_MASK_IR_PROF
) != 0;
1328 // Check if we can safely rename this Comdat function.
1329 bool canRenameComdatFunc(const Function
&F
, bool CheckAddressTaken
) {
1330 if (F
.getName().empty())
1332 if (!needsComdatForCounter(F
, *(F
.getParent())))
1334 // Unsafe to rename the address-taken function (which can be used in
1335 // function comparison).
1336 if (CheckAddressTaken
&& F
.hasAddressTaken())
1338 // Only safe to do if this function may be discarded if it is not used
1339 // in the compilation unit.
1340 if (!GlobalValue::isDiscardableIfUnused(F
.getLinkage()))
1343 // For AvailableExternallyLinkage functions.
1344 if (!F
.hasComdat()) {
1345 assert(F
.getLinkage() == GlobalValue::AvailableExternallyLinkage
);
1351 // Create the variable for the profile file name.
1352 void createProfileFileNameVar(Module
&M
, StringRef InstrProfileOutput
) {
1353 if (InstrProfileOutput
.empty())
1355 Constant
*ProfileNameConst
=
1356 ConstantDataArray::getString(M
.getContext(), InstrProfileOutput
, true);
1357 GlobalVariable
*ProfileNameVar
= new GlobalVariable(
1358 M
, ProfileNameConst
->getType(), true, GlobalValue::WeakAnyLinkage
,
1359 ProfileNameConst
, INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR
));
1360 ProfileNameVar
->setVisibility(GlobalValue::HiddenVisibility
);
1361 Triple
TT(M
.getTargetTriple());
1362 if (TT
.supportsCOMDAT()) {
1363 ProfileNameVar
->setLinkage(GlobalValue::ExternalLinkage
);
1364 ProfileNameVar
->setComdat(M
.getOrInsertComdat(
1365 StringRef(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR
))));
1369 Error
OverlapStats::accumulateCounts(const std::string
&BaseFilename
,
1370 const std::string
&TestFilename
,
1372 auto getProfileSum
= [IsCS
](const std::string
&Filename
,
1373 CountSumOrPercent
&Sum
) -> Error
{
1374 // This function is only used from llvm-profdata that doesn't use any kind
1375 // of VFS. Just create a default RealFileSystem to read profiles.
1376 auto FS
= vfs::getRealFileSystem();
1377 auto ReaderOrErr
= InstrProfReader::create(Filename
, *FS
);
1378 if (Error E
= ReaderOrErr
.takeError()) {
1381 auto Reader
= std::move(ReaderOrErr
.get());
1382 Reader
->accumulateCounts(Sum
, IsCS
);
1383 return Error::success();
1385 auto Ret
= getProfileSum(BaseFilename
, Base
);
1388 Ret
= getProfileSum(TestFilename
, Test
);
1391 this->BaseFilename
= &BaseFilename
;
1392 this->TestFilename
= &TestFilename
;
1394 return Error::success();
1397 void OverlapStats::addOneMismatch(const CountSumOrPercent
&MismatchFunc
) {
1398 Mismatch
.NumEntries
+= 1;
1399 Mismatch
.CountSum
+= MismatchFunc
.CountSum
/ Test
.CountSum
;
1400 for (unsigned I
= 0; I
< IPVK_Last
- IPVK_First
+ 1; I
++) {
1401 if (Test
.ValueCounts
[I
] >= 1.0f
)
1402 Mismatch
.ValueCounts
[I
] +=
1403 MismatchFunc
.ValueCounts
[I
] / Test
.ValueCounts
[I
];
1407 void OverlapStats::addOneUnique(const CountSumOrPercent
&UniqueFunc
) {
1408 Unique
.NumEntries
+= 1;
1409 Unique
.CountSum
+= UniqueFunc
.CountSum
/ Test
.CountSum
;
1410 for (unsigned I
= 0; I
< IPVK_Last
- IPVK_First
+ 1; I
++) {
1411 if (Test
.ValueCounts
[I
] >= 1.0f
)
1412 Unique
.ValueCounts
[I
] += UniqueFunc
.ValueCounts
[I
] / Test
.ValueCounts
[I
];
1416 void OverlapStats::dump(raw_fd_ostream
&OS
) const {
1420 const char *EntryName
=
1421 (Level
== ProgramLevel
? "functions" : "edge counters");
1422 if (Level
== ProgramLevel
) {
1423 OS
<< "Profile overlap infomation for base_profile: " << *BaseFilename
1424 << " and test_profile: " << *TestFilename
<< "\nProgram level:\n";
1426 OS
<< "Function level:\n"
1427 << " Function: " << FuncName
<< " (Hash=" << FuncHash
<< ")\n";
1430 OS
<< " # of " << EntryName
<< " overlap: " << Overlap
.NumEntries
<< "\n";
1431 if (Mismatch
.NumEntries
)
1432 OS
<< " # of " << EntryName
<< " mismatch: " << Mismatch
.NumEntries
1434 if (Unique
.NumEntries
)
1435 OS
<< " # of " << EntryName
1436 << " only in test_profile: " << Unique
.NumEntries
<< "\n";
1438 OS
<< " Edge profile overlap: " << format("%.3f%%", Overlap
.CountSum
* 100)
1440 if (Mismatch
.NumEntries
)
1441 OS
<< " Mismatched count percentage (Edge): "
1442 << format("%.3f%%", Mismatch
.CountSum
* 100) << "\n";
1443 if (Unique
.NumEntries
)
1444 OS
<< " Percentage of Edge profile only in test_profile: "
1445 << format("%.3f%%", Unique
.CountSum
* 100) << "\n";
1446 OS
<< " Edge profile base count sum: " << format("%.0f", Base
.CountSum
)
1448 << " Edge profile test count sum: " << format("%.0f", Test
.CountSum
)
1451 for (unsigned I
= 0; I
< IPVK_Last
- IPVK_First
+ 1; I
++) {
1452 if (Base
.ValueCounts
[I
] < 1.0f
&& Test
.ValueCounts
[I
] < 1.0f
)
1454 char ProfileKindName
[20];
1456 case IPVK_IndirectCallTarget
:
1457 strncpy(ProfileKindName
, "IndirectCall", 19);
1459 case IPVK_MemOPSize
:
1460 strncpy(ProfileKindName
, "MemOP", 19);
1463 snprintf(ProfileKindName
, 19, "VP[%d]", I
);
1466 OS
<< " " << ProfileKindName
1467 << " profile overlap: " << format("%.3f%%", Overlap
.ValueCounts
[I
] * 100)
1469 if (Mismatch
.NumEntries
)
1470 OS
<< " Mismatched count percentage (" << ProfileKindName
1471 << "): " << format("%.3f%%", Mismatch
.ValueCounts
[I
] * 100) << "\n";
1472 if (Unique
.NumEntries
)
1473 OS
<< " Percentage of " << ProfileKindName
1474 << " profile only in test_profile: "
1475 << format("%.3f%%", Unique
.ValueCounts
[I
] * 100) << "\n";
1476 OS
<< " " << ProfileKindName
1477 << " profile base count sum: " << format("%.0f", Base
.ValueCounts
[I
])
1479 << " " << ProfileKindName
1480 << " profile test count sum: " << format("%.0f", Test
.ValueCounts
[I
])
1485 namespace IndexedInstrProf
{
// A C++14 compatible version of the offsetof macro.
//
// Value-initializes a T2, takes the address of the member that \p Member
// designates and of the object itself, and returns the difference of the two
// addresses in bytes.
template <typename T1, typename T2>
inline size_t constexpr offsetOf(T1 T2::*Member) {
  constexpr T2 Probe{};
  const size_t ObjectAddr = size_t(&Probe);
  const size_t MemberAddr = size_t(&(Probe.*Member));
  return MemberAddr - ObjectAddr;
}
// Load the 8 bytes found at \p Buffer + \p Offset as a uint64_t in host byte
// order. The bytes are copied rather than dereferenced through a cast pointer
// so the load is valid for any alignment of \p Buffer + \p Offset.
static inline uint64_t read(const unsigned char *Buffer, size_t Offset) {
  uint64_t Value;
  std::memcpy(&Value, Buffer + Offset, sizeof(Value));
  return Value;
}
1497 uint64_t Header::formatVersion() const {
1498 using namespace support
;
1499 return endian::byte_swap
<uint64_t, llvm::endianness::little
>(Version
);
1502 Expected
<Header
> Header::readFromBuffer(const unsigned char *Buffer
) {
1503 using namespace support
;
1504 static_assert(std::is_standard_layout_v
<Header
>,
1505 "The header should be standard layout type since we use offset "
1506 "of fields to read.");
1509 H
.Magic
= read(Buffer
, offsetOf(&Header::Magic
));
1510 // Check the magic number.
1512 endian::byte_swap
<uint64_t, llvm::endianness::little
>(H
.Magic
);
1513 if (Magic
!= IndexedInstrProf::Magic
)
1514 return make_error
<InstrProfError
>(instrprof_error::bad_magic
);
1516 // Read the version.
1517 H
.Version
= read(Buffer
, offsetOf(&Header::Version
));
1518 if (GET_VERSION(H
.formatVersion()) >
1519 IndexedInstrProf::ProfVersion::CurrentVersion
)
1520 return make_error
<InstrProfError
>(instrprof_error::unsupported_version
);
1522 switch (GET_VERSION(H
.formatVersion())) {
1523 // When a new field is added in the header add a case statement here to
1526 IndexedInstrProf::ProfVersion::CurrentVersion
== Version11
,
1527 "Please update the reading code below if a new field has been added, "
1528 "if not add a case statement to fall through to the latest version.");
1532 H
.TemporalProfTracesOffset
=
1533 read(Buffer
, offsetOf(&Header::TemporalProfTracesOffset
));
1536 H
.BinaryIdOffset
= read(Buffer
, offsetOf(&Header::BinaryIdOffset
));
1539 H
.MemProfOffset
= read(Buffer
, offsetOf(&Header::MemProfOffset
));
1541 default: // Version7 (when the backwards compatible header was introduced).
1542 H
.HashType
= read(Buffer
, offsetOf(&Header::HashType
));
1543 H
.HashOffset
= read(Buffer
, offsetOf(&Header::HashOffset
));
1549 size_t Header::size() const {
1550 switch (GET_VERSION(formatVersion())) {
1551 // When a new field is added to the header add a case statement here to
1552 // compute the size as offset of the new field + size of the new field. This
1553 // relies on the field being added to the end of the list.
1554 static_assert(IndexedInstrProf::ProfVersion::CurrentVersion
== Version11
,
1555 "Please update the size computation below if a new field has "
1556 "been added to the header, if not add a case statement to "
1557 "fall through to the latest version.");
1561 return offsetOf(&Header::TemporalProfTracesOffset
) +
1562 sizeof(Header::TemporalProfTracesOffset
);
1564 return offsetOf(&Header::BinaryIdOffset
) + sizeof(Header::BinaryIdOffset
);
1566 return offsetOf(&Header::MemProfOffset
) + sizeof(Header::MemProfOffset
);
1567 default: // Version7 (when the backwards compatible header was introduced).
1568 return offsetOf(&Header::HashOffset
) + sizeof(Header::HashOffset
);
1572 } // namespace IndexedInstrProf
1574 } // end namespace llvm