1 //===- InstrProf.cpp - Instrumented profiling format support --------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file contains support for clang's instrumentation based PGO and
12 //===----------------------------------------------------------------------===//
14 #include "llvm/ProfileData/InstrProf.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/SetVector.h"
17 #include "llvm/ADT/SmallString.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Config/config.h"
22 #include "llvm/IR/Constant.h"
23 #include "llvm/IR/Constants.h"
24 #include "llvm/IR/Function.h"
25 #include "llvm/IR/GlobalValue.h"
26 #include "llvm/IR/GlobalVariable.h"
27 #include "llvm/IR/Instruction.h"
28 #include "llvm/IR/LLVMContext.h"
29 #include "llvm/IR/MDBuilder.h"
30 #include "llvm/IR/Mangler.h"
31 #include "llvm/IR/Metadata.h"
32 #include "llvm/IR/Module.h"
33 #include "llvm/IR/Type.h"
34 #include "llvm/ProfileData/InstrProfReader.h"
35 #include "llvm/Support/Casting.h"
36 #include "llvm/Support/CommandLine.h"
37 #include "llvm/Support/Compiler.h"
38 #include "llvm/Support/Compression.h"
39 #include "llvm/Support/Endian.h"
40 #include "llvm/Support/Error.h"
41 #include "llvm/Support/ErrorHandling.h"
42 #include "llvm/Support/LEB128.h"
43 #include "llvm/Support/MathExtras.h"
44 #include "llvm/Support/Path.h"
45 #include "llvm/Support/SwapByteOrder.h"
46 #include "llvm/Support/VirtualFileSystem.h"
47 #include "llvm/TargetParser/Triple.h"
55 #include <system_error>
56 #include <type_traits>
62 static cl::opt
<bool> StaticFuncFullModulePrefix(
63 "static-func-full-module-prefix", cl::init(true), cl::Hidden
,
64 cl::desc("Use full module build paths in the profile counter names for "
65 "static functions."));
67 // This option is tailored to users that have different top-level directory in
68 // profile-gen and profile-use compilation. Users need to specify the number
69 // of levels to strip. A value larger than the number of directories in the
70 // source file will strip all the directory names and only leave the basename.
72 // Note current ThinLTO module importing for the indirect-calls assumes
73 // the source directory name not being stripped. A non-zero option value here
74 // can potentially prevent some inter-module indirect-call-promotions.
75 static cl::opt
<unsigned> StaticFuncStripDirNamePrefix(
76 "static-func-strip-dirname-prefix", cl::init(0), cl::Hidden
,
77 cl::desc("Strip specified level of directory name from source path in "
78 "the profile counter name for static functions."));
80 static std::string
getInstrProfErrString(instrprof_error Err
,
81 const std::string
&ErrMsg
= "") {
83 raw_string_ostream
OS(Msg
);
86 case instrprof_error::success
:
89 case instrprof_error::eof
:
92 case instrprof_error::unrecognized_format
:
93 OS
<< "unrecognized instrumentation profile encoding format";
95 case instrprof_error::bad_magic
:
96 OS
<< "invalid instrumentation profile data (bad magic)";
98 case instrprof_error::bad_header
:
99 OS
<< "invalid instrumentation profile data (file header is corrupt)";
101 case instrprof_error::unsupported_version
:
102 OS
<< "unsupported instrumentation profile format version";
104 case instrprof_error::unsupported_hash_type
:
105 OS
<< "unsupported instrumentation profile hash type";
107 case instrprof_error::too_large
:
108 OS
<< "too much profile data";
110 case instrprof_error::truncated
:
111 OS
<< "truncated profile data";
113 case instrprof_error::malformed
:
114 OS
<< "malformed instrumentation profile data";
116 case instrprof_error::missing_debug_info_for_correlation
:
117 OS
<< "debug info for correlation is required";
119 case instrprof_error::unexpected_debug_info_for_correlation
:
120 OS
<< "debug info for correlation is not necessary";
122 case instrprof_error::unable_to_correlate_profile
:
123 OS
<< "unable to correlate profile";
125 case instrprof_error::invalid_prof
:
126 OS
<< "invalid profile created. Please file a bug "
127 "at: " BUG_REPORT_URL
128 " and include the profraw files that caused this error.";
130 case instrprof_error::unknown_function
:
131 OS
<< "no profile data available for function";
133 case instrprof_error::hash_mismatch
:
134 OS
<< "function control flow change detected (hash mismatch)";
136 case instrprof_error::count_mismatch
:
137 OS
<< "function basic block count change detected (counter mismatch)";
139 case instrprof_error::bitmap_mismatch
:
140 OS
<< "function bitmap size change detected (bitmap size mismatch)";
142 case instrprof_error::counter_overflow
:
143 OS
<< "counter overflow";
145 case instrprof_error::value_site_count_mismatch
:
146 OS
<< "function value site count change detected (counter mismatch)";
148 case instrprof_error::compress_failed
:
149 OS
<< "failed to compress data (zlib)";
151 case instrprof_error::uncompress_failed
:
152 OS
<< "failed to uncompress data (zlib)";
154 case instrprof_error::empty_raw_profile
:
155 OS
<< "empty raw profile file";
157 case instrprof_error::zlib_unavailable
:
158 OS
<< "profile uses zlib compression but the profile reader was built "
159 "without zlib support";
161 case instrprof_error::raw_profile_version_mismatch
:
162 OS
<< "raw profile version mismatch";
164 case instrprof_error::counter_value_too_large
:
165 OS
<< "excessively large counter value suggests corrupted profile data";
169 // If optional error message is not empty, append it to the message.
171 OS
<< ": " << ErrMsg
;
178 // FIXME: This class is only here to support the transition to llvm::Error. It
179 // will be removed once this transition is complete. Clients should prefer to
180 // deal with the Error value directly, rather than converting to error_code.
181 class InstrProfErrorCategoryType
: public std::error_category
{
182 const char *name() const noexcept override
{ return "llvm.instrprof"; }
184 std::string
message(int IE
) const override
{
185 return getInstrProfErrString(static_cast<instrprof_error
>(IE
));
189 } // end anonymous namespace
191 const std::error_category
&llvm::instrprof_category() {
192 static InstrProfErrorCategoryType ErrorCategory
;
193 return ErrorCategory
;
198 const char *InstrProfSectNameCommon
[] = {
199 #define INSTR_PROF_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) \
201 #include "llvm/ProfileData/InstrProfData.inc"
204 const char *InstrProfSectNameCoff
[] = {
205 #define INSTR_PROF_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) \
207 #include "llvm/ProfileData/InstrProfData.inc"
210 const char *InstrProfSectNamePrefix
[] = {
211 #define INSTR_PROF_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) \
213 #include "llvm/ProfileData/InstrProfData.inc"
220 cl::opt
<bool> DoInstrProfNameCompression(
221 "enable-name-compression",
222 cl::desc("Enable name/filename string compression"), cl::init(true));
224 std::string
getInstrProfSectionName(InstrProfSectKind IPSK
,
225 Triple::ObjectFormatType OF
,
226 bool AddSegmentInfo
) {
227 std::string SectName
;
229 if (OF
== Triple::MachO
&& AddSegmentInfo
)
230 SectName
= InstrProfSectNamePrefix
[IPSK
];
232 if (OF
== Triple::COFF
)
233 SectName
+= InstrProfSectNameCoff
[IPSK
];
235 SectName
+= InstrProfSectNameCommon
[IPSK
];
237 if (OF
== Triple::MachO
&& IPSK
== IPSK_data
&& AddSegmentInfo
)
238 SectName
+= ",regular,live_support";
243 std::string
InstrProfError::message() const {
244 return getInstrProfErrString(Err
, Msg
);
247 char InstrProfError::ID
= 0;
249 std::string
getPGOFuncName(StringRef RawFuncName
,
250 GlobalValue::LinkageTypes Linkage
,
252 uint64_t Version LLVM_ATTRIBUTE_UNUSED
) {
253 return GlobalValue::getGlobalIdentifier(RawFuncName
, Linkage
, FileName
);
256 // Strip NumPrefix level of directory name from PathNameStr. If the number of
257 // directory separators is less than NumPrefix, strip all the directories and
258 // leave base file name only.
259 static StringRef
stripDirPrefix(StringRef PathNameStr
, uint32_t NumPrefix
) {
260 uint32_t Count
= NumPrefix
;
261 uint32_t Pos
= 0, LastPos
= 0;
262 for (auto & CI
: PathNameStr
) {
264 if (llvm::sys::path::is_separator(CI
)) {
271 return PathNameStr
.substr(LastPos
);
274 static StringRef
getStrippedSourceFileName(const GlobalObject
&GO
) {
275 StringRef
FileName(GO
.getParent()->getSourceFileName());
276 uint32_t StripLevel
= StaticFuncFullModulePrefix
? 0 : (uint32_t)-1;
277 if (StripLevel
< StaticFuncStripDirNamePrefix
)
278 StripLevel
= StaticFuncStripDirNamePrefix
;
280 FileName
= stripDirPrefix(FileName
, StripLevel
);
284 // The PGO name has the format [<filepath>;]<linkage-name> where <filepath>; is
285 // provided if linkage is local and <linkage-name> is the mangled function
286 // name. The filepath is used to discriminate possibly identical function names.
287 // ; is used because it is unlikely to be found in either <filepath> or
290 // Older compilers used getPGOFuncName() which has the format
291 // [<filepath>:]<function-name>. <filepath> is used to discriminate between
292 // possibly identical function names when linkage is local and <function-name>
293 // simply comes from F.getName(). This caused trouble for Objective-C functions
294 // which commonly have :'s in their names. Also, since <function-name> is not
295 // mangled, they cannot be passed to Mach-O linkers via -order_file. We still
296 // need to compute this name to lookup functions from profiles built by older
299 getIRPGONameForGlobalObject(const GlobalObject
&GO
,
300 GlobalValue::LinkageTypes Linkage
,
301 StringRef FileName
) {
302 SmallString
<64> Name
;
303 if (llvm::GlobalValue::isLocalLinkage(Linkage
)) {
304 Name
.append(FileName
.empty() ? "<unknown>" : FileName
);
307 Mangler().getNameWithPrefix(Name
, &GO
, /*CannotUsePrivateLabel=*/true);
308 return Name
.str().str();
311 static std::optional
<std::string
> lookupPGONameFromMetadata(MDNode
*MD
) {
313 StringRef S
= cast
<MDString
>(MD
->getOperand(0))->getString();
319 // Returns the PGO object name. This function has some special handling
320 // when called in LTO optimization. The following only applies when calling in
321 // LTO passes (when \c InLTO is true): LTO's internalization privatizes many
322 // global linkage symbols. This happens after value profile annotation, but
323 // those internal linkage functions should not have a source prefix.
324 // Additionally, for ThinLTO mode, exported internal functions are promoted
325 // and renamed. We need to ensure that the original internal PGO name is
326 // used when computing the GUID that is compared against the profiled GUIDs.
327 // To differentiate compiler generated internal symbols from original ones,
328 // PGOFuncName meta data are created and attached to the original internal
329 // symbols in the value profile annotation step
330 // (PGOUseFunc::annotateIndirectCallSites). If a symbol does not have the meta
331 // data, its original linkage must be non-internal.
332 static std::string
getIRPGOObjectName(const GlobalObject
&GO
, bool InLTO
,
333 MDNode
*PGONameMetadata
) {
335 auto FileName
= getStrippedSourceFileName(GO
);
336 return getIRPGONameForGlobalObject(GO
, GO
.getLinkage(), FileName
);
339 // In LTO mode (when InLTO is true), first check if there is a meta data.
340 if (auto IRPGOFuncName
= lookupPGONameFromMetadata(PGONameMetadata
))
341 return *IRPGOFuncName
;
343 // If there is no meta data, the function must be a global before the value
344 // profile annotation pass. Its current linkage may be internal if it is
345 // internalized in LTO mode.
346 return getIRPGONameForGlobalObject(GO
, GlobalValue::ExternalLinkage
, "");
349 // Returns the IRPGO function name and does special handling when called
350 // in LTO optimization. See the comments of `getIRPGOObjectName` for details.
351 std::string
getIRPGOFuncName(const Function
&F
, bool InLTO
) {
352 return getIRPGOObjectName(F
, InLTO
, getPGOFuncNameMetadata(F
));
355 // This is similar to `getIRPGOFuncName` except that this function calls
356 // 'getPGOFuncName' to get a name and `getIRPGOFuncName` calls
357 // 'getIRPGONameForGlobalObject'. See the difference between two callees in the
358 // comments of `getIRPGONameForGlobalObject`.
359 std::string
getPGOFuncName(const Function
&F
, bool InLTO
, uint64_t Version
) {
361 auto FileName
= getStrippedSourceFileName(F
);
362 return getPGOFuncName(F
.getName(), F
.getLinkage(), FileName
, Version
);
365 // In LTO mode (when InLTO is true), first check if there is a meta data.
366 if (auto PGOFuncName
= lookupPGONameFromMetadata(getPGOFuncNameMetadata(F
)))
369 // If there is no meta data, the function must be a global before the value
370 // profile annotation pass. Its current linkage may be internal if it is
371 // internalized in LTO mode.
372 return getPGOFuncName(F
.getName(), GlobalValue::ExternalLinkage
, "");
375 // See getIRPGOFuncName() for a discription of the format.
376 std::pair
<StringRef
, StringRef
>
377 getParsedIRPGOFuncName(StringRef IRPGOFuncName
) {
378 auto [FileName
, FuncName
] = IRPGOFuncName
.split(';');
379 if (FuncName
.empty())
380 return std::make_pair(StringRef(), IRPGOFuncName
);
381 return std::make_pair(FileName
, FuncName
);
384 StringRef
getFuncNameWithoutPrefix(StringRef PGOFuncName
, StringRef FileName
) {
385 if (FileName
.empty())
387 // Drop the file name including ':'. See also getPGOFuncName.
388 if (PGOFuncName
.startswith(FileName
))
389 PGOFuncName
= PGOFuncName
.drop_front(FileName
.size() + 1);
393 // \p FuncName is the string used as profile lookup key for the function. A
394 // symbol is created to hold the name. Return the legalized symbol name.
395 std::string
getPGOFuncNameVarName(StringRef FuncName
,
396 GlobalValue::LinkageTypes Linkage
) {
397 std::string VarName
= std::string(getInstrProfNameVarPrefix());
400 if (!GlobalValue::isLocalLinkage(Linkage
))
403 // Now fix up illegal chars in local VarName that may upset the assembler.
404 const char InvalidChars
[] = "-:;<>/\"'";
405 size_t found
= VarName
.find_first_of(InvalidChars
);
406 while (found
!= std::string::npos
) {
407 VarName
[found
] = '_';
408 found
= VarName
.find_first_of(InvalidChars
, found
+ 1);
413 GlobalVariable
*createPGOFuncNameVar(Module
&M
,
414 GlobalValue::LinkageTypes Linkage
,
415 StringRef PGOFuncName
) {
416 // We generally want to match the function's linkage, but available_externally
417 // and extern_weak both have the wrong semantics, and anything that doesn't
418 // need to link across compilation units doesn't need to be visible at all.
419 if (Linkage
== GlobalValue::ExternalWeakLinkage
)
420 Linkage
= GlobalValue::LinkOnceAnyLinkage
;
421 else if (Linkage
== GlobalValue::AvailableExternallyLinkage
)
422 Linkage
= GlobalValue::LinkOnceODRLinkage
;
423 else if (Linkage
== GlobalValue::InternalLinkage
||
424 Linkage
== GlobalValue::ExternalLinkage
)
425 Linkage
= GlobalValue::PrivateLinkage
;
428 ConstantDataArray::getString(M
.getContext(), PGOFuncName
, false);
430 new GlobalVariable(M
, Value
->getType(), true, Linkage
, Value
,
431 getPGOFuncNameVarName(PGOFuncName
, Linkage
));
433 // Hide the symbol so that we correctly get a copy for each executable.
434 if (!GlobalValue::isLocalLinkage(FuncNameVar
->getLinkage()))
435 FuncNameVar
->setVisibility(GlobalValue::HiddenVisibility
);
440 GlobalVariable
*createPGOFuncNameVar(Function
&F
, StringRef PGOFuncName
) {
441 return createPGOFuncNameVar(*F
.getParent(), F
.getLinkage(), PGOFuncName
);
444 Error
InstrProfSymtab::create(Module
&M
, bool InLTO
) {
445 for (Function
&F
: M
) {
446 // Function may not have a name: like using asm("") to overwrite the name.
447 // Ignore in this case.
450 if (Error E
= addFuncWithName(F
, getIRPGOFuncName(F
, InLTO
)))
452 // Also use getPGOFuncName() so that we can find records from older profiles
453 if (Error E
= addFuncWithName(F
, getPGOFuncName(F
, InLTO
)))
458 return Error::success();
461 Error
InstrProfSymtab::addFuncWithName(Function
&F
, StringRef PGOFuncName
) {
462 if (Error E
= addFuncName(PGOFuncName
))
464 MD5FuncMap
.emplace_back(Function::getGUID(PGOFuncName
), &F
);
465 // In ThinLTO, local function may have been promoted to global and have
466 // suffix ".llvm." added to the function name. We need to add the
467 // stripped function name to the symbol table so that we can find a match
470 // We may have other suffixes similar as ".llvm." which are needed to
471 // be stripped before the matching, but ".__uniq." suffix which is used
472 // to differentiate internal linkage functions in different modules
473 // should be kept. Now this is the only suffix with the pattern ".xxx"
474 // which is kept before matching.
475 const std::string UniqSuffix
= ".__uniq.";
476 auto pos
= PGOFuncName
.find(UniqSuffix
);
477 // Search '.' after ".__uniq." if ".__uniq." exists, otherwise
478 // search '.' from the beginning.
479 if (pos
!= std::string::npos
)
480 pos
+= UniqSuffix
.length();
483 pos
= PGOFuncName
.find('.', pos
);
484 if (pos
!= std::string::npos
&& pos
!= 0) {
485 StringRef OtherFuncName
= PGOFuncName
.substr(0, pos
);
486 if (Error E
= addFuncName(OtherFuncName
))
488 MD5FuncMap
.emplace_back(Function::getGUID(OtherFuncName
), &F
);
490 return Error::success();
493 uint64_t InstrProfSymtab::getFunctionHashFromAddress(uint64_t Address
) {
495 auto It
= partition_point(AddrToMD5Map
, [=](std::pair
<uint64_t, uint64_t> A
) {
496 return A
.first
< Address
;
498 // Raw function pointer collected by value profiler may be from
499 // external functions that are not instrumented. They won't have
500 // mapping data to be used by the deserializer. Force the value to
501 // be 0 in this case.
502 if (It
!= AddrToMD5Map
.end() && It
->first
== Address
)
503 return (uint64_t)It
->second
;
507 void InstrProfSymtab::dumpNames(raw_ostream
&OS
) const {
508 SmallVector
<StringRef
, 0> Sorted(NameTab
.keys());
510 for (StringRef S
: Sorted
)
514 Error
collectGlobalObjectNameStrings(ArrayRef
<std::string
> NameStrs
,
515 bool doCompression
, std::string
&Result
) {
516 assert(!NameStrs
.empty() && "No name data to emit");
518 uint8_t Header
[20], *P
= Header
;
519 std::string UncompressedNameStrings
=
520 join(NameStrs
.begin(), NameStrs
.end(), getInstrProfNameSeparator());
522 assert(StringRef(UncompressedNameStrings
)
523 .count(getInstrProfNameSeparator()) == (NameStrs
.size() - 1) &&
524 "PGO name is invalid (contains separator token)");
526 unsigned EncLen
= encodeULEB128(UncompressedNameStrings
.length(), P
);
529 auto WriteStringToResult
= [&](size_t CompressedLen
, StringRef InputStr
) {
530 EncLen
= encodeULEB128(CompressedLen
, P
);
532 char *HeaderStr
= reinterpret_cast<char *>(&Header
[0]);
533 unsigned HeaderLen
= P
- &Header
[0];
534 Result
.append(HeaderStr
, HeaderLen
);
536 return Error::success();
539 if (!doCompression
) {
540 return WriteStringToResult(0, UncompressedNameStrings
);
543 SmallVector
<uint8_t, 128> CompressedNameStrings
;
544 compression::zlib::compress(arrayRefFromStringRef(UncompressedNameStrings
),
545 CompressedNameStrings
,
546 compression::zlib::BestSizeCompression
);
548 return WriteStringToResult(CompressedNameStrings
.size(),
549 toStringRef(CompressedNameStrings
));
552 StringRef
getPGOFuncNameVarInitializer(GlobalVariable
*NameVar
) {
553 auto *Arr
= cast
<ConstantDataArray
>(NameVar
->getInitializer());
555 Arr
->isCString() ? Arr
->getAsCString() : Arr
->getAsString();
559 Error
collectPGOFuncNameStrings(ArrayRef
<GlobalVariable
*> NameVars
,
560 std::string
&Result
, bool doCompression
) {
561 std::vector
<std::string
> NameStrs
;
562 for (auto *NameVar
: NameVars
) {
563 NameStrs
.push_back(std::string(getPGOFuncNameVarInitializer(NameVar
)));
565 return collectGlobalObjectNameStrings(
566 NameStrs
, compression::zlib::isAvailable() && doCompression
, Result
);
569 Error
readPGOFuncNameStrings(StringRef NameStrings
, InstrProfSymtab
&Symtab
) {
570 const uint8_t *P
= NameStrings
.bytes_begin();
571 const uint8_t *EndP
= NameStrings
.bytes_end();
574 uint64_t UncompressedSize
= decodeULEB128(P
, &N
);
576 uint64_t CompressedSize
= decodeULEB128(P
, &N
);
578 bool isCompressed
= (CompressedSize
!= 0);
579 SmallVector
<uint8_t, 128> UncompressedNameStrings
;
580 StringRef NameStrings
;
582 if (!llvm::compression::zlib::isAvailable())
583 return make_error
<InstrProfError
>(instrprof_error::zlib_unavailable
);
585 if (Error E
= compression::zlib::decompress(ArrayRef(P
, CompressedSize
),
586 UncompressedNameStrings
,
588 consumeError(std::move(E
));
589 return make_error
<InstrProfError
>(instrprof_error::uncompress_failed
);
592 NameStrings
= toStringRef(UncompressedNameStrings
);
595 StringRef(reinterpret_cast<const char *>(P
), UncompressedSize
);
596 P
+= UncompressedSize
;
598 // Now parse the name strings.
599 SmallVector
<StringRef
, 0> Names
;
600 NameStrings
.split(Names
, getInstrProfNameSeparator());
601 for (StringRef
&Name
: Names
)
602 if (Error E
= Symtab
.addFuncName(Name
))
605 while (P
< EndP
&& *P
== 0)
608 return Error::success();
611 void InstrProfRecord::accumulateCounts(CountSumOrPercent
&Sum
) const {
612 uint64_t FuncSum
= 0;
613 Sum
.NumEntries
+= Counts
.size();
614 for (uint64_t Count
: Counts
)
616 Sum
.CountSum
+= FuncSum
;
618 for (uint32_t VK
= IPVK_First
; VK
<= IPVK_Last
; ++VK
) {
619 uint64_t KindSum
= 0;
620 uint32_t NumValueSites
= getNumValueSites(VK
);
621 for (size_t I
= 0; I
< NumValueSites
; ++I
) {
622 uint32_t NV
= getNumValueDataForSite(VK
, I
);
623 std::unique_ptr
<InstrProfValueData
[]> VD
= getValueForSite(VK
, I
);
624 for (uint32_t V
= 0; V
< NV
; V
++)
625 KindSum
+= VD
[V
].Count
;
627 Sum
.ValueCounts
[VK
] += KindSum
;
631 void InstrProfValueSiteRecord::overlap(InstrProfValueSiteRecord
&Input
,
633 OverlapStats
&Overlap
,
634 OverlapStats
&FuncLevelOverlap
) {
635 this->sortByTargetValues();
636 Input
.sortByTargetValues();
637 double Score
= 0.0f
, FuncLevelScore
= 0.0f
;
638 auto I
= ValueData
.begin();
639 auto IE
= ValueData
.end();
640 auto J
= Input
.ValueData
.begin();
641 auto JE
= Input
.ValueData
.end();
642 while (I
!= IE
&& J
!= JE
) {
643 if (I
->Value
== J
->Value
) {
644 Score
+= OverlapStats::score(I
->Count
, J
->Count
,
645 Overlap
.Base
.ValueCounts
[ValueKind
],
646 Overlap
.Test
.ValueCounts
[ValueKind
]);
647 FuncLevelScore
+= OverlapStats::score(
648 I
->Count
, J
->Count
, FuncLevelOverlap
.Base
.ValueCounts
[ValueKind
],
649 FuncLevelOverlap
.Test
.ValueCounts
[ValueKind
]);
651 } else if (I
->Value
< J
->Value
) {
657 Overlap
.Overlap
.ValueCounts
[ValueKind
] += Score
;
658 FuncLevelOverlap
.Overlap
.ValueCounts
[ValueKind
] += FuncLevelScore
;
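661 // Accumulate the overlap of every value site of \p ValueKind between this
661 // record and \p Other. Both records are expected to have the same number of
661 // value sites for that kind.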
662 void InstrProfRecord::overlapValueProfData(uint32_t ValueKind
,
663 InstrProfRecord
&Other
,
664 OverlapStats
&Overlap
,
665 OverlapStats
&FuncLevelOverlap
) {
666 uint32_t ThisNumValueSites
= getNumValueSites(ValueKind
);
667 assert(ThisNumValueSites
== Other
.getNumValueSites(ValueKind
));
668 if (!ThisNumValueSites
)
671 std::vector
<InstrProfValueSiteRecord
> &ThisSiteRecords
=
672 getOrCreateValueSitesForKind(ValueKind
);
673 MutableArrayRef
<InstrProfValueSiteRecord
> OtherSiteRecords
=
674 Other
.getValueSitesForKind(ValueKind
);
675 for (uint32_t I
= 0; I
< ThisNumValueSites
; I
++)
676 ThisSiteRecords
[I
].overlap(OtherSiteRecords
[I
], ValueKind
, Overlap
,
680 void InstrProfRecord::overlap(InstrProfRecord
&Other
, OverlapStats
&Overlap
,
681 OverlapStats
&FuncLevelOverlap
,
682 uint64_t ValueCutoff
) {
683 // FuncLevel CountSum for other should already computed and nonzero.
684 assert(FuncLevelOverlap
.Test
.CountSum
>= 1.0f
);
685 accumulateCounts(FuncLevelOverlap
.Base
);
686 bool Mismatch
= (Counts
.size() != Other
.Counts
.size());
688 // Check if the value profiles mismatch.
690 for (uint32_t Kind
= IPVK_First
; Kind
<= IPVK_Last
; ++Kind
) {
691 uint32_t ThisNumValueSites
= getNumValueSites(Kind
);
692 uint32_t OtherNumValueSites
= Other
.getNumValueSites(Kind
);
693 if (ThisNumValueSites
!= OtherNumValueSites
) {
700 Overlap
.addOneMismatch(FuncLevelOverlap
.Test
);
704 // Compute overlap for value counts.
705 for (uint32_t Kind
= IPVK_First
; Kind
<= IPVK_Last
; ++Kind
)
706 overlapValueProfData(Kind
, Other
, Overlap
, FuncLevelOverlap
);
709 uint64_t MaxCount
= 0;
710 // Compute overlap for edge counts.
711 for (size_t I
= 0, E
= Other
.Counts
.size(); I
< E
; ++I
) {
712 Score
+= OverlapStats::score(Counts
[I
], Other
.Counts
[I
],
713 Overlap
.Base
.CountSum
, Overlap
.Test
.CountSum
);
714 MaxCount
= std::max(Other
.Counts
[I
], MaxCount
);
716 Overlap
.Overlap
.CountSum
+= Score
;
717 Overlap
.Overlap
.NumEntries
+= 1;
719 if (MaxCount
>= ValueCutoff
) {
720 double FuncScore
= 0.0;
721 for (size_t I
= 0, E
= Other
.Counts
.size(); I
< E
; ++I
)
722 FuncScore
+= OverlapStats::score(Counts
[I
], Other
.Counts
[I
],
723 FuncLevelOverlap
.Base
.CountSum
,
724 FuncLevelOverlap
.Test
.CountSum
);
725 FuncLevelOverlap
.Overlap
.CountSum
= FuncScore
;
726 FuncLevelOverlap
.Overlap
.NumEntries
= Other
.Counts
.size();
727 FuncLevelOverlap
.Valid
= true;
731 void InstrProfValueSiteRecord::merge(InstrProfValueSiteRecord
&Input
,
733 function_ref
<void(instrprof_error
)> Warn
) {
734 this->sortByTargetValues();
735 Input
.sortByTargetValues();
736 auto I
= ValueData
.begin();
737 auto IE
= ValueData
.end();
738 for (const InstrProfValueData
&J
: Input
.ValueData
) {
739 while (I
!= IE
&& I
->Value
< J
.Value
)
741 if (I
!= IE
&& I
->Value
== J
.Value
) {
743 I
->Count
= SaturatingMultiplyAdd(J
.Count
, Weight
, I
->Count
, &Overflowed
);
745 Warn(instrprof_error::counter_overflow
);
749 ValueData
.insert(I
, J
);
753 void InstrProfValueSiteRecord::scale(uint64_t N
, uint64_t D
,
754 function_ref
<void(instrprof_error
)> Warn
) {
755 for (InstrProfValueData
&I
: ValueData
) {
757 I
.Count
= SaturatingMultiply(I
.Count
, N
, &Overflowed
) / D
;
759 Warn(instrprof_error::counter_overflow
);
763 // Merge Value Profile data from Src record to this record for ValueKind.
764 // Scale merged value counts by \p Weight.
765 void InstrProfRecord::mergeValueProfData(
766 uint32_t ValueKind
, InstrProfRecord
&Src
, uint64_t Weight
,
767 function_ref
<void(instrprof_error
)> Warn
) {
768 uint32_t ThisNumValueSites
= getNumValueSites(ValueKind
);
769 uint32_t OtherNumValueSites
= Src
.getNumValueSites(ValueKind
);
770 if (ThisNumValueSites
!= OtherNumValueSites
) {
771 Warn(instrprof_error::value_site_count_mismatch
);
774 if (!ThisNumValueSites
)
776 std::vector
<InstrProfValueSiteRecord
> &ThisSiteRecords
=
777 getOrCreateValueSitesForKind(ValueKind
);
778 MutableArrayRef
<InstrProfValueSiteRecord
> OtherSiteRecords
=
779 Src
.getValueSitesForKind(ValueKind
);
780 for (uint32_t I
= 0; I
< ThisNumValueSites
; I
++)
781 ThisSiteRecords
[I
].merge(OtherSiteRecords
[I
], Weight
, Warn
);
784 void InstrProfRecord::merge(InstrProfRecord
&Other
, uint64_t Weight
,
785 function_ref
<void(instrprof_error
)> Warn
) {
786 // If the number of counters doesn't match we either have bad data
787 // or a hash collision.
788 if (Counts
.size() != Other
.Counts
.size()) {
789 Warn(instrprof_error::count_mismatch
);
793 // Special handling of the first count as the PseudoCount.
794 CountPseudoKind OtherKind
= Other
.getCountPseudoKind();
795 CountPseudoKind ThisKind
= getCountPseudoKind();
796 if (OtherKind
!= NotPseudo
|| ThisKind
!= NotPseudo
) {
797 // We don't allow the merge of a profile with pseudo counts and
798 // a normal profile (i.e. without pseudo counts).
799 // Profile supplementation should be done after the profile merge.
800 if (OtherKind
== NotPseudo
|| ThisKind
== NotPseudo
) {
801 Warn(instrprof_error::count_mismatch
);
804 if (OtherKind
== PseudoHot
|| ThisKind
== PseudoHot
)
805 setPseudoCount(PseudoHot
);
807 setPseudoCount(PseudoWarm
);
811 for (size_t I
= 0, E
= Other
.Counts
.size(); I
< E
; ++I
) {
814 SaturatingMultiplyAdd(Other
.Counts
[I
], Weight
, Counts
[I
], &Overflowed
);
815 if (Value
> getInstrMaxCountValue()) {
816 Value
= getInstrMaxCountValue();
821 Warn(instrprof_error::counter_overflow
);
824 // If the number of bitmap bytes doesn't match we either have bad data
825 // or a hash collision.
826 if (BitmapBytes
.size() != Other
.BitmapBytes
.size()) {
827 Warn(instrprof_error::bitmap_mismatch
);
831 // Bitmap bytes are merged by simply ORing them together.
832 for (size_t I
= 0, E
= Other
.BitmapBytes
.size(); I
< E
; ++I
) {
833 BitmapBytes
[I
] = Other
.BitmapBytes
[I
] | BitmapBytes
[I
];
836 for (uint32_t Kind
= IPVK_First
; Kind
<= IPVK_Last
; ++Kind
)
837 mergeValueProfData(Kind
, Other
, Weight
, Warn
);
840 void InstrProfRecord::scaleValueProfData(
841 uint32_t ValueKind
, uint64_t N
, uint64_t D
,
842 function_ref
<void(instrprof_error
)> Warn
) {
843 for (auto &R
: getValueSitesForKind(ValueKind
))
847 void InstrProfRecord::scale(uint64_t N
, uint64_t D
,
848 function_ref
<void(instrprof_error
)> Warn
) {
849 assert(D
!= 0 && "D cannot be 0");
850 for (auto &Count
: this->Counts
) {
852 Count
= SaturatingMultiply(Count
, N
, &Overflowed
) / D
;
853 if (Count
> getInstrMaxCountValue()) {
854 Count
= getInstrMaxCountValue();
858 Warn(instrprof_error::counter_overflow
);
860 for (uint32_t Kind
= IPVK_First
; Kind
<= IPVK_Last
; ++Kind
)
861 scaleValueProfData(Kind
, N
, D
, Warn
);
864 // Map indirect call target name hash to name string.
865 uint64_t InstrProfRecord::remapValue(uint64_t Value
, uint32_t ValueKind
,
866 InstrProfSymtab
*SymTab
) {
870 if (ValueKind
== IPVK_IndirectCallTarget
)
871 return SymTab
->getFunctionHashFromAddress(Value
);
876 void InstrProfRecord::addValueData(uint32_t ValueKind
, uint32_t Site
,
877 InstrProfValueData
*VData
, uint32_t N
,
878 InstrProfSymtab
*ValueMap
) {
879 for (uint32_t I
= 0; I
< N
; I
++) {
880 VData
[I
].Value
= remapValue(VData
[I
].Value
, ValueKind
, ValueMap
);
882 std::vector
<InstrProfValueSiteRecord
> &ValueSites
=
883 getOrCreateValueSitesForKind(ValueKind
);
885 ValueSites
.emplace_back();
887 ValueSites
.emplace_back(VData
, VData
+ N
);
890 std::vector
<BPFunctionNode
> TemporalProfTraceTy::createBPFunctionNodes(
891 ArrayRef
<TemporalProfTraceTy
> Traces
) {
892 using IDT
= BPFunctionNode::IDT
;
893 using UtilityNodeT
= BPFunctionNode::UtilityNodeT
;
894 // Collect all function IDs ordered by their smallest timestamp. This will be
895 // used as the initial FunctionNode order.
896 SetVector
<IDT
> FunctionIds
;
897 size_t LargestTraceSize
= 0;
898 for (auto &Trace
: Traces
)
900 std::max(LargestTraceSize
, Trace
.FunctionNameRefs
.size());
901 for (size_t Timestamp
= 0; Timestamp
< LargestTraceSize
; Timestamp
++)
902 for (auto &Trace
: Traces
)
903 if (Timestamp
< Trace
.FunctionNameRefs
.size())
904 FunctionIds
.insert(Trace
.FunctionNameRefs
[Timestamp
]);
906 int N
= std::ceil(std::log2(LargestTraceSize
));
908 // TODO: We need to use the Trace.Weight field to give more weight to more
909 // important utilities
910 DenseMap
<IDT
, SmallVector
<UtilityNodeT
, 4>> FuncGroups
;
911 for (size_t TraceIdx
= 0; TraceIdx
< Traces
.size(); TraceIdx
++) {
912 auto &Trace
= Traces
[TraceIdx
].FunctionNameRefs
;
913 for (size_t Timestamp
= 0; Timestamp
< Trace
.size(); Timestamp
++) {
914 for (int I
= std::floor(std::log2(Timestamp
+ 1)); I
< N
; I
++) {
915 auto &FunctionId
= Trace
[Timestamp
];
916 UtilityNodeT GroupId
= TraceIdx
* N
+ I
;
917 FuncGroups
[FunctionId
].push_back(GroupId
);
922 std::vector
<BPFunctionNode
> Nodes
;
923 for (auto &Id
: FunctionIds
) {
924 auto &UNs
= FuncGroups
[Id
];
926 UNs
.erase(std::unique(UNs
.begin(), UNs
.end()), UNs
.end());
927 Nodes
.emplace_back(Id
, UNs
);
932 #define INSTR_PROF_COMMON_API_IMPL
933 #include "llvm/ProfileData/InstrProfData.inc"
/*!
 * ValueProfRecordClosure interface implementation for the InstrProfRecord
 * class. These C wrappers are used as adaptors so that C++ code can be
 * invoked as callbacks.
 */
940 uint32_t getNumValueKindsInstrProf(const void *Record
) {
941 return reinterpret_cast<const InstrProfRecord
*>(Record
)->getNumValueKinds();
944 uint32_t getNumValueSitesInstrProf(const void *Record
, uint32_t VKind
) {
945 return reinterpret_cast<const InstrProfRecord
*>(Record
)
946 ->getNumValueSites(VKind
);
949 uint32_t getNumValueDataInstrProf(const void *Record
, uint32_t VKind
) {
950 return reinterpret_cast<const InstrProfRecord
*>(Record
)
951 ->getNumValueData(VKind
);
954 uint32_t getNumValueDataForSiteInstrProf(const void *R
, uint32_t VK
,
956 return reinterpret_cast<const InstrProfRecord
*>(R
)
957 ->getNumValueDataForSite(VK
, S
);
960 void getValueForSiteInstrProf(const void *R
, InstrProfValueData
*Dst
,
961 uint32_t K
, uint32_t S
) {
962 reinterpret_cast<const InstrProfRecord
*>(R
)->getValueForSite(Dst
, K
, S
);
965 ValueProfData
*allocValueProfDataInstrProf(size_t TotalSizeInBytes
) {
967 (ValueProfData
*)(new (::operator new(TotalSizeInBytes
)) ValueProfData());
968 memset(VD
, 0, TotalSizeInBytes
);
972 static ValueProfRecordClosure InstrProfRecordClosure
= {
974 getNumValueKindsInstrProf
,
975 getNumValueSitesInstrProf
,
976 getNumValueDataInstrProf
,
977 getNumValueDataForSiteInstrProf
,
979 getValueForSiteInstrProf
,
980 allocValueProfDataInstrProf
};
982 // Wrapper implementation using the closure mechanism.
983 uint32_t ValueProfData::getSize(const InstrProfRecord
&Record
) {
984 auto Closure
= InstrProfRecordClosure
;
985 Closure
.Record
= &Record
;
986 return getValueProfDataSize(&Closure
);
989 // Wrapper implementation using the closure mechanism.
990 std::unique_ptr
<ValueProfData
>
991 ValueProfData::serializeFrom(const InstrProfRecord
&Record
) {
992 InstrProfRecordClosure
.Record
= &Record
;
994 std::unique_ptr
<ValueProfData
> VPD(
995 serializeValueProfDataFrom(&InstrProfRecordClosure
, nullptr));
999 void ValueProfRecord::deserializeTo(InstrProfRecord
&Record
,
1000 InstrProfSymtab
*SymTab
) {
1001 Record
.reserveSites(Kind
, NumValueSites
);
1003 InstrProfValueData
*ValueData
= getValueProfRecordValueData(this);
1004 for (uint64_t VSite
= 0; VSite
< NumValueSites
; ++VSite
) {
1005 uint8_t ValueDataCount
= this->SiteCountArray
[VSite
];
1006 Record
.addValueData(Kind
, VSite
, ValueData
, ValueDataCount
, SymTab
);
1007 ValueData
+= ValueDataCount
;
1011 // For writing/serializing, Old is the host endianness, and New is
1012 // byte order intended on disk. For Reading/deserialization, Old
1013 // is the on-disk source endianness, and New is the host endianness.
1014 void ValueProfRecord::swapBytes(llvm::endianness Old
, llvm::endianness New
) {
1015 using namespace support
;
1020 if (llvm::endianness::native
!= Old
) {
1021 sys::swapByteOrder
<uint32_t>(NumValueSites
);
1022 sys::swapByteOrder
<uint32_t>(Kind
);
1024 uint32_t ND
= getValueProfRecordNumValueData(this);
1025 InstrProfValueData
*VD
= getValueProfRecordValueData(this);
1027 // No need to swap byte array: SiteCountArrray.
1028 for (uint32_t I
= 0; I
< ND
; I
++) {
1029 sys::swapByteOrder
<uint64_t>(VD
[I
].Value
);
1030 sys::swapByteOrder
<uint64_t>(VD
[I
].Count
);
1032 if (llvm::endianness::native
== Old
) {
1033 sys::swapByteOrder
<uint32_t>(NumValueSites
);
1034 sys::swapByteOrder
<uint32_t>(Kind
);
1038 void ValueProfData::deserializeTo(InstrProfRecord
&Record
,
1039 InstrProfSymtab
*SymTab
) {
1040 if (NumValueKinds
== 0)
1043 ValueProfRecord
*VR
= getFirstValueProfRecord(this);
1044 for (uint32_t K
= 0; K
< NumValueKinds
; K
++) {
1045 VR
->deserializeTo(Record
, SymTab
);
1046 VR
= getValueProfRecordNext(VR
);
1051 static T
swapToHostOrder(const unsigned char *&D
, llvm::endianness Orig
) {
1052 using namespace support
;
1054 if (Orig
== llvm::endianness::little
)
1055 return endian::readNext
<T
, llvm::endianness::little
, unaligned
>(D
);
1057 return endian::readNext
<T
, llvm::endianness::big
, unaligned
>(D
);
1060 static std::unique_ptr
<ValueProfData
> allocValueProfData(uint32_t TotalSize
) {
1061 return std::unique_ptr
<ValueProfData
>(new (::operator new(TotalSize
))
1065 Error
ValueProfData::checkIntegrity() {
1066 if (NumValueKinds
> IPVK_Last
+ 1)
1067 return make_error
<InstrProfError
>(
1068 instrprof_error::malformed
, "number of value profile kinds is invalid");
1069 // Total size needs to be multiple of quadword size.
1070 if (TotalSize
% sizeof(uint64_t))
1071 return make_error
<InstrProfError
>(
1072 instrprof_error::malformed
, "total size is not multiples of quardword");
1074 ValueProfRecord
*VR
= getFirstValueProfRecord(this);
1075 for (uint32_t K
= 0; K
< this->NumValueKinds
; K
++) {
1076 if (VR
->Kind
> IPVK_Last
)
1077 return make_error
<InstrProfError
>(instrprof_error::malformed
,
1078 "value kind is invalid");
1079 VR
= getValueProfRecordNext(VR
);
1080 if ((char *)VR
- (char *)this > (ptrdiff_t)TotalSize
)
1081 return make_error
<InstrProfError
>(
1082 instrprof_error::malformed
,
1083 "value profile address is greater than total size");
1085 return Error::success();
1088 Expected
<std::unique_ptr
<ValueProfData
>>
1089 ValueProfData::getValueProfData(const unsigned char *D
,
1090 const unsigned char *const BufferEnd
,
1091 llvm::endianness Endianness
) {
1092 using namespace support
;
1094 if (D
+ sizeof(ValueProfData
) > BufferEnd
)
1095 return make_error
<InstrProfError
>(instrprof_error::truncated
);
1097 const unsigned char *Header
= D
;
1098 uint32_t TotalSize
= swapToHostOrder
<uint32_t>(Header
, Endianness
);
1099 if (D
+ TotalSize
> BufferEnd
)
1100 return make_error
<InstrProfError
>(instrprof_error::too_large
);
1102 std::unique_ptr
<ValueProfData
> VPD
= allocValueProfData(TotalSize
);
1103 memcpy(VPD
.get(), D
, TotalSize
);
1105 VPD
->swapBytesToHost(Endianness
);
1107 Error E
= VPD
->checkIntegrity();
1109 return std::move(E
);
1111 return std::move(VPD
);
1114 void ValueProfData::swapBytesToHost(llvm::endianness Endianness
) {
1115 using namespace support
;
1117 if (Endianness
== llvm::endianness::native
)
1120 sys::swapByteOrder
<uint32_t>(TotalSize
);
1121 sys::swapByteOrder
<uint32_t>(NumValueKinds
);
1123 ValueProfRecord
*VR
= getFirstValueProfRecord(this);
1124 for (uint32_t K
= 0; K
< NumValueKinds
; K
++) {
1125 VR
->swapBytes(Endianness
, llvm::endianness::native
);
1126 VR
= getValueProfRecordNext(VR
);
1130 void ValueProfData::swapBytesFromHost(llvm::endianness Endianness
) {
1131 using namespace support
;
1133 if (Endianness
== llvm::endianness::native
)
1136 ValueProfRecord
*VR
= getFirstValueProfRecord(this);
1137 for (uint32_t K
= 0; K
< NumValueKinds
; K
++) {
1138 ValueProfRecord
*NVR
= getValueProfRecordNext(VR
);
1139 VR
->swapBytes(llvm::endianness::native
, Endianness
);
1142 sys::swapByteOrder
<uint32_t>(TotalSize
);
1143 sys::swapByteOrder
<uint32_t>(NumValueKinds
);
1146 void annotateValueSite(Module
&M
, Instruction
&Inst
,
1147 const InstrProfRecord
&InstrProfR
,
1148 InstrProfValueKind ValueKind
, uint32_t SiteIdx
,
1149 uint32_t MaxMDCount
) {
1150 uint32_t NV
= InstrProfR
.getNumValueDataForSite(ValueKind
, SiteIdx
);
1155 std::unique_ptr
<InstrProfValueData
[]> VD
=
1156 InstrProfR
.getValueForSite(ValueKind
, SiteIdx
, &Sum
);
1158 ArrayRef
<InstrProfValueData
> VDs(VD
.get(), NV
);
1159 annotateValueSite(M
, Inst
, VDs
, Sum
, ValueKind
, MaxMDCount
);
1162 void annotateValueSite(Module
&M
, Instruction
&Inst
,
1163 ArrayRef
<InstrProfValueData
> VDs
,
1164 uint64_t Sum
, InstrProfValueKind ValueKind
,
1165 uint32_t MaxMDCount
) {
1166 LLVMContext
&Ctx
= M
.getContext();
1167 MDBuilder
MDHelper(Ctx
);
1168 SmallVector
<Metadata
*, 3> Vals
;
1170 Vals
.push_back(MDHelper
.createString("VP"));
1172 Vals
.push_back(MDHelper
.createConstant(
1173 ConstantInt::get(Type::getInt32Ty(Ctx
), ValueKind
)));
1176 MDHelper
.createConstant(ConstantInt::get(Type::getInt64Ty(Ctx
), Sum
)));
1178 // Value Profile Data
1179 uint32_t MDCount
= MaxMDCount
;
1180 for (auto &VD
: VDs
) {
1181 Vals
.push_back(MDHelper
.createConstant(
1182 ConstantInt::get(Type::getInt64Ty(Ctx
), VD
.Value
)));
1183 Vals
.push_back(MDHelper
.createConstant(
1184 ConstantInt::get(Type::getInt64Ty(Ctx
), VD
.Count
)));
1188 Inst
.setMetadata(LLVMContext::MD_prof
, MDNode::get(Ctx
, Vals
));
1191 bool getValueProfDataFromInst(const Instruction
&Inst
,
1192 InstrProfValueKind ValueKind
,
1193 uint32_t MaxNumValueData
,
1194 InstrProfValueData ValueData
[],
1195 uint32_t &ActualNumValueData
, uint64_t &TotalC
,
1196 bool GetNoICPValue
) {
1197 MDNode
*MD
= Inst
.getMetadata(LLVMContext::MD_prof
);
1201 unsigned NOps
= MD
->getNumOperands();
1206 // Operand 0 is a string tag "VP":
1207 MDString
*Tag
= cast
<MDString
>(MD
->getOperand(0));
1211 if (!Tag
->getString().equals("VP"))
1215 ConstantInt
*KindInt
= mdconst::dyn_extract
<ConstantInt
>(MD
->getOperand(1));
1218 if (KindInt
->getZExtValue() != ValueKind
)
1222 ConstantInt
*TotalCInt
= mdconst::dyn_extract
<ConstantInt
>(MD
->getOperand(2));
1225 TotalC
= TotalCInt
->getZExtValue();
1227 ActualNumValueData
= 0;
1229 for (unsigned I
= 3; I
< NOps
; I
+= 2) {
1230 if (ActualNumValueData
>= MaxNumValueData
)
1232 ConstantInt
*Value
= mdconst::dyn_extract
<ConstantInt
>(MD
->getOperand(I
));
1233 ConstantInt
*Count
=
1234 mdconst::dyn_extract
<ConstantInt
>(MD
->getOperand(I
+ 1));
1235 if (!Value
|| !Count
)
1237 uint64_t CntValue
= Count
->getZExtValue();
1238 if (!GetNoICPValue
&& (CntValue
== NOMORE_ICP_MAGICNUM
))
1240 ValueData
[ActualNumValueData
].Value
= Value
->getZExtValue();
1241 ValueData
[ActualNumValueData
].Count
= CntValue
;
1242 ActualNumValueData
++;
1247 MDNode
*getPGOFuncNameMetadata(const Function
&F
) {
1248 return F
.getMetadata(getPGOFuncNameMetadataName());
1251 void createPGOFuncNameMetadata(Function
&F
, StringRef PGOFuncName
) {
1252 // Only for internal linkage functions.
1253 if (PGOFuncName
== F
.getName())
1255 // Don't create duplicated meta-data.
1256 if (getPGOFuncNameMetadata(F
))
1258 LLVMContext
&C
= F
.getContext();
1259 MDNode
*N
= MDNode::get(C
, MDString::get(C
, PGOFuncName
));
1260 F
.setMetadata(getPGOFuncNameMetadataName(), N
);
1263 bool needsComdatForCounter(const Function
&F
, const Module
&M
) {
1267 if (!Triple(M
.getTargetTriple()).supportsCOMDAT())
1270 // See createPGOFuncNameVar for more details. To avoid link errors, profile
1271 // counters for function with available_externally linkage needs to be changed
1272 // to linkonce linkage. On ELF based systems, this leads to weak symbols to be
1273 // created. Without using comdat, duplicate entries won't be removed by the
1274 // linker leading to increased data segement size and raw profile size. Even
1275 // worse, since the referenced counter from profile per-function data object
1276 // will be resolved to the common strong definition, the profile counts for
1277 // available_externally functions will end up being duplicated in raw profile
1278 // data. This can result in distorted profile as the counts of those dups
1279 // will be accumulated by the profile merger.
1280 GlobalValue::LinkageTypes Linkage
= F
.getLinkage();
1281 if (Linkage
!= GlobalValue::ExternalWeakLinkage
&&
1282 Linkage
!= GlobalValue::AvailableExternallyLinkage
)
1288 // Check if INSTR_PROF_RAW_VERSION_VAR is defined.
1289 bool isIRPGOFlagSet(const Module
*M
) {
1291 M
->getNamedGlobal(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR
));
1292 if (!IRInstrVar
|| IRInstrVar
->hasLocalLinkage())
1295 // For CSPGO+LTO, this variable might be marked as non-prevailing and we only
1297 if (IRInstrVar
->isDeclaration())
1300 // Check if the flag is set.
1301 if (!IRInstrVar
->hasInitializer())
1304 auto *InitVal
= dyn_cast_or_null
<ConstantInt
>(IRInstrVar
->getInitializer());
1307 return (InitVal
->getZExtValue() & VARIANT_MASK_IR_PROF
) != 0;
1310 // Check if we can safely rename this Comdat function.
1311 bool canRenameComdatFunc(const Function
&F
, bool CheckAddressTaken
) {
1312 if (F
.getName().empty())
1314 if (!needsComdatForCounter(F
, *(F
.getParent())))
1316 // Unsafe to rename the address-taken function (which can be used in
1317 // function comparison).
1318 if (CheckAddressTaken
&& F
.hasAddressTaken())
1320 // Only safe to do if this function may be discarded if it is not used
1321 // in the compilation unit.
1322 if (!GlobalValue::isDiscardableIfUnused(F
.getLinkage()))
1325 // For AvailableExternallyLinkage functions.
1326 if (!F
.hasComdat()) {
1327 assert(F
.getLinkage() == GlobalValue::AvailableExternallyLinkage
);
1333 // Create the variable for the profile file name.
1334 void createProfileFileNameVar(Module
&M
, StringRef InstrProfileOutput
) {
1335 if (InstrProfileOutput
.empty())
1337 Constant
*ProfileNameConst
=
1338 ConstantDataArray::getString(M
.getContext(), InstrProfileOutput
, true);
1339 GlobalVariable
*ProfileNameVar
= new GlobalVariable(
1340 M
, ProfileNameConst
->getType(), true, GlobalValue::WeakAnyLinkage
,
1341 ProfileNameConst
, INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR
));
1342 ProfileNameVar
->setVisibility(GlobalValue::HiddenVisibility
);
1343 Triple
TT(M
.getTargetTriple());
1344 if (TT
.supportsCOMDAT()) {
1345 ProfileNameVar
->setLinkage(GlobalValue::ExternalLinkage
);
1346 ProfileNameVar
->setComdat(M
.getOrInsertComdat(
1347 StringRef(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR
))));
1351 Error
OverlapStats::accumulateCounts(const std::string
&BaseFilename
,
1352 const std::string
&TestFilename
,
1354 auto getProfileSum
= [IsCS
](const std::string
&Filename
,
1355 CountSumOrPercent
&Sum
) -> Error
{
1356 // This function is only used from llvm-profdata that doesn't use any kind
1357 // of VFS. Just create a default RealFileSystem to read profiles.
1358 auto FS
= vfs::getRealFileSystem();
1359 auto ReaderOrErr
= InstrProfReader::create(Filename
, *FS
);
1360 if (Error E
= ReaderOrErr
.takeError()) {
1363 auto Reader
= std::move(ReaderOrErr
.get());
1364 Reader
->accumulateCounts(Sum
, IsCS
);
1365 return Error::success();
1367 auto Ret
= getProfileSum(BaseFilename
, Base
);
1370 Ret
= getProfileSum(TestFilename
, Test
);
1373 this->BaseFilename
= &BaseFilename
;
1374 this->TestFilename
= &TestFilename
;
1376 return Error::success();
1379 void OverlapStats::addOneMismatch(const CountSumOrPercent
&MismatchFunc
) {
1380 Mismatch
.NumEntries
+= 1;
1381 Mismatch
.CountSum
+= MismatchFunc
.CountSum
/ Test
.CountSum
;
1382 for (unsigned I
= 0; I
< IPVK_Last
- IPVK_First
+ 1; I
++) {
1383 if (Test
.ValueCounts
[I
] >= 1.0f
)
1384 Mismatch
.ValueCounts
[I
] +=
1385 MismatchFunc
.ValueCounts
[I
] / Test
.ValueCounts
[I
];
1389 void OverlapStats::addOneUnique(const CountSumOrPercent
&UniqueFunc
) {
1390 Unique
.NumEntries
+= 1;
1391 Unique
.CountSum
+= UniqueFunc
.CountSum
/ Test
.CountSum
;
1392 for (unsigned I
= 0; I
< IPVK_Last
- IPVK_First
+ 1; I
++) {
1393 if (Test
.ValueCounts
[I
] >= 1.0f
)
1394 Unique
.ValueCounts
[I
] += UniqueFunc
.ValueCounts
[I
] / Test
.ValueCounts
[I
];
1398 void OverlapStats::dump(raw_fd_ostream
&OS
) const {
1402 const char *EntryName
=
1403 (Level
== ProgramLevel
? "functions" : "edge counters");
1404 if (Level
== ProgramLevel
) {
1405 OS
<< "Profile overlap infomation for base_profile: " << *BaseFilename
1406 << " and test_profile: " << *TestFilename
<< "\nProgram level:\n";
1408 OS
<< "Function level:\n"
1409 << " Function: " << FuncName
<< " (Hash=" << FuncHash
<< ")\n";
1412 OS
<< " # of " << EntryName
<< " overlap: " << Overlap
.NumEntries
<< "\n";
1413 if (Mismatch
.NumEntries
)
1414 OS
<< " # of " << EntryName
<< " mismatch: " << Mismatch
.NumEntries
1416 if (Unique
.NumEntries
)
1417 OS
<< " # of " << EntryName
1418 << " only in test_profile: " << Unique
.NumEntries
<< "\n";
1420 OS
<< " Edge profile overlap: " << format("%.3f%%", Overlap
.CountSum
* 100)
1422 if (Mismatch
.NumEntries
)
1423 OS
<< " Mismatched count percentage (Edge): "
1424 << format("%.3f%%", Mismatch
.CountSum
* 100) << "\n";
1425 if (Unique
.NumEntries
)
1426 OS
<< " Percentage of Edge profile only in test_profile: "
1427 << format("%.3f%%", Unique
.CountSum
* 100) << "\n";
1428 OS
<< " Edge profile base count sum: " << format("%.0f", Base
.CountSum
)
1430 << " Edge profile test count sum: " << format("%.0f", Test
.CountSum
)
1433 for (unsigned I
= 0; I
< IPVK_Last
- IPVK_First
+ 1; I
++) {
1434 if (Base
.ValueCounts
[I
] < 1.0f
&& Test
.ValueCounts
[I
] < 1.0f
)
1436 char ProfileKindName
[20];
1438 case IPVK_IndirectCallTarget
:
1439 strncpy(ProfileKindName
, "IndirectCall", 19);
1441 case IPVK_MemOPSize
:
1442 strncpy(ProfileKindName
, "MemOP", 19);
1445 snprintf(ProfileKindName
, 19, "VP[%d]", I
);
1448 OS
<< " " << ProfileKindName
1449 << " profile overlap: " << format("%.3f%%", Overlap
.ValueCounts
[I
] * 100)
1451 if (Mismatch
.NumEntries
)
1452 OS
<< " Mismatched count percentage (" << ProfileKindName
1453 << "): " << format("%.3f%%", Mismatch
.ValueCounts
[I
] * 100) << "\n";
1454 if (Unique
.NumEntries
)
1455 OS
<< " Percentage of " << ProfileKindName
1456 << " profile only in test_profile: "
1457 << format("%.3f%%", Unique
.ValueCounts
[I
] * 100) << "\n";
1458 OS
<< " " << ProfileKindName
1459 << " profile base count sum: " << format("%.0f", Base
.ValueCounts
[I
])
1461 << " " << ProfileKindName
1462 << " profile test count sum: " << format("%.0f", Test
.ValueCounts
[I
])
1467 namespace IndexedInstrProf
{
// A C++14 compatible version of the offsetof macro: returns the distance in
// bytes between a value-initialized T2 object and the member of it selected
// by Member.
template <typename T1, typename T2>
inline size_t constexpr offsetOf(T1 T2::*Member) {
  constexpr T2 Probe{};
  const size_t ObjectAddr = size_t(&Probe);
  const size_t MemberAddr = size_t(&(Probe.*Member));
  return MemberAddr - ObjectAddr;
}
// Reads the uint64_t stored Offset bytes into Buffer and returns it without
// any byte-order conversion.
//
// Uses memcpy rather than dereferencing a cast pointer: Buffer + Offset need
// not be 8-byte aligned, and a direct uint64_t load through it would be an
// unaligned, aliasing-violating access.
static inline uint64_t read(const unsigned char *Buffer, size_t Offset) {
  uint64_t Value;
  memcpy(&Value, Buffer + Offset, sizeof(Value));
  return Value;
}
1479 uint64_t Header::formatVersion() const {
1480 using namespace support
;
1481 return endian::byte_swap
<uint64_t, llvm::endianness::little
>(Version
);
1484 Expected
<Header
> Header::readFromBuffer(const unsigned char *Buffer
) {
1485 using namespace support
;
1486 static_assert(std::is_standard_layout_v
<Header
>,
1487 "The header should be standard layout type since we use offset "
1488 "of fields to read.");
1491 H
.Magic
= read(Buffer
, offsetOf(&Header::Magic
));
1492 // Check the magic number.
1494 endian::byte_swap
<uint64_t, llvm::endianness::little
>(H
.Magic
);
1495 if (Magic
!= IndexedInstrProf::Magic
)
1496 return make_error
<InstrProfError
>(instrprof_error::bad_magic
);
1498 // Read the version.
1499 H
.Version
= read(Buffer
, offsetOf(&Header::Version
));
1500 if (GET_VERSION(H
.formatVersion()) >
1501 IndexedInstrProf::ProfVersion::CurrentVersion
)
1502 return make_error
<InstrProfError
>(instrprof_error::unsupported_version
);
1504 switch (GET_VERSION(H
.formatVersion())) {
1505 // When a new field is added in the header add a case statement here to
1508 IndexedInstrProf::ProfVersion::CurrentVersion
== Version11
,
1509 "Please update the reading code below if a new field has been added, "
1510 "if not add a case statement to fall through to the latest version.");
1514 H
.TemporalProfTracesOffset
=
1515 read(Buffer
, offsetOf(&Header::TemporalProfTracesOffset
));
1518 H
.BinaryIdOffset
= read(Buffer
, offsetOf(&Header::BinaryIdOffset
));
1521 H
.MemProfOffset
= read(Buffer
, offsetOf(&Header::MemProfOffset
));
1523 default: // Version7 (when the backwards compatible header was introduced).
1524 H
.HashType
= read(Buffer
, offsetOf(&Header::HashType
));
1525 H
.HashOffset
= read(Buffer
, offsetOf(&Header::HashOffset
));
1531 size_t Header::size() const {
1532 switch (GET_VERSION(formatVersion())) {
1533 // When a new field is added to the header add a case statement here to
1534 // compute the size as offset of the new field + size of the new field. This
1535 // relies on the field being added to the end of the list.
1536 static_assert(IndexedInstrProf::ProfVersion::CurrentVersion
== Version11
,
1537 "Please update the size computation below if a new field has "
1538 "been added to the header, if not add a case statement to "
1539 "fall through to the latest version.");
1543 return offsetOf(&Header::TemporalProfTracesOffset
) +
1544 sizeof(Header::TemporalProfTracesOffset
);
1546 return offsetOf(&Header::BinaryIdOffset
) + sizeof(Header::BinaryIdOffset
);
1548 return offsetOf(&Header::MemProfOffset
) + sizeof(Header::MemProfOffset
);
1549 default: // Version7 (when the backwards compatible header was introduced).
1550 return offsetOf(&Header::HashOffset
) + sizeof(Header::HashOffset
);
1554 } // namespace IndexedInstrProf
1556 } // end namespace llvm