1 //===- InstrProf.cpp - Instrumented profiling format support --------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file contains support for clang's instrumentation based PGO and
12 //===----------------------------------------------------------------------===//
14 #include "llvm/ProfileData/InstrProf.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/SetVector.h"
17 #include "llvm/ADT/SmallString.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Config/config.h"
22 #include "llvm/IR/Constant.h"
23 #include "llvm/IR/Constants.h"
24 #include "llvm/IR/Function.h"
25 #include "llvm/IR/GlobalValue.h"
26 #include "llvm/IR/GlobalVariable.h"
27 #include "llvm/IR/Instruction.h"
28 #include "llvm/IR/LLVMContext.h"
29 #include "llvm/IR/MDBuilder.h"
30 #include "llvm/IR/Metadata.h"
31 #include "llvm/IR/Module.h"
32 #include "llvm/IR/Type.h"
33 #include "llvm/ProfileData/InstrProfReader.h"
34 #include "llvm/Support/Casting.h"
35 #include "llvm/Support/CommandLine.h"
36 #include "llvm/Support/Compiler.h"
37 #include "llvm/Support/Compression.h"
38 #include "llvm/Support/Endian.h"
39 #include "llvm/Support/Error.h"
40 #include "llvm/Support/ErrorHandling.h"
41 #include "llvm/Support/LEB128.h"
42 #include "llvm/Support/MathExtras.h"
43 #include "llvm/Support/Path.h"
44 #include "llvm/Support/SwapByteOrder.h"
45 #include "llvm/Support/VirtualFileSystem.h"
46 #include "llvm/TargetParser/Triple.h"
54 #include <system_error>
55 #include <type_traits>
// Hidden boolean command-line option, enabled by default, that selects whether
// the full module build path is used in the profile counter names of static
// functions.
61 static cl::opt
<bool> StaticFuncFullModulePrefix(
62 "static-func-full-module-prefix", cl::init(true), cl::Hidden
,
63 cl::desc("Use full module build paths in the profile counter names for "
64 "static functions."));
66 // This option is tailored to users that have different top-level directory in
67 // profile-gen and profile-use compilation. Users need to specify the number
68 // of levels to strip. A value larger than the number of directories in the
69 // source file will strip all the directory names and only leave the basename.
71 // Note current ThinLTO module importing for the indirect-calls assumes
72 // the source directory name not being stripped. A non-zero option value here
73 // can potentially prevent some inter-module indirect-call-promotions.
// Hidden unsigned option; the default of 0 strips nothing.
74 static cl::opt
<unsigned> StaticFuncStripDirNamePrefix(
75 "static-func-strip-dirname-prefix", cl::init(0), cl::Hidden
,
76 cl::desc("Strip specified level of directory name from source path in "
77 "the profile counter name for static functions."));
// Builds the human-readable description of an instrprof_error.
//
// Each enumerator streams one fixed sentence into a raw_string_ostream. When
// the optional ErrMsg is supplied it is appended after a ": " separator.
// NOTE(review): the declaration of Msg, the switch header, the break
// statements and the final return are not visible in this excerpt.
79 static std::string
getInstrProfErrString(instrprof_error Err
,
80 const std::string
&ErrMsg
= "") {
82 raw_string_ostream
OS(Msg
);
85 case instrprof_error::success
:
88 case instrprof_error::eof
:
91 case instrprof_error::unrecognized_format
:
92 OS
<< "unrecognized instrumentation profile encoding format";
94 case instrprof_error::bad_magic
:
95 OS
<< "invalid instrumentation profile data (bad magic)";
97 case instrprof_error::bad_header
:
98 OS
<< "invalid instrumentation profile data (file header is corrupt)";
100 case instrprof_error::unsupported_version
:
101 OS
<< "unsupported instrumentation profile format version";
103 case instrprof_error::unsupported_hash_type
:
104 OS
<< "unsupported instrumentation profile hash type";
106 case instrprof_error::too_large
:
107 OS
<< "too much profile data";
109 case instrprof_error::truncated
:
110 OS
<< "truncated profile data";
112 case instrprof_error::malformed
:
113 OS
<< "malformed instrumentation profile data";
115 case instrprof_error::missing_debug_info_for_correlation
:
116 OS
<< "debug info for correlation is required";
118 case instrprof_error::unexpected_debug_info_for_correlation
:
119 OS
<< "debug info for correlation is not necessary";
121 case instrprof_error::unable_to_correlate_profile
:
122 OS
<< "unable to correlate profile";
124 case instrprof_error::invalid_prof
:
// BUG_REPORT_URL is a macro spliced between the adjacent string literals.
125 OS
<< "invalid profile created. Please file a bug "
126 "at: " BUG_REPORT_URL
127 " and include the profraw files that caused this error.";
129 case instrprof_error::unknown_function
:
130 OS
<< "no profile data available for function";
132 case instrprof_error::hash_mismatch
:
133 OS
<< "function control flow change detected (hash mismatch)";
135 case instrprof_error::count_mismatch
:
136 OS
<< "function basic block count change detected (counter mismatch)";
138 case instrprof_error::counter_overflow
:
139 OS
<< "counter overflow";
141 case instrprof_error::value_site_count_mismatch
:
142 OS
<< "function value site count change detected (counter mismatch)";
144 case instrprof_error::compress_failed
:
145 OS
<< "failed to compress data (zlib)";
147 case instrprof_error::uncompress_failed
:
148 OS
<< "failed to uncompress data (zlib)";
150 case instrprof_error::empty_raw_profile
:
151 OS
<< "empty raw profile file";
153 case instrprof_error::zlib_unavailable
:
154 OS
<< "profile uses zlib compression but the profile reader was built "
155 "without zlib support";
157 case instrprof_error::raw_profile_version_mismatch
:
158 OS
<< "raw profile version mismatch";
162 // If optional error message is not empty, append it to the message.
164 OS
<< ": " << ErrMsg
;
171 // FIXME: This class is only here to support the transition to llvm::Error. It
172 // will be removed once this transition is complete. Clients should prefer to
173 // deal with the Error value directly, rather than converting to error_code.
//
// std::error_category adaptor for instrprof_error: name() reports
// "llvm.instrprof" and message() forwards the integer code, cast back to
// instrprof_error, to getInstrProfErrString.
174 class InstrProfErrorCategoryType
: public std::error_category
{
175 const char *name() const noexcept override
{ return "llvm.instrprof"; }
177 std::string
message(int IE
) const override
{
178 return getInstrProfErrString(static_cast<instrprof_error
>(IE
));
182 } // end anonymous namespace
// Returns the error category for instrprof errors. The category object is a
// function-local static, so every call yields a reference to the same instance.
184 const std::error_category
&llvm::instrprof_category() {
185 static InstrProfErrorCategoryType ErrorCategory
;
186 return ErrorCategory
;
// Section-name tables. Each table is filled by defining INSTR_PROF_SECT_ENTRY
// and including InstrProfData.inc; getInstrProfSectionName indexes them by
// InstrProfSectKind.
// NOTE(review): the macro bodies and the closing braces are not visible in
// this excerpt.
191 const char *InstrProfSectNameCommon
[] = {
192 #define INSTR_PROF_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) \
194 #include "llvm/ProfileData/InstrProfData.inc"
// Table of section names appended when the object format is COFF.
197 const char *InstrProfSectNameCoff
[] = {
198 #define INSTR_PROF_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) \
200 #include "llvm/ProfileData/InstrProfData.inc"
// Table of prefixes used for Mach-O when segment information is requested.
203 const char *InstrProfSectNamePrefix
[] = {
204 #define INSTR_PROF_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) \
206 #include "llvm/ProfileData/InstrProfData.inc"
// Non-static boolean command-line option, enabled by default, controlling
// compression of name/filename strings.
213 cl::opt
<bool> DoInstrProfNameCompression(
214 "enable-name-compression",
215 cl::desc("Enable name/filename string compression"), cl::init(true));
// Assembles the section name for section kind IPSK and object format OF.
//
// The name is built up in SectName from the three section-name tables; the
// individual steps are annotated below.
// NOTE(review): the line between the COFF and common-name appends and the
// final return are not visible in this excerpt, so the exact branch structure
// should be confirmed against the full file.
217 std::string
getInstrProfSectionName(InstrProfSectKind IPSK
,
218 Triple::ObjectFormatType OF
,
219 bool AddSegmentInfo
) {
220 std::string SectName
;
// Mach-O with segment info: start from the per-kind prefix.
222 if (OF
== Triple::MachO
&& AddSegmentInfo
)
223 SectName
= InstrProfSectNamePrefix
[IPSK
];
// COFF uses its own per-kind section name table.
225 if (OF
== Triple::COFF
)
226 SectName
+= InstrProfSectNameCoff
[IPSK
];
228 SectName
+= InstrProfSectNameCommon
[IPSK
];
// Only the Mach-O data section gets the extra section attributes.
230 if (OF
== Triple::MachO
&& IPSK
== IPSK_data
&& AddSegmentInfo
)
231 SectName
+= ",regular,live_support";
// Returns the description of the stored error code Err combined with the
// stored detail text Msg, as formatted by getInstrProfErrString.
236 std::string
InstrProfError::message() const {
237 return getInstrProfErrString(Err
, Msg
);
// Definition of the static ID member of InstrProfError, initialized to 0.
240 char InstrProfError::ID
= 0;
// Returns the PGO name for a raw function name by delegating to
// GlobalValue::getGlobalIdentifier with the linkage and file name.
// Version is marked unused.
// NOTE(review): the FileName parameter declaration is not visible in this
// excerpt, although FileName is used in the call below.
242 std::string
getPGOFuncName(StringRef RawFuncName
,
243 GlobalValue::LinkageTypes Linkage
,
245 uint64_t Version LLVM_ATTRIBUTE_UNUSED
) {
246 return GlobalValue::getGlobalIdentifier(RawFuncName
, Linkage
, FileName
);
249 // Strip NumPrefix level of directory name from PathNameStr. If the number of
250 // directory separators is less than NumPrefix, strip all the directories and
251 // leave base file name only.
//
// The path is scanned character by character; separators are recognized with
// llvm::sys::path::is_separator. The result is the substring of PathNameStr
// starting at LastPos, so it refers to the same storage as the input.
// NOTE(review): the statements that update Pos, LastPos and Count inside the
// loop are not visible in this excerpt.
252 static StringRef
stripDirPrefix(StringRef PathNameStr
, uint32_t NumPrefix
) {
253 uint32_t Count
= NumPrefix
;
254 uint32_t Pos
= 0, LastPos
= 0;
255 for (auto & CI
: PathNameStr
) {
257 if (llvm::sys::path::is_separator(CI
)) {
264 return PathNameStr
.substr(LastPos
);
267 // Return the PGOFuncName. This function has some special handling when called
268 // in LTO optimization. The following only applies when calling in LTO passes
269 // (when \c InLTO is true): LTO's internalization privatizes many global linkage
270 // symbols. This happens after value profile annotation, but those internal
271 // linkage functions should not have a source prefix.
272 // Additionally, for ThinLTO mode, exported internal functions are promoted
273 // and renamed. We need to ensure that the original internal PGO name is
274 // used when computing the GUID that is compared against the profiled GUIDs.
275 // To differentiate compiler generated internal symbols from original ones,
276 // PGOFuncName meta data are created and attached to the original internal
277 // symbols in the value profile annotation step
278 // (PGOUseFunc::annotateIndirectCallSites). If a symbol does not have the meta
279 // data, its original linkage must be non-internal.
// NOTE(review): the condition guarding the first return (presumably a check
// on InLTO) is not visible in this excerpt.
280 std::string
getPGOFuncName(const Function
&F
, bool InLTO
, uint64_t Version
) {
282 StringRef
FileName(F
.getParent()->getSourceFileName());
// With the full-module-prefix option on, start from 0 levels; otherwise start
// from the largest uint32_t value.
283 uint32_t StripLevel
= StaticFuncFullModulePrefix
? 0 : (uint32_t)-1;
// The explicit strip-level option can only raise the level, never lower it.
284 if (StripLevel
< StaticFuncStripDirNamePrefix
)
285 StripLevel
= StaticFuncStripDirNamePrefix
;
287 FileName
= stripDirPrefix(FileName
, StripLevel
);
288 return getPGOFuncName(F
.getName(), F
.getLinkage(), FileName
, Version
);
291 // In LTO mode (when InLTO is true), first check if there is a meta data.
292 if (MDNode
*MD
= getPGOFuncNameMetadata(F
)) {
// Operand 0 of the metadata node is read as an MDString.
293 StringRef S
= cast
<MDString
>(MD
->getOperand(0))->getString();
297 // If there is no meta data, the function must be a global before the value
298 // profile annotation pass. Its current linkage may be internal if it is
299 // internalized in LTO mode.
300 return getPGOFuncName(F
.getName(), GlobalValue::ExternalLinkage
, "");
// Removes a leading FileName prefix, plus one separator character, from
// PGOFuncName when the name starts with FileName.
// NOTE(review): the statement executed for an empty FileName and the final
// return are not visible in this excerpt.
303 StringRef
getFuncNameWithoutPrefix(StringRef PGOFuncName
, StringRef FileName
) {
304 if (FileName
.empty())
306 // Drop the file name including ':'. See also getPGOFuncName.
307 if (PGOFuncName
.startswith(FileName
))
// The "+ 1" also drops the single character that follows the file name.
308 PGOFuncName
= PGOFuncName
.drop_front(FileName
.size() + 1);
312 // \p FuncName is the string used as profile lookup key for the function. A
313 // symbol is created to hold the name. Return the legalized symbol name.
//
// The name starts with getInstrProfNameVarPrefix(). The sanitizing loop
// replaces every character from the invalid set with '_'.
// NOTE(review): the statement that appends FuncName, the body of the
// non-local-linkage branch and the final return are not visible here.
314 std::string
getPGOFuncNameVarName(StringRef FuncName
,
315 GlobalValue::LinkageTypes Linkage
) {
316 std::string VarName
= std::string(getInstrProfNameVarPrefix());
319 if (!GlobalValue::isLocalLinkage(Linkage
))
322 // Now fix up illegal chars in local VarName that may upset the assembler.
323 const char *InvalidChars
= "-:<>/\"'";
324 size_t found
= VarName
.find_first_of(InvalidChars
);
325 while (found
!= std::string::npos
) {
326 VarName
[found
] = '_';
// Resume the search just past the character that was replaced.
327 found
= VarName
.find_first_of(InvalidChars
, found
+ 1);
// Creates, in module M, the constant global variable that holds PGOFuncName.
//
// The requested linkage is first adjusted (see below). The initializer is a
// ConstantDataArray string built with AddNull == false, and the variable is
// named via getPGOFuncNameVarName. Non-local variables are given hidden
// visibility.
// NOTE(review): the declarations of Value and FuncNameVar and the final
// return are not visible in this excerpt.
332 GlobalVariable
*createPGOFuncNameVar(Module
&M
,
333 GlobalValue::LinkageTypes Linkage
,
334 StringRef PGOFuncName
) {
335 // We generally want to match the function's linkage, but available_externally
336 // and extern_weak both have the wrong semantics, and anything that doesn't
337 // need to link across compilation units doesn't need to be visible at all.
338 if (Linkage
== GlobalValue::ExternalWeakLinkage
)
339 Linkage
= GlobalValue::LinkOnceAnyLinkage
;
340 else if (Linkage
== GlobalValue::AvailableExternallyLinkage
)
341 Linkage
= GlobalValue::LinkOnceODRLinkage
;
342 else if (Linkage
== GlobalValue::InternalLinkage
||
343 Linkage
== GlobalValue::ExternalLinkage
)
344 Linkage
= GlobalValue::PrivateLinkage
;
347 ConstantDataArray::getString(M
.getContext(), PGOFuncName
, false);
// The third constructor argument (true) marks the variable as constant.
349 new GlobalVariable(M
, Value
->getType(), true, Linkage
, Value
,
350 getPGOFuncNameVarName(PGOFuncName
, Linkage
));
352 // Hide the symbol so that we correctly get a copy for each executable.
353 if (!GlobalValue::isLocalLinkage(FuncNameVar
->getLinkage()))
354 FuncNameVar
->setVisibility(GlobalValue::HiddenVisibility
);
// Convenience overload: creates the name variable in F's parent module using
// F's own linkage.
359 GlobalVariable
*createPGOFuncNameVar(Function
&F
, StringRef PGOFuncName
) {
360 return createPGOFuncNameVar(*F
.getParent(), F
.getLinkage(), PGOFuncName
);
// Populates the symbol table from the functions of module M.
//
// For every function the PGO name is added via addFuncName and the pair
// (GUID of that name, &F) is appended to MD5FuncMap. A second entry is added
// for the name truncated at the first '.' that follows any ".__uniq." marker.
// NOTE(review): the unnamed-function check, the error returns after
// addFuncName and any finalization step are not visible in this excerpt.
363 Error
InstrProfSymtab::create(Module
&M
, bool InLTO
) {
364 for (Function
&F
: M
) {
365 // Function may not have a name: like using asm("") to overwrite the name.
366 // Ignore in this case.
369 const std::string
&PGOFuncName
= getPGOFuncName(F
, InLTO
);
370 if (Error E
= addFuncName(PGOFuncName
))
372 MD5FuncMap
.emplace_back(Function::getGUID(PGOFuncName
), &F
);
373 // In ThinLTO, local function may have been promoted to global and have
374 // suffix ".llvm." added to the function name. We need to add the
375 // stripped function name to the symbol table so that we can find a match
378 // We may have other suffixes similar as ".llvm." which are needed to
379 // be stripped before the matching, but ".__uniq." suffix which is used
380 // to differentiate internal linkage functions in different modules
381 // should be kept. Now this is the only suffix with the pattern ".xxx"
382 // which is kept before matching.
383 const std::string UniqSuffix
= ".__uniq.";
384 auto pos
= PGOFuncName
.find(UniqSuffix
);
385 // Search '.' after ".__uniq." if ".__uniq." exists, otherwise
386 // search '.' from the beginning.
387 if (pos
!= std::string::npos
)
388 pos
+= UniqSuffix
.length();
391 pos
= PGOFuncName
.find('.', pos
);
// A '.' at index 0 would leave an empty stripped name, so it is ignored.
392 if (pos
!= std::string::npos
&& pos
!= 0) {
393 const std::string
&OtherFuncName
= PGOFuncName
.substr(0, pos
);
394 if (Error E
= addFuncName(OtherFuncName
))
396 MD5FuncMap
.emplace_back(Function::getGUID(OtherFuncName
), &F
);
401 return Error::success();
// Looks up Address in AddrToMD5Map and returns the mapped value on an exact
// match.
//
// partition_point locates the first entry whose key is not less than Address.
// NOTE(review): partition_point requires AddrToMD5Map to be partitioned
// (sorted) by key; where that is established is not visible here, nor is the
// fall-through return for the no-match case.
404 uint64_t InstrProfSymtab::getFunctionHashFromAddress(uint64_t Address
) {
406 auto It
= partition_point(AddrToMD5Map
, [=](std::pair
<uint64_t, uint64_t> A
) {
407 return A
.first
< Address
;
409 // Raw function pointer collected by value profiler may be from
410 // external functions that are not instrumented. They won't have
411 // mapping data to be used by the deserializer. Force the value to
412 // be 0 in this case.
413 if (It
!= AddrToMD5Map
.end() && It
->first
== Address
)
414 return (uint64_t)It
->second
;
// Serializes NameStrs into Result, optionally zlib-compressed.
//
// The names are joined with getInstrProfNameSeparator(). A header of two
// ULEB128 values is written first: the uncompressed length, then the
// compressed length. A compressed length of 0 is what the uncompressed path
// writes. NameStrs must be non-empty and no name may contain the separator;
// both conditions are asserted.
// NOTE(review): the statements that advance P and append the payload to
// Result are not visible in this excerpt.
418 Error
collectPGOFuncNameStrings(ArrayRef
<std::string
> NameStrs
,
419 bool doCompression
, std::string
&Result
) {
420 assert(!NameStrs
.empty() && "No name data to emit");
// 16 bytes of scratch space for the two ULEB128-encoded lengths.
422 uint8_t Header
[16], *P
= Header
;
423 std::string UncompressedNameStrings
=
424 join(NameStrs
.begin(), NameStrs
.end(), getInstrProfNameSeparator());
// N names joined by a separator must contain exactly N - 1 separators.
426 assert(StringRef(UncompressedNameStrings
)
427 .count(getInstrProfNameSeparator()) == (NameStrs
.size() - 1) &&
428 "PGO name is invalid (contains separator token)");
430 unsigned EncLen
= encodeULEB128(UncompressedNameStrings
.length(), P
);
// Helper that finishes the header with the compressed length and emits it.
433 auto WriteStringToResult
= [&](size_t CompressedLen
, StringRef InputStr
) {
434 EncLen
= encodeULEB128(CompressedLen
, P
);
436 char *HeaderStr
= reinterpret_cast<char *>(&Header
[0]);
437 unsigned HeaderLen
= P
- &Header
[0];
438 Result
.append(HeaderStr
, HeaderLen
);
440 return Error::success();
443 if (!doCompression
) {
444 return WriteStringToResult(0, UncompressedNameStrings
);
447 SmallVector
<uint8_t, 128> CompressedNameStrings
;
448 compression::zlib::compress(arrayRefFromStringRef(UncompressedNameStrings
),
449 CompressedNameStrings
,
450 compression::zlib::BestSizeCompression
);
452 return WriteStringToResult(CompressedNameStrings
.size(),
453 toStringRef(CompressedNameStrings
));
// Extracts the name string stored in NameVar's initializer.
//
// The initializer is cast to ConstantDataArray. It is read with
// getAsCString() when isCString() holds, and with getAsString() otherwise.
// NOTE(review): the declaration receiving the conditional expression and the
// return are not visible in this excerpt.
456 StringRef
getPGOFuncNameVarInitializer(GlobalVariable
*NameVar
) {
457 auto *Arr
= cast
<ConstantDataArray
>(NameVar
->getInitializer());
459 Arr
->isCString() ? Arr
->getAsCString() : Arr
->getAsString();
// Overload taking name variables: copies each variable's initializer string
// into a vector and forwards to the string-based overload. Compression is
// requested only when zlib is available and doCompression is set.
463 Error
collectPGOFuncNameStrings(ArrayRef
<GlobalVariable
*> NameVars
,
464 std::string
&Result
, bool doCompression
) {
465 std::vector
<std::string
> NameStrs
;
466 for (auto *NameVar
: NameVars
) {
467 NameStrs
.push_back(std::string(getPGOFuncNameVarInitializer(NameVar
)));
469 return collectPGOFuncNameStrings(
470 NameStrs
, compression::zlib::isAvailable() && doCompression
, Result
);
// Parses serialized name data and adds every name to Symtab.
//
// Two ULEB128 values are decoded first: the uncompressed size and the
// compressed size. A non-zero compressed size marks a zlib-compressed
// payload. The payload is split on getInstrProfNameSeparator() and each piece
// is passed to Symtab.addFuncName. Runs of zero bytes are then skipped.
// NOTE(review): the enclosing loop, the declaration of N, the pointer advances
// after each decode and the error returns are not visible in this excerpt.
473 Error
readPGOFuncNameStrings(StringRef NameStrings
, InstrProfSymtab
&Symtab
) {
474 const uint8_t *P
= NameStrings
.bytes_begin();
475 const uint8_t *EndP
= NameStrings
.bytes_end();
478 uint64_t UncompressedSize
= decodeULEB128(P
, &N
);
480 uint64_t CompressedSize
= decodeULEB128(P
, &N
);
482 bool isCompressed
= (CompressedSize
!= 0);
483 SmallVector
<uint8_t, 128> UncompressedNameStrings
;
484 StringRef NameStrings
;
// Compressed data cannot be read by a build without zlib.
486 if (!llvm::compression::zlib::isAvailable())
487 return make_error
<InstrProfError
>(instrprof_error::zlib_unavailable
);
489 if (Error E
= compression::zlib::decompress(ArrayRef(P
, CompressedSize
),
490 UncompressedNameStrings
,
// The zlib error is discarded and replaced by an instrprof error.
492 consumeError(std::move(E
));
493 return make_error
<InstrProfError
>(instrprof_error::uncompress_failed
);
496 NameStrings
= toStringRef(UncompressedNameStrings
);
499 StringRef(reinterpret_cast<const char *>(P
), UncompressedSize
);
500 P
+= UncompressedSize
;
502 // Now parse the name strings.
503 SmallVector
<StringRef
, 0> Names
;
504 NameStrings
.split(Names
, getInstrProfNameSeparator());
505 for (StringRef
&Name
: Names
)
506 if (Error E
= Symtab
.addFuncName(Name
))
// Skip zero bytes that follow the payload.
509 while (P
< EndP
&& *P
== 0)
512 return Error::success();
// Adds this record's totals into Sum.
//
// NumEntries grows by the number of counters and CountSum by FuncSum. For each
// value kind, the counts of all value data at all sites are summed and added
// to Sum.ValueCounts[VK].
// NOTE(review): the statement that accumulates into FuncSum inside the first
// loop is not visible in this excerpt.
515 void InstrProfRecord::accumulateCounts(CountSumOrPercent
&Sum
) const {
516 uint64_t FuncSum
= 0;
517 Sum
.NumEntries
+= Counts
.size();
518 for (uint64_t Count
: Counts
)
520 Sum
.CountSum
+= FuncSum
;
522 for (uint32_t VK
= IPVK_First
; VK
<= IPVK_Last
; ++VK
) {
523 uint64_t KindSum
= 0;
524 uint32_t NumValueSites
= getNumValueSites(VK
);
525 for (size_t I
= 0; I
< NumValueSites
; ++I
) {
526 uint32_t NV
= getNumValueDataForSite(VK
, I
);
527 std::unique_ptr
<InstrProfValueData
[]> VD
= getValueForSite(VK
, I
);
528 for (uint32_t V
= 0; V
< NV
; V
++)
529 KindSum
+= VD
[V
].Count
;
531 Sum
.ValueCounts
[VK
] += KindSum
;
// Computes the overlap score between this value site and Input for one value
// kind.
//
// Both records are sorted by target value (Input is modified by that sort),
// then walked in parallel. Entries with equal Value contribute
// OverlapStats::score(...) to both the program-level and the function-level
// score, which are finally added to the respective Overlap.ValueCounts slot.
// NOTE(review): the ValueKind parameter declaration and the iterator advances
// in each branch are not visible in this excerpt.
535 void InstrProfValueSiteRecord::overlap(InstrProfValueSiteRecord
&Input
,
537 OverlapStats
&Overlap
,
538 OverlapStats
&FuncLevelOverlap
) {
539 this->sortByTargetValues();
540 Input
.sortByTargetValues();
541 double Score
= 0.0f
, FuncLevelScore
= 0.0f
;
542 auto I
= ValueData
.begin();
543 auto IE
= ValueData
.end();
544 auto J
= Input
.ValueData
.begin();
545 auto JE
= Input
.ValueData
.end();
546 while (I
!= IE
&& J
!= JE
) {
// Only targets present in both profiles contribute to the score.
547 if (I
->Value
== J
->Value
) {
548 Score
+= OverlapStats::score(I
->Count
, J
->Count
,
549 Overlap
.Base
.ValueCounts
[ValueKind
],
550 Overlap
.Test
.ValueCounts
[ValueKind
]);
551 FuncLevelScore
+= OverlapStats::score(
552 I
->Count
, J
->Count
, FuncLevelOverlap
.Base
.ValueCounts
[ValueKind
],
553 FuncLevelOverlap
.Test
.ValueCounts
[ValueKind
]);
555 } else if (I
->Value
< J
->Value
) {
561 Overlap
.Overlap
.ValueCounts
[ValueKind
] += Score
;
562 FuncLevelOverlap
.Overlap
.ValueCounts
[ValueKind
] += FuncLevelScore
;
565 // Accumulate the value profile overlap of every site of ValueKind.
// This function returns void. Both records must have the same number of value
// sites for ValueKind (asserted below). Each site of this record is compared
// with the site at the same index in Other.
// NOTE(review): the statement executed when there are no value sites and the
// tail of the final call are not visible in this excerpt.
566 void InstrProfRecord::overlapValueProfData(uint32_t ValueKind
,
567 InstrProfRecord
&Other
,
568 OverlapStats
&Overlap
,
569 OverlapStats
&FuncLevelOverlap
) {
570 uint32_t ThisNumValueSites
= getNumValueSites(ValueKind
);
571 assert(ThisNumValueSites
== Other
.getNumValueSites(ValueKind
));
572 if (!ThisNumValueSites
)
575 std::vector
<InstrProfValueSiteRecord
> &ThisSiteRecords
=
576 getOrCreateValueSitesForKind(ValueKind
);
577 MutableArrayRef
<InstrProfValueSiteRecord
> OtherSiteRecords
=
578 Other
.getValueSitesForKind(ValueKind
);
579 for (uint32_t I
= 0; I
< ThisNumValueSites
; I
++)
580 ThisSiteRecords
[I
].overlap(OtherSiteRecords
[I
], ValueKind
, Overlap
,
// Computes the overlap between this record (base) and Other (test).
//
// Base totals are accumulated into FuncLevelOverlap.Base first. Records whose
// counter count or per-kind value-site count differ are treated as a mismatch.
// Otherwise value-count overlap and edge-count overlap are added to Overlap.
// The function-level result is filled in only when the largest counter of
// Other reaches ValueCutoff.
// NOTE(review): the declaration of Score and the control flow around the
// mismatch handling are not visible in this excerpt.
584 void InstrProfRecord::overlap(InstrProfRecord
&Other
, OverlapStats
&Overlap
,
585 OverlapStats
&FuncLevelOverlap
,
586 uint64_t ValueCutoff
) {
587 // FuncLevel CountSum for other should already be computed and nonzero.
588 assert(FuncLevelOverlap
.Test
.CountSum
>= 1.0f
);
589 accumulateCounts(FuncLevelOverlap
.Base
);
590 bool Mismatch
= (Counts
.size() != Other
.Counts
.size());
592 // Check if the value profiles mismatch.
594 for (uint32_t Kind
= IPVK_First
; Kind
<= IPVK_Last
; ++Kind
) {
595 uint32_t ThisNumValueSites
= getNumValueSites(Kind
);
596 uint32_t OtherNumValueSites
= Other
.getNumValueSites(Kind
);
597 if (ThisNumValueSites
!= OtherNumValueSites
) {
604 Overlap
.addOneMismatch(FuncLevelOverlap
.Test
);
608 // Compute overlap for value counts.
609 for (uint32_t Kind
= IPVK_First
; Kind
<= IPVK_Last
; ++Kind
)
610 overlapValueProfData(Kind
, Other
, Overlap
, FuncLevelOverlap
);
613 uint64_t MaxCount
= 0;
614 // Compute overlap for edge counts.
615 for (size_t I
= 0, E
= Other
.Counts
.size(); I
< E
; ++I
) {
616 Score
+= OverlapStats::score(Counts
[I
], Other
.Counts
[I
],
617 Overlap
.Base
.CountSum
, Overlap
.Test
.CountSum
);
// Track the hottest counter of Other for the cutoff test below.
618 MaxCount
= std::max(Other
.Counts
[I
], MaxCount
);
620 Overlap
.Overlap
.CountSum
+= Score
;
621 Overlap
.Overlap
.NumEntries
+= 1;
// Function-level statistics are produced only for sufficiently hot functions.
623 if (MaxCount
>= ValueCutoff
) {
624 double FuncScore
= 0.0;
625 for (size_t I
= 0, E
= Other
.Counts
.size(); I
< E
; ++I
)
626 FuncScore
+= OverlapStats::score(Counts
[I
], Other
.Counts
[I
],
627 FuncLevelOverlap
.Base
.CountSum
,
628 FuncLevelOverlap
.Test
.CountSum
);
629 FuncLevelOverlap
.Overlap
.CountSum
= FuncScore
;
630 FuncLevelOverlap
.Overlap
.NumEntries
= Other
.Counts
.size();
631 FuncLevelOverlap
.Valid
= true;
// Merges the value data of Input into this site, scaling Input's counts by
// Weight.
//
// Both records are sorted by target value (Input is modified by that sort).
// For each entry J of Input the cursor I advances past smaller values. On an
// equal value the count becomes SaturatingMultiplyAdd(J.Count, Weight,
// I->Count), and Warn is called with counter_overflow. Otherwise J is inserted
// before I.
// NOTE(review): the Weight parameter declaration, the declaration of
// Overflowed and the guard around the Warn call are not visible here.
635 void InstrProfValueSiteRecord::merge(InstrProfValueSiteRecord
&Input
,
637 function_ref
<void(instrprof_error
)> Warn
) {
638 this->sortByTargetValues();
639 Input
.sortByTargetValues();
640 auto I
= ValueData
.begin();
641 auto IE
= ValueData
.end();
642 for (const InstrProfValueData
&J
: Input
.ValueData
) {
643 while (I
!= IE
&& I
->Value
< J
.Value
)
645 if (I
!= IE
&& I
->Value
== J
.Value
) {
647 I
->Count
= SaturatingMultiplyAdd(J
.Count
, Weight
, I
->Count
, &Overflowed
);
649 Warn(instrprof_error::counter_overflow
);
// No matching target in this record: insert Input's entry at the cursor.
653 ValueData
.insert(I
, J
);
// Scales every value count of this site by N / D. The multiplication
// saturates and the division is applied to the saturated product. Warn is
// called with counter_overflow.
// NOTE(review): the declaration of Overflowed and the guard around the Warn
// call are not visible in this excerpt.
657 void InstrProfValueSiteRecord::scale(uint64_t N
, uint64_t D
,
658 function_ref
<void(instrprof_error
)> Warn
) {
659 for (InstrProfValueData
&I
: ValueData
) {
661 I
.Count
= SaturatingMultiply(I
.Count
, N
, &Overflowed
) / D
;
663 Warn(instrprof_error::counter_overflow
);
667 // Merge Value Profile data from Src record to this record for ValueKind.
668 // Scale merged value counts by \p Weight.
// A differing number of value sites is reported through Warn as
// value_site_count_mismatch. Sites are merged pairwise by index.
// NOTE(review): the statements following the mismatch warning and the
// empty-sites check are not visible in this excerpt.
669 void InstrProfRecord::mergeValueProfData(
670 uint32_t ValueKind
, InstrProfRecord
&Src
, uint64_t Weight
,
671 function_ref
<void(instrprof_error
)> Warn
) {
672 uint32_t ThisNumValueSites
= getNumValueSites(ValueKind
);
673 uint32_t OtherNumValueSites
= Src
.getNumValueSites(ValueKind
);
674 if (ThisNumValueSites
!= OtherNumValueSites
) {
675 Warn(instrprof_error::value_site_count_mismatch
);
678 if (!ThisNumValueSites
)
680 std::vector
<InstrProfValueSiteRecord
> &ThisSiteRecords
=
681 getOrCreateValueSitesForKind(ValueKind
);
682 MutableArrayRef
<InstrProfValueSiteRecord
> OtherSiteRecords
=
683 Src
.getValueSitesForKind(ValueKind
);
684 for (uint32_t I
= 0; I
< ThisNumValueSites
; I
++)
685 ThisSiteRecords
[I
].merge(OtherSiteRecords
[I
], Weight
, Warn
);
// Merges Other into this record, scaling Other's counters by Weight.
//
// A differing number of counters is reported as count_mismatch. Records that
// carry pseudo counts are handled separately (see below). Regular counters
// are combined with SaturatingMultiplyAdd and clamped to
// getInstrMaxCountValue(); overflow is reported through Warn. Value profile
// data is then merged for every value kind.
// NOTE(review): the early returns, the else keyword between the two
// setPseudoCount calls and the store back into Counts[I] are not visible in
// this excerpt.
688 void InstrProfRecord::merge(InstrProfRecord
&Other
, uint64_t Weight
,
689 function_ref
<void(instrprof_error
)> Warn
) {
690 // If the number of counters doesn't match we either have bad data
691 // or a hash collision.
692 if (Counts
.size() != Other
.Counts
.size()) {
693 Warn(instrprof_error::count_mismatch
);
697 // Special handling of the first count as the PseudoCount.
698 CountPseudoKind OtherKind
= Other
.getCountPseudoKind();
699 CountPseudoKind ThisKind
= getCountPseudoKind();
700 if (OtherKind
!= NotPseudo
|| ThisKind
!= NotPseudo
) {
701 // We don't allow the merge of a profile with pseudo counts and
702 // a normal profile (i.e. without pseudo counts).
703 // Profile supplementation should be done after the profile merge.
704 if (OtherKind
== NotPseudo
|| ThisKind
== NotPseudo
) {
705 Warn(instrprof_error::count_mismatch
);
708 if (OtherKind
== PseudoHot
|| ThisKind
== PseudoHot
)
709 setPseudoCount(PseudoHot
);
711 setPseudoCount(PseudoWarm
);
715 for (size_t I
= 0, E
= Other
.Counts
.size(); I
< E
; ++I
) {
718 SaturatingMultiplyAdd(Other
.Counts
[I
], Weight
, Counts
[I
], &Overflowed
);
// Clamp to the largest value a counter may hold.
719 if (Value
> getInstrMaxCountValue()) {
720 Value
= getInstrMaxCountValue();
725 Warn(instrprof_error::counter_overflow
);
728 for (uint32_t Kind
= IPVK_First
; Kind
<= IPVK_Last
; ++Kind
)
729 mergeValueProfData(Kind
, Other
, Weight
, Warn
);
// Iterates over every value site record of ValueKind.
// NOTE(review): the loop body is not visible in this excerpt; presumably it
// scales each record by N / D and forwards Warn. Confirm against the full
// file.
732 void InstrProfRecord::scaleValueProfData(
733 uint32_t ValueKind
, uint64_t N
, uint64_t D
,
734 function_ref
<void(instrprof_error
)> Warn
) {
735 for (auto &R
: getValueSitesForKind(ValueKind
))
// Scales every counter of this record by N / D, then scales the value profile
// data of every value kind.
//
// D must be non-zero (asserted). The multiplication saturates and the result
// is clamped to getInstrMaxCountValue(). Warn is called with counter_overflow.
// NOTE(review): the declaration of Overflowed and the guard around the Warn
// call are not visible in this excerpt.
739 void InstrProfRecord::scale(uint64_t N
, uint64_t D
,
740 function_ref
<void(instrprof_error
)> Warn
) {
741 assert(D
!= 0 && "D cannot be 0");
742 for (auto &Count
: this->Counts
) {
744 Count
= SaturatingMultiply(Count
, N
, &Overflowed
) / D
;
745 if (Count
> getInstrMaxCountValue()) {
746 Count
= getInstrMaxCountValue();
750 Warn(instrprof_error::counter_overflow
);
752 for (uint32_t Kind
= IPVK_First
; Kind
<= IPVK_Last
; ++Kind
)
753 scaleValueProfData(Kind
, N
, D
, Warn
);
756 // Map indirect call target name hash to name string.
// For IPVK_IndirectCallTarget, Value is passed to
// SymTab->getFunctionHashFromAddress and that result is returned.
// NOTE(review): the handling of a null SymTab and of other value kinds is not
// visible in this excerpt.
757 uint64_t InstrProfRecord::remapValue(uint64_t Value
, uint32_t ValueKind
,
758 InstrProfSymtab
*SymTab
) {
762 if (ValueKind
== IPVK_IndirectCallTarget
)
763 return SymTab
->getFunctionHashFromAddress(Value
);
// Appends one value site for ValueKind, built from the N entries at VData.
//
// Each entry's Value is first remapped in place through remapValue, so the
// caller's VData array is modified. The site is then appended either empty or
// constructed from the range [VData, VData + N).
// NOTE(review): the condition choosing between the two emplace_back calls is
// not visible in this excerpt, and Site is not used in the visible lines.
768 void InstrProfRecord::addValueData(uint32_t ValueKind
, uint32_t Site
,
769 InstrProfValueData
*VData
, uint32_t N
,
770 InstrProfSymtab
*ValueMap
) {
771 for (uint32_t I
= 0; I
< N
; I
++) {
772 VData
[I
].Value
= remapValue(VData
[I
].Value
, ValueKind
, ValueMap
);
774 std::vector
<InstrProfValueSiteRecord
> &ValueSites
=
775 getOrCreateValueSitesForKind(ValueKind
);
777 ValueSites
.emplace_back();
779 ValueSites
.emplace_back(VData
, VData
+ N
);
// Builds one BPFunctionNode per distinct function found in Traces.
//
// Function ids are collected in order of first appearance, scanning all
// traces timestamp by timestamp. Each function is then given utility-node ids
// derived from the trace index and the log2 bucket of its timestamp.
// NOTE(review): the assignment target for LargestTraceSize, any sort before
// std::unique and the final return are not visible in this excerpt.
782 std::vector
<BPFunctionNode
> TemporalProfTraceTy::createBPFunctionNodes(
783 ArrayRef
<TemporalProfTraceTy
> Traces
) {
784 using IDT
= BPFunctionNode::IDT
;
785 using UtilityNodeT
= BPFunctionNode::UtilityNodeT
;
786 // Collect all function IDs ordered by their smallest timestamp. This will be
787 // used as the initial FunctionNode order.
788 SetVector
<IDT
> FunctionIds
;
789 size_t LargestTraceSize
= 0;
790 for (auto &Trace
: Traces
)
792 std::max(LargestTraceSize
, Trace
.FunctionNameRefs
.size());
793 for (size_t Timestamp
= 0; Timestamp
< LargestTraceSize
; Timestamp
++)
794 for (auto &Trace
: Traces
)
795 if (Timestamp
< Trace
.FunctionNameRefs
.size())
796 FunctionIds
.insert(Trace
.FunctionNameRefs
[Timestamp
]);
// N is the number of log2 buckets reserved per trace.
798 int N
= std::ceil(std::log2(LargestTraceSize
));
800 // TODO: We need to use the Trace.Weight field to give more weight to more
801 // important utilities
802 DenseMap
<IDT
, SmallVector
<UtilityNodeT
, 4>> FuncGroups
;
803 for (size_t TraceIdx
= 0; TraceIdx
< Traces
.size(); TraceIdx
++) {
804 auto &Trace
= Traces
[TraceIdx
].FunctionNameRefs
;
805 for (size_t Timestamp
= 0; Timestamp
< Trace
.size(); Timestamp
++) {
// A function at this timestamp joins every bucket from floor(log2(t + 1))
// up to N - 1 of its trace.
806 for (int I
= std::floor(std::log2(Timestamp
+ 1)); I
< N
; I
++) {
807 auto &FunctionId
= Trace
[Timestamp
];
808 UtilityNodeT GroupId
= TraceIdx
* N
+ I
;
809 FuncGroups
[FunctionId
].push_back(GroupId
);
814 std::vector
<BPFunctionNode
> Nodes
;
815 for (auto &Id
: FunctionIds
) {
816 auto &UNs
= FuncGroups
[Id
];
// std::unique only removes adjacent duplicates.
818 UNs
.erase(std::unique(UNs
.begin(), UNs
.end()), UNs
.end());
819 Nodes
.emplace_back(Id
, UNs
);
824 #define INSTR_PROF_COMMON_API_IMPL
825 #include "llvm/ProfileData/InstrProfData.inc"
828 * ValueProfRecordClosure Interface implementation for InstrProfRecord
829 * class. These C wrappers are used as adaptors so that C++ code can be
830 * invoked as callbacks.
// Closure callback: treats Record as an InstrProfRecord and returns its
// getNumValueKinds().
832 uint32_t getNumValueKindsInstrProf(const void *Record
) {
833 return reinterpret_cast<const InstrProfRecord
*>(Record
)->getNumValueKinds();
// Closure callback: treats Record as an InstrProfRecord and returns its
// getNumValueSites(VKind).
836 uint32_t getNumValueSitesInstrProf(const void *Record
, uint32_t VKind
) {
837 return reinterpret_cast<const InstrProfRecord
*>(Record
)
838 ->getNumValueSites(VKind
);
// Closure callback: treats Record as an InstrProfRecord and returns its
// getNumValueData(VKind).
841 uint32_t getNumValueDataInstrProf(const void *Record
, uint32_t VKind
) {
842 return reinterpret_cast<const InstrProfRecord
*>(Record
)
843 ->getNumValueData(VKind
);
// Closure callback: treats R as an InstrProfRecord and returns its
// getNumValueDataForSite(VK, S).
// NOTE(review): the declaration of the site parameter S is not visible in
// this excerpt.
846 uint32_t getNumValueDataForSiteInstrProf(const void *R
, uint32_t VK
,
848 return reinterpret_cast<const InstrProfRecord
*>(R
)
849 ->getNumValueDataForSite(VK
, S
);
// Closure callback: treats R as an InstrProfRecord and forwards to its
// getValueForSite(Dst, K, S) overload, passing Dst as the destination
// argument.
852 void getValueForSiteInstrProf(const void *R
, InstrProfValueData
*Dst
,
853 uint32_t K
, uint32_t S
) {
854 reinterpret_cast<const InstrProfRecord
*>(R
)->getValueForSite(Dst
, K
, S
);
// Closure callback: allocates TotalSizeInBytes raw bytes with ::operator new,
// placement-constructs a ValueProfData at the start of that storage and then
// zero-fills the whole allocation.
// NOTE(review): the declaration of VD and the return are not visible in this
// excerpt.
857 ValueProfData
*allocValueProfDataInstrProf(size_t TotalSizeInBytes
) {
859 (ValueProfData
*)(new (::operator new(TotalSizeInBytes
)) ValueProfData());
860 memset(VD
, 0, TotalSizeInBytes
);
// File-local closure wiring the InstrProfRecord adaptor functions into the
// ValueProfRecordClosure callback slots.
// NOTE(review): two initializer entries are not visible in this excerpt, so
// the slot positions shown here are incomplete.
864 static ValueProfRecordClosure InstrProfRecordClosure
= {
866 getNumValueKindsInstrProf
,
867 getNumValueSitesInstrProf
,
868 getNumValueDataInstrProf
,
869 getNumValueDataForSiteInstrProf
,
871 getValueForSiteInstrProf
,
872 allocValueProfDataInstrProf
};
874 // Wrapper implementation using the closure mechanism.
// Works on a local copy of InstrProfRecordClosure, so the shared static
// closure is not modified. The copy's Record is pointed at Record before
// calling getValueProfDataSize.
875 uint32_t ValueProfData::getSize(const InstrProfRecord
&Record
) {
876 auto Closure
= InstrProfRecordClosure
;
877 Closure
.Record
= &Record
;
878 return getValueProfDataSize(&Closure
);
881 // Wrapper implementation using the closure mechanism.
// Unlike getSize, this writes Record into the shared static
// InstrProfRecordClosure before serializing. The serialized data is owned by
// the unique_ptr VPD.
// NOTE(review): mutating the file-level static suggests this is not safe to
// call concurrently; confirm. The return statement is not visible here.
882 std::unique_ptr
<ValueProfData
>
883 ValueProfData::serializeFrom(const InstrProfRecord
&Record
) {
884 InstrProfRecordClosure
.Record
= &Record
;
886 std::unique_ptr
<ValueProfData
> VPD(
887 serializeValueProfDataFrom(&InstrProfRecordClosure
, nullptr));
// Copies this serialized record's value sites into Record.
//
// Space for NumValueSites sites of Kind is reserved first. The value data is
// stored contiguously; SiteCountArray[VSite] gives the number of entries that
// belong to each site, and the ValueData cursor advances by that count after
// each site is added.
891 void ValueProfRecord::deserializeTo(InstrProfRecord
&Record
,
892 InstrProfSymtab
*SymTab
) {
893 Record
.reserveSites(Kind
, NumValueSites
);
895 InstrProfValueData
*ValueData
= getValueProfRecordValueData(this);
896 for (uint64_t VSite
= 0; VSite
< NumValueSites
; ++VSite
) {
897 uint8_t ValueDataCount
= this->SiteCountArray
[VSite
];
898 Record
.addValueData(Kind
, VSite
, ValueData
, ValueDataCount
, SymTab
);
899 ValueData
+= ValueDataCount
;
903 // For writing/serializing, Old is the host endianness, and New is
904 // byte order intended on disk. For Reading/deserialization, Old
905 // is the on-disk source endianness, and New is the host endianness.
// The header fields NumValueSites and Kind are swapped before the value data
// is located when Old differs from the host order, and after the value data
// has been swapped when Old equals the host order.
// NOTE(review): presumably this ordering ensures the helpers that locate the
// value data read the header fields in host order; confirm. Any early exit
// for Old == New is not visible in this excerpt.
906 void ValueProfRecord::swapBytes(support::endianness Old
,
907 support::endianness New
) {
908 using namespace support
;
913 if (getHostEndianness() != Old
) {
914 sys::swapByteOrder
<uint32_t>(NumValueSites
);
915 sys::swapByteOrder
<uint32_t>(Kind
);
917 uint32_t ND
= getValueProfRecordNumValueData(this);
918 InstrProfValueData
*VD
= getValueProfRecordValueData(this);
920 // No need to swap byte array: SiteCountArray.
921 for (uint32_t I
= 0; I
< ND
; I
++) {
922 sys::swapByteOrder
<uint64_t>(VD
[I
].Value
);
923 sys::swapByteOrder
<uint64_t>(VD
[I
].Count
);
925 if (getHostEndianness() == Old
) {
926 sys::swapByteOrder
<uint32_t>(NumValueSites
);
927 sys::swapByteOrder
<uint32_t>(Kind
);
// Deserializes every value profile record of this ValueProfData into Record,
// walking the records with getFirstValueProfRecord / getValueProfRecordNext.
// NOTE(review): the statement executed when NumValueKinds is 0 is not visible
// in this excerpt (presumably an early return).
931 void ValueProfData::deserializeTo(InstrProfRecord
&Record
,
932 InstrProfSymtab
*SymTab
) {
933 if (NumValueKinds
== 0)
936 ValueProfRecord
*VR
= getFirstValueProfRecord(this);
937 for (uint32_t K
= 0; K
< NumValueKinds
; K
++) {
938 VR
->deserializeTo(Record
, SymTab
);
939 VR
= getValueProfRecordNext(VR
);
// Reads one unaligned value of type T from D, interpreting the bytes as
// little- or big-endian. D is taken by reference and handed to
// endian::readNext.
// NOTE(review): the template header and the condition on Orig that selects
// between the two reads are not visible in this excerpt.
944 static T
swapToHostOrder(const unsigned char *&D
, support::endianness Orig
) {
945 using namespace support
;
948 return endian::readNext
<T
, little
, unaligned
>(D
);
950 return endian::readNext
<T
, big
, unaligned
>(D
);
// Allocates TotalSize raw bytes with ::operator new and wraps the
// placement-new result in a unique_ptr.
// NOTE(review): the object expression of the placement new is not visible in
// this excerpt.
953 static std::unique_ptr
<ValueProfData
> allocValueProfData(uint32_t TotalSize
) {
954 return std::unique_ptr
<ValueProfData
>(new (::operator new(TotalSize
))
// Validates the header and record chain of this ValueProfData.
//
// Returns instrprof_error::malformed when NumValueKinds exceeds IPVK_Last + 1,
// when TotalSize is not a multiple of sizeof(uint64_t), when a record's Kind
// exceeds IPVK_Last, or when stepping to the next record moves past TotalSize
// bytes from the start of this object. Returns success otherwise.
958 Error
ValueProfData::checkIntegrity() {
959 if (NumValueKinds
> IPVK_Last
+ 1)
960 return make_error
<InstrProfError
>(
961 instrprof_error::malformed
, "number of value profile kinds is invalid");
962 // Total size needs to be multiple of quadword size.
963 if (TotalSize
% sizeof(uint64_t))
964 return make_error
<InstrProfError
>(
965 instrprof_error::malformed
, "total size is not multiples of quardword");
967 ValueProfRecord
*VR
= getFirstValueProfRecord(this);
968 for (uint32_t K
= 0; K
< this->NumValueKinds
; K
++) {
969 if (VR
->Kind
> IPVK_Last
)
970 return make_error
<InstrProfError
>(instrprof_error::malformed
,
971 "value kind is invalid");
972 VR
= getValueProfRecordNext(VR
);
// The byte offset of the next record must stay within TotalSize.
973 if ((char *)VR
- (char *)this > (ptrdiff_t)TotalSize
)
974 return make_error
<InstrProfError
>(
975 instrprof_error::malformed
,
976 "value profile address is greater than total size");
978 return Error::success();
981 Expected
<std::unique_ptr
<ValueProfData
>>
982 ValueProfData::getValueProfData(const unsigned char *D
,
983 const unsigned char *const BufferEnd
,
984 support::endianness Endianness
) {
985 using namespace support
;
987 if (D
+ sizeof(ValueProfData
) > BufferEnd
)
988 return make_error
<InstrProfError
>(instrprof_error::truncated
);
990 const unsigned char *Header
= D
;
991 uint32_t TotalSize
= swapToHostOrder
<uint32_t>(Header
, Endianness
);
992 if (D
+ TotalSize
> BufferEnd
)
993 return make_error
<InstrProfError
>(instrprof_error::too_large
);
995 std::unique_ptr
<ValueProfData
> VPD
= allocValueProfData(TotalSize
);
996 memcpy(VPD
.get(), D
, TotalSize
);
998 VPD
->swapBytesToHost(Endianness
);
1000 Error E
= VPD
->checkIntegrity();
1002 return std::move(E
);
1004 return std::move(VPD
);
1007 void ValueProfData::swapBytesToHost(support::endianness Endianness
) {
1008 using namespace support
;
1010 if (Endianness
== getHostEndianness())
1013 sys::swapByteOrder
<uint32_t>(TotalSize
);
1014 sys::swapByteOrder
<uint32_t>(NumValueKinds
);
1016 ValueProfRecord
*VR
= getFirstValueProfRecord(this);
1017 for (uint32_t K
= 0; K
< NumValueKinds
; K
++) {
1018 VR
->swapBytes(Endianness
, getHostEndianness());
1019 VR
= getValueProfRecordNext(VR
);
1023 void ValueProfData::swapBytesFromHost(support::endianness Endianness
) {
1024 using namespace support
;
1026 if (Endianness
== getHostEndianness())
1029 ValueProfRecord
*VR
= getFirstValueProfRecord(this);
1030 for (uint32_t K
= 0; K
< NumValueKinds
; K
++) {
1031 ValueProfRecord
*NVR
= getValueProfRecordNext(VR
);
1032 VR
->swapBytes(getHostEndianness(), Endianness
);
1035 sys::swapByteOrder
<uint32_t>(TotalSize
);
1036 sys::swapByteOrder
<uint32_t>(NumValueKinds
);
1039 void annotateValueSite(Module
&M
, Instruction
&Inst
,
1040 const InstrProfRecord
&InstrProfR
,
1041 InstrProfValueKind ValueKind
, uint32_t SiteIdx
,
1042 uint32_t MaxMDCount
) {
1043 uint32_t NV
= InstrProfR
.getNumValueDataForSite(ValueKind
, SiteIdx
);
1048 std::unique_ptr
<InstrProfValueData
[]> VD
=
1049 InstrProfR
.getValueForSite(ValueKind
, SiteIdx
, &Sum
);
1051 ArrayRef
<InstrProfValueData
> VDs(VD
.get(), NV
);
1052 annotateValueSite(M
, Inst
, VDs
, Sum
, ValueKind
, MaxMDCount
);
1055 void annotateValueSite(Module
&M
, Instruction
&Inst
,
1056 ArrayRef
<InstrProfValueData
> VDs
,
1057 uint64_t Sum
, InstrProfValueKind ValueKind
,
1058 uint32_t MaxMDCount
) {
1059 LLVMContext
&Ctx
= M
.getContext();
1060 MDBuilder
MDHelper(Ctx
);
1061 SmallVector
<Metadata
*, 3> Vals
;
1063 Vals
.push_back(MDHelper
.createString("VP"));
1065 Vals
.push_back(MDHelper
.createConstant(
1066 ConstantInt::get(Type::getInt32Ty(Ctx
), ValueKind
)));
1069 MDHelper
.createConstant(ConstantInt::get(Type::getInt64Ty(Ctx
), Sum
)));
1071 // Value Profile Data
1072 uint32_t MDCount
= MaxMDCount
;
1073 for (auto &VD
: VDs
) {
1074 Vals
.push_back(MDHelper
.createConstant(
1075 ConstantInt::get(Type::getInt64Ty(Ctx
), VD
.Value
)));
1076 Vals
.push_back(MDHelper
.createConstant(
1077 ConstantInt::get(Type::getInt64Ty(Ctx
), VD
.Count
)));
1081 Inst
.setMetadata(LLVMContext::MD_prof
, MDNode::get(Ctx
, Vals
));
1084 bool getValueProfDataFromInst(const Instruction
&Inst
,
1085 InstrProfValueKind ValueKind
,
1086 uint32_t MaxNumValueData
,
1087 InstrProfValueData ValueData
[],
1088 uint32_t &ActualNumValueData
, uint64_t &TotalC
,
1089 bool GetNoICPValue
) {
1090 MDNode
*MD
= Inst
.getMetadata(LLVMContext::MD_prof
);
1094 unsigned NOps
= MD
->getNumOperands();
1099 // Operand 0 is a string tag "VP":
1100 MDString
*Tag
= cast
<MDString
>(MD
->getOperand(0));
1104 if (!Tag
->getString().equals("VP"))
1108 ConstantInt
*KindInt
= mdconst::dyn_extract
<ConstantInt
>(MD
->getOperand(1));
1111 if (KindInt
->getZExtValue() != ValueKind
)
1115 ConstantInt
*TotalCInt
= mdconst::dyn_extract
<ConstantInt
>(MD
->getOperand(2));
1118 TotalC
= TotalCInt
->getZExtValue();
1120 ActualNumValueData
= 0;
1122 for (unsigned I
= 3; I
< NOps
; I
+= 2) {
1123 if (ActualNumValueData
>= MaxNumValueData
)
1125 ConstantInt
*Value
= mdconst::dyn_extract
<ConstantInt
>(MD
->getOperand(I
));
1126 ConstantInt
*Count
=
1127 mdconst::dyn_extract
<ConstantInt
>(MD
->getOperand(I
+ 1));
1128 if (!Value
|| !Count
)
1130 uint64_t CntValue
= Count
->getZExtValue();
1131 if (!GetNoICPValue
&& (CntValue
== NOMORE_ICP_MAGICNUM
))
1133 ValueData
[ActualNumValueData
].Value
= Value
->getZExtValue();
1134 ValueData
[ActualNumValueData
].Count
= CntValue
;
1135 ActualNumValueData
++;
1140 MDNode
*getPGOFuncNameMetadata(const Function
&F
) {
1141 return F
.getMetadata(getPGOFuncNameMetadataName());
1144 void createPGOFuncNameMetadata(Function
&F
, StringRef PGOFuncName
) {
1145 // Only for internal linkage functions.
1146 if (PGOFuncName
== F
.getName())
1148 // Don't create duplicated meta-data.
1149 if (getPGOFuncNameMetadata(F
))
1151 LLVMContext
&C
= F
.getContext();
1152 MDNode
*N
= MDNode::get(C
, MDString::get(C
, PGOFuncName
));
1153 F
.setMetadata(getPGOFuncNameMetadataName(), N
);
1156 bool needsComdatForCounter(const Function
&F
, const Module
&M
) {
1160 if (!Triple(M
.getTargetTriple()).supportsCOMDAT())
1163 // See createPGOFuncNameVar for more details. To avoid link errors, profile
1164 // counters for function with available_externally linkage needs to be changed
1165 // to linkonce linkage. On ELF based systems, this leads to weak symbols to be
1166 // created. Without using comdat, duplicate entries won't be removed by the
1167 // linker leading to increased data segement size and raw profile size. Even
1168 // worse, since the referenced counter from profile per-function data object
1169 // will be resolved to the common strong definition, the profile counts for
1170 // available_externally functions will end up being duplicated in raw profile
1171 // data. This can result in distorted profile as the counts of those dups
1172 // will be accumulated by the profile merger.
1173 GlobalValue::LinkageTypes Linkage
= F
.getLinkage();
1174 if (Linkage
!= GlobalValue::ExternalWeakLinkage
&&
1175 Linkage
!= GlobalValue::AvailableExternallyLinkage
)
1181 // Check if INSTR_PROF_RAW_VERSION_VAR is defined.
1182 bool isIRPGOFlagSet(const Module
*M
) {
1184 M
->getNamedGlobal(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR
));
1185 if (!IRInstrVar
|| IRInstrVar
->hasLocalLinkage())
1188 // For CSPGO+LTO, this variable might be marked as non-prevailing and we only
1190 if (IRInstrVar
->isDeclaration())
1193 // Check if the flag is set.
1194 if (!IRInstrVar
->hasInitializer())
1197 auto *InitVal
= dyn_cast_or_null
<ConstantInt
>(IRInstrVar
->getInitializer());
1200 return (InitVal
->getZExtValue() & VARIANT_MASK_IR_PROF
) != 0;
1203 // Check if we can safely rename this Comdat function.
1204 bool canRenameComdatFunc(const Function
&F
, bool CheckAddressTaken
) {
1205 if (F
.getName().empty())
1207 if (!needsComdatForCounter(F
, *(F
.getParent())))
1209 // Unsafe to rename the address-taken function (which can be used in
1210 // function comparison).
1211 if (CheckAddressTaken
&& F
.hasAddressTaken())
1213 // Only safe to do if this function may be discarded if it is not used
1214 // in the compilation unit.
1215 if (!GlobalValue::isDiscardableIfUnused(F
.getLinkage()))
1218 // For AvailableExternallyLinkage functions.
1219 if (!F
.hasComdat()) {
1220 assert(F
.getLinkage() == GlobalValue::AvailableExternallyLinkage
);
1226 // Create the variable for the profile file name.
1227 void createProfileFileNameVar(Module
&M
, StringRef InstrProfileOutput
) {
1228 if (InstrProfileOutput
.empty())
1230 Constant
*ProfileNameConst
=
1231 ConstantDataArray::getString(M
.getContext(), InstrProfileOutput
, true);
1232 GlobalVariable
*ProfileNameVar
= new GlobalVariable(
1233 M
, ProfileNameConst
->getType(), true, GlobalValue::WeakAnyLinkage
,
1234 ProfileNameConst
, INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR
));
1235 ProfileNameVar
->setVisibility(GlobalValue::HiddenVisibility
);
1236 Triple
TT(M
.getTargetTriple());
1237 if (TT
.supportsCOMDAT()) {
1238 ProfileNameVar
->setLinkage(GlobalValue::ExternalLinkage
);
1239 ProfileNameVar
->setComdat(M
.getOrInsertComdat(
1240 StringRef(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR
))));
1244 Error
OverlapStats::accumulateCounts(const std::string
&BaseFilename
,
1245 const std::string
&TestFilename
,
1247 auto getProfileSum
= [IsCS
](const std::string
&Filename
,
1248 CountSumOrPercent
&Sum
) -> Error
{
1249 // This function is only used from llvm-profdata that doesn't use any kind
1250 // of VFS. Just create a default RealFileSystem to read profiles.
1251 auto FS
= vfs::getRealFileSystem();
1252 auto ReaderOrErr
= InstrProfReader::create(Filename
, *FS
);
1253 if (Error E
= ReaderOrErr
.takeError()) {
1256 auto Reader
= std::move(ReaderOrErr
.get());
1257 Reader
->accumulateCounts(Sum
, IsCS
);
1258 return Error::success();
1260 auto Ret
= getProfileSum(BaseFilename
, Base
);
1263 Ret
= getProfileSum(TestFilename
, Test
);
1266 this->BaseFilename
= &BaseFilename
;
1267 this->TestFilename
= &TestFilename
;
1269 return Error::success();
1272 void OverlapStats::addOneMismatch(const CountSumOrPercent
&MismatchFunc
) {
1273 Mismatch
.NumEntries
+= 1;
1274 Mismatch
.CountSum
+= MismatchFunc
.CountSum
/ Test
.CountSum
;
1275 for (unsigned I
= 0; I
< IPVK_Last
- IPVK_First
+ 1; I
++) {
1276 if (Test
.ValueCounts
[I
] >= 1.0f
)
1277 Mismatch
.ValueCounts
[I
] +=
1278 MismatchFunc
.ValueCounts
[I
] / Test
.ValueCounts
[I
];
1282 void OverlapStats::addOneUnique(const CountSumOrPercent
&UniqueFunc
) {
1283 Unique
.NumEntries
+= 1;
1284 Unique
.CountSum
+= UniqueFunc
.CountSum
/ Test
.CountSum
;
1285 for (unsigned I
= 0; I
< IPVK_Last
- IPVK_First
+ 1; I
++) {
1286 if (Test
.ValueCounts
[I
] >= 1.0f
)
1287 Unique
.ValueCounts
[I
] += UniqueFunc
.ValueCounts
[I
] / Test
.ValueCounts
[I
];
1291 void OverlapStats::dump(raw_fd_ostream
&OS
) const {
1295 const char *EntryName
=
1296 (Level
== ProgramLevel
? "functions" : "edge counters");
1297 if (Level
== ProgramLevel
) {
1298 OS
<< "Profile overlap infomation for base_profile: " << *BaseFilename
1299 << " and test_profile: " << *TestFilename
<< "\nProgram level:\n";
1301 OS
<< "Function level:\n"
1302 << " Function: " << FuncName
<< " (Hash=" << FuncHash
<< ")\n";
1305 OS
<< " # of " << EntryName
<< " overlap: " << Overlap
.NumEntries
<< "\n";
1306 if (Mismatch
.NumEntries
)
1307 OS
<< " # of " << EntryName
<< " mismatch: " << Mismatch
.NumEntries
1309 if (Unique
.NumEntries
)
1310 OS
<< " # of " << EntryName
1311 << " only in test_profile: " << Unique
.NumEntries
<< "\n";
1313 OS
<< " Edge profile overlap: " << format("%.3f%%", Overlap
.CountSum
* 100)
1315 if (Mismatch
.NumEntries
)
1316 OS
<< " Mismatched count percentage (Edge): "
1317 << format("%.3f%%", Mismatch
.CountSum
* 100) << "\n";
1318 if (Unique
.NumEntries
)
1319 OS
<< " Percentage of Edge profile only in test_profile: "
1320 << format("%.3f%%", Unique
.CountSum
* 100) << "\n";
1321 OS
<< " Edge profile base count sum: " << format("%.0f", Base
.CountSum
)
1323 << " Edge profile test count sum: " << format("%.0f", Test
.CountSum
)
1326 for (unsigned I
= 0; I
< IPVK_Last
- IPVK_First
+ 1; I
++) {
1327 if (Base
.ValueCounts
[I
] < 1.0f
&& Test
.ValueCounts
[I
] < 1.0f
)
1329 char ProfileKindName
[20];
1331 case IPVK_IndirectCallTarget
:
1332 strncpy(ProfileKindName
, "IndirectCall", 19);
1334 case IPVK_MemOPSize
:
1335 strncpy(ProfileKindName
, "MemOP", 19);
1338 snprintf(ProfileKindName
, 19, "VP[%d]", I
);
1341 OS
<< " " << ProfileKindName
1342 << " profile overlap: " << format("%.3f%%", Overlap
.ValueCounts
[I
] * 100)
1344 if (Mismatch
.NumEntries
)
1345 OS
<< " Mismatched count percentage (" << ProfileKindName
1346 << "): " << format("%.3f%%", Mismatch
.ValueCounts
[I
] * 100) << "\n";
1347 if (Unique
.NumEntries
)
1348 OS
<< " Percentage of " << ProfileKindName
1349 << " profile only in test_profile: "
1350 << format("%.3f%%", Unique
.ValueCounts
[I
] * 100) << "\n";
1351 OS
<< " " << ProfileKindName
1352 << " profile base count sum: " << format("%.0f", Base
.ValueCounts
[I
])
1354 << " " << ProfileKindName
1355 << " profile test count sum: " << format("%.0f", Test
.ValueCounts
[I
])
1360 namespace IndexedInstrProf
{
1361 // A C++14 compatible version of the offsetof macro.
1362 template <typename T1
, typename T2
>
1363 inline size_t constexpr offsetOf(T1
T2::*Member
) {
1364 constexpr T2 Object
{};
1365 return size_t(&(Object
.*Member
)) - size_t(&Object
);
1368 static inline uint64_t read(const unsigned char *Buffer
, size_t Offset
) {
1369 return *reinterpret_cast<const uint64_t *>(Buffer
+ Offset
);
1372 uint64_t Header::formatVersion() const {
1373 using namespace support
;
1374 return endian::byte_swap
<uint64_t, little
>(Version
);
1377 Expected
<Header
> Header::readFromBuffer(const unsigned char *Buffer
) {
1378 using namespace support
;
1379 static_assert(std::is_standard_layout_v
<Header
>,
1380 "The header should be standard layout type since we use offset "
1381 "of fields to read.");
1384 H
.Magic
= read(Buffer
, offsetOf(&Header::Magic
));
1385 // Check the magic number.
1386 uint64_t Magic
= endian::byte_swap
<uint64_t, little
>(H
.Magic
);
1387 if (Magic
!= IndexedInstrProf::Magic
)
1388 return make_error
<InstrProfError
>(instrprof_error::bad_magic
);
1390 // Read the version.
1391 H
.Version
= read(Buffer
, offsetOf(&Header::Version
));
1392 if (GET_VERSION(H
.formatVersion()) >
1393 IndexedInstrProf::ProfVersion::CurrentVersion
)
1394 return make_error
<InstrProfError
>(instrprof_error::unsupported_version
);
1396 switch (GET_VERSION(H
.formatVersion())) {
1397 // When a new field is added in the header add a case statement here to
1400 IndexedInstrProf::ProfVersion::CurrentVersion
== Version10
,
1401 "Please update the reading code below if a new field has been added, "
1402 "if not add a case statement to fall through to the latest version.");
1404 H
.TemporalProfTracesOffset
=
1405 read(Buffer
, offsetOf(&Header::TemporalProfTracesOffset
));
1408 H
.BinaryIdOffset
= read(Buffer
, offsetOf(&Header::BinaryIdOffset
));
1411 H
.MemProfOffset
= read(Buffer
, offsetOf(&Header::MemProfOffset
));
1413 default: // Version7 (when the backwards compatible header was introduced).
1414 H
.HashType
= read(Buffer
, offsetOf(&Header::HashType
));
1415 H
.HashOffset
= read(Buffer
, offsetOf(&Header::HashOffset
));
1421 size_t Header::size() const {
1422 switch (GET_VERSION(formatVersion())) {
1423 // When a new field is added to the header add a case statement here to
1424 // compute the size as offset of the new field + size of the new field. This
1425 // relies on the field being added to the end of the list.
1426 static_assert(IndexedInstrProf::ProfVersion::CurrentVersion
== Version10
,
1427 "Please update the size computation below if a new field has "
1428 "been added to the header, if not add a case statement to "
1429 "fall through to the latest version.");
1431 return offsetOf(&Header::TemporalProfTracesOffset
) +
1432 sizeof(Header::TemporalProfTracesOffset
);
1434 return offsetOf(&Header::BinaryIdOffset
) + sizeof(Header::BinaryIdOffset
);
1436 return offsetOf(&Header::MemProfOffset
) + sizeof(Header::MemProfOffset
);
1437 default: // Version7 (when the backwards compatible header was introduced).
1438 return offsetOf(&Header::HashOffset
) + sizeof(Header::HashOffset
);
1442 } // namespace IndexedInstrProf
1444 } // end namespace llvm