1 //===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements PGO instrumentation using a minimum spanning tree based
10 // on the following paper:
11 // [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points
12 // for program frequency counts. BIT Numerical Mathematics 1973, Volume 13,
13 // Issue 3, pp 313-322
14 // The idea of the algorithm is based on the fact that for each node (except for
15 // the entry and exit), the sum of incoming edge counts equals the sum of
16 // outgoing edge counts. The count of edge on spanning tree can be derived from
17 // those edges not on the spanning tree. Knuth proves this method instruments
18 // the minimum number of edges.
20 // The minimal spanning tree here is actually a maximum weight tree -- on-tree
21 // edges have higher frequencies (more likely to execute). The idea is to
22 // instrument those less frequently executed edges to reduce the runtime
23 // overhead of instrumented binaries.
25 // This file contains two passes:
26 // (1) Pass PGOInstrumentationGen which instruments the IR to generate edge
27 // count profile, and generates the instrumentation for indirect call
29 // (2) Pass PGOInstrumentationUse which reads the edge count profile and
30 // annotates the branch weights. It also reads the indirect call value
31 // profiling records and annotates the indirect call instructions.
33 // To get the precise counter information, these two passes need to be invoked at
34 // the same compilation point (so they see the same IR). For pass
35 // PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For
36 // pass PGOInstrumentationUse, the real work is done in class PGOUseFunc and
37 // the profile is opened in module level and passed to each PGOUseFunc instance.
38 // The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put
39 // in class FuncPGOInstrumentation.
41 // Class PGOEdge represents a CFG edge and some auxiliary information. Class
42 // BBInfo contains auxiliary information for each BB. These two classes are used
43 // in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived
44 // classes of PGOEdge and BBInfo, respectively. They contain extra data structures
45 // used in populating profile counters.
46 // The MST implementation is in Class CFGMST (CFGMST.h).
48 //===----------------------------------------------------------------------===//
50 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
51 #include "ValueProfileCollector.h"
52 #include "llvm/ADT/APInt.h"
53 #include "llvm/ADT/ArrayRef.h"
54 #include "llvm/ADT/STLExtras.h"
55 #include "llvm/ADT/SmallVector.h"
56 #include "llvm/ADT/Statistic.h"
57 #include "llvm/ADT/StringRef.h"
58 #include "llvm/ADT/Twine.h"
59 #include "llvm/ADT/iterator.h"
60 #include "llvm/ADT/iterator_range.h"
61 #include "llvm/Analysis/BlockFrequencyInfo.h"
62 #include "llvm/Analysis/BranchProbabilityInfo.h"
63 #include "llvm/Analysis/CFG.h"
64 #include "llvm/Analysis/LoopInfo.h"
65 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
66 #include "llvm/Analysis/ProfileSummaryInfo.h"
67 #include "llvm/Analysis/TargetLibraryInfo.h"
68 #include "llvm/IR/Attributes.h"
69 #include "llvm/IR/BasicBlock.h"
70 #include "llvm/IR/CFG.h"
71 #include "llvm/IR/Comdat.h"
72 #include "llvm/IR/Constant.h"
73 #include "llvm/IR/Constants.h"
74 #include "llvm/IR/DiagnosticInfo.h"
75 #include "llvm/IR/Dominators.h"
76 #include "llvm/IR/EHPersonalities.h"
77 #include "llvm/IR/Function.h"
78 #include "llvm/IR/GlobalAlias.h"
79 #include "llvm/IR/GlobalValue.h"
80 #include "llvm/IR/GlobalVariable.h"
81 #include "llvm/IR/IRBuilder.h"
82 #include "llvm/IR/InstVisitor.h"
83 #include "llvm/IR/InstrTypes.h"
84 #include "llvm/IR/Instruction.h"
85 #include "llvm/IR/Instructions.h"
86 #include "llvm/IR/IntrinsicInst.h"
87 #include "llvm/IR/Intrinsics.h"
88 #include "llvm/IR/LLVMContext.h"
89 #include "llvm/IR/MDBuilder.h"
90 #include "llvm/IR/Module.h"
91 #include "llvm/IR/PassManager.h"
92 #include "llvm/IR/ProfDataUtils.h"
93 #include "llvm/IR/ProfileSummary.h"
94 #include "llvm/IR/Type.h"
95 #include "llvm/IR/Value.h"
96 #include "llvm/ProfileData/InstrProf.h"
97 #include "llvm/ProfileData/InstrProfReader.h"
98 #include "llvm/Support/BranchProbability.h"
99 #include "llvm/Support/CRC.h"
100 #include "llvm/Support/Casting.h"
101 #include "llvm/Support/CommandLine.h"
102 #include "llvm/Support/DOTGraphTraits.h"
103 #include "llvm/Support/Debug.h"
104 #include "llvm/Support/Error.h"
105 #include "llvm/Support/ErrorHandling.h"
106 #include "llvm/Support/GraphWriter.h"
107 #include "llvm/Support/VirtualFileSystem.h"
108 #include "llvm/Support/raw_ostream.h"
109 #include "llvm/TargetParser/Triple.h"
110 #include "llvm/Transforms/Instrumentation.h"
111 #include "llvm/Transforms/Instrumentation/BlockCoverageInference.h"
112 #include "llvm/Transforms/Instrumentation/CFGMST.h"
113 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
114 #include "llvm/Transforms/Utils/MisExpect.h"
115 #include "llvm/Transforms/Utils/ModuleUtils.h"
123 #include <unordered_map>
127 using namespace llvm
;
128 using ProfileCount
= Function::ProfileCount
;
129 using VPCandidateInfo
= ValueProfileCollector::CandidateInfo
;
131 #define DEBUG_TYPE "pgo-instrumentation"
133 STATISTIC(NumOfPGOInstrument
, "Number of edges instrumented.");
134 STATISTIC(NumOfPGOSelectInsts
, "Number of select instruction instrumented.");
135 STATISTIC(NumOfPGOMemIntrinsics
, "Number of mem intrinsics instrumented.");
136 STATISTIC(NumOfPGOEdge
, "Number of edges.");
137 STATISTIC(NumOfPGOBB
, "Number of basic-blocks.");
138 STATISTIC(NumOfPGOSplit
, "Number of critical edge splits.");
139 STATISTIC(NumOfPGOFunc
, "Number of functions having valid profile counts.");
140 STATISTIC(NumOfPGOMismatch
, "Number of functions having mismatch profile.");
141 STATISTIC(NumOfPGOMissing
, "Number of functions without profile.");
142 STATISTIC(NumOfPGOICall
, "Number of indirect call value instrumentations.");
143 STATISTIC(NumOfCSPGOInstrument
, "Number of edges instrumented in CSPGO.");
144 STATISTIC(NumOfCSPGOSelectInsts
,
145 "Number of select instruction instrumented in CSPGO.");
146 STATISTIC(NumOfCSPGOMemIntrinsics
,
147 "Number of mem intrinsics instrumented in CSPGO.");
148 STATISTIC(NumOfCSPGOEdge
, "Number of edges in CSPGO.");
149 STATISTIC(NumOfCSPGOBB
, "Number of basic-blocks in CSPGO.");
150 STATISTIC(NumOfCSPGOSplit
, "Number of critical edge splits in CSPGO.");
151 STATISTIC(NumOfCSPGOFunc
,
152 "Number of functions having valid profile counts in CSPGO.");
153 STATISTIC(NumOfCSPGOMismatch
,
154 "Number of functions having mismatch profile in CSPGO.");
155 STATISTIC(NumOfCSPGOMissing
, "Number of functions without profile in CSPGO.");
156 STATISTIC(NumCoveredBlocks
, "Number of basic blocks that were executed");
158 // Command line option to specify the file to read profile from. This is
159 // mainly used for testing.
160 static cl::opt
<std::string
>
161 PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden
,
162 cl::value_desc("filename"),
163 cl::desc("Specify the path of profile data file. This is"
164 "mainly for test purpose."));
165 static cl::opt
<std::string
> PGOTestProfileRemappingFile(
166 "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden
,
167 cl::value_desc("filename"),
168 cl::desc("Specify the path of profile remapping file. This is mainly for "
171 // Command line option to disable value profiling. The default is false:
172 // i.e. value profiling is enabled by default. This is for debug purpose.
173 static cl::opt
<bool> DisableValueProfiling("disable-vp", cl::init(false),
175 cl::desc("Disable Value Profiling"));
177 // Command line option to set the maximum number of VP annotations to write to
178 // the metadata for a single indirect call callsite.
179 static cl::opt
<unsigned> MaxNumAnnotations(
180 "icp-max-annotations", cl::init(3), cl::Hidden
,
181 cl::desc("Max number of annotations for a single indirect "
184 // Command line option to set the maximum number of value annotations
185 // to write to the metadata for a single memop intrinsic.
186 static cl::opt
<unsigned> MaxNumMemOPAnnotations(
187 "memop-max-annotations", cl::init(4), cl::Hidden
,
188 cl::desc("Max number of preicise value annotations for a single memop"
191 // Command line option to control appending FunctionHash to the name of a COMDAT
192 // function. This is to avoid the hash mismatch caused by the preinliner.
193 static cl::opt
<bool> DoComdatRenaming(
194 "do-comdat-renaming", cl::init(false), cl::Hidden
,
195 cl::desc("Append function hash to the name of COMDAT function to avoid "
196 "function hash mismatch due to the preinliner"));
199 // Command line option to enable/disable the warning about missing profile
201 cl::opt
<bool> PGOWarnMissing("pgo-warn-missing-function", cl::init(false),
203 cl::desc("Use this option to turn on/off "
204 "warnings about missing profile data for "
207 // Command line option to enable/disable the warning about a hash mismatch in
210 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden
,
211 cl::desc("Use this option to turn off/on "
212 "warnings about profile cfg mismatch."));
214 // Command line option to enable/disable the warning about a hash mismatch in
215 // the profile data for Comdat functions, which often turns out to be false
216 // positive due to the pre-instrumentation inline.
217 cl::opt
<bool> NoPGOWarnMismatchComdatWeak(
218 "no-pgo-warn-mismatch-comdat-weak", cl::init(true), cl::Hidden
,
219 cl::desc("The option is used to turn on/off "
220 "warnings about hash mismatch for comdat "
221 "or weak functions."));
224 // Command line option to enable/disable select instruction instrumentation.
226 PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden
,
227 cl::desc("Use this option to turn on/off SELECT "
228 "instruction instrumentation. "));
230 // Command line option to turn on CFG dot or text dump of raw profile counts
231 static cl::opt
<PGOViewCountsType
> PGOViewRawCounts(
232 "pgo-view-raw-counts", cl::Hidden
,
233 cl::desc("A boolean option to show CFG dag or text "
234 "with raw profile counts from "
235 "profile data. See also option "
236 "-pgo-view-counts. To limit graph "
237 "display to only one function, use "
238 "filtering option -view-bfi-func-name."),
239 cl::values(clEnumValN(PGOVCT_None
, "none", "do not show."),
240 clEnumValN(PGOVCT_Graph
, "graph", "show a graph."),
241 clEnumValN(PGOVCT_Text
, "text", "show in text.")));
243 // Command line option to enable/disable memop intrinsic call.size profiling.
245 PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden
,
246 cl::desc("Use this option to turn on/off "
247 "memory intrinsic size profiling."));
249 // Emit branch probability as optimization remarks.
251 EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden
,
252 cl::desc("When this option is on, the annotated "
253 "branch probability will be emitted as "
254 "optimization remarks: -{Rpass|"
255 "pass-remarks}=pgo-instrumentation"));
257 static cl::opt
<bool> PGOInstrumentEntry(
258 "pgo-instrument-entry", cl::init(false), cl::Hidden
,
259 cl::desc("Force to instrument function entry basicblock."));
261 static cl::opt
<bool> PGOFunctionEntryCoverage(
262 "pgo-function-entry-coverage", cl::Hidden
,
264 "Use this option to enable function entry coverage instrumentation."));
266 static cl::opt
<bool> PGOBlockCoverage(
267 "pgo-block-coverage",
268 cl::desc("Use this option to enable basic block coverage instrumentation"));
271 PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph",
272 cl::desc("Create a dot file of CFGs with block "
273 "coverage inference information"));
275 static cl::opt
<bool> PGOTemporalInstrumentation(
276 "pgo-temporal-instrumentation",
277 cl::desc("Use this option to enable temporal instrumentation"));
280 PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden
,
281 cl::desc("Fix function entry count in profile use."));
283 static cl::opt
<bool> PGOVerifyHotBFI(
284 "pgo-verify-hot-bfi", cl::init(false), cl::Hidden
,
285 cl::desc("Print out the non-match BFI count if a hot raw profile count "
286 "becomes non-hot, or a cold raw profile count becomes hot. "
287 "The print is enabled under -Rpass-analysis=pgo, or "
288 "internal option -pass-remakrs-analysis=pgo."));
290 static cl::opt
<bool> PGOVerifyBFI(
291 "pgo-verify-bfi", cl::init(false), cl::Hidden
,
292 cl::desc("Print out mismatched BFI counts after setting profile metadata "
293 "The print is enabled under -Rpass-analysis=pgo, or "
294 "internal option -pass-remakrs-analysis=pgo."));
// Command line option "pgo-verify-bfi-ratio" (hidden, default 2): threshold
// used together with pgo-verify-bfi. Per the description below, a mismatched
// BFI count is only printed when the difference, expressed as a percentage,
// is greater than this value.
296 static cl::opt
<unsigned> PGOVerifyBFIRatio(
297 "pgo-verify-bfi-ratio", cl::init(2), cl::Hidden
,
298 cl::desc("Set the threshold for pgo-verify-bfi: only print out "
299 "mismatched BFI if the difference percentage is greater than "
300 "this value (in percentage)."));
// Command line option "pgo-verify-bfi-cutoff" (hidden, default 5): threshold
// used together with pgo-verify-bfi. Per the description below, counts whose
// profile count value is below this cutoff are skipped.
302 static cl::opt
<unsigned> PGOVerifyBFICutoff(
303 "pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden
,
304 cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose "
305 "profile count value is below."));
// Command line option "pgo-trace-func-hash" (hidden): name of the function
// whose CFG hash should be traced. The default value "-" means no tracing;
// computeCFGHash() prints the hash when the value is not "-" and the
// function's name contains this string.
307 static cl::opt
<std::string
> PGOTraceFuncHash(
308 "pgo-trace-func-hash", cl::init("-"), cl::Hidden
,
309 cl::value_desc("function name"),
310 cl::desc("Trace the hash of the function with this name."));
312 static cl::opt
<unsigned> PGOFunctionSizeThreshold(
313 "pgo-function-size-threshold", cl::Hidden
,
314 cl::desc("Do not instrument functions smaller than this threshold."));
316 static cl::opt
<unsigned> PGOFunctionCriticalEdgeThreshold(
317 "pgo-critical-edge-threshold", cl::init(20000), cl::Hidden
,
318 cl::desc("Do not instrument functions with the number of critical edges "
319 " greater than this threshold."));
322 // Command line option to turn on CFG dot dump after profile annotation.
323 // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts
324 extern cl::opt
<PGOViewCountsType
> PGOViewCounts
;
326 // Command line option to specify the name of the function for CFG dump
327 // Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
328 extern cl::opt
<std::string
> ViewBlockFreqFuncName
;
330 extern cl::opt
<bool> DebugInfoCorrelate
;
334 PGOOldCFGHashing("pgo-instr-old-cfg-hashing", cl::init(false), cl::Hidden
,
335 cl::desc("Use the old CFG function hashing"));
337 // Return a string describing the branch condition that can be
338 // used in static branch probability heuristics:
339 static std::string
getBranchCondString(Instruction
*TI
) {
340 BranchInst
*BI
= dyn_cast
<BranchInst
>(TI
);
341 if (!BI
|| !BI
->isConditional())
342 return std::string();
344 Value
*Cond
= BI
->getCondition();
345 ICmpInst
*CI
= dyn_cast
<ICmpInst
>(Cond
);
347 return std::string();
350 raw_string_ostream
OS(result
);
351 OS
<< CI
->getPredicate() << "_";
352 CI
->getOperand(0)->getType()->print(OS
, true);
354 Value
*RHS
= CI
->getOperand(1);
355 ConstantInt
*CV
= dyn_cast
<ConstantInt
>(RHS
);
359 else if (CV
->isOne())
361 else if (CV
->isMinusOne())
370 static const char *ValueProfKindDescr
[] = {
371 #define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr,
372 #include "llvm/ProfileData/InstrProfData.inc"
375 // Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
376 // aware this is an ir_level profile so it can set the version flag.
377 static GlobalVariable
*createIRLevelProfileFlagVar(Module
&M
, bool IsCS
) {
378 const StringRef
VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR
));
379 Type
*IntTy64
= Type::getInt64Ty(M
.getContext());
380 uint64_t ProfileVersion
= (INSTR_PROF_RAW_VERSION
| VARIANT_MASK_IR_PROF
);
382 ProfileVersion
|= VARIANT_MASK_CSIR_PROF
;
383 if (PGOInstrumentEntry
)
384 ProfileVersion
|= VARIANT_MASK_INSTR_ENTRY
;
385 if (DebugInfoCorrelate
)
386 ProfileVersion
|= VARIANT_MASK_DBG_CORRELATE
;
387 if (PGOFunctionEntryCoverage
)
389 VARIANT_MASK_BYTE_COVERAGE
| VARIANT_MASK_FUNCTION_ENTRY_ONLY
;
390 if (PGOBlockCoverage
)
391 ProfileVersion
|= VARIANT_MASK_BYTE_COVERAGE
;
392 if (PGOTemporalInstrumentation
)
393 ProfileVersion
|= VARIANT_MASK_TEMPORAL_PROF
;
394 auto IRLevelVersionVariable
= new GlobalVariable(
395 M
, IntTy64
, true, GlobalValue::WeakAnyLinkage
,
396 Constant::getIntegerValue(IntTy64
, APInt(64, ProfileVersion
)), VarName
);
397 IRLevelVersionVariable
->setVisibility(GlobalValue::HiddenVisibility
);
398 Triple
TT(M
.getTargetTriple());
399 if (TT
.supportsCOMDAT()) {
400 IRLevelVersionVariable
->setLinkage(GlobalValue::ExternalLinkage
);
401 IRLevelVersionVariable
->setComdat(M
.getOrInsertComdat(VarName
));
403 return IRLevelVersionVariable
;
408 /// The select instruction visitor plays three roles specified
409 /// by the mode. In \c VM_counting mode, it simply counts the number of
410 /// select instructions. In \c VM_instrument mode, it inserts code to count
411 /// the number of times TrueValue of select is taken. In \c VM_annotate mode,
412 /// it reads the profile data and annotates the select instruction with
/// metadata.
413 enum VisitMode
{ VM_counting // Only count the select instructions.
, VM_instrument // Insert code counting how often TrueValue is taken.
, VM_annotate // Annotate selects with metadata from the profile data.
};
416 /// Instruction Visitor class to visit select instructions.
417 struct SelectInstVisitor
: public InstVisitor
<SelectInstVisitor
> {
419 unsigned NSIs
= 0; // Number of select instructions instrumented.
420 VisitMode Mode
= VM_counting
; // Visiting mode.
421 unsigned *CurCtrIdx
= nullptr; // Pointer to current counter index.
422 unsigned TotalNumCtrs
= 0; // Total number of counters
423 GlobalVariable
*FuncNameVar
= nullptr;
424 uint64_t FuncHash
= 0;
425 PGOUseFunc
*UseFunc
= nullptr;
426 bool HasSingleByteCoverage
;
/// Construct a visitor for \p Func. Only the function member F and the
/// \p HasSingleByteCoverage flag are set here; every other data member keeps
/// its in-class default (counting mode, zero counters, null pointers).
428 SelectInstVisitor(Function
&Func
, bool HasSingleByteCoverage
)
429 : F(Func
), HasSingleByteCoverage(HasSingleByteCoverage
) {}
431 void countSelects() {
437 // Visit the IR stream and instrument all select instructions. \p
438 // Ind is a pointer to the counter index variable; \p TotalNC
439 // is the total number of counters; \p FNV is the pointer to the
440 // PGO function name var; \p FHash is the function hash.
441 void instrumentSelects(unsigned *Ind
, unsigned TotalNC
, GlobalVariable
*FNV
,
443 Mode
= VM_instrument
;
445 TotalNumCtrs
= TotalNC
;
451 // Visit the IR stream and annotate all select instructions.
452 void annotateSelects(PGOUseFunc
*UF
, unsigned *Ind
) {
459 void instrumentOneSelectInst(SelectInst
&SI
);
460 void annotateOneSelectInst(SelectInst
&SI
);
462 // Visit \p SI instruction and perform tasks according to visit mode.
463 void visitSelectInst(SelectInst
&SI
);
465 // Return the number of select instructions. This needs to be called after
// the selects have been counted (presumably by countSelects() -- the rest of
// this sentence is not visible here, confirm). NSIs is initialized to 0.
467 unsigned getNumOfSelectInsts() const { return NSIs
; }
470 /// This class implements the CFG edges for the Minimum Spanning Tree (MST)
471 /// based instrumentation.
472 /// Note that the CFG can be a multi-graph. So there might be multiple edges
473 /// with the same SrcBB and DestBB.
479 bool Removed
= false;
480 bool IsCritical
= false;
/// Construct an edge from \p Src to \p Dest with weight \p W (1 when
/// omitted). The Removed and IsCritical flags start out false.
/// NOTE(review): getInstrBB() checks SrcBB/DestBB against nullptr for "fake"
/// edges, so either endpoint may apparently be null -- confirm with CFGMST.
482 PGOEdge(BasicBlock
*Src
, BasicBlock
*Dest
, uint64_t W
= 1)
483 : SrcBB(Src
), DestBB(Dest
), Weight(W
) {}
485 /// Return the information string of an edge.
486 std::string
infoString() const {
487 return (Twine(Removed
? "-" : " ") + (InMST
? " " : "*") +
488 (IsCritical
? "c" : " ") + " W=" + Twine(Weight
))
493 /// This class stores the auxiliary information for each BB in the MST.
/// Create the info record for the basic block with index \p IX. Group is
/// initialized to point at this object itself.
/// NOTE(review): presumably each BB starts out as its own group for the MST
/// construction -- confirm against CFGMST.h.
499 PGOBBInfo(unsigned IX
) : Group(this), Index(IX
) {}
501 /// Return the information string of this object.
502 std::string
infoString() const {
503 return (Twine("Index=") + Twine(Index
)).str();
507 // This class implements the CFG edges. Note the CFG can be a multi-graph.
508 template <class Edge
, class BBInfo
> class FuncPGOInstrumentation
{
512 // Whether this is context-sensitive instrumentation.
515 // A map that stores the Comdat group in function F.
516 std::unordered_multimap
<Comdat
*, GlobalValue
*> &ComdatMembers
;
518 ValueProfileCollector VPC
;
520 void computeCFGHash();
521 void renameComdatFunction();
524 const TargetLibraryInfo
&TLI
;
525 std::vector
<std::vector
<VPCandidateInfo
>> ValueSites
;
526 SelectInstVisitor SIVisitor
;
527 std::string FuncName
;
528 std::string DeprecatedFuncName
;
529 GlobalVariable
*FuncNameVar
;
531 // CFG hash value for this function.
532 uint64_t FunctionHash
= 0;
534 // The Minimum Spanning Tree of function CFG.
535 CFGMST
<Edge
, BBInfo
> MST
;
537 const std::optional
<BlockCoverageInference
> BCI
;
539 static std::optional
<BlockCoverageInference
>
540 constructBCI(Function
&Func
, bool HasSingleByteCoverage
,
541 bool InstrumentFuncEntry
) {
542 if (HasSingleByteCoverage
)
543 return BlockCoverageInference(Func
, InstrumentFuncEntry
);
547 // Collect all the BBs that will be instrumented, and store them in
549 void getInstrumentBBs(std::vector
<BasicBlock
*> &InstrumentBBs
);
551 // Give an edge, find the BB that will be instrumented.
552 // Return nullptr if there is no BB to be instrumented.
553 BasicBlock
*getInstrBB(Edge
*E
);
555 // Return the auxiliary BB information for \p BB by forwarding to
// MST.getBBInfo(). The result is a reference; see findBBInfo() for the
// variant that returns the information only "if available".
// NOTE(review): behavior for a BB without an entry is decided by
// CFGMST::getBBInfo -- confirm there.
556 BBInfo
&getBBInfo(const BasicBlock
*BB
) const { return MST
.getBBInfo(BB
); }
558 // Return the auxiliary BB information if available, by forwarding to
// MST.findBBInfo(). The result is a pointer rather than a reference.
// NOTE(review): presumably null when \p BB has no entry -- confirm against
// CFGMST::findBBInfo.
559 BBInfo
*findBBInfo(const BasicBlock
*BB
) const { return MST
.findBBInfo(BB
); }
561 // Dump edges and BB information.
562 void dumpInfo(StringRef Str
= "") const {
563 MST
.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName
+
564 " Hash: " + Twine(FunctionHash
) + "\t" + Str
);
567 FuncPGOInstrumentation(
568 Function
&Func
, TargetLibraryInfo
&TLI
,
569 std::unordered_multimap
<Comdat
*, GlobalValue
*> &ComdatMembers
,
570 bool CreateGlobalVar
= false, BranchProbabilityInfo
*BPI
= nullptr,
571 BlockFrequencyInfo
*BFI
= nullptr, bool IsCS
= false,
572 bool InstrumentFuncEntry
= true, bool HasSingleByteCoverage
= false)
573 : F(Func
), IsCS(IsCS
), ComdatMembers(ComdatMembers
), VPC(Func
, TLI
),
574 TLI(TLI
), ValueSites(IPVK_Last
+ 1),
575 SIVisitor(Func
, HasSingleByteCoverage
),
576 MST(F
, InstrumentFuncEntry
, BPI
, BFI
),
577 BCI(constructBCI(Func
, HasSingleByteCoverage
, InstrumentFuncEntry
)) {
578 if (BCI
&& PGOViewBlockCoverageGraph
)
579 BCI
->viewBlockCoverageGraph();
580 // This should be done before CFG hash computation.
581 SIVisitor
.countSelects();
582 ValueSites
[IPVK_MemOPSize
] = VPC
.get(IPVK_MemOPSize
);
584 NumOfPGOSelectInsts
+= SIVisitor
.getNumOfSelectInsts();
585 NumOfPGOMemIntrinsics
+= ValueSites
[IPVK_MemOPSize
].size();
586 NumOfPGOBB
+= MST
.BBInfos
.size();
587 ValueSites
[IPVK_IndirectCallTarget
] = VPC
.get(IPVK_IndirectCallTarget
);
589 NumOfCSPGOSelectInsts
+= SIVisitor
.getNumOfSelectInsts();
590 NumOfCSPGOMemIntrinsics
+= ValueSites
[IPVK_MemOPSize
].size();
591 NumOfCSPGOBB
+= MST
.BBInfos
.size();
594 FuncName
= getIRPGOFuncName(F
);
595 DeprecatedFuncName
= getPGOFuncName(F
);
597 if (!ComdatMembers
.empty())
598 renameComdatFunction();
599 LLVM_DEBUG(dumpInfo("after CFGMST"));
601 for (auto &E
: MST
.AllEdges
) {
604 IsCS
? NumOfCSPGOEdge
++ : NumOfPGOEdge
++;
606 IsCS
? NumOfCSPGOInstrument
++ : NumOfPGOInstrument
++;
610 FuncNameVar
= createPGOFuncNameVar(F
, FuncName
);
614 } // end anonymous namespace
616 // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
617 // value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers
618 // of selects, indirect calls, mem ops and edges.
619 template <class Edge
, class BBInfo
>
620 void FuncPGOInstrumentation
<Edge
, BBInfo
>::computeCFGHash() {
621 std::vector
<uint8_t> Indexes
;
624 const Instruction
*TI
= BB
.getTerminator();
625 for (unsigned I
= 0, E
= TI
->getNumSuccessors(); I
!= E
; ++I
) {
626 BasicBlock
*Succ
= TI
->getSuccessor(I
);
627 auto BI
= findBBInfo(Succ
);
630 uint32_t Index
= BI
->Index
;
631 for (int J
= 0; J
< 4; J
++)
632 Indexes
.push_back((uint8_t)(Index
>> (J
* 8)));
638 if (PGOOldCFGHashing
) {
639 // Hash format for context sensitive profile. Reserve 4 bits for other
641 FunctionHash
= (uint64_t)SIVisitor
.getNumOfSelectInsts() << 56 |
642 (uint64_t)ValueSites
[IPVK_IndirectCallTarget
].size() << 48 |
643 //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 |
644 (uint64_t)MST
.AllEdges
.size() << 32 | JC
.getCRC();
646 // The higher 32 bits.
647 auto updateJCH
= [&JCH
](uint64_t Num
) {
649 support::endian::write64le(Data
, Num
);
652 updateJCH((uint64_t)SIVisitor
.getNumOfSelectInsts());
653 updateJCH((uint64_t)ValueSites
[IPVK_IndirectCallTarget
].size());
654 updateJCH((uint64_t)ValueSites
[IPVK_MemOPSize
].size());
656 updateJCH(BCI
->getInstrumentedBlocksHash());
658 updateJCH((uint64_t)MST
.AllEdges
.size());
661 // Hash format for context sensitive profile. Reserve 4 bits for other
663 FunctionHash
= (((uint64_t)JCH
.getCRC()) << 28) + JC
.getCRC();
666 // Reserve bit 60-63 for other information purpose.
667 FunctionHash
&= 0x0FFFFFFFFFFFFFFF;
669 NamedInstrProfRecord::setCSFlagInHash(FunctionHash
);
670 LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F
.getName() << ":\n"
671 << " CRC = " << JC
.getCRC()
672 << ", Selects = " << SIVisitor
.getNumOfSelectInsts()
673 << ", Edges = " << MST
.AllEdges
.size() << ", ICSites = "
674 << ValueSites
[IPVK_IndirectCallTarget
].size());
675 if (!PGOOldCFGHashing
) {
676 LLVM_DEBUG(dbgs() << ", Memops = " << ValueSites
[IPVK_MemOPSize
].size()
677 << ", High32 CRC = " << JCH
.getCRC());
679 LLVM_DEBUG(dbgs() << ", Hash = " << FunctionHash
<< "\n";);
681 if (PGOTraceFuncHash
!= "-" && F
.getName().contains(PGOTraceFuncHash
))
682 dbgs() << "Funcname=" << F
.getName() << ", Hash=" << FunctionHash
683 << " in building " << F
.getParent()->getSourceFileName() << "\n";
686 // Check if we can safely rename this Comdat function.
687 static bool canRenameComdat(
689 std::unordered_multimap
<Comdat
*, GlobalValue
*> &ComdatMembers
) {
690 if (!DoComdatRenaming
|| !canRenameComdatFunc(F
, true))
693 // FIXME: Currently we only handle those Comdat groups that only contain one
695 // (1) For a Comdat group containing multiple functions, we need to have a
696 // unique postfix based on the hashes for each function. There is a
697 // non-trivial code refactoring to do this efficiently.
698 // (2) Variables can not be renamed, so we can not rename Comdat function in a
699 // group including global vars.
700 Comdat
*C
= F
.getComdat();
701 for (auto &&CM
: make_range(ComdatMembers
.equal_range(C
))) {
702 assert(!isa
<GlobalAlias
>(CM
.second
));
703 Function
*FM
= dyn_cast
<Function
>(CM
.second
);
710 // Append the CFGHash to the Comdat function name.
711 template <class Edge
, class BBInfo
>
712 void FuncPGOInstrumentation
<Edge
, BBInfo
>::renameComdatFunction() {
713 if (!canRenameComdat(F
, ComdatMembers
))
715 std::string OrigName
= F
.getName().str();
716 std::string NewFuncName
=
717 Twine(F
.getName() + "." + Twine(FunctionHash
)).str();
718 F
.setName(Twine(NewFuncName
));
719 GlobalAlias::create(GlobalValue::WeakAnyLinkage
, OrigName
, &F
);
720 FuncName
= Twine(FuncName
+ "." + Twine(FunctionHash
)).str();
722 Module
*M
= F
.getParent();
723 // For AvailableExternallyLinkage functions, change the linkage to
724 // LinkOnceODR and put them into comdat. This is because after renaming, there
725 // is no backup external copy available for the function.
726 if (!F
.hasComdat()) {
727 assert(F
.getLinkage() == GlobalValue::AvailableExternallyLinkage
);
728 NewComdat
= M
->getOrInsertComdat(StringRef(NewFuncName
));
729 F
.setLinkage(GlobalValue::LinkOnceODRLinkage
);
730 F
.setComdat(NewComdat
);
734 // This function belongs to a single function Comdat group.
735 Comdat
*OrigComdat
= F
.getComdat();
736 std::string NewComdatName
=
737 Twine(OrigComdat
->getName() + "." + Twine(FunctionHash
)).str();
738 NewComdat
= M
->getOrInsertComdat(StringRef(NewComdatName
));
739 NewComdat
->setSelectionKind(OrigComdat
->getSelectionKind());
741 for (auto &&CM
: make_range(ComdatMembers
.equal_range(OrigComdat
))) {
742 // Must be a function.
743 cast
<Function
>(CM
.second
)->setComdat(NewComdat
);
747 /// Collect all the BBs that will be instrumented and add them to
749 template <class Edge
, class BBInfo
>
750 void FuncPGOInstrumentation
<Edge
, BBInfo
>::getInstrumentBBs(
751 std::vector
<BasicBlock
*> &InstrumentBBs
) {
754 if (BCI
->shouldInstrumentBlock(BB
))
755 InstrumentBBs
.push_back(&BB
);
759 // Use a worklist as we will update the vector during the iteration.
760 std::vector
<Edge
*> EdgeList
;
761 EdgeList
.reserve(MST
.AllEdges
.size());
762 for (auto &E
: MST
.AllEdges
)
763 EdgeList
.push_back(E
.get());
765 for (auto &E
: EdgeList
) {
766 BasicBlock
*InstrBB
= getInstrBB(E
);
768 InstrumentBBs
.push_back(InstrBB
);
772 // Given a CFG edge E to be instrumented, find which BB to place the instrumented
773 // code. The function will split the critical edge if necessary.
774 template <class Edge
, class BBInfo
>
775 BasicBlock
*FuncPGOInstrumentation
<Edge
, BBInfo
>::getInstrBB(Edge
*E
) {
776 if (E
->InMST
|| E
->Removed
)
779 BasicBlock
*SrcBB
= E
->SrcBB
;
780 BasicBlock
*DestBB
= E
->DestBB
;
781 // For a fake edge, instrument the real BB.
782 if (SrcBB
== nullptr)
784 if (DestBB
== nullptr)
787 auto canInstrument
= [](BasicBlock
*BB
) -> BasicBlock
* {
788 // There are basic blocks (such as catchswitch) that cannot be instrumented.
789 // If the returned first insertion point is the end of BB, skip this BB.
790 if (BB
->getFirstInsertionPt() == BB
->end())
795 // Instrument the SrcBB if it has a single successor,
796 // otherwise, the DestBB if this is not a critical edge.
797 Instruction
*TI
= SrcBB
->getTerminator();
798 if (TI
->getNumSuccessors() <= 1)
799 return canInstrument(SrcBB
);
801 return canInstrument(DestBB
);
803 // Some IndirectBr critical edges cannot be split by the previous
804 // SplitIndirectBrCriticalEdges call. Bail out.
805 unsigned SuccNum
= GetSuccessorNumber(SrcBB
, DestBB
);
806 BasicBlock
*InstrBB
=
807 isa
<IndirectBrInst
>(TI
) ? nullptr : SplitCriticalEdge(TI
, SuccNum
);
810 dbgs() << "Fail to split critical edge: not instrument this edge.\n");
813 // For a critical edge, we have to split. Instrument the newly
815 IsCS
? NumOfCSPGOSplit
++ : NumOfPGOSplit
++;
816 LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB
).Index
817 << " --> " << getBBInfo(DestBB
).Index
<< "\n");
818 // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
819 MST
.addEdge(SrcBB
, InstrBB
, 0);
820 // Second one: Add new edge of InstrBB->DestBB.
821 Edge
&NewEdge1
= MST
.addEdge(InstrBB
, DestBB
, 0);
822 NewEdge1
.InMST
= true;
825 return canInstrument(InstrBB
);
828 // When generating value profiling calls on Windows routines that make use of
829 // handler funclets for exception processing an operand bundle needs to be attached
830 // to the called function. This routine will set \p OpBundles to contain the
831 // funclet information, if any is needed, that should be placed on the generated
832 // value profiling call for the value profile candidate call.
834 populateEHOperandBundle(VPCandidateInfo
&Cand
,
835 DenseMap
<BasicBlock
*, ColorVector
> &BlockColors
,
836 SmallVectorImpl
<OperandBundleDef
> &OpBundles
) {
837 auto *OrigCall
= dyn_cast
<CallBase
>(Cand
.AnnotatedInst
);
841 if (!isa
<IntrinsicInst
>(OrigCall
)) {
842 // The instrumentation call should belong to the same funclet as a
843 // non-intrinsic call, so just copy the operand bundle, if any exists.
844 std::optional
<OperandBundleUse
> ParentFunclet
=
845 OrigCall
->getOperandBundle(LLVMContext::OB_funclet
);
847 OpBundles
.emplace_back(OperandBundleDef(*ParentFunclet
));
849 // Intrinsics or other instructions do not get funclet information from the
850 // front-end. Need to use the BlockColors that was computed by the routine
851 // colorEHFunclets to determine whether a funclet is needed.
852 if (!BlockColors
.empty()) {
853 const ColorVector
&CV
= BlockColors
.find(OrigCall
->getParent())->second
;
854 assert(CV
.size() == 1 && "non-unique color for block!");
855 Instruction
*EHPad
= CV
.front()->getFirstNonPHI();
856 if (EHPad
->isEHPad())
857 OpBundles
.emplace_back("funclet", EHPad
);
862 // Visit all edge and instrument the edges not in MST, and do value profiling.
863 // Critical edges will be split.
864 static void instrumentOneFunc(
865 Function
&F
, Module
*M
, TargetLibraryInfo
&TLI
, BranchProbabilityInfo
*BPI
,
866 BlockFrequencyInfo
*BFI
,
867 std::unordered_multimap
<Comdat
*, GlobalValue
*> &ComdatMembers
,
869 if (!PGOBlockCoverage
) {
870 // Split indirectbr critical edges here before computing the MST rather than
871 // later in getInstrBB() to avoid invalidating it.
872 SplitIndirectBrCriticalEdges(F
, /*IgnoreBlocksWithoutPHI=*/false, BPI
, BFI
);
875 FuncPGOInstrumentation
<PGOEdge
, PGOBBInfo
> FuncInfo(
876 F
, TLI
, ComdatMembers
, true, BPI
, BFI
, IsCS
, PGOInstrumentEntry
,
879 Type
*I8PtrTy
= Type::getInt8PtrTy(M
->getContext());
880 auto Name
= ConstantExpr::getBitCast(FuncInfo
.FuncNameVar
, I8PtrTy
);
881 auto CFGHash
= ConstantInt::get(Type::getInt64Ty(M
->getContext()),
882 FuncInfo
.FunctionHash
);
883 if (PGOFunctionEntryCoverage
) {
884 auto &EntryBB
= F
.getEntryBlock();
885 IRBuilder
<> Builder(&EntryBB
, EntryBB
.getFirstInsertionPt());
886 // llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>,
889 Intrinsic::getDeclaration(M
, Intrinsic::instrprof_cover
),
890 {Name
, CFGHash
, Builder
.getInt32(1), Builder
.getInt32(0)});
894 std::vector
<BasicBlock
*> InstrumentBBs
;
895 FuncInfo
.getInstrumentBBs(InstrumentBBs
);
896 unsigned NumCounters
=
897 InstrumentBBs
.size() + FuncInfo
.SIVisitor
.getNumOfSelectInsts();
900 if (PGOTemporalInstrumentation
) {
901 NumCounters
+= PGOBlockCoverage
? 8 : 1;
902 auto &EntryBB
= F
.getEntryBlock();
903 IRBuilder
<> Builder(&EntryBB
, EntryBB
.getFirstInsertionPt());
904 // llvm.instrprof.timestamp(i8* <name>, i64 <hash>, i32 <num-counters>,
907 Intrinsic::getDeclaration(M
, Intrinsic::instrprof_timestamp
),
908 {Name
, CFGHash
, Builder
.getInt32(NumCounters
), Builder
.getInt32(I
)});
909 I
+= PGOBlockCoverage
? 8 : 1;
912 for (auto *InstrBB
: InstrumentBBs
) {
913 IRBuilder
<> Builder(InstrBB
, InstrBB
->getFirstInsertionPt());
914 assert(Builder
.GetInsertPoint() != InstrBB
->end() &&
915 "Cannot get the Instrumentation point");
916 // llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>,
919 Intrinsic::getDeclaration(M
, PGOBlockCoverage
920 ? Intrinsic::instrprof_cover
921 : Intrinsic::instrprof_increment
),
922 {Name
, CFGHash
, Builder
.getInt32(NumCounters
), Builder
.getInt32(I
++)});
925 // Now instrument select instructions:
926 FuncInfo
.SIVisitor
.instrumentSelects(&I
, NumCounters
, FuncInfo
.FuncNameVar
,
927 FuncInfo
.FunctionHash
);
928 assert(I
== NumCounters
);
930 if (DisableValueProfiling
)
933 NumOfPGOICall
+= FuncInfo
.ValueSites
[IPVK_IndirectCallTarget
].size();
935 // Intrinsic function calls do not have funclet operand bundles needed for
936 // Windows exception handling attached to them. However, if value profiling is
937 // inserted for one of these calls, then a funclet value will need to be set
938 // on the instrumentation call based on the funclet coloring.
939 DenseMap
<BasicBlock
*, ColorVector
> BlockColors
;
940 if (F
.hasPersonalityFn() &&
941 isFuncletEHPersonality(classifyEHPersonality(F
.getPersonalityFn())))
942 BlockColors
= colorEHFunclets(F
);
944 // For each VP Kind, walk the VP candidates and instrument each one.
945 for (uint32_t Kind
= IPVK_First
; Kind
<= IPVK_Last
; ++Kind
) {
946 unsigned SiteIndex
= 0;
947 if (Kind
== IPVK_MemOPSize
&& !PGOInstrMemOP
)
950 for (VPCandidateInfo Cand
: FuncInfo
.ValueSites
[Kind
]) {
951 LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr
[Kind
]
952 << " site: CallSite Index = " << SiteIndex
<< "\n");
954 IRBuilder
<> Builder(Cand
.InsertPt
);
955 assert(Builder
.GetInsertPoint() != Cand
.InsertPt
->getParent()->end() &&
956 "Cannot get the Instrumentation point");
958 Value
*ToProfile
= nullptr;
959 if (Cand
.V
->getType()->isIntegerTy())
960 ToProfile
= Builder
.CreateZExtOrTrunc(Cand
.V
, Builder
.getInt64Ty());
961 else if (Cand
.V
->getType()->isPointerTy())
962 ToProfile
= Builder
.CreatePtrToInt(Cand
.V
, Builder
.getInt64Ty());
963 assert(ToProfile
&& "value profiling Value is of unexpected type");
965 SmallVector
<OperandBundleDef
, 1> OpBundles
;
966 populateEHOperandBundle(Cand
, BlockColors
, OpBundles
);
968 Intrinsic::getDeclaration(M
, Intrinsic::instrprof_value_profile
),
969 {ConstantExpr::getBitCast(FuncInfo
.FuncNameVar
, I8PtrTy
),
970 Builder
.getInt64(FuncInfo
.FunctionHash
), ToProfile
,
971 Builder
.getInt32(Kind
), Builder
.getInt32(SiteIndex
++)},
974 } // IPVK_First <= Kind <= IPVK_Last
979 // This class represents a CFG edge in profile use compilation.
980 struct PGOUseEdge
: public PGOEdge
{
981 using PGOEdge::PGOEdge
;
983 bool CountValid
= false;
984 uint64_t CountValue
= 0;
986 // Set edge count value
987 void setEdgeCount(uint64_t Value
) {
992 // Return the information string for this object.
993 std::string
infoString() const {
995 return PGOEdge::infoString();
996 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(CountValue
))
1001 using DirectEdges
= SmallVector
<PGOUseEdge
*, 2>;
1003 // This class stores the auxiliary information for each BB.
1004 struct PGOUseBBInfo
: public PGOBBInfo
{
1005 uint64_t CountValue
= 0;
1007 int32_t UnknownCountInEdge
= 0;
1008 int32_t UnknownCountOutEdge
= 0;
1009 DirectEdges InEdges
;
1010 DirectEdges OutEdges
;
1012 PGOUseBBInfo(unsigned IX
) : PGOBBInfo(IX
), CountValid(false) {}
1014 // Set the profile count value for this BB.
1015 void setBBInfoCount(uint64_t Value
) {
1020 // Return the information string of this object.
1021 std::string
infoString() const {
1023 return PGOBBInfo::infoString();
1024 return (Twine(PGOBBInfo::infoString()) + " Count=" + Twine(CountValue
))
1028 // Add an OutEdge and update the edge count.
1029 void addOutEdge(PGOUseEdge
*E
) {
1030 OutEdges
.push_back(E
);
1031 UnknownCountOutEdge
++;
1034 // Add an InEdge and update the edge count.
1035 void addInEdge(PGOUseEdge
*E
) {
1036 InEdges
.push_back(E
);
1037 UnknownCountInEdge
++;
1041 } // end anonymous namespace
1043 // Sum up the count values for all the edges.
1044 static uint64_t sumEdgeCount(const ArrayRef
<PGOUseEdge
*> Edges
) {
1046 for (const auto &E
: Edges
) {
1049 Total
+= E
->CountValue
;
1058 PGOUseFunc(Function
&Func
, Module
*Modu
, TargetLibraryInfo
&TLI
,
1059 std::unordered_multimap
<Comdat
*, GlobalValue
*> &ComdatMembers
,
1060 BranchProbabilityInfo
*BPI
, BlockFrequencyInfo
*BFIin
,
1061 ProfileSummaryInfo
*PSI
, bool IsCS
, bool InstrumentFuncEntry
,
1062 bool HasSingleByteCoverage
)
1063 : F(Func
), M(Modu
), BFI(BFIin
), PSI(PSI
),
1064 FuncInfo(Func
, TLI
, ComdatMembers
, false, BPI
, BFIin
, IsCS
,
1065 InstrumentFuncEntry
, HasSingleByteCoverage
),
1066 FreqAttr(FFA_Normal
), IsCS(IsCS
) {}
1068 void handleInstrProfError(Error Err
, uint64_t MismatchedFuncSum
);
1070 // Read counts for the instrumented BB from profile.
1071 bool readCounters(IndexedInstrProfReader
*PGOReader
, bool &AllZeros
,
1072 InstrProfRecord::CountPseudoKind
&PseudoKind
);
1074 // Populate the counts for all BBs.
1075 void populateCounters();
1077 // Set block coverage based on profile coverage values.
1078 void populateCoverage(IndexedInstrProfReader
*PGOReader
);
1080 // Set the branch weights based on the count values.
1081 void setBranchWeights();
1083 // Annotate the value profile call sites for all value kind.
1084 void annotateValueSites();
1086 // Annotate the value profile call sites for one value kind.
1087 void annotateValueSites(uint32_t Kind
);
1089 // Annotate the irreducible loop header weights.
1090 void annotateIrrLoopHeaderWeights();
1092 // The hotness of the function from the profile count.
1093 enum FuncFreqAttr
{ FFA_Normal
, FFA_Cold
, FFA_Hot
};
1095 // Return the function hotness from the profile.
1096 FuncFreqAttr
getFuncFreqAttr() const { return FreqAttr
; }
1098 // Return the function hash.
1099 uint64_t getFuncHash() const { return FuncInfo
.FunctionHash
; }
1101 // Return the profile record for this function;
1102 InstrProfRecord
&getProfileRecord() { return ProfileRecord
; }
1104 // Return the auxiliary BB information.
1105 PGOUseBBInfo
&getBBInfo(const BasicBlock
*BB
) const {
1106 return FuncInfo
.getBBInfo(BB
);
1109 // Return the auxiliary BB information if available.
1110 PGOUseBBInfo
*findBBInfo(const BasicBlock
*BB
) const {
1111 return FuncInfo
.findBBInfo(BB
);
1114 Function
&getFunc() const { return F
; }
1116 void dumpInfo(StringRef Str
= "") const { FuncInfo
.dumpInfo(Str
); }
1118 uint64_t getProgramMaxCount() const { return ProgramMaxCount
; }
1123 BlockFrequencyInfo
*BFI
;
1124 ProfileSummaryInfo
*PSI
;
1126 // This member stores the shared information with class PGOGenFunc.
1127 FuncPGOInstrumentation
<PGOUseEdge
, PGOUseBBInfo
> FuncInfo
;
// The maximum count value in the profile. This is only used in PGO use
// compilation. It is assigned by readCounters() on its success path;
// initialize it so that getProgramMaxCount() never returns an indeterminate
// value when that assignment has not happened.
uint64_t ProgramMaxCount = 0;
1133 // Position of counter that remains to be read.
1134 uint32_t CountPosition
= 0;
1136 // Total size of the profile count for this function.
1137 uint32_t ProfileCountSize
= 0;
1139 // ProfileRecord for this function.
1140 InstrProfRecord ProfileRecord
;
1142 // Function hotness info derived from profile.
1143 FuncFreqAttr FreqAttr
;
1145 // Is to use the context sensitive profile.
1148 // Find the Instrumented BB and set the value. Return false on error.
1149 bool setInstrumentedCounts(const std::vector
<uint64_t> &CountFromProfile
);
1151 // Set the edge counter value for the unknown edge -- there should be only
1152 // one unknown edge.
1153 void setEdgeCount(DirectEdges
&Edges
, uint64_t Value
);
1155 // Set the hot/cold inline hints based on the count values.
1156 // FIXME: This function should be removed once the functionality in
1157 // the inliner is implemented.
1158 void markFunctionAttributes(uint64_t EntryCount
, uint64_t MaxCount
) {
1159 if (PSI
->isHotCount(EntryCount
))
1161 else if (PSI
->isColdCount(MaxCount
))
1162 FreqAttr
= FFA_Cold
;
1166 } // end anonymous namespace
1168 /// Set up InEdges/OutEdges for all BBs in the MST.
1170 setupBBInfoEdges(FuncPGOInstrumentation
<PGOUseEdge
, PGOUseBBInfo
> &FuncInfo
) {
1171 // This is not required when there is block coverage inference.
1174 for (auto &E
: FuncInfo
.MST
.AllEdges
) {
1177 const BasicBlock
*SrcBB
= E
->SrcBB
;
1178 const BasicBlock
*DestBB
= E
->DestBB
;
1179 PGOUseBBInfo
&SrcInfo
= FuncInfo
.getBBInfo(SrcBB
);
1180 PGOUseBBInfo
&DestInfo
= FuncInfo
.getBBInfo(DestBB
);
1181 SrcInfo
.addOutEdge(E
.get());
1182 DestInfo
.addInEdge(E
.get());
1186 // Visit all the edges and assign the count value for the instrumented
1187 // edges and the BB. Return false on error.
1188 bool PGOUseFunc::setInstrumentedCounts(
1189 const std::vector
<uint64_t> &CountFromProfile
) {
1191 std::vector
<BasicBlock
*> InstrumentBBs
;
1192 FuncInfo
.getInstrumentBBs(InstrumentBBs
);
1194 setupBBInfoEdges(FuncInfo
);
1196 unsigned NumCounters
=
1197 InstrumentBBs
.size() + FuncInfo
.SIVisitor
.getNumOfSelectInsts();
1198 // The number of counters here should match the number of counters
1199 // in profile. Return if they mismatch.
1200 if (NumCounters
!= CountFromProfile
.size()) {
1203 auto *FuncEntry
= &*F
.begin();
1205 // Set the profile count to the Instrumented BBs.
1207 for (BasicBlock
*InstrBB
: InstrumentBBs
) {
1208 uint64_t CountValue
= CountFromProfile
[I
++];
1209 PGOUseBBInfo
&Info
= getBBInfo(InstrBB
);
1210 // If we reach here, we know that we have some nonzero count
1211 // values in this function. The entry count should not be 0.
1212 // Fix it if necessary.
1213 if (InstrBB
== FuncEntry
&& CountValue
== 0)
1215 Info
.setBBInfoCount(CountValue
);
1217 ProfileCountSize
= CountFromProfile
.size();
1220 // Set the edge count and update the count of unknown edges for BBs.
1221 auto setEdgeCount
= [this](PGOUseEdge
*E
, uint64_t Value
) -> void {
1222 E
->setEdgeCount(Value
);
1223 this->getBBInfo(E
->SrcBB
).UnknownCountOutEdge
--;
1224 this->getBBInfo(E
->DestBB
).UnknownCountInEdge
--;
1227 // Set the profile count the Instrumented edges. There are BBs that not in
1228 // MST but not instrumented. Need to set the edge count value so that we can
1229 // populate the profile counts later.
1230 for (auto &E
: FuncInfo
.MST
.AllEdges
) {
1231 if (E
->Removed
|| E
->InMST
)
1233 const BasicBlock
*SrcBB
= E
->SrcBB
;
1234 PGOUseBBInfo
&SrcInfo
= getBBInfo(SrcBB
);
1236 // If only one out-edge, the edge profile count should be the same as BB
1238 if (SrcInfo
.CountValid
&& SrcInfo
.OutEdges
.size() == 1)
1239 setEdgeCount(E
.get(), SrcInfo
.CountValue
);
1241 const BasicBlock
*DestBB
= E
->DestBB
;
1242 PGOUseBBInfo
&DestInfo
= getBBInfo(DestBB
);
1243 // If only one in-edge, the edge profile count should be the same as BB
1245 if (DestInfo
.CountValid
&& DestInfo
.InEdges
.size() == 1)
1246 setEdgeCount(E
.get(), DestInfo
.CountValue
);
1250 // E's count should have been set from profile. If not, this meenas E skips
1251 // the instrumentation. We set the count to 0.
1252 setEdgeCount(E
.get(), 0);
1257 // Set the count value for the unknown edge. There should be one and only one
1258 // unknown edge in Edges vector.
1259 void PGOUseFunc::setEdgeCount(DirectEdges
&Edges
, uint64_t Value
) {
1260 for (auto &E
: Edges
) {
1263 E
->setEdgeCount(Value
);
1265 getBBInfo(E
->SrcBB
).UnknownCountOutEdge
--;
1266 getBBInfo(E
->DestBB
).UnknownCountInEdge
--;
1269 llvm_unreachable("Cannot find the unknown count edge");
1272 // Emit function metadata indicating PGO profile mismatch.
1273 static void annotateFunctionWithHashMismatch(Function
&F
, LLVMContext
&ctx
) {
1274 const char MetadataName
[] = "instr_prof_hash_mismatch";
1275 SmallVector
<Metadata
*, 2> Names
;
1276 // If this metadata already exists, ignore.
1277 auto *Existing
= F
.getMetadata(LLVMContext::MD_annotation
);
1279 MDTuple
*Tuple
= cast
<MDTuple
>(Existing
);
1280 for (const auto &N
: Tuple
->operands()) {
1281 if (N
.equalsStr(MetadataName
))
1283 Names
.push_back(N
.get());
1288 Names
.push_back(MDB
.createString(MetadataName
));
1289 MDNode
*MD
= MDTuple::get(ctx
, Names
);
1290 F
.setMetadata(LLVMContext::MD_annotation
, MD
);
1293 void PGOUseFunc::handleInstrProfError(Error Err
, uint64_t MismatchedFuncSum
) {
1294 handleAllErrors(std::move(Err
), [&](const InstrProfError
&IPE
) {
1295 auto &Ctx
= M
->getContext();
1296 auto Err
= IPE
.get();
1297 bool SkipWarning
= false;
1298 LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
1299 << FuncInfo
.FuncName
<< ": ");
1300 if (Err
== instrprof_error::unknown_function
) {
1301 IsCS
? NumOfCSPGOMissing
++ : NumOfPGOMissing
++;
1302 SkipWarning
= !PGOWarnMissing
;
1303 LLVM_DEBUG(dbgs() << "unknown function");
1304 } else if (Err
== instrprof_error::hash_mismatch
||
1305 Err
== instrprof_error::malformed
) {
1306 IsCS
? NumOfCSPGOMismatch
++ : NumOfPGOMismatch
++;
1308 NoPGOWarnMismatch
||
1309 (NoPGOWarnMismatchComdatWeak
&&
1310 (F
.hasComdat() || F
.getLinkage() == GlobalValue::WeakAnyLinkage
||
1311 F
.getLinkage() == GlobalValue::AvailableExternallyLinkage
));
1312 LLVM_DEBUG(dbgs() << "hash mismatch (hash= " << FuncInfo
.FunctionHash
1313 << " skip=" << SkipWarning
<< ")");
1314 // Emit function metadata indicating PGO profile mismatch.
1315 annotateFunctionWithHashMismatch(F
, M
->getContext());
1318 LLVM_DEBUG(dbgs() << " IsCS=" << IsCS
<< "\n");
1323 IPE
.message() + std::string(" ") + F
.getName().str() +
1324 std::string(" Hash = ") + std::to_string(FuncInfo
.FunctionHash
) +
1325 std::string(" up to ") + std::to_string(MismatchedFuncSum
) +
1326 std::string(" count discarded");
1329 DiagnosticInfoPGOProfile(M
->getName().data(), Msg
, DS_Warning
));
1333 // Read the profile from ProfileFileName and assign the value to the
1334 // instrumented BB and the edges. This function also updates ProgramMaxCount.
1335 // Return true if the profile are successfully read, and false on errors.
1336 bool PGOUseFunc::readCounters(IndexedInstrProfReader
*PGOReader
, bool &AllZeros
,
1337 InstrProfRecord::CountPseudoKind
&PseudoKind
) {
1338 auto &Ctx
= M
->getContext();
1339 uint64_t MismatchedFuncSum
= 0;
1340 Expected
<InstrProfRecord
> Result
= PGOReader
->getInstrProfRecord(
1341 FuncInfo
.FuncName
, FuncInfo
.FunctionHash
, FuncInfo
.DeprecatedFuncName
,
1342 &MismatchedFuncSum
);
1343 if (Error E
= Result
.takeError()) {
1344 handleInstrProfError(std::move(E
), MismatchedFuncSum
);
1347 ProfileRecord
= std::move(Result
.get());
1348 PseudoKind
= ProfileRecord
.getCountPseudoKind();
1349 if (PseudoKind
!= InstrProfRecord::NotPseudo
) {
1352 std::vector
<uint64_t> &CountFromProfile
= ProfileRecord
.Counts
;
1354 IsCS
? NumOfCSPGOFunc
++ : NumOfPGOFunc
++;
1355 LLVM_DEBUG(dbgs() << CountFromProfile
.size() << " counts\n");
1357 uint64_t ValueSum
= 0;
1358 for (unsigned I
= 0, S
= CountFromProfile
.size(); I
< S
; I
++) {
1359 LLVM_DEBUG(dbgs() << " " << I
<< ": " << CountFromProfile
[I
] << "\n");
1360 ValueSum
+= CountFromProfile
[I
];
1362 AllZeros
= (ValueSum
== 0);
1364 LLVM_DEBUG(dbgs() << "SUM = " << ValueSum
<< "\n");
1366 getBBInfo(nullptr).UnknownCountOutEdge
= 2;
1367 getBBInfo(nullptr).UnknownCountInEdge
= 2;
1369 if (!setInstrumentedCounts(CountFromProfile
)) {
1371 dbgs() << "Inconsistent number of counts, skipping this function");
1372 Ctx
.diagnose(DiagnosticInfoPGOProfile(
1373 M
->getName().data(),
1374 Twine("Inconsistent number of counts in ") + F
.getName().str() +
1375 Twine(": the profile may be stale or there is a function name "
1380 ProgramMaxCount
= PGOReader
->getMaximumFunctionCount(IsCS
);
1384 void PGOUseFunc::populateCoverage(IndexedInstrProfReader
*PGOReader
) {
1385 uint64_t MismatchedFuncSum
= 0;
1386 Expected
<InstrProfRecord
> Result
= PGOReader
->getInstrProfRecord(
1387 FuncInfo
.FuncName
, FuncInfo
.FunctionHash
, FuncInfo
.DeprecatedFuncName
,
1388 &MismatchedFuncSum
);
1389 if (auto Err
= Result
.takeError()) {
1390 handleInstrProfError(std::move(Err
), MismatchedFuncSum
);
1394 std::vector
<uint64_t> &CountsFromProfile
= Result
.get().Counts
;
1395 DenseMap
<const BasicBlock
*, bool> Coverage
;
1398 if (FuncInfo
.BCI
->shouldInstrumentBlock(BB
))
1399 Coverage
[&BB
] = (CountsFromProfile
[Index
++] != 0);
1400 assert(Index
== CountsFromProfile
.size());
1402 // For each B in InverseDependencies[A], if A is covered then B is covered.
1403 DenseMap
<const BasicBlock
*, DenseSet
<const BasicBlock
*>>
1404 InverseDependencies
;
1405 for (auto &BB
: F
) {
1406 for (auto *Dep
: FuncInfo
.BCI
->getDependencies(BB
)) {
1407 // If Dep is covered then BB is covered.
1408 InverseDependencies
[Dep
].insert(&BB
);
1412 // Infer coverage of the non-instrumented blocks using a flood-fill algorithm.
1413 std::stack
<const BasicBlock
*> CoveredBlocksToProcess
;
1414 for (auto &[BB
, IsCovered
] : Coverage
)
1416 CoveredBlocksToProcess
.push(BB
);
1418 while (!CoveredBlocksToProcess
.empty()) {
1419 auto *CoveredBlock
= CoveredBlocksToProcess
.top();
1420 assert(Coverage
[CoveredBlock
]);
1421 CoveredBlocksToProcess
.pop();
1422 for (auto *BB
: InverseDependencies
[CoveredBlock
]) {
1423 // If CoveredBlock is covered then BB is covered.
1426 Coverage
[BB
] = true;
1427 CoveredBlocksToProcess
.push(BB
);
1431 // Annotate block coverage.
1432 MDBuilder
MDB(F
.getContext());
1433 // We set the entry count to 10000 if the entry block is covered so that BFI
1434 // can propagate a fraction of this count to the other covered blocks.
1435 F
.setEntryCount(Coverage
[&F
.getEntryBlock()] ? 10000 : 0);
1436 for (auto &BB
: F
) {
1437 // For a block A and its successor B, we set the edge weight as follows:
1438 // If A is covered and B is covered, set weight=1.
1439 // If A is covered and B is uncovered, set weight=0.
1440 // If A is uncovered, set weight=1.
1441 // This setup will allow BFI to give nonzero profile counts to only covered
1443 SmallVector
<unsigned, 4> Weights
;
1444 for (auto *Succ
: successors(&BB
))
1445 Weights
.push_back((Coverage
[Succ
] || !Coverage
[&BB
]) ? 1 : 0);
1446 if (Weights
.size() >= 2)
1447 BB
.getTerminator()->setMetadata(LLVMContext::MD_prof
,
1448 MDB
.createBranchWeights(Weights
));
1451 unsigned NumCorruptCoverage
= 0;
1452 DominatorTree
DT(F
);
1454 BranchProbabilityInfo
BPI(F
, LI
);
1455 BlockFrequencyInfo
BFI(F
, BPI
, LI
);
1456 auto IsBlockDead
= [&](const BasicBlock
&BB
) -> std::optional
<bool> {
1457 if (auto C
= BFI
.getBlockProfileCount(&BB
))
1461 LLVM_DEBUG(dbgs() << "Block Coverage: (Instrumented=*, Covered=X)\n");
1462 for (auto &BB
: F
) {
1463 LLVM_DEBUG(dbgs() << (FuncInfo
.BCI
->shouldInstrumentBlock(BB
) ? "* " : " ")
1464 << (Coverage
[&BB
] ? "X " : " ") << " " << BB
.getName()
1466 // In some cases it is possible to find a covered block that has no covered
1467 // successors, e.g., when a block calls a function that may call exit(). In
1468 // those cases, BFI could find its successor to be covered while BCI could
1469 // find its successor to be dead.
1470 if (Coverage
[&BB
] == IsBlockDead(BB
).value_or(false)) {
1472 dbgs() << "Found inconsistent block covearge for " << BB
.getName()
1473 << ": BCI=" << (Coverage
[&BB
] ? "Covered" : "Dead") << " BFI="
1474 << (IsBlockDead(BB
).value() ? "Dead" : "Covered") << "\n");
1475 ++NumCorruptCoverage
;
1480 if (PGOVerifyBFI
&& NumCorruptCoverage
) {
1481 auto &Ctx
= M
->getContext();
1482 Ctx
.diagnose(DiagnosticInfoPGOProfile(
1483 M
->getName().data(),
1484 Twine("Found inconsistent block coverage for function ") + F
.getName() +
1485 " in " + Twine(NumCorruptCoverage
) + " blocks.",
1488 if (PGOViewBlockCoverageGraph
)
1489 FuncInfo
.BCI
->viewBlockCoverageGraph(&Coverage
);
1492 // Populate the counters from instrumented BBs to all BBs.
1493 // In the end of this operation, all BBs should have a valid count value.
1494 void PGOUseFunc::populateCounters() {
1495 bool Changes
= true;
1496 unsigned NumPasses
= 0;
1501 // For efficient traversal, it's better to start from the end as most
1502 // of the instrumented edges are at the end.
1503 for (auto &BB
: reverse(F
)) {
1504 PGOUseBBInfo
*Count
= findBBInfo(&BB
);
1505 if (Count
== nullptr)
1507 if (!Count
->CountValid
) {
1508 if (Count
->UnknownCountOutEdge
== 0) {
1509 Count
->CountValue
= sumEdgeCount(Count
->OutEdges
);
1510 Count
->CountValid
= true;
1512 } else if (Count
->UnknownCountInEdge
== 0) {
1513 Count
->CountValue
= sumEdgeCount(Count
->InEdges
);
1514 Count
->CountValid
= true;
1518 if (Count
->CountValid
) {
1519 if (Count
->UnknownCountOutEdge
== 1) {
1521 uint64_t OutSum
= sumEdgeCount(Count
->OutEdges
);
1522 // If the one of the successor block can early terminate (no-return),
1523 // we can end up with situation where out edge sum count is larger as
1524 // the source BB's count is collected by a post-dominated block.
1525 if (Count
->CountValue
> OutSum
)
1526 Total
= Count
->CountValue
- OutSum
;
1527 setEdgeCount(Count
->OutEdges
, Total
);
1530 if (Count
->UnknownCountInEdge
== 1) {
1532 uint64_t InSum
= sumEdgeCount(Count
->InEdges
);
1533 if (Count
->CountValue
> InSum
)
1534 Total
= Count
->CountValue
- InSum
;
1535 setEdgeCount(Count
->InEdges
, Total
);
1542 LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses
<< " passes.\n");
1545 // Assert every BB has a valid counter.
1546 for (auto &BB
: F
) {
1547 auto BI
= findBBInfo(&BB
);
1550 assert(BI
->CountValid
&& "BB count is not valid");
1553 uint64_t FuncEntryCount
= getBBInfo(&*F
.begin()).CountValue
;
1554 uint64_t FuncMaxCount
= FuncEntryCount
;
1555 for (auto &BB
: F
) {
1556 auto BI
= findBBInfo(&BB
);
1559 FuncMaxCount
= std::max(FuncMaxCount
, BI
->CountValue
);
1562 // Fix the obviously inconsistent entry count.
1563 if (FuncMaxCount
> 0 && FuncEntryCount
== 0)
1565 F
.setEntryCount(ProfileCount(FuncEntryCount
, Function::PCT_Real
));
1566 markFunctionAttributes(FuncEntryCount
, FuncMaxCount
);
1568 // Now annotate select instructions
1569 FuncInfo
.SIVisitor
.annotateSelects(this, &CountPosition
);
1570 assert(CountPosition
== ProfileCountSize
);
1572 LLVM_DEBUG(FuncInfo
.dumpInfo("after reading profile."));
1575 // Assign the scaled count values to the BB with multiple out edges.
1576 void PGOUseFunc::setBranchWeights() {
1577 // Generate MD_prof metadata for every branch instruction.
1578 LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F
.getName()
1579 << " IsCS=" << IsCS
<< "\n");
1580 for (auto &BB
: F
) {
1581 Instruction
*TI
= BB
.getTerminator();
1582 if (TI
->getNumSuccessors() < 2)
1584 if (!(isa
<BranchInst
>(TI
) || isa
<SwitchInst
>(TI
) ||
1585 isa
<IndirectBrInst
>(TI
) || isa
<InvokeInst
>(TI
) ||
1586 isa
<CallBrInst
>(TI
)))
1589 if (getBBInfo(&BB
).CountValue
== 0)
1592 // We have a non-zero Branch BB.
1593 const PGOUseBBInfo
&BBCountInfo
= getBBInfo(&BB
);
1594 unsigned Size
= BBCountInfo
.OutEdges
.size();
1595 SmallVector
<uint64_t, 2> EdgeCounts(Size
, 0);
1596 uint64_t MaxCount
= 0;
1597 for (unsigned s
= 0; s
< Size
; s
++) {
1598 const PGOUseEdge
*E
= BBCountInfo
.OutEdges
[s
];
1599 const BasicBlock
*SrcBB
= E
->SrcBB
;
1600 const BasicBlock
*DestBB
= E
->DestBB
;
1601 if (DestBB
== nullptr)
1603 unsigned SuccNum
= GetSuccessorNumber(SrcBB
, DestBB
);
1604 uint64_t EdgeCount
= E
->CountValue
;
1605 if (EdgeCount
> MaxCount
)
1606 MaxCount
= EdgeCount
;
1607 EdgeCounts
[SuccNum
] = EdgeCount
;
1611 setProfMetadata(M
, TI
, EdgeCounts
, MaxCount
);
1613 // A zero MaxCount can come about when we have a BB with a positive
1614 // count, and whose successor blocks all have 0 count. This can happen
1615 // when there is no exit block and the code exits via a noreturn function.
1616 auto &Ctx
= M
->getContext();
1617 Ctx
.diagnose(DiagnosticInfoPGOProfile(
1618 M
->getName().data(),
1619 Twine("Profile in ") + F
.getName().str() +
1620 Twine(" partially ignored") +
1621 Twine(", possibly due to the lack of a return path."),
1627 static bool isIndirectBrTarget(BasicBlock
*BB
) {
1628 for (BasicBlock
*Pred
: predecessors(BB
)) {
1629 if (isa
<IndirectBrInst
>(Pred
->getTerminator()))
1635 void PGOUseFunc::annotateIrrLoopHeaderWeights() {
1636 LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n");
1637 // Find irr loop headers
1638 for (auto &BB
: F
) {
1639 // As a heuristic also annotate indrectbr targets as they have a high chance
1640 // to become an irreducible loop header after the indirectbr tail
1642 if (BFI
->isIrrLoopHeader(&BB
) || isIndirectBrTarget(&BB
)) {
1643 Instruction
*TI
= BB
.getTerminator();
1644 const PGOUseBBInfo
&BBCountInfo
= getBBInfo(&BB
);
1645 setIrrLoopHeaderMetadata(M
, TI
, BBCountInfo
.CountValue
);
1650 void SelectInstVisitor::instrumentOneSelectInst(SelectInst
&SI
) {
1651 Module
*M
= F
.getParent();
1652 IRBuilder
<> Builder(&SI
);
1653 Type
*Int64Ty
= Builder
.getInt64Ty();
1654 Type
*I8PtrTy
= Builder
.getInt8PtrTy();
1655 auto *Step
= Builder
.CreateZExt(SI
.getCondition(), Int64Ty
);
1657 Intrinsic::getDeclaration(M
, Intrinsic::instrprof_increment_step
),
1658 {ConstantExpr::getBitCast(FuncNameVar
, I8PtrTy
),
1659 Builder
.getInt64(FuncHash
), Builder
.getInt32(TotalNumCtrs
),
1660 Builder
.getInt32(*CurCtrIdx
), Step
});
1664 void SelectInstVisitor::annotateOneSelectInst(SelectInst
&SI
) {
1665 std::vector
<uint64_t> &CountFromProfile
= UseFunc
->getProfileRecord().Counts
;
1666 assert(*CurCtrIdx
< CountFromProfile
.size() &&
1667 "Out of bound access of counters");
1668 uint64_t SCounts
[2];
1669 SCounts
[0] = CountFromProfile
[*CurCtrIdx
]; // True count
1671 uint64_t TotalCount
= 0;
1672 auto BI
= UseFunc
->findBBInfo(SI
.getParent());
1674 TotalCount
= BI
->CountValue
;
1676 SCounts
[1] = (TotalCount
> SCounts
[0] ? TotalCount
- SCounts
[0] : 0);
1677 uint64_t MaxCount
= std::max(SCounts
[0], SCounts
[1]);
1679 setProfMetadata(F
.getParent(), &SI
, SCounts
, MaxCount
);
1682 void SelectInstVisitor::visitSelectInst(SelectInst
&SI
) {
1683 if (!PGOInstrSelect
|| PGOFunctionEntryCoverage
|| HasSingleByteCoverage
)
1685 // FIXME: do not handle this yet.
1686 if (SI
.getCondition()->getType()->isVectorTy())
1694 instrumentOneSelectInst(SI
);
1697 annotateOneSelectInst(SI
);
1701 llvm_unreachable("Unknown visiting mode");
1704 // Traverse all valuesites and annotate the instructions for all value kind.
1705 void PGOUseFunc::annotateValueSites() {
1706 if (DisableValueProfiling
)
1709 // Create the PGOFuncName meta data.
1710 createPGOFuncNameMetadata(F
, FuncInfo
.FuncName
);
1712 for (uint32_t Kind
= IPVK_First
; Kind
<= IPVK_Last
; ++Kind
)
1713 annotateValueSites(Kind
);
1716 // Annotate the instructions for a specific value kind.
1717 void PGOUseFunc::annotateValueSites(uint32_t Kind
) {
1718 assert(Kind
<= IPVK_Last
);
1719 unsigned ValueSiteIndex
= 0;
1720 auto &ValueSites
= FuncInfo
.ValueSites
[Kind
];
1721 unsigned NumValueSites
= ProfileRecord
.getNumValueSites(Kind
);
1722 if (NumValueSites
!= ValueSites
.size()) {
1723 auto &Ctx
= M
->getContext();
1724 Ctx
.diagnose(DiagnosticInfoPGOProfile(
1725 M
->getName().data(),
1726 Twine("Inconsistent number of value sites for ") +
1727 Twine(ValueProfKindDescr
[Kind
]) + Twine(" profiling in \"") +
1729 Twine("\", possibly due to the use of a stale profile."),
1734 for (VPCandidateInfo
&I
: ValueSites
) {
1735 LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind
1736 << "): Index = " << ValueSiteIndex
<< " out of "
1737 << NumValueSites
<< "\n");
1738 annotateValueSite(*M
, *I
.AnnotatedInst
, ProfileRecord
,
1739 static_cast<InstrProfValueKind
>(Kind
), ValueSiteIndex
,
1740 Kind
== IPVK_MemOPSize
? MaxNumMemOPAnnotations
1741 : MaxNumAnnotations
);
1746 // Collect the set of members for each Comdat in module M and store
1747 // in ComdatMembers.
1748 static void collectComdatMembers(
1750 std::unordered_multimap
<Comdat
*, GlobalValue
*> &ComdatMembers
) {
1751 if (!DoComdatRenaming
)
1753 for (Function
&F
: M
)
1754 if (Comdat
*C
= F
.getComdat())
1755 ComdatMembers
.insert(std::make_pair(C
, &F
));
1756 for (GlobalVariable
&GV
: M
.globals())
1757 if (Comdat
*C
= GV
.getComdat())
1758 ComdatMembers
.insert(std::make_pair(C
, &GV
));
1759 for (GlobalAlias
&GA
: M
.aliases())
1760 if (Comdat
*C
= GA
.getComdat())
1761 ComdatMembers
.insert(std::make_pair(C
, &GA
));
1764 // Don't perform PGO instrumeatnion / profile-use.
1765 static bool skipPGO(const Function
&F
) {
1766 if (F
.isDeclaration())
1768 if (F
.hasFnAttribute(llvm::Attribute::NoProfile
))
1770 if (F
.hasFnAttribute(llvm::Attribute::SkipProfile
))
1772 if (F
.getInstructionCount() < PGOFunctionSizeThreshold
)
1775 // If there are too many critical edges, PGO might cause
1776 // compiler time problem. Skip PGO if the number of
1777 // critical edges execeed the threshold.
1778 unsigned NumCriticalEdges
= 0;
1779 for (auto &BB
: F
) {
1780 const Instruction
*TI
= BB
.getTerminator();
1781 for (unsigned I
= 0, E
= TI
->getNumSuccessors(); I
!= E
; ++I
) {
1782 if (isCriticalEdge(TI
, I
))
1786 if (NumCriticalEdges
> PGOFunctionCriticalEdgeThreshold
) {
1787 LLVM_DEBUG(dbgs() << "In func " << F
.getName()
1788 << ", NumCriticalEdges=" << NumCriticalEdges
1789 << " exceed the threshold. Skip PGO.\n");
1796 static bool InstrumentAllFunctions(
1797 Module
&M
, function_ref
<TargetLibraryInfo
&(Function
&)> LookupTLI
,
1798 function_ref
<BranchProbabilityInfo
*(Function
&)> LookupBPI
,
1799 function_ref
<BlockFrequencyInfo
*(Function
&)> LookupBFI
, bool IsCS
) {
1800 // For the context-sensitve instrumentation, we should have a separated pass
1801 // (before LTO/ThinLTO linking) to create these variables.
1803 createIRLevelProfileFlagVar(M
, /*IsCS=*/false);
1804 std::unordered_multimap
<Comdat
*, GlobalValue
*> ComdatMembers
;
1805 collectComdatMembers(M
, ComdatMembers
);
1810 auto &TLI
= LookupTLI(F
);
1811 auto *BPI
= LookupBPI(F
);
1812 auto *BFI
= LookupBFI(F
);
1813 instrumentOneFunc(F
, &M
, TLI
, BPI
, BFI
, ComdatMembers
, IsCS
);
1819 PGOInstrumentationGenCreateVar::run(Module
&M
, ModuleAnalysisManager
&MAM
) {
1820 createProfileFileNameVar(M
, CSInstrName
);
1821 // The variable in a comdat may be discarded by LTO. Ensure the declaration
1822 // will be retained.
1823 appendToCompilerUsed(M
, createIRLevelProfileFlagVar(M
, /*IsCS=*/true));
1824 PreservedAnalyses PA
;
1825 PA
.preserve
<FunctionAnalysisManagerModuleProxy
>();
1826 PA
.preserveSet
<AllAnalysesOn
<Function
>>();
1830 PreservedAnalyses
PGOInstrumentationGen::run(Module
&M
,
1831 ModuleAnalysisManager
&MAM
) {
1832 auto &FAM
= MAM
.getResult
<FunctionAnalysisManagerModuleProxy
>(M
).getManager();
1833 auto LookupTLI
= [&FAM
](Function
&F
) -> TargetLibraryInfo
& {
1834 return FAM
.getResult
<TargetLibraryAnalysis
>(F
);
1836 auto LookupBPI
= [&FAM
](Function
&F
) {
1837 return &FAM
.getResult
<BranchProbabilityAnalysis
>(F
);
1839 auto LookupBFI
= [&FAM
](Function
&F
) {
1840 return &FAM
.getResult
<BlockFrequencyAnalysis
>(F
);
1843 if (!InstrumentAllFunctions(M
, LookupTLI
, LookupBPI
, LookupBFI
, IsCS
))
1844 return PreservedAnalyses::all();
1846 return PreservedAnalyses::none();
1849 // Using the ratio b/w sums of profile count values and BFI count values to
1850 // adjust the func entry count.
1851 static void fixFuncEntryCount(PGOUseFunc
&Func
, LoopInfo
&LI
,
1852 BranchProbabilityInfo
&NBPI
) {
1853 Function
&F
= Func
.getFunc();
1854 BlockFrequencyInfo
NBFI(F
, NBPI
, LI
);
1856 auto BFIEntryCount
= F
.getEntryCount();
1857 assert(BFIEntryCount
&& (BFIEntryCount
->getCount() > 0) &&
1858 "Invalid BFI Entrycount");
1860 auto SumCount
= APFloat::getZero(APFloat::IEEEdouble());
1861 auto SumBFICount
= APFloat::getZero(APFloat::IEEEdouble());
1862 for (auto &BBI
: F
) {
1863 uint64_t CountValue
= 0;
1864 uint64_t BFICountValue
= 0;
1865 if (!Func
.findBBInfo(&BBI
))
1867 auto BFICount
= NBFI
.getBlockProfileCount(&BBI
);
1868 CountValue
= Func
.getBBInfo(&BBI
).CountValue
;
1869 BFICountValue
= *BFICount
;
1870 SumCount
.add(APFloat(CountValue
* 1.0), APFloat::rmNearestTiesToEven
);
1871 SumBFICount
.add(APFloat(BFICountValue
* 1.0), APFloat::rmNearestTiesToEven
);
1873 if (SumCount
.isZero())
1876 assert(SumBFICount
.compare(APFloat(0.0)) == APFloat::cmpGreaterThan
&&
1877 "Incorrect sum of BFI counts");
1878 if (SumBFICount
.compare(SumCount
) == APFloat::cmpEqual
)
1880 double Scale
= (SumCount
/ SumBFICount
).convertToDouble();
1881 if (Scale
< 1.001 && Scale
> 0.999)
1884 uint64_t FuncEntryCount
= Func
.getBBInfo(&*F
.begin()).CountValue
;
1885 uint64_t NewEntryCount
= 0.5 + FuncEntryCount
* Scale
;
1886 if (NewEntryCount
== 0)
1888 if (NewEntryCount
!= FuncEntryCount
) {
1889 F
.setEntryCount(ProfileCount(NewEntryCount
, Function::PCT_Real
));
1890 LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F
.getName()
1891 << ", entry_count " << FuncEntryCount
<< " --> "
1892 << NewEntryCount
<< "\n");
1896 // Compare the profile count values with BFI count values, and print out
1897 // the non-matching ones.
1898 static void verifyFuncBFI(PGOUseFunc
&Func
, LoopInfo
&LI
,
1899 BranchProbabilityInfo
&NBPI
,
1900 uint64_t HotCountThreshold
,
1901 uint64_t ColdCountThreshold
) {
1902 Function
&F
= Func
.getFunc();
1903 BlockFrequencyInfo
NBFI(F
, NBPI
, LI
);
1904 // bool PrintFunc = false;
1905 bool HotBBOnly
= PGOVerifyHotBFI
;
1907 OptimizationRemarkEmitter
ORE(&F
);
1909 unsigned BBNum
= 0, BBMisMatchNum
= 0, NonZeroBBNum
= 0;
1910 for (auto &BBI
: F
) {
1911 uint64_t CountValue
= 0;
1912 uint64_t BFICountValue
= 0;
1914 if (Func
.getBBInfo(&BBI
).CountValid
)
1915 CountValue
= Func
.getBBInfo(&BBI
).CountValue
;
1920 auto BFICount
= NBFI
.getBlockProfileCount(&BBI
);
1922 BFICountValue
= *BFICount
;
1925 bool rawIsHot
= CountValue
>= HotCountThreshold
;
1926 bool BFIIsHot
= BFICountValue
>= HotCountThreshold
;
1927 bool rawIsCold
= CountValue
<= ColdCountThreshold
;
1928 bool ShowCount
= false;
1929 if (rawIsHot
&& !BFIIsHot
) {
1930 Msg
= "raw-Hot to BFI-nonHot";
1932 } else if (rawIsCold
&& BFIIsHot
) {
1933 Msg
= "raw-Cold to BFI-Hot";
1939 if ((CountValue
< PGOVerifyBFICutoff
) &&
1940 (BFICountValue
< PGOVerifyBFICutoff
))
1942 uint64_t Diff
= (BFICountValue
>= CountValue
)
1943 ? BFICountValue
- CountValue
1944 : CountValue
- BFICountValue
;
1945 if (Diff
<= CountValue
/ 100 * PGOVerifyBFIRatio
)
1951 OptimizationRemarkAnalysis
Remark(DEBUG_TYPE
, "bfi-verify",
1952 F
.getSubprogram(), &BBI
);
1953 Remark
<< "BB " << ore::NV("Block", BBI
.getName())
1954 << " Count=" << ore::NV("Count", CountValue
)
1955 << " BFI_Count=" << ore::NV("Count", BFICountValue
);
1957 Remark
<< " (" << Msg
<< ")";
1963 return OptimizationRemarkAnalysis(DEBUG_TYPE
, "bfi-verify",
1964 F
.getSubprogram(), &F
.getEntryBlock())
1965 << "In Func " << ore::NV("Function", F
.getName())
1966 << ": Num_of_BB=" << ore::NV("Count", BBNum
)
1967 << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum
)
1968 << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum
);
1972 static bool annotateAllFunctions(
1973 Module
&M
, StringRef ProfileFileName
, StringRef ProfileRemappingFileName
,
1974 vfs::FileSystem
&FS
,
1975 function_ref
<TargetLibraryInfo
&(Function
&)> LookupTLI
,
1976 function_ref
<BranchProbabilityInfo
*(Function
&)> LookupBPI
,
1977 function_ref
<BlockFrequencyInfo
*(Function
&)> LookupBFI
,
1978 ProfileSummaryInfo
*PSI
, bool IsCS
) {
1979 LLVM_DEBUG(dbgs() << "Read in profile counters: ");
1980 auto &Ctx
= M
.getContext();
1981 // Read the counter array from file.
1982 auto ReaderOrErr
= IndexedInstrProfReader::create(ProfileFileName
, FS
,
1983 ProfileRemappingFileName
);
1984 if (Error E
= ReaderOrErr
.takeError()) {
1985 handleAllErrors(std::move(E
), [&](const ErrorInfoBase
&EI
) {
1987 DiagnosticInfoPGOProfile(ProfileFileName
.data(), EI
.message()));
1992 std::unique_ptr
<IndexedInstrProfReader
> PGOReader
=
1993 std::move(ReaderOrErr
.get());
1995 Ctx
.diagnose(DiagnosticInfoPGOProfile(ProfileFileName
.data(),
1996 StringRef("Cannot get PGOReader")));
1999 if (!PGOReader
->hasCSIRLevelProfile() && IsCS
)
2002 // TODO: might need to change the warning once the clang option is finalized.
2003 if (!PGOReader
->isIRLevelProfile()) {
2004 Ctx
.diagnose(DiagnosticInfoPGOProfile(
2005 ProfileFileName
.data(), "Not an IR level instrumentation profile"));
2008 if (PGOReader
->functionEntryOnly()) {
2009 Ctx
.diagnose(DiagnosticInfoPGOProfile(
2010 ProfileFileName
.data(),
2011 "Function entry profiles are not yet supported for optimization"));
2015 // Add the profile summary (read from the header of the indexed summary) here
2016 // so that we can use it below when reading counters (which checks if the
2017 // function should be marked with a cold or inlinehint attribute).
2018 M
.setProfileSummary(PGOReader
->getSummary(IsCS
).getMD(M
.getContext()),
2019 IsCS
? ProfileSummary::PSK_CSInstr
2020 : ProfileSummary::PSK_Instr
);
2023 std::unordered_multimap
<Comdat
*, GlobalValue
*> ComdatMembers
;
2024 collectComdatMembers(M
, ComdatMembers
);
2025 std::vector
<Function
*> HotFunctions
;
2026 std::vector
<Function
*> ColdFunctions
;
2028 // If the profile marked as always instrument the entry BB, do the
2029 // same. Note this can be overwritten by the internal option in CFGMST.h
2030 bool InstrumentFuncEntry
= PGOReader
->instrEntryBBEnabled();
2031 if (PGOInstrumentEntry
.getNumOccurrences() > 0)
2032 InstrumentFuncEntry
= PGOInstrumentEntry
;
2033 bool HasSingleByteCoverage
= PGOReader
->hasSingleByteCoverage();
2037 auto &TLI
= LookupTLI(F
);
2038 auto *BPI
= LookupBPI(F
);
2039 auto *BFI
= LookupBFI(F
);
2040 if (!HasSingleByteCoverage
) {
2041 // Split indirectbr critical edges here before computing the MST rather
2042 // than later in getInstrBB() to avoid invalidating it.
2043 SplitIndirectBrCriticalEdges(F
, /*IgnoreBlocksWithoutPHI=*/false, BPI
,
2046 PGOUseFunc
Func(F
, &M
, TLI
, ComdatMembers
, BPI
, BFI
, PSI
, IsCS
,
2047 InstrumentFuncEntry
, HasSingleByteCoverage
);
2048 if (HasSingleByteCoverage
) {
2049 Func
.populateCoverage(PGOReader
.get());
2052 // When PseudoKind is set to a vaule other than InstrProfRecord::NotPseudo,
2053 // it means the profile for the function is unrepresentative and this
2054 // function is actually hot / warm. We will reset the function hot / cold
2055 // attribute and drop all the profile counters.
2056 InstrProfRecord::CountPseudoKind PseudoKind
= InstrProfRecord::NotPseudo
;
2057 bool AllZeros
= false;
2058 if (!Func
.readCounters(PGOReader
.get(), AllZeros
, PseudoKind
))
2061 F
.setEntryCount(ProfileCount(0, Function::PCT_Real
));
2062 if (Func
.getProgramMaxCount() != 0)
2063 ColdFunctions
.push_back(&F
);
2066 if (PseudoKind
!= InstrProfRecord::NotPseudo
) {
2067 // Clear function attribute cold.
2068 if (F
.hasFnAttribute(Attribute::Cold
))
2069 F
.removeFnAttr(Attribute::Cold
);
2070 // Set function attribute as hot.
2071 if (PseudoKind
== InstrProfRecord::PseudoHot
)
2072 F
.addFnAttr(Attribute::Hot
);
2075 Func
.populateCounters();
2076 Func
.setBranchWeights();
2077 Func
.annotateValueSites();
2078 Func
.annotateIrrLoopHeaderWeights();
2079 PGOUseFunc::FuncFreqAttr FreqAttr
= Func
.getFuncFreqAttr();
2080 if (FreqAttr
== PGOUseFunc::FFA_Cold
)
2081 ColdFunctions
.push_back(&F
);
2082 else if (FreqAttr
== PGOUseFunc::FFA_Hot
)
2083 HotFunctions
.push_back(&F
);
2084 if (PGOViewCounts
!= PGOVCT_None
&&
2085 (ViewBlockFreqFuncName
.empty() ||
2086 F
.getName().equals(ViewBlockFreqFuncName
))) {
2087 LoopInfo LI
{DominatorTree(F
)};
2088 std::unique_ptr
<BranchProbabilityInfo
> NewBPI
=
2089 std::make_unique
<BranchProbabilityInfo
>(F
, LI
);
2090 std::unique_ptr
<BlockFrequencyInfo
> NewBFI
=
2091 std::make_unique
<BlockFrequencyInfo
>(F
, *NewBPI
, LI
);
2092 if (PGOViewCounts
== PGOVCT_Graph
)
2094 else if (PGOViewCounts
== PGOVCT_Text
) {
2095 dbgs() << "pgo-view-counts: " << Func
.getFunc().getName() << "\n";
2096 NewBFI
->print(dbgs());
2099 if (PGOViewRawCounts
!= PGOVCT_None
&&
2100 (ViewBlockFreqFuncName
.empty() ||
2101 F
.getName().equals(ViewBlockFreqFuncName
))) {
2102 if (PGOViewRawCounts
== PGOVCT_Graph
)
2103 if (ViewBlockFreqFuncName
.empty())
2104 WriteGraph(&Func
, Twine("PGORawCounts_") + Func
.getFunc().getName());
2106 ViewGraph(&Func
, Twine("PGORawCounts_") + Func
.getFunc().getName());
2107 else if (PGOViewRawCounts
== PGOVCT_Text
) {
2108 dbgs() << "pgo-view-raw-counts: " << Func
.getFunc().getName() << "\n";
2113 if (PGOVerifyBFI
|| PGOVerifyHotBFI
|| PGOFixEntryCount
) {
2114 LoopInfo LI
{DominatorTree(F
)};
2115 BranchProbabilityInfo
NBPI(F
, LI
);
2117 // Fix func entry count.
2118 if (PGOFixEntryCount
)
2119 fixFuncEntryCount(Func
, LI
, NBPI
);
2121 // Verify BlockFrequency information.
2122 uint64_t HotCountThreshold
= 0, ColdCountThreshold
= 0;
2123 if (PGOVerifyHotBFI
) {
2124 HotCountThreshold
= PSI
->getOrCompHotCountThreshold();
2125 ColdCountThreshold
= PSI
->getOrCompColdCountThreshold();
2127 verifyFuncBFI(Func
, LI
, NBPI
, HotCountThreshold
, ColdCountThreshold
);
2131 // Set function hotness attribute from the profile.
2132 // We have to apply these attributes at the end because their presence
2133 // can affect the BranchProbabilityInfo of any callers, resulting in an
2134 // inconsistent MST between prof-gen and prof-use.
2135 for (auto &F
: HotFunctions
) {
2136 F
->addFnAttr(Attribute::InlineHint
);
2137 LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F
->getName()
2140 for (auto &F
: ColdFunctions
) {
2141 // Only set when there is no Attribute::Hot set by the user. For Hot
2142 // attribute, user's annotation has the precedence over the profile.
2143 if (F
->hasFnAttribute(Attribute::Hot
)) {
2144 auto &Ctx
= M
.getContext();
2145 std::string Msg
= std::string("Function ") + F
->getName().str() +
2146 std::string(" is annotated as a hot function but"
2147 " the profile is cold");
2149 DiagnosticInfoPGOProfile(M
.getName().data(), Msg
, DS_Warning
));
2152 F
->addFnAttr(Attribute::Cold
);
2153 LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F
->getName()
2159 PGOInstrumentationUse::PGOInstrumentationUse(
2160 std::string Filename
, std::string RemappingFilename
, bool IsCS
,
2161 IntrusiveRefCntPtr
<vfs::FileSystem
> VFS
)
2162 : ProfileFileName(std::move(Filename
)),
2163 ProfileRemappingFileName(std::move(RemappingFilename
)), IsCS(IsCS
),
2164 FS(std::move(VFS
)) {
2165 if (!PGOTestProfileFile
.empty())
2166 ProfileFileName
= PGOTestProfileFile
;
2167 if (!PGOTestProfileRemappingFile
.empty())
2168 ProfileRemappingFileName
= PGOTestProfileRemappingFile
;
2170 FS
= vfs::getRealFileSystem();
2173 PreservedAnalyses
PGOInstrumentationUse::run(Module
&M
,
2174 ModuleAnalysisManager
&MAM
) {
2176 auto &FAM
= MAM
.getResult
<FunctionAnalysisManagerModuleProxy
>(M
).getManager();
2177 auto LookupTLI
= [&FAM
](Function
&F
) -> TargetLibraryInfo
& {
2178 return FAM
.getResult
<TargetLibraryAnalysis
>(F
);
2180 auto LookupBPI
= [&FAM
](Function
&F
) {
2181 return &FAM
.getResult
<BranchProbabilityAnalysis
>(F
);
2183 auto LookupBFI
= [&FAM
](Function
&F
) {
2184 return &FAM
.getResult
<BlockFrequencyAnalysis
>(F
);
2187 auto *PSI
= &MAM
.getResult
<ProfileSummaryAnalysis
>(M
);
2189 if (!annotateAllFunctions(M
, ProfileFileName
, ProfileRemappingFileName
, *FS
,
2190 LookupTLI
, LookupBPI
, LookupBFI
, PSI
, IsCS
))
2191 return PreservedAnalyses::all();
2193 return PreservedAnalyses::none();
2196 static std::string
getSimpleNodeName(const BasicBlock
*Node
) {
2197 if (!Node
->getName().empty())
2198 return Node
->getName().str();
2200 std::string SimpleNodeName
;
2201 raw_string_ostream
OS(SimpleNodeName
);
2202 Node
->printAsOperand(OS
, false);
2206 void llvm::setProfMetadata(Module
*M
, Instruction
*TI
,
2207 ArrayRef
<uint64_t> EdgeCounts
, uint64_t MaxCount
) {
2208 MDBuilder
MDB(M
->getContext());
2209 assert(MaxCount
> 0 && "Bad max count");
2210 uint64_t Scale
= calculateCountScale(MaxCount
);
2211 SmallVector
<unsigned, 4> Weights
;
2212 for (const auto &ECI
: EdgeCounts
)
2213 Weights
.push_back(scaleBranchCount(ECI
, Scale
));
2215 LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W
2220 misexpect::checkExpectAnnotations(*TI
, Weights
, /*IsFrontend=*/false);
2222 TI
->setMetadata(LLVMContext::MD_prof
, MDB
.createBranchWeights(Weights
));
2223 if (EmitBranchProbability
) {
2224 std::string BrCondStr
= getBranchCondString(TI
);
2225 if (BrCondStr
.empty())
2229 std::accumulate(Weights
.begin(), Weights
.end(), (uint64_t)0,
2230 [](uint64_t w1
, uint64_t w2
) { return w1
+ w2
; });
2231 uint64_t TotalCount
=
2232 std::accumulate(EdgeCounts
.begin(), EdgeCounts
.end(), (uint64_t)0,
2233 [](uint64_t c1
, uint64_t c2
) { return c1
+ c2
; });
2234 Scale
= calculateCountScale(WSum
);
2235 BranchProbability
BP(scaleBranchCount(Weights
[0], Scale
),
2236 scaleBranchCount(WSum
, Scale
));
2237 std::string BranchProbStr
;
2238 raw_string_ostream
OS(BranchProbStr
);
2240 OS
<< " (total count : " << TotalCount
<< ")";
2242 Function
*F
= TI
->getParent()->getParent();
2243 OptimizationRemarkEmitter
ORE(F
);
2245 return OptimizationRemark(DEBUG_TYPE
, "pgo-instrumentation", TI
)
2246 << BrCondStr
<< " is true with probability : " << BranchProbStr
;
2253 void setIrrLoopHeaderMetadata(Module
*M
, Instruction
*TI
, uint64_t Count
) {
2254 MDBuilder
MDB(M
->getContext());
2255 TI
->setMetadata(llvm::LLVMContext::MD_irr_loop
,
2256 MDB
.createIrrLoopHeaderWeight(Count
));
2259 template <> struct GraphTraits
<PGOUseFunc
*> {
2260 using NodeRef
= const BasicBlock
*;
2261 using ChildIteratorType
= const_succ_iterator
;
2262 using nodes_iterator
= pointer_iterator
<Function::const_iterator
>;
2264 static NodeRef
getEntryNode(const PGOUseFunc
*G
) {
2265 return &G
->getFunc().front();
2268 static ChildIteratorType
child_begin(const NodeRef N
) {
2269 return succ_begin(N
);
2272 static ChildIteratorType
child_end(const NodeRef N
) { return succ_end(N
); }
2274 static nodes_iterator
nodes_begin(const PGOUseFunc
*G
) {
2275 return nodes_iterator(G
->getFunc().begin());
2278 static nodes_iterator
nodes_end(const PGOUseFunc
*G
) {
2279 return nodes_iterator(G
->getFunc().end());
2283 template <> struct DOTGraphTraits
<PGOUseFunc
*> : DefaultDOTGraphTraits
{
2284 explicit DOTGraphTraits(bool isSimple
= false)
2285 : DefaultDOTGraphTraits(isSimple
) {}
2287 static std::string
getGraphName(const PGOUseFunc
*G
) {
2288 return std::string(G
->getFunc().getName());
2291 std::string
getNodeLabel(const BasicBlock
*Node
, const PGOUseFunc
*Graph
) {
2293 raw_string_ostream
OS(Result
);
2295 OS
<< getSimpleNodeName(Node
) << ":\\l";
2296 PGOUseBBInfo
*BI
= Graph
->findBBInfo(Node
);
2298 if (BI
&& BI
->CountValid
)
2299 OS
<< BI
->CountValue
<< "\\l";
2303 if (!PGOInstrSelect
)
2306 for (const Instruction
&I
: *Node
) {
2307 if (!isa
<SelectInst
>(&I
))
2309 // Display scaled counts for SELECT instruction:
2310 OS
<< "SELECT : { T = ";
2312 bool HasProf
= extractBranchWeights(I
, TC
, FC
);
2314 OS
<< "Unknown, F = Unknown }\\l";
2316 OS
<< TC
<< ", F = " << FC
<< " }\\l";
2322 } // end namespace llvm