[flang] Apply nocapture attribute to dummy arguments (#116182)
[llvm-project.git] / clang / lib / CodeGen / CodeGenPGO.cpp
blob17d7902f0cfbc7558b9d5709ba3ce3f95994aeb1
1 //===--- CodeGenPGO.cpp - PGO Instrumentation for LLVM CodeGen --*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Instrumentation-based profile-guided optimization
11 //===----------------------------------------------------------------------===//
13 #include "CodeGenPGO.h"
14 #include "CodeGenFunction.h"
15 #include "CoverageMappingGen.h"
16 #include "clang/AST/RecursiveASTVisitor.h"
17 #include "clang/AST/StmtVisitor.h"
18 #include "llvm/IR/Intrinsics.h"
19 #include "llvm/IR/MDBuilder.h"
20 #include "llvm/Support/CommandLine.h"
21 #include "llvm/Support/Endian.h"
22 #include "llvm/Support/MD5.h"
23 #include <optional>
25 namespace llvm {
26 extern cl::opt<bool> EnableSingleByteCoverage;
27 } // namespace llvm
29 static llvm::cl::opt<bool>
30 EnableValueProfiling("enable-value-profiling",
31 llvm::cl::desc("Enable value profiling"),
32 llvm::cl::Hidden, llvm::cl::init(false));
34 using namespace clang;
35 using namespace CodeGen;
37 void CodeGenPGO::setFuncName(StringRef Name,
38 llvm::GlobalValue::LinkageTypes Linkage) {
39 llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
40 FuncName = llvm::getPGOFuncName(
41 Name, Linkage, CGM.getCodeGenOpts().MainFileName,
42 PGOReader ? PGOReader->getVersion() : llvm::IndexedInstrProf::Version);
44 // If we're generating a profile, create a variable for the name.
45 if (CGM.getCodeGenOpts().hasProfileClangInstr())
46 FuncNameVar = llvm::createPGOFuncNameVar(CGM.getModule(), Linkage, FuncName);
49 void CodeGenPGO::setFuncName(llvm::Function *Fn) {
50 setFuncName(Fn->getName(), Fn->getLinkage());
51 // Create PGOFuncName meta data.
52 llvm::createPGOFuncNameMetadata(*Fn, FuncName);
55 /// The version of the PGO hash algorithm.
56 enum PGOHashVersion : unsigned {
57 PGO_HASH_V1,
58 PGO_HASH_V2,
59 PGO_HASH_V3,
61 // Keep this set to the latest hash version.
62 PGO_HASH_LATEST = PGO_HASH_V3
65 namespace {
66 /// Stable hasher for PGO region counters.
67 ///
68 /// PGOHash produces a stable hash of a given function's control flow.
69 ///
70 /// Changing the output of this hash will invalidate all previously generated
71 /// profiles -- i.e., don't do it.
72 ///
73 /// \note When this hash does eventually change (years?), we still need to
74 /// support old hashes. We'll need to pull in the version number from the
75 /// profile data format and use the matching hash function.
76 class PGOHash {
77 uint64_t Working;
78 unsigned Count;
79 PGOHashVersion HashVersion;
80 llvm::MD5 MD5;
82 static const int NumBitsPerType = 6;
83 static const unsigned NumTypesPerWord = sizeof(uint64_t) * 8 / NumBitsPerType;
84 static const unsigned TooBig = 1u << NumBitsPerType;
86 public:
87 /// Hash values for AST nodes.
88 ///
89 /// Distinct values for AST nodes that have region counters attached.
90 ///
91 /// These values must be stable. All new members must be added at the end,
92 /// and no members should be removed. Changing the enumeration value for an
93 /// AST node will affect the hash of every function that contains that node.
94 enum HashType : unsigned char {
95 None = 0,
96 LabelStmt = 1,
97 WhileStmt,
98 DoStmt,
99 ForStmt,
100 CXXForRangeStmt,
101 ObjCForCollectionStmt,
102 SwitchStmt,
103 CaseStmt,
104 DefaultStmt,
105 IfStmt,
106 CXXTryStmt,
107 CXXCatchStmt,
108 ConditionalOperator,
109 BinaryOperatorLAnd,
110 BinaryOperatorLOr,
111 BinaryConditionalOperator,
112 // The preceding values are available with PGO_HASH_V1.
114 EndOfScope,
115 IfThenBranch,
116 IfElseBranch,
117 GotoStmt,
118 IndirectGotoStmt,
119 BreakStmt,
120 ContinueStmt,
121 ReturnStmt,
122 ThrowExpr,
123 UnaryOperatorLNot,
124 BinaryOperatorLT,
125 BinaryOperatorGT,
126 BinaryOperatorLE,
127 BinaryOperatorGE,
128 BinaryOperatorEQ,
129 BinaryOperatorNE,
130 // The preceding values are available since PGO_HASH_V2.
132 // Keep this last. It's for the static assert that follows.
133 LastHashType
135 static_assert(LastHashType <= TooBig, "Too many types in HashType");
137 PGOHash(PGOHashVersion HashVersion)
138 : Working(0), Count(0), HashVersion(HashVersion) {}
139 void combine(HashType Type);
140 uint64_t finalize();
141 PGOHashVersion getHashVersion() const { return HashVersion; }
143 const int PGOHash::NumBitsPerType;
144 const unsigned PGOHash::NumTypesPerWord;
145 const unsigned PGOHash::TooBig;
147 /// Get the PGO hash version used in the given indexed profile.
148 static PGOHashVersion getPGOHashVersion(llvm::IndexedInstrProfReader *PGOReader,
149 CodeGenModule &CGM) {
150 if (PGOReader->getVersion() <= 4)
151 return PGO_HASH_V1;
152 if (PGOReader->getVersion() <= 5)
153 return PGO_HASH_V2;
154 return PGO_HASH_V3;
157 /// A RecursiveASTVisitor that fills a map of statements to PGO counters.
158 struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> {
159 using Base = RecursiveASTVisitor<MapRegionCounters>;
161 /// The next counter value to assign.
162 unsigned NextCounter;
163 /// The function hash.
164 PGOHash Hash;
165 /// The map of statements to counters.
166 llvm::DenseMap<const Stmt *, unsigned> &CounterMap;
167 /// The state of MC/DC Coverage in this function.
168 MCDC::State &MCDCState;
169 /// Maximum number of supported MC/DC conditions in a boolean expression.
170 unsigned MCDCMaxCond;
171 /// The profile version.
172 uint64_t ProfileVersion;
173 /// Diagnostics Engine used to report warnings.
174 DiagnosticsEngine &Diag;
176 MapRegionCounters(PGOHashVersion HashVersion, uint64_t ProfileVersion,
177 llvm::DenseMap<const Stmt *, unsigned> &CounterMap,
178 MCDC::State &MCDCState, unsigned MCDCMaxCond,
179 DiagnosticsEngine &Diag)
180 : NextCounter(0), Hash(HashVersion), CounterMap(CounterMap),
181 MCDCState(MCDCState), MCDCMaxCond(MCDCMaxCond),
182 ProfileVersion(ProfileVersion), Diag(Diag) {}
184 // Blocks and lambdas are handled as separate functions, so we need not
185 // traverse them in the parent context.
186 bool TraverseBlockExpr(BlockExpr *BE) { return true; }
187 bool TraverseLambdaExpr(LambdaExpr *LE) {
188 // Traverse the captures, but not the body.
189 for (auto C : zip(LE->captures(), LE->capture_inits()))
190 TraverseLambdaCapture(LE, &std::get<0>(C), std::get<1>(C));
191 return true;
193 bool TraverseCapturedStmt(CapturedStmt *CS) { return true; }
195 bool VisitDecl(const Decl *D) {
196 switch (D->getKind()) {
197 default:
198 break;
199 case Decl::Function:
200 case Decl::CXXMethod:
201 case Decl::CXXConstructor:
202 case Decl::CXXDestructor:
203 case Decl::CXXConversion:
204 case Decl::ObjCMethod:
205 case Decl::Block:
206 case Decl::Captured:
207 CounterMap[D->getBody()] = NextCounter++;
208 break;
210 return true;
213 /// If \p S gets a fresh counter, update the counter mappings. Return the
214 /// V1 hash of \p S.
215 PGOHash::HashType updateCounterMappings(Stmt *S) {
216 auto Type = getHashType(PGO_HASH_V1, S);
217 if (Type != PGOHash::None)
218 CounterMap[S] = NextCounter++;
219 return Type;
222 /// The following stacks are used with dataTraverseStmtPre() and
223 /// dataTraverseStmtPost() to track the depth of nested logical operators in a
224 /// boolean expression in a function. The ultimate purpose is to keep track
225 /// of the number of leaf-level conditions in the boolean expression so that a
226 /// profile bitmap can be allocated based on that number.
228 /// The stacks are also used to find error cases and notify the user. A
229 /// standard logical operator nest for a boolean expression could be in a form
230 /// similar to this: "x = a && b && c && (d || f)"
231 unsigned NumCond = 0;
232 bool SplitNestedLogicalOp = false;
233 SmallVector<const Stmt *, 16> NonLogOpStack;
234 SmallVector<const BinaryOperator *, 16> LogOpStack;
236 // Hook: dataTraverseStmtPre() is invoked prior to visiting an AST Stmt node.
237 bool dataTraverseStmtPre(Stmt *S) {
238 /// If MC/DC is not enabled, MCDCMaxCond will be set to 0. Do nothing.
239 if (MCDCMaxCond == 0)
240 return true;
242 /// At the top of the logical operator nest, reset the number of conditions,
243 /// also forget previously seen split nesting cases.
244 if (LogOpStack.empty()) {
245 NumCond = 0;
246 SplitNestedLogicalOp = false;
249 if (const Expr *E = dyn_cast<Expr>(S)) {
250 const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(E->IgnoreParens());
251 if (BinOp && BinOp->isLogicalOp()) {
252 /// Check for "split-nested" logical operators. This happens when a new
253 /// boolean expression logical-op nest is encountered within an existing
254 /// boolean expression, separated by a non-logical operator. For
255 /// example, in "x = (a && b && c && foo(d && f))", the "d && f" case
256 /// starts a new boolean expression that is separated from the other
257 /// conditions by the operator foo(). Split-nested cases are not
258 /// supported by MC/DC.
259 SplitNestedLogicalOp = SplitNestedLogicalOp || !NonLogOpStack.empty();
261 LogOpStack.push_back(BinOp);
262 return true;
266 /// Keep track of non-logical operators. These are OK as long as we don't
267 /// encounter a new logical operator after seeing one.
268 if (!LogOpStack.empty())
269 NonLogOpStack.push_back(S);
271 return true;
274 // Hook: dataTraverseStmtPost() is invoked by the AST visitor after visiting
275 // an AST Stmt node. MC/DC will use it to to signal when the top of a
276 // logical operation (boolean expression) nest is encountered.
277 bool dataTraverseStmtPost(Stmt *S) {
278 /// If MC/DC is not enabled, MCDCMaxCond will be set to 0. Do nothing.
279 if (MCDCMaxCond == 0)
280 return true;
282 if (const Expr *E = dyn_cast<Expr>(S)) {
283 const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(E->IgnoreParens());
284 if (BinOp && BinOp->isLogicalOp()) {
285 assert(LogOpStack.back() == BinOp);
286 LogOpStack.pop_back();
288 /// At the top of logical operator nest:
289 if (LogOpStack.empty()) {
290 /// Was the "split-nested" logical operator case encountered?
291 if (SplitNestedLogicalOp) {
292 unsigned DiagID = Diag.getCustomDiagID(
293 DiagnosticsEngine::Warning,
294 "unsupported MC/DC boolean expression; "
295 "contains an operation with a nested boolean expression. "
296 "Expression will not be covered");
297 Diag.Report(S->getBeginLoc(), DiagID);
298 return true;
301 /// Was the maximum number of conditions encountered?
302 if (NumCond > MCDCMaxCond) {
303 unsigned DiagID = Diag.getCustomDiagID(
304 DiagnosticsEngine::Warning,
305 "unsupported MC/DC boolean expression; "
306 "number of conditions (%0) exceeds max (%1). "
307 "Expression will not be covered");
308 Diag.Report(S->getBeginLoc(), DiagID) << NumCond << MCDCMaxCond;
309 return true;
312 // Otherwise, allocate the Decision.
313 MCDCState.DecisionByStmt[BinOp].BitmapIdx = 0;
315 return true;
319 if (!LogOpStack.empty())
320 NonLogOpStack.pop_back();
322 return true;
325 /// The RHS of all logical operators gets a fresh counter in order to count
326 /// how many times the RHS evaluates to true or false, depending on the
327 /// semantics of the operator. This is only valid for ">= v7" of the profile
328 /// version so that we facilitate backward compatibility. In addition, in
329 /// order to use MC/DC, count the number of total LHS and RHS conditions.
330 bool VisitBinaryOperator(BinaryOperator *S) {
331 if (S->isLogicalOp()) {
332 if (CodeGenFunction::isInstrumentedCondition(S->getLHS()))
333 NumCond++;
335 if (CodeGenFunction::isInstrumentedCondition(S->getRHS())) {
336 if (ProfileVersion >= llvm::IndexedInstrProf::Version7)
337 CounterMap[S->getRHS()] = NextCounter++;
339 NumCond++;
342 return Base::VisitBinaryOperator(S);
345 bool VisitConditionalOperator(ConditionalOperator *S) {
346 if (llvm::EnableSingleByteCoverage && S->getTrueExpr())
347 CounterMap[S->getTrueExpr()] = NextCounter++;
348 if (llvm::EnableSingleByteCoverage && S->getFalseExpr())
349 CounterMap[S->getFalseExpr()] = NextCounter++;
350 return Base::VisitConditionalOperator(S);
353 /// Include \p S in the function hash.
354 bool VisitStmt(Stmt *S) {
355 auto Type = updateCounterMappings(S);
356 if (Hash.getHashVersion() != PGO_HASH_V1)
357 Type = getHashType(Hash.getHashVersion(), S);
358 if (Type != PGOHash::None)
359 Hash.combine(Type);
360 return true;
363 bool TraverseIfStmt(IfStmt *If) {
364 // If we used the V1 hash, use the default traversal.
365 if (Hash.getHashVersion() == PGO_HASH_V1)
366 return Base::TraverseIfStmt(If);
368 // When single byte coverage mode is enabled, add a counter to then and
369 // else.
370 bool NoSingleByteCoverage = !llvm::EnableSingleByteCoverage;
371 for (Stmt *CS : If->children()) {
372 if (!CS || NoSingleByteCoverage)
373 continue;
374 if (CS == If->getThen())
375 CounterMap[If->getThen()] = NextCounter++;
376 else if (CS == If->getElse())
377 CounterMap[If->getElse()] = NextCounter++;
380 // Otherwise, keep track of which branch we're in while traversing.
381 VisitStmt(If);
383 for (Stmt *CS : If->children()) {
384 if (!CS)
385 continue;
386 if (CS == If->getThen())
387 Hash.combine(PGOHash::IfThenBranch);
388 else if (CS == If->getElse())
389 Hash.combine(PGOHash::IfElseBranch);
390 TraverseStmt(CS);
392 Hash.combine(PGOHash::EndOfScope);
393 return true;
396 bool TraverseWhileStmt(WhileStmt *While) {
397 // When single byte coverage mode is enabled, add a counter to condition and
398 // body.
399 bool NoSingleByteCoverage = !llvm::EnableSingleByteCoverage;
400 for (Stmt *CS : While->children()) {
401 if (!CS || NoSingleByteCoverage)
402 continue;
403 if (CS == While->getCond())
404 CounterMap[While->getCond()] = NextCounter++;
405 else if (CS == While->getBody())
406 CounterMap[While->getBody()] = NextCounter++;
409 Base::TraverseWhileStmt(While);
410 if (Hash.getHashVersion() != PGO_HASH_V1)
411 Hash.combine(PGOHash::EndOfScope);
412 return true;
415 bool TraverseDoStmt(DoStmt *Do) {
416 // When single byte coverage mode is enabled, add a counter to condition and
417 // body.
418 bool NoSingleByteCoverage = !llvm::EnableSingleByteCoverage;
419 for (Stmt *CS : Do->children()) {
420 if (!CS || NoSingleByteCoverage)
421 continue;
422 if (CS == Do->getCond())
423 CounterMap[Do->getCond()] = NextCounter++;
424 else if (CS == Do->getBody())
425 CounterMap[Do->getBody()] = NextCounter++;
428 Base::TraverseDoStmt(Do);
429 if (Hash.getHashVersion() != PGO_HASH_V1)
430 Hash.combine(PGOHash::EndOfScope);
431 return true;
434 bool TraverseForStmt(ForStmt *For) {
435 // When single byte coverage mode is enabled, add a counter to condition,
436 // increment and body.
437 bool NoSingleByteCoverage = !llvm::EnableSingleByteCoverage;
438 for (Stmt *CS : For->children()) {
439 if (!CS || NoSingleByteCoverage)
440 continue;
441 if (CS == For->getCond())
442 CounterMap[For->getCond()] = NextCounter++;
443 else if (CS == For->getInc())
444 CounterMap[For->getInc()] = NextCounter++;
445 else if (CS == For->getBody())
446 CounterMap[For->getBody()] = NextCounter++;
449 Base::TraverseForStmt(For);
450 if (Hash.getHashVersion() != PGO_HASH_V1)
451 Hash.combine(PGOHash::EndOfScope);
452 return true;
455 bool TraverseCXXForRangeStmt(CXXForRangeStmt *ForRange) {
456 // When single byte coverage mode is enabled, add a counter to body.
457 bool NoSingleByteCoverage = !llvm::EnableSingleByteCoverage;
458 for (Stmt *CS : ForRange->children()) {
459 if (!CS || NoSingleByteCoverage)
460 continue;
461 if (CS == ForRange->getBody())
462 CounterMap[ForRange->getBody()] = NextCounter++;
465 Base::TraverseCXXForRangeStmt(ForRange);
466 if (Hash.getHashVersion() != PGO_HASH_V1)
467 Hash.combine(PGOHash::EndOfScope);
468 return true;
471 // If the statement type \p N is nestable, and its nesting impacts profile
472 // stability, define a custom traversal which tracks the end of the statement
473 // in the hash (provided we're not using the V1 hash).
474 #define DEFINE_NESTABLE_TRAVERSAL(N) \
475 bool Traverse##N(N *S) { \
476 Base::Traverse##N(S); \
477 if (Hash.getHashVersion() != PGO_HASH_V1) \
478 Hash.combine(PGOHash::EndOfScope); \
479 return true; \
482 DEFINE_NESTABLE_TRAVERSAL(ObjCForCollectionStmt)
483 DEFINE_NESTABLE_TRAVERSAL(CXXTryStmt)
484 DEFINE_NESTABLE_TRAVERSAL(CXXCatchStmt)
486 /// Get version \p HashVersion of the PGO hash for \p S.
487 PGOHash::HashType getHashType(PGOHashVersion HashVersion, const Stmt *S) {
488 switch (S->getStmtClass()) {
489 default:
490 break;
491 case Stmt::LabelStmtClass:
492 return PGOHash::LabelStmt;
493 case Stmt::WhileStmtClass:
494 return PGOHash::WhileStmt;
495 case Stmt::DoStmtClass:
496 return PGOHash::DoStmt;
497 case Stmt::ForStmtClass:
498 return PGOHash::ForStmt;
499 case Stmt::CXXForRangeStmtClass:
500 return PGOHash::CXXForRangeStmt;
501 case Stmt::ObjCForCollectionStmtClass:
502 return PGOHash::ObjCForCollectionStmt;
503 case Stmt::SwitchStmtClass:
504 return PGOHash::SwitchStmt;
505 case Stmt::CaseStmtClass:
506 return PGOHash::CaseStmt;
507 case Stmt::DefaultStmtClass:
508 return PGOHash::DefaultStmt;
509 case Stmt::IfStmtClass:
510 return PGOHash::IfStmt;
511 case Stmt::CXXTryStmtClass:
512 return PGOHash::CXXTryStmt;
513 case Stmt::CXXCatchStmtClass:
514 return PGOHash::CXXCatchStmt;
515 case Stmt::ConditionalOperatorClass:
516 return PGOHash::ConditionalOperator;
517 case Stmt::BinaryConditionalOperatorClass:
518 return PGOHash::BinaryConditionalOperator;
519 case Stmt::BinaryOperatorClass: {
520 const BinaryOperator *BO = cast<BinaryOperator>(S);
521 if (BO->getOpcode() == BO_LAnd)
522 return PGOHash::BinaryOperatorLAnd;
523 if (BO->getOpcode() == BO_LOr)
524 return PGOHash::BinaryOperatorLOr;
525 if (HashVersion >= PGO_HASH_V2) {
526 switch (BO->getOpcode()) {
527 default:
528 break;
529 case BO_LT:
530 return PGOHash::BinaryOperatorLT;
531 case BO_GT:
532 return PGOHash::BinaryOperatorGT;
533 case BO_LE:
534 return PGOHash::BinaryOperatorLE;
535 case BO_GE:
536 return PGOHash::BinaryOperatorGE;
537 case BO_EQ:
538 return PGOHash::BinaryOperatorEQ;
539 case BO_NE:
540 return PGOHash::BinaryOperatorNE;
543 break;
547 if (HashVersion >= PGO_HASH_V2) {
548 switch (S->getStmtClass()) {
549 default:
550 break;
551 case Stmt::GotoStmtClass:
552 return PGOHash::GotoStmt;
553 case Stmt::IndirectGotoStmtClass:
554 return PGOHash::IndirectGotoStmt;
555 case Stmt::BreakStmtClass:
556 return PGOHash::BreakStmt;
557 case Stmt::ContinueStmtClass:
558 return PGOHash::ContinueStmt;
559 case Stmt::ReturnStmtClass:
560 return PGOHash::ReturnStmt;
561 case Stmt::CXXThrowExprClass:
562 return PGOHash::ThrowExpr;
563 case Stmt::UnaryOperatorClass: {
564 const UnaryOperator *UO = cast<UnaryOperator>(S);
565 if (UO->getOpcode() == UO_LNot)
566 return PGOHash::UnaryOperatorLNot;
567 break;
572 return PGOHash::None;
576 /// A StmtVisitor that propagates the raw counts through the AST and
577 /// records the count at statements where the value may change.
578 struct ComputeRegionCounts : public ConstStmtVisitor<ComputeRegionCounts> {
579 /// PGO state.
580 CodeGenPGO &PGO;
582 /// A flag that is set when the current count should be recorded on the
583 /// next statement, such as at the exit of a loop.
584 bool RecordNextStmtCount;
586 /// The count at the current location in the traversal.
587 uint64_t CurrentCount;
589 /// The map of statements to count values.
590 llvm::DenseMap<const Stmt *, uint64_t> &CountMap;
592 /// BreakContinueStack - Keep counts of breaks and continues inside loops.
593 struct BreakContinue {
594 uint64_t BreakCount = 0;
595 uint64_t ContinueCount = 0;
596 BreakContinue() = default;
598 SmallVector<BreakContinue, 8> BreakContinueStack;
600 ComputeRegionCounts(llvm::DenseMap<const Stmt *, uint64_t> &CountMap,
601 CodeGenPGO &PGO)
602 : PGO(PGO), RecordNextStmtCount(false), CountMap(CountMap) {}
604 void RecordStmtCount(const Stmt *S) {
605 if (RecordNextStmtCount) {
606 CountMap[S] = CurrentCount;
607 RecordNextStmtCount = false;
611 /// Set and return the current count.
612 uint64_t setCount(uint64_t Count) {
613 CurrentCount = Count;
614 return Count;
617 void VisitStmt(const Stmt *S) {
618 RecordStmtCount(S);
619 for (const Stmt *Child : S->children())
620 if (Child)
621 this->Visit(Child);
624 void VisitFunctionDecl(const FunctionDecl *D) {
625 // Counter tracks entry to the function body.
626 uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
627 CountMap[D->getBody()] = BodyCount;
628 Visit(D->getBody());
631 // Skip lambda expressions. We visit these as FunctionDecls when we're
632 // generating them and aren't interested in the body when generating a
633 // parent context.
634 void VisitLambdaExpr(const LambdaExpr *LE) {}
636 void VisitCapturedDecl(const CapturedDecl *D) {
637 // Counter tracks entry to the capture body.
638 uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
639 CountMap[D->getBody()] = BodyCount;
640 Visit(D->getBody());
643 void VisitObjCMethodDecl(const ObjCMethodDecl *D) {
644 // Counter tracks entry to the method body.
645 uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
646 CountMap[D->getBody()] = BodyCount;
647 Visit(D->getBody());
650 void VisitBlockDecl(const BlockDecl *D) {
651 // Counter tracks entry to the block body.
652 uint64_t BodyCount = setCount(PGO.getRegionCount(D->getBody()));
653 CountMap[D->getBody()] = BodyCount;
654 Visit(D->getBody());
657 void VisitReturnStmt(const ReturnStmt *S) {
658 RecordStmtCount(S);
659 if (S->getRetValue())
660 Visit(S->getRetValue());
661 CurrentCount = 0;
662 RecordNextStmtCount = true;
665 void VisitCXXThrowExpr(const CXXThrowExpr *E) {
666 RecordStmtCount(E);
667 if (E->getSubExpr())
668 Visit(E->getSubExpr());
669 CurrentCount = 0;
670 RecordNextStmtCount = true;
673 void VisitGotoStmt(const GotoStmt *S) {
674 RecordStmtCount(S);
675 CurrentCount = 0;
676 RecordNextStmtCount = true;
679 void VisitLabelStmt(const LabelStmt *S) {
680 RecordNextStmtCount = false;
681 // Counter tracks the block following the label.
682 uint64_t BlockCount = setCount(PGO.getRegionCount(S));
683 CountMap[S] = BlockCount;
684 Visit(S->getSubStmt());
687 void VisitBreakStmt(const BreakStmt *S) {
688 RecordStmtCount(S);
689 assert(!BreakContinueStack.empty() && "break not in a loop or switch!");
690 BreakContinueStack.back().BreakCount += CurrentCount;
691 CurrentCount = 0;
692 RecordNextStmtCount = true;
695 void VisitContinueStmt(const ContinueStmt *S) {
696 RecordStmtCount(S);
697 assert(!BreakContinueStack.empty() && "continue stmt not in a loop!");
698 BreakContinueStack.back().ContinueCount += CurrentCount;
699 CurrentCount = 0;
700 RecordNextStmtCount = true;
703 void VisitWhileStmt(const WhileStmt *S) {
704 RecordStmtCount(S);
705 uint64_t ParentCount = CurrentCount;
707 BreakContinueStack.push_back(BreakContinue());
708 // Visit the body region first so the break/continue adjustments can be
709 // included when visiting the condition.
710 uint64_t BodyCount = setCount(PGO.getRegionCount(S));
711 CountMap[S->getBody()] = CurrentCount;
712 Visit(S->getBody());
713 uint64_t BackedgeCount = CurrentCount;
715 // ...then go back and propagate counts through the condition. The count
716 // at the start of the condition is the sum of the incoming edges,
717 // the backedge from the end of the loop body, and the edges from
718 // continue statements.
719 BreakContinue BC = BreakContinueStack.pop_back_val();
720 uint64_t CondCount =
721 setCount(ParentCount + BackedgeCount + BC.ContinueCount);
722 CountMap[S->getCond()] = CondCount;
723 Visit(S->getCond());
724 setCount(BC.BreakCount + CondCount - BodyCount);
725 RecordNextStmtCount = true;
728 void VisitDoStmt(const DoStmt *S) {
729 RecordStmtCount(S);
730 uint64_t LoopCount = PGO.getRegionCount(S);
732 BreakContinueStack.push_back(BreakContinue());
733 // The count doesn't include the fallthrough from the parent scope. Add it.
734 uint64_t BodyCount = setCount(LoopCount + CurrentCount);
735 CountMap[S->getBody()] = BodyCount;
736 Visit(S->getBody());
737 uint64_t BackedgeCount = CurrentCount;
739 BreakContinue BC = BreakContinueStack.pop_back_val();
740 // The count at the start of the condition is equal to the count at the
741 // end of the body, plus any continues.
742 uint64_t CondCount = setCount(BackedgeCount + BC.ContinueCount);
743 CountMap[S->getCond()] = CondCount;
744 Visit(S->getCond());
745 setCount(BC.BreakCount + CondCount - LoopCount);
746 RecordNextStmtCount = true;
749 void VisitForStmt(const ForStmt *S) {
750 RecordStmtCount(S);
751 if (S->getInit())
752 Visit(S->getInit());
754 uint64_t ParentCount = CurrentCount;
756 BreakContinueStack.push_back(BreakContinue());
757 // Visit the body region first. (This is basically the same as a while
758 // loop; see further comments in VisitWhileStmt.)
759 uint64_t BodyCount = setCount(PGO.getRegionCount(S));
760 CountMap[S->getBody()] = BodyCount;
761 Visit(S->getBody());
762 uint64_t BackedgeCount = CurrentCount;
763 BreakContinue BC = BreakContinueStack.pop_back_val();
765 // The increment is essentially part of the body but it needs to include
766 // the count for all the continue statements.
767 if (S->getInc()) {
768 uint64_t IncCount = setCount(BackedgeCount + BC.ContinueCount);
769 CountMap[S->getInc()] = IncCount;
770 Visit(S->getInc());
773 // ...then go back and propagate counts through the condition.
774 uint64_t CondCount =
775 setCount(ParentCount + BackedgeCount + BC.ContinueCount);
776 if (S->getCond()) {
777 CountMap[S->getCond()] = CondCount;
778 Visit(S->getCond());
780 setCount(BC.BreakCount + CondCount - BodyCount);
781 RecordNextStmtCount = true;
784 void VisitCXXForRangeStmt(const CXXForRangeStmt *S) {
785 RecordStmtCount(S);
786 if (S->getInit())
787 Visit(S->getInit());
788 Visit(S->getLoopVarStmt());
789 Visit(S->getRangeStmt());
790 Visit(S->getBeginStmt());
791 Visit(S->getEndStmt());
793 uint64_t ParentCount = CurrentCount;
794 BreakContinueStack.push_back(BreakContinue());
795 // Visit the body region first. (This is basically the same as a while
796 // loop; see further comments in VisitWhileStmt.)
797 uint64_t BodyCount = setCount(PGO.getRegionCount(S));
798 CountMap[S->getBody()] = BodyCount;
799 Visit(S->getBody());
800 uint64_t BackedgeCount = CurrentCount;
801 BreakContinue BC = BreakContinueStack.pop_back_val();
803 // The increment is essentially part of the body but it needs to include
804 // the count for all the continue statements.
805 uint64_t IncCount = setCount(BackedgeCount + BC.ContinueCount);
806 CountMap[S->getInc()] = IncCount;
807 Visit(S->getInc());
809 // ...then go back and propagate counts through the condition.
810 uint64_t CondCount =
811 setCount(ParentCount + BackedgeCount + BC.ContinueCount);
812 CountMap[S->getCond()] = CondCount;
813 Visit(S->getCond());
814 setCount(BC.BreakCount + CondCount - BodyCount);
815 RecordNextStmtCount = true;
818 void VisitObjCForCollectionStmt(const ObjCForCollectionStmt *S) {
819 RecordStmtCount(S);
820 Visit(S->getElement());
821 uint64_t ParentCount = CurrentCount;
822 BreakContinueStack.push_back(BreakContinue());
823 // Counter tracks the body of the loop.
824 uint64_t BodyCount = setCount(PGO.getRegionCount(S));
825 CountMap[S->getBody()] = BodyCount;
826 Visit(S->getBody());
827 uint64_t BackedgeCount = CurrentCount;
828 BreakContinue BC = BreakContinueStack.pop_back_val();
830 setCount(BC.BreakCount + ParentCount + BackedgeCount + BC.ContinueCount -
831 BodyCount);
832 RecordNextStmtCount = true;
835 void VisitSwitchStmt(const SwitchStmt *S) {
836 RecordStmtCount(S);
837 if (S->getInit())
838 Visit(S->getInit());
839 Visit(S->getCond());
840 CurrentCount = 0;
841 BreakContinueStack.push_back(BreakContinue());
842 Visit(S->getBody());
843 // If the switch is inside a loop, add the continue counts.
844 BreakContinue BC = BreakContinueStack.pop_back_val();
845 if (!BreakContinueStack.empty())
846 BreakContinueStack.back().ContinueCount += BC.ContinueCount;
847 // Counter tracks the exit block of the switch.
848 setCount(PGO.getRegionCount(S));
849 RecordNextStmtCount = true;
852 void VisitSwitchCase(const SwitchCase *S) {
853 RecordNextStmtCount = false;
854 // Counter for this particular case. This counts only jumps from the
855 // switch header and does not include fallthrough from the case before
856 // this one.
857 uint64_t CaseCount = PGO.getRegionCount(S);
858 setCount(CurrentCount + CaseCount);
859 // We need the count without fallthrough in the mapping, so it's more useful
860 // for branch probabilities.
861 CountMap[S] = CaseCount;
862 RecordNextStmtCount = true;
863 Visit(S->getSubStmt());
866 void VisitIfStmt(const IfStmt *S) {
867 RecordStmtCount(S);
869 if (S->isConsteval()) {
870 const Stmt *Stm = S->isNegatedConsteval() ? S->getThen() : S->getElse();
871 if (Stm)
872 Visit(Stm);
873 return;
876 uint64_t ParentCount = CurrentCount;
877 if (S->getInit())
878 Visit(S->getInit());
879 Visit(S->getCond());
881 // Counter tracks the "then" part of an if statement. The count for
882 // the "else" part, if it exists, will be calculated from this counter.
883 uint64_t ThenCount = setCount(PGO.getRegionCount(S));
884 CountMap[S->getThen()] = ThenCount;
885 Visit(S->getThen());
886 uint64_t OutCount = CurrentCount;
888 uint64_t ElseCount = ParentCount - ThenCount;
889 if (S->getElse()) {
890 setCount(ElseCount);
891 CountMap[S->getElse()] = ElseCount;
892 Visit(S->getElse());
893 OutCount += CurrentCount;
894 } else
895 OutCount += ElseCount;
896 setCount(OutCount);
897 RecordNextStmtCount = true;
900 void VisitCXXTryStmt(const CXXTryStmt *S) {
901 RecordStmtCount(S);
902 Visit(S->getTryBlock());
903 for (unsigned I = 0, E = S->getNumHandlers(); I < E; ++I)
904 Visit(S->getHandler(I));
905 // Counter tracks the continuation block of the try statement.
906 setCount(PGO.getRegionCount(S));
907 RecordNextStmtCount = true;
910 void VisitCXXCatchStmt(const CXXCatchStmt *S) {
911 RecordNextStmtCount = false;
912 // Counter tracks the catch statement's handler block.
913 uint64_t CatchCount = setCount(PGO.getRegionCount(S));
914 CountMap[S] = CatchCount;
915 Visit(S->getHandlerBlock());
918 void VisitAbstractConditionalOperator(const AbstractConditionalOperator *E) {
919 RecordStmtCount(E);
920 uint64_t ParentCount = CurrentCount;
921 Visit(E->getCond());
923 // Counter tracks the "true" part of a conditional operator. The
924 // count in the "false" part will be calculated from this counter.
925 uint64_t TrueCount = setCount(PGO.getRegionCount(E));
926 CountMap[E->getTrueExpr()] = TrueCount;
927 Visit(E->getTrueExpr());
928 uint64_t OutCount = CurrentCount;
930 uint64_t FalseCount = setCount(ParentCount - TrueCount);
931 CountMap[E->getFalseExpr()] = FalseCount;
932 Visit(E->getFalseExpr());
933 OutCount += CurrentCount;
935 setCount(OutCount);
936 RecordNextStmtCount = true;
939 void VisitBinLAnd(const BinaryOperator *E) {
940 RecordStmtCount(E);
941 uint64_t ParentCount = CurrentCount;
942 Visit(E->getLHS());
943 // Counter tracks the right hand side of a logical and operator.
944 uint64_t RHSCount = setCount(PGO.getRegionCount(E));
945 CountMap[E->getRHS()] = RHSCount;
946 Visit(E->getRHS());
947 setCount(ParentCount + RHSCount - CurrentCount);
948 RecordNextStmtCount = true;
951 void VisitBinLOr(const BinaryOperator *E) {
952 RecordStmtCount(E);
953 uint64_t ParentCount = CurrentCount;
954 Visit(E->getLHS());
955 // Counter tracks the right hand side of a logical or operator.
956 uint64_t RHSCount = setCount(PGO.getRegionCount(E));
957 CountMap[E->getRHS()] = RHSCount;
958 Visit(E->getRHS());
959 setCount(ParentCount + RHSCount - CurrentCount);
960 RecordNextStmtCount = true;
963 } // end anonymous namespace
965 void PGOHash::combine(HashType Type) {
966 // Check that we never combine 0 and only have six bits.
967 assert(Type && "Hash is invalid: unexpected type 0");
968 assert(unsigned(Type) < TooBig && "Hash is invalid: too many types");
970 // Pass through MD5 if enough work has built up.
971 if (Count && Count % NumTypesPerWord == 0) {
972 using namespace llvm::support;
973 uint64_t Swapped =
974 endian::byte_swap<uint64_t, llvm::endianness::little>(Working);
975 MD5.update(llvm::ArrayRef((uint8_t *)&Swapped, sizeof(Swapped)));
976 Working = 0;
979 // Accumulate the current type.
980 ++Count;
981 Working = Working << NumBitsPerType | Type;
984 uint64_t PGOHash::finalize() {
985 // Use Working as the hash directly if we never used MD5.
986 if (Count <= NumTypesPerWord)
987 // No need to byte swap here, since none of the math was endian-dependent.
988 // This number will be byte-swapped as required on endianness transitions,
989 // so we will see the same value on the other side.
990 return Working;
992 // Check for remaining work in Working.
993 if (Working) {
994 // Keep the buggy behavior from v1 and v2 for backward-compatibility. This
995 // is buggy because it converts a uint64_t into an array of uint8_t.
996 if (HashVersion < PGO_HASH_V3) {
997 MD5.update({(uint8_t)Working});
998 } else {
999 using namespace llvm::support;
1000 uint64_t Swapped =
1001 endian::byte_swap<uint64_t, llvm::endianness::little>(Working);
1002 MD5.update(llvm::ArrayRef((uint8_t *)&Swapped, sizeof(Swapped)));
1006 // Finalize the MD5 and return the hash.
1007 llvm::MD5::MD5Result Result;
1008 MD5.final(Result);
1009 return Result.low();
1012 void CodeGenPGO::assignRegionCounters(GlobalDecl GD, llvm::Function *Fn) {
1013 const Decl *D = GD.getDecl();
1014 if (!D->hasBody())
1015 return;
1017 // Skip CUDA/HIP kernel launch stub functions.
1018 if (CGM.getLangOpts().CUDA && !CGM.getLangOpts().CUDAIsDevice &&
1019 D->hasAttr<CUDAGlobalAttr>())
1020 return;
1022 bool InstrumentRegions = CGM.getCodeGenOpts().hasProfileClangInstr();
1023 llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
1024 if (!InstrumentRegions && !PGOReader)
1025 return;
1026 if (D->isImplicit())
1027 return;
1028 // Constructors and destructors may be represented by several functions in IR.
1029 // If so, instrument only base variant, others are implemented by delegation
1030 // to the base one, it would be counted twice otherwise.
1031 if (CGM.getTarget().getCXXABI().hasConstructorVariants()) {
1032 if (const auto *CCD = dyn_cast<CXXConstructorDecl>(D))
1033 if (GD.getCtorType() != Ctor_Base &&
1034 CodeGenFunction::IsConstructorDelegationValid(CCD))
1035 return;
1037 if (isa<CXXDestructorDecl>(D) && GD.getDtorType() != Dtor_Base)
1038 return;
1040 CGM.ClearUnusedCoverageMapping(D);
1041 if (Fn->hasFnAttribute(llvm::Attribute::NoProfile))
1042 return;
1043 if (Fn->hasFnAttribute(llvm::Attribute::SkipProfile))
1044 return;
1046 SourceManager &SM = CGM.getContext().getSourceManager();
1047 if (!llvm::coverage::SystemHeadersCoverage &&
1048 SM.isInSystemHeader(D->getLocation()))
1049 return;
1051 setFuncName(Fn);
1053 mapRegionCounters(D);
1054 if (CGM.getCodeGenOpts().CoverageMapping)
1055 emitCounterRegionMapping(D);
1056 if (PGOReader) {
1057 loadRegionCounts(PGOReader, SM.isInMainFile(D->getLocation()));
1058 computeRegionCounts(D);
1059 applyFunctionAttributes(PGOReader, Fn);
1063 void CodeGenPGO::mapRegionCounters(const Decl *D) {
1064 // Use the latest hash version when inserting instrumentation, but use the
1065 // version in the indexed profile if we're reading PGO data.
1066 PGOHashVersion HashVersion = PGO_HASH_LATEST;
1067 uint64_t ProfileVersion = llvm::IndexedInstrProf::Version;
1068 if (auto *PGOReader = CGM.getPGOReader()) {
1069 HashVersion = getPGOHashVersion(PGOReader, CGM);
1070 ProfileVersion = PGOReader->getVersion();
1073 // If MC/DC is enabled, set the MaxConditions to a preset value. Otherwise,
1074 // set it to zero. This value impacts the number of conditions accepted in a
1075 // given boolean expression, which impacts the size of the bitmap used to
1076 // track test vector execution for that boolean expression. Because the
1077 // bitmap scales exponentially (2^n) based on the number of conditions seen,
1078 // the maximum value is hard-coded at 6 conditions, which is more than enough
1079 // for most embedded applications. Setting a maximum value prevents the
1080 // bitmap footprint from growing too large without the user's knowledge. In
1081 // the future, this value could be adjusted with a command-line option.
1082 unsigned MCDCMaxConditions =
1083 (CGM.getCodeGenOpts().MCDCCoverage ? CGM.getCodeGenOpts().MCDCMaxConds
1084 : 0);
1086 RegionCounterMap.reset(new llvm::DenseMap<const Stmt *, unsigned>);
1087 RegionMCDCState.reset(new MCDC::State);
1088 MapRegionCounters Walker(HashVersion, ProfileVersion, *RegionCounterMap,
1089 *RegionMCDCState, MCDCMaxConditions, CGM.getDiags());
1090 if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
1091 Walker.TraverseDecl(const_cast<FunctionDecl *>(FD));
1092 else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(D))
1093 Walker.TraverseDecl(const_cast<ObjCMethodDecl *>(MD));
1094 else if (const BlockDecl *BD = dyn_cast_or_null<BlockDecl>(D))
1095 Walker.TraverseDecl(const_cast<BlockDecl *>(BD));
1096 else if (const CapturedDecl *CD = dyn_cast_or_null<CapturedDecl>(D))
1097 Walker.TraverseDecl(const_cast<CapturedDecl *>(CD));
1098 assert(Walker.NextCounter > 0 && "no entry counter mapped for decl");
1099 NumRegionCounters = Walker.NextCounter;
1100 FunctionHash = Walker.Hash.finalize();
1103 bool CodeGenPGO::skipRegionMappingForDecl(const Decl *D) {
1104 if (!D->getBody())
1105 return true;
1107 // Skip host-only functions in the CUDA device compilation and device-only
1108 // functions in the host compilation. Just roughly filter them out based on
1109 // the function attributes. If there are effectively host-only or device-only
1110 // ones, their coverage mapping may still be generated.
1111 if (CGM.getLangOpts().CUDA &&
1112 ((CGM.getLangOpts().CUDAIsDevice && !D->hasAttr<CUDADeviceAttr>() &&
1113 !D->hasAttr<CUDAGlobalAttr>()) ||
1114 (!CGM.getLangOpts().CUDAIsDevice &&
1115 (D->hasAttr<CUDAGlobalAttr>() ||
1116 (!D->hasAttr<CUDAHostAttr>() && D->hasAttr<CUDADeviceAttr>())))))
1117 return true;
1119 // Don't map the functions in system headers.
1120 const auto &SM = CGM.getContext().getSourceManager();
1121 auto Loc = D->getBody()->getBeginLoc();
1122 return !llvm::coverage::SystemHeadersCoverage && SM.isInSystemHeader(Loc);
1125 void CodeGenPGO::emitCounterRegionMapping(const Decl *D) {
1126 if (skipRegionMappingForDecl(D))
1127 return;
1129 std::string CoverageMapping;
1130 llvm::raw_string_ostream OS(CoverageMapping);
1131 RegionMCDCState->BranchByStmt.clear();
1132 CoverageMappingGen MappingGen(
1133 *CGM.getCoverageMapping(), CGM.getContext().getSourceManager(),
1134 CGM.getLangOpts(), RegionCounterMap.get(), RegionMCDCState.get());
1135 MappingGen.emitCounterMapping(D, OS);
1137 if (CoverageMapping.empty())
1138 return;
1140 CGM.getCoverageMapping()->addFunctionMappingRecord(
1141 FuncNameVar, FuncName, FunctionHash, CoverageMapping);
1144 void
1145 CodeGenPGO::emitEmptyCounterMapping(const Decl *D, StringRef Name,
1146 llvm::GlobalValue::LinkageTypes Linkage) {
1147 if (skipRegionMappingForDecl(D))
1148 return;
1150 std::string CoverageMapping;
1151 llvm::raw_string_ostream OS(CoverageMapping);
1152 CoverageMappingGen MappingGen(*CGM.getCoverageMapping(),
1153 CGM.getContext().getSourceManager(),
1154 CGM.getLangOpts());
1155 MappingGen.emitEmptyMapping(D, OS);
1157 if (CoverageMapping.empty())
1158 return;
1160 setFuncName(Name, Linkage);
1161 CGM.getCoverageMapping()->addFunctionMappingRecord(
1162 FuncNameVar, FuncName, FunctionHash, CoverageMapping, false);
1165 void CodeGenPGO::computeRegionCounts(const Decl *D) {
1166 StmtCountMap.reset(new llvm::DenseMap<const Stmt *, uint64_t>);
1167 ComputeRegionCounts Walker(*StmtCountMap, *this);
1168 if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
1169 Walker.VisitFunctionDecl(FD);
1170 else if (const ObjCMethodDecl *MD = dyn_cast_or_null<ObjCMethodDecl>(D))
1171 Walker.VisitObjCMethodDecl(MD);
1172 else if (const BlockDecl *BD = dyn_cast_or_null<BlockDecl>(D))
1173 Walker.VisitBlockDecl(BD);
1174 else if (const CapturedDecl *CD = dyn_cast_or_null<CapturedDecl>(D))
1175 Walker.VisitCapturedDecl(const_cast<CapturedDecl *>(CD));
1178 void
1179 CodeGenPGO::applyFunctionAttributes(llvm::IndexedInstrProfReader *PGOReader,
1180 llvm::Function *Fn) {
1181 if (!haveRegionCounts())
1182 return;
1184 uint64_t FunctionCount = getRegionCount(nullptr);
1185 Fn->setEntryCount(FunctionCount);
1188 void CodeGenPGO::emitCounterSetOrIncrement(CGBuilderTy &Builder, const Stmt *S,
1189 llvm::Value *StepV) {
1190 if (!RegionCounterMap || !Builder.GetInsertBlock())
1191 return;
1193 unsigned Counter = (*RegionCounterMap)[S];
1195 // Make sure that pointer to global is passed in with zero addrspace
1196 // This is relevant during GPU profiling
1197 auto *NormalizedFuncNameVarPtr =
1198 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
1199 FuncNameVar, llvm::PointerType::get(CGM.getLLVMContext(), 0));
1201 llvm::Value *Args[] = {
1202 NormalizedFuncNameVarPtr, Builder.getInt64(FunctionHash),
1203 Builder.getInt32(NumRegionCounters), Builder.getInt32(Counter), StepV};
1205 if (llvm::EnableSingleByteCoverage)
1206 Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::instrprof_cover),
1207 ArrayRef(Args, 4));
1208 else if (!StepV)
1209 Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment),
1210 ArrayRef(Args, 4));
1211 else
1212 Builder.CreateCall(
1213 CGM.getIntrinsic(llvm::Intrinsic::instrprof_increment_step), Args);
1216 bool CodeGenPGO::canEmitMCDCCoverage(const CGBuilderTy &Builder) {
1217 return (CGM.getCodeGenOpts().hasProfileClangInstr() &&
1218 CGM.getCodeGenOpts().MCDCCoverage && Builder.GetInsertBlock());
1221 void CodeGenPGO::emitMCDCParameters(CGBuilderTy &Builder) {
1222 if (!canEmitMCDCCoverage(Builder) || !RegionMCDCState)
1223 return;
1225 auto *I8PtrTy = llvm::PointerType::getUnqual(CGM.getLLVMContext());
1227 // Emit intrinsic representing MCDC bitmap parameters at function entry.
1228 // This is used by the instrumentation pass, but it isn't actually lowered to
1229 // anything.
1230 llvm::Value *Args[3] = {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
1231 Builder.getInt64(FunctionHash),
1232 Builder.getInt32(RegionMCDCState->BitmapBits)};
1233 Builder.CreateCall(
1234 CGM.getIntrinsic(llvm::Intrinsic::instrprof_mcdc_parameters), Args);
1237 void CodeGenPGO::emitMCDCTestVectorBitmapUpdate(CGBuilderTy &Builder,
1238 const Expr *S,
1239 Address MCDCCondBitmapAddr,
1240 CodeGenFunction &CGF) {
1241 if (!canEmitMCDCCoverage(Builder) || !RegionMCDCState)
1242 return;
1244 S = S->IgnoreParens();
1246 auto DecisionStateIter = RegionMCDCState->DecisionByStmt.find(S);
1247 if (DecisionStateIter == RegionMCDCState->DecisionByStmt.end())
1248 return;
1250 // Don't create tvbitmap_update if the record is allocated but excluded.
1251 // Or `bitmap |= (1 << 0)` would be wrongly executed to the next bitmap.
1252 if (DecisionStateIter->second.Indices.size() == 0)
1253 return;
1255 // Extract the offset of the global bitmap associated with this expression.
1256 unsigned MCDCTestVectorBitmapOffset = DecisionStateIter->second.BitmapIdx;
1257 auto *I8PtrTy = llvm::PointerType::getUnqual(CGM.getLLVMContext());
1259 // Emit intrinsic responsible for updating the global bitmap corresponding to
1260 // a boolean expression. The index being set is based on the value loaded
1261 // from a pointer to a dedicated temporary value on the stack that is itself
1262 // updated via emitMCDCCondBitmapReset() and emitMCDCCondBitmapUpdate(). The
1263 // index represents an executed test vector.
1264 llvm::Value *Args[4] = {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
1265 Builder.getInt64(FunctionHash),
1266 Builder.getInt32(MCDCTestVectorBitmapOffset),
1267 MCDCCondBitmapAddr.emitRawPointer(CGF)};
1268 Builder.CreateCall(
1269 CGM.getIntrinsic(llvm::Intrinsic::instrprof_mcdc_tvbitmap_update), Args);
1272 void CodeGenPGO::emitMCDCCondBitmapReset(CGBuilderTy &Builder, const Expr *S,
1273 Address MCDCCondBitmapAddr) {
1274 if (!canEmitMCDCCoverage(Builder) || !RegionMCDCState)
1275 return;
1277 S = S->IgnoreParens();
1279 if (!RegionMCDCState->DecisionByStmt.contains(S))
1280 return;
1282 // Emit intrinsic that resets a dedicated temporary value on the stack to 0.
1283 Builder.CreateStore(Builder.getInt32(0), MCDCCondBitmapAddr);
1286 void CodeGenPGO::emitMCDCCondBitmapUpdate(CGBuilderTy &Builder, const Expr *S,
1287 Address MCDCCondBitmapAddr,
1288 llvm::Value *Val,
1289 CodeGenFunction &CGF) {
1290 if (!canEmitMCDCCoverage(Builder) || !RegionMCDCState)
1291 return;
1293 // Even though, for simplicity, parentheses and unary logical-NOT operators
1294 // are considered part of their underlying condition for both MC/DC and
1295 // branch coverage, the condition IDs themselves are assigned and tracked
1296 // using the underlying condition itself. This is done solely for
1297 // consistency since parentheses and logical-NOTs are ignored when checking
1298 // whether the condition is actually an instrumentable condition. This can
1299 // also make debugging a bit easier.
1300 S = CodeGenFunction::stripCond(S);
1302 auto BranchStateIter = RegionMCDCState->BranchByStmt.find(S);
1303 if (BranchStateIter == RegionMCDCState->BranchByStmt.end())
1304 return;
1306 // Extract the ID of the condition we are setting in the bitmap.
1307 const auto &Branch = BranchStateIter->second;
1308 assert(Branch.ID >= 0 && "Condition has no ID!");
1309 assert(Branch.DecisionStmt);
1311 // Cancel the emission if the Decision is erased after the allocation.
1312 const auto DecisionIter =
1313 RegionMCDCState->DecisionByStmt.find(Branch.DecisionStmt);
1314 if (DecisionIter == RegionMCDCState->DecisionByStmt.end())
1315 return;
1317 const auto &TVIdxs = DecisionIter->second.Indices[Branch.ID];
1319 auto *CurTV = Builder.CreateLoad(MCDCCondBitmapAddr,
1320 "mcdc." + Twine(Branch.ID + 1) + ".cur");
1321 auto *NewTV = Builder.CreateAdd(CurTV, Builder.getInt32(TVIdxs[true]));
1322 NewTV = Builder.CreateSelect(
1323 Val, NewTV, Builder.CreateAdd(CurTV, Builder.getInt32(TVIdxs[false])));
1324 Builder.CreateStore(NewTV, MCDCCondBitmapAddr);
1327 void CodeGenPGO::setValueProfilingFlag(llvm::Module &M) {
1328 if (CGM.getCodeGenOpts().hasProfileClangInstr())
1329 M.addModuleFlag(llvm::Module::Warning, "EnableValueProfiling",
1330 uint32_t(EnableValueProfiling));
1333 void CodeGenPGO::setProfileVersion(llvm::Module &M) {
1334 if (CGM.getCodeGenOpts().hasProfileClangInstr() &&
1335 llvm::EnableSingleByteCoverage) {
1336 const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
1337 llvm::Type *IntTy64 = llvm::Type::getInt64Ty(M.getContext());
1338 uint64_t ProfileVersion =
1339 (INSTR_PROF_RAW_VERSION | VARIANT_MASK_BYTE_COVERAGE);
1341 auto IRLevelVersionVariable = new llvm::GlobalVariable(
1342 M, IntTy64, true, llvm::GlobalValue::WeakAnyLinkage,
1343 llvm::Constant::getIntegerValue(IntTy64,
1344 llvm::APInt(64, ProfileVersion)),
1345 VarName);
1347 IRLevelVersionVariable->setVisibility(llvm::GlobalValue::HiddenVisibility);
1348 llvm::Triple TT(M.getTargetTriple());
1349 if (TT.supportsCOMDAT()) {
1350 IRLevelVersionVariable->setLinkage(llvm::GlobalValue::ExternalLinkage);
1351 IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName));
1353 IRLevelVersionVariable->setDSOLocal(true);
1357 // This method either inserts a call to the profile run-time during
1358 // instrumentation or puts profile data into metadata for PGO use.
1359 void CodeGenPGO::valueProfile(CGBuilderTy &Builder, uint32_t ValueKind,
1360 llvm::Instruction *ValueSite, llvm::Value *ValuePtr) {
1362 if (!EnableValueProfiling)
1363 return;
1365 if (!ValuePtr || !ValueSite || !Builder.GetInsertBlock())
1366 return;
1368 if (isa<llvm::Constant>(ValuePtr))
1369 return;
1371 bool InstrumentValueSites = CGM.getCodeGenOpts().hasProfileClangInstr();
1372 if (InstrumentValueSites && RegionCounterMap) {
1373 auto BuilderInsertPoint = Builder.saveIP();
1374 Builder.SetInsertPoint(ValueSite);
1375 llvm::Value *Args[5] = {
1376 FuncNameVar,
1377 Builder.getInt64(FunctionHash),
1378 Builder.CreatePtrToInt(ValuePtr, Builder.getInt64Ty()),
1379 Builder.getInt32(ValueKind),
1380 Builder.getInt32(NumValueSites[ValueKind]++)
1382 Builder.CreateCall(
1383 CGM.getIntrinsic(llvm::Intrinsic::instrprof_value_profile), Args);
1384 Builder.restoreIP(BuilderInsertPoint);
1385 return;
1388 llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
1389 if (PGOReader && haveRegionCounts()) {
1390 // We record the top most called three functions at each call site.
1391 // Profile metadata contains "VP" string identifying this metadata
1392 // as value profiling data, then a uint32_t value for the value profiling
1393 // kind, a uint64_t value for the total number of times the call is
1394 // executed, followed by the function hash and execution count (uint64_t)
1395 // pairs for each function.
1396 if (NumValueSites[ValueKind] >= ProfRecord->getNumValueSites(ValueKind))
1397 return;
1399 llvm::annotateValueSite(CGM.getModule(), *ValueSite, *ProfRecord,
1400 (llvm::InstrProfValueKind)ValueKind,
1401 NumValueSites[ValueKind]);
1403 NumValueSites[ValueKind]++;
1407 void CodeGenPGO::loadRegionCounts(llvm::IndexedInstrProfReader *PGOReader,
1408 bool IsInMainFile) {
1409 CGM.getPGOStats().addVisited(IsInMainFile);
1410 RegionCounts.clear();
1411 llvm::Expected<llvm::InstrProfRecord> RecordExpected =
1412 PGOReader->getInstrProfRecord(FuncName, FunctionHash);
1413 if (auto E = RecordExpected.takeError()) {
1414 auto IPE = std::get<0>(llvm::InstrProfError::take(std::move(E)));
1415 if (IPE == llvm::instrprof_error::unknown_function)
1416 CGM.getPGOStats().addMissing(IsInMainFile);
1417 else if (IPE == llvm::instrprof_error::hash_mismatch)
1418 CGM.getPGOStats().addMismatched(IsInMainFile);
1419 else if (IPE == llvm::instrprof_error::malformed)
1420 // TODO: Consider a more specific warning for this case.
1421 CGM.getPGOStats().addMismatched(IsInMainFile);
1422 return;
1424 ProfRecord =
1425 std::make_unique<llvm::InstrProfRecord>(std::move(RecordExpected.get()));
1426 RegionCounts = ProfRecord->Counts;
1429 /// Calculate what to divide by to scale weights.
1431 /// Given the maximum weight, calculate a divisor that will scale all the
1432 /// weights to strictly less than UINT32_MAX.
1433 static uint64_t calculateWeightScale(uint64_t MaxWeight) {
1434 return MaxWeight < UINT32_MAX ? 1 : MaxWeight / UINT32_MAX + 1;
1437 /// Scale an individual branch weight (and add 1).
1439 /// Scale a 64-bit weight down to 32-bits using \c Scale.
1441 /// According to Laplace's Rule of Succession, it is better to compute the
1442 /// weight based on the count plus 1, so universally add 1 to the value.
1444 /// \pre \c Scale was calculated by \a calculateWeightScale() with a weight no
1445 /// greater than \c Weight.
1446 static uint32_t scaleBranchWeight(uint64_t Weight, uint64_t Scale) {
1447 assert(Scale && "scale by 0?");
1448 uint64_t Scaled = Weight / Scale + 1;
1449 assert(Scaled <= UINT32_MAX && "overflow 32-bits");
1450 return Scaled;
1453 llvm::MDNode *CodeGenFunction::createProfileWeights(uint64_t TrueCount,
1454 uint64_t FalseCount) const {
1455 // Check for empty weights.
1456 if (!TrueCount && !FalseCount)
1457 return nullptr;
1459 // Calculate how to scale down to 32-bits.
1460 uint64_t Scale = calculateWeightScale(std::max(TrueCount, FalseCount));
1462 llvm::MDBuilder MDHelper(CGM.getLLVMContext());
1463 return MDHelper.createBranchWeights(scaleBranchWeight(TrueCount, Scale),
1464 scaleBranchWeight(FalseCount, Scale));
1467 llvm::MDNode *
1468 CodeGenFunction::createProfileWeights(ArrayRef<uint64_t> Weights) const {
1469 // We need at least two elements to create meaningful weights.
1470 if (Weights.size() < 2)
1471 return nullptr;
1473 // Check for empty weights.
1474 uint64_t MaxWeight = *std::max_element(Weights.begin(), Weights.end());
1475 if (MaxWeight == 0)
1476 return nullptr;
1478 // Calculate how to scale down to 32-bits.
1479 uint64_t Scale = calculateWeightScale(MaxWeight);
1481 SmallVector<uint32_t, 16> ScaledWeights;
1482 ScaledWeights.reserve(Weights.size());
1483 for (uint64_t W : Weights)
1484 ScaledWeights.push_back(scaleBranchWeight(W, Scale));
1486 llvm::MDBuilder MDHelper(CGM.getLLVMContext());
1487 return MDHelper.createBranchWeights(ScaledWeights);
1490 llvm::MDNode *
1491 CodeGenFunction::createProfileWeightsForLoop(const Stmt *Cond,
1492 uint64_t LoopCount) const {
1493 if (!PGO.haveRegionCounts())
1494 return nullptr;
1495 std::optional<uint64_t> CondCount = PGO.getStmtCount(Cond);
1496 if (!CondCount || *CondCount == 0)
1497 return nullptr;
1498 return createProfileWeights(LoopCount,
1499 std::max(*CondCount, LoopCount) - LoopCount);