//===-- InstrProfiling.cpp - Frontend instrumentation based profiling ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass lowers instrprof_* intrinsics emitted by a frontend for profiling.
// It also builds the data structures and initialization code needed for
// updating execution counts and emitting the profile at runtime.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <string>

using namespace llvm;

#define DEBUG_TYPE "instrprof"
// The start and end values of the precise value profile range for memory
// intrinsic sizes.
cl::opt<std::string> MemOPSizeRange(
    "memop-size-range",
    cl::desc("Set the range of size in memory intrinsic calls to be profiled "
             "precisely, in a format of <start_val>:<end_val>"),
    cl::init(""));

// The value that is considered to be a large value in memory intrinsic size
// profiling.
cl::opt<unsigned> MemOPSizeLarge(
    "memop-size-large",
    cl::desc("Set large value threshold in memory intrinsic size profiling. "
             "Value of 0 disables the large value profiling."),
    cl::init(8192));
namespace {

cl::opt<bool> DoNameCompression("enable-name-compression",
                                cl::desc("Enable name string compression"),
                                cl::init(true));

cl::opt<bool> DoHashBasedCounterSplit(
    "hash-based-counter-split",
    cl::desc("Rename counter variable of a comdat function based on cfg hash"),
    cl::init(true));

cl::opt<bool> ValueProfileStaticAlloc(
    "vp-static-alloc",
    cl::desc("Do static counter allocation for value profiler"),
    cl::init(true));

cl::opt<double> NumCountersPerValueSite(
    "vp-counters-per-site",
    cl::desc("The average number of profile counters allocated "
             "per value profiling site."),
    // This is set to a very small value because in real programs, only
    // a very small percentage of value sites have non-zero targets, e.g., 1/30.
    // For those sites with non-zero profile, the average number of targets
    // is usually smaller than 2.
    cl::init(1.0));

cl::opt<bool> AtomicCounterUpdateAll(
    "instrprof-atomic-counter-update-all", cl::ZeroOrMore,
    cl::desc("Make all profile counter updates atomic (for testing only)"),
    cl::init(false));

cl::opt<bool> AtomicCounterUpdatePromoted(
    "atomic-counter-update-promoted", cl::ZeroOrMore,
    cl::desc("Do counter update using atomic fetch add "
             "for promoted counters only"),
    cl::init(false));

// If the option is not specified, the default behavior about whether
// counter promotion is done depends on how the instrumentation lowering
// pipeline is set up, i.e., the default value of true of this option
// does not mean the promotion will be done by default. Explicitly
// setting this option can override the default behavior.
cl::opt<bool> DoCounterPromotion("do-counter-promotion", cl::ZeroOrMore,
                                 cl::desc("Do counter register promotion"),
                                 cl::init(false));

cl::opt<unsigned> MaxNumOfPromotionsPerLoop(
    cl::ZeroOrMore, "max-counter-promotions-per-loop", cl::init(20),
    cl::desc("Max number of counter promotions per loop to avoid "
             "increasing register pressure too much"));

cl::opt<int>
    MaxNumOfPromotions(cl::ZeroOrMore, "max-counter-promotions", cl::init(-1),
                       cl::desc("Max number of allowed counter promotions"));

cl::opt<unsigned> SpeculativeCounterPromotionMaxExiting(
    cl::ZeroOrMore, "speculative-counter-promotion-max-exiting", cl::init(3),
    cl::desc("The max number of exiting blocks of a loop to allow "
             "speculative counter promotion"));

cl::opt<bool> SpeculativeCounterPromotionToLoop(
    cl::ZeroOrMore, "speculative-counter-promotion-to-loop", cl::init(false),
    cl::desc("When the option is false, if the target block is in a loop, "
             "the promotion will be disallowed unless the promoted counter "
             "update can be further/iteratively promoted into an acyclic "
             "region."));

cl::opt<bool> IterativeCounterPromotion(
    cl::ZeroOrMore, "iterative-counter-promotion", cl::init(true),
    cl::desc("Allow counter promotion across the whole loop nest."));
class InstrProfilingLegacyPass : public ModulePass {
  InstrProfiling InstrProf;

public:
  static char ID; // Pass identification, replacement for typeid

  InstrProfilingLegacyPass() : ModulePass(ID) {}
  InstrProfilingLegacyPass(const InstrProfOptions &Options)
      : ModulePass(ID), InstrProf(Options) {}

  StringRef getPassName() const override {
    return "Frontend instrumentation-based coverage lowering";
  }

  bool runOnModule(Module &M) override {
    return InstrProf.run(M, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
  }
};
/// A helper class to promote one counter RMW operation in the loop
/// into a register update.
///
/// The RMW update for the counter will be sunk out of the loop after
/// the transformation.
///
class PGOCounterPromoterHelper : public LoadAndStorePromoter {
public:
  PGOCounterPromoterHelper(
      Instruction *L, Instruction *S, SSAUpdater &SSA, Value *Init,
      BasicBlock *PH, ArrayRef<BasicBlock *> ExitBlocks,
      ArrayRef<Instruction *> InsertPts,
      DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
      LoopInfo &LI)
      : LoadAndStorePromoter({L, S}, SSA), Store(S), ExitBlocks(ExitBlocks),
        InsertPts(InsertPts), LoopToCandidates(LoopToCands), LI(LI) {
    assert(isa<LoadInst>(L));
    assert(isa<StoreInst>(S));
    SSA.AddAvailableValue(PH, Init);
  }

  void doExtraRewritesBeforeFinalDeletion() override {
    for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
      BasicBlock *ExitBlock = ExitBlocks[i];
      Instruction *InsertPos = InsertPts[i];
      // Get LiveIn value into the ExitBlock. If there are multiple
      // predecessors, the value is defined by a PHI node in this
      // block.
      Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
      Value *Addr = cast<StoreInst>(Store)->getPointerOperand();
      Type *Ty = LiveInValue->getType();
      IRBuilder<> Builder(InsertPos);
      if (AtomicCounterUpdatePromoted)
        // Atomic updates currently can only be promoted across the current
        // loop, not the whole loop nest.
        Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, LiveInValue,
                                AtomicOrdering::SequentiallyConsistent);
      else {
        LoadInst *OldVal = Builder.CreateLoad(Ty, Addr, "pgocount.promoted");
        auto *NewVal = Builder.CreateAdd(OldVal, LiveInValue);
        auto *NewStore = Builder.CreateStore(NewVal, Addr);

        // Now update the parent loop's candidate list:
        if (IterativeCounterPromotion) {
          auto *TargetLoop = LI.getLoopFor(ExitBlock);
          if (TargetLoop)
            LoopToCandidates[TargetLoop].emplace_back(OldVal, NewStore);
        }
      }
    }
  }

private:
  Instruction *Store;
  ArrayRef<BasicBlock *> ExitBlocks;
  ArrayRef<Instruction *> InsertPts;
  DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
  LoopInfo &LI;
};
/// A helper class to do register promotion for all profile counter
/// updates in a loop.
///
class PGOCounterPromoter {
public:
  PGOCounterPromoter(
      DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
      Loop &CurLoop, LoopInfo &LI)
      : LoopToCandidates(LoopToCands), ExitBlocks(), InsertPts(), L(CurLoop),
        LI(LI) {

    SmallVector<BasicBlock *, 8> LoopExitBlocks;
    SmallPtrSet<BasicBlock *, 8> BlockSet;
    L.getExitBlocks(LoopExitBlocks);

    for (BasicBlock *ExitBlock : LoopExitBlocks) {
      if (BlockSet.insert(ExitBlock).second) {
        ExitBlocks.push_back(ExitBlock);
        InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
      }
    }
  }
  bool run(int64_t *NumPromoted) {
    // Skip 'infinite' loops:
    if (ExitBlocks.size() == 0)
      return false;
    unsigned MaxProm = getMaxNumOfPromotionsInLoop(&L);
    if (MaxProm == 0)
      return false;

    unsigned Promoted = 0;
    for (auto &Cand : LoopToCandidates[&L]) {

      SmallVector<PHINode *, 4> NewPHIs;
      SSAUpdater SSA(&NewPHIs);
      Value *InitVal = ConstantInt::get(Cand.first->getType(), 0);

      PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal,
                                        L.getLoopPreheader(), ExitBlocks,
                                        InsertPts, LoopToCandidates, LI);
      Promoter.run(SmallVector<Instruction *, 2>({Cand.first, Cand.second}));
      Promoted++;
      if (Promoted >= MaxProm)
        break;

      (*NumPromoted)++;
      if (MaxNumOfPromotions != -1 && *NumPromoted >= MaxNumOfPromotions)
        break;
    }

    LLVM_DEBUG(dbgs() << Promoted << " counters promoted for loop (depth="
                      << L.getLoopDepth() << ")\n");
    return Promoted != 0;
  }
private:
  bool allowSpeculativeCounterPromotion(Loop *LP) {
    SmallVector<BasicBlock *, 8> ExitingBlocks;
    L.getExitingBlocks(ExitingBlocks);
    // Not considered speculative.
    if (ExitingBlocks.size() == 1)
      return true;
    if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
      return false;
    return true;
  }
  // Returns the max number of Counter Promotions for LP.
  unsigned getMaxNumOfPromotionsInLoop(Loop *LP) {
    // We can't insert into a catchswitch.
    SmallVector<BasicBlock *, 8> LoopExitBlocks;
    LP->getExitBlocks(LoopExitBlocks);
    if (llvm::any_of(LoopExitBlocks, [](BasicBlock *Exit) {
          return isa<CatchSwitchInst>(Exit->getTerminator());
        }))
      return 0;

    if (!LP->hasDedicatedExits())
      return 0;

    BasicBlock *PH = LP->getLoopPreheader();
    if (!PH)
      return 0;

    SmallVector<BasicBlock *, 8> ExitingBlocks;
    LP->getExitingBlocks(ExitingBlocks);
    // Not considered speculative.
    if (ExitingBlocks.size() == 1)
      return MaxNumOfPromotionsPerLoop;

    if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
      return 0;

    // Whether the target block is in a loop does not matter:
    if (SpeculativeCounterPromotionToLoop)
      return MaxNumOfPromotionsPerLoop;

    // Now check the target block:
    unsigned MaxProm = MaxNumOfPromotionsPerLoop;
    for (auto *TargetBlock : LoopExitBlocks) {
      auto *TargetLoop = LI.getLoopFor(TargetBlock);
      if (!TargetLoop)
        continue;
      unsigned MaxPromForTarget = getMaxNumOfPromotionsInLoop(TargetLoop);
      unsigned PendingCandsInTarget = LoopToCandidates[TargetLoop].size();
      MaxProm =
          std::min(MaxProm, std::max(MaxPromForTarget, PendingCandsInTarget) -
                                PendingCandsInTarget);
    }
    return MaxProm;
  }

  DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
  SmallVector<BasicBlock *, 8> ExitBlocks;
  SmallVector<Instruction *, 8> InsertPts;
  Loop &L;
  LoopInfo &LI;
};

} // end anonymous namespace
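// New pass manager entry point: run the lowering and report whether the
// module was changed.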
PreservedAnalyses InstrProfiling::run(Module &M, ModuleAnalysisManager &AM) {
  auto &TLI = AM.getResult<TargetLibraryAnalysis>(M);
  if (!run(M, TLI))
    return PreservedAnalyses::all();

  return PreservedAnalyses::none();
}

char InstrProfilingLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(
    InstrProfilingLegacyPass, "instrprof",
    "Frontend instrumentation-based coverage lowering.", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(
    InstrProfilingLegacyPass, "instrprof",
    "Frontend instrumentation-based coverage lowering.", false, false)

ModulePass *
llvm::createInstrProfilingLegacyPass(const InstrProfOptions &Options) {
  return new InstrProfilingLegacyPass(Options);
}
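// Returns Instr as an InstrProfIncrementInst if it is either an
// instrprof_increment or an instrprof_increment_step call, and null otherwise.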
static InstrProfIncrementInst *castToIncrementInst(Instruction *Instr) {
  InstrProfIncrementInst *Inc = dyn_cast<InstrProfIncrementInstStep>(Instr);
  if (Inc)
    return Inc;
  return dyn_cast<InstrProfIncrementInst>(Instr);
}
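// Lower every instrprof_increment(_step) and instrprof_value_profile call in
// F, then run counter promotion on the collected load/store candidates.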
bool InstrProfiling::lowerIntrinsics(Function *F) {
  bool MadeChange = false;
  PromotionCandidates.clear();
  for (BasicBlock &BB : *F) {
    for (auto I = BB.begin(), E = BB.end(); I != E;) {
      auto Instr = I++;
      InstrProfIncrementInst *Inc = castToIncrementInst(&*Instr);
      if (Inc) {
        lowerIncrement(Inc);
        MadeChange = true;
      } else if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(Instr)) {
        lowerValueProfileInst(Ind);
        MadeChange = true;
      }
    }
  }

  if (!MadeChange)
    return false;

  promoteCounterLoadStores(F);
  return true;
}
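// An explicit -do-counter-promotion on the command line overrides whatever
// the front end requested through InstrProfOptions.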
bool InstrProfiling::isCounterPromotionEnabled() const {
  if (DoCounterPromotion.getNumOccurrences() > 0)
    return DoCounterPromotion;

  return Options.DoCounterPromotion;
}
void InstrProfiling::promoteCounterLoadStores(Function *F) {
  if (!isCounterPromotionEnabled())
    return;

  DominatorTree DT(*F);
  LoopInfo LI(DT);
  DenseMap<Loop *, SmallVector<LoadStorePair, 8>> LoopPromotionCandidates;

  for (const auto &LoadStore : PromotionCandidates) {
    auto *CounterLoad = LoadStore.first;
    auto *CounterStore = LoadStore.second;
    BasicBlock *BB = CounterLoad->getParent();
    Loop *ParentLoop = LI.getLoopFor(BB);
    if (!ParentLoop)
      continue;
    LoopPromotionCandidates[ParentLoop].emplace_back(CounterLoad, CounterStore);
  }

  SmallVector<Loop *, 4> Loops = LI.getLoopsInPreorder();

  // Do a post-order traversal of the loops so that counter updates can be
  // iteratively hoisted outside the loop nest.
  for (auto *Loop : llvm::reverse(Loops)) {
    PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI);
    Promoter.run(&TotalCountersPromoted);
  }
}
/// Check if the module contains uses of any profiling intrinsics.
static bool containsProfilingIntrinsics(Module &M) {
  if (auto *F = M.getFunction(
          Intrinsic::getName(llvm::Intrinsic::instrprof_increment)))
    if (!F->use_empty())
      return true;
  if (auto *F = M.getFunction(
          Intrinsic::getName(llvm::Intrinsic::instrprof_increment_step)))
    if (!F->use_empty())
      return true;
  if (auto *F = M.getFunction(
          Intrinsic::getName(llvm::Intrinsic::instrprof_value_profile)))
    if (!F->use_empty())
      return true;
  return false;
}
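// Main lowering driver: count the value-profiling sites per function, create
// the per-function profile globals, lower the intrinsics, and finally emit
// the value-node array, the name data, and the registration/initialization
// code the runtime needs.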
bool InstrProfiling::run(Module &M, const TargetLibraryInfo &TLI) {
  this->M = &M;
  this->TLI = &TLI;
  NamesVar = nullptr;
  NamesSize = 0;
  ProfileDataMap.clear();
  UsedVars.clear();
  getMemOPSizeRangeFromOption(MemOPSizeRange, MemOPSizeRangeStart,
                              MemOPSizeRangeLast);
  TT = Triple(M.getTargetTriple());

  // Emit the runtime hook even if no counters are present.
  bool MadeChange = emitRuntimeHook();

  // Improve compile time by avoiding linear scans when there is no work.
  GlobalVariable *CoverageNamesVar =
      M.getNamedGlobal(getCoverageUnusedNamesVarName());
  if (!containsProfilingIntrinsics(M) && !CoverageNamesVar)
    return MadeChange;

  // We did not know how many value sites there would be inside
  // the instrumented function. This is counting the number of instrumented
  // target value sites to enter it as a field in the profile data variable.
  for (Function &F : M) {
    InstrProfIncrementInst *FirstProfIncInst = nullptr;
    for (BasicBlock &BB : F)
      for (auto I = BB.begin(), E = BB.end(); I != E; I++)
        if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(I))
          computeNumValueSiteCounts(Ind);
        else if (FirstProfIncInst == nullptr)
          FirstProfIncInst = dyn_cast<InstrProfIncrementInst>(I);

    // Value profiling intrinsic lowering requires the per-function profile
    // data variable to be created first.
    if (FirstProfIncInst != nullptr)
      static_cast<void>(getOrCreateRegionCounters(FirstProfIncInst));
  }

  for (Function &F : M)
    MadeChange |= lowerIntrinsics(&F);

  if (CoverageNamesVar) {
    lowerCoverageData(CoverageNamesVar);
    MadeChange = true;
  }

  if (!MadeChange)
    return false;

  emitVNodes();
  emitNameData();
  emitRegistration();
  emitUses();
  emitInitialization();
  return true;
}
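// Declare (or reuse) the runtime callback that lowered value-profiling
// intrinsics call into. The parameter lists are stamped out from
// InstrProfData.inc so they stay in sync with compiler-rt.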
static FunctionCallee
getOrInsertValueProfilingCall(Module &M, const TargetLibraryInfo &TLI,
                              bool IsRange = false) {
  LLVMContext &Ctx = M.getContext();
  auto *ReturnTy = Type::getVoidTy(M.getContext());

  AttributeList AL;
  if (auto AK = TLI.getExtAttrForI32Param(false))
    AL = AL.addParamAttribute(M.getContext(), 2, AK);

  if (!IsRange) {
    Type *ParamTypes[] = {
#define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
#include "llvm/ProfileData/InstrProfData.inc"
    };
    auto *ValueProfilingCallTy =
        FunctionType::get(ReturnTy, makeArrayRef(ParamTypes), false);
    return M.getOrInsertFunction(getInstrProfValueProfFuncName(),
                                 ValueProfilingCallTy, AL);
  } else {
    Type *RangeParamTypes[] = {
#define VALUE_RANGE_PROF 1
#define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
#include "llvm/ProfileData/InstrProfData.inc"
#undef VALUE_RANGE_PROF
    };
    auto *ValueRangeProfilingCallTy =
        FunctionType::get(ReturnTy, makeArrayRef(RangeParamTypes), false);
    return M.getOrInsertFunction(getInstrProfValueRangeProfFuncName(),
                                 ValueRangeProfilingCallTy, AL);
  }
}
void InstrProfiling::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) {
  GlobalVariable *Name = Ind->getName();
  uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
  uint64_t Index = Ind->getIndex()->getZExtValue();
  auto It = ProfileDataMap.find(Name);
  if (It == ProfileDataMap.end()) {
    PerFunctionProfileData PD;
    PD.NumValueSites[ValueKind] = Index + 1;
    ProfileDataMap[Name] = PD;
  } else if (It->second.NumValueSites[ValueKind] <= Index)
    It->second.NumValueSites[ValueKind] = Index + 1;
}
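// Replace an instrprof_value_profile intrinsic with a call into the profiling
// runtime. The site index is flattened across all value kinds; memop-size
// sites go to the range-profiling entry point instead.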
void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
  GlobalVariable *Name = Ind->getName();
  auto It = ProfileDataMap.find(Name);
  assert(It != ProfileDataMap.end() && It->second.DataVar &&
         "value profiling detected in function with no counter increment");

  GlobalVariable *DataVar = It->second.DataVar;
  uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
  uint64_t Index = Ind->getIndex()->getZExtValue();
  for (uint32_t Kind = IPVK_First; Kind < ValueKind; ++Kind)
    Index += It->second.NumValueSites[Kind];

  IRBuilder<> Builder(Ind);
  bool IsRange = (Ind->getValueKind()->getZExtValue() ==
                  llvm::InstrProfValueKind::IPVK_MemOPSize);
  CallInst *Call = nullptr;
  if (!IsRange) {
    Value *Args[3] = {Ind->getTargetValue(),
                      Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()),
                      Builder.getInt32(Index)};
    Call = Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI), Args);
  } else {
    Value *Args[6] = {
        Ind->getTargetValue(),
        Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()),
        Builder.getInt32(Index),
        Builder.getInt64(MemOPSizeRangeStart),
        Builder.getInt64(MemOPSizeRangeLast),
        Builder.getInt64(MemOPSizeLarge == 0 ? INT64_MIN : MemOPSizeLarge)};
    Call =
        Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI, true), Args);
  }
  if (auto AK = TLI->getExtAttrForI32Param(false))
    Call->addParamAttr(2, AK);
  Ind->replaceAllUsesWith(Call);
  Ind->eraseFromParent();
}
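// Replace an instrprof_increment with a load/add/store (or an atomic RMW) of
// the corresponding slot in the function's counter array. Non-atomic updates
// are remembered as candidates for later counter register promotion.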
void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
  GlobalVariable *Counters = getOrCreateRegionCounters(Inc);

  IRBuilder<> Builder(Inc);
  uint64_t Index = Inc->getIndex()->getZExtValue();
  Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters->getValueType(),
                                                   Counters, 0, Index);

  if (Options.Atomic || AtomicCounterUpdateAll) {
    Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, Inc->getStep(),
                            AtomicOrdering::Monotonic);
  } else {
    Value *IncStep = Inc->getStep();
    Value *Load = Builder.CreateLoad(IncStep->getType(), Addr, "pgocount");
    auto *Count = Builder.CreateAdd(Load, Inc->getStep());
    auto *Store = Builder.CreateStore(Count, Addr);
    if (isCounterPromotionEnabled())
      PromotionCandidates.emplace_back(cast<Instruction>(Load), Store);
  }
  Inc->eraseFromParent();
}
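// Pull the function names referenced only by coverage mapping into
// ReferencedNames and drop the temporary global that was holding them.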
void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageNamesVar) {
  ConstantArray *Names =
      cast<ConstantArray>(CoverageNamesVar->getInitializer());
  for (unsigned I = 0, E = Names->getNumOperands(); I < E; ++I) {
    Constant *NC = Names->getOperand(I);
    Value *V = NC->stripPointerCasts();
    assert(isa<GlobalVariable>(V) && "Missing reference to function name");
    GlobalVariable *Name = cast<GlobalVariable>(V);

    Name->setLinkage(GlobalValue::PrivateLinkage);
    ReferencedNames.push_back(Name);
    NC->dropAllReferences();
  }
  CoverageNamesVar->eraseFromParent();
}
/// Get the name of a profiling variable for a particular function.
static std::string getVarName(InstrProfIncrementInst *Inc, StringRef Prefix) {
  StringRef NamePrefix = getInstrProfNameVarPrefix();
  StringRef Name = Inc->getName()->getName().substr(NamePrefix.size());
  Function *F = Inc->getParent()->getParent();
  Module *M = F->getParent();
  if (!DoHashBasedCounterSplit || !isIRPGOFlagSet(M) ||
      !canRenameComdatFunc(*F))
    return (Prefix + Name).str();
  uint64_t FuncHash = Inc->getHash()->getZExtValue();
  SmallVector<char, 24> HashPostfix;
  if (Name.endswith((Twine(".") + Twine(FuncHash)).toStringRef(HashPostfix)))
    return (Prefix + Name).str();
  return (Prefix + Name + "." + Twine(FuncHash)).str();
}
static inline bool shouldRecordFunctionAddr(Function *F) {
  // Check the linkage.
  bool HasAvailableExternallyLinkage = F->hasAvailableExternallyLinkage();
  if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() &&
      !HasAvailableExternallyLinkage)
    return true;

  // A function marked 'alwaysinline' with available_externally linkage can't
  // have its address taken. Doing so would create an undefined external ref to
  // the function, which would fail to link.
  if (HasAvailableExternallyLinkage &&
      F->hasFnAttribute(Attribute::AlwaysInline))
    return false;

  // Prohibit function address recording if the function is both internal and
  // COMDAT. This avoids the profile data variable referencing internal symbols
  // in COMDAT.
  if (F->hasLocalLinkage() && F->hasComdat())
    return false;

  // Check uses of this function for other than direct calls or invokes to it.
  // Inline virtual functions have linkonce_odr linkage. When a key method
  // exists, the vtable will only be emitted in the TU where the key method
  // is defined. In a TU where the vtable is not available, the function won't
  // be 'addresstaken'. If its address is not recorded here, the profile data
  // with missing address may be picked by the linker, leading to missing
  // indirect call target info.
  return F->hasAddressTaken() || F->hasLinkOnceLinkage();
}
static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) {
  // Don't do this for Darwin.  compiler-rt uses linker magic.
  if (TT.isOSDarwin())
    return false;

  // Use linker script magic to get data/cnts/name start/end.
  if (TT.isOSLinux() || TT.isOSFreeBSD() || TT.isOSNetBSD() ||
      TT.isOSFuchsia() || TT.isPS4CPU() || TT.isOSWindows())
    return false;

  return true;
}
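// Create (or return the cached) per-function profile globals for the function
// referenced by Inc: the counter array, an optional statically allocated
// value-profile pointer array, and the data record describing them.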
GlobalVariable *
InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
  GlobalVariable *NamePtr = Inc->getName();
  auto It = ProfileDataMap.find(NamePtr);
  PerFunctionProfileData PD;
  if (It != ProfileDataMap.end()) {
    if (It->second.RegionCounters)
      return It->second.RegionCounters;
    PD = It->second;
  }

  // Match the linkage and visibility of the name global, except on COFF, where
  // the linkage must be local and consequentially the visibility must be
  // default.
  Function *Fn = Inc->getParent()->getParent();
  GlobalValue::LinkageTypes Linkage = NamePtr->getLinkage();
  GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility();
  if (TT.isOSBinFormatCOFF()) {
    Linkage = GlobalValue::InternalLinkage;
    Visibility = GlobalValue::DefaultVisibility;
  }

  // Move the name variable to the right section. Place them in a COMDAT group
  // if the associated function is a COMDAT. This will make sure that only one
  // copy of counters of the COMDAT function will be emitted after linking.
  Comdat *Cmdt = nullptr;
  GlobalValue::LinkageTypes CounterLinkage = Linkage;
  if (needsComdatForCounter(*Fn, *M)) {
    if (TT.isOSBinFormatCOFF()) {
      // There are two cases that need a comdat on COFF:
      // 1. Functions that already have comdats (standard case)
      // 2. available_externally functions (dllimport and C99 inline)
      // In the first case, put all the data in the original function comdat. In
      // the second case, create a new comdat group using the counter as the
      // leader. Its linkage must be external, so use linkonce_odr linkage in
      // that case.
      if (Comdat *C = Fn->getComdat()) {
        Cmdt = C;
      } else {
        Cmdt = M->getOrInsertComdat(
            getVarName(Inc, getInstrProfCountersVarPrefix()));
        CounterLinkage = GlobalValue::LinkOnceODRLinkage;
      }
    } else {
      // For other platforms that use comdats (ELF), make a new comdat group for
      // all the profile data. It will be deduplicated within the current DSO.
      Cmdt = M->getOrInsertComdat(getVarName(Inc, getInstrProfComdatPrefix()));
    }
  }
  uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
  LLVMContext &Ctx = M->getContext();
  ArrayType *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters);

  // Create the counters variable.
  auto *CounterPtr =
      new GlobalVariable(*M, CounterTy, false, Linkage,
                         Constant::getNullValue(CounterTy),
                         getVarName(Inc, getInstrProfCountersVarPrefix()));
  CounterPtr->setVisibility(Visibility);
  CounterPtr->setSection(
      getInstrProfSectionName(IPSK_cnts, TT.getObjectFormat()));
  CounterPtr->setAlignment(8);
  CounterPtr->setComdat(Cmdt);
  CounterPtr->setLinkage(CounterLinkage);

  auto *Int8PtrTy = Type::getInt8PtrTy(Ctx);
  // Allocate statically the array of pointers to value profile nodes for
  // the current function.
  Constant *ValuesPtrExpr = ConstantPointerNull::get(Int8PtrTy);
  if (ValueProfileStaticAlloc && !needsRuntimeRegistrationOfSectionRange(TT)) {
    uint64_t NS = 0;
    for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
      NS += PD.NumValueSites[Kind];
    if (NS) {
      ArrayType *ValuesTy = ArrayType::get(Type::getInt64Ty(Ctx), NS);

      auto *ValuesVar =
          new GlobalVariable(*M, ValuesTy, false, Linkage,
                             Constant::getNullValue(ValuesTy),
                             getVarName(Inc, getInstrProfValuesVarPrefix()));
      ValuesVar->setVisibility(Visibility);
      ValuesVar->setSection(
          getInstrProfSectionName(IPSK_vals, TT.getObjectFormat()));
      ValuesVar->setAlignment(8);
      ValuesVar->setComdat(Cmdt);
      ValuesPtrExpr =
          ConstantExpr::getBitCast(ValuesVar, Type::getInt8PtrTy(Ctx));
    }
  }
  // Create data variable.
  auto *Int16Ty = Type::getInt16Ty(Ctx);
  auto *Int16ArrayTy = ArrayType::get(Int16Ty, IPVK_Last + 1);
  Type *DataTypes[] = {
#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) LLVMType,
#include "llvm/ProfileData/InstrProfData.inc"
  };
  auto *DataTy = StructType::get(Ctx, makeArrayRef(DataTypes));

  Constant *FunctionAddr = shouldRecordFunctionAddr(Fn)
                               ? ConstantExpr::getBitCast(Fn, Int8PtrTy)
                               : ConstantPointerNull::get(Int8PtrTy);

  Constant *Int16ArrayVals[IPVK_Last + 1];
  for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
    Int16ArrayVals[Kind] = ConstantInt::get(Int16Ty, PD.NumValueSites[Kind]);

  Constant *DataVals[] = {
#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init,
#include "llvm/ProfileData/InstrProfData.inc"
  };
  auto *Data = new GlobalVariable(*M, DataTy, false, Linkage,
                                  ConstantStruct::get(DataTy, DataVals),
                                  getVarName(Inc, getInstrProfDataVarPrefix()));
  Data->setVisibility(Visibility);
  Data->setSection(getInstrProfSectionName(IPSK_data, TT.getObjectFormat()));
  Data->setAlignment(INSTR_PROF_DATA_ALIGNMENT);
  Data->setComdat(Cmdt);

  PD.RegionCounters = CounterPtr;
  PD.DataVar = Data;
  ProfileDataMap[NamePtr] = PD;

  // Mark the data variable as used so that it isn't stripped out.
  UsedVars.push_back(Data);
  // Now that the linkage set by the FE has been passed to the data and counter
  // variables, reset Name variable's linkage and visibility to private so that
  // it can be removed later by the compiler.
  NamePtr->setLinkage(GlobalValue::PrivateLinkage);
  // Collect the referenced names to be used by emitNameData.
  ReferencedNames.push_back(NamePtr);

  return CounterPtr;
}
void InstrProfiling::emitVNodes() {
  if (!ValueProfileStaticAlloc)
    return;

  // For now only support this on platforms that do
  // not require runtime registration to discover
  // named section start/end.
  if (needsRuntimeRegistrationOfSectionRange(TT))
    return;

  size_t TotalNS = 0;
  for (auto &PD : ProfileDataMap) {
    for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
      TotalNS += PD.second.NumValueSites[Kind];
  }

  if (!TotalNS)
    return;

  uint64_t NumCounters = TotalNS * NumCountersPerValueSite;
  // Heuristic for small programs with very few total value sites.
  // The default value of vp-counters-per-site is chosen based on
  // the observation that large apps usually have a low percentage
  // of value sites that actually have any profile data, and thus
  // the average number of counters per site is low. For small
  // apps with very few sites, this may not be true. Bump up the
  // number of counters in this case.
#define INSTR_PROF_MIN_VAL_COUNTS 10
  if (NumCounters < INSTR_PROF_MIN_VAL_COUNTS)
    NumCounters = std::max(INSTR_PROF_MIN_VAL_COUNTS, (int)NumCounters * 2);

  auto &Ctx = M->getContext();
  Type *VNodeTypes[] = {
#define INSTR_PROF_VALUE_NODE(Type, LLVMType, Name, Init) LLVMType,
#include "llvm/ProfileData/InstrProfData.inc"
  };
  auto *VNodeTy = StructType::get(Ctx, makeArrayRef(VNodeTypes));

  ArrayType *VNodesTy = ArrayType::get(VNodeTy, NumCounters);
  auto *VNodesVar = new GlobalVariable(
      *M, VNodesTy, false, GlobalValue::PrivateLinkage,
      Constant::getNullValue(VNodesTy), getInstrProfVNodesVarName());
  VNodesVar->setSection(
      getInstrProfSectionName(IPSK_vnodes, TT.getObjectFormat()));
  UsedVars.push_back(VNodesVar);
}
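// Concatenate (and, if enabled, compress) all referenced function names into
// a single private global in the names section, then drop the individual
// name globals.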
void InstrProfiling::emitNameData() {
  std::string UncompressedData;

  if (ReferencedNames.empty())
    return;

  std::string CompressedNameStr;
  if (Error E = collectPGOFuncNameStrings(ReferencedNames, CompressedNameStr,
                                          DoNameCompression)) {
    report_fatal_error(toString(std::move(E)), false);
  }

  auto &Ctx = M->getContext();
  auto *NamesVal = ConstantDataArray::getString(
      Ctx, StringRef(CompressedNameStr), false);
  NamesVar = new GlobalVariable(*M, NamesVal->getType(), true,
                                GlobalValue::PrivateLinkage, NamesVal,
                                getInstrProfNamesVarName());
  NamesSize = CompressedNameStr.size();
  NamesVar->setSection(
      getInstrProfSectionName(IPSK_name, TT.getObjectFormat()));
  // On COFF, it's important to reduce the alignment down to 1 to prevent the
  // linker from inserting padding before the start of the names section or
  // between names entries.
  NamesVar->setAlignment(1);
  UsedVars.push_back(NamesVar);

  for (auto *NamePtr : ReferencedNames)
    NamePtr->eraseFromParent();
}
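// On targets without linker-script or platform support for locating the
// profile sections, synthesize a function that registers every profile global
// with the runtime.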
void InstrProfiling::emitRegistration() {
  if (!needsRuntimeRegistrationOfSectionRange(TT))
    return;

  // Construct the function.
  auto *VoidTy = Type::getVoidTy(M->getContext());
  auto *VoidPtrTy = Type::getInt8PtrTy(M->getContext());
  auto *Int64Ty = Type::getInt64Ty(M->getContext());
  auto *RegisterFTy = FunctionType::get(VoidTy, false);
  auto *RegisterF = Function::Create(RegisterFTy, GlobalValue::InternalLinkage,
                                     getInstrProfRegFuncsName(), M);
  RegisterF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
  if (Options.NoRedZone)
    RegisterF->addFnAttr(Attribute::NoRedZone);

  auto *RuntimeRegisterTy = FunctionType::get(VoidTy, VoidPtrTy, false);
  auto *RuntimeRegisterF =
      Function::Create(RuntimeRegisterTy, GlobalVariable::ExternalLinkage,
                       getInstrProfRegFuncName(), M);

  IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", RegisterF));
  for (Value *Data : UsedVars)
    if (Data != NamesVar && !isa<Function>(Data))
      IRB.CreateCall(RuntimeRegisterF, IRB.CreateBitCast(Data, VoidPtrTy));

  if (NamesVar) {
    Type *ParamTypes[] = {VoidPtrTy, Int64Ty};
    auto *NamesRegisterTy =
        FunctionType::get(VoidTy, makeArrayRef(ParamTypes), false);
    auto *NamesRegisterF =
        Function::Create(NamesRegisterTy, GlobalVariable::ExternalLinkage,
                         getInstrProfNamesRegFuncName(), M);
    IRB.CreateCall(NamesRegisterF, {IRB.CreateBitCast(NamesVar, VoidPtrTy),
                                    IRB.getInt64(NamesSize)});
  }

  IRB.CreateRetVoid();
}
bool InstrProfiling::emitRuntimeHook() {
  // We expect the linker to be invoked with -u<hook_var> flag for linux,
  // for which case there is no need to emit the user function.
  if (TT.isOSLinux())
    return false;

  // If the module's provided its own runtime, we don't need to do anything.
  if (M->getGlobalVariable(getInstrProfRuntimeHookVarName()))
    return false;

  // Declare an external variable that will pull in the runtime initialization.
  auto *Int32Ty = Type::getInt32Ty(M->getContext());
  auto *Var =
      new GlobalVariable(*M, Int32Ty, false, GlobalValue::ExternalLinkage,
                         nullptr, getInstrProfRuntimeHookVarName());

  // Make a function that uses it.
  auto *User = Function::Create(FunctionType::get(Int32Ty, false),
                                GlobalValue::LinkOnceODRLinkage,
                                getInstrProfRuntimeHookVarUseFuncName(), M);
  User->addFnAttr(Attribute::NoInline);
  if (Options.NoRedZone)
    User->addFnAttr(Attribute::NoRedZone);
  User->setVisibility(GlobalValue::HiddenVisibility);
  if (TT.supportsCOMDAT())
    User->setComdat(M->getOrInsertComdat(User->getName()));

  IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", User));
  auto *Load = IRB.CreateLoad(Int32Ty, Var);
  IRB.CreateRet(Load);

  // Mark the user variable as used so that it isn't stripped out.
  UsedVars.push_back(User);
  return true;
}
void InstrProfiling::emitUses() {
  if (!UsedVars.empty())
    appendToUsed(*M, UsedVars);
}
void InstrProfiling::emitInitialization() {
  // Create variable for profile name.
  createProfileFileNameVar(*M, Options.InstrProfileOutput);
  Function *RegisterF = M->getFunction(getInstrProfRegFuncsName());
  if (!RegisterF)
    return;

  // Create the initialization function.
  auto *VoidTy = Type::getVoidTy(M->getContext());
  auto *F = Function::Create(FunctionType::get(VoidTy, false),
                             GlobalValue::InternalLinkage,
                             getInstrProfInitFuncName(), M);
  F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
  F->addFnAttr(Attribute::NoInline);
  if (Options.NoRedZone)
    F->addFnAttr(Attribute::NoRedZone);

  // Add the basic block and the necessary calls.
  IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", F));
  IRB.CreateCall(RegisterF, {});
  IRB.CreateRetVoid();

  appendToGlobalCtors(*M, F, 0);
}