1 //===-- InstrProfiling.cpp - Frontend instrumentation based profiling -----===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This pass lowers instrprof_* intrinsics emitted by a frontend for profiling.
10 // It also builds the data structures and initialization code needed for
11 // updating execution counts and emitting the profile at runtime.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/Transforms/Instrumentation/InstrProfiling.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/ADT/Triple.h"
20 #include "llvm/ADT/Twine.h"
21 #include "llvm/Analysis/BlockFrequencyInfo.h"
22 #include "llvm/Analysis/BranchProbabilityInfo.h"
23 #include "llvm/Analysis/LoopInfo.h"
24 #include "llvm/Analysis/TargetLibraryInfo.h"
25 #include "llvm/IR/Attributes.h"
26 #include "llvm/IR/BasicBlock.h"
27 #include "llvm/IR/Constant.h"
28 #include "llvm/IR/Constants.h"
29 #include "llvm/IR/DerivedTypes.h"
30 #include "llvm/IR/Dominators.h"
31 #include "llvm/IR/Function.h"
32 #include "llvm/IR/GlobalValue.h"
33 #include "llvm/IR/GlobalVariable.h"
34 #include "llvm/IR/IRBuilder.h"
35 #include "llvm/IR/Instruction.h"
36 #include "llvm/IR/Instructions.h"
37 #include "llvm/IR/IntrinsicInst.h"
38 #include "llvm/IR/Module.h"
39 #include "llvm/IR/Type.h"
40 #include "llvm/Pass.h"
41 #include "llvm/ProfileData/InstrProf.h"
42 #include "llvm/Support/Casting.h"
43 #include "llvm/Support/CommandLine.h"
44 #include "llvm/Support/Error.h"
45 #include "llvm/Support/ErrorHandling.h"
46 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
47 #include "llvm/Transforms/Utils/ModuleUtils.h"
48 #include "llvm/Transforms/Utils/SSAUpdater.h"
57 #define DEBUG_TYPE "instrprof"
59 // The start and end values of precise value profile range for memory
61 cl::opt
<std::string
> MemOPSizeRange(
63 cl::desc("Set the range of size in memory intrinsic calls to be profiled "
64 "precisely, in a format of <start_val>:<end_val>"),
67 // The value that considered to be large value in memory intrinsic.
68 cl::opt
<unsigned> MemOPSizeLarge(
70 cl::desc("Set large value thresthold in memory intrinsic size profiling. "
71 "Value of 0 disables the large value profiling."),
76 cl::opt
<bool> DoNameCompression("enable-name-compression",
77 cl::desc("Enable name string compression"),
80 cl::opt
<bool> DoHashBasedCounterSplit(
81 "hash-based-counter-split",
82 cl::desc("Rename counter variable of a comdat function based on cfg hash"),
85 cl::opt
<bool> ValueProfileStaticAlloc(
87 cl::desc("Do static counter allocation for value profiler"),
90 cl::opt
<double> NumCountersPerValueSite(
91 "vp-counters-per-site",
92 cl::desc("The average number of profile counters allocated "
93 "per value profiling site."),
94 // This is set to a very small value because in real programs, only
95 // a very small percentage of value sites have non-zero targets, e.g, 1/30.
96 // For those sites with non-zero profile, the average number of targets
97 // is usually smaller than 2.
100 cl::opt
<bool> AtomicCounterUpdateAll(
101 "instrprof-atomic-counter-update-all", cl::ZeroOrMore
,
102 cl::desc("Make all profile counter updates atomic (for testing only)"),
105 cl::opt
<bool> AtomicCounterUpdatePromoted(
106 "atomic-counter-update-promoted", cl::ZeroOrMore
,
107 cl::desc("Do counter update using atomic fetch add "
108 " for promoted counters only"),
// If the option is not specified, the default behavior about whether
// counter promotion is done depends on how the instrumentation lowering
// pipeline is set up, i.e., the default value of true of this option
// does not mean the promotion will be done by default. Explicitly
// setting this option can override the default behavior.
116 cl::opt
<bool> DoCounterPromotion("do-counter-promotion", cl::ZeroOrMore
,
117 cl::desc("Do counter register promotion"),
119 cl::opt
<unsigned> MaxNumOfPromotionsPerLoop(
120 cl::ZeroOrMore
, "max-counter-promotions-per-loop", cl::init(20),
121 cl::desc("Max number counter promotions per loop to avoid"
122 " increasing register pressure too much"));
126 MaxNumOfPromotions(cl::ZeroOrMore
, "max-counter-promotions", cl::init(-1),
127 cl::desc("Max number of allowed counter promotions"));
129 cl::opt
<unsigned> SpeculativeCounterPromotionMaxExiting(
130 cl::ZeroOrMore
, "speculative-counter-promotion-max-exiting", cl::init(3),
131 cl::desc("The max number of exiting blocks of a loop to allow "
132 " speculative counter promotion"));
134 cl::opt
<bool> SpeculativeCounterPromotionToLoop(
135 cl::ZeroOrMore
, "speculative-counter-promotion-to-loop", cl::init(false),
136 cl::desc("When the option is false, if the target block is in a loop, "
137 "the promotion will be disallowed unless the promoted counter "
138 " update can be further/iteratively promoted into an acyclic "
141 cl::opt
<bool> IterativeCounterPromotion(
142 cl::ZeroOrMore
, "iterative-counter-promotion", cl::init(true),
143 cl::desc("Allow counter promotion across the whole loop nest."));
145 class InstrProfilingLegacyPass
: public ModulePass
{
146 InstrProfiling InstrProf
;
151 InstrProfilingLegacyPass() : ModulePass(ID
) {}
152 InstrProfilingLegacyPass(const InstrProfOptions
&Options
, bool IsCS
= false)
153 : ModulePass(ID
), InstrProf(Options
, IsCS
) {}
155 StringRef
getPassName() const override
{
156 return "Frontend instrumentation-based coverage lowering";
159 bool runOnModule(Module
&M
) override
{
160 return InstrProf
.run(M
, getAnalysis
<TargetLibraryInfoWrapperPass
>().getTLI());
163 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
164 AU
.setPreservesCFG();
165 AU
.addRequired
<TargetLibraryInfoWrapperPass
>();
170 /// A helper class to promote one counter RMW operation in the loop
171 /// into register update.
173 /// RWM update for the counter will be sinked out of the loop after
174 /// the transformation.
176 class PGOCounterPromoterHelper
: public LoadAndStorePromoter
{
178 PGOCounterPromoterHelper(
179 Instruction
*L
, Instruction
*S
, SSAUpdater
&SSA
, Value
*Init
,
180 BasicBlock
*PH
, ArrayRef
<BasicBlock
*> ExitBlocks
,
181 ArrayRef
<Instruction
*> InsertPts
,
182 DenseMap
<Loop
*, SmallVector
<LoadStorePair
, 8>> &LoopToCands
,
184 : LoadAndStorePromoter({L
, S
}, SSA
), Store(S
), ExitBlocks(ExitBlocks
),
185 InsertPts(InsertPts
), LoopToCandidates(LoopToCands
), LI(LI
) {
186 assert(isa
<LoadInst
>(L
));
187 assert(isa
<StoreInst
>(S
));
188 SSA
.AddAvailableValue(PH
, Init
);
191 void doExtraRewritesBeforeFinalDeletion() override
{
192 for (unsigned i
= 0, e
= ExitBlocks
.size(); i
!= e
; ++i
) {
193 BasicBlock
*ExitBlock
= ExitBlocks
[i
];
194 Instruction
*InsertPos
= InsertPts
[i
];
195 // Get LiveIn value into the ExitBlock. If there are multiple
196 // predecessors, the value is defined by a PHI node in this
198 Value
*LiveInValue
= SSA
.GetValueInMiddleOfBlock(ExitBlock
);
199 Value
*Addr
= cast
<StoreInst
>(Store
)->getPointerOperand();
200 Type
*Ty
= LiveInValue
->getType();
201 IRBuilder
<> Builder(InsertPos
);
202 if (AtomicCounterUpdatePromoted
)
203 // automic update currently can only be promoted across the current
204 // loop, not the whole loop nest.
205 Builder
.CreateAtomicRMW(AtomicRMWInst::Add
, Addr
, LiveInValue
,
206 AtomicOrdering::SequentiallyConsistent
);
208 LoadInst
*OldVal
= Builder
.CreateLoad(Ty
, Addr
, "pgocount.promoted");
209 auto *NewVal
= Builder
.CreateAdd(OldVal
, LiveInValue
);
210 auto *NewStore
= Builder
.CreateStore(NewVal
, Addr
);
212 // Now update the parent loop's candidate list:
213 if (IterativeCounterPromotion
) {
214 auto *TargetLoop
= LI
.getLoopFor(ExitBlock
);
216 LoopToCandidates
[TargetLoop
].emplace_back(OldVal
, NewStore
);
224 ArrayRef
<BasicBlock
*> ExitBlocks
;
225 ArrayRef
<Instruction
*> InsertPts
;
226 DenseMap
<Loop
*, SmallVector
<LoadStorePair
, 8>> &LoopToCandidates
;
230 /// A helper class to do register promotion for all profile counter
231 /// updates in a loop.
233 class PGOCounterPromoter
{
236 DenseMap
<Loop
*, SmallVector
<LoadStorePair
, 8>> &LoopToCands
,
237 Loop
&CurLoop
, LoopInfo
&LI
, BlockFrequencyInfo
*BFI
)
238 : LoopToCandidates(LoopToCands
), ExitBlocks(), InsertPts(), L(CurLoop
),
241 SmallVector
<BasicBlock
*, 8> LoopExitBlocks
;
242 SmallPtrSet
<BasicBlock
*, 8> BlockSet
;
243 L
.getExitBlocks(LoopExitBlocks
);
245 for (BasicBlock
*ExitBlock
: LoopExitBlocks
) {
246 if (BlockSet
.insert(ExitBlock
).second
) {
247 ExitBlocks
.push_back(ExitBlock
);
248 InsertPts
.push_back(&*ExitBlock
->getFirstInsertionPt());
253 bool run(int64_t *NumPromoted
) {
254 // Skip 'infinite' loops:
255 if (ExitBlocks
.size() == 0)
257 unsigned MaxProm
= getMaxNumOfPromotionsInLoop(&L
);
261 unsigned Promoted
= 0;
262 for (auto &Cand
: LoopToCandidates
[&L
]) {
264 SmallVector
<PHINode
*, 4> NewPHIs
;
265 SSAUpdater
SSA(&NewPHIs
);
266 Value
*InitVal
= ConstantInt::get(Cand
.first
->getType(), 0);
268 // If BFI is set, we will use it to guide the promotions.
270 auto *BB
= Cand
.first
->getParent();
271 auto InstrCount
= BFI
->getBlockProfileCount(BB
);
274 auto PreheaderCount
= BFI
->getBlockProfileCount(L
.getLoopPreheader());
275 // If the average loop trip count is not greater than 1.5, we skip
277 if (PreheaderCount
&&
278 (PreheaderCount
.getValue() * 3) >= (InstrCount
.getValue() * 2))
282 PGOCounterPromoterHelper
Promoter(Cand
.first
, Cand
.second
, SSA
, InitVal
,
283 L
.getLoopPreheader(), ExitBlocks
,
284 InsertPts
, LoopToCandidates
, LI
);
285 Promoter
.run(SmallVector
<Instruction
*, 2>({Cand
.first
, Cand
.second
}));
287 if (Promoted
>= MaxProm
)
291 if (MaxNumOfPromotions
!= -1 && *NumPromoted
>= MaxNumOfPromotions
)
295 LLVM_DEBUG(dbgs() << Promoted
<< " counters promoted for loop (depth="
296 << L
.getLoopDepth() << ")\n");
297 return Promoted
!= 0;
301 bool allowSpeculativeCounterPromotion(Loop
*LP
) {
302 SmallVector
<BasicBlock
*, 8> ExitingBlocks
;
303 L
.getExitingBlocks(ExitingBlocks
);
304 // Not considierered speculative.
305 if (ExitingBlocks
.size() == 1)
307 if (ExitingBlocks
.size() > SpeculativeCounterPromotionMaxExiting
)
312 // Returns the max number of Counter Promotions for LP.
313 unsigned getMaxNumOfPromotionsInLoop(Loop
*LP
) {
314 // We can't insert into a catchswitch.
315 SmallVector
<BasicBlock
*, 8> LoopExitBlocks
;
316 LP
->getExitBlocks(LoopExitBlocks
);
317 if (llvm::any_of(LoopExitBlocks
, [](BasicBlock
*Exit
) {
318 return isa
<CatchSwitchInst
>(Exit
->getTerminator());
322 if (!LP
->hasDedicatedExits())
325 BasicBlock
*PH
= LP
->getLoopPreheader();
329 SmallVector
<BasicBlock
*, 8> ExitingBlocks
;
330 LP
->getExitingBlocks(ExitingBlocks
);
332 // If BFI is set, we do more aggressive promotions based on BFI.
336 // Not considierered speculative.
337 if (ExitingBlocks
.size() == 1)
338 return MaxNumOfPromotionsPerLoop
;
340 if (ExitingBlocks
.size() > SpeculativeCounterPromotionMaxExiting
)
343 // Whether the target block is in a loop does not matter:
344 if (SpeculativeCounterPromotionToLoop
)
345 return MaxNumOfPromotionsPerLoop
;
347 // Now check the target block:
348 unsigned MaxProm
= MaxNumOfPromotionsPerLoop
;
349 for (auto *TargetBlock
: LoopExitBlocks
) {
350 auto *TargetLoop
= LI
.getLoopFor(TargetBlock
);
353 unsigned MaxPromForTarget
= getMaxNumOfPromotionsInLoop(TargetLoop
);
354 unsigned PendingCandsInTarget
= LoopToCandidates
[TargetLoop
].size();
356 std::min(MaxProm
, std::max(MaxPromForTarget
, PendingCandsInTarget
) -
357 PendingCandsInTarget
);
362 DenseMap
<Loop
*, SmallVector
<LoadStorePair
, 8>> &LoopToCandidates
;
363 SmallVector
<BasicBlock
*, 8> ExitBlocks
;
364 SmallVector
<Instruction
*, 8> InsertPts
;
367 BlockFrequencyInfo
*BFI
;
370 } // end anonymous namespace
372 PreservedAnalyses
InstrProfiling::run(Module
&M
, ModuleAnalysisManager
&AM
) {
373 auto &TLI
= AM
.getResult
<TargetLibraryAnalysis
>(M
);
375 return PreservedAnalyses::all();
377 return PreservedAnalyses::none();
380 char InstrProfilingLegacyPass::ID
= 0;
381 INITIALIZE_PASS_BEGIN(
382 InstrProfilingLegacyPass
, "instrprof",
383 "Frontend instrumentation-based coverage lowering.", false, false)
384 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass
)
386 InstrProfilingLegacyPass
, "instrprof",
387 "Frontend instrumentation-based coverage lowering.", false, false)
390 llvm::createInstrProfilingLegacyPass(const InstrProfOptions
&Options
,
392 return new InstrProfilingLegacyPass(Options
, IsCS
);
395 static InstrProfIncrementInst
*castToIncrementInst(Instruction
*Instr
) {
396 InstrProfIncrementInst
*Inc
= dyn_cast
<InstrProfIncrementInstStep
>(Instr
);
399 return dyn_cast
<InstrProfIncrementInst
>(Instr
);
402 bool InstrProfiling::lowerIntrinsics(Function
*F
) {
403 bool MadeChange
= false;
404 PromotionCandidates
.clear();
405 for (BasicBlock
&BB
: *F
) {
406 for (auto I
= BB
.begin(), E
= BB
.end(); I
!= E
;) {
408 InstrProfIncrementInst
*Inc
= castToIncrementInst(&*Instr
);
412 } else if (auto *Ind
= dyn_cast
<InstrProfValueProfileInst
>(Instr
)) {
413 lowerValueProfileInst(Ind
);
422 promoteCounterLoadStores(F
);
426 bool InstrProfiling::isCounterPromotionEnabled() const {
427 if (DoCounterPromotion
.getNumOccurrences() > 0)
428 return DoCounterPromotion
;
430 return Options
.DoCounterPromotion
;
433 void InstrProfiling::promoteCounterLoadStores(Function
*F
) {
434 if (!isCounterPromotionEnabled())
437 DominatorTree
DT(*F
);
439 DenseMap
<Loop
*, SmallVector
<LoadStorePair
, 8>> LoopPromotionCandidates
;
441 std::unique_ptr
<BlockFrequencyInfo
> BFI
;
442 if (Options
.UseBFIInPromotion
) {
443 std::unique_ptr
<BranchProbabilityInfo
> BPI
;
444 BPI
.reset(new BranchProbabilityInfo(*F
, LI
, TLI
));
445 BFI
.reset(new BlockFrequencyInfo(*F
, *BPI
, LI
));
448 for (const auto &LoadStore
: PromotionCandidates
) {
449 auto *CounterLoad
= LoadStore
.first
;
450 auto *CounterStore
= LoadStore
.second
;
451 BasicBlock
*BB
= CounterLoad
->getParent();
452 Loop
*ParentLoop
= LI
.getLoopFor(BB
);
455 LoopPromotionCandidates
[ParentLoop
].emplace_back(CounterLoad
, CounterStore
);
458 SmallVector
<Loop
*, 4> Loops
= LI
.getLoopsInPreorder();
460 // Do a post-order traversal of the loops so that counter updates can be
461 // iteratively hoisted outside the loop nest.
462 for (auto *Loop
: llvm::reverse(Loops
)) {
463 PGOCounterPromoter
Promoter(LoopPromotionCandidates
, *Loop
, LI
, BFI
.get());
464 Promoter
.run(&TotalCountersPromoted
);
468 /// Check if the module contains uses of any profiling intrinsics.
469 static bool containsProfilingIntrinsics(Module
&M
) {
470 if (auto *F
= M
.getFunction(
471 Intrinsic::getName(llvm::Intrinsic::instrprof_increment
)))
474 if (auto *F
= M
.getFunction(
475 Intrinsic::getName(llvm::Intrinsic::instrprof_increment_step
)))
478 if (auto *F
= M
.getFunction(
479 Intrinsic::getName(llvm::Intrinsic::instrprof_value_profile
)))
485 bool InstrProfiling::run(Module
&M
, const TargetLibraryInfo
&TLI
) {
490 ProfileDataMap
.clear();
492 getMemOPSizeRangeFromOption(MemOPSizeRange
, MemOPSizeRangeStart
,
494 TT
= Triple(M
.getTargetTriple());
496 // Emit the runtime hook even if no counters are present.
497 bool MadeChange
= emitRuntimeHook();
499 // Improve compile time by avoiding linear scans when there is no work.
500 GlobalVariable
*CoverageNamesVar
=
501 M
.getNamedGlobal(getCoverageUnusedNamesVarName());
502 if (!containsProfilingIntrinsics(M
) && !CoverageNamesVar
)
505 // We did not know how many value sites there would be inside
506 // the instrumented function. This is counting the number of instrumented
507 // target value sites to enter it as field in the profile data variable.
508 for (Function
&F
: M
) {
509 InstrProfIncrementInst
*FirstProfIncInst
= nullptr;
510 for (BasicBlock
&BB
: F
)
511 for (auto I
= BB
.begin(), E
= BB
.end(); I
!= E
; I
++)
512 if (auto *Ind
= dyn_cast
<InstrProfValueProfileInst
>(I
))
513 computeNumValueSiteCounts(Ind
);
514 else if (FirstProfIncInst
== nullptr)
515 FirstProfIncInst
= dyn_cast
<InstrProfIncrementInst
>(I
);
517 // Value profiling intrinsic lowering requires per-function profile data
518 // variable to be created first.
519 if (FirstProfIncInst
!= nullptr)
520 static_cast<void>(getOrCreateRegionCounters(FirstProfIncInst
));
523 for (Function
&F
: M
)
524 MadeChange
|= lowerIntrinsics(&F
);
526 if (CoverageNamesVar
) {
527 lowerCoverageData(CoverageNamesVar
);
538 emitInitialization();
542 static FunctionCallee
543 getOrInsertValueProfilingCall(Module
&M
, const TargetLibraryInfo
&TLI
,
544 bool IsRange
= false) {
545 LLVMContext
&Ctx
= M
.getContext();
546 auto *ReturnTy
= Type::getVoidTy(M
.getContext());
549 if (auto AK
= TLI
.getExtAttrForI32Param(false))
550 AL
= AL
.addParamAttribute(M
.getContext(), 2, AK
);
553 Type
*ParamTypes
[] = {
554 #define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
555 #include "llvm/ProfileData/InstrProfData.inc"
557 auto *ValueProfilingCallTy
=
558 FunctionType::get(ReturnTy
, makeArrayRef(ParamTypes
), false);
559 return M
.getOrInsertFunction(getInstrProfValueProfFuncName(),
560 ValueProfilingCallTy
, AL
);
562 Type
*RangeParamTypes
[] = {
563 #define VALUE_RANGE_PROF 1
564 #define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
565 #include "llvm/ProfileData/InstrProfData.inc"
566 #undef VALUE_RANGE_PROF
568 auto *ValueRangeProfilingCallTy
=
569 FunctionType::get(ReturnTy
, makeArrayRef(RangeParamTypes
), false);
570 return M
.getOrInsertFunction(getInstrProfValueRangeProfFuncName(),
571 ValueRangeProfilingCallTy
, AL
);
575 void InstrProfiling::computeNumValueSiteCounts(InstrProfValueProfileInst
*Ind
) {
576 GlobalVariable
*Name
= Ind
->getName();
577 uint64_t ValueKind
= Ind
->getValueKind()->getZExtValue();
578 uint64_t Index
= Ind
->getIndex()->getZExtValue();
579 auto It
= ProfileDataMap
.find(Name
);
580 if (It
== ProfileDataMap
.end()) {
581 PerFunctionProfileData PD
;
582 PD
.NumValueSites
[ValueKind
] = Index
+ 1;
583 ProfileDataMap
[Name
] = PD
;
584 } else if (It
->second
.NumValueSites
[ValueKind
] <= Index
)
585 It
->second
.NumValueSites
[ValueKind
] = Index
+ 1;
588 void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst
*Ind
) {
589 GlobalVariable
*Name
= Ind
->getName();
590 auto It
= ProfileDataMap
.find(Name
);
591 assert(It
!= ProfileDataMap
.end() && It
->second
.DataVar
&&
592 "value profiling detected in function with no counter incerement");
594 GlobalVariable
*DataVar
= It
->second
.DataVar
;
595 uint64_t ValueKind
= Ind
->getValueKind()->getZExtValue();
596 uint64_t Index
= Ind
->getIndex()->getZExtValue();
597 for (uint32_t Kind
= IPVK_First
; Kind
< ValueKind
; ++Kind
)
598 Index
+= It
->second
.NumValueSites
[Kind
];
600 IRBuilder
<> Builder(Ind
);
601 bool IsRange
= (Ind
->getValueKind()->getZExtValue() ==
602 llvm::InstrProfValueKind::IPVK_MemOPSize
);
603 CallInst
*Call
= nullptr;
605 Value
*Args
[3] = {Ind
->getTargetValue(),
606 Builder
.CreateBitCast(DataVar
, Builder
.getInt8PtrTy()),
607 Builder
.getInt32(Index
)};
608 Call
= Builder
.CreateCall(getOrInsertValueProfilingCall(*M
, *TLI
), Args
);
611 Ind
->getTargetValue(),
612 Builder
.CreateBitCast(DataVar
, Builder
.getInt8PtrTy()),
613 Builder
.getInt32(Index
),
614 Builder
.getInt64(MemOPSizeRangeStart
),
615 Builder
.getInt64(MemOPSizeRangeLast
),
616 Builder
.getInt64(MemOPSizeLarge
== 0 ? INT64_MIN
: MemOPSizeLarge
)};
618 Builder
.CreateCall(getOrInsertValueProfilingCall(*M
, *TLI
, true), Args
);
620 if (auto AK
= TLI
->getExtAttrForI32Param(false))
621 Call
->addParamAttr(2, AK
);
622 Ind
->replaceAllUsesWith(Call
);
623 Ind
->eraseFromParent();
626 void InstrProfiling::lowerIncrement(InstrProfIncrementInst
*Inc
) {
627 GlobalVariable
*Counters
= getOrCreateRegionCounters(Inc
);
629 IRBuilder
<> Builder(Inc
);
630 uint64_t Index
= Inc
->getIndex()->getZExtValue();
631 Value
*Addr
= Builder
.CreateConstInBoundsGEP2_64(Counters
->getValueType(),
634 if (Options
.Atomic
|| AtomicCounterUpdateAll
) {
635 Builder
.CreateAtomicRMW(AtomicRMWInst::Add
, Addr
, Inc
->getStep(),
636 AtomicOrdering::Monotonic
);
638 Value
*IncStep
= Inc
->getStep();
639 Value
*Load
= Builder
.CreateLoad(IncStep
->getType(), Addr
, "pgocount");
640 auto *Count
= Builder
.CreateAdd(Load
, Inc
->getStep());
641 auto *Store
= Builder
.CreateStore(Count
, Addr
);
642 if (isCounterPromotionEnabled())
643 PromotionCandidates
.emplace_back(cast
<Instruction
>(Load
), Store
);
645 Inc
->eraseFromParent();
648 void InstrProfiling::lowerCoverageData(GlobalVariable
*CoverageNamesVar
) {
649 ConstantArray
*Names
=
650 cast
<ConstantArray
>(CoverageNamesVar
->getInitializer());
651 for (unsigned I
= 0, E
= Names
->getNumOperands(); I
< E
; ++I
) {
652 Constant
*NC
= Names
->getOperand(I
);
653 Value
*V
= NC
->stripPointerCasts();
654 assert(isa
<GlobalVariable
>(V
) && "Missing reference to function name");
655 GlobalVariable
*Name
= cast
<GlobalVariable
>(V
);
657 Name
->setLinkage(GlobalValue::PrivateLinkage
);
658 ReferencedNames
.push_back(Name
);
659 NC
->dropAllReferences();
661 CoverageNamesVar
->eraseFromParent();
664 /// Get the name of a profiling variable for a particular function.
665 static std::string
getVarName(InstrProfIncrementInst
*Inc
, StringRef Prefix
) {
666 StringRef NamePrefix
= getInstrProfNameVarPrefix();
667 StringRef Name
= Inc
->getName()->getName().substr(NamePrefix
.size());
668 Function
*F
= Inc
->getParent()->getParent();
669 Module
*M
= F
->getParent();
670 if (!DoHashBasedCounterSplit
|| !isIRPGOFlagSet(M
) ||
671 !canRenameComdatFunc(*F
))
672 return (Prefix
+ Name
).str();
673 uint64_t FuncHash
= Inc
->getHash()->getZExtValue();
674 SmallVector
<char, 24> HashPostfix
;
675 if (Name
.endswith((Twine(".") + Twine(FuncHash
)).toStringRef(HashPostfix
)))
676 return (Prefix
+ Name
).str();
677 return (Prefix
+ Name
+ "." + Twine(FuncHash
)).str();
680 static inline bool shouldRecordFunctionAddr(Function
*F
) {
682 bool HasAvailableExternallyLinkage
= F
->hasAvailableExternallyLinkage();
683 if (!F
->hasLinkOnceLinkage() && !F
->hasLocalLinkage() &&
684 !HasAvailableExternallyLinkage
)
687 // A function marked 'alwaysinline' with available_externally linkage can't
688 // have its address taken. Doing so would create an undefined external ref to
689 // the function, which would fail to link.
690 if (HasAvailableExternallyLinkage
&&
691 F
->hasFnAttribute(Attribute::AlwaysInline
))
694 // Prohibit function address recording if the function is both internal and
695 // COMDAT. This avoids the profile data variable referencing internal symbols
697 if (F
->hasLocalLinkage() && F
->hasComdat())
700 // Check uses of this function for other than direct calls or invokes to it.
701 // Inline virtual functions have linkeOnceODR linkage. When a key method
702 // exists, the vtable will only be emitted in the TU where the key method
703 // is defined. In a TU where vtable is not available, the function won't
704 // be 'addresstaken'. If its address is not recorded here, the profile data
705 // with missing address may be picked by the linker leading to missing
706 // indirect call target info.
707 return F
->hasAddressTaken() || F
->hasLinkOnceLinkage();
710 static bool needsRuntimeRegistrationOfSectionRange(const Triple
&TT
) {
711 // Don't do this for Darwin. compiler-rt uses linker magic.
714 // Use linker script magic to get data/cnts/name start/end.
715 if (TT
.isOSLinux() || TT
.isOSFreeBSD() || TT
.isOSNetBSD() ||
716 TT
.isOSSolaris() || TT
.isOSFuchsia() || TT
.isPS4CPU() ||
724 InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst
*Inc
) {
725 GlobalVariable
*NamePtr
= Inc
->getName();
726 auto It
= ProfileDataMap
.find(NamePtr
);
727 PerFunctionProfileData PD
;
728 if (It
!= ProfileDataMap
.end()) {
729 if (It
->second
.RegionCounters
)
730 return It
->second
.RegionCounters
;
734 // Match the linkage and visibility of the name global, except on COFF, where
735 // the linkage must be local and consequentially the visibility must be
737 Function
*Fn
= Inc
->getParent()->getParent();
738 GlobalValue::LinkageTypes Linkage
= NamePtr
->getLinkage();
739 GlobalValue::VisibilityTypes Visibility
= NamePtr
->getVisibility();
740 if (TT
.isOSBinFormatCOFF()) {
741 Linkage
= GlobalValue::InternalLinkage
;
742 Visibility
= GlobalValue::DefaultVisibility
;
745 // Move the name variable to the right section. Place them in a COMDAT group
746 // if the associated function is a COMDAT. This will make sure that only one
747 // copy of counters of the COMDAT function will be emitted after linking. Keep
748 // in mind that this pass may run before the inliner, so we need to create a
749 // new comdat group for the counters and profiling data. If we use the comdat
750 // of the parent function, that will result in relocations against discarded
752 Comdat
*Cmdt
= nullptr;
753 GlobalValue::LinkageTypes CounterLinkage
= Linkage
;
754 if (needsComdatForCounter(*Fn
, *M
)) {
755 StringRef CmdtPrefix
= getInstrProfComdatPrefix();
756 if (TT
.isOSBinFormatCOFF()) {
757 // For COFF, the comdat group name must be the name of a symbol in the
758 // group. Use the counter variable name, and upgrade its linkage to
759 // something externally visible, like linkonce_odr.
760 CmdtPrefix
= getInstrProfCountersVarPrefix();
761 CounterLinkage
= GlobalValue::LinkOnceODRLinkage
;
763 Cmdt
= M
->getOrInsertComdat(getVarName(Inc
, CmdtPrefix
));
766 uint64_t NumCounters
= Inc
->getNumCounters()->getZExtValue();
767 LLVMContext
&Ctx
= M
->getContext();
768 ArrayType
*CounterTy
= ArrayType::get(Type::getInt64Ty(Ctx
), NumCounters
);
770 // Create the counters variable.
772 new GlobalVariable(*M
, CounterTy
, false, Linkage
,
773 Constant::getNullValue(CounterTy
),
774 getVarName(Inc
, getInstrProfCountersVarPrefix()));
775 CounterPtr
->setVisibility(Visibility
);
776 CounterPtr
->setSection(
777 getInstrProfSectionName(IPSK_cnts
, TT
.getObjectFormat()));
778 CounterPtr
->setAlignment(8);
779 CounterPtr
->setComdat(Cmdt
);
780 CounterPtr
->setLinkage(CounterLinkage
);
782 auto *Int8PtrTy
= Type::getInt8PtrTy(Ctx
);
783 // Allocate statically the array of pointers to value profile nodes for
784 // the current function.
785 Constant
*ValuesPtrExpr
= ConstantPointerNull::get(Int8PtrTy
);
786 if (ValueProfileStaticAlloc
&& !needsRuntimeRegistrationOfSectionRange(TT
)) {
788 for (uint32_t Kind
= IPVK_First
; Kind
<= IPVK_Last
; ++Kind
)
789 NS
+= PD
.NumValueSites
[Kind
];
791 ArrayType
*ValuesTy
= ArrayType::get(Type::getInt64Ty(Ctx
), NS
);
794 new GlobalVariable(*M
, ValuesTy
, false, Linkage
,
795 Constant::getNullValue(ValuesTy
),
796 getVarName(Inc
, getInstrProfValuesVarPrefix()));
797 ValuesVar
->setVisibility(Visibility
);
798 ValuesVar
->setSection(
799 getInstrProfSectionName(IPSK_vals
, TT
.getObjectFormat()));
800 ValuesVar
->setAlignment(8);
801 ValuesVar
->setComdat(Cmdt
);
803 ConstantExpr::getBitCast(ValuesVar
, Type::getInt8PtrTy(Ctx
));
807 // Create data variable.
808 auto *Int16Ty
= Type::getInt16Ty(Ctx
);
809 auto *Int16ArrayTy
= ArrayType::get(Int16Ty
, IPVK_Last
+ 1);
810 Type
*DataTypes
[] = {
811 #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) LLVMType,
812 #include "llvm/ProfileData/InstrProfData.inc"
814 auto *DataTy
= StructType::get(Ctx
, makeArrayRef(DataTypes
));
816 Constant
*FunctionAddr
= shouldRecordFunctionAddr(Fn
)
817 ? ConstantExpr::getBitCast(Fn
, Int8PtrTy
)
818 : ConstantPointerNull::get(Int8PtrTy
);
820 Constant
*Int16ArrayVals
[IPVK_Last
+ 1];
821 for (uint32_t Kind
= IPVK_First
; Kind
<= IPVK_Last
; ++Kind
)
822 Int16ArrayVals
[Kind
] = ConstantInt::get(Int16Ty
, PD
.NumValueSites
[Kind
]);
824 Constant
*DataVals
[] = {
825 #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init,
826 #include "llvm/ProfileData/InstrProfData.inc"
828 auto *Data
= new GlobalVariable(*M
, DataTy
, false, Linkage
,
829 ConstantStruct::get(DataTy
, DataVals
),
830 getVarName(Inc
, getInstrProfDataVarPrefix()));
831 Data
->setVisibility(Visibility
);
832 Data
->setSection(getInstrProfSectionName(IPSK_data
, TT
.getObjectFormat()));
833 Data
->setAlignment(INSTR_PROF_DATA_ALIGNMENT
);
834 Data
->setComdat(Cmdt
);
836 PD
.RegionCounters
= CounterPtr
;
838 ProfileDataMap
[NamePtr
] = PD
;
840 // Mark the data variable as used so that it isn't stripped out.
841 UsedVars
.push_back(Data
);
842 // Now that the linkage set by the FE has been passed to the data and counter
843 // variables, reset Name variable's linkage and visibility to private so that
844 // it can be removed later by the compiler.
845 NamePtr
->setLinkage(GlobalValue::PrivateLinkage
);
846 // Collect the referenced names to be used by emitNameData.
847 ReferencedNames
.push_back(NamePtr
);
852 void InstrProfiling::emitVNodes() {
853 if (!ValueProfileStaticAlloc
)
856 // For now only support this on platforms that do
857 // not require runtime registration to discover
858 // named section start/end.
859 if (needsRuntimeRegistrationOfSectionRange(TT
))
863 for (auto &PD
: ProfileDataMap
) {
864 for (uint32_t Kind
= IPVK_First
; Kind
<= IPVK_Last
; ++Kind
)
865 TotalNS
+= PD
.second
.NumValueSites
[Kind
];
871 uint64_t NumCounters
= TotalNS
* NumCountersPerValueSite
;
872 // Heuristic for small programs with very few total value sites.
873 // The default value of vp-counters-per-site is chosen based on
874 // the observation that large apps usually have a low percentage
875 // of value sites that actually have any profile data, and thus
876 // the average number of counters per site is low. For small
877 // apps with very few sites, this may not be true. Bump up the
878 // number of counters in this case.
879 #define INSTR_PROF_MIN_VAL_COUNTS 10
880 if (NumCounters
< INSTR_PROF_MIN_VAL_COUNTS
)
881 NumCounters
= std::max(INSTR_PROF_MIN_VAL_COUNTS
, (int)NumCounters
* 2);
883 auto &Ctx
= M
->getContext();
884 Type
*VNodeTypes
[] = {
885 #define INSTR_PROF_VALUE_NODE(Type, LLVMType, Name, Init) LLVMType,
886 #include "llvm/ProfileData/InstrProfData.inc"
888 auto *VNodeTy
= StructType::get(Ctx
, makeArrayRef(VNodeTypes
));
890 ArrayType
*VNodesTy
= ArrayType::get(VNodeTy
, NumCounters
);
891 auto *VNodesVar
= new GlobalVariable(
892 *M
, VNodesTy
, false, GlobalValue::PrivateLinkage
,
893 Constant::getNullValue(VNodesTy
), getInstrProfVNodesVarName());
894 VNodesVar
->setSection(
895 getInstrProfSectionName(IPSK_vnodes
, TT
.getObjectFormat()));
896 UsedVars
.push_back(VNodesVar
);
899 void InstrProfiling::emitNameData() {
900 std::string UncompressedData
;
902 if (ReferencedNames
.empty())
905 std::string CompressedNameStr
;
906 if (Error E
= collectPGOFuncNameStrings(ReferencedNames
, CompressedNameStr
,
907 DoNameCompression
)) {
908 report_fatal_error(toString(std::move(E
)), false);
911 auto &Ctx
= M
->getContext();
912 auto *NamesVal
= ConstantDataArray::getString(
913 Ctx
, StringRef(CompressedNameStr
), false);
914 NamesVar
= new GlobalVariable(*M
, NamesVal
->getType(), true,
915 GlobalValue::PrivateLinkage
, NamesVal
,
916 getInstrProfNamesVarName());
917 NamesSize
= CompressedNameStr
.size();
918 NamesVar
->setSection(
919 getInstrProfSectionName(IPSK_name
, TT
.getObjectFormat()));
920 // On COFF, it's important to reduce the alignment down to 1 to prevent the
921 // linker from inserting padding before the start of the names section or
922 // between names entries.
923 NamesVar
->setAlignment(1);
924 UsedVars
.push_back(NamesVar
);
926 for (auto *NamePtr
: ReferencedNames
)
927 NamePtr
->eraseFromParent();
930 void InstrProfiling::emitRegistration() {
931 if (!needsRuntimeRegistrationOfSectionRange(TT
))
934 // Construct the function.
935 auto *VoidTy
= Type::getVoidTy(M
->getContext());
936 auto *VoidPtrTy
= Type::getInt8PtrTy(M
->getContext());
937 auto *Int64Ty
= Type::getInt64Ty(M
->getContext());
938 auto *RegisterFTy
= FunctionType::get(VoidTy
, false);
939 auto *RegisterF
= Function::Create(RegisterFTy
, GlobalValue::InternalLinkage
,
940 getInstrProfRegFuncsName(), M
);
941 RegisterF
->setUnnamedAddr(GlobalValue::UnnamedAddr::Global
);
942 if (Options
.NoRedZone
)
943 RegisterF
->addFnAttr(Attribute::NoRedZone
);
945 auto *RuntimeRegisterTy
= FunctionType::get(VoidTy
, VoidPtrTy
, false);
946 auto *RuntimeRegisterF
=
947 Function::Create(RuntimeRegisterTy
, GlobalVariable::ExternalLinkage
,
948 getInstrProfRegFuncName(), M
);
950 IRBuilder
<> IRB(BasicBlock::Create(M
->getContext(), "", RegisterF
));
951 for (Value
*Data
: UsedVars
)
952 if (Data
!= NamesVar
&& !isa
<Function
>(Data
))
953 IRB
.CreateCall(RuntimeRegisterF
, IRB
.CreateBitCast(Data
, VoidPtrTy
));
956 Type
*ParamTypes
[] = {VoidPtrTy
, Int64Ty
};
957 auto *NamesRegisterTy
=
958 FunctionType::get(VoidTy
, makeArrayRef(ParamTypes
), false);
959 auto *NamesRegisterF
=
960 Function::Create(NamesRegisterTy
, GlobalVariable::ExternalLinkage
,
961 getInstrProfNamesRegFuncName(), M
);
962 IRB
.CreateCall(NamesRegisterF
, {IRB
.CreateBitCast(NamesVar
, VoidPtrTy
),
963 IRB
.getInt64(NamesSize
)});
969 bool InstrProfiling::emitRuntimeHook() {
970 // We expect the linker to be invoked with -u<hook_var> flag for linux,
971 // for which case there is no need to emit the user function.
975 // If the module's provided its own runtime, we don't need to do anything.
976 if (M
->getGlobalVariable(getInstrProfRuntimeHookVarName()))
979 // Declare an external variable that will pull in the runtime initialization.
980 auto *Int32Ty
= Type::getInt32Ty(M
->getContext());
982 new GlobalVariable(*M
, Int32Ty
, false, GlobalValue::ExternalLinkage
,
983 nullptr, getInstrProfRuntimeHookVarName());
985 // Make a function that uses it.
986 auto *User
= Function::Create(FunctionType::get(Int32Ty
, false),
987 GlobalValue::LinkOnceODRLinkage
,
988 getInstrProfRuntimeHookVarUseFuncName(), M
);
989 User
->addFnAttr(Attribute::NoInline
);
990 if (Options
.NoRedZone
)
991 User
->addFnAttr(Attribute::NoRedZone
);
992 User
->setVisibility(GlobalValue::HiddenVisibility
);
993 if (TT
.supportsCOMDAT())
994 User
->setComdat(M
->getOrInsertComdat(User
->getName()));
996 IRBuilder
<> IRB(BasicBlock::Create(M
->getContext(), "", User
));
997 auto *Load
= IRB
.CreateLoad(Int32Ty
, Var
);
1000 // Mark the user variable as used so that it isn't stripped out.
1001 UsedVars
.push_back(User
);
1005 void InstrProfiling::emitUses() {
1006 if (!UsedVars
.empty())
1007 appendToUsed(*M
, UsedVars
);
1010 void InstrProfiling::emitInitialization() {
1011 // Create ProfileFileName variable. Don't don't this for the
1012 // context-sensitive instrumentation lowering: This lowering is after
1013 // LTO/ThinLTO linking. Pass PGOInstrumentationGenCreateVar should
1014 // have already create the variable before LTO/ThinLTO linking.
1016 createProfileFileNameVar(*M
, Options
.InstrProfileOutput
);
1017 Function
*RegisterF
= M
->getFunction(getInstrProfRegFuncsName());
1021 // Create the initialization function.
1022 auto *VoidTy
= Type::getVoidTy(M
->getContext());
1023 auto *F
= Function::Create(FunctionType::get(VoidTy
, false),
1024 GlobalValue::InternalLinkage
,
1025 getInstrProfInitFuncName(), M
);
1026 F
->setUnnamedAddr(GlobalValue::UnnamedAddr::Global
);
1027 F
->addFnAttr(Attribute::NoInline
);
1028 if (Options
.NoRedZone
)
1029 F
->addFnAttr(Attribute::NoRedZone
);
1031 // Add the basic block and the necessary calls.
1032 IRBuilder
<> IRB(BasicBlock::Create(M
->getContext(), "", F
));
1033 IRB
.CreateCall(RegisterF
, {});
1034 IRB
.CreateRetVoid();
1036 appendToGlobalCtors(*M
, F
, 0);