1 //===-- InstrProfiling.cpp - Frontend instrumentation based profiling -----===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This pass lowers instrprof_* intrinsics emitted by a frontend for profiling.
10 // It also builds the data structures and initialization code needed for
11 // updating execution counts and emitting the profile at runtime.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/Transforms/Instrumentation/InstrProfiling.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/ADT/Triple.h"
20 #include "llvm/ADT/Twine.h"
21 #include "llvm/Analysis/BlockFrequencyInfo.h"
22 #include "llvm/Analysis/BranchProbabilityInfo.h"
23 #include "llvm/Analysis/LoopInfo.h"
24 #include "llvm/Analysis/TargetLibraryInfo.h"
25 #include "llvm/IR/Attributes.h"
26 #include "llvm/IR/BasicBlock.h"
27 #include "llvm/IR/Constant.h"
28 #include "llvm/IR/Constants.h"
29 #include "llvm/IR/DerivedTypes.h"
30 #include "llvm/IR/Dominators.h"
31 #include "llvm/IR/Function.h"
32 #include "llvm/IR/GlobalValue.h"
33 #include "llvm/IR/GlobalVariable.h"
34 #include "llvm/IR/IRBuilder.h"
35 #include "llvm/IR/Instruction.h"
36 #include "llvm/IR/Instructions.h"
37 #include "llvm/IR/IntrinsicInst.h"
38 #include "llvm/IR/Module.h"
39 #include "llvm/IR/Type.h"
40 #include "llvm/Pass.h"
41 #include "llvm/ProfileData/InstrProf.h"
42 #include "llvm/Support/Casting.h"
43 #include "llvm/Support/CommandLine.h"
44 #include "llvm/Support/Error.h"
45 #include "llvm/Support/ErrorHandling.h"
46 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
47 #include "llvm/Transforms/Utils/ModuleUtils.h"
48 #include "llvm/Transforms/Utils/SSAUpdater.h"
57 #define DEBUG_TYPE "instrprof"
59 // The start and end values of precise value profile range for memory
61 cl::opt
<std::string
> MemOPSizeRange(
63 cl::desc("Set the range of size in memory intrinsic calls to be profiled "
64 "precisely, in a format of <start_val>:<end_val>"),
67 // The value that is considered to be a large value in a memory intrinsic.
68 cl::opt
<unsigned> MemOPSizeLarge(
70 cl::desc("Set large value thresthold in memory intrinsic size profiling. "
71 "Value of 0 disables the large value profiling."),
76 cl::opt
<bool> DoNameCompression("enable-name-compression",
77 cl::desc("Enable name string compression"),
80 cl::opt
<bool> DoHashBasedCounterSplit(
81 "hash-based-counter-split",
82 cl::desc("Rename counter variable of a comdat function based on cfg hash"),
85 cl::opt
<bool> ValueProfileStaticAlloc(
87 cl::desc("Do static counter allocation for value profiler"),
90 cl::opt
<double> NumCountersPerValueSite(
91 "vp-counters-per-site",
92 cl::desc("The average number of profile counters allocated "
93 "per value profiling site."),
94 // This is set to a very small value because in real programs, only
95 // a very small percentage of value sites have non-zero targets, e.g, 1/30.
96 // For those sites with non-zero profile, the average number of targets
97 // is usually smaller than 2.
100 cl::opt
<bool> AtomicCounterUpdateAll(
101 "instrprof-atomic-counter-update-all", cl::ZeroOrMore
,
102 cl::desc("Make all profile counter updates atomic (for testing only)"),
105 cl::opt
<bool> AtomicCounterUpdatePromoted(
106 "atomic-counter-update-promoted", cl::ZeroOrMore
,
107 cl::desc("Do counter update using atomic fetch add "
108 " for promoted counters only"),
111 // If the option is not specified, the default behavior about whether
112 // counter promotion is done depends on how instrumentation lowering
113 // pipeline is setup, i.e., the default value of true of this option
114 // does not mean the promotion will be done by default. Explicitly
115 // setting this option can override the default behavior.
116 cl::opt
<bool> DoCounterPromotion("do-counter-promotion", cl::ZeroOrMore
,
117 cl::desc("Do counter register promotion"),
119 cl::opt
<unsigned> MaxNumOfPromotionsPerLoop(
120 cl::ZeroOrMore
, "max-counter-promotions-per-loop", cl::init(20),
121 cl::desc("Max number counter promotions per loop to avoid"
122 " increasing register pressure too much"));
126 MaxNumOfPromotions(cl::ZeroOrMore
, "max-counter-promotions", cl::init(-1),
127 cl::desc("Max number of allowed counter promotions"));
129 cl::opt
<unsigned> SpeculativeCounterPromotionMaxExiting(
130 cl::ZeroOrMore
, "speculative-counter-promotion-max-exiting", cl::init(3),
131 cl::desc("The max number of exiting blocks of a loop to allow "
132 " speculative counter promotion"));
134 cl::opt
<bool> SpeculativeCounterPromotionToLoop(
135 cl::ZeroOrMore
, "speculative-counter-promotion-to-loop", cl::init(false),
136 cl::desc("When the option is false, if the target block is in a loop, "
137 "the promotion will be disallowed unless the promoted counter "
138 " update can be further/iteratively promoted into an acyclic "
141 cl::opt
<bool> IterativeCounterPromotion(
142 cl::ZeroOrMore
, "iterative-counter-promotion", cl::init(true),
143 cl::desc("Allow counter promotion across the whole loop nest."));
145 class InstrProfilingLegacyPass
: public ModulePass
{
146 InstrProfiling InstrProf
;
151 InstrProfilingLegacyPass() : ModulePass(ID
) {}
152 InstrProfilingLegacyPass(const InstrProfOptions
&Options
, bool IsCS
= false)
153 : ModulePass(ID
), InstrProf(Options
, IsCS
) {}
155 StringRef
getPassName() const override
{
156 return "Frontend instrumentation-based coverage lowering";
159 bool runOnModule(Module
&M
) override
{
160 auto GetTLI
= [this](Function
&F
) -> TargetLibraryInfo
& {
161 return this->getAnalysis
<TargetLibraryInfoWrapperPass
>().getTLI(F
);
163 return InstrProf
.run(M
, GetTLI
);
166 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
167 AU
.setPreservesCFG();
168 AU
.addRequired
<TargetLibraryInfoWrapperPass
>();
173 /// A helper class to promote one counter RMW operation in the loop
174 /// into register update.
176 /// The RMW update for the counter will be sunk out of the loop after
177 /// the transformation.
179 class PGOCounterPromoterHelper
: public LoadAndStorePromoter
{
181 PGOCounterPromoterHelper(
182 Instruction
*L
, Instruction
*S
, SSAUpdater
&SSA
, Value
*Init
,
183 BasicBlock
*PH
, ArrayRef
<BasicBlock
*> ExitBlocks
,
184 ArrayRef
<Instruction
*> InsertPts
,
185 DenseMap
<Loop
*, SmallVector
<LoadStorePair
, 8>> &LoopToCands
,
187 : LoadAndStorePromoter({L
, S
}, SSA
), Store(S
), ExitBlocks(ExitBlocks
),
188 InsertPts(InsertPts
), LoopToCandidates(LoopToCands
), LI(LI
) {
189 assert(isa
<LoadInst
>(L
));
190 assert(isa
<StoreInst
>(S
));
191 SSA
.AddAvailableValue(PH
, Init
);
194 void doExtraRewritesBeforeFinalDeletion() override
{
195 for (unsigned i
= 0, e
= ExitBlocks
.size(); i
!= e
; ++i
) {
196 BasicBlock
*ExitBlock
= ExitBlocks
[i
];
197 Instruction
*InsertPos
= InsertPts
[i
];
198 // Get LiveIn value into the ExitBlock. If there are multiple
199 // predecessors, the value is defined by a PHI node in this
201 Value
*LiveInValue
= SSA
.GetValueInMiddleOfBlock(ExitBlock
);
202 Value
*Addr
= cast
<StoreInst
>(Store
)->getPointerOperand();
203 Type
*Ty
= LiveInValue
->getType();
204 IRBuilder
<> Builder(InsertPos
);
205 if (AtomicCounterUpdatePromoted
)
206 // atomic update currently can only be promoted across the current
207 // loop, not the whole loop nest.
208 Builder
.CreateAtomicRMW(AtomicRMWInst::Add
, Addr
, LiveInValue
,
209 AtomicOrdering::SequentiallyConsistent
);
211 LoadInst
*OldVal
= Builder
.CreateLoad(Ty
, Addr
, "pgocount.promoted");
212 auto *NewVal
= Builder
.CreateAdd(OldVal
, LiveInValue
);
213 auto *NewStore
= Builder
.CreateStore(NewVal
, Addr
);
215 // Now update the parent loop's candidate list:
216 if (IterativeCounterPromotion
) {
217 auto *TargetLoop
= LI
.getLoopFor(ExitBlock
);
219 LoopToCandidates
[TargetLoop
].emplace_back(OldVal
, NewStore
);
227 ArrayRef
<BasicBlock
*> ExitBlocks
;
228 ArrayRef
<Instruction
*> InsertPts
;
229 DenseMap
<Loop
*, SmallVector
<LoadStorePair
, 8>> &LoopToCandidates
;
233 /// A helper class to do register promotion for all profile counter
234 /// updates in a loop.
236 class PGOCounterPromoter
{
239 DenseMap
<Loop
*, SmallVector
<LoadStorePair
, 8>> &LoopToCands
,
240 Loop
&CurLoop
, LoopInfo
&LI
, BlockFrequencyInfo
*BFI
)
241 : LoopToCandidates(LoopToCands
), ExitBlocks(), InsertPts(), L(CurLoop
),
244 SmallVector
<BasicBlock
*, 8> LoopExitBlocks
;
245 SmallPtrSet
<BasicBlock
*, 8> BlockSet
;
246 L
.getExitBlocks(LoopExitBlocks
);
248 for (BasicBlock
*ExitBlock
: LoopExitBlocks
) {
249 if (BlockSet
.insert(ExitBlock
).second
) {
250 ExitBlocks
.push_back(ExitBlock
);
251 InsertPts
.push_back(&*ExitBlock
->getFirstInsertionPt());
256 bool run(int64_t *NumPromoted
) {
257 // Skip 'infinite' loops:
258 if (ExitBlocks
.size() == 0)
260 unsigned MaxProm
= getMaxNumOfPromotionsInLoop(&L
);
264 unsigned Promoted
= 0;
265 for (auto &Cand
: LoopToCandidates
[&L
]) {
267 SmallVector
<PHINode
*, 4> NewPHIs
;
268 SSAUpdater
SSA(&NewPHIs
);
269 Value
*InitVal
= ConstantInt::get(Cand
.first
->getType(), 0);
271 // If BFI is set, we will use it to guide the promotions.
273 auto *BB
= Cand
.first
->getParent();
274 auto InstrCount
= BFI
->getBlockProfileCount(BB
);
277 auto PreheaderCount
= BFI
->getBlockProfileCount(L
.getLoopPreheader());
278 // If the average loop trip count is not greater than 1.5, we skip
280 if (PreheaderCount
&&
281 (PreheaderCount
.getValue() * 3) >= (InstrCount
.getValue() * 2))
285 PGOCounterPromoterHelper
Promoter(Cand
.first
, Cand
.second
, SSA
, InitVal
,
286 L
.getLoopPreheader(), ExitBlocks
,
287 InsertPts
, LoopToCandidates
, LI
);
288 Promoter
.run(SmallVector
<Instruction
*, 2>({Cand
.first
, Cand
.second
}));
290 if (Promoted
>= MaxProm
)
294 if (MaxNumOfPromotions
!= -1 && *NumPromoted
>= MaxNumOfPromotions
)
298 LLVM_DEBUG(dbgs() << Promoted
<< " counters promoted for loop (depth="
299 << L
.getLoopDepth() << ")\n");
300 return Promoted
!= 0;
304 bool allowSpeculativeCounterPromotion(Loop
*LP
) {
305 SmallVector
<BasicBlock
*, 8> ExitingBlocks
;
306 L
.getExitingBlocks(ExitingBlocks
);
307 // Not considered speculative.
308 if (ExitingBlocks
.size() == 1)
310 if (ExitingBlocks
.size() > SpeculativeCounterPromotionMaxExiting
)
315 // Returns the max number of Counter Promotions for LP.
316 unsigned getMaxNumOfPromotionsInLoop(Loop
*LP
) {
317 // We can't insert into a catchswitch.
318 SmallVector
<BasicBlock
*, 8> LoopExitBlocks
;
319 LP
->getExitBlocks(LoopExitBlocks
);
320 if (llvm::any_of(LoopExitBlocks
, [](BasicBlock
*Exit
) {
321 return isa
<CatchSwitchInst
>(Exit
->getTerminator());
325 if (!LP
->hasDedicatedExits())
328 BasicBlock
*PH
= LP
->getLoopPreheader();
332 SmallVector
<BasicBlock
*, 8> ExitingBlocks
;
333 LP
->getExitingBlocks(ExitingBlocks
);
335 // If BFI is set, we do more aggressive promotions based on BFI.
339 // Not considered speculative.
340 if (ExitingBlocks
.size() == 1)
341 return MaxNumOfPromotionsPerLoop
;
343 if (ExitingBlocks
.size() > SpeculativeCounterPromotionMaxExiting
)
346 // Whether the target block is in a loop does not matter:
347 if (SpeculativeCounterPromotionToLoop
)
348 return MaxNumOfPromotionsPerLoop
;
350 // Now check the target block:
351 unsigned MaxProm
= MaxNumOfPromotionsPerLoop
;
352 for (auto *TargetBlock
: LoopExitBlocks
) {
353 auto *TargetLoop
= LI
.getLoopFor(TargetBlock
);
356 unsigned MaxPromForTarget
= getMaxNumOfPromotionsInLoop(TargetLoop
);
357 unsigned PendingCandsInTarget
= LoopToCandidates
[TargetLoop
].size();
359 std::min(MaxProm
, std::max(MaxPromForTarget
, PendingCandsInTarget
) -
360 PendingCandsInTarget
);
365 DenseMap
<Loop
*, SmallVector
<LoadStorePair
, 8>> &LoopToCandidates
;
366 SmallVector
<BasicBlock
*, 8> ExitBlocks
;
367 SmallVector
<Instruction
*, 8> InsertPts
;
370 BlockFrequencyInfo
*BFI
;
373 } // end anonymous namespace
375 PreservedAnalyses
InstrProfiling::run(Module
&M
, ModuleAnalysisManager
&AM
) {
376 FunctionAnalysisManager
&FAM
=
377 AM
.getResult
<FunctionAnalysisManagerModuleProxy
>(M
).getManager();
378 auto GetTLI
= [&FAM
](Function
&F
) -> TargetLibraryInfo
& {
379 return FAM
.getResult
<TargetLibraryAnalysis
>(F
);
382 return PreservedAnalyses::all();
384 return PreservedAnalyses::none();
387 char InstrProfilingLegacyPass::ID
= 0;
388 INITIALIZE_PASS_BEGIN(
389 InstrProfilingLegacyPass
, "instrprof",
390 "Frontend instrumentation-based coverage lowering.", false, false)
391 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass
)
393 InstrProfilingLegacyPass
, "instrprof",
394 "Frontend instrumentation-based coverage lowering.", false, false)
397 llvm::createInstrProfilingLegacyPass(const InstrProfOptions
&Options
,
399 return new InstrProfilingLegacyPass(Options
, IsCS
);
/// Casts \p Instr to an increment intrinsic, trying the step variant
/// (InstrProfIncrementInstStep) before the plain InstrProfIncrementInst.
402 static InstrProfIncrementInst
*castToIncrementInst(Instruction
*Instr
) {
// First attempt: the increment-with-step form.
403 InstrProfIncrementInst
*Inc
= dyn_cast
<InstrProfIncrementInstStep
>(Instr
);
// Second attempt: the plain increment form.
406 return dyn_cast
<InstrProfIncrementInst
>(Instr
);
409 bool InstrProfiling::lowerIntrinsics(Function
*F
) {
410 bool MadeChange
= false;
411 PromotionCandidates
.clear();
412 for (BasicBlock
&BB
: *F
) {
413 for (auto I
= BB
.begin(), E
= BB
.end(); I
!= E
;) {
415 InstrProfIncrementInst
*Inc
= castToIncrementInst(&*Instr
);
419 } else if (auto *Ind
= dyn_cast
<InstrProfValueProfileInst
>(Instr
)) {
420 lowerValueProfileInst(Ind
);
429 promoteCounterLoadStores(F
);
/// Decides whether counter register promotion is enabled.
///
/// An explicit occurrence of the DoCounterPromotion command-line option takes
/// precedence; when the option was not given, the value configured in
/// Options.DoCounterPromotion is used instead.
433 bool InstrProfiling::isCounterPromotionEnabled() const {
// The option appeared on the command line at least once: honor its value.
434 if (DoCounterPromotion
.getNumOccurrences() > 0)
435 return DoCounterPromotion
;
// No explicit option: defer to the pass options.
437 return Options
.DoCounterPromotion
;
440 void InstrProfiling::promoteCounterLoadStores(Function
*F
) {
441 if (!isCounterPromotionEnabled())
444 DominatorTree
DT(*F
);
446 DenseMap
<Loop
*, SmallVector
<LoadStorePair
, 8>> LoopPromotionCandidates
;
448 std::unique_ptr
<BlockFrequencyInfo
> BFI
;
449 if (Options
.UseBFIInPromotion
) {
450 std::unique_ptr
<BranchProbabilityInfo
> BPI
;
451 BPI
.reset(new BranchProbabilityInfo(*F
, LI
, &GetTLI(*F
)));
452 BFI
.reset(new BlockFrequencyInfo(*F
, *BPI
, LI
));
455 for (const auto &LoadStore
: PromotionCandidates
) {
456 auto *CounterLoad
= LoadStore
.first
;
457 auto *CounterStore
= LoadStore
.second
;
458 BasicBlock
*BB
= CounterLoad
->getParent();
459 Loop
*ParentLoop
= LI
.getLoopFor(BB
);
462 LoopPromotionCandidates
[ParentLoop
].emplace_back(CounterLoad
, CounterStore
);
465 SmallVector
<Loop
*, 4> Loops
= LI
.getLoopsInPreorder();
467 // Do a post-order traversal of the loops so that counter updates can be
468 // iteratively hoisted outside the loop nest.
469 for (auto *Loop
: llvm::reverse(Loops
)) {
470 PGOCounterPromoter
Promoter(LoopPromotionCandidates
, *Loop
, LI
, BFI
.get());
471 Promoter
.run(&TotalCountersPromoted
);
475 /// Check if the module contains uses of any profiling intrinsics.
///
/// The check looks up, by intrinsic name, the module's functions for
/// instrprof_increment, instrprof_increment_step and instrprof_value_profile,
/// in that order.
476 static bool containsProfilingIntrinsics(Module
&M
) {
// Lookup for llvm.instrprof.increment.
477 if (auto *F
= M
.getFunction(
478 Intrinsic::getName(llvm::Intrinsic::instrprof_increment
)))
// Lookup for the increment-with-step variant.
481 if (auto *F
= M
.getFunction(
482 Intrinsic::getName(llvm::Intrinsic::instrprof_increment_step
)))
// Lookup for the value-profile intrinsic.
485 if (auto *F
= M
.getFunction(
486 Intrinsic::getName(llvm::Intrinsic::instrprof_value_profile
)))
492 bool InstrProfiling::run(
493 Module
&M
, std::function
<const TargetLibraryInfo
&(Function
&F
)> GetTLI
) {
495 this->GetTLI
= std::move(GetTLI
);
498 ProfileDataMap
.clear();
500 getMemOPSizeRangeFromOption(MemOPSizeRange
, MemOPSizeRangeStart
,
502 TT
= Triple(M
.getTargetTriple());
504 // Emit the runtime hook even if no counters are present.
505 bool MadeChange
= emitRuntimeHook();
507 // Improve compile time by avoiding linear scans when there is no work.
508 GlobalVariable
*CoverageNamesVar
=
509 M
.getNamedGlobal(getCoverageUnusedNamesVarName());
510 if (!containsProfilingIntrinsics(M
) && !CoverageNamesVar
)
513 // We did not know how many value sites there would be inside
514 // the instrumented function. This is counting the number of instrumented
515 // target value sites to enter it as field in the profile data variable.
516 for (Function
&F
: M
) {
517 InstrProfIncrementInst
*FirstProfIncInst
= nullptr;
518 for (BasicBlock
&BB
: F
)
519 for (auto I
= BB
.begin(), E
= BB
.end(); I
!= E
; I
++)
520 if (auto *Ind
= dyn_cast
<InstrProfValueProfileInst
>(I
))
521 computeNumValueSiteCounts(Ind
);
522 else if (FirstProfIncInst
== nullptr)
523 FirstProfIncInst
= dyn_cast
<InstrProfIncrementInst
>(I
);
525 // Value profiling intrinsic lowering requires per-function profile data
526 // variable to be created first.
527 if (FirstProfIncInst
!= nullptr)
528 static_cast<void>(getOrCreateRegionCounters(FirstProfIncInst
));
531 for (Function
&F
: M
)
532 MadeChange
|= lowerIntrinsics(&F
);
534 if (CoverageNamesVar
) {
535 lowerCoverageData(CoverageNamesVar
);
546 emitInitialization();
550 static FunctionCallee
551 getOrInsertValueProfilingCall(Module
&M
, const TargetLibraryInfo
&TLI
,
552 bool IsRange
= false) {
553 LLVMContext
&Ctx
= M
.getContext();
554 auto *ReturnTy
= Type::getVoidTy(M
.getContext());
557 if (auto AK
= TLI
.getExtAttrForI32Param(false))
558 AL
= AL
.addParamAttribute(M
.getContext(), 2, AK
);
561 Type
*ParamTypes
[] = {
562 #define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
563 #include "llvm/ProfileData/InstrProfData.inc"
565 auto *ValueProfilingCallTy
=
566 FunctionType::get(ReturnTy
, makeArrayRef(ParamTypes
), false);
567 return M
.getOrInsertFunction(getInstrProfValueProfFuncName(),
568 ValueProfilingCallTy
, AL
);
570 Type
*RangeParamTypes
[] = {
571 #define VALUE_RANGE_PROF 1
572 #define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
573 #include "llvm/ProfileData/InstrProfData.inc"
574 #undef VALUE_RANGE_PROF
576 auto *ValueRangeProfilingCallTy
=
577 FunctionType::get(ReturnTy
, makeArrayRef(RangeParamTypes
), false);
578 return M
.getOrInsertFunction(getInstrProfValueRangeProfFuncName(),
579 ValueRangeProfilingCallTy
, AL
);
/// Records the number of value sites of one value kind for a function.
///
/// The entry in ProfileDataMap is keyed by the name variable of \p Ind. For
/// the value kind of \p Ind, NumValueSites is raised to (site index + 1)
/// whenever the stored count does not already exceed the index, so after all
/// intrinsics are visited it holds the highest index seen plus one.
583 void InstrProfiling::computeNumValueSiteCounts(InstrProfValueProfileInst
*Ind
) {
584 GlobalVariable
*Name
= Ind
->getName();
585 uint64_t ValueKind
= Ind
->getValueKind()->getZExtValue();
586 uint64_t Index
= Ind
->getIndex()->getZExtValue();
587 auto It
= ProfileDataMap
.find(Name
);
// No entry for this name yet: create one seeded with this site's count.
588 if (It
== ProfileDataMap
.end()) {
589 PerFunctionProfileData PD
;
590 PD
.NumValueSites
[ValueKind
] = Index
+ 1;
591 ProfileDataMap
[Name
] = PD
;
// Existing entry: only grow the count, never shrink it.
592 } else if (It
->second
.NumValueSites
[ValueKind
] <= Index
)
593 It
->second
.NumValueSites
[ValueKind
] = Index
+ 1;
596 void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst
*Ind
) {
597 GlobalVariable
*Name
= Ind
->getName();
598 auto It
= ProfileDataMap
.find(Name
);
599 assert(It
!= ProfileDataMap
.end() && It
->second
.DataVar
&&
600 "value profiling detected in function with no counter incerement");
602 GlobalVariable
*DataVar
= It
->second
.DataVar
;
603 uint64_t ValueKind
= Ind
->getValueKind()->getZExtValue();
604 uint64_t Index
= Ind
->getIndex()->getZExtValue();
605 for (uint32_t Kind
= IPVK_First
; Kind
< ValueKind
; ++Kind
)
606 Index
+= It
->second
.NumValueSites
[Kind
];
608 IRBuilder
<> Builder(Ind
);
609 bool IsRange
= (Ind
->getValueKind()->getZExtValue() ==
610 llvm::InstrProfValueKind::IPVK_MemOPSize
);
611 CallInst
*Call
= nullptr;
612 auto *TLI
= &GetTLI(*Ind
->getFunction());
614 Value
*Args
[3] = {Ind
->getTargetValue(),
615 Builder
.CreateBitCast(DataVar
, Builder
.getInt8PtrTy()),
616 Builder
.getInt32(Index
)};
617 Call
= Builder
.CreateCall(getOrInsertValueProfilingCall(*M
, *TLI
), Args
);
620 Ind
->getTargetValue(),
621 Builder
.CreateBitCast(DataVar
, Builder
.getInt8PtrTy()),
622 Builder
.getInt32(Index
),
623 Builder
.getInt64(MemOPSizeRangeStart
),
624 Builder
.getInt64(MemOPSizeRangeLast
),
625 Builder
.getInt64(MemOPSizeLarge
== 0 ? INT64_MIN
: MemOPSizeLarge
)};
627 Builder
.CreateCall(getOrInsertValueProfilingCall(*M
, *TLI
, true), Args
);
629 if (auto AK
= TLI
->getExtAttrForI32Param(false))
630 Call
->addParamAttr(2, AK
);
631 Ind
->replaceAllUsesWith(Call
);
632 Ind
->eraseFromParent();
635 void InstrProfiling::lowerIncrement(InstrProfIncrementInst
*Inc
) {
636 GlobalVariable
*Counters
= getOrCreateRegionCounters(Inc
);
638 IRBuilder
<> Builder(Inc
);
639 uint64_t Index
= Inc
->getIndex()->getZExtValue();
640 Value
*Addr
= Builder
.CreateConstInBoundsGEP2_64(Counters
->getValueType(),
643 if (Options
.Atomic
|| AtomicCounterUpdateAll
) {
644 Builder
.CreateAtomicRMW(AtomicRMWInst::Add
, Addr
, Inc
->getStep(),
645 AtomicOrdering::Monotonic
);
647 Value
*IncStep
= Inc
->getStep();
648 Value
*Load
= Builder
.CreateLoad(IncStep
->getType(), Addr
, "pgocount");
649 auto *Count
= Builder
.CreateAdd(Load
, Inc
->getStep());
650 auto *Store
= Builder
.CreateStore(Count
, Addr
);
651 if (isCounterPromotionEnabled())
652 PromotionCandidates
.emplace_back(cast
<Instruction
>(Load
), Store
);
654 Inc
->eraseFromParent();
/// Lowers the coverage "unused names" variable.
///
/// The initializer of \p CoverageNamesVar is treated as a ConstantArray whose
/// operands refer (possibly through pointer casts) to function-name globals.
/// Each such name global is given private linkage and appended to
/// ReferencedNames, the operand's references are dropped, and
/// \p CoverageNamesVar itself is erased from its parent.
657 void InstrProfiling::lowerCoverageData(GlobalVariable
*CoverageNamesVar
) {
658 ConstantArray
*Names
=
659 cast
<ConstantArray
>(CoverageNamesVar
->getInitializer());
660 for (unsigned I
= 0, E
= Names
->getNumOperands(); I
< E
; ++I
) {
661 Constant
*NC
= Names
->getOperand(I
);
// Look through pointer casts to reach the underlying name global.
662 Value
*V
= NC
->stripPointerCasts();
663 assert(isa
<GlobalVariable
>(V
) && "Missing reference to function name");
664 GlobalVariable
*Name
= cast
<GlobalVariable
>(V
);
666 Name
->setLinkage(GlobalValue::PrivateLinkage
);
// Remember the name global in ReferencedNames.
667 ReferencedNames
.push_back(Name
);
668 NC
->dropAllReferences();
670 CoverageNamesVar
->eraseFromParent();
673 /// Get the name of a profiling variable for a particular function.
///
/// The base name is the name of \p Inc's name variable with the leading
/// getInstrProfNameVarPrefix() characters removed; the result is \p Prefix
/// followed by that base name. When DoHashBasedCounterSplit is set, the IR
/// PGO flag is set on the module and canRenameComdatFunc() accepts the
/// function, ".<hash>" (the hash carried by \p Inc) is appended unless the
/// base name already ends with that suffix.
674 static std::string
getVarName(InstrProfIncrementInst
*Inc
, StringRef Prefix
) {
675 StringRef NamePrefix
= getInstrProfNameVarPrefix();
// Drop the name-variable prefix to recover the base name.
676 StringRef Name
= Inc
->getName()->getName().substr(NamePrefix
.size());
677 Function
*F
= Inc
->getParent()->getParent();
678 Module
*M
= F
->getParent();
// Hash-based renaming does not apply: plain Prefix + Name.
679 if (!DoHashBasedCounterSplit
|| !isIRPGOFlagSet(M
) ||
680 !canRenameComdatFunc(*F
))
681 return (Prefix
+ Name
).str();
682 uint64_t FuncHash
= Inc
->getHash()->getZExtValue();
683 SmallVector
<char, 24> HashPostfix
;
// Avoid appending the hash twice if the name already carries it.
684 if (Name
.endswith((Twine(".") + Twine(FuncHash
)).toStringRef(HashPostfix
)))
685 return (Prefix
+ Name
).str();
686 return (Prefix
+ Name
+ "." + Twine(FuncHash
)).str();
689 static inline bool shouldRecordFunctionAddr(Function
*F
) {
691 bool HasAvailableExternallyLinkage
= F
->hasAvailableExternallyLinkage();
692 if (!F
->hasLinkOnceLinkage() && !F
->hasLocalLinkage() &&
693 !HasAvailableExternallyLinkage
)
696 // A function marked 'alwaysinline' with available_externally linkage can't
697 // have its address taken. Doing so would create an undefined external ref to
698 // the function, which would fail to link.
699 if (HasAvailableExternallyLinkage
&&
700 F
->hasFnAttribute(Attribute::AlwaysInline
))
703 // Prohibit function address recording if the function is both internal and
704 // COMDAT. This avoids the profile data variable referencing internal symbols
706 if (F
->hasLocalLinkage() && F
->hasComdat())
709 // Check uses of this function for other than direct calls or invokes to it.
710 // Inline virtual functions have linkOnceODR linkage. When a key method
711 // exists, the vtable will only be emitted in the TU where the key method
712 // is defined. In a TU where vtable is not available, the function won't
713 // be 'addresstaken'. If its address is not recorded here, the profile data
714 // with missing address may be picked by the linker leading to missing
715 // indirect call target info.
716 return F
->hasAddressTaken() || F
->hasLinkOnceLinkage();
719 static bool needsRuntimeRegistrationOfSectionRange(const Triple
&TT
) {
720 // Don't do this for Darwin. compiler-rt uses linker magic.
723 // Use linker script magic to get data/cnts/name start/end.
724 if (TT
.isOSLinux() || TT
.isOSFreeBSD() || TT
.isOSNetBSD() ||
725 TT
.isOSSolaris() || TT
.isOSFuchsia() || TT
.isPS4CPU() ||
733 InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst
*Inc
) {
734 GlobalVariable
*NamePtr
= Inc
->getName();
735 auto It
= ProfileDataMap
.find(NamePtr
);
736 PerFunctionProfileData PD
;
737 if (It
!= ProfileDataMap
.end()) {
738 if (It
->second
.RegionCounters
)
739 return It
->second
.RegionCounters
;
743 // Match the linkage and visibility of the name global. COFF supports using
744 // comdats with internal symbols, so do that if we can.
745 Function
*Fn
= Inc
->getParent()->getParent();
746 GlobalValue::LinkageTypes Linkage
= NamePtr
->getLinkage();
747 GlobalValue::VisibilityTypes Visibility
= NamePtr
->getVisibility();
748 if (TT
.isOSBinFormatCOFF()) {
749 Linkage
= GlobalValue::InternalLinkage
;
750 Visibility
= GlobalValue::DefaultVisibility
;
753 // Move the name variable to the right section. Place them in a COMDAT group
754 // if the associated function is a COMDAT. This will make sure that only one
755 // copy of counters of the COMDAT function will be emitted after linking. Keep
756 // in mind that this pass may run before the inliner, so we need to create a
757 // new comdat group for the counters and profiling data. If we use the comdat
758 // of the parent function, that will result in relocations against discarded
760 bool NeedComdat
= needsComdatForCounter(*Fn
, *M
);
762 if (TT
.isOSBinFormatCOFF()) {
763 // For COFF, put the counters, data, and values each into their own
764 // comdats. We can't use a group because the Visual C++ linker will
765 // report duplicate symbol errors if there are multiple external symbols
766 // with the same name marked IMAGE_COMDAT_SELECT_ASSOCIATIVE.
767 Linkage
= GlobalValue::LinkOnceODRLinkage
;
768 Visibility
= GlobalValue::HiddenVisibility
;
771 auto MaybeSetComdat
= [=](GlobalVariable
*GV
) {
773 GV
->setComdat(M
->getOrInsertComdat(GV
->getName()));
776 uint64_t NumCounters
= Inc
->getNumCounters()->getZExtValue();
777 LLVMContext
&Ctx
= M
->getContext();
778 ArrayType
*CounterTy
= ArrayType::get(Type::getInt64Ty(Ctx
), NumCounters
);
780 // Create the counters variable.
782 new GlobalVariable(*M
, CounterTy
, false, Linkage
,
783 Constant::getNullValue(CounterTy
),
784 getVarName(Inc
, getInstrProfCountersVarPrefix()));
785 CounterPtr
->setVisibility(Visibility
);
786 CounterPtr
->setSection(
787 getInstrProfSectionName(IPSK_cnts
, TT
.getObjectFormat()));
788 CounterPtr
->setAlignment(Align(8));
789 MaybeSetComdat(CounterPtr
);
790 CounterPtr
->setLinkage(Linkage
);
792 auto *Int8PtrTy
= Type::getInt8PtrTy(Ctx
);
793 // Allocate statically the array of pointers to value profile nodes for
794 // the current function.
795 Constant
*ValuesPtrExpr
= ConstantPointerNull::get(Int8PtrTy
);
796 if (ValueProfileStaticAlloc
&& !needsRuntimeRegistrationOfSectionRange(TT
)) {
798 for (uint32_t Kind
= IPVK_First
; Kind
<= IPVK_Last
; ++Kind
)
799 NS
+= PD
.NumValueSites
[Kind
];
801 ArrayType
*ValuesTy
= ArrayType::get(Type::getInt64Ty(Ctx
), NS
);
804 new GlobalVariable(*M
, ValuesTy
, false, Linkage
,
805 Constant::getNullValue(ValuesTy
),
806 getVarName(Inc
, getInstrProfValuesVarPrefix()));
807 ValuesVar
->setVisibility(Visibility
);
808 ValuesVar
->setSection(
809 getInstrProfSectionName(IPSK_vals
, TT
.getObjectFormat()));
810 ValuesVar
->setAlignment(Align(8));
811 MaybeSetComdat(ValuesVar
);
813 ConstantExpr::getBitCast(ValuesVar
, Type::getInt8PtrTy(Ctx
));
817 // Create data variable.
818 auto *Int16Ty
= Type::getInt16Ty(Ctx
);
819 auto *Int16ArrayTy
= ArrayType::get(Int16Ty
, IPVK_Last
+ 1);
820 Type
*DataTypes
[] = {
821 #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) LLVMType,
822 #include "llvm/ProfileData/InstrProfData.inc"
824 auto *DataTy
= StructType::get(Ctx
, makeArrayRef(DataTypes
));
826 Constant
*FunctionAddr
= shouldRecordFunctionAddr(Fn
)
827 ? ConstantExpr::getBitCast(Fn
, Int8PtrTy
)
828 : ConstantPointerNull::get(Int8PtrTy
);
830 Constant
*Int16ArrayVals
[IPVK_Last
+ 1];
831 for (uint32_t Kind
= IPVK_First
; Kind
<= IPVK_Last
; ++Kind
)
832 Int16ArrayVals
[Kind
] = ConstantInt::get(Int16Ty
, PD
.NumValueSites
[Kind
]);
834 Constant
*DataVals
[] = {
835 #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init,
836 #include "llvm/ProfileData/InstrProfData.inc"
838 auto *Data
= new GlobalVariable(*M
, DataTy
, false, Linkage
,
839 ConstantStruct::get(DataTy
, DataVals
),
840 getVarName(Inc
, getInstrProfDataVarPrefix()));
841 Data
->setVisibility(Visibility
);
842 Data
->setSection(getInstrProfSectionName(IPSK_data
, TT
.getObjectFormat()));
843 Data
->setAlignment(Align(INSTR_PROF_DATA_ALIGNMENT
));
844 MaybeSetComdat(Data
);
845 Data
->setLinkage(Linkage
);
847 PD
.RegionCounters
= CounterPtr
;
849 ProfileDataMap
[NamePtr
] = PD
;
851 // Mark the data variable as used so that it isn't stripped out.
852 UsedVars
.push_back(Data
);
853 // Now that the linkage set by the FE has been passed to the data and counter
854 // variables, reset Name variable's linkage and visibility to private so that
855 // it can be removed later by the compiler.
856 NamePtr
->setLinkage(GlobalValue::PrivateLinkage
);
857 // Collect the referenced names to be used by emitNameData.
858 ReferencedNames
.push_back(NamePtr
);
863 void InstrProfiling::emitVNodes() {
864 if (!ValueProfileStaticAlloc
)
867 // For now only support this on platforms that do
868 // not require runtime registration to discover
869 // named section start/end.
870 if (needsRuntimeRegistrationOfSectionRange(TT
))
874 for (auto &PD
: ProfileDataMap
) {
875 for (uint32_t Kind
= IPVK_First
; Kind
<= IPVK_Last
; ++Kind
)
876 TotalNS
+= PD
.second
.NumValueSites
[Kind
];
882 uint64_t NumCounters
= TotalNS
* NumCountersPerValueSite
;
883 // Heuristic for small programs with very few total value sites.
884 // The default value of vp-counters-per-site is chosen based on
885 // the observation that large apps usually have a low percentage
886 // of value sites that actually have any profile data, and thus
887 // the average number of counters per site is low. For small
888 // apps with very few sites, this may not be true. Bump up the
889 // number of counters in this case.
890 #define INSTR_PROF_MIN_VAL_COUNTS 10
891 if (NumCounters
< INSTR_PROF_MIN_VAL_COUNTS
)
892 NumCounters
= std::max(INSTR_PROF_MIN_VAL_COUNTS
, (int)NumCounters
* 2);
894 auto &Ctx
= M
->getContext();
895 Type
*VNodeTypes
[] = {
896 #define INSTR_PROF_VALUE_NODE(Type, LLVMType, Name, Init) LLVMType,
897 #include "llvm/ProfileData/InstrProfData.inc"
899 auto *VNodeTy
= StructType::get(Ctx
, makeArrayRef(VNodeTypes
));
901 ArrayType
*VNodesTy
= ArrayType::get(VNodeTy
, NumCounters
);
902 auto *VNodesVar
= new GlobalVariable(
903 *M
, VNodesTy
, false, GlobalValue::PrivateLinkage
,
904 Constant::getNullValue(VNodesTy
), getInstrProfVNodesVarName());
905 VNodesVar
->setSection(
906 getInstrProfSectionName(IPSK_vnodes
, TT
.getObjectFormat()));
907 UsedVars
.push_back(VNodesVar
);
/// Emits the module's profile names variable.
///
/// The name globals collected in ReferencedNames are combined into a single
/// string by collectPGOFuncNameStrings() (DoNameCompression is passed
/// through); a failure there is reported as a fatal error. The string becomes
/// the initializer of a new private global, NamesVar, which is placed in the
/// IPSK_name section and appended to UsedVars. NamesSize records the string's
/// size, and the individual name globals are erased afterwards.
910 void InstrProfiling::emitNameData() {
911 std::string UncompressedData
;
// Check whether any names were collected at all.
913 if (ReferencedNames
.empty())
916 std::string CompressedNameStr
;
917 if (Error E
= collectPGOFuncNameStrings(ReferencedNames
, CompressedNameStr
,
918 DoNameCompression
)) {
919 report_fatal_error(toString(std::move(E
)), false);
922 auto &Ctx
= M
->getContext();
// Wrap the collected name string in a constant data array.
923 auto *NamesVal
= ConstantDataArray::getString(
924 Ctx
, StringRef(CompressedNameStr
), false);
925 NamesVar
= new GlobalVariable(*M
, NamesVal
->getType(), true,
926 GlobalValue::PrivateLinkage
, NamesVal
,
927 getInstrProfNamesVarName());
928 NamesSize
= CompressedNameStr
.size();
929 NamesVar
->setSection(
930 getInstrProfSectionName(IPSK_name
, TT
.getObjectFormat()));
931 // On COFF, it's important to reduce the alignment down to 1 to prevent the
932 // linker from inserting padding before the start of the names section or
933 // between names entries.
934 NamesVar
->setAlignment(Align::None());
935 UsedVars
.push_back(NamesVar
);
// The per-function name globals are erased once the combined variable exists.
937 for (auto *NamePtr
: ReferencedNames
)
938 NamePtr
->eraseFromParent();
941 void InstrProfiling::emitRegistration() {
942 if (!needsRuntimeRegistrationOfSectionRange(TT
))
945 // Construct the function.
946 auto *VoidTy
= Type::getVoidTy(M
->getContext());
947 auto *VoidPtrTy
= Type::getInt8PtrTy(M
->getContext());
948 auto *Int64Ty
= Type::getInt64Ty(M
->getContext());
949 auto *RegisterFTy
= FunctionType::get(VoidTy
, false);
950 auto *RegisterF
= Function::Create(RegisterFTy
, GlobalValue::InternalLinkage
,
951 getInstrProfRegFuncsName(), M
);
952 RegisterF
->setUnnamedAddr(GlobalValue::UnnamedAddr::Global
);
953 if (Options
.NoRedZone
)
954 RegisterF
->addFnAttr(Attribute::NoRedZone
);
956 auto *RuntimeRegisterTy
= FunctionType::get(VoidTy
, VoidPtrTy
, false);
957 auto *RuntimeRegisterF
=
958 Function::Create(RuntimeRegisterTy
, GlobalVariable::ExternalLinkage
,
959 getInstrProfRegFuncName(), M
);
961 IRBuilder
<> IRB(BasicBlock::Create(M
->getContext(), "", RegisterF
));
962 for (Value
*Data
: UsedVars
)
963 if (Data
!= NamesVar
&& !isa
<Function
>(Data
))
964 IRB
.CreateCall(RuntimeRegisterF
, IRB
.CreateBitCast(Data
, VoidPtrTy
));
967 Type
*ParamTypes
[] = {VoidPtrTy
, Int64Ty
};
968 auto *NamesRegisterTy
=
969 FunctionType::get(VoidTy
, makeArrayRef(ParamTypes
), false);
970 auto *NamesRegisterF
=
971 Function::Create(NamesRegisterTy
, GlobalVariable::ExternalLinkage
,
972 getInstrProfNamesRegFuncName(), M
);
973 IRB
.CreateCall(NamesRegisterF
, {IRB
.CreateBitCast(NamesVar
, VoidPtrTy
),
974 IRB
.getInt64(NamesSize
)});
980 bool InstrProfiling::emitRuntimeHook() {
981 // We expect the linker to be invoked with -u<hook_var> flag for linux,
982 // for which case there is no need to emit the user function.
986 // If the module's provided its own runtime, we don't need to do anything.
987 if (M
->getGlobalVariable(getInstrProfRuntimeHookVarName()))
990 // Declare an external variable that will pull in the runtime initialization.
991 auto *Int32Ty
= Type::getInt32Ty(M
->getContext());
993 new GlobalVariable(*M
, Int32Ty
, false, GlobalValue::ExternalLinkage
,
994 nullptr, getInstrProfRuntimeHookVarName());
996 // Make a function that uses it.
997 auto *User
= Function::Create(FunctionType::get(Int32Ty
, false),
998 GlobalValue::LinkOnceODRLinkage
,
999 getInstrProfRuntimeHookVarUseFuncName(), M
);
1000 User
->addFnAttr(Attribute::NoInline
);
1001 if (Options
.NoRedZone
)
1002 User
->addFnAttr(Attribute::NoRedZone
);
1003 User
->setVisibility(GlobalValue::HiddenVisibility
);
1004 if (TT
.supportsCOMDAT())
1005 User
->setComdat(M
->getOrInsertComdat(User
->getName()));
1007 IRBuilder
<> IRB(BasicBlock::Create(M
->getContext(), "", User
));
1008 auto *Load
= IRB
.CreateLoad(Int32Ty
, Var
);
1009 IRB
.CreateRet(Load
);
1011 // Mark the user variable as used so that it isn't stripped out.
1012 UsedVars
.push_back(User
);
1016 void InstrProfiling::emitUses() {
1017 if (!UsedVars
.empty())
1018 appendToUsed(*M
, UsedVars
);
1021 void InstrProfiling::emitInitialization() {
1022 // Create ProfileFileName variable. Don't don't this for the
1023 // context-sensitive instrumentation lowering: This lowering is after
1024 // LTO/ThinLTO linking. Pass PGOInstrumentationGenCreateVar should
1025 // have already create the variable before LTO/ThinLTO linking.
1027 createProfileFileNameVar(*M
, Options
.InstrProfileOutput
);
1028 Function
*RegisterF
= M
->getFunction(getInstrProfRegFuncsName());
1032 // Create the initialization function.
1033 auto *VoidTy
= Type::getVoidTy(M
->getContext());
1034 auto *F
= Function::Create(FunctionType::get(VoidTy
, false),
1035 GlobalValue::InternalLinkage
,
1036 getInstrProfInitFuncName(), M
);
1037 F
->setUnnamedAddr(GlobalValue::UnnamedAddr::Global
);
1038 F
->addFnAttr(Attribute::NoInline
);
1039 if (Options
.NoRedZone
)
1040 F
->addFnAttr(Attribute::NoRedZone
);
1042 // Add the basic block and the necessary calls.
1043 IRBuilder
<> IRB(BasicBlock::Create(M
->getContext(), "", F
));
1044 IRB
.CreateCall(RegisterF
, {});
1045 IRB
.CreateRetVoid();
1047 appendToGlobalCtors(*M
, F
, 0);