1 //===-- InstrProfiling.cpp - Frontend instrumentation based profiling -----===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This pass lowers instrprof_* intrinsics emitted by a frontend for profiling.
11 // It also builds the data structures and initialization code needed for
12 // updating execution counts and emitting the profile at runtime.
14 //===----------------------------------------------------------------------===//
16 #include "llvm/Transforms/Instrumentation/InstrProfiling.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/ADT/Triple.h"
21 #include "llvm/ADT/Twine.h"
22 #include "llvm/Analysis/LoopInfo.h"
23 #include "llvm/Analysis/TargetLibraryInfo.h"
24 #include "llvm/IR/Attributes.h"
25 #include "llvm/IR/BasicBlock.h"
26 #include "llvm/IR/Constant.h"
27 #include "llvm/IR/Constants.h"
28 #include "llvm/IR/DerivedTypes.h"
29 #include "llvm/IR/Dominators.h"
30 #include "llvm/IR/Function.h"
31 #include "llvm/IR/GlobalValue.h"
32 #include "llvm/IR/GlobalVariable.h"
33 #include "llvm/IR/IRBuilder.h"
34 #include "llvm/IR/Instruction.h"
35 #include "llvm/IR/Instructions.h"
36 #include "llvm/IR/IntrinsicInst.h"
37 #include "llvm/IR/Module.h"
38 #include "llvm/IR/Type.h"
39 #include "llvm/Pass.h"
40 #include "llvm/ProfileData/InstrProf.h"
41 #include "llvm/Support/Casting.h"
42 #include "llvm/Support/CommandLine.h"
43 #include "llvm/Support/Error.h"
44 #include "llvm/Support/ErrorHandling.h"
45 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
46 #include "llvm/Transforms/Utils/ModuleUtils.h"
47 #include "llvm/Transforms/Utils/SSAUpdater.h"
56 #define DEBUG_TYPE "instrprof"
58 // The start and end values of precise value profile range for memory
60 cl::opt
<std::string
> MemOPSizeRange(
62 cl::desc("Set the range of size in memory intrinsic calls to be profiled "
63 "precisely, in a format of <start_val>:<end_val>"),
66 // The value that is considered to be a large value in a memory intrinsic.
67 cl::opt
<unsigned> MemOPSizeLarge(
69 cl::desc("Set large value thresthold in memory intrinsic size profiling. "
70 "Value of 0 disables the large value profiling."),
75 cl::opt
<bool> DoNameCompression("enable-name-compression",
76 cl::desc("Enable name string compression"),
79 cl::opt
<bool> DoHashBasedCounterSplit(
80 "hash-based-counter-split",
81 cl::desc("Rename counter variable of a comdat function based on cfg hash"),
84 cl::opt
<bool> ValueProfileStaticAlloc(
86 cl::desc("Do static counter allocation for value profiler"),
89 cl::opt
<double> NumCountersPerValueSite(
90 "vp-counters-per-site",
91 cl::desc("The average number of profile counters allocated "
92 "per value profiling site."),
93 // This is set to a very small value because in real programs, only
94 // a very small percentage of value sites have non-zero targets, e.g, 1/30.
95 // For those sites with non-zero profile, the average number of targets
96 // is usually smaller than 2.
99 cl::opt
<bool> AtomicCounterUpdateAll(
100 "instrprof-atomic-counter-update-all", cl::ZeroOrMore
,
101 cl::desc("Make all profile counter updates atomic (for testing only)"),
104 cl::opt
<bool> AtomicCounterUpdatePromoted(
105 "atomic-counter-update-promoted", cl::ZeroOrMore
,
106 cl::desc("Do counter update using atomic fetch add "
107 " for promoted counters only"),
110 // If the option is not specified, the default behavior about whether
111 // counter promotion is done depends on how instrumentation lowering
112 // pipeline is setup, i.e., the default value of true of this option
113 // does not mean the promotion will be done by default. Explicitly
114 // setting this option can override the default behavior.
115 cl::opt
<bool> DoCounterPromotion("do-counter-promotion", cl::ZeroOrMore
,
116 cl::desc("Do counter register promotion"),
118 cl::opt
<unsigned> MaxNumOfPromotionsPerLoop(
119 cl::ZeroOrMore
, "max-counter-promotions-per-loop", cl::init(20),
120 cl::desc("Max number counter promotions per loop to avoid"
121 " increasing register pressure too much"));
125 MaxNumOfPromotions(cl::ZeroOrMore
, "max-counter-promotions", cl::init(-1),
126 cl::desc("Max number of allowed counter promotions"));
128 cl::opt
<unsigned> SpeculativeCounterPromotionMaxExiting(
129 cl::ZeroOrMore
, "speculative-counter-promotion-max-exiting", cl::init(3),
130 cl::desc("The max number of exiting blocks of a loop to allow "
131 " speculative counter promotion"));
133 cl::opt
<bool> SpeculativeCounterPromotionToLoop(
134 cl::ZeroOrMore
, "speculative-counter-promotion-to-loop", cl::init(false),
135 cl::desc("When the option is false, if the target block is in a loop, "
136 "the promotion will be disallowed unless the promoted counter "
137 " update can be further/iteratively promoted into an acyclic "
140 cl::opt
<bool> IterativeCounterPromotion(
141 cl::ZeroOrMore
, "iterative-counter-promotion", cl::init(true),
142 cl::desc("Allow counter promotion across the whole loop nest."));
144 class InstrProfilingLegacyPass
: public ModulePass
{
145 InstrProfiling InstrProf
;
150 InstrProfilingLegacyPass() : ModulePass(ID
) {}
151 InstrProfilingLegacyPass(const InstrProfOptions
&Options
)
152 : ModulePass(ID
), InstrProf(Options
) {}
154 StringRef
getPassName() const override
{
155 return "Frontend instrumentation-based coverage lowering";
158 bool runOnModule(Module
&M
) override
{
159 return InstrProf
.run(M
, getAnalysis
<TargetLibraryInfoWrapperPass
>().getTLI());
162 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
163 AU
.setPreservesCFG();
164 AU
.addRequired
<TargetLibraryInfoWrapperPass
>();
169 /// A helper class to promote one counter RMW operation in the loop
170 /// into register update.
172 /// RMW update for the counter will be sunk out of the loop after
173 /// the transformation.
175 class PGOCounterPromoterHelper
: public LoadAndStorePromoter
{
177 PGOCounterPromoterHelper(
178 Instruction
*L
, Instruction
*S
, SSAUpdater
&SSA
, Value
*Init
,
179 BasicBlock
*PH
, ArrayRef
<BasicBlock
*> ExitBlocks
,
180 ArrayRef
<Instruction
*> InsertPts
,
181 DenseMap
<Loop
*, SmallVector
<LoadStorePair
, 8>> &LoopToCands
,
183 : LoadAndStorePromoter({L
, S
}, SSA
), Store(S
), ExitBlocks(ExitBlocks
),
184 InsertPts(InsertPts
), LoopToCandidates(LoopToCands
), LI(LI
) {
185 assert(isa
<LoadInst
>(L
));
186 assert(isa
<StoreInst
>(S
));
187 SSA
.AddAvailableValue(PH
, Init
);
190 void doExtraRewritesBeforeFinalDeletion() const override
{
191 for (unsigned i
= 0, e
= ExitBlocks
.size(); i
!= e
; ++i
) {
192 BasicBlock
*ExitBlock
= ExitBlocks
[i
];
193 Instruction
*InsertPos
= InsertPts
[i
];
194 // Get LiveIn value into the ExitBlock. If there are multiple
195 // predecessors, the value is defined by a PHI node in this
197 Value
*LiveInValue
= SSA
.GetValueInMiddleOfBlock(ExitBlock
);
198 Value
*Addr
= cast
<StoreInst
>(Store
)->getPointerOperand();
199 IRBuilder
<> Builder(InsertPos
);
200 if (AtomicCounterUpdatePromoted
)
201 // atomic update currently can only be promoted across the current
202 // loop, not the whole loop nest.
203 Builder
.CreateAtomicRMW(AtomicRMWInst::Add
, Addr
, LiveInValue
,
204 AtomicOrdering::SequentiallyConsistent
);
206 LoadInst
*OldVal
= Builder
.CreateLoad(Addr
, "pgocount.promoted");
207 auto *NewVal
= Builder
.CreateAdd(OldVal
, LiveInValue
);
208 auto *NewStore
= Builder
.CreateStore(NewVal
, Addr
);
210 // Now update the parent loop's candidate list:
211 if (IterativeCounterPromotion
) {
212 auto *TargetLoop
= LI
.getLoopFor(ExitBlock
);
214 LoopToCandidates
[TargetLoop
].emplace_back(OldVal
, NewStore
);
222 ArrayRef
<BasicBlock
*> ExitBlocks
;
223 ArrayRef
<Instruction
*> InsertPts
;
224 DenseMap
<Loop
*, SmallVector
<LoadStorePair
, 8>> &LoopToCandidates
;
228 /// A helper class to do register promotion for all profile counter
229 /// updates in a loop.
231 class PGOCounterPromoter
{
234 DenseMap
<Loop
*, SmallVector
<LoadStorePair
, 8>> &LoopToCands
,
235 Loop
&CurLoop
, LoopInfo
&LI
)
236 : LoopToCandidates(LoopToCands
), ExitBlocks(), InsertPts(), L(CurLoop
),
239 SmallVector
<BasicBlock
*, 8> LoopExitBlocks
;
240 SmallPtrSet
<BasicBlock
*, 8> BlockSet
;
241 L
.getExitBlocks(LoopExitBlocks
);
243 for (BasicBlock
*ExitBlock
: LoopExitBlocks
) {
244 if (BlockSet
.insert(ExitBlock
).second
) {
245 ExitBlocks
.push_back(ExitBlock
);
246 InsertPts
.push_back(&*ExitBlock
->getFirstInsertionPt());
251 bool run(int64_t *NumPromoted
) {
252 // Skip 'infinite' loops:
253 if (ExitBlocks
.size() == 0)
255 unsigned MaxProm
= getMaxNumOfPromotionsInLoop(&L
);
259 unsigned Promoted
= 0;
260 for (auto &Cand
: LoopToCandidates
[&L
]) {
262 SmallVector
<PHINode
*, 4> NewPHIs
;
263 SSAUpdater
SSA(&NewPHIs
);
264 Value
*InitVal
= ConstantInt::get(Cand
.first
->getType(), 0);
266 PGOCounterPromoterHelper
Promoter(Cand
.first
, Cand
.second
, SSA
, InitVal
,
267 L
.getLoopPreheader(), ExitBlocks
,
268 InsertPts
, LoopToCandidates
, LI
);
269 Promoter
.run(SmallVector
<Instruction
*, 2>({Cand
.first
, Cand
.second
}));
271 if (Promoted
>= MaxProm
)
275 if (MaxNumOfPromotions
!= -1 && *NumPromoted
>= MaxNumOfPromotions
)
279 LLVM_DEBUG(dbgs() << Promoted
<< " counters promoted for loop (depth="
280 << L
.getLoopDepth() << ")\n");
281 return Promoted
!= 0;
285 bool allowSpeculativeCounterPromotion(Loop
*LP
) {
286 SmallVector
<BasicBlock
*, 8> ExitingBlocks
;
287 L
.getExitingBlocks(ExitingBlocks
);
288 // Not considered speculative.
289 if (ExitingBlocks
.size() == 1)
291 if (ExitingBlocks
.size() > SpeculativeCounterPromotionMaxExiting
)
296 // Returns the max number of Counter Promotions for LP.
297 unsigned getMaxNumOfPromotionsInLoop(Loop
*LP
) {
298 // We can't insert into a catchswitch.
299 SmallVector
<BasicBlock
*, 8> LoopExitBlocks
;
300 LP
->getExitBlocks(LoopExitBlocks
);
301 if (llvm::any_of(LoopExitBlocks
, [](BasicBlock
*Exit
) {
302 return isa
<CatchSwitchInst
>(Exit
->getTerminator());
306 if (!LP
->hasDedicatedExits())
309 BasicBlock
*PH
= LP
->getLoopPreheader();
313 SmallVector
<BasicBlock
*, 8> ExitingBlocks
;
314 LP
->getExitingBlocks(ExitingBlocks
);
315 // Not considered speculative.
316 if (ExitingBlocks
.size() == 1)
317 return MaxNumOfPromotionsPerLoop
;
319 if (ExitingBlocks
.size() > SpeculativeCounterPromotionMaxExiting
)
322 // Whether the target block is in a loop does not matter:
323 if (SpeculativeCounterPromotionToLoop
)
324 return MaxNumOfPromotionsPerLoop
;
326 // Now check the target block:
327 unsigned MaxProm
= MaxNumOfPromotionsPerLoop
;
328 for (auto *TargetBlock
: LoopExitBlocks
) {
329 auto *TargetLoop
= LI
.getLoopFor(TargetBlock
);
332 unsigned MaxPromForTarget
= getMaxNumOfPromotionsInLoop(TargetLoop
);
333 unsigned PendingCandsInTarget
= LoopToCandidates
[TargetLoop
].size();
335 std::min(MaxProm
, std::max(MaxPromForTarget
, PendingCandsInTarget
) -
336 PendingCandsInTarget
);
341 DenseMap
<Loop
*, SmallVector
<LoadStorePair
, 8>> &LoopToCandidates
;
342 SmallVector
<BasicBlock
*, 8> ExitBlocks
;
343 SmallVector
<Instruction
*, 8> InsertPts
;
348 } // end anonymous namespace
350 PreservedAnalyses
InstrProfiling::run(Module
&M
, ModuleAnalysisManager
&AM
) {
351 auto &TLI
= AM
.getResult
<TargetLibraryAnalysis
>(M
);
353 return PreservedAnalyses::all();
355 return PreservedAnalyses::none();
358 char InstrProfilingLegacyPass::ID
= 0;
359 INITIALIZE_PASS_BEGIN(
360 InstrProfilingLegacyPass
, "instrprof",
361 "Frontend instrumentation-based coverage lowering.", false, false)
362 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass
)
364 InstrProfilingLegacyPass
, "instrprof",
365 "Frontend instrumentation-based coverage lowering.", false, false)
368 llvm::createInstrProfilingLegacyPass(const InstrProfOptions
&Options
) {
369 return new InstrProfilingLegacyPass(Options
);
// Helper for lowerIntrinsics: classifies Instr as a counter-increment
// intrinsic. It first tries a dyn_cast to InstrProfIncrementInstStep (stored
// in Inc) and its last visible statement returns the result of a dyn_cast to
// the plain InstrProfIncrementInst, which is null when Instr is not one.
// NOTE(review): the statement(s) between the two casts are not visible in
// this listing; presumably Inc is returned when it is non-null. Confirm
// against the complete file.
372 static InstrProfIncrementInst
*castToIncrementInst(Instruction
*Instr
) {
373 InstrProfIncrementInst
*Inc
= dyn_cast
<InstrProfIncrementInstStep
>(Instr
);
376 return dyn_cast
<InstrProfIncrementInst
>(Instr
);
379 bool InstrProfiling::lowerIntrinsics(Function
*F
) {
380 bool MadeChange
= false;
381 PromotionCandidates
.clear();
382 for (BasicBlock
&BB
: *F
) {
383 for (auto I
= BB
.begin(), E
= BB
.end(); I
!= E
;) {
385 InstrProfIncrementInst
*Inc
= castToIncrementInst(&*Instr
);
389 } else if (auto *Ind
= dyn_cast
<InstrProfValueProfileInst
>(Instr
)) {
390 lowerValueProfileInst(Ind
);
399 promoteCounterLoadStores(F
);
403 bool InstrProfiling::isCounterPromotionEnabled() const {
404 if (DoCounterPromotion
.getNumOccurrences() > 0)
405 return DoCounterPromotion
;
407 return Options
.DoCounterPromotion
;
410 void InstrProfiling::promoteCounterLoadStores(Function
*F
) {
411 if (!isCounterPromotionEnabled())
414 DominatorTree
DT(*F
);
416 DenseMap
<Loop
*, SmallVector
<LoadStorePair
, 8>> LoopPromotionCandidates
;
418 for (const auto &LoadStore
: PromotionCandidates
) {
419 auto *CounterLoad
= LoadStore
.first
;
420 auto *CounterStore
= LoadStore
.second
;
421 BasicBlock
*BB
= CounterLoad
->getParent();
422 Loop
*ParentLoop
= LI
.getLoopFor(BB
);
425 LoopPromotionCandidates
[ParentLoop
].emplace_back(CounterLoad
, CounterStore
);
428 SmallVector
<Loop
*, 4> Loops
= LI
.getLoopsInPreorder();
430 // Do a post-order traversal of the loops so that counter updates can be
431 // iteratively hoisted outside the loop nest.
432 for (auto *Loop
: llvm::reverse(Loops
)) {
433 PGOCounterPromoter
Promoter(LoopPromotionCandidates
, *Loop
, LI
);
434 Promoter
.run(&TotalCountersPromoted
);
438 /// Check if the module contains uses of any profiling intrinsics.
// The visible code looks up three intrinsic declarations in M by name, in
// this order: instrprof_increment, instrprof_increment_step and
// instrprof_value_profile. InstrProfiling::run consults the result (together
// with the coverage names variable) before scanning the module.
// NOTE(review): the statement guarded by each lookup and the final return are
// not visible in this listing; presumably each found declaration is tested
// for uses. Confirm against the complete file.
439 static bool containsProfilingIntrinsics(Module
&M
) {
440 if (auto *F
= M
.getFunction(
441 Intrinsic::getName(llvm::Intrinsic::instrprof_increment
)))
444 if (auto *F
= M
.getFunction(
445 Intrinsic::getName(llvm::Intrinsic::instrprof_increment_step
)))
448 if (auto *F
= M
.getFunction(
449 Intrinsic::getName(llvm::Intrinsic::instrprof_value_profile
)))
455 bool InstrProfiling::run(Module
&M
, const TargetLibraryInfo
&TLI
) {
460 ProfileDataMap
.clear();
462 getMemOPSizeRangeFromOption(MemOPSizeRange
, MemOPSizeRangeStart
,
464 TT
= Triple(M
.getTargetTriple());
466 // Emit the runtime hook even if no counters are present.
467 bool MadeChange
= emitRuntimeHook();
469 // Improve compile time by avoiding linear scans when there is no work.
470 GlobalVariable
*CoverageNamesVar
=
471 M
.getNamedGlobal(getCoverageUnusedNamesVarName());
472 if (!containsProfilingIntrinsics(M
) && !CoverageNamesVar
)
475 // We did not know how many value sites there would be inside
476 // the instrumented function. This is counting the number of instrumented
477 // target value sites to enter it as field in the profile data variable.
478 for (Function
&F
: M
) {
479 InstrProfIncrementInst
*FirstProfIncInst
= nullptr;
480 for (BasicBlock
&BB
: F
)
481 for (auto I
= BB
.begin(), E
= BB
.end(); I
!= E
; I
++)
482 if (auto *Ind
= dyn_cast
<InstrProfValueProfileInst
>(I
))
483 computeNumValueSiteCounts(Ind
);
484 else if (FirstProfIncInst
== nullptr)
485 FirstProfIncInst
= dyn_cast
<InstrProfIncrementInst
>(I
);
487 // Value profiling intrinsic lowering requires per-function profile data
488 // variable to be created first.
489 if (FirstProfIncInst
!= nullptr)
490 static_cast<void>(getOrCreateRegionCounters(FirstProfIncInst
));
493 for (Function
&F
: M
)
494 MadeChange
|= lowerIntrinsics(&F
);
496 if (CoverageNamesVar
) {
497 lowerCoverageData(CoverageNamesVar
);
508 emitInitialization();
512 static Constant
*getOrInsertValueProfilingCall(Module
&M
,
513 const TargetLibraryInfo
&TLI
,
514 bool IsRange
= false) {
515 LLVMContext
&Ctx
= M
.getContext();
516 auto *ReturnTy
= Type::getVoidTy(M
.getContext());
520 Type
*ParamTypes
[] = {
521 #define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
522 #include "llvm/ProfileData/InstrProfData.inc"
524 auto *ValueProfilingCallTy
=
525 FunctionType::get(ReturnTy
, makeArrayRef(ParamTypes
), false);
526 Res
= M
.getOrInsertFunction(getInstrProfValueProfFuncName(),
527 ValueProfilingCallTy
);
529 Type
*RangeParamTypes
[] = {
530 #define VALUE_RANGE_PROF 1
531 #define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
532 #include "llvm/ProfileData/InstrProfData.inc"
533 #undef VALUE_RANGE_PROF
535 auto *ValueRangeProfilingCallTy
=
536 FunctionType::get(ReturnTy
, makeArrayRef(RangeParamTypes
), false);
537 Res
= M
.getOrInsertFunction(getInstrProfValueRangeProfFuncName(),
538 ValueRangeProfilingCallTy
);
541 if (Function
*FunRes
= dyn_cast
<Function
>(Res
)) {
542 if (auto AK
= TLI
.getExtAttrForI32Param(false))
543 FunRes
->addParamAttr(2, AK
);
548 void InstrProfiling::computeNumValueSiteCounts(InstrProfValueProfileInst
*Ind
) {
549 GlobalVariable
*Name
= Ind
->getName();
550 uint64_t ValueKind
= Ind
->getValueKind()->getZExtValue();
551 uint64_t Index
= Ind
->getIndex()->getZExtValue();
552 auto It
= ProfileDataMap
.find(Name
);
553 if (It
== ProfileDataMap
.end()) {
554 PerFunctionProfileData PD
;
555 PD
.NumValueSites
[ValueKind
] = Index
+ 1;
556 ProfileDataMap
[Name
] = PD
;
557 } else if (It
->second
.NumValueSites
[ValueKind
] <= Index
)
558 It
->second
.NumValueSites
[ValueKind
] = Index
+ 1;
561 void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst
*Ind
) {
562 GlobalVariable
*Name
= Ind
->getName();
563 auto It
= ProfileDataMap
.find(Name
);
564 assert(It
!= ProfileDataMap
.end() && It
->second
.DataVar
&&
565 "value profiling detected in function with no counter incerement");
567 GlobalVariable
*DataVar
= It
->second
.DataVar
;
568 uint64_t ValueKind
= Ind
->getValueKind()->getZExtValue();
569 uint64_t Index
= Ind
->getIndex()->getZExtValue();
570 for (uint32_t Kind
= IPVK_First
; Kind
< ValueKind
; ++Kind
)
571 Index
+= It
->second
.NumValueSites
[Kind
];
573 IRBuilder
<> Builder(Ind
);
574 bool IsRange
= (Ind
->getValueKind()->getZExtValue() ==
575 llvm::InstrProfValueKind::IPVK_MemOPSize
);
576 CallInst
*Call
= nullptr;
578 Value
*Args
[3] = {Ind
->getTargetValue(),
579 Builder
.CreateBitCast(DataVar
, Builder
.getInt8PtrTy()),
580 Builder
.getInt32(Index
)};
581 Call
= Builder
.CreateCall(getOrInsertValueProfilingCall(*M
, *TLI
), Args
);
584 Ind
->getTargetValue(),
585 Builder
.CreateBitCast(DataVar
, Builder
.getInt8PtrTy()),
586 Builder
.getInt32(Index
),
587 Builder
.getInt64(MemOPSizeRangeStart
),
588 Builder
.getInt64(MemOPSizeRangeLast
),
589 Builder
.getInt64(MemOPSizeLarge
== 0 ? INT64_MIN
: MemOPSizeLarge
)};
591 Builder
.CreateCall(getOrInsertValueProfilingCall(*M
, *TLI
, true), Args
);
593 if (auto AK
= TLI
->getExtAttrForI32Param(false))
594 Call
->addParamAttr(2, AK
);
595 Ind
->replaceAllUsesWith(Call
);
596 Ind
->eraseFromParent();
599 void InstrProfiling::lowerIncrement(InstrProfIncrementInst
*Inc
) {
600 GlobalVariable
*Counters
= getOrCreateRegionCounters(Inc
);
602 IRBuilder
<> Builder(Inc
);
603 uint64_t Index
= Inc
->getIndex()->getZExtValue();
604 Value
*Addr
= Builder
.CreateConstInBoundsGEP2_64(Counters
, 0, Index
);
606 if (Options
.Atomic
|| AtomicCounterUpdateAll
) {
607 Builder
.CreateAtomicRMW(AtomicRMWInst::Add
, Addr
, Inc
->getStep(),
608 AtomicOrdering::Monotonic
);
610 Value
*Load
= Builder
.CreateLoad(Addr
, "pgocount");
611 auto *Count
= Builder
.CreateAdd(Load
, Inc
->getStep());
612 auto *Store
= Builder
.CreateStore(Count
, Addr
);
613 if (isCounterPromotionEnabled())
614 PromotionCandidates
.emplace_back(cast
<Instruction
>(Load
), Store
);
616 Inc
->eraseFromParent();
619 void InstrProfiling::lowerCoverageData(GlobalVariable
*CoverageNamesVar
) {
620 ConstantArray
*Names
=
621 cast
<ConstantArray
>(CoverageNamesVar
->getInitializer());
622 for (unsigned I
= 0, E
= Names
->getNumOperands(); I
< E
; ++I
) {
623 Constant
*NC
= Names
->getOperand(I
);
624 Value
*V
= NC
->stripPointerCasts();
625 assert(isa
<GlobalVariable
>(V
) && "Missing reference to function name");
626 GlobalVariable
*Name
= cast
<GlobalVariable
>(V
);
628 Name
->setLinkage(GlobalValue::PrivateLinkage
);
629 ReferencedNames
.push_back(Name
);
630 NC
->dropAllReferences();
632 CoverageNamesVar
->eraseFromParent();
635 /// Get the name of a profiling variable for a particular function.
636 static std::string
getVarName(InstrProfIncrementInst
*Inc
, StringRef Prefix
) {
637 StringRef NamePrefix
= getInstrProfNameVarPrefix();
638 StringRef Name
= Inc
->getName()->getName().substr(NamePrefix
.size());
639 Function
*F
= Inc
->getParent()->getParent();
640 Module
*M
= F
->getParent();
641 if (!DoHashBasedCounterSplit
|| !isIRPGOFlagSet(M
) ||
642 !canRenameComdatFunc(*F
))
643 return (Prefix
+ Name
).str();
644 uint64_t FuncHash
= Inc
->getHash()->getZExtValue();
645 SmallVector
<char, 24> HashPostfix
;
646 if (Name
.endswith((Twine(".") + Twine(FuncHash
)).toStringRef(HashPostfix
)))
647 return (Prefix
+ Name
).str();
648 return (Prefix
+ Name
+ "." + Twine(FuncHash
)).str();
651 static inline bool shouldRecordFunctionAddr(Function
*F
) {
653 bool HasAvailableExternallyLinkage
= F
->hasAvailableExternallyLinkage();
654 if (!F
->hasLinkOnceLinkage() && !F
->hasLocalLinkage() &&
655 !HasAvailableExternallyLinkage
)
658 // A function marked 'alwaysinline' with available_externally linkage can't
659 // have its address taken. Doing so would create an undefined external ref to
660 // the function, which would fail to link.
661 if (HasAvailableExternallyLinkage
&&
662 F
->hasFnAttribute(Attribute::AlwaysInline
))
665 // Prohibit function address recording if the function is both internal and
666 // COMDAT. This avoids the profile data variable referencing internal symbols
668 if (F
->hasLocalLinkage() && F
->hasComdat())
671 // Check uses of this function for other than direct calls or invokes to it.
672 // Inline virtual functions have linkonce_odr linkage. When a key method
673 // exists, the vtable will only be emitted in the TU where the key method
674 // is defined. In a TU where vtable is not available, the function won't
675 // be 'addresstaken'. If its address is not recorded here, the profile data
676 // with missing address may be picked by the linker leading to missing
677 // indirect call target info.
678 return F
->hasAddressTaken() || F
->hasLinkOnceLinkage();
681 static inline Comdat
*getOrCreateProfileComdat(Module
&M
, Function
&F
,
682 InstrProfIncrementInst
*Inc
) {
683 if (!needsComdatForCounter(F
, M
))
686 // COFF format requires a COMDAT section to have a key symbol with the same
687 // name. The linker targeting COFF also requires that the COMDAT
688 // a section is associated to must precede the associating section. For this
689 // reason, we must choose the counter var's name as the name of the comdat.
690 StringRef ComdatPrefix
= (Triple(M
.getTargetTriple()).isOSBinFormatCOFF()
691 ? getInstrProfCountersVarPrefix()
692 : getInstrProfComdatPrefix());
693 return M
.getOrInsertComdat(StringRef(getVarName(Inc
, ComdatPrefix
)));
// Decides, from the target triple of M alone, whether the profile sections
// need runtime registration. Darwin is tested first, then the group
// Linux / FreeBSD / Fuchsia / PS4. Callers in this file (emitRegistration,
// emitVNodes, getOrCreateRegionCounters) branch on the result.
// NOTE(review): the return statements are not visible in this listing. Judging
// by the comments below, the listed platforms presumably yield false and any
// other target true. Confirm against the complete file.
696 static bool needsRuntimeRegistrationOfSectionRange(const Module
&M
) {
697 // Don't do this for Darwin. compiler-rt uses linker magic.
698 if (Triple(M
.getTargetTriple()).isOSDarwin())
701 // Use linker script magic to get data/cnts/name start/end.
702 if (Triple(M
.getTargetTriple()).isOSLinux() ||
703 Triple(M
.getTargetTriple()).isOSFreeBSD() ||
704 Triple(M
.getTargetTriple()).isOSFuchsia() ||
705 Triple(M
.getTargetTriple()).isPS4CPU())
712 InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst
*Inc
) {
713 GlobalVariable
*NamePtr
= Inc
->getName();
714 auto It
= ProfileDataMap
.find(NamePtr
);
715 PerFunctionProfileData PD
;
716 if (It
!= ProfileDataMap
.end()) {
717 if (It
->second
.RegionCounters
)
718 return It
->second
.RegionCounters
;
722 // Move the name variable to the right section. Place them in a COMDAT group
723 // if the associated function is a COMDAT. This will make sure that
724 // only one copy of counters of the COMDAT function will be emitted after
726 Function
*Fn
= Inc
->getParent()->getParent();
727 Comdat
*ProfileVarsComdat
= nullptr;
728 ProfileVarsComdat
= getOrCreateProfileComdat(*M
, *Fn
, Inc
);
730 uint64_t NumCounters
= Inc
->getNumCounters()->getZExtValue();
731 LLVMContext
&Ctx
= M
->getContext();
732 ArrayType
*CounterTy
= ArrayType::get(Type::getInt64Ty(Ctx
), NumCounters
);
734 // Create the counters variable.
736 new GlobalVariable(*M
, CounterTy
, false, NamePtr
->getLinkage(),
737 Constant::getNullValue(CounterTy
),
738 getVarName(Inc
, getInstrProfCountersVarPrefix()));
739 CounterPtr
->setVisibility(NamePtr
->getVisibility());
740 CounterPtr
->setSection(
741 getInstrProfSectionName(IPSK_cnts
, TT
.getObjectFormat()));
742 CounterPtr
->setAlignment(8);
743 CounterPtr
->setComdat(ProfileVarsComdat
);
745 auto *Int8PtrTy
= Type::getInt8PtrTy(Ctx
);
746 // Allocate statically the array of pointers to value profile nodes for
747 // the current function.
748 Constant
*ValuesPtrExpr
= ConstantPointerNull::get(Int8PtrTy
);
749 if (ValueProfileStaticAlloc
&& !needsRuntimeRegistrationOfSectionRange(*M
)) {
751 for (uint32_t Kind
= IPVK_First
; Kind
<= IPVK_Last
; ++Kind
)
752 NS
+= PD
.NumValueSites
[Kind
];
754 ArrayType
*ValuesTy
= ArrayType::get(Type::getInt64Ty(Ctx
), NS
);
757 new GlobalVariable(*M
, ValuesTy
, false, NamePtr
->getLinkage(),
758 Constant::getNullValue(ValuesTy
),
759 getVarName(Inc
, getInstrProfValuesVarPrefix()));
760 ValuesVar
->setVisibility(NamePtr
->getVisibility());
761 ValuesVar
->setSection(
762 getInstrProfSectionName(IPSK_vals
, TT
.getObjectFormat()));
763 ValuesVar
->setAlignment(8);
764 ValuesVar
->setComdat(ProfileVarsComdat
);
766 ConstantExpr::getBitCast(ValuesVar
, Type::getInt8PtrTy(Ctx
));
770 // Create data variable.
771 auto *Int16Ty
= Type::getInt16Ty(Ctx
);
772 auto *Int16ArrayTy
= ArrayType::get(Int16Ty
, IPVK_Last
+ 1);
773 Type
*DataTypes
[] = {
774 #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) LLVMType,
775 #include "llvm/ProfileData/InstrProfData.inc"
777 auto *DataTy
= StructType::get(Ctx
, makeArrayRef(DataTypes
));
779 Constant
*FunctionAddr
= shouldRecordFunctionAddr(Fn
)
780 ? ConstantExpr::getBitCast(Fn
, Int8PtrTy
)
781 : ConstantPointerNull::get(Int8PtrTy
);
783 Constant
*Int16ArrayVals
[IPVK_Last
+ 1];
784 for (uint32_t Kind
= IPVK_First
; Kind
<= IPVK_Last
; ++Kind
)
785 Int16ArrayVals
[Kind
] = ConstantInt::get(Int16Ty
, PD
.NumValueSites
[Kind
]);
787 Constant
*DataVals
[] = {
788 #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init,
789 #include "llvm/ProfileData/InstrProfData.inc"
791 auto *Data
= new GlobalVariable(*M
, DataTy
, false, NamePtr
->getLinkage(),
792 ConstantStruct::get(DataTy
, DataVals
),
793 getVarName(Inc
, getInstrProfDataVarPrefix()));
794 Data
->setVisibility(NamePtr
->getVisibility());
795 Data
->setSection(getInstrProfSectionName(IPSK_data
, TT
.getObjectFormat()));
796 Data
->setAlignment(INSTR_PROF_DATA_ALIGNMENT
);
797 Data
->setComdat(ProfileVarsComdat
);
799 PD
.RegionCounters
= CounterPtr
;
801 ProfileDataMap
[NamePtr
] = PD
;
803 // Mark the data variable as used so that it isn't stripped out.
804 UsedVars
.push_back(Data
);
805 // Now that the linkage set by the FE has been passed to the data and counter
806 // variables, reset Name variable's linkage and visibility to private so that
807 // it can be removed later by the compiler.
808 NamePtr
->setLinkage(GlobalValue::PrivateLinkage
);
809 // Collect the referenced names to be used by emitNameData.
810 ReferencedNames
.push_back(NamePtr
);
815 void InstrProfiling::emitVNodes() {
816 if (!ValueProfileStaticAlloc
)
819 // For now only support this on platforms that do
820 // not require runtime registration to discover
821 // named section start/end.
822 if (needsRuntimeRegistrationOfSectionRange(*M
))
826 for (auto &PD
: ProfileDataMap
) {
827 for (uint32_t Kind
= IPVK_First
; Kind
<= IPVK_Last
; ++Kind
)
828 TotalNS
+= PD
.second
.NumValueSites
[Kind
];
834 uint64_t NumCounters
= TotalNS
* NumCountersPerValueSite
;
835 // Heuristic for small programs with very few total value sites.
836 // The default value of vp-counters-per-site is chosen based on
837 // the observation that large apps usually have a low percentage
838 // of value sites that actually have any profile data, and thus
839 // the average number of counters per site is low. For small
840 // apps with very few sites, this may not be true. Bump up the
841 // number of counters in this case.
842 #define INSTR_PROF_MIN_VAL_COUNTS 10
843 if (NumCounters
< INSTR_PROF_MIN_VAL_COUNTS
)
844 NumCounters
= std::max(INSTR_PROF_MIN_VAL_COUNTS
, (int)NumCounters
* 2);
846 auto &Ctx
= M
->getContext();
847 Type
*VNodeTypes
[] = {
848 #define INSTR_PROF_VALUE_NODE(Type, LLVMType, Name, Init) LLVMType,
849 #include "llvm/ProfileData/InstrProfData.inc"
851 auto *VNodeTy
= StructType::get(Ctx
, makeArrayRef(VNodeTypes
));
853 ArrayType
*VNodesTy
= ArrayType::get(VNodeTy
, NumCounters
);
854 auto *VNodesVar
= new GlobalVariable(
855 *M
, VNodesTy
, false, GlobalValue::PrivateLinkage
,
856 Constant::getNullValue(VNodesTy
), getInstrProfVNodesVarName());
857 VNodesVar
->setSection(
858 getInstrProfSectionName(IPSK_vnodes
, TT
.getObjectFormat()));
859 UsedVars
.push_back(VNodesVar
);
862 void InstrProfiling::emitNameData() {
863 std::string UncompressedData
;
865 if (ReferencedNames
.empty())
868 std::string CompressedNameStr
;
869 if (Error E
= collectPGOFuncNameStrings(ReferencedNames
, CompressedNameStr
,
870 DoNameCompression
)) {
871 report_fatal_error(toString(std::move(E
)), false);
874 auto &Ctx
= M
->getContext();
875 auto *NamesVal
= ConstantDataArray::getString(
876 Ctx
, StringRef(CompressedNameStr
), false);
877 NamesVar
= new GlobalVariable(*M
, NamesVal
->getType(), true,
878 GlobalValue::PrivateLinkage
, NamesVal
,
879 getInstrProfNamesVarName());
880 NamesSize
= CompressedNameStr
.size();
881 NamesVar
->setSection(
882 getInstrProfSectionName(IPSK_name
, TT
.getObjectFormat()));
883 UsedVars
.push_back(NamesVar
);
885 for (auto *NamePtr
: ReferencedNames
)
886 NamePtr
->eraseFromParent();
889 void InstrProfiling::emitRegistration() {
890 if (!needsRuntimeRegistrationOfSectionRange(*M
))
893 // Construct the function.
894 auto *VoidTy
= Type::getVoidTy(M
->getContext());
895 auto *VoidPtrTy
= Type::getInt8PtrTy(M
->getContext());
896 auto *Int64Ty
= Type::getInt64Ty(M
->getContext());
897 auto *RegisterFTy
= FunctionType::get(VoidTy
, false);
898 auto *RegisterF
= Function::Create(RegisterFTy
, GlobalValue::InternalLinkage
,
899 getInstrProfRegFuncsName(), M
);
900 RegisterF
->setUnnamedAddr(GlobalValue::UnnamedAddr::Global
);
901 if (Options
.NoRedZone
)
902 RegisterF
->addFnAttr(Attribute::NoRedZone
);
904 auto *RuntimeRegisterTy
= FunctionType::get(VoidTy
, VoidPtrTy
, false);
905 auto *RuntimeRegisterF
=
906 Function::Create(RuntimeRegisterTy
, GlobalVariable::ExternalLinkage
,
907 getInstrProfRegFuncName(), M
);
909 IRBuilder
<> IRB(BasicBlock::Create(M
->getContext(), "", RegisterF
));
910 for (Value
*Data
: UsedVars
)
911 if (Data
!= NamesVar
&& !isa
<Function
>(Data
))
912 IRB
.CreateCall(RuntimeRegisterF
, IRB
.CreateBitCast(Data
, VoidPtrTy
));
915 Type
*ParamTypes
[] = {VoidPtrTy
, Int64Ty
};
916 auto *NamesRegisterTy
=
917 FunctionType::get(VoidTy
, makeArrayRef(ParamTypes
), false);
918 auto *NamesRegisterF
=
919 Function::Create(NamesRegisterTy
, GlobalVariable::ExternalLinkage
,
920 getInstrProfNamesRegFuncName(), M
);
921 IRB
.CreateCall(NamesRegisterF
, {IRB
.CreateBitCast(NamesVar
, VoidPtrTy
),
922 IRB
.getInt64(NamesSize
)});
928 bool InstrProfiling::emitRuntimeHook() {
929 // We expect the linker to be invoked with -u<hook_var> flag for linux,
930 // for which case there is no need to emit the user function.
931 if (Triple(M
->getTargetTriple()).isOSLinux())
934 // If the module's provided its own runtime, we don't need to do anything.
935 if (M
->getGlobalVariable(getInstrProfRuntimeHookVarName()))
938 // Declare an external variable that will pull in the runtime initialization.
939 auto *Int32Ty
= Type::getInt32Ty(M
->getContext());
941 new GlobalVariable(*M
, Int32Ty
, false, GlobalValue::ExternalLinkage
,
942 nullptr, getInstrProfRuntimeHookVarName());
944 // Make a function that uses it.
945 auto *User
= Function::Create(FunctionType::get(Int32Ty
, false),
946 GlobalValue::LinkOnceODRLinkage
,
947 getInstrProfRuntimeHookVarUseFuncName(), M
);
948 User
->addFnAttr(Attribute::NoInline
);
949 if (Options
.NoRedZone
)
950 User
->addFnAttr(Attribute::NoRedZone
);
951 User
->setVisibility(GlobalValue::HiddenVisibility
);
952 if (Triple(M
->getTargetTriple()).supportsCOMDAT())
953 User
->setComdat(M
->getOrInsertComdat(User
->getName()));
955 IRBuilder
<> IRB(BasicBlock::Create(M
->getContext(), "", User
));
956 auto *Load
= IRB
.CreateLoad(Var
);
959 // Mark the user variable as used so that it isn't stripped out.
960 UsedVars
.push_back(User
);
964 void InstrProfiling::emitUses() {
965 if (!UsedVars
.empty())
966 appendToUsed(*M
, UsedVars
);
969 void InstrProfiling::emitInitialization() {
970 StringRef InstrProfileOutput
= Options
.InstrProfileOutput
;
972 if (!InstrProfileOutput
.empty()) {
973 // Create variable for profile name.
974 Constant
*ProfileNameConst
=
975 ConstantDataArray::getString(M
->getContext(), InstrProfileOutput
, true);
976 GlobalVariable
*ProfileNameVar
= new GlobalVariable(
977 *M
, ProfileNameConst
->getType(), true, GlobalValue::WeakAnyLinkage
,
978 ProfileNameConst
, INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR
));
979 if (TT
.supportsCOMDAT()) {
980 ProfileNameVar
->setLinkage(GlobalValue::ExternalLinkage
);
981 ProfileNameVar
->setComdat(M
->getOrInsertComdat(
982 StringRef(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR
))));
986 Constant
*RegisterF
= M
->getFunction(getInstrProfRegFuncsName());
990 // Create the initialization function.
991 auto *VoidTy
= Type::getVoidTy(M
->getContext());
992 auto *F
= Function::Create(FunctionType::get(VoidTy
, false),
993 GlobalValue::InternalLinkage
,
994 getInstrProfInitFuncName(), M
);
995 F
->setUnnamedAddr(GlobalValue::UnnamedAddr::Global
);
996 F
->addFnAttr(Attribute::NoInline
);
997 if (Options
.NoRedZone
)
998 F
->addFnAttr(Attribute::NoRedZone
);
1000 // Add the basic block and the necessary calls.
1001 IRBuilder
<> IRB(BasicBlock::Create(M
->getContext(), "", F
));
1003 IRB
.CreateCall(RegisterF
, {});
1004 IRB
.CreateRetVoid();
1006 appendToGlobalCtors(*M
, F
, 0);