[llvm-exegesis] [NFC] Fixing typo.
[llvm-complete.git] / lib / Transforms / Instrumentation / InstrProfiling.cpp
blobe77427aa91ddc612ef560e9b24d3c4b523bdeb68
//===-- InstrProfiling.cpp - Frontend instrumentation based profiling -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass lowers instrprof_* intrinsics emitted by a frontend for profiling.
// It also builds the data structures and initialization code needed for
// updating execution counts and emitting the profile at runtime.
//
//===----------------------------------------------------------------------===//
15 #include "llvm/Transforms/Instrumentation/InstrProfiling.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/ADT/Triple.h"
20 #include "llvm/ADT/Twine.h"
21 #include "llvm/Analysis/LoopInfo.h"
22 #include "llvm/Analysis/TargetLibraryInfo.h"
23 #include "llvm/IR/Attributes.h"
24 #include "llvm/IR/BasicBlock.h"
25 #include "llvm/IR/Constant.h"
26 #include "llvm/IR/Constants.h"
27 #include "llvm/IR/DerivedTypes.h"
28 #include "llvm/IR/Dominators.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/IR/GlobalValue.h"
31 #include "llvm/IR/GlobalVariable.h"
32 #include "llvm/IR/IRBuilder.h"
33 #include "llvm/IR/Instruction.h"
34 #include "llvm/IR/Instructions.h"
35 #include "llvm/IR/IntrinsicInst.h"
36 #include "llvm/IR/Module.h"
37 #include "llvm/IR/Type.h"
38 #include "llvm/Pass.h"
39 #include "llvm/ProfileData/InstrProf.h"
40 #include "llvm/Support/Casting.h"
41 #include "llvm/Support/CommandLine.h"
42 #include "llvm/Support/Error.h"
43 #include "llvm/Support/ErrorHandling.h"
44 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
45 #include "llvm/Transforms/Utils/ModuleUtils.h"
46 #include "llvm/Transforms/Utils/SSAUpdater.h"
47 #include <algorithm>
48 #include <cassert>
49 #include <cstddef>
50 #include <cstdint>
51 #include <string>
53 using namespace llvm;
55 #define DEBUG_TYPE "instrprof"
57 // The start and end values of precise value profile range for memory
58 // intrinsic sizes
59 cl::opt<std::string> MemOPSizeRange(
60 "memop-size-range",
61 cl::desc("Set the range of size in memory intrinsic calls to be profiled "
62 "precisely, in a format of <start_val>:<end_val>"),
63 cl::init(""));
65 // The value that considered to be large value in memory intrinsic.
66 cl::opt<unsigned> MemOPSizeLarge(
67 "memop-size-large",
68 cl::desc("Set large value thresthold in memory intrinsic size profiling. "
69 "Value of 0 disables the large value profiling."),
70 cl::init(8192));
72 namespace {
74 cl::opt<bool> DoNameCompression("enable-name-compression",
75 cl::desc("Enable name string compression"),
76 cl::init(true));
78 cl::opt<bool> DoHashBasedCounterSplit(
79 "hash-based-counter-split",
80 cl::desc("Rename counter variable of a comdat function based on cfg hash"),
81 cl::init(true));
83 cl::opt<bool> ValueProfileStaticAlloc(
84 "vp-static-alloc",
85 cl::desc("Do static counter allocation for value profiler"),
86 cl::init(true));
88 cl::opt<double> NumCountersPerValueSite(
89 "vp-counters-per-site",
90 cl::desc("The average number of profile counters allocated "
91 "per value profiling site."),
92 // This is set to a very small value because in real programs, only
93 // a very small percentage of value sites have non-zero targets, e.g, 1/30.
94 // For those sites with non-zero profile, the average number of targets
95 // is usually smaller than 2.
96 cl::init(1.0));
98 cl::opt<bool> AtomicCounterUpdateAll(
99 "instrprof-atomic-counter-update-all", cl::ZeroOrMore,
100 cl::desc("Make all profile counter updates atomic (for testing only)"),
101 cl::init(false));
103 cl::opt<bool> AtomicCounterUpdatePromoted(
104 "atomic-counter-update-promoted", cl::ZeroOrMore,
105 cl::desc("Do counter update using atomic fetch add "
106 " for promoted counters only"),
107 cl::init(false));
109 // If the option is not specified, the default behavior about whether
110 // counter promotion is done depends on how instrumentaiton lowering
111 // pipeline is setup, i.e., the default value of true of this option
112 // does not mean the promotion will be done by default. Explicitly
113 // setting this option can override the default behavior.
114 cl::opt<bool> DoCounterPromotion("do-counter-promotion", cl::ZeroOrMore,
115 cl::desc("Do counter register promotion"),
116 cl::init(false));
117 cl::opt<unsigned> MaxNumOfPromotionsPerLoop(
118 cl::ZeroOrMore, "max-counter-promotions-per-loop", cl::init(20),
119 cl::desc("Max number counter promotions per loop to avoid"
120 " increasing register pressure too much"));
122 // A debug option
123 cl::opt<int>
124 MaxNumOfPromotions(cl::ZeroOrMore, "max-counter-promotions", cl::init(-1),
125 cl::desc("Max number of allowed counter promotions"));
127 cl::opt<unsigned> SpeculativeCounterPromotionMaxExiting(
128 cl::ZeroOrMore, "speculative-counter-promotion-max-exiting", cl::init(3),
129 cl::desc("The max number of exiting blocks of a loop to allow "
130 " speculative counter promotion"));
132 cl::opt<bool> SpeculativeCounterPromotionToLoop(
133 cl::ZeroOrMore, "speculative-counter-promotion-to-loop", cl::init(false),
134 cl::desc("When the option is false, if the target block is in a loop, "
135 "the promotion will be disallowed unless the promoted counter "
136 " update can be further/iteratively promoted into an acyclic "
137 " region."));
139 cl::opt<bool> IterativeCounterPromotion(
140 cl::ZeroOrMore, "iterative-counter-promotion", cl::init(true),
141 cl::desc("Allow counter promotion across the whole loop nest."));
143 class InstrProfilingLegacyPass : public ModulePass {
144 InstrProfiling InstrProf;
146 public:
147 static char ID;
149 InstrProfilingLegacyPass() : ModulePass(ID) {}
150 InstrProfilingLegacyPass(const InstrProfOptions &Options)
151 : ModulePass(ID), InstrProf(Options) {}
153 StringRef getPassName() const override {
154 return "Frontend instrumentation-based coverage lowering";
157 bool runOnModule(Module &M) override {
158 return InstrProf.run(M, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI());
161 void getAnalysisUsage(AnalysisUsage &AU) const override {
162 AU.setPreservesCFG();
163 AU.addRequired<TargetLibraryInfoWrapperPass>();
168 /// A helper class to promote one counter RMW operation in the loop
169 /// into register update.
171 /// RWM update for the counter will be sinked out of the loop after
172 /// the transformation.
174 class PGOCounterPromoterHelper : public LoadAndStorePromoter {
175 public:
176 PGOCounterPromoterHelper(
177 Instruction *L, Instruction *S, SSAUpdater &SSA, Value *Init,
178 BasicBlock *PH, ArrayRef<BasicBlock *> ExitBlocks,
179 ArrayRef<Instruction *> InsertPts,
180 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
181 LoopInfo &LI)
182 : LoadAndStorePromoter({L, S}, SSA), Store(S), ExitBlocks(ExitBlocks),
183 InsertPts(InsertPts), LoopToCandidates(LoopToCands), LI(LI) {
184 assert(isa<LoadInst>(L));
185 assert(isa<StoreInst>(S));
186 SSA.AddAvailableValue(PH, Init);
189 void doExtraRewritesBeforeFinalDeletion() override {
190 for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
191 BasicBlock *ExitBlock = ExitBlocks[i];
192 Instruction *InsertPos = InsertPts[i];
193 // Get LiveIn value into the ExitBlock. If there are multiple
194 // predecessors, the value is defined by a PHI node in this
195 // block.
196 Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
197 Value *Addr = cast<StoreInst>(Store)->getPointerOperand();
198 Type *Ty = LiveInValue->getType();
199 IRBuilder<> Builder(InsertPos);
200 if (AtomicCounterUpdatePromoted)
201 // automic update currently can only be promoted across the current
202 // loop, not the whole loop nest.
203 Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, LiveInValue,
204 AtomicOrdering::SequentiallyConsistent);
205 else {
206 LoadInst *OldVal = Builder.CreateLoad(Ty, Addr, "pgocount.promoted");
207 auto *NewVal = Builder.CreateAdd(OldVal, LiveInValue);
208 auto *NewStore = Builder.CreateStore(NewVal, Addr);
210 // Now update the parent loop's candidate list:
211 if (IterativeCounterPromotion) {
212 auto *TargetLoop = LI.getLoopFor(ExitBlock);
213 if (TargetLoop)
214 LoopToCandidates[TargetLoop].emplace_back(OldVal, NewStore);
220 private:
221 Instruction *Store;
222 ArrayRef<BasicBlock *> ExitBlocks;
223 ArrayRef<Instruction *> InsertPts;
224 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
225 LoopInfo &LI;
228 /// A helper class to do register promotion for all profile counter
229 /// updates in a loop.
231 class PGOCounterPromoter {
232 public:
233 PGOCounterPromoter(
234 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
235 Loop &CurLoop, LoopInfo &LI)
236 : LoopToCandidates(LoopToCands), ExitBlocks(), InsertPts(), L(CurLoop),
237 LI(LI) {
239 SmallVector<BasicBlock *, 8> LoopExitBlocks;
240 SmallPtrSet<BasicBlock *, 8> BlockSet;
241 L.getExitBlocks(LoopExitBlocks);
243 for (BasicBlock *ExitBlock : LoopExitBlocks) {
244 if (BlockSet.insert(ExitBlock).second) {
245 ExitBlocks.push_back(ExitBlock);
246 InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
251 bool run(int64_t *NumPromoted) {
252 // Skip 'infinite' loops:
253 if (ExitBlocks.size() == 0)
254 return false;
255 unsigned MaxProm = getMaxNumOfPromotionsInLoop(&L);
256 if (MaxProm == 0)
257 return false;
259 unsigned Promoted = 0;
260 for (auto &Cand : LoopToCandidates[&L]) {
262 SmallVector<PHINode *, 4> NewPHIs;
263 SSAUpdater SSA(&NewPHIs);
264 Value *InitVal = ConstantInt::get(Cand.first->getType(), 0);
266 PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal,
267 L.getLoopPreheader(), ExitBlocks,
268 InsertPts, LoopToCandidates, LI);
269 Promoter.run(SmallVector<Instruction *, 2>({Cand.first, Cand.second}));
270 Promoted++;
271 if (Promoted >= MaxProm)
272 break;
274 (*NumPromoted)++;
275 if (MaxNumOfPromotions != -1 && *NumPromoted >= MaxNumOfPromotions)
276 break;
279 LLVM_DEBUG(dbgs() << Promoted << " counters promoted for loop (depth="
280 << L.getLoopDepth() << ")\n");
281 return Promoted != 0;
284 private:
285 bool allowSpeculativeCounterPromotion(Loop *LP) {
286 SmallVector<BasicBlock *, 8> ExitingBlocks;
287 L.getExitingBlocks(ExitingBlocks);
288 // Not considierered speculative.
289 if (ExitingBlocks.size() == 1)
290 return true;
291 if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
292 return false;
293 return true;
296 // Returns the max number of Counter Promotions for LP.
297 unsigned getMaxNumOfPromotionsInLoop(Loop *LP) {
298 // We can't insert into a catchswitch.
299 SmallVector<BasicBlock *, 8> LoopExitBlocks;
300 LP->getExitBlocks(LoopExitBlocks);
301 if (llvm::any_of(LoopExitBlocks, [](BasicBlock *Exit) {
302 return isa<CatchSwitchInst>(Exit->getTerminator());
304 return 0;
306 if (!LP->hasDedicatedExits())
307 return 0;
309 BasicBlock *PH = LP->getLoopPreheader();
310 if (!PH)
311 return 0;
313 SmallVector<BasicBlock *, 8> ExitingBlocks;
314 LP->getExitingBlocks(ExitingBlocks);
315 // Not considierered speculative.
316 if (ExitingBlocks.size() == 1)
317 return MaxNumOfPromotionsPerLoop;
319 if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
320 return 0;
322 // Whether the target block is in a loop does not matter:
323 if (SpeculativeCounterPromotionToLoop)
324 return MaxNumOfPromotionsPerLoop;
326 // Now check the target block:
327 unsigned MaxProm = MaxNumOfPromotionsPerLoop;
328 for (auto *TargetBlock : LoopExitBlocks) {
329 auto *TargetLoop = LI.getLoopFor(TargetBlock);
330 if (!TargetLoop)
331 continue;
332 unsigned MaxPromForTarget = getMaxNumOfPromotionsInLoop(TargetLoop);
333 unsigned PendingCandsInTarget = LoopToCandidates[TargetLoop].size();
334 MaxProm =
335 std::min(MaxProm, std::max(MaxPromForTarget, PendingCandsInTarget) -
336 PendingCandsInTarget);
338 return MaxProm;
341 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
342 SmallVector<BasicBlock *, 8> ExitBlocks;
343 SmallVector<Instruction *, 8> InsertPts;
344 Loop &L;
345 LoopInfo &LI;
348 } // end anonymous namespace
350 PreservedAnalyses InstrProfiling::run(Module &M, ModuleAnalysisManager &AM) {
351 auto &TLI = AM.getResult<TargetLibraryAnalysis>(M);
352 if (!run(M, TLI))
353 return PreservedAnalyses::all();
355 return PreservedAnalyses::none();
358 char InstrProfilingLegacyPass::ID = 0;
359 INITIALIZE_PASS_BEGIN(
360 InstrProfilingLegacyPass, "instrprof",
361 "Frontend instrumentation-based coverage lowering.", false, false)
362 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
363 INITIALIZE_PASS_END(
364 InstrProfilingLegacyPass, "instrprof",
365 "Frontend instrumentation-based coverage lowering.", false, false)
367 ModulePass *
368 llvm::createInstrProfilingLegacyPass(const InstrProfOptions &Options) {
369 return new InstrProfilingLegacyPass(Options);
372 static InstrProfIncrementInst *castToIncrementInst(Instruction *Instr) {
373 InstrProfIncrementInst *Inc = dyn_cast<InstrProfIncrementInstStep>(Instr);
374 if (Inc)
375 return Inc;
376 return dyn_cast<InstrProfIncrementInst>(Instr);
379 bool InstrProfiling::lowerIntrinsics(Function *F) {
380 bool MadeChange = false;
381 PromotionCandidates.clear();
382 for (BasicBlock &BB : *F) {
383 for (auto I = BB.begin(), E = BB.end(); I != E;) {
384 auto Instr = I++;
385 InstrProfIncrementInst *Inc = castToIncrementInst(&*Instr);
386 if (Inc) {
387 lowerIncrement(Inc);
388 MadeChange = true;
389 } else if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(Instr)) {
390 lowerValueProfileInst(Ind);
391 MadeChange = true;
396 if (!MadeChange)
397 return false;
399 promoteCounterLoadStores(F);
400 return true;
403 bool InstrProfiling::isCounterPromotionEnabled() const {
404 if (DoCounterPromotion.getNumOccurrences() > 0)
405 return DoCounterPromotion;
407 return Options.DoCounterPromotion;
410 void InstrProfiling::promoteCounterLoadStores(Function *F) {
411 if (!isCounterPromotionEnabled())
412 return;
414 DominatorTree DT(*F);
415 LoopInfo LI(DT);
416 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> LoopPromotionCandidates;
418 for (const auto &LoadStore : PromotionCandidates) {
419 auto *CounterLoad = LoadStore.first;
420 auto *CounterStore = LoadStore.second;
421 BasicBlock *BB = CounterLoad->getParent();
422 Loop *ParentLoop = LI.getLoopFor(BB);
423 if (!ParentLoop)
424 continue;
425 LoopPromotionCandidates[ParentLoop].emplace_back(CounterLoad, CounterStore);
428 SmallVector<Loop *, 4> Loops = LI.getLoopsInPreorder();
430 // Do a post-order traversal of the loops so that counter updates can be
431 // iteratively hoisted outside the loop nest.
432 for (auto *Loop : llvm::reverse(Loops)) {
433 PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI);
434 Promoter.run(&TotalCountersPromoted);
438 /// Check if the module contains uses of any profiling intrinsics.
439 static bool containsProfilingIntrinsics(Module &M) {
440 if (auto *F = M.getFunction(
441 Intrinsic::getName(llvm::Intrinsic::instrprof_increment)))
442 if (!F->use_empty())
443 return true;
444 if (auto *F = M.getFunction(
445 Intrinsic::getName(llvm::Intrinsic::instrprof_increment_step)))
446 if (!F->use_empty())
447 return true;
448 if (auto *F = M.getFunction(
449 Intrinsic::getName(llvm::Intrinsic::instrprof_value_profile)))
450 if (!F->use_empty())
451 return true;
452 return false;
455 bool InstrProfiling::run(Module &M, const TargetLibraryInfo &TLI) {
456 this->M = &M;
457 this->TLI = &TLI;
458 NamesVar = nullptr;
459 NamesSize = 0;
460 ProfileDataMap.clear();
461 UsedVars.clear();
462 getMemOPSizeRangeFromOption(MemOPSizeRange, MemOPSizeRangeStart,
463 MemOPSizeRangeLast);
464 TT = Triple(M.getTargetTriple());
466 // Emit the runtime hook even if no counters are present.
467 bool MadeChange = emitRuntimeHook();
469 // Improve compile time by avoiding linear scans when there is no work.
470 GlobalVariable *CoverageNamesVar =
471 M.getNamedGlobal(getCoverageUnusedNamesVarName());
472 if (!containsProfilingIntrinsics(M) && !CoverageNamesVar)
473 return MadeChange;
475 // We did not know how many value sites there would be inside
476 // the instrumented function. This is counting the number of instrumented
477 // target value sites to enter it as field in the profile data variable.
478 for (Function &F : M) {
479 InstrProfIncrementInst *FirstProfIncInst = nullptr;
480 for (BasicBlock &BB : F)
481 for (auto I = BB.begin(), E = BB.end(); I != E; I++)
482 if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(I))
483 computeNumValueSiteCounts(Ind);
484 else if (FirstProfIncInst == nullptr)
485 FirstProfIncInst = dyn_cast<InstrProfIncrementInst>(I);
487 // Value profiling intrinsic lowering requires per-function profile data
488 // variable to be created first.
489 if (FirstProfIncInst != nullptr)
490 static_cast<void>(getOrCreateRegionCounters(FirstProfIncInst));
493 for (Function &F : M)
494 MadeChange |= lowerIntrinsics(&F);
496 if (CoverageNamesVar) {
497 lowerCoverageData(CoverageNamesVar);
498 MadeChange = true;
501 if (!MadeChange)
502 return false;
504 emitVNodes();
505 emitNameData();
506 emitRegistration();
507 emitUses();
508 emitInitialization();
509 return true;
512 static FunctionCallee
513 getOrInsertValueProfilingCall(Module &M, const TargetLibraryInfo &TLI,
514 bool IsRange = false) {
515 LLVMContext &Ctx = M.getContext();
516 auto *ReturnTy = Type::getVoidTy(M.getContext());
518 AttributeList AL;
519 if (auto AK = TLI.getExtAttrForI32Param(false))
520 AL = AL.addParamAttribute(M.getContext(), 2, AK);
522 if (!IsRange) {
523 Type *ParamTypes[] = {
524 #define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
525 #include "llvm/ProfileData/InstrProfData.inc"
527 auto *ValueProfilingCallTy =
528 FunctionType::get(ReturnTy, makeArrayRef(ParamTypes), false);
529 return M.getOrInsertFunction(getInstrProfValueProfFuncName(),
530 ValueProfilingCallTy, AL);
531 } else {
532 Type *RangeParamTypes[] = {
533 #define VALUE_RANGE_PROF 1
534 #define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
535 #include "llvm/ProfileData/InstrProfData.inc"
536 #undef VALUE_RANGE_PROF
538 auto *ValueRangeProfilingCallTy =
539 FunctionType::get(ReturnTy, makeArrayRef(RangeParamTypes), false);
540 return M.getOrInsertFunction(getInstrProfValueRangeProfFuncName(),
541 ValueRangeProfilingCallTy, AL);
545 void InstrProfiling::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) {
546 GlobalVariable *Name = Ind->getName();
547 uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
548 uint64_t Index = Ind->getIndex()->getZExtValue();
549 auto It = ProfileDataMap.find(Name);
550 if (It == ProfileDataMap.end()) {
551 PerFunctionProfileData PD;
552 PD.NumValueSites[ValueKind] = Index + 1;
553 ProfileDataMap[Name] = PD;
554 } else if (It->second.NumValueSites[ValueKind] <= Index)
555 It->second.NumValueSites[ValueKind] = Index + 1;
558 void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
559 GlobalVariable *Name = Ind->getName();
560 auto It = ProfileDataMap.find(Name);
561 assert(It != ProfileDataMap.end() && It->second.DataVar &&
562 "value profiling detected in function with no counter incerement");
564 GlobalVariable *DataVar = It->second.DataVar;
565 uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
566 uint64_t Index = Ind->getIndex()->getZExtValue();
567 for (uint32_t Kind = IPVK_First; Kind < ValueKind; ++Kind)
568 Index += It->second.NumValueSites[Kind];
570 IRBuilder<> Builder(Ind);
571 bool IsRange = (Ind->getValueKind()->getZExtValue() ==
572 llvm::InstrProfValueKind::IPVK_MemOPSize);
573 CallInst *Call = nullptr;
574 if (!IsRange) {
575 Value *Args[3] = {Ind->getTargetValue(),
576 Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()),
577 Builder.getInt32(Index)};
578 Call = Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI), Args);
579 } else {
580 Value *Args[6] = {
581 Ind->getTargetValue(),
582 Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()),
583 Builder.getInt32(Index),
584 Builder.getInt64(MemOPSizeRangeStart),
585 Builder.getInt64(MemOPSizeRangeLast),
586 Builder.getInt64(MemOPSizeLarge == 0 ? INT64_MIN : MemOPSizeLarge)};
587 Call =
588 Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI, true), Args);
590 if (auto AK = TLI->getExtAttrForI32Param(false))
591 Call->addParamAttr(2, AK);
592 Ind->replaceAllUsesWith(Call);
593 Ind->eraseFromParent();
596 void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
597 GlobalVariable *Counters = getOrCreateRegionCounters(Inc);
599 IRBuilder<> Builder(Inc);
600 uint64_t Index = Inc->getIndex()->getZExtValue();
601 Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters->getValueType(),
602 Counters, 0, Index);
604 if (Options.Atomic || AtomicCounterUpdateAll) {
605 Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, Inc->getStep(),
606 AtomicOrdering::Monotonic);
607 } else {
608 Value *IncStep = Inc->getStep();
609 Value *Load = Builder.CreateLoad(IncStep->getType(), Addr, "pgocount");
610 auto *Count = Builder.CreateAdd(Load, Inc->getStep());
611 auto *Store = Builder.CreateStore(Count, Addr);
612 if (isCounterPromotionEnabled())
613 PromotionCandidates.emplace_back(cast<Instruction>(Load), Store);
615 Inc->eraseFromParent();
618 void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageNamesVar) {
619 ConstantArray *Names =
620 cast<ConstantArray>(CoverageNamesVar->getInitializer());
621 for (unsigned I = 0, E = Names->getNumOperands(); I < E; ++I) {
622 Constant *NC = Names->getOperand(I);
623 Value *V = NC->stripPointerCasts();
624 assert(isa<GlobalVariable>(V) && "Missing reference to function name");
625 GlobalVariable *Name = cast<GlobalVariable>(V);
627 Name->setLinkage(GlobalValue::PrivateLinkage);
628 ReferencedNames.push_back(Name);
629 NC->dropAllReferences();
631 CoverageNamesVar->eraseFromParent();
634 /// Get the name of a profiling variable for a particular function.
635 static std::string getVarName(InstrProfIncrementInst *Inc, StringRef Prefix) {
636 StringRef NamePrefix = getInstrProfNameVarPrefix();
637 StringRef Name = Inc->getName()->getName().substr(NamePrefix.size());
638 Function *F = Inc->getParent()->getParent();
639 Module *M = F->getParent();
640 if (!DoHashBasedCounterSplit || !isIRPGOFlagSet(M) ||
641 !canRenameComdatFunc(*F))
642 return (Prefix + Name).str();
643 uint64_t FuncHash = Inc->getHash()->getZExtValue();
644 SmallVector<char, 24> HashPostfix;
645 if (Name.endswith((Twine(".") + Twine(FuncHash)).toStringRef(HashPostfix)))
646 return (Prefix + Name).str();
647 return (Prefix + Name + "." + Twine(FuncHash)).str();
650 static inline bool shouldRecordFunctionAddr(Function *F) {
651 // Check the linkage
652 bool HasAvailableExternallyLinkage = F->hasAvailableExternallyLinkage();
653 if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() &&
654 !HasAvailableExternallyLinkage)
655 return true;
657 // A function marked 'alwaysinline' with available_externally linkage can't
658 // have its address taken. Doing so would create an undefined external ref to
659 // the function, which would fail to link.
660 if (HasAvailableExternallyLinkage &&
661 F->hasFnAttribute(Attribute::AlwaysInline))
662 return false;
664 // Prohibit function address recording if the function is both internal and
665 // COMDAT. This avoids the profile data variable referencing internal symbols
666 // in COMDAT.
667 if (F->hasLocalLinkage() && F->hasComdat())
668 return false;
670 // Check uses of this function for other than direct calls or invokes to it.
671 // Inline virtual functions have linkeOnceODR linkage. When a key method
672 // exists, the vtable will only be emitted in the TU where the key method
673 // is defined. In a TU where vtable is not available, the function won't
674 // be 'addresstaken'. If its address is not recorded here, the profile data
675 // with missing address may be picked by the linker leading to missing
676 // indirect call target info.
677 return F->hasAddressTaken() || F->hasLinkOnceLinkage();
680 static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) {
681 // Don't do this for Darwin. compiler-rt uses linker magic.
682 if (TT.isOSDarwin())
683 return false;
685 // Use linker script magic to get data/cnts/name start/end.
686 if (TT.isOSLinux() || TT.isOSFreeBSD() || TT.isOSNetBSD() ||
687 TT.isOSFuchsia() || TT.isPS4CPU() || TT.isOSWindows())
688 return false;
690 return true;
693 GlobalVariable *
694 InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
695 GlobalVariable *NamePtr = Inc->getName();
696 auto It = ProfileDataMap.find(NamePtr);
697 PerFunctionProfileData PD;
698 if (It != ProfileDataMap.end()) {
699 if (It->second.RegionCounters)
700 return It->second.RegionCounters;
701 PD = It->second;
704 // Match the linkage and visibility of the name global, except on COFF, where
705 // the linkage must be local and consequentially the visibility must be
706 // default.
707 Function *Fn = Inc->getParent()->getParent();
708 GlobalValue::LinkageTypes Linkage = NamePtr->getLinkage();
709 GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility();
710 if (TT.isOSBinFormatCOFF()) {
711 Linkage = GlobalValue::InternalLinkage;
712 Visibility = GlobalValue::DefaultVisibility;
715 // Move the name variable to the right section. Place them in a COMDAT group
716 // if the associated function is a COMDAT. This will make sure that only one
717 // copy of counters of the COMDAT function will be emitted after linking.
718 Comdat *Cmdt = nullptr;
719 GlobalValue::LinkageTypes CounterLinkage = Linkage;
720 if (needsComdatForCounter(*Fn, *M)) {
721 if (TT.isOSBinFormatCOFF()) {
722 // There are two cases that need a comdat on COFF:
723 // 1. Functions that already have comdats (standard case)
724 // 2. available_externally functions (dllimport and C99 inline)
725 // In the first case, put all the data in the original function comdat. In
726 // the second case, create a new comdat group using the counter as the
727 // leader. It's linkage must be external, so use linkonce_odr linkage in
728 // that case.
729 if (Comdat *C = Fn->getComdat()) {
730 Cmdt = C;
731 } else {
732 Cmdt = M->getOrInsertComdat(
733 getVarName(Inc, getInstrProfCountersVarPrefix()));
734 CounterLinkage = GlobalValue::LinkOnceODRLinkage;
736 } else {
737 // For other platforms that use comdats (ELF), make a new comdat group for
738 // all the profile data. It will be deduplicated within the current DSO.
739 Cmdt = M->getOrInsertComdat(getVarName(Inc, getInstrProfComdatPrefix()));
743 uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
744 LLVMContext &Ctx = M->getContext();
745 ArrayType *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters);
747 // Create the counters variable.
748 auto *CounterPtr =
749 new GlobalVariable(*M, CounterTy, false, Linkage,
750 Constant::getNullValue(CounterTy),
751 getVarName(Inc, getInstrProfCountersVarPrefix()));
752 CounterPtr->setVisibility(Visibility);
753 CounterPtr->setSection(
754 getInstrProfSectionName(IPSK_cnts, TT.getObjectFormat()));
755 CounterPtr->setAlignment(8);
756 CounterPtr->setComdat(Cmdt);
757 CounterPtr->setLinkage(CounterLinkage);
759 auto *Int8PtrTy = Type::getInt8PtrTy(Ctx);
760 // Allocate statically the array of pointers to value profile nodes for
761 // the current function.
762 Constant *ValuesPtrExpr = ConstantPointerNull::get(Int8PtrTy);
763 if (ValueProfileStaticAlloc && !needsRuntimeRegistrationOfSectionRange(TT)) {
764 uint64_t NS = 0;
765 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
766 NS += PD.NumValueSites[Kind];
767 if (NS) {
768 ArrayType *ValuesTy = ArrayType::get(Type::getInt64Ty(Ctx), NS);
770 auto *ValuesVar =
771 new GlobalVariable(*M, ValuesTy, false, Linkage,
772 Constant::getNullValue(ValuesTy),
773 getVarName(Inc, getInstrProfValuesVarPrefix()));
774 ValuesVar->setVisibility(Visibility);
775 ValuesVar->setSection(
776 getInstrProfSectionName(IPSK_vals, TT.getObjectFormat()));
777 ValuesVar->setAlignment(8);
778 ValuesVar->setComdat(Cmdt);
779 ValuesPtrExpr =
780 ConstantExpr::getBitCast(ValuesVar, Type::getInt8PtrTy(Ctx));
784 // Create data variable.
785 auto *Int16Ty = Type::getInt16Ty(Ctx);
786 auto *Int16ArrayTy = ArrayType::get(Int16Ty, IPVK_Last + 1);
787 Type *DataTypes[] = {
788 #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) LLVMType,
789 #include "llvm/ProfileData/InstrProfData.inc"
791 auto *DataTy = StructType::get(Ctx, makeArrayRef(DataTypes));
793 Constant *FunctionAddr = shouldRecordFunctionAddr(Fn)
794 ? ConstantExpr::getBitCast(Fn, Int8PtrTy)
795 : ConstantPointerNull::get(Int8PtrTy);
797 Constant *Int16ArrayVals[IPVK_Last + 1];
798 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
799 Int16ArrayVals[Kind] = ConstantInt::get(Int16Ty, PD.NumValueSites[Kind]);
801 Constant *DataVals[] = {
802 #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init,
803 #include "llvm/ProfileData/InstrProfData.inc"
805 auto *Data = new GlobalVariable(*M, DataTy, false, Linkage,
806 ConstantStruct::get(DataTy, DataVals),
807 getVarName(Inc, getInstrProfDataVarPrefix()));
808 Data->setVisibility(Visibility);
809 Data->setSection(getInstrProfSectionName(IPSK_data, TT.getObjectFormat()));
810 Data->setAlignment(INSTR_PROF_DATA_ALIGNMENT);
811 Data->setComdat(Cmdt);
813 PD.RegionCounters = CounterPtr;
814 PD.DataVar = Data;
815 ProfileDataMap[NamePtr] = PD;
817 // Mark the data variable as used so that it isn't stripped out.
818 UsedVars.push_back(Data);
819 // Now that the linkage set by the FE has been passed to the data and counter
820 // variables, reset Name variable's linkage and visibility to private so that
821 // it can be removed later by the compiler.
822 NamePtr->setLinkage(GlobalValue::PrivateLinkage);
823 // Collect the referenced names to be used by emitNameData.
824 ReferencedNames.push_back(NamePtr);
826 return CounterPtr;
829 void InstrProfiling::emitVNodes() {
830 if (!ValueProfileStaticAlloc)
831 return;
833 // For now only support this on platforms that do
834 // not require runtime registration to discover
835 // named section start/end.
836 if (needsRuntimeRegistrationOfSectionRange(TT))
837 return;
839 size_t TotalNS = 0;
840 for (auto &PD : ProfileDataMap) {
841 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
842 TotalNS += PD.second.NumValueSites[Kind];
845 if (!TotalNS)
846 return;
848 uint64_t NumCounters = TotalNS * NumCountersPerValueSite;
849 // Heuristic for small programs with very few total value sites.
850 // The default value of vp-counters-per-site is chosen based on
851 // the observation that large apps usually have a low percentage
852 // of value sites that actually have any profile data, and thus
853 // the average number of counters per site is low. For small
854 // apps with very few sites, this may not be true. Bump up the
855 // number of counters in this case.
856 #define INSTR_PROF_MIN_VAL_COUNTS 10
857 if (NumCounters < INSTR_PROF_MIN_VAL_COUNTS)
858 NumCounters = std::max(INSTR_PROF_MIN_VAL_COUNTS, (int)NumCounters * 2);
860 auto &Ctx = M->getContext();
861 Type *VNodeTypes[] = {
862 #define INSTR_PROF_VALUE_NODE(Type, LLVMType, Name, Init) LLVMType,
863 #include "llvm/ProfileData/InstrProfData.inc"
865 auto *VNodeTy = StructType::get(Ctx, makeArrayRef(VNodeTypes));
867 ArrayType *VNodesTy = ArrayType::get(VNodeTy, NumCounters);
868 auto *VNodesVar = new GlobalVariable(
869 *M, VNodesTy, false, GlobalValue::PrivateLinkage,
870 Constant::getNullValue(VNodesTy), getInstrProfVNodesVarName());
871 VNodesVar->setSection(
872 getInstrProfSectionName(IPSK_vnodes, TT.getObjectFormat()));
873 UsedVars.push_back(VNodesVar);
876 void InstrProfiling::emitNameData() {
877 std::string UncompressedData;
879 if (ReferencedNames.empty())
880 return;
882 std::string CompressedNameStr;
883 if (Error E = collectPGOFuncNameStrings(ReferencedNames, CompressedNameStr,
884 DoNameCompression)) {
885 report_fatal_error(toString(std::move(E)), false);
888 auto &Ctx = M->getContext();
889 auto *NamesVal = ConstantDataArray::getString(
890 Ctx, StringRef(CompressedNameStr), false);
891 NamesVar = new GlobalVariable(*M, NamesVal->getType(), true,
892 GlobalValue::PrivateLinkage, NamesVal,
893 getInstrProfNamesVarName());
894 NamesSize = CompressedNameStr.size();
895 NamesVar->setSection(
896 getInstrProfSectionName(IPSK_name, TT.getObjectFormat()));
897 // On COFF, it's important to reduce the alignment down to 1 to prevent the
898 // linker from inserting padding before the start of the names section or
899 // between names entries.
900 NamesVar->setAlignment(1);
901 UsedVars.push_back(NamesVar);
903 for (auto *NamePtr : ReferencedNames)
904 NamePtr->eraseFromParent();
907 void InstrProfiling::emitRegistration() {
908 if (!needsRuntimeRegistrationOfSectionRange(TT))
909 return;
911 // Construct the function.
912 auto *VoidTy = Type::getVoidTy(M->getContext());
913 auto *VoidPtrTy = Type::getInt8PtrTy(M->getContext());
914 auto *Int64Ty = Type::getInt64Ty(M->getContext());
915 auto *RegisterFTy = FunctionType::get(VoidTy, false);
916 auto *RegisterF = Function::Create(RegisterFTy, GlobalValue::InternalLinkage,
917 getInstrProfRegFuncsName(), M);
918 RegisterF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
919 if (Options.NoRedZone)
920 RegisterF->addFnAttr(Attribute::NoRedZone);
922 auto *RuntimeRegisterTy = FunctionType::get(VoidTy, VoidPtrTy, false);
923 auto *RuntimeRegisterF =
924 Function::Create(RuntimeRegisterTy, GlobalVariable::ExternalLinkage,
925 getInstrProfRegFuncName(), M);
927 IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", RegisterF));
928 for (Value *Data : UsedVars)
929 if (Data != NamesVar && !isa<Function>(Data))
930 IRB.CreateCall(RuntimeRegisterF, IRB.CreateBitCast(Data, VoidPtrTy));
932 if (NamesVar) {
933 Type *ParamTypes[] = {VoidPtrTy, Int64Ty};
934 auto *NamesRegisterTy =
935 FunctionType::get(VoidTy, makeArrayRef(ParamTypes), false);
936 auto *NamesRegisterF =
937 Function::Create(NamesRegisterTy, GlobalVariable::ExternalLinkage,
938 getInstrProfNamesRegFuncName(), M);
939 IRB.CreateCall(NamesRegisterF, {IRB.CreateBitCast(NamesVar, VoidPtrTy),
940 IRB.getInt64(NamesSize)});
943 IRB.CreateRetVoid();
946 bool InstrProfiling::emitRuntimeHook() {
947 // We expect the linker to be invoked with -u<hook_var> flag for linux,
948 // for which case there is no need to emit the user function.
949 if (TT.isOSLinux())
950 return false;
952 // If the module's provided its own runtime, we don't need to do anything.
953 if (M->getGlobalVariable(getInstrProfRuntimeHookVarName()))
954 return false;
956 // Declare an external variable that will pull in the runtime initialization.
957 auto *Int32Ty = Type::getInt32Ty(M->getContext());
958 auto *Var =
959 new GlobalVariable(*M, Int32Ty, false, GlobalValue::ExternalLinkage,
960 nullptr, getInstrProfRuntimeHookVarName());
962 // Make a function that uses it.
963 auto *User = Function::Create(FunctionType::get(Int32Ty, false),
964 GlobalValue::LinkOnceODRLinkage,
965 getInstrProfRuntimeHookVarUseFuncName(), M);
966 User->addFnAttr(Attribute::NoInline);
967 if (Options.NoRedZone)
968 User->addFnAttr(Attribute::NoRedZone);
969 User->setVisibility(GlobalValue::HiddenVisibility);
970 if (TT.supportsCOMDAT())
971 User->setComdat(M->getOrInsertComdat(User->getName()));
973 IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", User));
974 auto *Load = IRB.CreateLoad(Int32Ty, Var);
975 IRB.CreateRet(Load);
977 // Mark the user variable as used so that it isn't stripped out.
978 UsedVars.push_back(User);
979 return true;
982 void InstrProfiling::emitUses() {
983 if (!UsedVars.empty())
984 appendToUsed(*M, UsedVars);
987 void InstrProfiling::emitInitialization() {
988 // Create variable for profile name.
989 createProfileFileNameVar(*M, Options.InstrProfileOutput);
990 Function *RegisterF = M->getFunction(getInstrProfRegFuncsName());
991 if (!RegisterF)
992 return;
994 // Create the initialization function.
995 auto *VoidTy = Type::getVoidTy(M->getContext());
996 auto *F = Function::Create(FunctionType::get(VoidTy, false),
997 GlobalValue::InternalLinkage,
998 getInstrProfInitFuncName(), M);
999 F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
1000 F->addFnAttr(Attribute::NoInline);
1001 if (Options.NoRedZone)
1002 F->addFnAttr(Attribute::NoRedZone);
1004 // Add the basic block and the necessary calls.
1005 IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", F));
1006 IRB.CreateCall(RegisterF, {});
1007 IRB.CreateRetVoid();
1009 appendToGlobalCtors(*M, F, 0);