[PowerPC] Do not emit record-form rotates when record-form andi/andis suffices
[llvm-core.git] / lib / Transforms / Instrumentation / InstrProfiling.cpp
blob62da93002539e45d56c12fe285ba4a286ed8415f
1 //===-- InstrProfiling.cpp - Frontend instrumentation based profiling -----===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass lowers instrprof_* intrinsics emitted by a frontend for profiling.
11 // It also builds the data structures and initialization code needed for
12 // updating execution counts and emitting the profile at runtime.
14 //===----------------------------------------------------------------------===//
16 #include "llvm/Transforms/Instrumentation/InstrProfiling.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/ADT/Triple.h"
21 #include "llvm/ADT/Twine.h"
22 #include "llvm/Analysis/LoopInfo.h"
23 #include "llvm/Analysis/TargetLibraryInfo.h"
24 #include "llvm/IR/Attributes.h"
25 #include "llvm/IR/BasicBlock.h"
26 #include "llvm/IR/Constant.h"
27 #include "llvm/IR/Constants.h"
28 #include "llvm/IR/DerivedTypes.h"
29 #include "llvm/IR/Dominators.h"
30 #include "llvm/IR/Function.h"
31 #include "llvm/IR/GlobalValue.h"
32 #include "llvm/IR/GlobalVariable.h"
33 #include "llvm/IR/IRBuilder.h"
34 #include "llvm/IR/Instruction.h"
35 #include "llvm/IR/Instructions.h"
36 #include "llvm/IR/IntrinsicInst.h"
37 #include "llvm/IR/Module.h"
38 #include "llvm/IR/Type.h"
39 #include "llvm/Pass.h"
40 #include "llvm/ProfileData/InstrProf.h"
41 #include "llvm/Support/Casting.h"
42 #include "llvm/Support/CommandLine.h"
43 #include "llvm/Support/Error.h"
44 #include "llvm/Support/ErrorHandling.h"
45 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
46 #include "llvm/Transforms/Utils/ModuleUtils.h"
47 #include "llvm/Transforms/Utils/SSAUpdater.h"
48 #include <algorithm>
49 #include <cassert>
50 #include <cstddef>
51 #include <cstdint>
52 #include <string>
54 using namespace llvm;
56 #define DEBUG_TYPE "instrprof"
58 // The start and end values of precise value profile range for memory
59 // intrinsic sizes
60 cl::opt<std::string> MemOPSizeRange(
61 "memop-size-range",
62 cl::desc("Set the range of size in memory intrinsic calls to be profiled "
63 "precisely, in a format of <start_val>:<end_val>"),
64 cl::init(""));
66 // The value that considered to be large value in memory intrinsic.
67 cl::opt<unsigned> MemOPSizeLarge(
68 "memop-size-large",
69 cl::desc("Set large value thresthold in memory intrinsic size profiling. "
70 "Value of 0 disables the large value profiling."),
71 cl::init(8192));
73 namespace {
75 cl::opt<bool> DoNameCompression("enable-name-compression",
76 cl::desc("Enable name string compression"),
77 cl::init(true));
79 cl::opt<bool> DoHashBasedCounterSplit(
80 "hash-based-counter-split",
81 cl::desc("Rename counter variable of a comdat function based on cfg hash"),
82 cl::init(true));
84 cl::opt<bool> ValueProfileStaticAlloc(
85 "vp-static-alloc",
86 cl::desc("Do static counter allocation for value profiler"),
87 cl::init(true));
89 cl::opt<double> NumCountersPerValueSite(
90 "vp-counters-per-site",
91 cl::desc("The average number of profile counters allocated "
92 "per value profiling site."),
93 // This is set to a very small value because in real programs, only
94 // a very small percentage of value sites have non-zero targets, e.g, 1/30.
95 // For those sites with non-zero profile, the average number of targets
96 // is usually smaller than 2.
97 cl::init(1.0));
99 cl::opt<bool> AtomicCounterUpdateAll(
100 "instrprof-atomic-counter-update-all", cl::ZeroOrMore,
101 cl::desc("Make all profile counter updates atomic (for testing only)"),
102 cl::init(false));
104 cl::opt<bool> AtomicCounterUpdatePromoted(
105 "atomic-counter-update-promoted", cl::ZeroOrMore,
106 cl::desc("Do counter update using atomic fetch add "
107 " for promoted counters only"),
108 cl::init(false));
110 // If the option is not specified, the default behavior about whether
111 // counter promotion is done depends on how instrumentaiton lowering
112 // pipeline is setup, i.e., the default value of true of this option
113 // does not mean the promotion will be done by default. Explicitly
114 // setting this option can override the default behavior.
115 cl::opt<bool> DoCounterPromotion("do-counter-promotion", cl::ZeroOrMore,
116 cl::desc("Do counter register promotion"),
117 cl::init(false));
118 cl::opt<unsigned> MaxNumOfPromotionsPerLoop(
119 cl::ZeroOrMore, "max-counter-promotions-per-loop", cl::init(20),
120 cl::desc("Max number counter promotions per loop to avoid"
121 " increasing register pressure too much"));
123 // A debug option
124 cl::opt<int>
125 MaxNumOfPromotions(cl::ZeroOrMore, "max-counter-promotions", cl::init(-1),
126 cl::desc("Max number of allowed counter promotions"));
128 cl::opt<unsigned> SpeculativeCounterPromotionMaxExiting(
129 cl::ZeroOrMore, "speculative-counter-promotion-max-exiting", cl::init(3),
130 cl::desc("The max number of exiting blocks of a loop to allow "
131 " speculative counter promotion"));
133 cl::opt<bool> SpeculativeCounterPromotionToLoop(
134 cl::ZeroOrMore, "speculative-counter-promotion-to-loop", cl::init(false),
135 cl::desc("When the option is false, if the target block is in a loop, "
136 "the promotion will be disallowed unless the promoted counter "
137 " update can be further/iteratively promoted into an acyclic "
138 " region."));
140 cl::opt<bool> IterativeCounterPromotion(
141 cl::ZeroOrMore, "iterative-counter-promotion", cl::init(true),
142 cl::desc("Allow counter promotion across the whole loop nest."));
144 class InstrProfilingLegacyPass : public ModulePass {
145 InstrProfiling InstrProf;
147 public:
148 static char ID;
150 InstrProfilingLegacyPass() : ModulePass(ID) {}
151 InstrProfilingLegacyPass(const InstrProfOptions &Options)
152 : ModulePass(ID), InstrProf(Options) {}
154 StringRef getPassName() const override {
155 return "Frontend instrumentation-based coverage lowering";
158 bool runOnModule(Module &M) override {
159 return InstrProf.run(M, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI());
162 void getAnalysisUsage(AnalysisUsage &AU) const override {
163 AU.setPreservesCFG();
164 AU.addRequired<TargetLibraryInfoWrapperPass>();
169 /// A helper class to promote one counter RMW operation in the loop
170 /// into register update.
172 /// RWM update for the counter will be sinked out of the loop after
173 /// the transformation.
175 class PGOCounterPromoterHelper : public LoadAndStorePromoter {
176 public:
177 PGOCounterPromoterHelper(
178 Instruction *L, Instruction *S, SSAUpdater &SSA, Value *Init,
179 BasicBlock *PH, ArrayRef<BasicBlock *> ExitBlocks,
180 ArrayRef<Instruction *> InsertPts,
181 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
182 LoopInfo &LI)
183 : LoadAndStorePromoter({L, S}, SSA), Store(S), ExitBlocks(ExitBlocks),
184 InsertPts(InsertPts), LoopToCandidates(LoopToCands), LI(LI) {
185 assert(isa<LoadInst>(L));
186 assert(isa<StoreInst>(S));
187 SSA.AddAvailableValue(PH, Init);
190 void doExtraRewritesBeforeFinalDeletion() const override {
191 for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
192 BasicBlock *ExitBlock = ExitBlocks[i];
193 Instruction *InsertPos = InsertPts[i];
194 // Get LiveIn value into the ExitBlock. If there are multiple
195 // predecessors, the value is defined by a PHI node in this
196 // block.
197 Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
198 Value *Addr = cast<StoreInst>(Store)->getPointerOperand();
199 IRBuilder<> Builder(InsertPos);
200 if (AtomicCounterUpdatePromoted)
201 // automic update currently can only be promoted across the current
202 // loop, not the whole loop nest.
203 Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, LiveInValue,
204 AtomicOrdering::SequentiallyConsistent);
205 else {
206 LoadInst *OldVal = Builder.CreateLoad(Addr, "pgocount.promoted");
207 auto *NewVal = Builder.CreateAdd(OldVal, LiveInValue);
208 auto *NewStore = Builder.CreateStore(NewVal, Addr);
210 // Now update the parent loop's candidate list:
211 if (IterativeCounterPromotion) {
212 auto *TargetLoop = LI.getLoopFor(ExitBlock);
213 if (TargetLoop)
214 LoopToCandidates[TargetLoop].emplace_back(OldVal, NewStore);
220 private:
221 Instruction *Store;
222 ArrayRef<BasicBlock *> ExitBlocks;
223 ArrayRef<Instruction *> InsertPts;
224 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
225 LoopInfo &LI;
228 /// A helper class to do register promotion for all profile counter
229 /// updates in a loop.
231 class PGOCounterPromoter {
232 public:
233 PGOCounterPromoter(
234 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
235 Loop &CurLoop, LoopInfo &LI)
236 : LoopToCandidates(LoopToCands), ExitBlocks(), InsertPts(), L(CurLoop),
237 LI(LI) {
239 SmallVector<BasicBlock *, 8> LoopExitBlocks;
240 SmallPtrSet<BasicBlock *, 8> BlockSet;
241 L.getExitBlocks(LoopExitBlocks);
243 for (BasicBlock *ExitBlock : LoopExitBlocks) {
244 if (BlockSet.insert(ExitBlock).second) {
245 ExitBlocks.push_back(ExitBlock);
246 InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
251 bool run(int64_t *NumPromoted) {
252 // Skip 'infinite' loops:
253 if (ExitBlocks.size() == 0)
254 return false;
255 unsigned MaxProm = getMaxNumOfPromotionsInLoop(&L);
256 if (MaxProm == 0)
257 return false;
259 unsigned Promoted = 0;
260 for (auto &Cand : LoopToCandidates[&L]) {
262 SmallVector<PHINode *, 4> NewPHIs;
263 SSAUpdater SSA(&NewPHIs);
264 Value *InitVal = ConstantInt::get(Cand.first->getType(), 0);
266 PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal,
267 L.getLoopPreheader(), ExitBlocks,
268 InsertPts, LoopToCandidates, LI);
269 Promoter.run(SmallVector<Instruction *, 2>({Cand.first, Cand.second}));
270 Promoted++;
271 if (Promoted >= MaxProm)
272 break;
274 (*NumPromoted)++;
275 if (MaxNumOfPromotions != -1 && *NumPromoted >= MaxNumOfPromotions)
276 break;
279 LLVM_DEBUG(dbgs() << Promoted << " counters promoted for loop (depth="
280 << L.getLoopDepth() << ")\n");
281 return Promoted != 0;
284 private:
285 bool allowSpeculativeCounterPromotion(Loop *LP) {
286 SmallVector<BasicBlock *, 8> ExitingBlocks;
287 L.getExitingBlocks(ExitingBlocks);
288 // Not considierered speculative.
289 if (ExitingBlocks.size() == 1)
290 return true;
291 if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
292 return false;
293 return true;
296 // Returns the max number of Counter Promotions for LP.
297 unsigned getMaxNumOfPromotionsInLoop(Loop *LP) {
298 // We can't insert into a catchswitch.
299 SmallVector<BasicBlock *, 8> LoopExitBlocks;
300 LP->getExitBlocks(LoopExitBlocks);
301 if (llvm::any_of(LoopExitBlocks, [](BasicBlock *Exit) {
302 return isa<CatchSwitchInst>(Exit->getTerminator());
304 return 0;
306 if (!LP->hasDedicatedExits())
307 return 0;
309 BasicBlock *PH = LP->getLoopPreheader();
310 if (!PH)
311 return 0;
313 SmallVector<BasicBlock *, 8> ExitingBlocks;
314 LP->getExitingBlocks(ExitingBlocks);
315 // Not considierered speculative.
316 if (ExitingBlocks.size() == 1)
317 return MaxNumOfPromotionsPerLoop;
319 if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
320 return 0;
322 // Whether the target block is in a loop does not matter:
323 if (SpeculativeCounterPromotionToLoop)
324 return MaxNumOfPromotionsPerLoop;
326 // Now check the target block:
327 unsigned MaxProm = MaxNumOfPromotionsPerLoop;
328 for (auto *TargetBlock : LoopExitBlocks) {
329 auto *TargetLoop = LI.getLoopFor(TargetBlock);
330 if (!TargetLoop)
331 continue;
332 unsigned MaxPromForTarget = getMaxNumOfPromotionsInLoop(TargetLoop);
333 unsigned PendingCandsInTarget = LoopToCandidates[TargetLoop].size();
334 MaxProm =
335 std::min(MaxProm, std::max(MaxPromForTarget, PendingCandsInTarget) -
336 PendingCandsInTarget);
338 return MaxProm;
341 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
342 SmallVector<BasicBlock *, 8> ExitBlocks;
343 SmallVector<Instruction *, 8> InsertPts;
344 Loop &L;
345 LoopInfo &LI;
348 } // end anonymous namespace
350 PreservedAnalyses InstrProfiling::run(Module &M, ModuleAnalysisManager &AM) {
351 auto &TLI = AM.getResult<TargetLibraryAnalysis>(M);
352 if (!run(M, TLI))
353 return PreservedAnalyses::all();
355 return PreservedAnalyses::none();
// Definition of the static ID member declared in InstrProfilingLegacyPass;
// this is the object its constructors hand to ModulePass.
//
// The macro sequence below registers the legacy pass with the argument
// string "instrprof" and the description text shown, and names
// TargetLibraryInfoWrapperPass as a dependency (the same analysis the pass
// requests in getAnalysisUsage).
358 char InstrProfilingLegacyPass::ID = 0;
359 INITIALIZE_PASS_BEGIN(
360 InstrProfilingLegacyPass, "instrprof",
361 "Frontend instrumentation-based coverage lowering.", false, false)
362 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
363 INITIALIZE_PASS_END(
364 InstrProfilingLegacyPass, "instrprof",
365 "Frontend instrumentation-based coverage lowering.", false, false)
367 ModulePass *
368 llvm::createInstrProfilingLegacyPass(const InstrProfOptions &Options) {
369 return new InstrProfilingLegacyPass(Options);
372 static InstrProfIncrementInst *castToIncrementInst(Instruction *Instr) {
373 InstrProfIncrementInst *Inc = dyn_cast<InstrProfIncrementInstStep>(Instr);
374 if (Inc)
375 return Inc;
376 return dyn_cast<InstrProfIncrementInst>(Instr);
379 bool InstrProfiling::lowerIntrinsics(Function *F) {
380 bool MadeChange = false;
381 PromotionCandidates.clear();
382 for (BasicBlock &BB : *F) {
383 for (auto I = BB.begin(), E = BB.end(); I != E;) {
384 auto Instr = I++;
385 InstrProfIncrementInst *Inc = castToIncrementInst(&*Instr);
386 if (Inc) {
387 lowerIncrement(Inc);
388 MadeChange = true;
389 } else if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(Instr)) {
390 lowerValueProfileInst(Ind);
391 MadeChange = true;
396 if (!MadeChange)
397 return false;
399 promoteCounterLoadStores(F);
400 return true;
403 bool InstrProfiling::isCounterPromotionEnabled() const {
404 if (DoCounterPromotion.getNumOccurrences() > 0)
405 return DoCounterPromotion;
407 return Options.DoCounterPromotion;
410 void InstrProfiling::promoteCounterLoadStores(Function *F) {
411 if (!isCounterPromotionEnabled())
412 return;
414 DominatorTree DT(*F);
415 LoopInfo LI(DT);
416 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> LoopPromotionCandidates;
418 for (const auto &LoadStore : PromotionCandidates) {
419 auto *CounterLoad = LoadStore.first;
420 auto *CounterStore = LoadStore.second;
421 BasicBlock *BB = CounterLoad->getParent();
422 Loop *ParentLoop = LI.getLoopFor(BB);
423 if (!ParentLoop)
424 continue;
425 LoopPromotionCandidates[ParentLoop].emplace_back(CounterLoad, CounterStore);
428 SmallVector<Loop *, 4> Loops = LI.getLoopsInPreorder();
430 // Do a post-order traversal of the loops so that counter updates can be
431 // iteratively hoisted outside the loop nest.
432 for (auto *Loop : llvm::reverse(Loops)) {
433 PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI);
434 Promoter.run(&TotalCountersPromoted);
438 /// Check if the module contains uses of any profiling intrinsics.
439 static bool containsProfilingIntrinsics(Module &M) {
440 if (auto *F = M.getFunction(
441 Intrinsic::getName(llvm::Intrinsic::instrprof_increment)))
442 if (!F->use_empty())
443 return true;
444 if (auto *F = M.getFunction(
445 Intrinsic::getName(llvm::Intrinsic::instrprof_increment_step)))
446 if (!F->use_empty())
447 return true;
448 if (auto *F = M.getFunction(
449 Intrinsic::getName(llvm::Intrinsic::instrprof_value_profile)))
450 if (!F->use_empty())
451 return true;
452 return false;
455 bool InstrProfiling::run(Module &M, const TargetLibraryInfo &TLI) {
456 this->M = &M;
457 this->TLI = &TLI;
458 NamesVar = nullptr;
459 NamesSize = 0;
460 ProfileDataMap.clear();
461 UsedVars.clear();
462 getMemOPSizeRangeFromOption(MemOPSizeRange, MemOPSizeRangeStart,
463 MemOPSizeRangeLast);
464 TT = Triple(M.getTargetTriple());
466 // Emit the runtime hook even if no counters are present.
467 bool MadeChange = emitRuntimeHook();
469 // Improve compile time by avoiding linear scans when there is no work.
470 GlobalVariable *CoverageNamesVar =
471 M.getNamedGlobal(getCoverageUnusedNamesVarName());
472 if (!containsProfilingIntrinsics(M) && !CoverageNamesVar)
473 return MadeChange;
475 // We did not know how many value sites there would be inside
476 // the instrumented function. This is counting the number of instrumented
477 // target value sites to enter it as field in the profile data variable.
478 for (Function &F : M) {
479 InstrProfIncrementInst *FirstProfIncInst = nullptr;
480 for (BasicBlock &BB : F)
481 for (auto I = BB.begin(), E = BB.end(); I != E; I++)
482 if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(I))
483 computeNumValueSiteCounts(Ind);
484 else if (FirstProfIncInst == nullptr)
485 FirstProfIncInst = dyn_cast<InstrProfIncrementInst>(I);
487 // Value profiling intrinsic lowering requires per-function profile data
488 // variable to be created first.
489 if (FirstProfIncInst != nullptr)
490 static_cast<void>(getOrCreateRegionCounters(FirstProfIncInst));
493 for (Function &F : M)
494 MadeChange |= lowerIntrinsics(&F);
496 if (CoverageNamesVar) {
497 lowerCoverageData(CoverageNamesVar);
498 MadeChange = true;
501 if (!MadeChange)
502 return false;
504 emitVNodes();
505 emitNameData();
506 emitRegistration();
507 emitUses();
508 emitInitialization();
509 return true;
512 static Constant *getOrInsertValueProfilingCall(Module &M,
513 const TargetLibraryInfo &TLI,
514 bool IsRange = false) {
515 LLVMContext &Ctx = M.getContext();
516 auto *ReturnTy = Type::getVoidTy(M.getContext());
518 Constant *Res;
519 if (!IsRange) {
520 Type *ParamTypes[] = {
521 #define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
522 #include "llvm/ProfileData/InstrProfData.inc"
524 auto *ValueProfilingCallTy =
525 FunctionType::get(ReturnTy, makeArrayRef(ParamTypes), false);
526 Res = M.getOrInsertFunction(getInstrProfValueProfFuncName(),
527 ValueProfilingCallTy);
528 } else {
529 Type *RangeParamTypes[] = {
530 #define VALUE_RANGE_PROF 1
531 #define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
532 #include "llvm/ProfileData/InstrProfData.inc"
533 #undef VALUE_RANGE_PROF
535 auto *ValueRangeProfilingCallTy =
536 FunctionType::get(ReturnTy, makeArrayRef(RangeParamTypes), false);
537 Res = M.getOrInsertFunction(getInstrProfValueRangeProfFuncName(),
538 ValueRangeProfilingCallTy);
541 if (Function *FunRes = dyn_cast<Function>(Res)) {
542 if (auto AK = TLI.getExtAttrForI32Param(false))
543 FunRes->addParamAttr(2, AK);
545 return Res;
548 void InstrProfiling::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) {
549 GlobalVariable *Name = Ind->getName();
550 uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
551 uint64_t Index = Ind->getIndex()->getZExtValue();
552 auto It = ProfileDataMap.find(Name);
553 if (It == ProfileDataMap.end()) {
554 PerFunctionProfileData PD;
555 PD.NumValueSites[ValueKind] = Index + 1;
556 ProfileDataMap[Name] = PD;
557 } else if (It->second.NumValueSites[ValueKind] <= Index)
558 It->second.NumValueSites[ValueKind] = Index + 1;
561 void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
562 GlobalVariable *Name = Ind->getName();
563 auto It = ProfileDataMap.find(Name);
564 assert(It != ProfileDataMap.end() && It->second.DataVar &&
565 "value profiling detected in function with no counter incerement");
567 GlobalVariable *DataVar = It->second.DataVar;
568 uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
569 uint64_t Index = Ind->getIndex()->getZExtValue();
570 for (uint32_t Kind = IPVK_First; Kind < ValueKind; ++Kind)
571 Index += It->second.NumValueSites[Kind];
573 IRBuilder<> Builder(Ind);
574 bool IsRange = (Ind->getValueKind()->getZExtValue() ==
575 llvm::InstrProfValueKind::IPVK_MemOPSize);
576 CallInst *Call = nullptr;
577 if (!IsRange) {
578 Value *Args[3] = {Ind->getTargetValue(),
579 Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()),
580 Builder.getInt32(Index)};
581 Call = Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI), Args);
582 } else {
583 Value *Args[6] = {
584 Ind->getTargetValue(),
585 Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()),
586 Builder.getInt32(Index),
587 Builder.getInt64(MemOPSizeRangeStart),
588 Builder.getInt64(MemOPSizeRangeLast),
589 Builder.getInt64(MemOPSizeLarge == 0 ? INT64_MIN : MemOPSizeLarge)};
590 Call =
591 Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI, true), Args);
593 if (auto AK = TLI->getExtAttrForI32Param(false))
594 Call->addParamAttr(2, AK);
595 Ind->replaceAllUsesWith(Call);
596 Ind->eraseFromParent();
599 void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
600 GlobalVariable *Counters = getOrCreateRegionCounters(Inc);
602 IRBuilder<> Builder(Inc);
603 uint64_t Index = Inc->getIndex()->getZExtValue();
604 Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Index);
606 if (Options.Atomic || AtomicCounterUpdateAll) {
607 Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, Inc->getStep(),
608 AtomicOrdering::Monotonic);
609 } else {
610 Value *Load = Builder.CreateLoad(Addr, "pgocount");
611 auto *Count = Builder.CreateAdd(Load, Inc->getStep());
612 auto *Store = Builder.CreateStore(Count, Addr);
613 if (isCounterPromotionEnabled())
614 PromotionCandidates.emplace_back(cast<Instruction>(Load), Store);
616 Inc->eraseFromParent();
619 void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageNamesVar) {
620 ConstantArray *Names =
621 cast<ConstantArray>(CoverageNamesVar->getInitializer());
622 for (unsigned I = 0, E = Names->getNumOperands(); I < E; ++I) {
623 Constant *NC = Names->getOperand(I);
624 Value *V = NC->stripPointerCasts();
625 assert(isa<GlobalVariable>(V) && "Missing reference to function name");
626 GlobalVariable *Name = cast<GlobalVariable>(V);
628 Name->setLinkage(GlobalValue::PrivateLinkage);
629 ReferencedNames.push_back(Name);
630 NC->dropAllReferences();
632 CoverageNamesVar->eraseFromParent();
635 /// Get the name of a profiling variable for a particular function.
636 static std::string getVarName(InstrProfIncrementInst *Inc, StringRef Prefix) {
637 StringRef NamePrefix = getInstrProfNameVarPrefix();
638 StringRef Name = Inc->getName()->getName().substr(NamePrefix.size());
639 Function *F = Inc->getParent()->getParent();
640 Module *M = F->getParent();
641 if (!DoHashBasedCounterSplit || !isIRPGOFlagSet(M) ||
642 !canRenameComdatFunc(*F))
643 return (Prefix + Name).str();
644 uint64_t FuncHash = Inc->getHash()->getZExtValue();
645 SmallVector<char, 24> HashPostfix;
646 if (Name.endswith((Twine(".") + Twine(FuncHash)).toStringRef(HashPostfix)))
647 return (Prefix + Name).str();
648 return (Prefix + Name + "." + Twine(FuncHash)).str();
651 static inline bool shouldRecordFunctionAddr(Function *F) {
652 // Check the linkage
653 bool HasAvailableExternallyLinkage = F->hasAvailableExternallyLinkage();
654 if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() &&
655 !HasAvailableExternallyLinkage)
656 return true;
658 // A function marked 'alwaysinline' with available_externally linkage can't
659 // have its address taken. Doing so would create an undefined external ref to
660 // the function, which would fail to link.
661 if (HasAvailableExternallyLinkage &&
662 F->hasFnAttribute(Attribute::AlwaysInline))
663 return false;
665 // Prohibit function address recording if the function is both internal and
666 // COMDAT. This avoids the profile data variable referencing internal symbols
667 // in COMDAT.
668 if (F->hasLocalLinkage() && F->hasComdat())
669 return false;
671 // Check uses of this function for other than direct calls or invokes to it.
672 // Inline virtual functions have linkeOnceODR linkage. When a key method
673 // exists, the vtable will only be emitted in the TU where the key method
674 // is defined. In a TU where vtable is not available, the function won't
675 // be 'addresstaken'. If its address is not recorded here, the profile data
676 // with missing address may be picked by the linker leading to missing
677 // indirect call target info.
678 return F->hasAddressTaken() || F->hasLinkOnceLinkage();
681 static inline Comdat *getOrCreateProfileComdat(Module &M, Function &F,
682 InstrProfIncrementInst *Inc) {
683 if (!needsComdatForCounter(F, M))
684 return nullptr;
686 // COFF format requires a COMDAT section to have a key symbol with the same
687 // name. The linker targeting COFF also requires that the COMDAT
688 // a section is associated to must precede the associating section. For this
689 // reason, we must choose the counter var's name as the name of the comdat.
690 StringRef ComdatPrefix = (Triple(M.getTargetTriple()).isOSBinFormatCOFF()
691 ? getInstrProfCountersVarPrefix()
692 : getInstrProfComdatPrefix());
693 return M.getOrInsertComdat(StringRef(getVarName(Inc, ComdatPrefix)));
696 static bool needsRuntimeRegistrationOfSectionRange(const Module &M) {
697 // Don't do this for Darwin. compiler-rt uses linker magic.
698 if (Triple(M.getTargetTriple()).isOSDarwin())
699 return false;
701 // Use linker script magic to get data/cnts/name start/end.
702 if (Triple(M.getTargetTriple()).isOSLinux() ||
703 Triple(M.getTargetTriple()).isOSFreeBSD() ||
704 Triple(M.getTargetTriple()).isOSFuchsia() ||
705 Triple(M.getTargetTriple()).isPS4CPU())
706 return false;
708 return true;
711 GlobalVariable *
712 InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
713 GlobalVariable *NamePtr = Inc->getName();
714 auto It = ProfileDataMap.find(NamePtr);
715 PerFunctionProfileData PD;
716 if (It != ProfileDataMap.end()) {
717 if (It->second.RegionCounters)
718 return It->second.RegionCounters;
719 PD = It->second;
722 // Move the name variable to the right section. Place them in a COMDAT group
723 // if the associated function is a COMDAT. This will make sure that
724 // only one copy of counters of the COMDAT function will be emitted after
725 // linking.
726 Function *Fn = Inc->getParent()->getParent();
727 Comdat *ProfileVarsComdat = nullptr;
728 ProfileVarsComdat = getOrCreateProfileComdat(*M, *Fn, Inc);
730 uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
731 LLVMContext &Ctx = M->getContext();
732 ArrayType *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters);
734 // Create the counters variable.
735 auto *CounterPtr =
736 new GlobalVariable(*M, CounterTy, false, NamePtr->getLinkage(),
737 Constant::getNullValue(CounterTy),
738 getVarName(Inc, getInstrProfCountersVarPrefix()));
739 CounterPtr->setVisibility(NamePtr->getVisibility());
740 CounterPtr->setSection(
741 getInstrProfSectionName(IPSK_cnts, TT.getObjectFormat()));
742 CounterPtr->setAlignment(8);
743 CounterPtr->setComdat(ProfileVarsComdat);
745 auto *Int8PtrTy = Type::getInt8PtrTy(Ctx);
746 // Allocate statically the array of pointers to value profile nodes for
747 // the current function.
748 Constant *ValuesPtrExpr = ConstantPointerNull::get(Int8PtrTy);
749 if (ValueProfileStaticAlloc && !needsRuntimeRegistrationOfSectionRange(*M)) {
750 uint64_t NS = 0;
751 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
752 NS += PD.NumValueSites[Kind];
753 if (NS) {
754 ArrayType *ValuesTy = ArrayType::get(Type::getInt64Ty(Ctx), NS);
756 auto *ValuesVar =
757 new GlobalVariable(*M, ValuesTy, false, NamePtr->getLinkage(),
758 Constant::getNullValue(ValuesTy),
759 getVarName(Inc, getInstrProfValuesVarPrefix()));
760 ValuesVar->setVisibility(NamePtr->getVisibility());
761 ValuesVar->setSection(
762 getInstrProfSectionName(IPSK_vals, TT.getObjectFormat()));
763 ValuesVar->setAlignment(8);
764 ValuesVar->setComdat(ProfileVarsComdat);
765 ValuesPtrExpr =
766 ConstantExpr::getBitCast(ValuesVar, Type::getInt8PtrTy(Ctx));
770 // Create data variable.
771 auto *Int16Ty = Type::getInt16Ty(Ctx);
772 auto *Int16ArrayTy = ArrayType::get(Int16Ty, IPVK_Last + 1);
773 Type *DataTypes[] = {
774 #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) LLVMType,
775 #include "llvm/ProfileData/InstrProfData.inc"
777 auto *DataTy = StructType::get(Ctx, makeArrayRef(DataTypes));
779 Constant *FunctionAddr = shouldRecordFunctionAddr(Fn)
780 ? ConstantExpr::getBitCast(Fn, Int8PtrTy)
781 : ConstantPointerNull::get(Int8PtrTy);
783 Constant *Int16ArrayVals[IPVK_Last + 1];
784 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
785 Int16ArrayVals[Kind] = ConstantInt::get(Int16Ty, PD.NumValueSites[Kind]);
787 Constant *DataVals[] = {
788 #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init,
789 #include "llvm/ProfileData/InstrProfData.inc"
791 auto *Data = new GlobalVariable(*M, DataTy, false, NamePtr->getLinkage(),
792 ConstantStruct::get(DataTy, DataVals),
793 getVarName(Inc, getInstrProfDataVarPrefix()));
794 Data->setVisibility(NamePtr->getVisibility());
795 Data->setSection(getInstrProfSectionName(IPSK_data, TT.getObjectFormat()));
796 Data->setAlignment(INSTR_PROF_DATA_ALIGNMENT);
797 Data->setComdat(ProfileVarsComdat);
799 PD.RegionCounters = CounterPtr;
800 PD.DataVar = Data;
801 ProfileDataMap[NamePtr] = PD;
803 // Mark the data variable as used so that it isn't stripped out.
804 UsedVars.push_back(Data);
805 // Now that the linkage set by the FE has been passed to the data and counter
806 // variables, reset Name variable's linkage and visibility to private so that
807 // it can be removed later by the compiler.
808 NamePtr->setLinkage(GlobalValue::PrivateLinkage);
809 // Collect the referenced names to be used by emitNameData.
810 ReferencedNames.push_back(NamePtr);
812 return CounterPtr;
815 void InstrProfiling::emitVNodes() {
816 if (!ValueProfileStaticAlloc)
817 return;
819 // For now only support this on platforms that do
820 // not require runtime registration to discover
821 // named section start/end.
822 if (needsRuntimeRegistrationOfSectionRange(*M))
823 return;
825 size_t TotalNS = 0;
826 for (auto &PD : ProfileDataMap) {
827 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
828 TotalNS += PD.second.NumValueSites[Kind];
831 if (!TotalNS)
832 return;
834 uint64_t NumCounters = TotalNS * NumCountersPerValueSite;
835 // Heuristic for small programs with very few total value sites.
836 // The default value of vp-counters-per-site is chosen based on
837 // the observation that large apps usually have a low percentage
838 // of value sites that actually have any profile data, and thus
839 // the average number of counters per site is low. For small
840 // apps with very few sites, this may not be true. Bump up the
841 // number of counters in this case.
842 #define INSTR_PROF_MIN_VAL_COUNTS 10
843 if (NumCounters < INSTR_PROF_MIN_VAL_COUNTS)
844 NumCounters = std::max(INSTR_PROF_MIN_VAL_COUNTS, (int)NumCounters * 2);
846 auto &Ctx = M->getContext();
847 Type *VNodeTypes[] = {
848 #define INSTR_PROF_VALUE_NODE(Type, LLVMType, Name, Init) LLVMType,
849 #include "llvm/ProfileData/InstrProfData.inc"
851 auto *VNodeTy = StructType::get(Ctx, makeArrayRef(VNodeTypes));
853 ArrayType *VNodesTy = ArrayType::get(VNodeTy, NumCounters);
854 auto *VNodesVar = new GlobalVariable(
855 *M, VNodesTy, false, GlobalValue::PrivateLinkage,
856 Constant::getNullValue(VNodesTy), getInstrProfVNodesVarName());
857 VNodesVar->setSection(
858 getInstrProfSectionName(IPSK_vnodes, TT.getObjectFormat()));
859 UsedVars.push_back(VNodesVar);
862 void InstrProfiling::emitNameData() {
863 std::string UncompressedData;
865 if (ReferencedNames.empty())
866 return;
868 std::string CompressedNameStr;
869 if (Error E = collectPGOFuncNameStrings(ReferencedNames, CompressedNameStr,
870 DoNameCompression)) {
871 report_fatal_error(toString(std::move(E)), false);
874 auto &Ctx = M->getContext();
875 auto *NamesVal = ConstantDataArray::getString(
876 Ctx, StringRef(CompressedNameStr), false);
877 NamesVar = new GlobalVariable(*M, NamesVal->getType(), true,
878 GlobalValue::PrivateLinkage, NamesVal,
879 getInstrProfNamesVarName());
880 NamesSize = CompressedNameStr.size();
881 NamesVar->setSection(
882 getInstrProfSectionName(IPSK_name, TT.getObjectFormat()));
883 UsedVars.push_back(NamesVar);
885 for (auto *NamePtr : ReferencedNames)
886 NamePtr->eraseFromParent();
889 void InstrProfiling::emitRegistration() {
890 if (!needsRuntimeRegistrationOfSectionRange(*M))
891 return;
893 // Construct the function.
894 auto *VoidTy = Type::getVoidTy(M->getContext());
895 auto *VoidPtrTy = Type::getInt8PtrTy(M->getContext());
896 auto *Int64Ty = Type::getInt64Ty(M->getContext());
897 auto *RegisterFTy = FunctionType::get(VoidTy, false);
898 auto *RegisterF = Function::Create(RegisterFTy, GlobalValue::InternalLinkage,
899 getInstrProfRegFuncsName(), M);
900 RegisterF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
901 if (Options.NoRedZone)
902 RegisterF->addFnAttr(Attribute::NoRedZone);
904 auto *RuntimeRegisterTy = FunctionType::get(VoidTy, VoidPtrTy, false);
905 auto *RuntimeRegisterF =
906 Function::Create(RuntimeRegisterTy, GlobalVariable::ExternalLinkage,
907 getInstrProfRegFuncName(), M);
909 IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", RegisterF));
910 for (Value *Data : UsedVars)
911 if (Data != NamesVar && !isa<Function>(Data))
912 IRB.CreateCall(RuntimeRegisterF, IRB.CreateBitCast(Data, VoidPtrTy));
914 if (NamesVar) {
915 Type *ParamTypes[] = {VoidPtrTy, Int64Ty};
916 auto *NamesRegisterTy =
917 FunctionType::get(VoidTy, makeArrayRef(ParamTypes), false);
918 auto *NamesRegisterF =
919 Function::Create(NamesRegisterTy, GlobalVariable::ExternalLinkage,
920 getInstrProfNamesRegFuncName(), M);
921 IRB.CreateCall(NamesRegisterF, {IRB.CreateBitCast(NamesVar, VoidPtrTy),
922 IRB.getInt64(NamesSize)});
925 IRB.CreateRetVoid();
928 bool InstrProfiling::emitRuntimeHook() {
929 // We expect the linker to be invoked with -u<hook_var> flag for linux,
930 // for which case there is no need to emit the user function.
931 if (Triple(M->getTargetTriple()).isOSLinux())
932 return false;
934 // If the module's provided its own runtime, we don't need to do anything.
935 if (M->getGlobalVariable(getInstrProfRuntimeHookVarName()))
936 return false;
938 // Declare an external variable that will pull in the runtime initialization.
939 auto *Int32Ty = Type::getInt32Ty(M->getContext());
940 auto *Var =
941 new GlobalVariable(*M, Int32Ty, false, GlobalValue::ExternalLinkage,
942 nullptr, getInstrProfRuntimeHookVarName());
944 // Make a function that uses it.
945 auto *User = Function::Create(FunctionType::get(Int32Ty, false),
946 GlobalValue::LinkOnceODRLinkage,
947 getInstrProfRuntimeHookVarUseFuncName(), M);
948 User->addFnAttr(Attribute::NoInline);
949 if (Options.NoRedZone)
950 User->addFnAttr(Attribute::NoRedZone);
951 User->setVisibility(GlobalValue::HiddenVisibility);
952 if (Triple(M->getTargetTriple()).supportsCOMDAT())
953 User->setComdat(M->getOrInsertComdat(User->getName()));
955 IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", User));
956 auto *Load = IRB.CreateLoad(Var);
957 IRB.CreateRet(Load);
959 // Mark the user variable as used so that it isn't stripped out.
960 UsedVars.push_back(User);
961 return true;
964 void InstrProfiling::emitUses() {
965 if (!UsedVars.empty())
966 appendToUsed(*M, UsedVars);
969 void InstrProfiling::emitInitialization() {
970 StringRef InstrProfileOutput = Options.InstrProfileOutput;
972 if (!InstrProfileOutput.empty()) {
973 // Create variable for profile name.
974 Constant *ProfileNameConst =
975 ConstantDataArray::getString(M->getContext(), InstrProfileOutput, true);
976 GlobalVariable *ProfileNameVar = new GlobalVariable(
977 *M, ProfileNameConst->getType(), true, GlobalValue::WeakAnyLinkage,
978 ProfileNameConst, INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR));
979 if (TT.supportsCOMDAT()) {
980 ProfileNameVar->setLinkage(GlobalValue::ExternalLinkage);
981 ProfileNameVar->setComdat(M->getOrInsertComdat(
982 StringRef(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR))));
986 Constant *RegisterF = M->getFunction(getInstrProfRegFuncsName());
987 if (!RegisterF)
988 return;
990 // Create the initialization function.
991 auto *VoidTy = Type::getVoidTy(M->getContext());
992 auto *F = Function::Create(FunctionType::get(VoidTy, false),
993 GlobalValue::InternalLinkage,
994 getInstrProfInitFuncName(), M);
995 F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
996 F->addFnAttr(Attribute::NoInline);
997 if (Options.NoRedZone)
998 F->addFnAttr(Attribute::NoRedZone);
1000 // Add the basic block and the necessary calls.
1001 IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", F));
1002 if (RegisterF)
1003 IRB.CreateCall(RegisterF, {});
1004 IRB.CreateRetVoid();
1006 appendToGlobalCtors(*M, F, 0);