//===- MemProfiler.cpp - memory allocation and access profiler -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of MemProfiler. Memory accesses are instrumented
// to increment the access count held in a shadow memory location, or
// alternatively to call into the runtime. Memory intrinsic calls (memmove,
// memcpy, memset) are changed to call the memory profiling runtime version
// instead.
//
//===----------------------------------------------------------------------===//

17 #include "llvm/Transforms/Instrumentation/MemProfiler.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Analysis/MemoryBuiltins.h"
22 #include "llvm/Analysis/MemoryProfileInfo.h"
23 #include "llvm/Analysis/TargetLibraryInfo.h"
24 #include "llvm/Analysis/ValueTracking.h"
25 #include "llvm/IR/Constant.h"
26 #include "llvm/IR/DataLayout.h"
27 #include "llvm/IR/DiagnosticInfo.h"
28 #include "llvm/IR/Function.h"
29 #include "llvm/IR/GlobalValue.h"
30 #include "llvm/IR/IRBuilder.h"
31 #include "llvm/IR/Instruction.h"
32 #include "llvm/IR/IntrinsicInst.h"
33 #include "llvm/IR/Module.h"
34 #include "llvm/IR/Type.h"
35 #include "llvm/IR/Value.h"
36 #include "llvm/ProfileData/InstrProf.h"
37 #include "llvm/ProfileData/InstrProfReader.h"
38 #include "llvm/Support/BLAKE3.h"
39 #include "llvm/Support/CommandLine.h"
40 #include "llvm/Support/Debug.h"
41 #include "llvm/Support/HashBuilder.h"
42 #include "llvm/Support/VirtualFileSystem.h"
43 #include "llvm/TargetParser/Triple.h"
44 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
45 #include "llvm/Transforms/Utils/LongestCommonSequence.h"
46 #include "llvm/Transforms/Utils/ModuleUtils.h"
47 #include <map>
48 #include <set>
using namespace llvm;
using namespace llvm::memprof;

#define DEBUG_TYPE "memprof"

namespace llvm {
extern cl::opt<bool> PGOWarnMissing;
extern cl::opt<bool> NoPGOWarnMismatch;
extern cl::opt<bool> NoPGOWarnMismatchComdatWeak;
} // namespace llvm

constexpr int LLVM_MEM_PROFILER_VERSION = 1;

// Size of memory mapped to a single shadow location.
constexpr uint64_t DefaultMemGranularity = 64;

// Size of memory mapped to a single histogram bucket.
constexpr uint64_t HistogramGranularity = 8;
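// With histograms, instrumentAddress() below uses 8-bit shadow counters, so
// this works out to one byte of shadow per 8 bytes of memory.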
// Scale from granularity down to shadow size.
constexpr uint64_t DefaultShadowScale = 3;

constexpr char MemProfModuleCtorName[] = "memprof.module_ctor";
constexpr uint64_t MemProfCtorAndDtorPriority = 1;
// On Emscripten, the system needs more than one priority for constructors.
constexpr uint64_t MemProfEmscriptenCtorAndDtorPriority = 50;
constexpr char MemProfInitName[] = "__memprof_init";
constexpr char MemProfVersionCheckNamePrefix[] =
    "__memprof_version_mismatch_check_v";

constexpr char MemProfShadowMemoryDynamicAddress[] =
    "__memprof_shadow_memory_dynamic_address";

constexpr char MemProfFilenameVar[] = "__memprof_profile_filename";

constexpr char MemProfHistogramFlagVar[] = "__memprof_histogram";

// Command-line flags.

static cl::opt<bool> ClInsertVersionCheck(
    "memprof-guard-against-version-mismatch",
    cl::desc("Guard against compiler/runtime version mismatch."), cl::Hidden,
    cl::init(true));

// This flag may need to be replaced with -f[no-]memprof-reads.
static cl::opt<bool> ClInstrumentReads("memprof-instrument-reads",
                                       cl::desc("instrument read instructions"),
                                       cl::Hidden, cl::init(true));

static cl::opt<bool>
    ClInstrumentWrites("memprof-instrument-writes",
                       cl::desc("instrument write instructions"), cl::Hidden,
                       cl::init(true));

static cl::opt<bool> ClInstrumentAtomics(
    "memprof-instrument-atomics",
    cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden,
    cl::init(true));

static cl::opt<bool> ClUseCalls(
    "memprof-use-callbacks",
    cl::desc("Use callbacks instead of inline instrumentation sequences."),
    cl::Hidden, cl::init(false));

static cl::opt<std::string>
    ClMemoryAccessCallbackPrefix("memprof-memory-access-callback-prefix",
                                 cl::desc("Prefix for memory access callbacks"),
                                 cl::Hidden, cl::init("__memprof_"));

// These flags allow changing the shadow mapping.
// The shadow mapping looks like
//    Shadow = ((Mem & mask) >> scale) + offset
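// For example, with the default 64-byte granularity and scale of 3, an
// access to address A increments the 8-byte (64-bit) shadow counter at
//    ((A & ~63ULL) >> 3) + DynamicShadowOffset.
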
static cl::opt<int> ClMappingScale("memprof-mapping-scale",
                                   cl::desc("scale of memprof shadow mapping"),
                                   cl::Hidden, cl::init(DefaultShadowScale));

static cl::opt<int>
    ClMappingGranularity("memprof-mapping-granularity",
                         cl::desc("granularity of memprof shadow mapping"),
                         cl::Hidden, cl::init(DefaultMemGranularity));

static cl::opt<bool> ClStack("memprof-instrument-stack",
                             cl::desc("Instrument scalar stack variables"),
                             cl::Hidden, cl::init(false));

// Debug flags.

static cl::opt<int> ClDebug("memprof-debug", cl::desc("debug"), cl::Hidden,
                            cl::init(0));

static cl::opt<std::string> ClDebugFunc("memprof-debug-func", cl::Hidden,
                                        cl::desc("Debug func"));

static cl::opt<int> ClDebugMin("memprof-debug-min", cl::desc("Debug min inst"),
                               cl::Hidden, cl::init(-1));

static cl::opt<int> ClDebugMax("memprof-debug-max", cl::desc("Debug max inst"),
                               cl::Hidden, cl::init(-1));

// By default, disable matching of allocation profiles onto operator new calls
// that already explicitly pass a hot/cold hint, since we don't currently
// override these hints anyway.
static cl::opt<bool> ClMemProfMatchHotColdNew(
    "memprof-match-hot-cold-new",
    cl::desc(
        "Match allocation profiles onto existing hot/cold operator new calls"),
    cl::Hidden, cl::init(false));

static cl::opt<bool> ClHistogram("memprof-histogram",
                                 cl::desc("Collect access count histograms"),
                                 cl::Hidden, cl::init(false));

static cl::opt<bool>
    ClPrintMemProfMatchInfo("memprof-print-match-info",
                            cl::desc("Print matching stats for each allocation "
                                     "context in this module's profiles"),
                            cl::Hidden, cl::init(false));

static cl::opt<std::string>
    MemprofRuntimeDefaultOptions("memprof-runtime-default-options",
                                 cl::desc("The default memprof options"),
                                 cl::Hidden, cl::init(""));

static cl::opt<bool>
    SalvageStaleProfile("memprof-salvage-stale-profile",
                        cl::desc("Salvage stale MemProf profile"),
                        cl::init(false), cl::Hidden);

cl::opt<unsigned> MinClonedColdBytePercent(
    "memprof-cloning-cold-threshold", cl::init(100), cl::Hidden,
    cl::desc("Min percent of cold bytes to hint alloc cold during cloning"));

extern cl::opt<bool> MemProfReportHintedSizes;

static cl::opt<unsigned> MinMatchedColdBytePercent(
    "memprof-matching-cold-threshold", cl::init(100), cl::Hidden,
    cl::desc("Min percent of cold bytes matched to hint allocation cold"));

// Instrumentation statistics
STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
STATISTIC(NumSkippedStackReads, "Number of non-instrumented stack reads");
STATISTIC(NumSkippedStackWrites, "Number of non-instrumented stack writes");

// Matching statistics
STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");
STATISTIC(NumOfMemProfMismatch,
          "Number of functions having mismatched memory profile hash.");
STATISTIC(NumOfMemProfFunc, "Number of functions having valid memory profile.");
STATISTIC(NumOfMemProfAllocContextProfiles,
          "Number of alloc contexts in memory profile.");
STATISTIC(NumOfMemProfCallSiteProfiles,
          "Number of callsites in memory profile.");
STATISTIC(NumOfMemProfMatchedAllocContexts,
          "Number of matched memory profile alloc contexts.");
STATISTIC(NumOfMemProfMatchedAllocs,
          "Number of matched memory profile allocs.");
STATISTIC(NumOfMemProfMatchedCallSites,
          "Number of matched memory profile callsites.");

namespace {

/// This struct defines the shadow mapping using the rule:
///   shadow = ((mem & mask) >> Scale) + DynamicShadowOffset.
struct ShadowMapping {
  ShadowMapping() {
    Scale = ClMappingScale;
    Granularity = ClHistogram ? HistogramGranularity : ClMappingGranularity;
    Mask = ~(Granularity - 1);
  }

  int Scale;
  int Granularity;
  uint64_t Mask; // Computed as ~(Granularity-1)
};

static uint64_t getCtorAndDtorPriority(Triple &TargetTriple) {
  return TargetTriple.isOSEmscripten() ? MemProfEmscriptenCtorAndDtorPriority
                                       : MemProfCtorAndDtorPriority;
}

struct InterestingMemoryAccess {
  Value *Addr = nullptr;
  bool IsWrite;
  Type *AccessTy;
  Value *MaybeMask = nullptr;
};

/// Instrument the code in module to profile memory accesses.
class MemProfiler {
public:
  MemProfiler(Module &M) {
    C = &(M.getContext());
    LongSize = M.getDataLayout().getPointerSizeInBits();
    IntptrTy = Type::getIntNTy(*C, LongSize);
    PtrTy = PointerType::getUnqual(*C);
  }

  /// If it is an interesting memory access, populate information
  /// about the access and return an InterestingMemoryAccess struct.
  /// Otherwise return std::nullopt.
  std::optional<InterestingMemoryAccess>
  isInterestingMemoryAccess(Instruction *I) const;

  void instrumentMop(Instruction *I, const DataLayout &DL,
                     InterestingMemoryAccess &Access);
  void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore,
                         Value *Addr, bool IsWrite);
  void instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask,
                                   Instruction *I, Value *Addr, Type *AccessTy,
                                   bool IsWrite);
  void instrumentMemIntrinsic(MemIntrinsic *MI);
  Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
  bool instrumentFunction(Function &F);
  bool maybeInsertMemProfInitAtFunctionEntry(Function &F);
  bool insertDynamicShadowAtFunctionEntry(Function &F);

private:
  void initializeCallbacks(Module &M);

  LLVMContext *C;
  int LongSize;
  Type *IntptrTy;
  PointerType *PtrTy;
  ShadowMapping Mapping;

  // These arrays are indexed by AccessIsWrite.
  FunctionCallee MemProfMemoryAccessCallback[2];

  FunctionCallee MemProfMemmove, MemProfMemcpy, MemProfMemset;
  Value *DynamicShadowOffset = nullptr;
};

class ModuleMemProfiler {
public:
  ModuleMemProfiler(Module &M) { TargetTriple = Triple(M.getTargetTriple()); }

  bool instrumentModule(Module &);

private:
  Triple TargetTriple;
  ShadowMapping Mapping;
  Function *MemProfCtorFunction = nullptr;
};

} // end anonymous namespace

MemProfilerPass::MemProfilerPass() = default;

PreservedAnalyses MemProfilerPass::run(Function &F,
                                       AnalysisManager<Function> &AM) {
  assert((!ClHistogram || ClMappingGranularity == DefaultMemGranularity) &&
         "Memprof with histogram only supports default mapping granularity");
  Module &M = *F.getParent();
  MemProfiler Profiler(M);
  if (Profiler.instrumentFunction(F))
    return PreservedAnalyses::none();
  return PreservedAnalyses::all();
}

ModuleMemProfilerPass::ModuleMemProfilerPass() = default;

PreservedAnalyses ModuleMemProfilerPass::run(Module &M,
                                             AnalysisManager<Module> &AM) {
  ModuleMemProfiler Profiler(M);
  if (Profiler.instrumentModule(M))
    return PreservedAnalyses::none();
  return PreservedAnalyses::all();
}

Value *MemProfiler::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
  // (Shadow & mask) >> scale
  Shadow = IRB.CreateAnd(Shadow, Mapping.Mask);
  Shadow = IRB.CreateLShr(Shadow, Mapping.Scale);
  // ((Shadow & mask) >> scale) + offset
  assert(DynamicShadowOffset);
  return IRB.CreateAdd(Shadow, DynamicShadowOffset);
}

// Instrument memset/memmove/memcpy
void MemProfiler::instrumentMemIntrinsic(MemIntrinsic *MI) {
  IRBuilder<> IRB(MI);
  if (isa<MemTransferInst>(MI)) {
    IRB.CreateCall(isa<MemMoveInst>(MI) ? MemProfMemmove : MemProfMemcpy,
                   {MI->getOperand(0), MI->getOperand(1),
                    IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
  } else if (isa<MemSetInst>(MI)) {
    IRB.CreateCall(
        MemProfMemset,
        {MI->getOperand(0),
         IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
         IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
  }
  MI->eraseFromParent();
}

std::optional<InterestingMemoryAccess>
MemProfiler::isInterestingMemoryAccess(Instruction *I) const {
  // Do not instrument the load fetching the dynamic shadow address.
  if (DynamicShadowOffset == I)
    return std::nullopt;

  InterestingMemoryAccess Access;

  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    if (!ClInstrumentReads)
      return std::nullopt;
    Access.IsWrite = false;
    Access.AccessTy = LI->getType();
    Access.Addr = LI->getPointerOperand();
  } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
    if (!ClInstrumentWrites)
      return std::nullopt;
    Access.IsWrite = true;
    Access.AccessTy = SI->getValueOperand()->getType();
    Access.Addr = SI->getPointerOperand();
  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
    if (!ClInstrumentAtomics)
      return std::nullopt;
    Access.IsWrite = true;
    Access.AccessTy = RMW->getValOperand()->getType();
    Access.Addr = RMW->getPointerOperand();
  } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
    if (!ClInstrumentAtomics)
      return std::nullopt;
    Access.IsWrite = true;
    Access.AccessTy = XCHG->getCompareOperand()->getType();
    Access.Addr = XCHG->getPointerOperand();
  } else if (auto *CI = dyn_cast<CallInst>(I)) {
    auto *F = CI->getCalledFunction();
    if (F && (F->getIntrinsicID() == Intrinsic::masked_load ||
              F->getIntrinsicID() == Intrinsic::masked_store)) {
      unsigned OpOffset = 0;
      if (F->getIntrinsicID() == Intrinsic::masked_store) {
        if (!ClInstrumentWrites)
          return std::nullopt;
        // Masked store has an initial operand for the value.
        OpOffset = 1;
        Access.AccessTy = CI->getArgOperand(0)->getType();
        Access.IsWrite = true;
      } else {
        if (!ClInstrumentReads)
          return std::nullopt;
        Access.AccessTy = CI->getType();
        Access.IsWrite = false;
      }

      auto *BasePtr = CI->getOperand(0 + OpOffset);
      Access.MaybeMask = CI->getOperand(2 + OpOffset);
      Access.Addr = BasePtr;
    }
  }

  if (!Access.Addr)
    return std::nullopt;

  // Do not instrument accesses from different address spaces; we cannot deal
  // with them.
  Type *PtrTy = cast<PointerType>(Access.Addr->getType()->getScalarType());
  if (PtrTy->getPointerAddressSpace() != 0)
    return std::nullopt;

  // Ignore swifterror addresses.
  // swifterror memory addresses are mem2reg promoted by instruction
  // selection. As such they cannot have regular uses like an instrumentation
  // function and it makes no sense to track them as memory.
  if (Access.Addr->isSwiftError())
    return std::nullopt;

  // Peel off GEPs and BitCasts.
  auto *Addr = Access.Addr->stripInBoundsOffsets();

  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
    // Do not instrument PGO counter updates.
    if (GV->hasSection()) {
      StringRef SectionName = GV->getSection();
      // Check if the global is in the PGO counters section.
      auto OF = Triple(I->getModule()->getTargetTriple()).getObjectFormat();
      if (SectionName.ends_with(
              getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false)))
        return std::nullopt;
    }

    // Do not instrument accesses to LLVM internal variables.
    if (GV->getName().starts_with("__llvm"))
      return std::nullopt;
  }

  return Access;
}

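// For masked vector loads and stores, instrument each element individually:
// elements whose mask bit is constant false are skipped, elements with a
// constant true or undef mask bit get a plain shadow update, and elements
// with a dynamic mask bit are instrumented under a branch on that bit.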
void MemProfiler::instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask,
                                              Instruction *I, Value *Addr,
                                              Type *AccessTy, bool IsWrite) {
  auto *VTy = cast<FixedVectorType>(AccessTy);
  unsigned Num = VTy->getNumElements();
  auto *Zero = ConstantInt::get(IntptrTy, 0);
  for (unsigned Idx = 0; Idx < Num; ++Idx) {
    Value *InstrumentedAddress = nullptr;
    Instruction *InsertBefore = I;
    if (auto *Vector = dyn_cast<ConstantVector>(Mask)) {
      // dyn_cast as we might get UndefValue
      if (auto *Masked = dyn_cast<ConstantInt>(Vector->getOperand(Idx))) {
        if (Masked->isZero())
          // Mask is constant false, so no instrumentation needed.
          continue;
        // If we have a true or undef value, fall through to instrumentAddress
        // with InsertBefore == I.
      }
    } else {
      IRBuilder<> IRB(I);
      Value *MaskElem = IRB.CreateExtractElement(Mask, Idx);
      Instruction *ThenTerm = SplitBlockAndInsertIfThen(MaskElem, I, false);
      InsertBefore = ThenTerm;
    }

    IRBuilder<> IRB(InsertBefore);
    InstrumentedAddress =
        IRB.CreateGEP(VTy, Addr, {Zero, ConstantInt::get(IntptrTy, Idx)});
    instrumentAddress(I, InsertBefore, InstrumentedAddress, IsWrite);
  }
}

void MemProfiler::instrumentMop(Instruction *I, const DataLayout &DL,
                                InterestingMemoryAccess &Access) {
  // Skip instrumentation of stack accesses unless requested.
  if (!ClStack && isa<AllocaInst>(getUnderlyingObject(Access.Addr))) {
    if (Access.IsWrite)
      ++NumSkippedStackWrites;
    else
      ++NumSkippedStackReads;
    return;
  }

  if (Access.IsWrite)
    NumInstrumentedWrites++;
  else
    NumInstrumentedReads++;

  if (Access.MaybeMask) {
    instrumentMaskedLoadOrStore(DL, Access.MaybeMask, I, Access.Addr,
                                Access.AccessTy, Access.IsWrite);
  } else {
    // Since the access counts will be accumulated across the entire
    // allocation, we only update the shadow access count for the first
    // location and thus don't need to worry about alignment and type size.
    instrumentAddress(I, I, Access.Addr, Access.IsWrite);
  }
}

void MemProfiler::instrumentAddress(Instruction *OrigIns,
                                    Instruction *InsertBefore, Value *Addr,
                                    bool IsWrite) {
  IRBuilder<> IRB(InsertBefore);
  Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);

  if (ClUseCalls) {
    IRB.CreateCall(MemProfMemoryAccessCallback[IsWrite], AddrLong);
    return;
  }

  Type *ShadowTy = ClHistogram ? Type::getInt8Ty(*C) : Type::getInt64Ty(*C);
  Type *ShadowPtrTy = PointerType::get(*C, 0);

  Value *ShadowPtr = memToShadow(AddrLong, IRB);
  Value *ShadowAddr = IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy);
  Value *ShadowValue = IRB.CreateLoad(ShadowTy, ShadowAddr);
  // If we are profiling with histograms, add overflow protection at 255.
  if (ClHistogram) {
    Value *MaxCount = ConstantInt::get(Type::getInt8Ty(*C), 255);
    Value *Cmp = IRB.CreateICmpULT(ShadowValue, MaxCount);
    Instruction *IncBlock =
        SplitBlockAndInsertIfThen(Cmp, InsertBefore, /*Unreachable=*/false);
    IRB.SetInsertPoint(IncBlock);
  }
  Value *Inc = ConstantInt::get(ShadowTy, 1);
  ShadowValue = IRB.CreateAdd(ShadowValue, Inc);
  IRB.CreateStore(ShadowValue, ShadowAddr);
}

// Create the variable for the profile file name.
void createProfileFileNameVar(Module &M) {
  const MDString *MemProfFilename =
      dyn_cast_or_null<MDString>(M.getModuleFlag("MemProfProfileFilename"));
  if (!MemProfFilename)
    return;
  assert(!MemProfFilename->getString().empty() &&
         "Unexpected MemProfProfileFilename metadata with empty string");
  Constant *ProfileNameConst = ConstantDataArray::getString(
      M.getContext(), MemProfFilename->getString(), true);
  GlobalVariable *ProfileNameVar = new GlobalVariable(
      M, ProfileNameConst->getType(), /*isConstant=*/true,
      GlobalValue::WeakAnyLinkage, ProfileNameConst, MemProfFilenameVar);
  Triple TT(M.getTargetTriple());
  if (TT.supportsCOMDAT()) {
    ProfileNameVar->setLinkage(GlobalValue::ExternalLinkage);
    ProfileNameVar->setComdat(M.getOrInsertComdat(MemProfFilenameVar));
  }
}

// Set MemprofHistogramFlag as a global variable in IR. This makes it
// accessible to the runtime, changing shadow count behavior.
void createMemprofHistogramFlagVar(Module &M) {
  const StringRef VarName(MemProfHistogramFlagVar);
  Type *IntTy1 = Type::getInt1Ty(M.getContext());
  auto MemprofHistogramFlag = new GlobalVariable(
      M, IntTy1, true, GlobalValue::WeakAnyLinkage,
      Constant::getIntegerValue(IntTy1, APInt(1, ClHistogram)), VarName);
  Triple TT(M.getTargetTriple());
  if (TT.supportsCOMDAT()) {
    MemprofHistogramFlag->setLinkage(GlobalValue::ExternalLinkage);
    MemprofHistogramFlag->setComdat(M.getOrInsertComdat(VarName));
  }
  appendToCompilerUsed(M, MemprofHistogramFlag);
}

void createMemprofDefaultOptionsVar(Module &M) {
  Constant *OptionsConst = ConstantDataArray::getString(
      M.getContext(), MemprofRuntimeDefaultOptions, /*AddNull=*/true);
  GlobalVariable *OptionsVar =
      new GlobalVariable(M, OptionsConst->getType(), /*isConstant=*/true,
                         GlobalValue::WeakAnyLinkage, OptionsConst,
                         "__memprof_default_options_str");
  Triple TT(M.getTargetTriple());
  if (TT.supportsCOMDAT()) {
    OptionsVar->setLinkage(GlobalValue::ExternalLinkage);
    OptionsVar->setComdat(M.getOrInsertComdat(OptionsVar->getName()));
  }
}

bool ModuleMemProfiler::instrumentModule(Module &M) {
  // Create a module constructor.
  std::string MemProfVersion = std::to_string(LLVM_MEM_PROFILER_VERSION);
  std::string VersionCheckName =
      ClInsertVersionCheck ? (MemProfVersionCheckNamePrefix + MemProfVersion)
                           : "";
  std::tie(MemProfCtorFunction, std::ignore) =
      createSanitizerCtorAndInitFunctions(M, MemProfModuleCtorName,
                                          MemProfInitName, /*InitArgTypes=*/{},
                                          /*InitArgs=*/{}, VersionCheckName);

  const uint64_t Priority = getCtorAndDtorPriority(TargetTriple);
  appendToGlobalCtors(M, MemProfCtorFunction, Priority);

  createProfileFileNameVar(M);

  createMemprofHistogramFlagVar(M);

  createMemprofDefaultOptionsVar(M);

  return true;
}

void MemProfiler::initializeCallbacks(Module &M) {
  IRBuilder<> IRB(*C);

  for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
    const std::string TypeStr = AccessIsWrite ? "store" : "load";
    const std::string HistPrefix = ClHistogram ? "hist_" : "";

    SmallVector<Type *, 2> Args1{1, IntptrTy};
    MemProfMemoryAccessCallback[AccessIsWrite] = M.getOrInsertFunction(
        ClMemoryAccessCallbackPrefix + HistPrefix + TypeStr,
        FunctionType::get(IRB.getVoidTy(), Args1, false));
  }
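  // With the default "__memprof_" prefix, the loop above declares
  // __memprof_load and __memprof_store (or __memprof_hist_load and
  // __memprof_hist_store when histograms are enabled).
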
  MemProfMemmove = M.getOrInsertFunction(
      ClMemoryAccessCallbackPrefix + "memmove", PtrTy, PtrTy, PtrTy, IntptrTy);
  MemProfMemcpy = M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "memcpy",
                                        PtrTy, PtrTy, PtrTy, IntptrTy);
  MemProfMemset =
      M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "memset", PtrTy,
                            PtrTy, IRB.getInt32Ty(), IntptrTy);
}

bool MemProfiler::maybeInsertMemProfInitAtFunctionEntry(Function &F) {
  // For each NSObject descendant having a +load method, this method is
  // invoked by the ObjC runtime before any static constructors are called.
  // Therefore we need to instrument such methods with a call to __memprof_init
  // at the beginning in order to initialize our runtime before any access to
  // the shadow memory.
  // We cannot just ignore these methods, because they may call other
  // instrumented functions.
  if (F.getName().contains(" load]")) {
    FunctionCallee MemProfInitFunction =
        declareSanitizerInitFunction(*F.getParent(), MemProfInitName, {});
    IRBuilder<> IRB(&F.front(), F.front().begin());
    IRB.CreateCall(MemProfInitFunction, {});
    return true;
  }
  return false;
}

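// The shadow base is not known until run time: the runtime publishes it in
// __memprof_shadow_memory_dynamic_address, and each instrumented function
// loads that value once on entry and reuses it for every access.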
bool MemProfiler::insertDynamicShadowAtFunctionEntry(Function &F) {
  IRBuilder<> IRB(&F.front().front());
  Value *GlobalDynamicAddress = F.getParent()->getOrInsertGlobal(
      MemProfShadowMemoryDynamicAddress, IntptrTy);
  if (F.getParent()->getPICLevel() == PICLevel::NotPIC)
    cast<GlobalVariable>(GlobalDynamicAddress)->setDSOLocal(true);
  DynamicShadowOffset = IRB.CreateLoad(IntptrTy, GlobalDynamicAddress);
  return true;
}

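// Instrumentation proceeds in two steps over the function: first collect the
// interesting accesses and memory intrinsics, then insert the shadow base
// load and rewrite each collected instruction. Collecting up front avoids
// re-visiting the instrumentation code we insert along the way.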
bool MemProfiler::instrumentFunction(Function &F) {
  if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
    return false;
  if (ClDebugFunc == F.getName())
    return false;
  if (F.getName().starts_with("__memprof_"))
    return false;

  bool FunctionModified = false;

  // If needed, insert __memprof_init.
  // This function needs to be called even if the function body is not
  // instrumented.
  if (maybeInsertMemProfInitAtFunctionEntry(F))
    FunctionModified = true;

  LLVM_DEBUG(dbgs() << "MEMPROF instrumenting:\n" << F << "\n");

  initializeCallbacks(*F.getParent());

  SmallVector<Instruction *, 16> ToInstrument;

  // Fill the set of memory operations to instrument.
  for (auto &BB : F) {
    for (auto &Inst : BB) {
      if (isInterestingMemoryAccess(&Inst) || isa<MemIntrinsic>(Inst))
        ToInstrument.push_back(&Inst);
    }
  }

  if (ToInstrument.empty()) {
    LLVM_DEBUG(dbgs() << "MEMPROF done instrumenting: " << FunctionModified
                      << " " << F << "\n");

    return FunctionModified;
  }

  FunctionModified |= insertDynamicShadowAtFunctionEntry(F);

  int NumInstrumented = 0;
  for (auto *Inst : ToInstrument) {
    if (ClDebugMin < 0 || ClDebugMax < 0 ||
        (NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) {
      std::optional<InterestingMemoryAccess> Access =
          isInterestingMemoryAccess(Inst);
      if (Access)
        instrumentMop(Inst, F.getDataLayout(), *Access);
      else
        instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
    }
    NumInstrumented++;
  }

  if (NumInstrumented > 0)
    FunctionModified = true;

  LLVM_DEBUG(dbgs() << "MEMPROF done instrumenting: " << FunctionModified << " "
                    << F << "\n");

  return FunctionModified;
}

static void addCallsiteMetadata(Instruction &I,
                                ArrayRef<uint64_t> InlinedCallStack,
                                LLVMContext &Ctx) {
  I.setMetadata(LLVMContext::MD_callsite,
                buildCallstackMetadata(InlinedCallStack, Ctx));
}

static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset,
                               uint32_t Column) {
  llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>
      HashBuilder;
  HashBuilder.add(Function, LineOffset, Column);
  llvm::BLAKE3Result<8> Hash = HashBuilder.final();
  uint64_t Id;
  std::memcpy(&Id, Hash.data(), sizeof(Hash));
  return Id;
}

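// The Frame overload below must stay in sync with the raw overload above:
// the matching code hashes profile Frames with one and IR debug locations
// with the other, and relies on equal inputs producing equal ids.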
static uint64_t computeStackId(const memprof::Frame &Frame) {
  return computeStackId(Frame.Function, Frame.LineOffset, Frame.Column);
}

// Helper to generate a single hash id for a given callstack, used for emitting
// matching statistics and useful for uniquing such statistics across modules.
static uint64_t computeFullStackId(ArrayRef<Frame> CallStack) {
  llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>
      HashBuilder;
  for (auto &F : CallStack)
    HashBuilder.add(F.Function, F.LineOffset, F.Column);
  llvm::BLAKE3Result<8> Hash = HashBuilder.final();
  uint64_t Id;
  std::memcpy(&Id, Hash.data(), sizeof(Hash));
  return Id;
}

static AllocationType addCallStack(CallStackTrie &AllocTrie,
                                   const AllocationInfo *AllocInfo,
                                   uint64_t FullStackId) {
  SmallVector<uint64_t> StackIds;
  for (const auto &StackFrame : AllocInfo->CallStack)
    StackIds.push_back(computeStackId(StackFrame));
  auto AllocType = getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(),
                                AllocInfo->Info.getAllocCount(),
                                AllocInfo->Info.getTotalLifetime());
  std::vector<ContextTotalSize> ContextSizeInfo;
  if (MemProfReportHintedSizes || MinClonedColdBytePercent < 100) {
    auto TotalSize = AllocInfo->Info.getTotalSize();
    assert(TotalSize);
    assert(FullStackId != 0);
    ContextSizeInfo.push_back({FullStackId, TotalSize});
  }
  AllocTrie.addCallStack(AllocType, StackIds, std::move(ContextSizeInfo));
  return AllocType;
}

// Helper to compare the InlinedCallStack computed from an instruction's debug
// info to a list of Frames from profile data (either the allocation data or a
// callsite). For callsites, the profile data frames may be a slice of a longer
// frame array, starting at a non-zero offset.
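// Both sequences are ordered leaf-first (the IR-side stack is built from the
// leaf DILocation out through its inlined-at chain), so a single pairwise walk
// suffices; the profile stack may be longer than the inlined call stack and
// still match.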
static bool
stackFrameIncludesInlinedCallStack(ArrayRef<Frame> ProfileCallStack,
                                   ArrayRef<uint64_t> InlinedCallStack) {
  auto StackFrame = ProfileCallStack.begin();
  auto InlCallStackIter = InlinedCallStack.begin();
  for (; StackFrame != ProfileCallStack.end() &&
         InlCallStackIter != InlinedCallStack.end();
       ++StackFrame, ++InlCallStackIter) {
    uint64_t StackId = computeStackId(*StackFrame);
    if (StackId != *InlCallStackIter)
      return false;
  }
  // Return true if we found and matched all stack ids from the call
  // instruction.
  return InlCallStackIter == InlinedCallStack.end();
}

static bool isAllocationWithHotColdVariant(const Function *Callee,
                                           const TargetLibraryInfo &TLI) {
  if (!Callee)
    return false;
  LibFunc Func;
  if (!TLI.getLibFunc(*Callee, Func))
    return false;
  switch (Func) {
  case LibFunc_Znwm:
  case LibFunc_ZnwmRKSt9nothrow_t:
  case LibFunc_ZnwmSt11align_val_t:
  case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t:
  case LibFunc_Znam:
  case LibFunc_ZnamRKSt9nothrow_t:
  case LibFunc_ZnamSt11align_val_t:
  case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t:
  case LibFunc_size_returning_new:
  case LibFunc_size_returning_new_aligned:
    return true;
  case LibFunc_Znwm12__hot_cold_t:
  case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t:
  case LibFunc_ZnwmSt11align_val_t12__hot_cold_t:
  case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
  case LibFunc_Znam12__hot_cold_t:
  case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t:
  case LibFunc_ZnamSt11align_val_t12__hot_cold_t:
  case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t:
  case LibFunc_size_returning_new_hot_cold:
  case LibFunc_size_returning_new_aligned_hot_cold:
    return ClMemProfMatchHotColdNew;
  default:
    return false;
  }
}

struct AllocMatchInfo {
  uint64_t TotalSize = 0;
  AllocationType AllocType = AllocationType::None;
  bool Matched = false;
};

DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>>
memprof::extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI,
                            function_ref<bool(uint64_t)> IsPresentInProfile) {
  DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>> Calls;

  auto GetOffset = [](const DILocation *DIL) {
    return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
           0xffff;
  };
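  // Anchors are keyed by the 16-bit line offset from the start of the
  // enclosing subprogram rather than by absolute line, matching how
  // Frame.LineOffset is encoded in the profile and making the locations
  // insensitive to code shifting elsewhere in the file.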

  for (Function &F : M) {
    if (F.isDeclaration())
      continue;

    for (auto &BB : F) {
      for (auto &I : BB) {
        if (!isa<CallBase>(&I) || isa<IntrinsicInst>(&I))
          continue;

        auto *CB = dyn_cast<CallBase>(&I);
        auto *CalledFunction = CB->getCalledFunction();
        // Disregard indirect calls and intrinsics.
        if (!CalledFunction || CalledFunction->isIntrinsic())
          continue;

        StringRef CalleeName = CalledFunction->getName();
        // True if we are calling a heap allocation function that supports
        // hot/cold variants.
        bool IsAlloc = isAllocationWithHotColdVariant(CalledFunction, TLI);
        // True for the first iteration below, indicating that we are looking
        // at a leaf node.
        bool IsLeaf = true;
        for (const DILocation *DIL = I.getDebugLoc(); DIL;
             DIL = DIL->getInlinedAt()) {
          StringRef CallerName = DIL->getSubprogramLinkageName();
          assert(!CallerName.empty() &&
                 "Be sure to enable -fdebug-info-for-profiling");
          uint64_t CallerGUID = IndexedMemProfRecord::getGUID(CallerName);
          uint64_t CalleeGUID = IndexedMemProfRecord::getGUID(CalleeName);
          // Pretend that we are calling a function with GUID == 0 if we are
          // in the inline stack leading to a heap allocation function.
          if (IsAlloc) {
            if (IsLeaf) {
              // For leaf nodes, set CalleeGUID to 0 without consulting
              // IsPresentInProfile.
              CalleeGUID = 0;
            } else if (!IsPresentInProfile(CalleeGUID)) {
              // In addition to the leaf case above, continue to set CalleeGUID
              // to 0 as long as we don't see CalleeGUID in the profile.
              CalleeGUID = 0;
            } else {
              // Once we encounter a callee that exists in the profile, stop
              // setting CalleeGUID to 0.
              IsAlloc = false;
            }
          }

          LineLocation Loc = {GetOffset(DIL), DIL->getColumn()};
          Calls[CallerGUID].emplace_back(Loc, CalleeGUID);
          CalleeName = CallerName;
          IsLeaf = false;
        }
      }
    }
  }

  // Sort each call list by the source location and remove duplicates.
  for (auto &[CallerGUID, CallList] : Calls) {
    llvm::sort(CallList);
    CallList.erase(llvm::unique(CallList), CallList.end());
  }

  return Calls;
}

DenseMap<uint64_t, LocToLocMap>
memprof::computeUndriftMap(Module &M, IndexedInstrProfReader *MemProfReader,
                           const TargetLibraryInfo &TLI) {
  DenseMap<uint64_t, LocToLocMap> UndriftMaps;

  DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>> CallsFromProfile =
      MemProfReader->getMemProfCallerCalleePairs();
  DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>> CallsFromIR =
      extractCallsFromIR(M, TLI, [&](uint64_t GUID) {
        return CallsFromProfile.contains(GUID);
      });

  // Compute an undrift map for each CallerGUID.
  for (const auto &[CallerGUID, IRAnchors] : CallsFromIR) {
    auto It = CallsFromProfile.find(CallerGUID);
    if (It == CallsFromProfile.end())
      continue;
    const auto &ProfileAnchors = It->second;

    LocToLocMap Matchings;
    longestCommonSequence<LineLocation, GlobalValue::GUID>(
        ProfileAnchors, IRAnchors, std::equal_to<GlobalValue::GUID>(),
        [&](LineLocation A, LineLocation B) { Matchings.try_emplace(A, B); });
    bool Inserted = UndriftMaps.try_emplace(CallerGUID, Matchings).second;

    // The insertion must succeed because we visit each GUID exactly once.
    assert(Inserted);
    (void)Inserted;
  }

  return UndriftMaps;
}

// Given a MemProfRecord, undrift all the source locations present in the
// record in place.
static void
undriftMemProfRecord(const DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
                     memprof::MemProfRecord &MemProfRec) {
  // Undrift a call stack in place.
  auto UndriftCallStack = [&](std::vector<Frame> &CallStack) {
    for (auto &F : CallStack) {
      auto I = UndriftMaps.find(F.Function);
      if (I == UndriftMaps.end())
        continue;
      auto J = I->second.find(LineLocation(F.LineOffset, F.Column));
      if (J == I->second.end())
        continue;
      auto &NewLoc = J->second;
      F.LineOffset = NewLoc.LineOffset;
      F.Column = NewLoc.Column;
    }
  };

  for (auto &AS : MemProfRec.AllocSites)
    UndriftCallStack(AS.CallStack);

  for (auto &CS : MemProfRec.CallSites)
    UndriftCallStack(CS);
}

static void
readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
            const TargetLibraryInfo &TLI,
            std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo,
            DenseMap<uint64_t, LocToLocMap> &UndriftMaps) {
  auto &Ctx = M.getContext();
  // Previously we used getIRPGOFuncName() here. For a function with local
  // linkage, getIRPGOFuncName() returns the name prefixed with 'FileName;',
  // but llvm-profdata creates the GUID from the name in DWARF, which does not
  // include that prefix. As a result, local linkage functions could not find
  // their MemProfRecord, so we use getName() now.
  // 'unique-internal-linkage-names' can make MemProf work better for local
  // linkage functions.
  auto FuncName = F.getName();
  auto FuncGUID = Function::getGUID(FuncName);
  std::optional<memprof::MemProfRecord> MemProfRec;
  auto Err = MemProfReader->getMemProfRecord(FuncGUID).moveInto(MemProfRec);
  if (Err) {
    handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
      auto Err = IPE.get();
      bool SkipWarning = false;
      LLVM_DEBUG(dbgs() << "Error in reading profile for Func " << FuncName
                        << ": ");
      if (Err == instrprof_error::unknown_function) {
        NumOfMemProfMissing++;
        SkipWarning = !PGOWarnMissing;
        LLVM_DEBUG(dbgs() << "unknown function");
      } else if (Err == instrprof_error::hash_mismatch) {
        NumOfMemProfMismatch++;
        SkipWarning =
            NoPGOWarnMismatch ||
            (NoPGOWarnMismatchComdatWeak &&
             (F.hasComdat() ||
              F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
        LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
      }

      if (SkipWarning)
        return;

      std::string Msg = (IPE.message() + Twine(" ") + F.getName().str() +
                         Twine(" Hash = ") + std::to_string(FuncGUID))
                            .str();

      Ctx.diagnose(
          DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
    });
    return;
  }

  NumOfMemProfFunc++;

  // If requested, undrift MemProfRecord so that the source locations in it
  // match those in the IR.
  if (SalvageStaleProfile)
    undriftMemProfRecord(UndriftMaps, *MemProfRec);

  // Detect if there are non-zero column numbers in the profile. If not,
  // treat all column numbers as 0 when matching (i.e. ignore any non-zero
  // columns in the IR). The profiled binary might have been built with
  // column numbers disabled, for example.
  bool ProfileHasColumns = false;

  // Build maps of the location hash to all profile data with that leaf
  // location (allocation info and the callsites).
  std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
  // A hash function for std::unordered_set<ArrayRef<Frame>> to work.
  struct CallStackHash {
    size_t operator()(ArrayRef<Frame> CS) const {
      return computeFullStackId(CS);
    }
  };
  // For the callsites we need to record slices of the frame array (see
  // comments below where the map entries are added).
  std::map<uint64_t, std::unordered_set<ArrayRef<Frame>, CallStackHash>>
      LocHashToCallSites;
  for (auto &AI : MemProfRec->AllocSites) {
    NumOfMemProfAllocContextProfiles++;
    // Associate the allocation info with the leaf frame. The later matching
    // code will match any inlined call sequences in the IR with a longer
    // prefix of call stack frames.
    uint64_t StackId = computeStackId(AI.CallStack[0]);
    LocHashToAllocInfo[StackId].insert(&AI);
    ProfileHasColumns |= AI.CallStack[0].Column;
  }
  for (auto &CS : MemProfRec->CallSites) {
    NumOfMemProfCallSiteProfiles++;
    // Need to record all frames from leaf up to and including this function,
    // as any of these may or may not have been inlined at this point.
    unsigned Idx = 0;
    for (auto &StackFrame : CS) {
      uint64_t StackId = computeStackId(StackFrame);
      LocHashToCallSites[StackId].insert(ArrayRef<Frame>(CS).drop_front(Idx++));
      ProfileHasColumns |= StackFrame.Column;
      // Once we find this function, we can stop recording.
      if (StackFrame.Function == FuncGUID)
        break;
    }
    assert(Idx <= CS.size() && CS[Idx - 1].Function == FuncGUID);
  }

  auto GetOffset = [](const DILocation *DIL) {
    return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
           0xffff;
  };

  // Now walk the instructions, looking up the associated profile data using
  // debug locations.
  for (auto &BB : F) {
    for (auto &I : BB) {
      if (I.isDebugOrPseudoInst())
        continue;
      // We are only interested in calls (allocation or interior call stack
      // context calls).
      auto *CI = dyn_cast<CallBase>(&I);
      if (!CI)
        continue;
      auto *CalledFunction = CI->getCalledFunction();
      if (CalledFunction && CalledFunction->isIntrinsic())
        continue;
      // List of call stack ids computed from the location hashes on debug
      // locations (leaf to inlined at root).
      SmallVector<uint64_t, 8> InlinedCallStack;
      // Was the leaf location found in one of the profile maps?
      bool LeafFound = false;
      // If leaf was found in a map, iterators pointing to its location in both
      // of the maps. It might exist in neither, one, or both (the latter case
      // can happen because we don't currently have discriminators to
      // distinguish the case when a single line/col maps to both an allocation
      // and another callsite).
      auto AllocInfoIter = LocHashToAllocInfo.end();
      auto CallSitesIter = LocHashToCallSites.end();
      for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr;
           DIL = DIL->getInlinedAt()) {
        // Use C++ linkage name if possible. Need to compile with
        // -fdebug-info-for-profiling to get linkage name.
        StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
        if (Name.empty())
          Name = DIL->getScope()->getSubprogram()->getName();
        auto CalleeGUID = Function::getGUID(Name);
        auto StackId = computeStackId(CalleeGUID, GetOffset(DIL),
                                      ProfileHasColumns ? DIL->getColumn() : 0);
        // Check if we have found the profile's leaf frame. If yes, collect
        // the rest of the call's inlined context starting here. If not, see if
        // we find a match further up the inlined context (in case the profile
        // was missing debug frames at the leaf).
        if (!LeafFound) {
          AllocInfoIter = LocHashToAllocInfo.find(StackId);
          CallSitesIter = LocHashToCallSites.find(StackId);
          if (AllocInfoIter != LocHashToAllocInfo.end() ||
              CallSitesIter != LocHashToCallSites.end())
            LeafFound = true;
        }
        if (LeafFound)
          InlinedCallStack.push_back(StackId);
      }
      // If leaf not in either of the maps, skip inst.
      if (!LeafFound)
        continue;

      // First add !memprof metadata from allocation info, if we found the
      // instruction's leaf location in that map, and if the rest of the
      // instruction's locations match the prefix Frame locations on an
      // allocation context with the same leaf.
      if (AllocInfoIter != LocHashToAllocInfo.end()) {
        // Only consider allocations which support hinting.
        if (!isAllocationWithHotColdVariant(CI->getCalledFunction(), TLI))
          continue;
        // We may match this instruction's location list to multiple MIB
        // contexts. Add them to a Trie specialized for trimming the contexts
        // to the minimal needed to disambiguate contexts with unique behavior.
        CallStackTrie AllocTrie;
        uint64_t TotalSize = 0;
        uint64_t TotalColdSize = 0;
        for (auto *AllocInfo : AllocInfoIter->second) {
          // Check the full inlined call stack against this one.
          // If we found and thus matched all frames on the call, include
          // this MIB.
          if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack,
                                                 InlinedCallStack)) {
            NumOfMemProfMatchedAllocContexts++;
            uint64_t FullStackId = 0;
            if (ClPrintMemProfMatchInfo || MemProfReportHintedSizes ||
                MinClonedColdBytePercent < 100)
              FullStackId = computeFullStackId(AllocInfo->CallStack);
            auto AllocType = addCallStack(AllocTrie, AllocInfo, FullStackId);
            TotalSize += AllocInfo->Info.getTotalSize();
            if (AllocType == AllocationType::Cold)
              TotalColdSize += AllocInfo->Info.getTotalSize();
            // Record information about the allocation if match info printing
            // was requested.
            if (ClPrintMemProfMatchInfo) {
              assert(FullStackId != 0);
              FullStackIdToAllocMatchInfo[FullStackId] = {
                  AllocInfo->Info.getTotalSize(), AllocType, /*Matched=*/true};
            }
          }
        }
        // If the threshold for the percent of cold bytes is less than 100%,
        // and not all bytes are cold, see if we should still hint this
        // allocation as cold without context sensitivity.
        if (TotalColdSize < TotalSize && MinMatchedColdBytePercent < 100 &&
            TotalColdSize * 100 >= MinMatchedColdBytePercent * TotalSize) {
          AllocTrie.addSingleAllocTypeAttribute(CI, AllocationType::Cold,
                                                "dominant");
          continue;
        }

        // We might not have matched any to the full inlined call stack.
        // But if we did, create and attach metadata, or a function attribute
        // if all contexts have identical profiled behavior.
        if (!AllocTrie.empty()) {
          NumOfMemProfMatchedAllocs++;
          // MemprofMDAttached will be false if a function attribute was
          // attached.
          bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI);
          assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof));
          if (MemprofMDAttached) {
            // Add callsite metadata for the instruction's location list so
            // that it is simpler later on to identify which part of the MIB
            // contexts are from this particular instruction (including during
            // inlining, when the callsite metadata will be updated
            // appropriately).
            // FIXME: can this be changed to strip out the matching stack
            // context ids from the MIB contexts and not add any callsite
            // metadata here to save space?
            addCallsiteMetadata(I, InlinedCallStack, Ctx);
          }
        }
        continue;
      }

      // Otherwise, add callsite metadata. If we reach here then we found the
      // instruction's leaf location in the callsites map and not the
      // allocation map.
      assert(CallSitesIter != LocHashToCallSites.end());
      for (auto CallStackIdx : CallSitesIter->second) {
        // If we found and thus matched all frames on the call, create and
        // attach call stack metadata.
        if (stackFrameIncludesInlinedCallStack(CallStackIdx,
                                               InlinedCallStack)) {
          NumOfMemProfMatchedCallSites++;
          addCallsiteMetadata(I, InlinedCallStack, Ctx);
          // Only need to find one with a matching call stack and add a single
          // callsite metadata.
          break;
        }
      }
    }
  }
}

MemProfUsePass::MemProfUsePass(std::string MemoryProfileFile,
                               IntrusiveRefCntPtr<vfs::FileSystem> FS)
    : MemoryProfileFileName(MemoryProfileFile), FS(FS) {
  if (!FS)
    this->FS = vfs::getRealFileSystem();
}

PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
  // Return immediately if the module doesn't contain any functions.
  if (M.empty())
    return PreservedAnalyses::all();

  LLVM_DEBUG(dbgs() << "Read in memory profile:");
  auto &Ctx = M.getContext();
  auto ReaderOrErr = IndexedInstrProfReader::create(MemoryProfileFileName, *FS);
  if (Error E = ReaderOrErr.takeError()) {
    handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
      Ctx.diagnose(
          DiagnosticInfoPGOProfile(MemoryProfileFileName.data(), EI.message()));
    });
    return PreservedAnalyses::all();
  }

  std::unique_ptr<IndexedInstrProfReader> MemProfReader =
      std::move(ReaderOrErr.get());
  if (!MemProfReader) {
    Ctx.diagnose(DiagnosticInfoPGOProfile(
        MemoryProfileFileName.data(), StringRef("Cannot get MemProfReader")));
    return PreservedAnalyses::all();
  }

  if (!MemProfReader->hasMemoryProfile()) {
    Ctx.diagnose(DiagnosticInfoPGOProfile(MemoryProfileFileName.data(),
                                          "Not a memory profile"));
    return PreservedAnalyses::all();
  }

  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();

  TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(*M.begin());
  DenseMap<uint64_t, LocToLocMap> UndriftMaps;
  if (SalvageStaleProfile)
    UndriftMaps = computeUndriftMap(M, MemProfReader.get(), TLI);

  // Map from the stack hash of each allocation context in the function
  // profiles to the total profiled size (bytes), allocation type, and whether
  // we matched it to an allocation in the IR.
  std::map<uint64_t, AllocMatchInfo> FullStackIdToAllocMatchInfo;

  for (auto &F : M) {
    if (F.isDeclaration())
      continue;

    const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
    readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo,
                UndriftMaps);
  }

  if (ClPrintMemProfMatchInfo) {
    for (const auto &[Id, Info] : FullStackIdToAllocMatchInfo)
      errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType)
             << " context with id " << Id << " has total profiled size "
             << Info.TotalSize << (Info.Matched ? " is" : " not")
             << " matched\n";
  }

  return PreservedAnalyses::none();
}