[sanitizer] Improve FreeBSD ASLR detection
[llvm-project.git] / llvm / tools / llvm-profgen / ProfiledBinary.cpp
blob6dc0d2604367deaa517ebd320d990f6d13a6d324
1 //===-- ProfiledBinary.cpp - Binary decoder ---------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "ProfiledBinary.h"
10 #include "ErrorHandling.h"
11 #include "ProfileGenerator.h"
12 #include "llvm/ADT/Triple.h"
13 #include "llvm/Demangle/Demangle.h"
14 #include "llvm/IR/DebugInfoMetadata.h"
15 #include "llvm/MC/TargetRegistry.h"
16 #include "llvm/Support/CommandLine.h"
17 #include "llvm/Support/Format.h"
18 #include "llvm/Support/TargetSelect.h"
20 #define DEBUG_TYPE "load-binary"
22 using namespace llvm;
23 using namespace sampleprof;
25 cl::opt<bool> ShowDisassemblyOnly("show-disassembly-only", cl::init(false),
26 cl::ZeroOrMore,
27 cl::desc("Print disassembled code."));
29 cl::opt<bool> ShowSourceLocations("show-source-locations", cl::init(false),
30 cl::ZeroOrMore,
31 cl::desc("Print source locations."));
33 static cl::opt<bool>
34 ShowCanonicalFnName("show-canonical-fname", cl::init(false), cl::ZeroOrMore,
35 cl::desc("Print canonical function name."));
37 static cl::opt<bool> ShowPseudoProbe(
38 "show-pseudo-probe", cl::init(false), cl::ZeroOrMore,
39 cl::desc("Print pseudo probe section and disassembled info."));
41 static cl::opt<bool> UseDwarfCorrelation(
42 "use-dwarf-correlation", cl::init(false), cl::ZeroOrMore,
43 cl::desc("Use dwarf for profile correlation even when binary contains "
44 "pseudo probe."));
46 static cl::list<std::string> DisassembleFunctions(
47 "disassemble-functions", cl::CommaSeparated,
48 cl::desc("List of functions to print disassembly for. Accept demangled "
49 "names only. Only work with show-disassembly-only"));
51 extern cl::opt<bool> ShowDetailedWarning;
53 namespace llvm {
54 namespace sampleprof {
56 static const Target *getTarget(const ObjectFile *Obj) {
57 Triple TheTriple = Obj->makeTriple();
58 std::string Error;
59 std::string ArchName;
60 const Target *TheTarget =
61 TargetRegistry::lookupTarget(ArchName, TheTriple, Error);
62 if (!TheTarget)
63 exitWithError(Error, Obj->getFileName());
64 return TheTarget;
67 void BinarySizeContextTracker::addInstructionForContext(
68 const SampleContextFrameVector &Context, uint32_t InstrSize) {
69 ContextTrieNode *CurNode = &RootContext;
70 bool IsLeaf = true;
71 for (const auto &Callsite : reverse(Context)) {
72 StringRef CallerName = Callsite.FuncName;
73 LineLocation CallsiteLoc = IsLeaf ? LineLocation(0, 0) : Callsite.Location;
74 CurNode = CurNode->getOrCreateChildContext(CallsiteLoc, CallerName);
75 IsLeaf = false;
78 CurNode->addFunctionSize(InstrSize);
81 uint32_t
82 BinarySizeContextTracker::getFuncSizeForContext(const SampleContext &Context) {
83 ContextTrieNode *CurrNode = &RootContext;
84 ContextTrieNode *PrevNode = nullptr;
85 SampleContextFrames Frames = Context.getContextFrames();
86 int32_t I = Frames.size() - 1;
87 Optional<uint32_t> Size;
89 // Start from top-level context-less function, traverse down the reverse
90 // context trie to find the best/longest match for given context, then
91 // retrieve the size.
93 while (CurrNode && I >= 0) {
94 // Process from leaf function to callers (added to context).
95 const auto &ChildFrame = Frames[I--];
96 PrevNode = CurrNode;
97 CurrNode =
98 CurrNode->getChildContext(ChildFrame.Location, ChildFrame.FuncName);
99 if (CurrNode && CurrNode->getFunctionSize().hasValue())
100 Size = CurrNode->getFunctionSize().getValue();
103 // If we traversed all nodes along the path of the context and haven't
104 // found a size yet, pivot to look for size from sibling nodes, i.e size
105 // of inlinee under different context.
106 if (!Size.hasValue()) {
107 if (!CurrNode)
108 CurrNode = PrevNode;
109 while (!Size.hasValue() && CurrNode &&
110 !CurrNode->getAllChildContext().empty()) {
111 CurrNode = &CurrNode->getAllChildContext().begin()->second;
112 if (CurrNode->getFunctionSize().hasValue())
113 Size = CurrNode->getFunctionSize().getValue();
117 assert(Size.hasValue() && "We should at least find one context size.");
118 return Size.getValue();
121 void BinarySizeContextTracker::trackInlineesOptimizedAway(
122 MCPseudoProbeDecoder &ProbeDecoder) {
123 ProbeFrameStack ProbeContext;
124 for (const auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren())
125 trackInlineesOptimizedAway(ProbeDecoder, *Child.second.get(), ProbeContext);
128 void BinarySizeContextTracker::trackInlineesOptimizedAway(
129 MCPseudoProbeDecoder &ProbeDecoder,
130 MCDecodedPseudoProbeInlineTree &ProbeNode, ProbeFrameStack &ProbeContext) {
131 StringRef FuncName =
132 ProbeDecoder.getFuncDescForGUID(ProbeNode.Guid)->FuncName;
133 ProbeContext.emplace_back(FuncName, 0);
135 // This ProbeContext has a probe, so it has code before inlining and
136 // optimization. Make sure we mark its size as known.
137 if (!ProbeNode.getProbes().empty()) {
138 ContextTrieNode *SizeContext = &RootContext;
139 for (auto &ProbeFrame : reverse(ProbeContext)) {
140 StringRef CallerName = ProbeFrame.first;
141 LineLocation CallsiteLoc(ProbeFrame.second, 0);
142 SizeContext =
143 SizeContext->getOrCreateChildContext(CallsiteLoc, CallerName);
145 // Add 0 size to make known.
146 SizeContext->addFunctionSize(0);
149 // DFS down the probe inline tree
150 for (const auto &ChildNode : ProbeNode.getChildren()) {
151 InlineSite Location = ChildNode.first;
152 ProbeContext.back().second = std::get<1>(Location);
153 trackInlineesOptimizedAway(ProbeDecoder, *ChildNode.second.get(), ProbeContext);
156 ProbeContext.pop_back();
159 void ProfiledBinary::warnNoFuncEntry() {
160 uint64_t NoFuncEntryNum = 0;
161 for (auto &F : BinaryFunctions) {
162 if (F.second.Ranges.empty())
163 continue;
164 bool hasFuncEntry = false;
165 for (auto &R : F.second.Ranges) {
166 if (FuncRange *FR = findFuncRangeForStartOffset(R.first)) {
167 if (FR->IsFuncEntry) {
168 hasFuncEntry = true;
169 break;
174 if (!hasFuncEntry) {
175 NoFuncEntryNum++;
176 if (ShowDetailedWarning)
177 WithColor::warning()
178 << "Failed to determine function entry for " << F.first
179 << " due to inconsistent name from symbol table and dwarf info.\n";
182 emitWarningSummary(NoFuncEntryNum, BinaryFunctions.size(),
183 "of functions failed to determine function entry due to "
184 "inconsistent name from symbol table and dwarf info.");
187 void ProfiledBinary::load() {
188 // Attempt to open the binary.
189 OwningBinary<Binary> OBinary = unwrapOrError(createBinary(Path), Path);
190 Binary &Binary = *OBinary.getBinary();
192 auto *Obj = dyn_cast<ELFObjectFileBase>(&Binary);
193 if (!Obj)
194 exitWithError("not a valid Elf image", Path);
196 TheTriple = Obj->makeTriple();
197 // Current only support X86
198 if (!TheTriple.isX86())
199 exitWithError("unsupported target", TheTriple.getTriple());
200 LLVM_DEBUG(dbgs() << "Loading " << Path << "\n");
202 // Find the preferred load address for text sections.
203 setPreferredTextSegmentAddresses(Obj);
205 // Decode pseudo probe related section
206 decodePseudoProbe(Obj);
208 // Load debug info of subprograms from DWARF section.
209 loadSymbolsFromDWARF(*cast<ObjectFile>(&Binary));
211 // Disassemble the text sections.
212 disassemble(Obj);
214 // Track size for optimized inlinees when probe is available
215 if (UsePseudoProbes && TrackFuncContextSize)
216 FuncSizeTracker.trackInlineesOptimizedAway(ProbeDecoder);
218 // Use function start and return address to infer prolog and epilog
219 ProEpilogTracker.inferPrologOffsets(StartOffset2FuncRangeMap);
220 ProEpilogTracker.inferEpilogOffsets(RetOffsets);
222 warnNoFuncEntry();
224 // TODO: decode other sections.
227 bool ProfiledBinary::inlineContextEqual(uint64_t Address1, uint64_t Address2) {
228 uint64_t Offset1 = virtualAddrToOffset(Address1);
229 uint64_t Offset2 = virtualAddrToOffset(Address2);
230 const SampleContextFrameVector &Context1 = getFrameLocationStack(Offset1);
231 const SampleContextFrameVector &Context2 = getFrameLocationStack(Offset2);
232 if (Context1.size() != Context2.size())
233 return false;
234 if (Context1.empty())
235 return false;
236 // The leaf frame contains location within the leaf, and it
237 // needs to be remove that as it's not part of the calling context
238 return std::equal(Context1.begin(), Context1.begin() + Context1.size() - 1,
239 Context2.begin(), Context2.begin() + Context2.size() - 1);
242 SampleContextFrameVector
243 ProfiledBinary::getExpandedContext(const SmallVectorImpl<uint64_t> &Stack,
244 bool &WasLeafInlined) {
245 SampleContextFrameVector ContextVec;
246 // Process from frame root to leaf
247 for (auto Address : Stack) {
248 uint64_t Offset = virtualAddrToOffset(Address);
249 const SampleContextFrameVector &ExpandedContext =
250 getFrameLocationStack(Offset);
251 // An instruction without a valid debug line will be ignored by sample
252 // processing
253 if (ExpandedContext.empty())
254 return SampleContextFrameVector();
255 // Set WasLeafInlined to the size of inlined frame count for the last
256 // address which is leaf
257 WasLeafInlined = (ExpandedContext.size() > 1);
258 ContextVec.append(ExpandedContext);
261 // Replace with decoded base discriminator
262 for (auto &Frame : ContextVec) {
263 Frame.Location.Discriminator = ProfileGeneratorBase::getBaseDiscriminator(
264 Frame.Location.Discriminator, UseFSDiscriminator);
267 assert(ContextVec.size() && "Context length should be at least 1");
269 // Compress the context string except for the leaf frame
270 auto LeafFrame = ContextVec.back();
271 LeafFrame.Location = LineLocation(0, 0);
272 ContextVec.pop_back();
273 CSProfileGenerator::compressRecursionContext(ContextVec);
274 CSProfileGenerator::trimContext(ContextVec);
275 ContextVec.push_back(LeafFrame);
276 return ContextVec;
279 template <class ELFT>
280 void ProfiledBinary::setPreferredTextSegmentAddresses(const ELFFile<ELFT> &Obj, StringRef FileName) {
281 const auto &PhdrRange = unwrapOrError(Obj.program_headers(), FileName);
282 // FIXME: This should be the page size of the system running profiling.
283 // However such info isn't available at post-processing time, assuming
284 // 4K page now. Note that we don't use EXEC_PAGESIZE from <linux/param.h>
285 // because we may build the tools on non-linux.
286 uint32_t PageSize = 0x1000;
287 for (const typename ELFT::Phdr &Phdr : PhdrRange) {
288 if (Phdr.p_type == ELF::PT_LOAD) {
289 if (!FirstLoadableAddress)
290 FirstLoadableAddress = Phdr.p_vaddr & ~(PageSize - 1U);
291 if (Phdr.p_flags & ELF::PF_X) {
292 // Segments will always be loaded at a page boundary.
293 PreferredTextSegmentAddresses.push_back(Phdr.p_vaddr &
294 ~(PageSize - 1U));
295 TextSegmentOffsets.push_back(Phdr.p_offset & ~(PageSize - 1U));
300 if (PreferredTextSegmentAddresses.empty())
301 exitWithError("no executable segment found", FileName);
304 void ProfiledBinary::setPreferredTextSegmentAddresses(const ELFObjectFileBase *Obj) {
305 if (const auto *ELFObj = dyn_cast<ELF32LEObjectFile>(Obj))
306 setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
307 else if (const auto *ELFObj = dyn_cast<ELF32BEObjectFile>(Obj))
308 setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
309 else if (const auto *ELFObj = dyn_cast<ELF64LEObjectFile>(Obj))
310 setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
311 else if (const auto *ELFObj = cast<ELF64BEObjectFile>(Obj))
312 setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
313 else
314 llvm_unreachable("invalid ELF object format");
317 void ProfiledBinary::decodePseudoProbe(const ELFObjectFileBase *Obj) {
318 if (UseDwarfCorrelation)
319 return;
321 StringRef FileName = Obj->getFileName();
322 for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
323 SI != SE; ++SI) {
324 const SectionRef &Section = *SI;
325 StringRef SectionName = unwrapOrError(Section.getName(), FileName);
327 if (SectionName == ".pseudo_probe_desc") {
328 StringRef Contents = unwrapOrError(Section.getContents(), FileName);
329 if (!ProbeDecoder.buildGUID2FuncDescMap(
330 reinterpret_cast<const uint8_t *>(Contents.data()),
331 Contents.size()))
332 exitWithError("Pseudo Probe decoder fail in .pseudo_probe_desc section");
333 } else if (SectionName == ".pseudo_probe") {
334 StringRef Contents = unwrapOrError(Section.getContents(), FileName);
335 if (!ProbeDecoder.buildAddress2ProbeMap(
336 reinterpret_cast<const uint8_t *>(Contents.data()),
337 Contents.size()))
338 exitWithError("Pseudo Probe decoder fail in .pseudo_probe section");
339 // set UsePseudoProbes flag, used for PerfReader
340 UsePseudoProbes = true;
344 if (ShowPseudoProbe)
345 ProbeDecoder.printGUID2FuncDescMap(outs());
348 void ProfiledBinary::setIsFuncEntry(uint64_t Offset, StringRef RangeSymName) {
349 // Note that the start offset of each ELF section can be a non-function
350 // symbol, we need to binary search for the start of a real function range.
351 auto *FuncRange = findFuncRangeForOffset(Offset);
352 // Skip external function symbol.
353 if (!FuncRange)
354 return;
356 // Set IsFuncEntry to ture if there is only one range in the function or the
357 // RangeSymName from ELF is equal to its DWARF-based function name.
358 if (FuncRange->Func->Ranges.size() == 1 ||
359 (!FuncRange->IsFuncEntry && FuncRange->getFuncName() == RangeSymName))
360 FuncRange->IsFuncEntry = true;
363 bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
364 SectionSymbolsTy &Symbols,
365 const SectionRef &Section) {
366 std::size_t SE = Symbols.size();
367 uint64_t SectionOffset = Section.getAddress() - getPreferredBaseAddress();
368 uint64_t SectSize = Section.getSize();
369 uint64_t StartOffset = Symbols[SI].Addr - getPreferredBaseAddress();
370 uint64_t NextStartOffset =
371 (SI + 1 < SE) ? Symbols[SI + 1].Addr - getPreferredBaseAddress()
372 : SectionOffset + SectSize;
373 setIsFuncEntry(StartOffset,
374 FunctionSamples::getCanonicalFnName(Symbols[SI].Name));
376 StringRef SymbolName =
377 ShowCanonicalFnName
378 ? FunctionSamples::getCanonicalFnName(Symbols[SI].Name)
379 : Symbols[SI].Name;
380 bool ShowDisassembly =
381 ShowDisassemblyOnly && (DisassembleFunctionSet.empty() ||
382 DisassembleFunctionSet.count(SymbolName));
383 if (ShowDisassembly)
384 outs() << '<' << SymbolName << ">:\n";
386 auto WarnInvalidInsts = [](uint64_t Start, uint64_t End) {
387 WithColor::warning() << "Invalid instructions at "
388 << format("%8" PRIx64, Start) << " - "
389 << format("%8" PRIx64, End) << "\n";
392 uint64_t Offset = StartOffset;
393 // Size of a consecutive invalid instruction range starting from Offset -1
394 // backwards.
395 uint64_t InvalidInstLength = 0;
396 while (Offset < NextStartOffset) {
397 MCInst Inst;
398 uint64_t Size;
399 // Disassemble an instruction.
400 bool Disassembled =
401 DisAsm->getInstruction(Inst, Size, Bytes.slice(Offset - SectionOffset),
402 Offset + getPreferredBaseAddress(), nulls());
403 if (Size == 0)
404 Size = 1;
406 if (ShowDisassembly) {
407 if (ShowPseudoProbe) {
408 ProbeDecoder.printProbeForAddress(outs(),
409 Offset + getPreferredBaseAddress());
411 outs() << format("%8" PRIx64 ":", Offset + getPreferredBaseAddress());
412 size_t Start = outs().tell();
413 if (Disassembled)
414 IPrinter->printInst(&Inst, Offset + Size, "", *STI.get(), outs());
415 else
416 outs() << "\t<unknown>";
417 if (ShowSourceLocations) {
418 unsigned Cur = outs().tell() - Start;
419 if (Cur < 40)
420 outs().indent(40 - Cur);
421 InstructionPointer IP(this, Offset);
422 outs() << getReversedLocWithContext(
423 symbolize(IP, ShowCanonicalFnName, ShowPseudoProbe));
425 outs() << "\n";
428 if (Disassembled) {
429 const MCInstrDesc &MCDesc = MII->get(Inst.getOpcode());
431 // Record instruction size.
432 Offset2InstSizeMap[Offset] = Size;
434 // Populate address maps.
435 CodeAddrOffsets.push_back(Offset);
436 if (MCDesc.isCall())
437 CallOffsets.insert(Offset);
438 else if (MCDesc.isReturn())
439 RetOffsets.insert(Offset);
440 else if (MCDesc.isBranch())
441 BranchOffsets.insert(Offset);
443 if (InvalidInstLength) {
444 WarnInvalidInsts(Offset - InvalidInstLength, Offset - 1);
445 InvalidInstLength = 0;
447 } else {
448 InvalidInstLength += Size;
451 Offset += Size;
454 if (InvalidInstLength)
455 WarnInvalidInsts(Offset - InvalidInstLength, Offset - 1);
457 if (ShowDisassembly)
458 outs() << "\n";
460 return true;
463 void ProfiledBinary::setUpDisassembler(const ELFObjectFileBase *Obj) {
464 const Target *TheTarget = getTarget(Obj);
465 std::string TripleName = TheTriple.getTriple();
466 StringRef FileName = Obj->getFileName();
468 MRI.reset(TheTarget->createMCRegInfo(TripleName));
469 if (!MRI)
470 exitWithError("no register info for target " + TripleName, FileName);
472 MCTargetOptions MCOptions;
473 AsmInfo.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
474 if (!AsmInfo)
475 exitWithError("no assembly info for target " + TripleName, FileName);
477 SubtargetFeatures Features = Obj->getFeatures();
478 STI.reset(
479 TheTarget->createMCSubtargetInfo(TripleName, "", Features.getString()));
480 if (!STI)
481 exitWithError("no subtarget info for target " + TripleName, FileName);
483 MII.reset(TheTarget->createMCInstrInfo());
484 if (!MII)
485 exitWithError("no instruction info for target " + TripleName, FileName);
487 MCContext Ctx(Triple(TripleName), AsmInfo.get(), MRI.get(), STI.get());
488 std::unique_ptr<MCObjectFileInfo> MOFI(
489 TheTarget->createMCObjectFileInfo(Ctx, /*PIC=*/false));
490 Ctx.setObjectFileInfo(MOFI.get());
491 DisAsm.reset(TheTarget->createMCDisassembler(*STI, Ctx));
492 if (!DisAsm)
493 exitWithError("no disassembler for target " + TripleName, FileName);
495 MIA.reset(TheTarget->createMCInstrAnalysis(MII.get()));
497 int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
498 IPrinter.reset(TheTarget->createMCInstPrinter(
499 Triple(TripleName), AsmPrinterVariant, *AsmInfo, *MII, *MRI));
500 IPrinter->setPrintBranchImmAsAddress(true);
503 void ProfiledBinary::disassemble(const ELFObjectFileBase *Obj) {
504 // Set up disassembler and related components.
505 setUpDisassembler(Obj);
507 // Create a mapping from virtual address to symbol name. The symbols in text
508 // sections are the candidates to dissassemble.
509 std::map<SectionRef, SectionSymbolsTy> AllSymbols;
510 StringRef FileName = Obj->getFileName();
511 for (const SymbolRef &Symbol : Obj->symbols()) {
512 const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName);
513 const StringRef Name = unwrapOrError(Symbol.getName(), FileName);
514 section_iterator SecI = unwrapOrError(Symbol.getSection(), FileName);
515 if (SecI != Obj->section_end())
516 AllSymbols[*SecI].push_back(SymbolInfoTy(Addr, Name, ELF::STT_NOTYPE));
519 // Sort all the symbols. Use a stable sort to stabilize the output.
520 for (std::pair<const SectionRef, SectionSymbolsTy> &SecSyms : AllSymbols)
521 stable_sort(SecSyms.second);
523 DisassembleFunctionSet.insert(DisassembleFunctions.begin(),
524 DisassembleFunctions.end());
525 assert((DisassembleFunctionSet.empty() || ShowDisassemblyOnly) &&
526 "Functions to disassemble should be only specified together with "
527 "--show-disassembly-only");
529 if (ShowDisassemblyOnly)
530 outs() << "\nDisassembly of " << FileName << ":\n";
532 // Dissassemble a text section.
533 for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
534 SI != SE; ++SI) {
535 const SectionRef &Section = *SI;
536 if (!Section.isText())
537 continue;
539 uint64_t ImageLoadAddr = getPreferredBaseAddress();
540 uint64_t SectionOffset = Section.getAddress() - ImageLoadAddr;
541 uint64_t SectSize = Section.getSize();
542 if (!SectSize)
543 continue;
545 // Register the text section.
546 TextSections.insert({SectionOffset, SectSize});
548 StringRef SectionName = unwrapOrError(Section.getName(), FileName);
550 if (ShowDisassemblyOnly) {
551 outs() << "\nDisassembly of section " << SectionName;
552 outs() << " [" << format("0x%" PRIx64, Section.getAddress()) << ", "
553 << format("0x%" PRIx64, Section.getAddress() + SectSize)
554 << "]:\n\n";
557 if (SectionName == ".plt")
558 continue;
560 // Get the section data.
561 ArrayRef<uint8_t> Bytes =
562 arrayRefFromStringRef(unwrapOrError(Section.getContents(), FileName));
564 // Get the list of all the symbols in this section.
565 SectionSymbolsTy &Symbols = AllSymbols[Section];
567 // Disassemble symbol by symbol.
568 for (std::size_t SI = 0, SE = Symbols.size(); SI != SE; ++SI) {
569 if (!dissassembleSymbol(SI, Bytes, Symbols, Section))
570 exitWithError("disassembling error", FileName);
574 // Dissassemble rodata section to check if FS discriminator symbol exists.
575 checkUseFSDiscriminator(Obj, AllSymbols);
578 void ProfiledBinary::checkUseFSDiscriminator(
579 const ELFObjectFileBase *Obj,
580 std::map<SectionRef, SectionSymbolsTy> &AllSymbols) {
581 const char *FSDiscriminatorVar = "__llvm_fs_discriminator__";
582 for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
583 SI != SE; ++SI) {
584 const SectionRef &Section = *SI;
585 if (!Section.isData() || Section.getSize() == 0)
586 continue;
587 SectionSymbolsTy &Symbols = AllSymbols[Section];
589 for (std::size_t SI = 0, SE = Symbols.size(); SI != SE; ++SI) {
590 if (Symbols[SI].Name == FSDiscriminatorVar) {
591 UseFSDiscriminator = true;
592 return;
598 void ProfiledBinary::loadSymbolsFromDWARF(ObjectFile &Obj) {
599 auto DebugContext = llvm::DWARFContext::create(Obj);
600 if (!DebugContext)
601 exitWithError("Misssing debug info.", Path);
603 for (const auto &CompilationUnit : DebugContext->compile_units()) {
604 for (const auto &DieInfo : CompilationUnit->dies()) {
605 llvm::DWARFDie Die(CompilationUnit.get(), &DieInfo);
607 if (!Die.isSubprogramDIE())
608 continue;
609 auto Name = Die.getName(llvm::DINameKind::LinkageName);
610 if (!Name)
611 Name = Die.getName(llvm::DINameKind::ShortName);
612 if (!Name)
613 continue;
615 auto RangesOrError = Die.getAddressRanges();
616 if (!RangesOrError)
617 continue;
618 const DWARFAddressRangesVector &Ranges = RangesOrError.get();
620 if (Ranges.empty())
621 continue;
623 // Different DWARF symbols can have same function name, search or create
624 // BinaryFunction indexed by the name.
625 auto Ret = BinaryFunctions.emplace(Name, BinaryFunction());
626 auto &Func = Ret.first->second;
627 if (Ret.second)
628 Func.FuncName = Ret.first->first;
630 for (const auto &Range : Ranges) {
631 uint64_t FuncStart = Range.LowPC;
632 uint64_t FuncSize = Range.HighPC - FuncStart;
634 if (FuncSize == 0 || FuncStart < getPreferredBaseAddress())
635 continue;
637 uint64_t StartOffset = FuncStart - getPreferredBaseAddress();
638 uint64_t EndOffset = Range.HighPC - getPreferredBaseAddress();
640 // We may want to know all ranges for one function. Here group the
641 // ranges and store them into BinaryFunction.
642 Func.Ranges.emplace_back(StartOffset, EndOffset);
644 auto R = StartOffset2FuncRangeMap.emplace(StartOffset, FuncRange());
645 if (R.second) {
646 FuncRange &FRange = R.first->second;
647 FRange.Func = &Func;
648 FRange.StartOffset = StartOffset;
649 FRange.EndOffset = EndOffset;
650 } else {
651 WithColor::warning()
652 << "Duplicated symbol start address at "
653 << format("%8" PRIx64, StartOffset + getPreferredBaseAddress())
654 << " " << R.first->second.getFuncName() << " and " << Name
655 << "\n";
660 assert(!StartOffset2FuncRangeMap.empty() && "Misssing debug info.");
663 void ProfiledBinary::populateSymbolListFromDWARF(
664 ProfileSymbolList &SymbolList) {
665 for (auto &I : StartOffset2FuncRangeMap)
666 SymbolList.add(I.second.getFuncName());
669 void ProfiledBinary::setupSymbolizer() {
670 symbolize::LLVMSymbolizer::Options SymbolizerOpts;
671 SymbolizerOpts.PrintFunctions =
672 DILineInfoSpecifier::FunctionNameKind::LinkageName;
673 SymbolizerOpts.Demangle = false;
674 SymbolizerOpts.DefaultArch = TheTriple.getArchName().str();
675 SymbolizerOpts.UseSymbolTable = false;
676 SymbolizerOpts.RelativeAddresses = false;
677 Symbolizer = std::make_unique<symbolize::LLVMSymbolizer>(SymbolizerOpts);
680 SampleContextFrameVector ProfiledBinary::symbolize(const InstructionPointer &IP,
681 bool UseCanonicalFnName,
682 bool UseProbeDiscriminator) {
683 assert(this == IP.Binary &&
684 "Binary should only symbolize its own instruction");
685 auto Addr = object::SectionedAddress{IP.Offset + getPreferredBaseAddress(),
686 object::SectionedAddress::UndefSection};
687 DIInliningInfo InlineStack =
688 unwrapOrError(Symbolizer->symbolizeInlinedCode(Path, Addr), getName());
690 SampleContextFrameVector CallStack;
691 for (int32_t I = InlineStack.getNumberOfFrames() - 1; I >= 0; I--) {
692 const auto &CallerFrame = InlineStack.getFrame(I);
693 if (CallerFrame.FunctionName == "<invalid>")
694 break;
696 StringRef FunctionName(CallerFrame.FunctionName);
697 if (UseCanonicalFnName)
698 FunctionName = FunctionSamples::getCanonicalFnName(FunctionName);
700 uint32_t Discriminator = CallerFrame.Discriminator;
701 uint32_t LineOffset = (CallerFrame.Line - CallerFrame.StartLine) & 0xffff;
702 if (UseProbeDiscriminator) {
703 LineOffset =
704 PseudoProbeDwarfDiscriminator::extractProbeIndex(Discriminator);
705 Discriminator = 0;
708 LineLocation Line(LineOffset, Discriminator);
709 auto It = NameStrings.insert(FunctionName.str());
710 CallStack.emplace_back(*It.first, Line);
713 return CallStack;
716 void ProfiledBinary::computeInlinedContextSizeForRange(uint64_t StartOffset,
717 uint64_t EndOffset) {
718 uint64_t RangeBegin = offsetToVirtualAddr(StartOffset);
719 uint64_t RangeEnd = offsetToVirtualAddr(EndOffset);
720 InstructionPointer IP(this, RangeBegin, true);
722 if (IP.Address != RangeBegin)
723 WithColor::warning() << "Invalid start instruction at "
724 << format("%8" PRIx64, RangeBegin) << "\n";
726 if (IP.Address >= RangeEnd)
727 return;
729 do {
730 uint64_t Offset = virtualAddrToOffset(IP.Address);
731 const SampleContextFrameVector &SymbolizedCallStack =
732 getFrameLocationStack(Offset, UsePseudoProbes);
733 uint64_t Size = Offset2InstSizeMap[Offset];
735 // Record instruction size for the corresponding context
736 FuncSizeTracker.addInstructionForContext(SymbolizedCallStack, Size);
738 } while (IP.advance() && IP.Address < RangeEnd);
741 InstructionPointer::InstructionPointer(const ProfiledBinary *Binary,
742 uint64_t Address, bool RoundToNext)
743 : Binary(Binary), Address(Address) {
744 Index = Binary->getIndexForAddr(Address);
745 if (RoundToNext) {
746 // we might get address which is not the code
747 // it should round to the next valid address
748 if (Index >= Binary->getCodeOffsetsSize())
749 this->Address = UINT64_MAX;
750 else
751 this->Address = Binary->getAddressforIndex(Index);
755 bool InstructionPointer::advance() {
756 Index++;
757 if (Index >= Binary->getCodeOffsetsSize()) {
758 Address = UINT64_MAX;
759 return false;
761 Address = Binary->getAddressforIndex(Index);
762 return true;
765 bool InstructionPointer::backward() {
766 if (Index == 0) {
767 Address = 0;
768 return false;
770 Index--;
771 Address = Binary->getAddressforIndex(Index);
772 return true;
775 void InstructionPointer::update(uint64_t Addr) {
776 Address = Addr;
777 Index = Binary->getIndexForAddr(Address);
780 } // end namespace sampleprof
781 } // end namespace llvm