1 //===-- ProfiledBinary.h - Binary decoder -----------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_TOOLS_LLVM_PROFGEN_PROFILEDBINARY_H
10 #define LLVM_TOOLS_LLVM_PROFGEN_PROFILEDBINARY_H
12 #include "CallContext.h"
13 #include "ErrorHandling.h"
14 #include "llvm/ADT/DenseMap.h"
15 #include "llvm/ADT/StringRef.h"
16 #include "llvm/ADT/StringSet.h"
17 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
18 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
19 #include "llvm/MC/MCAsmInfo.h"
20 #include "llvm/MC/MCContext.h"
21 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
22 #include "llvm/MC/MCInst.h"
23 #include "llvm/MC/MCInstPrinter.h"
24 #include "llvm/MC/MCInstrAnalysis.h"
25 #include "llvm/MC/MCInstrInfo.h"
26 #include "llvm/MC/MCObjectFileInfo.h"
27 #include "llvm/MC/MCPseudoProbe.h"
28 #include "llvm/MC/MCRegisterInfo.h"
29 #include "llvm/MC/MCSubtargetInfo.h"
30 #include "llvm/MC/MCTargetOptions.h"
31 #include "llvm/Object/ELFObjectFile.h"
32 #include "llvm/ProfileData/SampleProf.h"
33 #include "llvm/Support/CommandLine.h"
34 #include "llvm/Support/Path.h"
35 #include "llvm/Transforms/IPO/SampleContextTracker.h"
41 #include <unordered_map>
42 #include <unordered_set>
46 extern cl::opt
<bool> EnableCSPreInliner
;
47 extern cl::opt
<bool> UseContextCostForPreInliner
;
51 using namespace sampleprof
;
52 using namespace llvm::object
;
55 namespace sampleprof
{
58 class MissingFrameInferrer
;
60 struct InstructionPointer
{
61 const ProfiledBinary
*Binary
;
62 // Address of the executable segment of the binary.
64 // Index to the sorted code address array of the binary.
66 InstructionPointer(const ProfiledBinary
*Binary
, uint64_t Address
,
67 bool RoundToNext
= false);
70 void update(uint64_t Addr
);
73 // The special frame addresses.
74 enum SpecialFrameAddr
{
75 // Dummy root of frame trie.
77 // Represent all the addresses outside of current binary.
78 // This's also used to indicate the call stack should be truncated since this
79 // isn't a real call context the compiler will see.
83 using RangesTy
= std::vector
<std::pair
<uint64_t, uint64_t>>;
85 struct BinaryFunction
{
87 // End of range is an exclusive bound.
90 uint64_t getFuncSize() {
92 for (auto &R
: Ranges
) {
93 Sum
+= R
.second
- R
.first
;
99 // Info about function range. A function can be split into multiple
100 // non-continuous ranges, each range corresponds to one FuncRange.
102 uint64_t StartAddress
;
103 // EndAddress is an exclusive bound.
105 // Function the range belongs to
106 BinaryFunction
*Func
;
107 // Whether the start address is the real entry of the function.
108 bool IsFuncEntry
= false;
110 StringRef
getFuncName() { return Func
->FuncName
; }
113 // PrologEpilog address tracker, used to filter out broken stack samples
114 // Currently we use a heuristic size (two) to infer prolog and epilog
115 // based on the start address and return address. In the future,
116 // we will switch to Dwarf CFI based tracker
117 struct PrologEpilogTracker
{
118 // A set of prolog and epilog addresses. Used by virtual unwinding.
119 std::unordered_set
<uint64_t> PrologEpilogSet
;
120 ProfiledBinary
*Binary
;
121 PrologEpilogTracker(ProfiledBinary
*Bin
) : Binary(Bin
){};
123 // Take the two addresses from the start of function as prolog
125 inferPrologAddresses(std::map
<uint64_t, FuncRange
> &FuncStartAddressMap
) {
126 for (auto I
: FuncStartAddressMap
) {
127 PrologEpilogSet
.insert(I
.first
);
128 InstructionPointer
IP(Binary
, I
.first
);
131 PrologEpilogSet
.insert(IP
.Address
);
135 // Take the last two addresses before the return address as epilog
136 void inferEpilogAddresses(std::unordered_set
<uint64_t> &RetAddrs
) {
137 for (auto Addr
: RetAddrs
) {
138 PrologEpilogSet
.insert(Addr
);
139 InstructionPointer
IP(Binary
, Addr
);
142 PrologEpilogSet
.insert(IP
.Address
);
147 // Track function byte size under different context (outlined version as well as
148 // various inlined versions). It also provides query support to get function
149 // size with the best matching context, which is used to help pre-inliner use
150 // accurate post-optimization size to make decisions.
151 // TODO: If an inlinee is completely optimized away, ideally we should have zero
152 // for its context size, currently we would misss such context since it doesn't
153 // have instructions. To fix this, we need to mark all inlinee with entry probe
154 // but without instructions as having zero size.
155 class BinarySizeContextTracker
{
157 // Add instruction with given size to a context
158 void addInstructionForContext(const SampleContextFrameVector
&Context
,
161 // Get function size with a specific context. When there's no exact match
162 // for the given context, try to retrieve the size of that function from
163 // closest matching context.
164 uint32_t getFuncSizeForContext(const ContextTrieNode
*Context
);
166 // For inlinees that are full optimized away, we can establish zero size using
167 // their remaining probes.
168 void trackInlineesOptimizedAway(MCPseudoProbeDecoder
&ProbeDecoder
);
170 using ProbeFrameStack
= SmallVector
<std::pair
<StringRef
, uint32_t>>;
171 void trackInlineesOptimizedAway(MCPseudoProbeDecoder
&ProbeDecoder
,
172 MCDecodedPseudoProbeInlineTree
&ProbeNode
,
173 ProbeFrameStack
&Context
);
175 void dump() { RootContext
.dumpTree(); }
178 // Root node for context trie tree, node that this is a reverse context trie
179 // with callee as parent and caller as child. This way we can traverse from
180 // root to find the best/longest matching context if an exact match does not
181 // exist. It gives us the best possible estimate for function's post-inline,
182 // post-optimization byte size.
183 ContextTrieNode RootContext
;
186 using AddressRange
= std::pair
<uint64_t, uint64_t>;
188 class ProfiledBinary
{
189 // Absolute path of the executable binary.
191 // Path of the debug info binary.
192 std::string DebugBinaryPath
;
193 // The target triple.
195 // Path of symbolizer path which should be pointed to binary with debug info.
196 StringRef SymbolizerPath
;
197 // Options used to configure the symbolizer
198 symbolize::LLVMSymbolizer::Options SymbolizerOpts
;
199 // The runtime base address that the first executable segment is loaded at.
200 uint64_t BaseAddress
= 0;
201 // The runtime base address that the first loadabe segment is loaded at.
202 uint64_t FirstLoadableAddress
= 0;
203 // The preferred load address of each executable segment.
204 std::vector
<uint64_t> PreferredTextSegmentAddresses
;
205 // The file offset of each executable segment.
206 std::vector
<uint64_t> TextSegmentOffsets
;
208 // Mutiple MC component info
209 std::unique_ptr
<const MCRegisterInfo
> MRI
;
210 std::unique_ptr
<const MCAsmInfo
> AsmInfo
;
211 std::unique_ptr
<const MCSubtargetInfo
> STI
;
212 std::unique_ptr
<const MCInstrInfo
> MII
;
213 std::unique_ptr
<MCDisassembler
> DisAsm
;
214 std::unique_ptr
<const MCInstrAnalysis
> MIA
;
215 std::unique_ptr
<MCInstPrinter
> IPrinter
;
216 // A list of text sections sorted by start RVA and size. Used to check
217 // if a given RVA is a valid code address.
218 std::set
<std::pair
<uint64_t, uint64_t>> TextSections
;
220 // A map of mapping function name to BinaryFunction info.
221 std::unordered_map
<std::string
, BinaryFunction
> BinaryFunctions
;
223 // Lookup BinaryFunctions using the function name's MD5 hash. Needed if the
224 // profile is using MD5.
225 std::unordered_map
<uint64_t, BinaryFunction
*> HashBinaryFunctions
;
227 // A list of binary functions that have samples.
228 std::unordered_set
<const BinaryFunction
*> ProfiledFunctions
;
230 // GUID to Elf symbol start address map
231 DenseMap
<uint64_t, uint64_t> SymbolStartAddrs
;
233 // These maps are for temporary use of warning diagnosis.
234 DenseSet
<int64_t> AddrsWithMultipleSymbols
;
235 DenseSet
<std::pair
<uint64_t, uint64_t>> AddrsWithInvalidInstruction
;
237 // Start address to Elf symbol GUID map
238 std::unordered_multimap
<uint64_t, uint64_t> StartAddrToSymMap
;
240 // An ordered map of mapping function's start address to function range
241 // relevant info. Currently to determine if the offset of ELF is the start of
242 // a real function, we leverage the function range info from DWARF.
243 std::map
<uint64_t, FuncRange
> StartAddrToFuncRangeMap
;
245 // Address to context location map. Used to expand the context.
246 std::unordered_map
<uint64_t, SampleContextFrameVector
> AddressToLocStackMap
;
248 // Address to instruction size map. Also used for quick Address lookup.
249 std::unordered_map
<uint64_t, uint64_t> AddressToInstSizeMap
;
251 // An array of Addresses of all instructions sorted in increasing order. The
252 // sorting is needed to fast advance to the next forward/backward instruction.
253 std::vector
<uint64_t> CodeAddressVec
;
254 // A set of call instruction addresses. Used by virtual unwinding.
255 std::unordered_set
<uint64_t> CallAddressSet
;
256 // A set of return instruction addresses. Used by virtual unwinding.
257 std::unordered_set
<uint64_t> RetAddressSet
;
258 // An ordered set of unconditional branch instruction addresses.
259 std::set
<uint64_t> UncondBranchAddrSet
;
260 // A set of branch instruction addresses.
261 std::unordered_set
<uint64_t> BranchAddressSet
;
263 // Estimate and track function prolog and epilog ranges.
264 PrologEpilogTracker ProEpilogTracker
;
266 // Infer missing frames due to compiler optimizations such as tail call
268 std::unique_ptr
<MissingFrameInferrer
> MissingContextInferrer
;
270 // Track function sizes under different context
271 BinarySizeContextTracker FuncSizeTracker
;
273 // The symbolizer used to get inline context for an instruction.
274 std::unique_ptr
<symbolize::LLVMSymbolizer
> Symbolizer
;
276 // String table owning function name strings created from the symbolizer.
277 std::unordered_set
<std::string
> NameStrings
;
279 // A collection of functions to print disassembly for.
280 StringSet
<> DisassembleFunctionSet
;
282 // Pseudo probe decoder
283 MCPseudoProbeDecoder ProbeDecoder
;
285 // Function name to probe frame map for top-level outlined functions.
286 StringMap
<MCDecodedPseudoProbeInlineTree
*> TopLevelProbeFrameMap
;
288 bool UsePseudoProbes
= false;
290 bool UseFSDiscriminator
= false;
292 // Whether we need to symbolize all instructions to get function context size.
293 bool TrackFuncContextSize
= false;
295 // Indicate if the base loading address is parsed from the mmap event or uses
296 // the preferred address
297 bool IsLoadedByMMap
= false;
298 // Use to avoid redundant warning.
299 bool MissingMMapWarned
= false;
301 void setPreferredTextSegmentAddresses(const ELFObjectFileBase
*O
);
303 template <class ELFT
>
304 void setPreferredTextSegmentAddresses(const ELFFile
<ELFT
> &Obj
,
307 void checkPseudoProbe(const ELFObjectFileBase
*Obj
);
309 void decodePseudoProbe(const ELFObjectFileBase
*Obj
);
312 checkUseFSDiscriminator(const ELFObjectFileBase
*Obj
,
313 std::map
<SectionRef
, SectionSymbolsTy
> &AllSymbols
);
315 // Set up disassembler and related components.
316 void setUpDisassembler(const ELFObjectFileBase
*Obj
);
317 symbolize::LLVMSymbolizer::Options
getSymbolizerOpts() const;
319 // Load debug info of subprograms from DWARF section.
320 void loadSymbolsFromDWARF(ObjectFile
&Obj
);
322 // Load debug info from DWARF unit.
323 void loadSymbolsFromDWARFUnit(DWARFUnit
&CompilationUnit
);
325 // Create elf symbol to its start address mapping.
326 void populateElfSymbolAddressList(const ELFObjectFileBase
*O
);
328 // A function may be spilt into multiple non-continuous address ranges. We use
329 // this to set whether start a function range is the real entry of the
330 // function and also set false to the non-function label.
331 void setIsFuncEntry(FuncRange
*FRange
, StringRef RangeSymName
);
333 // Warn if no entry range exists in the function.
334 void warnNoFuncEntry();
336 /// Dissassemble the text section and build various address maps.
337 void disassemble(const ELFObjectFileBase
*O
);
339 /// Helper function to dissassemble the symbol and extract info for unwinding
340 bool dissassembleSymbol(std::size_t SI
, ArrayRef
<uint8_t> Bytes
,
341 SectionSymbolsTy
&Symbols
, const SectionRef
&Section
);
342 /// Symbolize a given instruction pointer and return a full call context.
343 SampleContextFrameVector
symbolize(const InstructionPointer
&IP
,
344 bool UseCanonicalFnName
= false,
345 bool UseProbeDiscriminator
= false);
346 /// Decode the interesting parts of the binary and build internal data
347 /// structures. On high level, the parts of interest are:
348 /// 1. Text sections, including the main code section and the PLT
349 /// entries that will be used to handle cross-module call transitions.
350 /// 2. The .debug_line section, used by Dwarf-based profile generation.
351 /// 3. Pseudo probe related sections, used by probe-based profile
356 ProfiledBinary(const StringRef ExeBinPath
, const StringRef DebugBinPath
);
359 void decodePseudoProbe();
361 StringRef
getPath() const { return Path
; }
362 StringRef
getName() const { return llvm::sys::path::filename(Path
); }
363 uint64_t getBaseAddress() const { return BaseAddress
; }
364 void setBaseAddress(uint64_t Address
) { BaseAddress
= Address
; }
366 // Canonicalize to use preferred load address as base address.
367 uint64_t canonicalizeVirtualAddress(uint64_t Address
) {
368 return Address
- BaseAddress
+ getPreferredBaseAddress();
370 // Return the preferred load address for the first executable segment.
371 uint64_t getPreferredBaseAddress() const {
372 return PreferredTextSegmentAddresses
[0];
374 // Return the preferred load address for the first loadable segment.
375 uint64_t getFirstLoadableAddress() const { return FirstLoadableAddress
; }
376 // Return the file offset for the first executable segment.
377 uint64_t getTextSegmentOffset() const { return TextSegmentOffsets
[0]; }
378 const std::vector
<uint64_t> &getPreferredTextSegmentAddresses() const {
379 return PreferredTextSegmentAddresses
;
381 const std::vector
<uint64_t> &getTextSegmentOffsets() const {
382 return TextSegmentOffsets
;
385 uint64_t getInstSize(uint64_t Address
) const {
386 auto I
= AddressToInstSizeMap
.find(Address
);
387 if (I
== AddressToInstSizeMap
.end())
392 bool addressIsCode(uint64_t Address
) const {
393 return AddressToInstSizeMap
.find(Address
) != AddressToInstSizeMap
.end();
396 bool addressIsCall(uint64_t Address
) const {
397 return CallAddressSet
.count(Address
);
399 bool addressIsReturn(uint64_t Address
) const {
400 return RetAddressSet
.count(Address
);
402 bool addressInPrologEpilog(uint64_t Address
) const {
403 return ProEpilogTracker
.PrologEpilogSet
.count(Address
);
406 bool addressIsTransfer(uint64_t Address
) {
407 return BranchAddressSet
.count(Address
) || RetAddressSet
.count(Address
) ||
408 CallAddressSet
.count(Address
);
411 bool rangeCrossUncondBranch(uint64_t Start
, uint64_t End
) {
414 auto R
= UncondBranchAddrSet
.lower_bound(Start
);
415 return R
!= UncondBranchAddrSet
.end() && *R
< End
;
418 uint64_t getAddressforIndex(uint64_t Index
) const {
419 return CodeAddressVec
[Index
];
422 size_t getCodeAddrVecSize() const { return CodeAddressVec
.size(); }
424 bool usePseudoProbes() const { return UsePseudoProbes
; }
425 bool useFSDiscriminator() const { return UseFSDiscriminator
; }
426 // Get the index in CodeAddressVec for the address
427 // As we might get an address which is not the code
428 // here it would round to the next valid code address by
429 // using lower bound operation
430 uint32_t getIndexForAddr(uint64_t Address
) const {
431 auto Low
= llvm::lower_bound(CodeAddressVec
, Address
);
432 return Low
- CodeAddressVec
.begin();
435 uint64_t getCallAddrFromFrameAddr(uint64_t FrameAddr
) const {
436 if (FrameAddr
== ExternalAddr
)
438 auto I
= getIndexForAddr(FrameAddr
);
439 FrameAddr
= I
? getAddressforIndex(I
- 1) : 0;
440 if (FrameAddr
&& addressIsCall(FrameAddr
))
445 FuncRange
*findFuncRangeForStartAddr(uint64_t Address
) {
446 auto I
= StartAddrToFuncRangeMap
.find(Address
);
447 if (I
== StartAddrToFuncRangeMap
.end())
452 // Binary search the function range which includes the input address.
453 FuncRange
*findFuncRange(uint64_t Address
) {
454 auto I
= StartAddrToFuncRangeMap
.upper_bound(Address
);
455 if (I
== StartAddrToFuncRangeMap
.begin())
459 if (Address
>= I
->second
.EndAddress
)
465 // Get all ranges of one function.
466 RangesTy
getRanges(uint64_t Address
) {
467 auto *FRange
= findFuncRange(Address
);
468 // Ignore the range which falls into plt section or system lib.
472 return FRange
->Func
->Ranges
;
475 const std::unordered_map
<std::string
, BinaryFunction
> &
476 getAllBinaryFunctions() {
477 return BinaryFunctions
;
480 std::unordered_set
<const BinaryFunction
*> &getProfiledFunctions() {
481 return ProfiledFunctions
;
484 void setProfiledFunctions(std::unordered_set
<const BinaryFunction
*> &Funcs
) {
485 ProfiledFunctions
= Funcs
;
488 BinaryFunction
*getBinaryFunction(FunctionId FName
) {
489 if (FName
.isStringRef()) {
490 auto I
= BinaryFunctions
.find(FName
.str());
491 if (I
== BinaryFunctions
.end())
495 auto I
= HashBinaryFunctions
.find(FName
.getHashCode());
496 if (I
== HashBinaryFunctions
.end())
501 uint32_t getFuncSizeForContext(const ContextTrieNode
*ContextNode
) {
502 return FuncSizeTracker
.getFuncSizeForContext(ContextNode
);
505 void inferMissingFrames(const SmallVectorImpl
<uint64_t> &Context
,
506 SmallVectorImpl
<uint64_t> &NewContext
);
508 // Load the symbols from debug table and populate into symbol list.
509 void populateSymbolListFromDWARF(ProfileSymbolList
&SymbolList
);
511 SampleContextFrameVector
512 getFrameLocationStack(uint64_t Address
, bool UseProbeDiscriminator
= false) {
513 InstructionPointer
IP(this, Address
);
514 return symbolize(IP
, SymbolizerOpts
.UseSymbolTable
, UseProbeDiscriminator
);
517 const SampleContextFrameVector
&
518 getCachedFrameLocationStack(uint64_t Address
,
519 bool UseProbeDiscriminator
= false) {
520 auto I
= AddressToLocStackMap
.emplace(Address
, SampleContextFrameVector());
522 I
.first
->second
= getFrameLocationStack(Address
, UseProbeDiscriminator
);
524 return I
.first
->second
;
527 std::optional
<SampleContextFrame
> getInlineLeafFrameLoc(uint64_t Address
) {
528 const auto &Stack
= getCachedFrameLocationStack(Address
);
534 void flushSymbolizer() { Symbolizer
.reset(); }
536 MissingFrameInferrer
*getMissingContextInferrer() {
537 return MissingContextInferrer
.get();
540 // Compare two addresses' inline context
541 bool inlineContextEqual(uint64_t Add1
, uint64_t Add2
);
543 // Get the full context of the current stack with inline context filled in.
544 // It will search the disassembling info stored in AddressToLocStackMap. This
545 // is used as the key of function sample map
546 SampleContextFrameVector
547 getExpandedContext(const SmallVectorImpl
<uint64_t> &Stack
,
548 bool &WasLeafInlined
);
549 // Go through instructions among the given range and record its size for the
551 void computeInlinedContextSizeForRange(uint64_t StartAddress
,
552 uint64_t EndAddress
);
554 void computeInlinedContextSizeForFunc(const BinaryFunction
*Func
);
556 const MCDecodedPseudoProbe
*getCallProbeForAddr(uint64_t Address
) const {
557 return ProbeDecoder
.getCallProbeForAddr(Address
);
560 void getInlineContextForProbe(const MCDecodedPseudoProbe
*Probe
,
561 SampleContextFrameVector
&InlineContextStack
,
562 bool IncludeLeaf
= false) const {
563 SmallVector
<MCPseduoProbeFrameLocation
, 16> ProbeInlineContext
;
564 ProbeDecoder
.getInlineContextForProbe(Probe
, ProbeInlineContext
,
566 for (uint32_t I
= 0; I
< ProbeInlineContext
.size(); I
++) {
567 auto &Callsite
= ProbeInlineContext
[I
];
568 // Clear the current context for an unknown probe.
569 if (Callsite
.second
== 0 && I
!= ProbeInlineContext
.size() - 1) {
570 InlineContextStack
.clear();
573 InlineContextStack
.emplace_back(FunctionId(Callsite
.first
),
574 LineLocation(Callsite
.second
, 0));
577 const AddressProbesMap
&getAddress2ProbesMap() const {
578 return ProbeDecoder
.getAddress2ProbesMap();
580 const MCPseudoProbeFuncDesc
*getFuncDescForGUID(uint64_t GUID
) {
581 return ProbeDecoder
.getFuncDescForGUID(GUID
);
584 const MCPseudoProbeFuncDesc
*
585 getInlinerDescForProbe(const MCDecodedPseudoProbe
*Probe
) {
586 return ProbeDecoder
.getInlinerDescForProbe(Probe
);
589 bool getTrackFuncContextSize() { return TrackFuncContextSize
; }
591 bool getIsLoadedByMMap() { return IsLoadedByMMap
; }
593 void setIsLoadedByMMap(bool Value
) { IsLoadedByMMap
= Value
; }
595 bool getMissingMMapWarned() { return MissingMMapWarned
; }
597 void setMissingMMapWarned(bool Value
) { MissingMMapWarned
= Value
; }
600 } // end namespace sampleprof
601 } // end namespace llvm