//===-- ProfiledBinary.h - Binary decoder -----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TOOLS_LLVM_PROFGEN_PROFILEDBINARY_H
#define LLVM_TOOLS_LLVM_PROFGEN_PROFILEDBINARY_H

#include "CallContext.h"
#include "ErrorHandling.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/Symbolize/Symbolize.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCPseudoProbe.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Path.h"
#include "llvm/Transforms/IPO/SampleContextTracker.h"
#include <map>
#include <set>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

extern cl::opt<bool> EnableCSPreInliner;
extern cl::opt<bool> UseContextCostForPreInliner;

using namespace llvm;
using namespace sampleprof;
using namespace llvm::object;

namespace llvm {
namespace sampleprof {

class ProfiledBinary;

struct InstructionPointer {
  const ProfiledBinary *Binary;
  union {
    // Offset relative to the start of the executable segment of the binary.
    uint64_t Offset = 0;
    // Also used as an address in the unwinder.
    uint64_t Address;
  };
  // Index into the sorted code address array of the binary.
  uint64_t Index = 0;
  InstructionPointer(const ProfiledBinary *Binary, uint64_t Address,
                     bool RoundToNext = false);
  bool advance();
  bool backward();
  void update(uint64_t Addr);
};
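
// Usage sketch for InstructionPointer (illustrative; `RawAddr` and `visit`
// are hypothetical): a raw sample address that may not sit on an instruction
// boundary can be snapped forward, then stepped instruction by instruction:
//   InstructionPointer IP(Binary, RawAddr, /*RoundToNext=*/true);
//   if (IP.advance())
//     visit(IP.Offset); // offset of the following instruction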

// The special frame addresses.
enum SpecialFrameAddr {
  // Dummy root of the frame trie.
  DummyRoot = 0,
  // Represents all the addresses outside of the current binary.
  // This is also used to indicate that the call stack should be truncated,
  // since this isn't a real call context the compiler will see.
  ExternalAddr = 1,
};
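
// For example, an LBR entry whose source lies in a shared library rather
// than in this binary is represented as ExternalAddr, and the unwinder
// truncates the reconstructed call stack there instead of fabricating a
// cross-binary context.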

using RangesTy = std::vector<std::pair<uint64_t, uint64_t>>;

struct BinaryFunction {
  StringRef FuncName;
  // The end of each range is an exclusive bound.
  RangesTy Ranges;

  uint64_t getFuncSize() {
    uint64_t Sum = 0;
    for (auto &R : Ranges) {
      Sum += R.second - R.first;
    }
    return Sum;
  }
};
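
// Example (hypothetical ranges): Ranges = {[0x100, 0x180), [0x200, 0x220)}
// gives getFuncSize() = 0x80 + 0x20 = 0xa0 bytes; each range end is an
// exclusive bound.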

// Info about a function range. A function can be split into multiple
// non-continuous ranges; each range corresponds to one FuncRange.
struct FuncRange {
  uint64_t StartOffset;
  // EndOffset is an exclusive bound.
  uint64_t EndOffset;
  // Function the range belongs to.
  BinaryFunction *Func;
  // Whether the start offset is the real entry of the function.
  bool IsFuncEntry = false;

  StringRef getFuncName() { return Func->FuncName; }
};

// Prolog/epilog offset tracker, used to filter out broken stack samples.
// Currently we use a heuristic size (two) to infer the prolog and epilog
// based on the start address and return address. In the future,
// we will switch to a Dwarf CFI based tracker.
struct PrologEpilogTracker {
  // A set of prolog and epilog offsets. Used by virtual unwinding.
  std::unordered_set<uint64_t> PrologEpilogSet;
  ProfiledBinary *Binary;
  PrologEpilogTracker(ProfiledBinary *Bin) : Binary(Bin){};

  // Take the first two instruction addresses of each function as its prolog.
  void inferPrologOffsets(std::map<uint64_t, FuncRange> &FuncStartOffsetMap) {
    for (auto I : FuncStartOffsetMap) {
      PrologEpilogSet.insert(I.first);
      InstructionPointer IP(Binary, I.first);
      if (!IP.advance())
        break;
      PrologEpilogSet.insert(IP.Offset);
    }
  }

  // Take each return address and the address preceding it as the epilog.
  void inferEpilogOffsets(std::unordered_set<uint64_t> &RetAddrs) {
    for (auto Addr : RetAddrs) {
      PrologEpilogSet.insert(Addr);
      InstructionPointer IP(Binary, Addr);
      if (!IP.backward())
        break;
      PrologEpilogSet.insert(IP.Offset);
    }
  }
};
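
// Illustration of the size-two heuristic (hypothetical offsets): for a
// function entry at 0x1000 whose first instruction is 4 bytes long,
// inferPrologOffsets records {0x1000, 0x1004}; for a return instruction at
// 0x10f8 preceded by a 2-byte instruction, inferEpilogOffsets records
// {0x10f8, 0x10f6}. Samples landing on these offsets are treated as being in
// a prolog/epilog, where the stack may not be well-formed.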

// Track function byte size under different contexts (the outlined version as
// well as various inlined versions). It also provides query support to get
// function size with the best matching context, which is used to help the
// pre-inliner make decisions based on accurate post-optimization size.
// TODO: If an inlinee is completely optimized away, ideally we should have
// zero for its context size; currently we would miss such a context since it
// doesn't have instructions. To fix this, we need to mark all inlinees with
// an entry probe but without instructions as having zero size.
class BinarySizeContextTracker {
public:
  // Add an instruction with the given size to a context.
  void addInstructionForContext(const SampleContextFrameVector &Context,
                                uint32_t InstrSize);

  // Get function size with a specific context. When there's no exact match
  // for the given context, try to retrieve the size of that function from
  // the closest matching context.
  uint32_t getFuncSizeForContext(const SampleContext &Context);

  // For inlinees that are fully optimized away, we can establish zero size
  // using their remaining probes.
  void trackInlineesOptimizedAway(MCPseudoProbeDecoder &ProbeDecoder);

  void dump() { RootContext.dumpTree(); }

private:
  using ProbeFrameStack = SmallVector<std::pair<StringRef, uint32_t>>;
  void trackInlineesOptimizedAway(MCPseudoProbeDecoder &ProbeDecoder,
                                  MCDecodedPseudoProbeInlineTree &ProbeNode,
                                  ProbeFrameStack &Context);

  // Root node for the context trie; note that this is a reverse context trie
  // with the callee as parent and the caller as child. This way we can
  // traverse from the root to find the best/longest matching context if an
  // exact match does not exist. It gives us the best possible estimate for a
  // function's post-inline, post-optimization byte size.
  ContextTrieNode RootContext;
};
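
// Query illustration (hypothetical contexts): sizes recorded for contexts
// [main -> foo] and [bar -> foo] both live under the trie node of foo, since
// the reverse trie is rooted at the callee. A query for [baz -> foo] has no
// exact match, so the traversal from foo stops at the deepest matching node
// and returns the closest recorded size for foo.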

using OffsetRange = std::pair<uint64_t, uint64_t>;

class ProfiledBinary {
  // Absolute path of the binary.
  std::string Path;
  // The target triple.
  Triple TheTriple;
  // The runtime base address that the first executable segment is loaded at.
  uint64_t BaseAddress = 0;
  // The runtime base address that the first loadable segment is loaded at.
  uint64_t FirstLoadableAddress = 0;
  // The preferred load address of each executable segment.
  std::vector<uint64_t> PreferredTextSegmentAddresses;
  // The file offset of each executable segment.
  std::vector<uint64_t> TextSegmentOffsets;

  // Multiple MC component info.
  std::unique_ptr<const MCRegisterInfo> MRI;
  std::unique_ptr<const MCAsmInfo> AsmInfo;
  std::unique_ptr<const MCSubtargetInfo> STI;
  std::unique_ptr<const MCInstrInfo> MII;
  std::unique_ptr<MCDisassembler> DisAsm;
  std::unique_ptr<const MCInstrAnalysis> MIA;
  std::unique_ptr<MCInstPrinter> IPrinter;
  // A list of text sections sorted by start RVA and size. Used to check
  // if a given RVA is a valid code address.
  std::set<std::pair<uint64_t, uint64_t>> TextSections;

  // A map from function name to BinaryFunction info.
  std::unordered_map<std::string, BinaryFunction> BinaryFunctions;

  // An ordered map from a function's start offset to its function range
  // info. Currently, to determine if an offset in the ELF is the start of a
  // real function, we leverage the function range info from DWARF.
  std::map<uint64_t, FuncRange> StartOffset2FuncRangeMap;

  // Offset to context location map. Used to expand the context.
  std::unordered_map<uint64_t, SampleContextFrameVector> Offset2LocStackMap;

  // Offset to instruction size map. Also used for quick offset lookup.
  std::unordered_map<uint64_t, uint64_t> Offset2InstSizeMap;

  // An array of offsets of all instructions sorted in increasing order. The
  // sorting is needed to quickly advance to the next forward/backward
  // instruction.
  std::vector<uint64_t> CodeAddrOffsets;
  // A set of call instruction offsets. Used by virtual unwinding.
  std::unordered_set<uint64_t> CallOffsets;
  // A set of return instruction offsets. Used by virtual unwinding.
  std::unordered_set<uint64_t> RetOffsets;
  // A set of branch instruction offsets.
  std::unordered_set<uint64_t> BranchOffsets;

  // Estimate and track function prolog and epilog ranges.
  PrologEpilogTracker ProEpilogTracker;

  // Track function sizes under different contexts.
  BinarySizeContextTracker FuncSizeTracker;

  // The symbolizer used to get inline context for an instruction.
  std::unique_ptr<symbolize::LLVMSymbolizer> Symbolizer;

  // String table owning function name strings created from the symbolizer.
  std::unordered_set<std::string> NameStrings;

  // A collection of functions to print disassembly for.
  StringSet<> DisassembleFunctionSet;

  // Pseudo probe decoder.
  MCPseudoProbeDecoder ProbeDecoder;

  bool UsePseudoProbes = false;

  bool UseFSDiscriminator = false;

  // Whether we need to symbolize all instructions to get function context
  // size.
  bool TrackFuncContextSize = false;

  // Indicates whether the base loading address is parsed from the mmap event
  // or uses the preferred address.
  bool IsLoadedByMMap = false;
  // Used to avoid redundant warnings.
  bool MissingMMapWarned = false;

  void setPreferredTextSegmentAddresses(const ELFObjectFileBase *O);

  template <class ELFT>
  void setPreferredTextSegmentAddresses(const ELFFile<ELFT> &Obj,
                                        StringRef FileName);

  void decodePseudoProbe(const ELFObjectFileBase *Obj);

  void
  checkUseFSDiscriminator(const ELFObjectFileBase *Obj,
                          std::map<SectionRef, SectionSymbolsTy> &AllSymbols);

  // Set up the disassembler and related components.
  void setUpDisassembler(const ELFObjectFileBase *Obj);
  void setupSymbolizer();

  // Load debug info of subprograms from the DWARF section.
  void loadSymbolsFromDWARF(ObjectFile &Obj);

  // A function may be split into multiple non-continuous address ranges. We
  // use this to set whether the start offset of a function is the real entry
  // of the function, and also to set false on non-function labels.
  void setIsFuncEntry(uint64_t Offset, StringRef RangeSymName);

  // Warn if no entry range exists in the function.
  void warnNoFuncEntry();

  /// Disassemble the text section and build various address maps.
  void disassemble(const ELFObjectFileBase *O);

  /// Helper function to disassemble the symbol and extract info for
  /// unwinding.
  bool dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
                          SectionSymbolsTy &Symbols, const SectionRef &Section);
  /// Symbolize a given instruction pointer and return a full call context.
  SampleContextFrameVector symbolize(const InstructionPointer &IP,
                                     bool UseCanonicalFnName = false,
                                     bool UseProbeDiscriminator = false);
  /// Decode the interesting parts of the binary and build internal data
  /// structures. On a high level, the parts of interest are:
  ///   1. Text sections, including the main code section and the PLT
  ///   entries that will be used to handle cross-module call transitions.
  ///   2. The .debug_line section, used by Dwarf-based profile generation.
  ///   3. Pseudo probe related sections, used by probe-based profile
  ///   generation.
  void load();

public:
  ProfiledBinary(const StringRef Path)
      : Path(Path), ProEpilogTracker(this),
        TrackFuncContextSize(EnableCSPreInliner &&
                             UseContextCostForPreInliner) {
    setupSymbolizer();
    load();
  }
  uint64_t virtualAddrToOffset(uint64_t VirtualAddress) const {
    return VirtualAddress - BaseAddress;
  }
  uint64_t offsetToVirtualAddr(uint64_t Offset) const {
    return Offset + BaseAddress;
  }
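  // Worked example for the two translations above (hypothetical numbers):
  // with BaseAddress = 0x400000, a sampled virtual address 0x401a2c maps to
  // offset 0x1a2c, and offsetToVirtualAddr(0x1a2c) maps it back. Keying the
  // internal maps by offset keeps them independent of the runtime load
  // address.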
  StringRef getPath() const { return Path; }
  StringRef getName() const { return llvm::sys::path::filename(Path); }
  uint64_t getBaseAddress() const { return BaseAddress; }
  void setBaseAddress(uint64_t Address) { BaseAddress = Address; }

  // Return the preferred load address for the first executable segment.
  uint64_t getPreferredBaseAddress() const {
    return PreferredTextSegmentAddresses[0];
  }
  // Return the preferred load address for the first loadable segment.
  uint64_t getFirstLoadableAddress() const { return FirstLoadableAddress; }
  // Return the file offset for the first executable segment.
  uint64_t getTextSegmentOffset() const { return TextSegmentOffsets[0]; }
  const std::vector<uint64_t> &getPreferredTextSegmentAddresses() const {
    return PreferredTextSegmentAddresses;
  }
  const std::vector<uint64_t> &getTextSegmentOffsets() const {
    return TextSegmentOffsets;
  }

  uint64_t getInstSize(uint64_t Offset) const {
    auto I = Offset2InstSizeMap.find(Offset);
    if (I == Offset2InstSizeMap.end())
      return 0;
    return I->second;
  }

  bool offsetIsCode(uint64_t Offset) const {
    return Offset2InstSizeMap.find(Offset) != Offset2InstSizeMap.end();
  }
  bool addressIsCode(uint64_t Address) const {
    uint64_t Offset = virtualAddrToOffset(Address);
    return offsetIsCode(Offset);
  }
  bool addressIsCall(uint64_t Address) const {
    uint64_t Offset = virtualAddrToOffset(Address);
    return CallOffsets.count(Offset);
  }
  bool addressIsReturn(uint64_t Address) const {
    uint64_t Offset = virtualAddrToOffset(Address);
    return RetOffsets.count(Offset);
  }
  bool addressInPrologEpilog(uint64_t Address) const {
    uint64_t Offset = virtualAddrToOffset(Address);
    return ProEpilogTracker.PrologEpilogSet.count(Offset);
  }

  bool offsetIsTransfer(uint64_t Offset) {
    return BranchOffsets.count(Offset) || RetOffsets.count(Offset) ||
           CallOffsets.count(Offset);
  }

  uint64_t getAddressforIndex(uint64_t Index) const {
    return offsetToVirtualAddr(CodeAddrOffsets[Index]);
  }

  size_t getCodeOffsetsSize() const { return CodeAddrOffsets.size(); }

  bool usePseudoProbes() const { return UsePseudoProbes; }
  bool useFSDiscriminator() const { return UseFSDiscriminator; }
  // Get the index in CodeAddrOffsets for the given offset. Since we might
  // get an address that is not a valid code address, round it up to the
  // next valid code address via a lower-bound search.
  uint32_t getIndexForOffset(uint64_t Offset) const {
    auto Low = llvm::lower_bound(CodeAddrOffsets, Offset);
    return Low - CodeAddrOffsets.begin();
  }
  uint32_t getIndexForAddr(uint64_t Address) const {
    uint64_t Offset = virtualAddrToOffset(Address);
    return getIndexForOffset(Offset);
  }
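
  // Rounding example (hypothetical offsets): if CodeAddrOffsets is
  // {0x0, 0x4, 0x8}, getIndexForOffset(0x4) returns 1 (exact hit), while
  // getIndexForOffset(0x5) returns 2, rounding up to the next valid code
  // address 0x8.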

  uint64_t getCallAddrFromFrameAddr(uint64_t FrameAddr) const {
    if (FrameAddr == ExternalAddr)
      return ExternalAddr;
    auto I = getIndexForAddr(FrameAddr);
    FrameAddr = I ? getAddressforIndex(I - 1) : 0;
    if (FrameAddr && addressIsCall(FrameAddr))
      return FrameAddr;
    return 0;
  }
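
  // Illustration (hypothetical addresses): a non-leaf frame address is a
  // return address, i.e. the instruction after the call. Stepping back one
  // indexed instruction from 0x401234 might land on a call at 0x40122f,
  // which is then returned; if the preceding instruction is not a call, 0 is
  // returned to flag a broken frame.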

  FuncRange *findFuncRangeForStartOffset(uint64_t Offset) {
    auto I = StartOffset2FuncRangeMap.find(Offset);
    if (I == StartOffset2FuncRangeMap.end())
      return nullptr;
    return &I->second;
  }

  // Binary search for the function range that includes the input offset.
  FuncRange *findFuncRangeForOffset(uint64_t Offset) {
    auto I = StartOffset2FuncRangeMap.upper_bound(Offset);
    if (I == StartOffset2FuncRangeMap.begin())
      return nullptr;
    I--;

    if (Offset >= I->second.EndOffset)
      return nullptr;

    return &I->second;
  }

  // Get all ranges of one function.
  RangesTy getRangesForOffset(uint64_t Offset) {
    auto *FRange = findFuncRangeForOffset(Offset);
    // Ignore the range if it falls into the plt section or a system lib.
    if (!FRange)
      return RangesTy();

    return FRange->Func->Ranges;
  }

  const std::unordered_map<std::string, BinaryFunction> &
  getAllBinaryFunctions() {
    return BinaryFunctions;
  }

  BinaryFunction *getBinaryFunction(StringRef FName) {
    auto I = BinaryFunctions.find(FName.str());
    if (I == BinaryFunctions.end())
      return nullptr;
    return &I->second;
  }

  uint32_t getFuncSizeForContext(SampleContext &Context) {
    return FuncSizeTracker.getFuncSizeForContext(Context);
  }

  // Load the symbols from the debug table and populate them into the symbol
  // list.
  void populateSymbolListFromDWARF(ProfileSymbolList &SymbolList);

  const SampleContextFrameVector &
  getFrameLocationStack(uint64_t Offset, bool UseProbeDiscriminator = false) {
    auto I = Offset2LocStackMap.emplace(Offset, SampleContextFrameVector());
    if (I.second) {
      InstructionPointer IP(this, Offset);
      I.first->second = symbolize(IP, true, UseProbeDiscriminator);
    }
    return I.first->second;
  }
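
  // Note on the caching above: emplace returns an {iterator, inserted} pair,
  // so symbolization runs only the first time an offset is queried; later
  // queries are served from Offset2LocStackMap.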

  Optional<SampleContextFrame> getInlineLeafFrameLoc(uint64_t Offset) {
    const auto &Stack = getFrameLocationStack(Offset);
    if (Stack.empty())
      return {};
    return Stack.back();
  }

  // Compare the inline contexts of two addresses.
  bool inlineContextEqual(uint64_t Add1, uint64_t Add2);

  // Get the full context of the current stack with inline context filled in.
  // It will search the disassembly info stored in Offset2LocStackMap. This is
  // used as the key of the function sample map.
  SampleContextFrameVector
  getExpandedContext(const SmallVectorImpl<uint64_t> &Stack,
                     bool &WasLeafInlined);
  // Go through the instructions in the given range and record each one's size
  // for its inline context.
  void computeInlinedContextSizeForRange(uint64_t StartOffset,
                                         uint64_t EndOffset);

  const MCDecodedPseudoProbe *getCallProbeForAddr(uint64_t Address) const {
    return ProbeDecoder.getCallProbeForAddr(Address);
  }

  void getInlineContextForProbe(const MCDecodedPseudoProbe *Probe,
                                SampleContextFrameVector &InlineContextStack,
                                bool IncludeLeaf = false) const {
    SmallVector<MCPseduoProbeFrameLocation, 16> ProbeInlineContext;
    ProbeDecoder.getInlineContextForProbe(Probe, ProbeInlineContext,
                                          IncludeLeaf);
    for (uint32_t I = 0; I < ProbeInlineContext.size(); I++) {
      auto &Callsite = ProbeInlineContext[I];
      // Clear the current context for an unknown probe.
      if (Callsite.second == 0 && I != ProbeInlineContext.size() - 1) {
        InlineContextStack.clear();
        continue;
      }
      InlineContextStack.emplace_back(Callsite.first,
                                      LineLocation(Callsite.second, 0));
    }
  }
  const AddressProbesMap &getAddress2ProbesMap() const {
    return ProbeDecoder.getAddress2ProbesMap();
  }
  const MCPseudoProbeFuncDesc *getFuncDescForGUID(uint64_t GUID) {
    return ProbeDecoder.getFuncDescForGUID(GUID);
  }

  const MCPseudoProbeFuncDesc *
  getInlinerDescForProbe(const MCDecodedPseudoProbe *Probe) {
    return ProbeDecoder.getInlinerDescForProbe(Probe);
  }

  bool getTrackFuncContextSize() { return TrackFuncContextSize; }

  bool getIsLoadedByMMap() { return IsLoadedByMMap; }

  void setIsLoadedByMMap(bool Value) { IsLoadedByMMap = Value; }

  bool getMissingMMapWarned() { return MissingMMapWarned; }

  void setMissingMMapWarned(bool Value) { MissingMMapWarned = Value; }
};

} // end namespace sampleprof
} // end namespace llvm

#endif