2 /*--------------------------------------------------------------------*/
3 /*--- Format-neutral storage of and querying of info acquired from ---*/
4 /*--- ELF/XCOFF stabs/dwarf1/dwarf2 debug info. ---*/
5 /*--- priv_storage.h ---*/
6 /*--------------------------------------------------------------------*/
9 This file is part of Valgrind, a dynamic binary instrumentation
12 Copyright (C) 2000-2017 Julian Seward
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
30 The GNU General Public License is contained in the file COPYING.
33 Stabs reader greatly improved by Nick Nethercote, Apr 02.
34 This module was also extensively hacked on by Jeremy Fitzhardinge
37 /* See comment at top of debuginfo.c for explanation of
38 the _svma / _avma / _image / _bias naming scheme.
40 /* Note this is not freestanding; needs pub_core_xarray.h and
41 priv_tytypes.h to be included before it. */
43 #ifndef __PRIV_STORAGE_H
44 #define __PRIV_STORAGE_H
46 #include "pub_core_basics.h" // Addr
47 #include "pub_core_xarray.h" // XArray
48 #include "pub_core_deduppoolalloc.h" // DedupPoolAlloc
49 #include "priv_d3basics.h" // GExpr et al.
50 #include "priv_image.h" // DiCursor
52 /* --------------------- SYMBOLS --------------------- */
54 /* A structure to hold an ELF/MachO symbol (very crudely). Usually
55 the symbol only has one name, which is stored in ::pri_name, and
56 ::sec_names is NULL. If there are other names, these are stored in
57 ::sec_names, which is a NULL terminated vector holding the names.
58 The vector is allocated in VG_AR_DINFO, the names themselves live
59 in DebugInfo::strpool.
61 From the point of view of ELF, the primary vs secondary distinction
62 is artificial: they are all just names associated with the address,
63 none of which has higher precedence than any other. However, from
64 the point of view of mapping an address to a name to display to the
65 user, we need to choose one "preferred" name, and so that might as
66 well be installed as the pri_name, whilst all others can live in
67 sec_names[]. This has the convenient side effect that, in the
68 common case where there is only one name for the address,
69 sec_names[] does not need to be allocated.
73 SymAVMAs avmas
; /* Symbol Actual VMAs: lowest address of entity,
74 + platform specific fields, to access with
75 the macros defined in pub_core_debuginfo.h */
76 const HChar
* pri_name
; /* primary name, never NULL */
77 const HChar
** sec_names
; /* NULL, or a NULL term'd array of other names */
78 // XXX: DiSym could be shrunk (on 32-bit platforms to exactly 16
79 // bytes, on 64-bit platforms the first 3 pointers already add
80 // up to 24 bytes, so size plus bits will extend to 32 bytes
81 // anyway) by using 29 bits for the size and 1 bit each for
82 // isText, isIFunc and isGlobal. If you do this, make sure that
83 // all assignments to the latter two use 0 or 1 (or True or
84 // False), and that a positive number larger than 1 is never
85 // used to represent True.
86 UInt size
; /* size in bytes */
88 Bool isIFunc
; /* symbol is an indirect function? */
89 Bool isGlobal
; /* Is this symbol globally visible? */
93 /* --------------------- SRCLOCS --------------------- */
95 /* Line count at which overflow happens, due to line numbers being
96 stored as shorts in `struct nlist' in a.out.h. */
97 #define LINENO_OVERFLOW (1 << (sizeof(short) * 8))
/* Bit budget for a packed source-location record: LINENO_BITS bits
   are used as the bitfield width of a line number, and the remaining
   LOC_SIZE_BITS (out of 32) as the bitfield width of the size of the
   address range. */
99 #define LINENO_BITS 20
100 #define LOC_SIZE_BITS (32 - LINENO_BITS)
/* Largest line number that fits in LINENO_BITS bits. */
101 #define MAX_LINENO ((1 << LINENO_BITS) - 1)
103 /* Unlikely to have any lines with instruction ranges > 4096 bytes */
/* Largest range size that fits in LOC_SIZE_BITS bits. */
104 #define MAX_LOC_SIZE ((1 << LOC_SIZE_BITS) - 1)
106 /* Number used to detect line number overflows; if one line is
107 60000-odd smaller than the previous, it was probably an overflow.
109 #define OVERFLOW_DIFFERENCE (LINENO_OVERFLOW - 5000)
111 /* Filename and Dirname pair. FnDn are stored in di->fndnpool
112 and are allocated using VG_(allocFixedEltDedupPA).
113 The filename/dirname strings are themselves stored in di->strpool. */
116 const HChar
* filename
; /* source filename */
117 const HChar
* dirname
; /* source directory name */
120 /* A structure to hold addr-to-source info for a single line. There
121 can be a lot of these, hence the dense packing. */
125 Addr addr
; /* lowest address for this line */
127 UShort size
:LOC_SIZE_BITS
; /* # bytes; we catch overflows of this */
128 UInt lineno
:LINENO_BITS
; /* source line number, or zero */
/* Bitfield width used for the inlining level: whatever is left of a
   32-bit word once LINENO_BITS are taken by the line number.
   MAX_LEVEL is the largest level value that fits in that width. */
132 #define LEVEL_BITS (32 - LINENO_BITS)
133 #define MAX_LEVEL ((1 << LEVEL_BITS) - 1)
135 /* A structure to hold addr-to-inlined fn info. There
136 can be a lot of these, hence the dense packing.
137 Only caller source filename and lineno are stored.
138 Handling dirname should be done using fndn_ix technique
139 similar to ML_(addLineInfo). */
143 Addr addr_lo
; /* lowest address for inlined fn */
145 Addr addr_hi
; /* highest address following the inlined fn */
147 const HChar
* inlinedfn
; /* inlined function name */
149 UInt fndn_ix
; /* index in di->fndnpool of caller source
151 UInt lineno
:LINENO_BITS
; /* caller line number */
152 UShort level
:LEVEL_BITS
; /* level of inlining */
156 /* --------------------- CF INFO --------------------- */
158 /* DiCfSI: a structure to summarise DWARF2/3 CFA info for the code
159 address range [base .. base+len-1].
161 On x86 and amd64 ("IA"), if you know ({e,r}sp, {e,r}bp, {e,r}ip) at
162 some point and {e,r}ip is in the range [base .. base+len-1], it
163 tells you how to calculate ({e,r}sp, {e,r}bp) for the caller of the
164 current frame and also ra, the return address of the current frame.
166 First off, calculate CFA, the Canonical Frame Address, thusly:
168 cfa = case cfa_how of
169 CFIC_IA_SPREL -> {e,r}sp + cfa_off
170 CFIC_IA_BPREL -> {e,r}bp + cfa_off
171 CFIC_EXPR -> expr whose index is in cfa_off
173 Once that is done, the previous frame's {e,r}sp/{e,r}bp values and
174 this frame's {e,r}ra value can be calculated like this:
176 old_{e,r}sp/{e,r}bp/ra
177 = case {e,r}sp/{e,r}bp/ra_how of
178 CFIR_UNKNOWN -> we don't know, sorry
179 CFIR_SAME -> same as it was before (sp/fp only)
180 CFIR_CFAREL -> cfa + sp/bp/ra_off
181 CFIR_MEMCFAREL -> *( cfa + sp/bp/ra_off )
182 CFIR_EXPR -> expr whose index is in sp/bp/ra_off
184 On ARM it's pretty much the same, except we have more registers to
187 cfa = case cfa_how of
188 CFIC_ARM_R13REL -> r13 + cfa_off
189 CFIC_ARM_R12REL -> r12 + cfa_off
190 CFIC_ARM_R11REL -> r11 + cfa_off
191 CFIC_ARM_R7REL -> r7 + cfa_off
192 CFIC_EXPR -> expr whose index is in cfa_off
194 old_r14/r13/r12/r11/r7/ra
195 = case r14/r13/r12/r11/r7/ra_how of
196 CFIR_UNKNOWN -> we don't know, sorry
197 CFIR_SAME -> same as it was before (r14/r13/r12/r11/r7 only)
198 CFIR_CFAREL -> cfa + r14/r13/r12/r11/r7/ra_off
199 CFIR_MEMCFAREL -> *( cfa + r14/r13/r12/r11/r7/ra_off )
200 CFIR_EXPR -> expr whose index is in r14/r13/r12/r11/r7/ra_off
204 cfa = case cfa_how of
205 CFIC_ARM64_SPREL -> sp + cfa_off
206 CFIC_ARM64_X29REL -> x29 + cfa_off
207 CFIC_EXPR -> expr whose index is in cfa_off
210 = case sp/x30/x29/ra_how of
211 CFIR_UNKNOWN -> we don't know, sorry
212 CFIR_SAME -> same as it was before
213 CFIR_CFAREL -> cfa + sp/x30/x29/ra_off
214 CFIR_MEMCFAREL -> *( cfa + sp/x30/x29/ra_off )
215 CFIR_EXPR -> expr whose index is in sp/x30/x29/ra_off
217 On s390x we have a similar logic as x86 or amd64. We need the stack pointer
218 (r15), the frame pointer r11 (like BP) and together with the instruction
219 address in the PSW we can calculate the previous values:
220 cfa = case cfa_how of
221 CFIC_IA_SPREL -> r15 + cfa_off
222 CFIC_IA_BPREL -> r11 + cfa_off
223 CFIC_EXPR -> expr whose index is in cfa_off
226 = case sp/fp/ra_how of
227 CFIR_UNKNOWN -> we don't know, sorry
228 CFIR_SAME -> same as it was before (sp/fp only)
229 CFIR_CFAREL -> cfa + sp/fp/ra_off
230 CFIR_MEMCFAREL -> *( cfa + sp/fp/ra_off )
231 CFIR_EXPR -> expr whose index is in sp/fp/ra_off
/* CFIC_*: the possible values of a cfa_how field, selecting how the
   CFA is computed.  The IA values appear in the x86/amd64 and s390x
   descriptions, the ARM and ARM64 values in the descriptions for
   those targets; CFIC_EXPR is shared by all targets. */
234 #define CFIC_IA_SPREL ((UChar)1)
235 #define CFIC_IA_BPREL ((UChar)2)
236 #define CFIC_ARM_R13REL ((UChar)3)
237 #define CFIC_ARM_R12REL ((UChar)4)
238 #define CFIC_ARM_R11REL ((UChar)5)
239 #define CFIC_ARM_R7REL ((UChar)6)
240 #define CFIC_ARM64_SPREL ((UChar)7)
241 #define CFIC_ARM64_X29REL ((UChar)8)
242 #define CFIC_EXPR ((UChar)9) /* all targets */
/* CFIR_*: the possible values of the per-register <reg>_how fields,
   selecting how a register's value in the caller's frame is
   recovered.  They are numbered from 64, so they do not overlap the
   CFIC_ values (1..9). */
244 #define CFIR_UNKNOWN ((UChar)64)
245 #define CFIR_SAME ((UChar)65)
246 #define CFIR_CFAREL ((UChar)66)
247 #define CFIR_MEMCFAREL ((UChar)67)
248 #define CFIR_EXPR ((UChar)68)
250 /* Definition of the DiCfSI_m DiCfSI machine dependent part.
251 These are highly duplicated, and are stored in a pool. */
252 #if defined(VGA_x86) || defined(VGA_amd64)
255 UChar cfa_how
; /* a CFIC_IA value */
256 UChar ra_how
; /* a CFIR_ value */
257 UChar sp_how
; /* a CFIR_ value */
258 UChar bp_how
; /* a CFIR_ value */
265 #elif defined(VGA_arm)
268 UChar cfa_how
; /* a CFIC_ value */
269 UChar ra_how
; /* a CFIR_ value */
270 UChar r14_how
; /* a CFIR_ value */
271 UChar r13_how
; /* a CFIR_ value */
272 UChar r12_how
; /* a CFIR_ value */
273 UChar r11_how
; /* a CFIR_ value */
274 UChar r7_how
; /* a CFIR_ value */
282 // If you add additional fields, don't forget to update the
283 // initialisation of this in readexidx.c accordingly.
286 #elif defined(VGA_arm64)
289 UChar cfa_how
; /* a CFIC_ value */
290 UChar ra_how
; /* a CFIR_ value */
291 UChar sp_how
; /* a CFIR_ value */ /*dw31=SP*/
292 UChar x30_how
; /* a CFIR_ value */ /*dw30=LR*/
293 UChar x29_how
; /* a CFIR_ value */ /*dw29=FP*/
301 #elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
302 /* Just have a struct with the common fields in, so that code that
303 processes the common fields doesn't have to be ifdef'd against
304 VGP_/VGA_ symbols. These are not used in any way on ppc32/64-linux
308 UChar cfa_how
; /* a CFIC_ value */
309 UChar ra_how
; /* a CFIR_ value */
314 #elif defined(VGA_s390x)
317 UChar cfa_how
; /* a CFIC_ value */
318 UChar sp_how
; /* a CFIR_ value */
319 UChar ra_how
; /* a CFIR_ value */
320 UChar fp_how
; /* a CFIR_ value */
327 #elif defined(VGA_mips32) || defined(VGA_mips64)
330 UChar cfa_how
; /* a CFIC_ value */
331 UChar ra_how
; /* a CFIR_ value */
332 UChar sp_how
; /* a CFIR_ value */
333 UChar fp_how
; /* a CFIR_ value */
341 # error "Unknown arch"
/* Constructors for CfiExpr nodes.  Each takes the XArray 'dst' to
   build into and returns an Int.
   NOTE(review): presumably the returned Int is the index in 'dst' of
   the newly added node, and the Int operand arguments (ixAddr, ix,
   ixL, ixR) are indices of nodes previously added to the same array
   -- confirm against the definitions. */
441 extern Int
ML_(CfiExpr_Undef
) ( XArray
* dst
);
442 extern Int
ML_(CfiExpr_Deref
) ( XArray
* dst
, Int ixAddr
);
443 extern Int
ML_(CfiExpr_Const
) ( XArray
* dst
, UWord con
);
444 extern Int
ML_(CfiExpr_Unop
) ( XArray
* dst
, CfiUnop op
, Int ix
);
445 extern Int
ML_(CfiExpr_Binop
) ( XArray
* dst
, CfiBinop op
, Int ixL
, Int ixR
);
446 extern Int
ML_(CfiExpr_CfiReg
)( XArray
* dst
, CfiReg reg
);
447 extern Int
ML_(CfiExpr_DwReg
) ( XArray
* dst
, Int reg
);
/* Takes a (read-only) expression array 'src' and an index 'ix'.
   NOTE(review): presumably prints the expression rooted at 'ix' --
   confirm against the definition. */
449 extern void ML_(ppCfiExpr
)( const XArray
* src
, Int ix
);
451 /* ---------------- FPO INFO (Windows PE) -------------- */
453 /* for apps using Wine: MSVC++ PDB FramePointerOmitted: somewhat like
456 struct _FPO_DATA
{ /* 16 bytes */
457 UInt ulOffStart
; /* offset of 1st byte of function code */
458 UInt cbProcSize
; /* # bytes in function */
459 UInt cdwLocals
; /* # bytes/4 in locals */
460 UShort cdwParams
; /* # bytes/4 in params */
461 UChar cbProlog
; /* # bytes in prolog */
462 UChar cbRegs
:3; /* # regs saved */
463 UChar fHasSEH
:1; /* Structured Exception Handling */
464 UChar fUseBP
:1; /* EBP has been used */
466 UChar cbFrame
:2; /* frame type */
/* Frame-type codes.
   NOTE(review): presumably these are the values held in the 2-bit
   cbFrame ("frame type") field of struct _FPO_DATA -- confirm
   against the PDB reader. */
470 #define PDB_FRAME_FPO 0
471 #define PDB_FRAME_TRAP 1
472 #define PDB_FRAME_TSS 2
474 /* --------------------- VARIABLES --------------------- */
480 XArray
* /* of DiVariable */ vars
;
486 const HChar
* name
; /* in DebugInfo.strpool */
487 UWord typeR
; /* a cuOff */
488 const GExpr
* gexpr
; /* on DebugInfo.gexprs list */
489 const GExpr
* fbGX
; /* SHARED. */
490 UInt fndn_ix
; /* where declared; may be zero. index
491 in DebugInfo.fndnpool */
492 Int lineNo
; /* where declared; may be zero. */
497 ML_(cmp_for_DiAddrRange_range
) ( const void* keyV
, const void* elemV
);
499 /* --------------------- DEBUGINFO --------------------- */
501 /* This is the top-level data type. It's a structure which contains
502 information pertaining to one mapped ELF object. This type is
503 exported only abstractly - in pub_tool_debuginfo.h. */
505 /* First though, here's an auxiliary data structure. It is only ever
506 used as part of a struct _DebugInfo. We use it to record
507 observations about mappings and permission changes to the
508 associated file, so as to decide when to read debug info. It's
509 essentially an ultra-trivial finite state machine which, when it
510 reaches an accept state, signals that we should now read debug info
511 from the object into the associated struct _DebugInfo. The accept
512 state is arrived at when have_rx_map and have_rw_map both become
513 true. The initial state is one in which we have no observations,
514 so have_rx_map and have_rw_map are both false.
516 This all started as a rather ad-hoc solution, but was further
517 expanded to handle weird object layouts, e.g. more than one rw
518 or rx mapping for one binary.
520 The normal sequence of events is one of
522 start --> r-x mapping --> rw- mapping --> accept
523 start --> rw- mapping --> r-x mapping --> accept
525 that is, take the first r-x and rw- mapping we see, and we're done.
527 On MacOSX >= 10.7, 32-bit, there appears to be a new variant:
529 start --> r-- mapping --> rw- mapping
530 --> upgrade r-- mapping to r-x mapping --> accept
532 where the upgrade is done by a call to mach_vm_protect (OSX 10.7)
533 or kernelrpc_mach_vm_protect_trap (OSX 10.9 and possibly 10.8).
534 Hence we need to also track this possibility.
536 From perusal of dyld sources, it appears that this scheme could
537 also be used for 64 bit libraries, although that doesn't seem to happen
538 in practice. dyld uses this scheme when the text section requires
539 relocation, which only appears to be the case for 32 bit objects.
544 Addr avma
; /* these fields record the file offset, length */
545 SizeT size
; /* and map address of each mapping */
547 Bool rx
, rw
, ro
; /* memory access flags for this mapping */
552 HChar
* filename
; /* in mallocville (VG_AR_DINFO) */
553 HChar
* dbgname
; /* in mallocville (VG_AR_DINFO) */
554 XArray
* maps
; /* XArray of DebugInfoMapping structs */
555 Bool have_rx_map
; /* did we see a r?x mapping yet for the file? */
556 Bool have_rw_map
; /* did we see a rw? mapping yet for the file? */
557 Bool have_ro_map
; /* did we see a r-- mapping yet for the file? */
561 /* To do with the string table in struct _DebugInfo (::strpool) */
562 #define SEGINFO_STRPOOLSIZE (64*1024)
565 /* We may encounter more than one .eh_frame section in an object --
566 unusual but apparently allowed by ELF. See
567 http://sourceware.org/bugzilla/show_bug.cgi?id=12675
/* Capacity of the per-object ehframe_avma[] and ehframe_size[]
   arrays; the n_ehframe count ranges over 0 .. N_EHFRAME_SECTS. */
569 #define N_EHFRAME_SECTS 2
572 /* So, the main structure for holding debug info for one object. */
578 struct _DebugInfo
* next
; /* list of DebugInfos */
579 Bool mark
; /* marked for deletion? */
581 /* An abstract handle, which can be used by entities outside of
582 m_debuginfo to (in an abstract datatype sense) refer to this
583 struct _DebugInfo. A .handle of zero is invalid; valid handles
584 are 1 and above. The same handle is never issued twice (in any
585 given run of Valgrind), so a handle becomes invalid when the
586 associated struct _DebugInfo is discarded, and remains invalid
587 forever thereafter. The .handle field is set as soon as this
588 structure is allocated. */
591 /* The range of epochs for which this DebugInfo is valid. These also
592 divide the DebugInfo's lifetime into three parts:
594 (1) Allocated: but with only .fsm holding useful info -- in
595 particular, not yet holding any debug info.
596 .first_epoch == DebugInfoEpoch_INVALID
597 .last_epoch == DebugInfoEpoch_INVALID
599 (2) Active: containing debug info, and current.
600 .first_epoch != DebugInfoEpoch_INVALID
601 .last_epoch == DebugInfoEpoch_INVALID
603 (3) Archived: containing debug info, but no longer current.
604 .first_epoch != DebugInfoEpoch_INVALID
605 .last_epoch != DebugInfoEpoch_INVALID
607 State (2) corresponds to an object which is currently mapped. When
608 the object is unmapped, what happens depends on the setting of
611 * when =no, the DebugInfo is removed from debugInfo_list and
614 * when =yes, the DebugInfo is retained in debugInfo_list, but its
615 .last_epoch field is filled in, and current_epoch is advanced. This
616 effectively moves the DebugInfo into state (3).
621 /* Used for debugging only - indicate what stuff to dump whilst
622 reading stuff into the seginfo. Are computed as early in the
623 lifetime of the DebugInfo as possible -- at the point when it is
624 created. Use these when deciding what to spew out; do not use
625 the global VG_(clo_blah) flags. */
627 Bool trace_symtab
; /* symbols, our style */
628 Bool trace_cfi
; /* dwarf frame unwind, our style */
629 Bool ddump_syms
; /* mimic /usr/bin/readelf --syms */
630 Bool ddump_line
; /* mimic /usr/bin/readelf --debug-dump=line */
631 Bool ddump_frames
; /* mimic /usr/bin/readelf --debug-dump=frames */
633 /* The "decide when it is time to read debuginfo" state machine.
634 This structure must get filled in before we can start reading
635 anything from the ELF/MachO file. This structure is filled in
636 by VG_(di_notify_mmap) and its immediate helpers. */
637 struct _DebugInfoFSM fsm
;
639 /* Once the ::fsm has reached an accept state -- typically, when
640 both a rw? and r?x mapping for .filename have been observed --
641 we can go on to read the symbol tables and debug info.
642 .have_dinfo changes from False to True when the debug info has
643 been completely read in and postprocessed (canonicalised) and is
644 now suitable for querying. */
645 /* If have_dinfo is False, then all fields below this point are
646 invalid and should not be consulted. */
647 Bool have_dinfo
; /* initially False */
649 /* All the rest of the fields in this structure are filled in once
650 we have committed to reading the symbols and debug info (that
651 is, at the point where .have_dinfo is set to True). */
653 /* The file's soname. */
656 /* Description of some important mapped segments. The presence or
657 absence of the mapping is denoted by the _present field, since
658 in some obscure circumstances (to do with data/sdata/bss) it is
659 possible for the mapping to be present but have zero size.
660 Certainly text_ is mandatory on all platforms; not sure about
663 --------------------------------------------------------
665 Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS: we require that
667 either (size of all rx maps == 0 && cfsi == NULL) (the degenerate case)
669 or the normal case, which is the AND of the following:
670 (0) size of at least one rx mapping > 0
671 (1) no two non-archived DebugInfos with some rx mapping of size > 0
672 have overlapping rx mappings
673 (2) [cfsi_minavma,cfsi_maxavma] does not extend beyond
674 [avma,+size) of one rx mapping; that is, the former
675 is a subrange or equal to the latter.
676 (3) all DiCfSI in the cfsi array have ranges that fall within
677 [avma,+size) of that rx mapping.
678 (4) all DiCfSI in the cfsi array are non-overlapping
680 The cumulative effect of these restrictions is to ensure that
681 all the DiCfSI records in the entire system are non overlapping.
682 Hence any address falls into either exactly one DiCfSI record,
683 or none. Hence it is safe to cache the results of searches for
684 DiCfSI records. This is the whole point of these restrictions.
685 The caching of DiCfSI searches is done in VG_(use_CF_info). The
686 cache is flushed after any change to debugInfo_list. DiCfSI
687 searches are cached because they are central to stack unwinding
690 Where are these invariants imposed and checked?
692 They are checked after a successful read of debuginfo into
693 a DebugInfo*, in check_CFSI_related_invariants.
695 (1) is not really imposed anywhere. We simply assume that the
696 kernel will not map the text segments from two different objects
697 into the same space. Sounds reasonable.
699 (2) follows from (4) and (3). It is ensured by canonicaliseCFI.
700 (3) is ensured by ML_(addDiCfSI).
701 (4) is ensured by canonicaliseCFI.
703 --------------------------------------------------------
705 Comment_on_DEBUG_SVMA_and_DEBUG_BIAS_fields:
707 The _debug_{svma,bias} fields were added as part of a fix to
708 #185816. The problem encompassed in that bug report was that it
709 wasn't correct to apply the bias values deduced for a
710 primary object to its associated debuginfo object, because the
711 debuginfo object (or the primary) could have been prelinked to a
712 different SVMA. Hence debuginfo and primary objects need to
713 have their own biases.
715 ------ JRS: (referring to r9329): ------
716 Let me see if I understand the workings correctly. Initially
717 the _debug_ values are set to the same values as the "normal"
718 ones, as there's a bunch of bits of code like this (in
721 di->text_svma = svma;
723 di->text_bias = rx_bias;
724 di->text_debug_svma = svma;
725 di->text_debug_bias = rx_bias;
727 If a debuginfo object subsequently shows up then the
728 _debug_svma/bias are set for the debuginfo object. Result is
729 that if there's no debuginfo object then the values are the same
730 as the primary-object values, and if there is a debuginfo object
731 then they will (or at least may) be different.
733 Then when we need to actually bias something, we'll have to
734 decide whether to use the primary bias or the debuginfo bias.
735 And the strategy is to use the primary bias for ELF symbols but
736 the debuginfo bias for anything pulled out of Dwarf.
739 Correct - the debug_svma and bias values apply to any address
740 read from the debug data regardless of where that debug data is
741 stored and the other values are used for addresses from other
742 places (primarily the symbol table).
745 Ok; so this was my only area of concern. Are there any
746 corner-case scenarios where this wouldn't be right? It sounds
747 like we're assuming the ELF symbols come from the primary object
748 and, if there is a debug object, then all the Dwarf comes from
749 there. But what if (eg) both symbols and Dwarf come from the
750 debug object? Is that even possible or allowable?
753 You may have a point...
755 The current logic is to try and take any one set of data from
756 either the base object or the debug object. There are four sets
764 If we see the primary section for a given set in the base object
765 then we ignore all sections relating to that set in the debug
768 Now in principle if we saw a secondary section (like debug_line
769 say) in the base object, but not the main section (debug_info in
770 this case) then we would take debug_info from the debug object
771 but would use the debug_line from the base object unless we saw
772 a replacement copy in the debug object. That's probably unlikely
775 A bigger issue might be, as you say, the symbol table as we will
776 pick that up from the debug object if it isn't in the base. The
777 dynamic symbol table will always have to be in the base object
778 though so we will have to be careful when processing symbols to
779 know which table we are reading in that case.
781 What we probably need to do is tell read_elf_symtab which object
782 the symbols it is being asked to read came from.
784 (A followup patch to deal with this was committed in r9469).
792 Addr text_debug_svma
;
793 PtrdiffT text_debug_bias
;
800 Addr data_debug_svma
;
801 PtrdiffT data_debug_bias
;
808 Addr sdata_debug_svma
;
809 PtrdiffT sdata_debug_bias
;
815 PtrdiffT rodata_bias
;
816 Addr rodata_debug_svma
;
817 PtrdiffT rodata_debug_bias
;
825 PtrdiffT bss_debug_bias
;
832 Addr sbss_debug_svma
;
833 PtrdiffT sbss_debug_bias
;
834 /* .ARM.exidx -- sometimes present on arm32, containing unwind info. */
840 /* .ARM.extab -- sometimes present on arm32, containing unwind info. */
858 /* .opd -- needed on ppc64be-linux for finding symbols */
862 /* .ehframe -- needed on amd64-linux for stack unwinding. We might
863 see more than one, hence the arrays. */
864 UInt n_ehframe
; /* 0 .. N_EHFRAME_SECTS */
865 Addr ehframe_avma
[N_EHFRAME_SECTS
];
866 SizeT ehframe_size
[N_EHFRAME_SECTS
];
868 /* Sorted tables of stuff we snarfed from the file. This is the
869 eventual product of reading the debug info. All this stuff
870 lives in VG_AR_DINFO. */
872 /* An expandable array of symbols. */
876 /* Two expandable arrays, storing locations and their filename/dirname. */
878 UInt sizeof_fndn_ix
; /* Similar use as sizeof_cfsi_m_ix below. */
879 void* loctab_fndn_ix
; /* loctab[i] filename/dirname is identified by
880 loctab_fndn_ix[i] (an index in di->fndnpool)
881 0 means filename/dirname unknown.
882 The void* is an UChar* or UShort* or UInt*
883 depending on sizeof_fndn_ix. */
886 /* An expandable array of inlined fn info.
887 maxinl_codesz is the biggest inlined piece of code
888 in inltab (i.e. the max of 'addr_hi - addr_lo'). */
894 /* A set of expandable arrays to store CFI summary info records.
895 The machine specific information (i.e. the DiCfSI_m struct)
896 are stored in cfsi_m_pool, as these are highly duplicated.
897 The DiCfSI_m are allocated in cfsi_m_pool and identified using
898 a (we hope) small integer : often one byte is enough, sometimes
901 cfsi_base contains the bases of the code address ranges.
902 cfsi_size is the size of the cfsi_base array.
903 The elements cfsi_base[0] till cfsi_base[cfsi_used-1] are used.
904 Following elements are not used (yet).
906 For each base in cfsi_base, an index into cfsi_m_pool is stored
907 in cfsi_m_ix array. The size of cfsi_m_ix is equal to
908 cfsi_size*sizeof_cfsi_m_ix. The used portion of cfsi_m_ix is
909 cfsi_m_ix[0] till cfsi_m_ix[(cfsi_used-1)*sizeof_cfsi_m_ix].
911 cfsi_base[i] gives the base address of a code range covered by
912 some CF Info. The corresponding CF Info is identified by an index
913 in cfsi_m_pool. The DiCfSI_m index in cfsi_m_pool corresponding to
914 cfsi_base[i] is given
915 by ((UChar*) cfsi_m_ix)[i] if sizeof_cfsi_m_ix == 1
916 by ((UShort*)cfsi_m_ix)[i] if sizeof_cfsi_m_ix == 2
917 by ((UInt*) cfsi_m_ix)[i] if sizeof_cfsi_m_ix == 4.
919 The end of the code range starting at cfsi_base[i] is given by
920 cfsi_base[i+1]-1 (or cfsi_maxavma for cfsi_base[cfsi_used-1]).
921 Some code ranges between cfsi_minavma and cfsi_maxavma might not
922 be covered by cfi information. Such not covered ranges are stored by
923 a base in cfsi_base and a corresponding 0 index in cfsi_m_ix.
925 A variable size representation has been chosen for the elements of
926 cfsi_m_ix as in many cases one byte is good enough. For big
927 objects, 2 bytes are needed. No object has yet been found where
928 4 bytes are needed (but the code is ready to handle this case).
929 Not covered ranges ('cfi holes') are stored explicitly in
930 cfsi_base/cfsi_m_ix as this is more memory efficient than storing
931 a length for each covered range : on x86 or amd64, we typically have
932 a hole every 8 covered ranges. On arm64, we have very few holes
933 (1 every 50 or 100 ranges).
935 The cfsi information is read and prepared in the cfsi_rd array.
936 Once all the information has been read, the cfsi_base and cfsi_m_ix
937 arrays will be filled in from cfsi_rd. cfsi_rd will then be freed.
938 This is all done by ML_(finish_CFSI_arrays).
940 Also includes summary address bounds, showing the min and max address
941 covered by any of the records, as an aid to fast searching. And, if the
942 records require any expression nodes, they are stored in
945 UInt sizeof_cfsi_m_ix
; /* size in byte of indexes stored in cfsi_m_ix. */
946 void* cfsi_m_ix
; /* Each index occupies sizeof_cfsi_m_ix bytes.
947 The void* is an UChar* or UShort* or UInt*
948 depending on sizeof_cfsi_m_ix. */
950 DiCfSI
* cfsi_rd
; /* Only used during reading, NULL once info is read. */
955 DedupPoolAlloc
*cfsi_m_pool
;
958 XArray
* cfsi_exprs
; /* XArray of CfiExpr */
960 /* Optimized code under Wine x86: MSVC++ PDB FramePointerOmitted
961 data. Non-expandable array, hence .size == .used. */
968 /* Pool of strings -- the string table. Pointers
969 into this are stable (the memory is not reallocated). */
970 DedupPoolAlloc
*strpool
;
972 /* Pool of FnDn -- filename and dirname.
973 Elements in the pool are allocated using VG_(allocFixedEltDedupPA). */
974 DedupPoolAlloc
*fndnpool
;
976 /* Variable scope information, as harvested from Dwarf3 files.
980 array of (array of PC address ranges and variables)
982 The outer array indexes over scopes, with Entry 0 containing
983 information on variables which exist for any value of the program
984 counter (PC) -- that is, the outermost scope. Entries 1, 2, 3,
985 etc contain information on increasingly deeply nested variables.
987 Each inner array is an array of (an address range, and a set
988 of variables that are in scope over that address range).
990 The address ranges may not overlap.
992 Since Entry 0 in the outer array holds information on variables
993 that exist for any value of the PC (that is, global vars), it
994 follows that Entry 0's inner array can only have one address
995 range pair, one that covers the entire address space.
997 XArray
* /* of OSet of DiAddrRange */varinfo
;
999 /* These are arrays of the relevant typed objects, held here
1000 partially for the purposes of visiting each object exactly once
1001 when we need to delete them. */
1003 /* An array of TyEnts. These are needed to make sense of any types
1004 in the .varinfo. Also, when deleting this DebugInfo, we must
1005 first traverse this array and throw away malloc'd stuff hanging
1006 off it -- by calling ML_(TyEnt__make_EMPTY) on each entry. */
1007 XArray
* /* of TyEnt */ admin_tyents
;
1009 /* An array of guarded DWARF3 expressions. */
1010 XArray
* admin_gexprs
;
1012 /* Cached last rx mapping matched and returned by ML_(find_rx_mapping).
1013 This helps performance a lot during ML_(addLineInfo) etc., which can
1014 easily be invoked hundreds of thousands of times. */
1015 DebugInfoMapping
* last_rx_map
;
1018 /* --------------------- functions --------------------- */
1020 /* ------ Adding ------ */
1022 /* Add a symbol to si's symbol table. The contents of 'sym' are
1023 copied. It is assumed (and checked) that 'sym' only contains one
1024 name, so there is no auxiliary ::sec_names vector to duplicate.
1025 IOW, the copy is a shallow copy, and there are assertions in place
1026 to ensure that's OK. */
1027 extern void ML_(addSym
) ( struct _DebugInfo
* di
, DiSym
* sym
);
1029 /* Add a filename/dirname pair to a DebugInfo and return the index
1030 in the fndnpool fixed pool. */
1031 extern UInt
ML_(addFnDn
) (struct _DebugInfo
* di
,
1032 const HChar
* filename
,
1033 const HChar
* dirname
); /* NULL is allowable */
1035 /* Returns the filename of the fndn pair identified by fndn_ix.
1036 Returns "???" if fndn_ix is 0. */
1037 extern const HChar
* ML_(fndn_ix2filename
) (const DebugInfo
* di
,
1040 /* Returns the dirname of the fndn pair identified by fndn_ix.
1041 Returns "" if fndn_ix is 0 or fndn->dirname is NULL. */
1042 extern const HChar
* ML_(fndn_ix2dirname
) (const DebugInfo
* di
,
1045 /* Returns the fndn_ix for the LineInfo locno in di->loctab.
1046 0 if filename/dirname are unknown. */
1047 extern UInt
ML_(fndn_ix
) (const DebugInfo
* di
, Word locno
);
1049 /* Add a line-number record to a DebugInfo.
1050 fndn_ix is an index in di->fndnpool, allocated using ML_(addFnDn).
1051 Give a 0 index for a unknown filename/dirname pair. */
1053 void ML_(addLineInfo
) ( struct _DebugInfo
* di
,
1055 Addr
this, Addr next
, Int lineno
, Int entry
);
1057 /* Add a call inlined record to a DebugInfo.
1058 A call to the below means that inlinedfn code has been
1059 inlined, resulting in code from [addr_lo, addr_hi[.
1060 Note that addr_hi is excluded, i.e. is not part of the inlined code.
1061 fndn_ix and lineno identifies the location of the call that caused
1063 fndn_ix is an index in di->fndnpool, allocated using ML_(addFnDn).
1064 Give a 0 index for an unknown filename/dirname pair.
1065 In case of nested inlining, a small level indicates the call
1066 is closer to main that a call with a higher level. */
1068 void ML_(addInlInfo
) ( struct _DebugInfo
* di
,
1069 Addr addr_lo
, Addr addr_hi
,
1070 const HChar
* inlinedfn
,
1072 Int lineno
, UShort level
);
1074 /* Add a CFI summary record. The supplied DiCfSI_m is copied. */
1075 extern void ML_(addDiCfSI
) ( struct _DebugInfo
* di
,
1076 Addr base
, UInt len
, DiCfSI_m
* cfsi_m
);
1078 /* Given a position in the di->cfsi_base/cfsi_m_ix arrays, return
1079 the corresponding cfsi_m*. Return NULL if the position corresponds
1081 DiCfSI_m
* ML_(get_cfsi_m
) (const DebugInfo
* di
, UInt pos
);
1083 /* Add a string to the string table of a DebugInfo. If len==-1,
1084 ML_(addStr) will itself measure the length of the string. */
1085 extern const HChar
* ML_(addStr
) ( DebugInfo
* di
, const HChar
* str
, Int len
);
1087 /* Add a string to the string table of a DebugInfo, by copying the
1088 string from the given DiCursor. Measures the length of the string
1090 extern const HChar
* ML_(addStrFromCursor
)( DebugInfo
* di
, DiCursor c
);
/* Declaration of ML_(addVar), which takes the DebugInfo 'di' plus a
   description of a variable (type reference, frame-base expression and
   declaration site).
   NOTE(review): this declaration is incomplete in this copy of the
   header.  The fused listing numbers jump from 1092 to 1097 and from
   1097 to 1099, so parameters between 'di' and 'typeR' (and possibly
   between 'typeR' and 'fbGX') have been lost, as have whatever follows
   'lineNo' and the closing ');'.  Restore them from the definition of
   ML_(addVar) before relying on this prototype. */
1092 extern void ML_(addVar
)( struct _DebugInfo
* di
,
1097 UWord typeR
, /* a cuOff */
1099 const GExpr
* fbGX
, /* SHARED. */
1100 UInt fndn_ix
, /* where decl'd - may be zero */
1101 Int lineNo
, /* where decl'd - may be zero */
1103 /* Note: fndn_ix identifies a filename/dirname pair similarly to
1104 ML_(addInlInfo) and ML_(addLineInfo). */
1106 /* Canonicalise the tables held by 'di', in preparation for use. Call
1107 this after finishing adding entries to these tables. */
1108 extern void ML_(canonicaliseTables
) ( struct _DebugInfo
* di
);
1110 /* Canonicalise the call-frame-info table held by 'di', in preparation
1111 for use. This is called by ML_(canonicaliseTables) but can also be
1112 called on it's own to sort just this table. */
1113 extern void ML_(canonicaliseCFI
) ( struct _DebugInfo
* di
);
1115 /* ML_(finish_CFSI_arrays) fills in the cfsi_base and cfsi_m_ix arrays
1116 from cfsi_rd array. cfsi_rd is then freed. */
1117 extern void ML_(finish_CFSI_arrays
) ( struct _DebugInfo
* di
);
1119 /* ------ Searching ------ */
1121 /* Find a symbol-table index containing the specified pointer, or -1
1122 if not found. Binary search. */
/* NOTE(review): this declaration is incomplete in this copy of the
   header.  It stops after the comma following 'ptr', so at least one
   trailing parameter and the closing ');' have been lost.  Restore
   them from the definition of ML_(search_one_symtab) before relying on
   this prototype. */
1123 extern Word
ML_(search_one_symtab
) ( const DebugInfo
* di
, Addr ptr
,
1126 /* Find a location-table index containing the specified pointer, or -1
1127 if not found. Binary search. */
1128 extern Word
ML_(search_one_loctab
) ( const DebugInfo
* di
, Addr ptr
);
1130 /* Find a CFI-table index containing the specified pointer, or -1 if
1131 not found. Binary search. */
1132 extern Word
ML_(search_one_cfitab
) ( const DebugInfo
* di
, Addr ptr
);
1134 /* Find a FPO-table index containing the specified pointer, or -1
1135 if not found. Binary search. */
1136 extern Word
ML_(search_one_fpotab
) ( const DebugInfo
* di
, Addr ptr
);
1138 /* Helper function for the most often needed searching for an rx
1139 mapping containing the specified address range. The range must
1140 fall entirely within the mapping to be considered to be within it.
1141 Asserts if lo > hi; caller must ensure this doesn't happen. */
1142 extern DebugInfoMapping
* ML_(find_rx_mapping
) ( DebugInfo
* di
,
1145 /* ------ Misc ------ */
1147 /* Show a non-fatal debug info reading error. Use VG_(core_panic) for
1148 fatal errors. 'serious' errors are always shown, not 'serious' ones
1149 are shown only at verbosity level 2 and above. */
1151 void ML_(symerr
) ( const DebugInfo
* di
, Bool serious
, const HChar
* msg
);
1153 /* Print a symbol. */
1154 extern void ML_(ppSym
) ( Int idx
, const DiSym
* sym
);
1156 /* Print a call-frame-info summary. */
1157 extern void ML_(ppDiCfSI
) ( const XArray
* /* of CfiExpr */ exprs
,
1158 Addr base
, UInt len
,
1159 const DiCfSI_m
* si_m
);
/* Nonzero when symbol-table tracing is enabled.  Note that this reads
   'di->trace_symtab', so a variable named 'di' must be in scope at the
   point of use. */
#define TRACE_SYMTAB_ENABLED (di->trace_symtab)

/* printf-style trace output, sent to VG_(printf) only when
   TRACE_SYMTAB_ENABLED holds.  The body is wrapped in do { } while (0)
   so that 'TRACE_SYMTAB(...);' expands to exactly one statement and
   can be used safely as the body of an unbraced if/else.  Uses the GNU
   named-variadic form 'args...' together with ', ## args'. */
#define TRACE_SYMTAB(format, args...) \
   do { \
      if (TRACE_SYMTAB_ENABLED) { VG_(printf)(format, ## args); } \
   } while (0)
1167 #endif /* ndef __PRIV_STORAGE_H */
1169 /*--------------------------------------------------------------------*/
1171 /*--------------------------------------------------------------------*/