Small comment fix for the UInt* cfsi_m index : 4 instead of 3
[valgrind.git] / coregrind / m_debuginfo / priv_storage.h
blob5740442876eaa96ed766a9b4351599398a9ae8cc
2 /*--------------------------------------------------------------------*/
3 /*--- Format-neutral storage of and querying of info acquired from ---*/
4 /*--- ELF/XCOFF stabs/dwarf1/dwarf2 debug info. ---*/
5 /*--- priv_storage.h ---*/
6 /*--------------------------------------------------------------------*/
8 /*
9 This file is part of Valgrind, a dynamic binary instrumentation
10 framework.
12 Copyright (C) 2000-2013 Julian Seward
13 jseward@acm.org
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 02111-1307, USA.
30 The GNU General Public License is contained in the file COPYING.
33 Stabs reader greatly improved by Nick Nethercote, Apr 02.
34 This module was also extensively hacked on by Jeremy Fitzhardinge
35 and Tom Hughes.
37 /* See comment at top of debuginfo.c for explanation of
38 the _svma / _avma / _image / _bias naming scheme.
40 /* Note this is not freestanding; needs pub_core_xarray.h and
41 priv_tytypes.h to be included before it. */
43 #ifndef __PRIV_STORAGE_H
44 #define __PRIV_STORAGE_H
46 #include "pub_core_basics.h" // Addr
47 #include "pub_core_xarray.h" // XArray
48 #include "pub_core_deduppoolalloc.h" // DedupPoolAlloc
49 #include "priv_d3basics.h" // GExpr et al.
50 #include "priv_image.h" // DiCursor
52 /* --------------------- SYMBOLS --------------------- */
54 /* A structure to hold an ELF/MachO symbol (very crudely). Usually
55 the symbol only has one name, which is stored in ::pri_name, and
56 ::sec_names is NULL. If there are other names, these are stored in
57 ::sec_names, which is a NULL terminated vector holding the names.
58 The vector is allocated in VG_AR_DINFO, the names themselves live
59 in DebugInfo::strpool.
61 From the point of view of ELF, the primary vs secondary distinction
62 is artificial: they are all just names associated with the address,
63 none of which has higher precedence than any other. However, from
64 the point of view of mapping an address to a name to display to the
65 user, we need to choose one "preferred" name, and so that might as
66 well be installed as the pri_name, whilst all others can live in
67 sec_names[]. This has the convenient side effect that, in the
68 common case where there is only one name for the address,
69 sec_names[] does not need to be allocated.
71 typedef
72 struct {
73 Addr addr; /* lowest address of entity */
74 Addr tocptr; /* ppc64-linux only: value that R2 should have */
75 HChar* pri_name; /* primary name, never NULL */
76 HChar** sec_names; /* NULL, or a NULL term'd array of other names */
77 // XXX: this could be shrunk (on 32-bit platforms) by using 30
78 // bits for the size and 1 bit each for isText and isIFunc. If you
79 // do this, make sure that all assignments to the latter two use
80 // 0 or 1 (or True or False), and that a positive number larger
81 // than 1 is never used to represent True.
82 UInt size; /* size in bytes */
83 Bool isText;
84 Bool isIFunc; /* symbol is an indirect function? */
86 DiSym;
88 /* --------------------- SRCLOCS --------------------- */
90 /* Line count at which overflow happens, due to line numbers being
91 stored as shorts in `struct nlist' in a.out.h. */
92 #define LINENO_OVERFLOW (1 << (sizeof(short) * 8)) /* 2^(8 * sizeof(short)) */
94 #define LINENO_BITS 20 /* width of the lineno bitfield in DiLoc and DiInlLoc */
95 #define LOC_SIZE_BITS (32 - LINENO_BITS) /* width of the DiLoc size bitfield; size and lineno together use 32 bits */
96 #define MAX_LINENO ((1 << LINENO_BITS) - 1) /* largest line number that fits in LINENO_BITS bits */
98 /* Unlikely to have any lines with instruction ranges >= 4096 bytes, which would not fit in the 12 LOC_SIZE_BITS bits. */
99 #define MAX_LOC_SIZE ((1 << LOC_SIZE_BITS) - 1) /* largest size that fits in LOC_SIZE_BITS bits */
101 /* Number used to detect line number overflows; if one line is
102 60000-odd smaller than the previous, it was probably an overflow.
104 #define OVERFLOW_DIFFERENCE (LINENO_OVERFLOW - 5000)
106 /* A structure to hold addr-to-source info for a single line. There
107 can be a lot of these, hence the dense packing. */
108 typedef
109 struct {
110 /* Word 1 */
111 Addr addr; /* lowest address for this line */
112 /* Word 2 */
113 UShort size:LOC_SIZE_BITS; /* # bytes; we catch overflows of this */
114 UInt lineno:LINENO_BITS; /* source line number, or zero */
115 /* Word 3 */
116 const HChar* filename; /* source filename */
117 /* Word 4 */
118 const HChar* dirname; /* source directory name */
120 DiLoc;
122 #define LEVEL_BITS (32 - LINENO_BITS) /* width of the DiInlLoc level bitfield; level and lineno together use 32 bits */
123 #define MAX_LEVEL ((1 << LEVEL_BITS) - 1) /* largest inlining level that fits in LEVEL_BITS bits */
125 /* A structure to hold addr-to-inlined fn info. There
126 can be a lot of these, hence the dense packing. */
127 typedef
128 struct {
129 /* Word 1 */
130 Addr addr_lo; /* lowest address for inlined fn */
131 /* Word 2 */
132 Addr addr_hi; /* highest address following the inlined fn */
133 /* Word 3 */
134 const HChar* inlinedfn; /* inlined function name */
135 /* Word 4 */
136 const HChar* filename; /* caller source filename */
137 /* Word 5 */
138 const HChar* dirname; /* caller source directory name */
139 /* Word 6 */
140 UInt lineno:LINENO_BITS; /* caller line number */
141 UShort level:LEVEL_BITS; /* level of inlining */
143 DiInlLoc;
145 /* --------------------- CF INFO --------------------- */
147 /* DiCfSI: a structure to summarise DWARF2/3 CFA info for the code
148 address range [base .. base+len-1].
150 On x86 and amd64 ("IA"), if you know ({e,r}sp, {e,r}bp, {e,r}ip) at
151 some point and {e,r}ip is in the range [base .. base+len-1], it
152 tells you how to calculate ({e,r}sp, {e,r}bp) for the caller of the
153 current frame and also ra, the return address of the current frame.
155 First off, calculate CFA, the Canonical Frame Address, thusly:
157 cfa = case cfa_how of
158 CFIC_IA_SPREL -> {e,r}sp + cfa_off
159 CFIC_IA_BPREL -> {e,r}bp + cfa_off
160 CFIC_EXPR -> expr whose index is in cfa_off
162 Once that is done, the previous frame's {e,r}sp/{e,r}bp values and
163 this frame's {e,r}ra value can be calculated like this:
165 old_{e,r}sp/{e,r}bp/ra
166 = case {e,r}sp/{e,r}bp/ra_how of
167 CFIR_UNKNOWN -> we don't know, sorry
168 CFIR_SAME -> same as it was before (sp/fp only)
169 CFIR_CFAREL -> cfa + sp/bp/ra_off
170 CFIR_MEMCFAREL -> *( cfa + sp/bp/ra_off )
171 CFIR_EXPR -> expr whose index is in sp/bp/ra_off
173 On ARM it's pretty much the same, except we have more registers to
174 keep track of:
176 cfa = case cfa_how of
177 CFIC_ARM_R13REL -> r13 + cfa_off
178 CFIC_ARM_R12REL -> r12 + cfa_off
179 CFIC_ARM_R11REL -> r11 + cfa_off
180 CFIC_ARM_R7REL -> r7 + cfa_off
181 CFIC_EXPR -> expr whose index is in cfa_off
183 old_r14/r13/r12/r11/r7/ra
184 = case r14/r13/r12/r11/r7/ra_how of
185 CFIR_UNKNOWN -> we don't know, sorry
186 CFIR_SAME -> same as it was before (r14/r13/r12/r11/r7 only)
187 CFIR_CFAREL -> cfa + r14/r13/r12/r11/r7/ra_off
188 CFIR_MEMCFAREL -> *( cfa + r14/r13/r12/r11/r7/ra_off )
189 CFIR_EXPR -> expr whose index is in r14/r13/r12/r11/r7/ra_off
191 On ARM64:
193 cfa = case cfa_how of
194 CFIC_ARM64_SPREL -> sp + cfa_off
195 CFIC_ARM64_X29REL -> x29 + cfa_off
196 CFIC_EXPR -> expr whose index is in cfa_off
198 old_sp/x30/x29/ra
199 = case sp/x30/x29/ra_how of
200 CFIR_UNKNOWN -> we don't know, sorry
201 CFIR_SAME -> same as it was before
202 CFIR_CFAREL -> cfa + sp/x30/x29/ra_off
203 CFIR_MEMCFAREL -> *( cfa + sp/x30/x29/ra_off )
204 CFIR_EXPR -> expr whose index is in sp/x30/x29/ra_off
206 On s390x we have a similar logic as x86 or amd64. We need the stack pointer
207 (r15), the frame pointer r11 (like BP) and together with the instruction
208 address in the PSW we can calculate the previous values:
209 cfa = case cfa_how of
210 CFIC_IA_SPREL -> r15 + cfa_off
211 CFIC_IA_BPREL -> r11 + cfa_off
212 CFIC_EXPR -> expr whose index is in cfa_off
214 old_sp/fp/ra
215 = case sp/fp/ra_how of
216 CFIR_UNKNOWN -> we don't know, sorry
217 CFIR_SAME -> same as it was before (sp/fp only)
218 CFIR_CFAREL -> cfa + sp/fp/ra_off
219 CFIR_MEMCFAREL -> *( cfa + sp/fp/ra_off )
220 CFIR_EXPR -> expr whose index is in sp/fp/ra_off
223 #define CFIC_IA_SPREL ((UChar)1) /* cfa = {e,r}sp + cfa_off (r15 + cfa_off on s390x) */
224 #define CFIC_IA_BPREL ((UChar)2) /* cfa = {e,r}bp + cfa_off (r11 + cfa_off on s390x) */
225 #define CFIC_ARM_R13REL ((UChar)3) /* cfa = r13 + cfa_off */
226 #define CFIC_ARM_R12REL ((UChar)4) /* cfa = r12 + cfa_off */
227 #define CFIC_ARM_R11REL ((UChar)5) /* cfa = r11 + cfa_off */
228 #define CFIC_ARM_R7REL ((UChar)6) /* cfa = r7 + cfa_off */
229 #define CFIC_ARM64_SPREL ((UChar)7) /* cfa = sp + cfa_off */
230 #define CFIC_ARM64_X29REL ((UChar)8) /* cfa = x29 + cfa_off */
231 #define CFIC_EXPR ((UChar)9) /* all targets: cfa = expr whose index is in cfa_off */
233 #define CFIR_UNKNOWN ((UChar)64) /* previous register value is not known */
234 #define CFIR_SAME ((UChar)65) /* previous register value is the same as it was before */
235 #define CFIR_CFAREL ((UChar)66) /* previous register value = cfa + <reg>_off */
236 #define CFIR_MEMCFAREL ((UChar)67) /* previous register value = *( cfa + <reg>_off ) */
237 #define CFIR_EXPR ((UChar)68) /* previous register value = expr whose index is in <reg>_off */
239 /* Definition of the DiCfSI_m DiCfSI machine dependent part.
240 These are highly duplicated, and are stored in a pool. */
241 #if defined(VGA_x86) || defined(VGA_amd64)
242 typedef
243 struct {
244 UChar cfa_how; /* a CFIC_IA value */
245 UChar ra_how; /* a CFIR_ value */
246 UChar sp_how; /* a CFIR_ value */
247 UChar bp_how; /* a CFIR_ value */
248 Int cfa_off;
249 Int ra_off;
250 Int sp_off;
251 Int bp_off;
253 DiCfSI_m;
254 #elif defined(VGA_arm)
255 typedef
256 struct {
257 UChar cfa_how; /* a CFIC_ value */
258 UChar ra_how; /* a CFIR_ value */
259 UChar r14_how; /* a CFIR_ value */
260 UChar r13_how; /* a CFIR_ value */
261 UChar r12_how; /* a CFIR_ value */
262 UChar r11_how; /* a CFIR_ value */
263 UChar r7_how; /* a CFIR_ value */
264 Int cfa_off;
265 Int ra_off;
266 Int r14_off;
267 Int r13_off;
268 Int r12_off;
269 Int r11_off;
270 Int r7_off;
272 DiCfSI_m;
273 #elif defined(VGA_arm64)
274 typedef
275 struct {
276 UChar cfa_how; /* a CFIC_ value */
277 UChar ra_how; /* a CFIR_ value */
278 UChar sp_how; /* a CFIR_ value */ /*dw31=SP*/
279 UChar x30_how; /* a CFIR_ value */ /*dw30=LR*/
280 UChar x29_how; /* a CFIR_ value */ /*dw29=FP*/
281 Int cfa_off;
282 Int ra_off;
283 Int sp_off;
284 Int x30_off;
285 Int x29_off;
287 DiCfSI_m;
288 #elif defined(VGA_ppc32) || defined(VGA_ppc64)
289 /* Just have a struct with the common fields in, so that code that
290 processes the common fields doesn't have to be ifdef'd against
291 VGP_/VGA_ symbols. These are not used in any way on ppc32/64-linux
292 at the moment. */
293 typedef
294 struct {
295 UChar cfa_how; /* a CFIC_ value */
296 UChar ra_how; /* a CFIR_ value */
297 Int cfa_off;
298 Int ra_off;
300 DiCfSI_m;
301 #elif defined(VGA_s390x)
302 typedef
303 struct {
304 UChar cfa_how; /* a CFIC_ value */
305 UChar sp_how; /* a CFIR_ value */
306 UChar ra_how; /* a CFIR_ value */
307 UChar fp_how; /* a CFIR_ value */
308 Int cfa_off;
309 Int sp_off;
310 Int ra_off;
311 Int fp_off;
313 DiCfSI_m;
314 #elif defined(VGA_mips32) || defined(VGA_mips64)
315 typedef
316 struct {
317 UChar cfa_how; /* a CFIC_ value */
318 UChar ra_how; /* a CFIR_ value */
319 UChar sp_how; /* a CFIR_ value */
320 UChar fp_how; /* a CFIR_ value */
321 Int cfa_off;
322 Int ra_off;
323 Int sp_off;
324 Int fp_off;
326 DiCfSI_m;
327 #else
328 # error "Unknown arch"
329 #endif
331 typedef
332 struct {
333 Addr base;
334 UInt len;
335 UInt cfsi_m_ix;
337 DiCfSI;
339 typedef
340 enum {
341 Cunop_Abs=0x231,
342 Cunop_Neg,
343 Cunop_Not
345 CfiUnop;
347 typedef
348 enum {
349 Cbinop_Add=0x321,
350 Cbinop_Sub,
351 Cbinop_And,
352 Cbinop_Mul,
353 Cbinop_Shl,
354 Cbinop_Shr,
355 Cbinop_Eq,
356 Cbinop_Ge,
357 Cbinop_Gt,
358 Cbinop_Le,
359 Cbinop_Lt,
360 Cbinop_Ne
362 CfiBinop;
364 typedef
365 enum {
366 Creg_IA_SP=0x213,
367 Creg_IA_BP,
368 Creg_IA_IP,
369 Creg_ARM_R13,
370 Creg_ARM_R12,
371 Creg_ARM_R15,
372 Creg_ARM_R14,
373 Creg_ARM64_X30,
374 Creg_S390_R14,
375 Creg_MIPS_RA
377 CfiReg;
379 typedef
380 enum {
381 Cex_Undef=0x123,
382 Cex_Deref,
383 Cex_Const,
384 Cex_Unop,
385 Cex_Binop,
386 Cex_CfiReg,
387 Cex_DwReg
389 CfiExprTag;
391 typedef
392 struct {
393 CfiExprTag tag;
394 union {
395 struct {
396 } Undef;
397 struct {
398 Int ixAddr;
399 } Deref;
400 struct {
401 UWord con;
402 } Const;
403 struct {
404 CfiUnop op;
405 Int ix;
406 } Unop;
407 struct {
408 CfiBinop op;
409 Int ixL;
410 Int ixR;
411 } Binop;
412 struct {
413 CfiReg reg;
414 } CfiReg;
415 struct {
416 Int reg;
417 } DwReg;
419 Cex;
421 CfiExpr;
423 extern Int ML_(CfiExpr_Undef) ( XArray* dst );
424 extern Int ML_(CfiExpr_Deref) ( XArray* dst, Int ixAddr );
425 extern Int ML_(CfiExpr_Const) ( XArray* dst, UWord con );
426 extern Int ML_(CfiExpr_Unop) ( XArray* dst, CfiUnop op, Int ix );
427 extern Int ML_(CfiExpr_Binop) ( XArray* dst, CfiBinop op, Int ixL, Int ixR );
428 extern Int ML_(CfiExpr_CfiReg)( XArray* dst, CfiReg reg );
429 extern Int ML_(CfiExpr_DwReg) ( XArray* dst, Int reg );
431 extern void ML_(ppCfiExpr)( XArray* src, Int ix );
433 /* ---------------- FPO INFO (Windows PE) -------------- */
435 /* for apps using Wine: MSVC++ PDB FramePointerOmitted: somewhat like
436 a primitive CFI */
437 typedef
438 struct _FPO_DATA { /* 16 bytes */
439 UInt ulOffStart; /* offset of 1st byte of function code */
440 UInt cbProcSize; /* # bytes in function */
441 UInt cdwLocals; /* # bytes/4 in locals */
442 UShort cdwParams; /* # bytes/4 in params */
443 UChar cbProlog; /* # bytes in prolog */
444 UChar cbRegs :3; /* # regs saved */
445 UChar fHasSEH:1; /* Structured Exception Handling */
446 UChar fUseBP :1; /* EBP has been used */
447 UChar reserved:1;
448 UChar cbFrame:2; /* frame type */
450 FPO_DATA;
452 #define PDB_FRAME_FPO 0
453 #define PDB_FRAME_TRAP 1
454 #define PDB_FRAME_TSS 2
456 /* --------------------- VARIABLES --------------------- */
458 typedef
459 struct {
460 Addr aMin;
461 Addr aMax;
462 XArray* /* of DiVariable */ vars;
464 DiAddrRange;
466 typedef
467 struct {
468 HChar* name; /* in DebugInfo.strpool */
469 UWord typeR; /* a cuOff */
470 GExpr* gexpr; /* on DebugInfo.gexprs list */
471 GExpr* fbGX; /* SHARED. */
472 HChar* fileName; /* where declared; may be NULL. in
473 DebugInfo.strpool */
474 Int lineNo; /* where declared; may be zero. */
476 DiVariable;
478 Word
479 ML_(cmp_for_DiAddrRange_range) ( const void* keyV, const void* elemV );
481 /* --------------------- DEBUGINFO --------------------- */
483 /* This is the top-level data type. It's a structure which contains
484 information pertaining to one mapped ELF object. This type is
485 exported only abstractly - in pub_tool_debuginfo.h. */
487 /* First though, here's an auxiliary data structure. It is only ever
488 used as part of a struct _DebugInfo. We use it to record
489 observations about mappings and permission changes to the
490 associated file, so as to decide when to read debug info. It's
491 essentially an ultra-trivial finite state machine which, when it
492 reaches an accept state, signals that we should now read debug info
493 from the object into the associated struct _DebugInfo. The accept
494 state is arrived at when have_rx_map and have_rw_map both become
495 true. The initial state is one in which we have no observations,
496 so have_rx_map and have_rw_map are both false.
498 This all started as a rather ad-hoc solution, but was further
499 expanded to handle weird object layouts, e.g. more than one rw
500 or rx mapping for one binary.
502 The normal sequence of events is one of
504 start --> r-x mapping --> rw- mapping --> accept
505 start --> rw- mapping --> r-x mapping --> accept
507 that is, take the first r-x and rw- mapping we see, and we're done.
509 On MacOSX 10.7, 32-bit, there appears to be a new variant:
511 start --> r-- mapping --> rw- mapping
512 --> upgrade r-- mapping to r-x mapping --> accept
514 where the upgrade is done by a call to vm_protect. Hence we
515 need to also track this possibility.
518 struct _DebugInfoMapping
520 Addr avma; /* these fields record the file offset, length */
521 SizeT size; /* and map address of each mapping */
522 OffT foff;
523 Bool rx, rw, ro; /* memory access flags for this mapping */
526 struct _DebugInfoFSM
528 HChar* filename; /* in mallocville (VG_AR_DINFO) */
529 XArray* maps; /* XArray of _DebugInfoMapping structs */
530 Bool have_rx_map; /* did we see a r?x mapping yet for the file? */
531 Bool have_rw_map; /* did we see a rw? mapping yet for the file? */
532 Bool have_ro_map; /* did we see a r-- mapping yet for the file? */
536 /* To do with the string table in struct _DebugInfo (::strpool) */
537 #define SEGINFO_STRPOOLSIZE (64*1024)
540 /* We may encounter more than one .eh_frame section in an object --
541 unusual but apparently allowed by ELF. See
542 http://sourceware.org/bugzilla/show_bug.cgi?id=12675
544 #define N_EHFRAME_SECTS 2
547 /* So, the main structure for holding debug info for one object. */
549 struct _DebugInfo {
551 /* Admin stuff */
553 struct _DebugInfo* next; /* list of DebugInfos */
554 Bool mark; /* marked for deletion? */
556 /* An abstract handle, which can be used by entities outside of
557 m_debuginfo to (in an abstract datatype sense) refer to this
558 struct _DebugInfo. A .handle of zero is invalid; valid handles
559 are 1 and above. The same handle is never issued twice (in any
560 given run of Valgrind), so a handle becomes invalid when the
561 associated struct _DebugInfo is discarded, and remains invalid
562 forever thereafter. The .handle field is set as soon as this
563 structure is allocated. */
564 ULong handle;
566 /* Used for debugging only - indicate what stuff to dump whilst
567 reading stuff into the seginfo. Are computed as early in the
568 lifetime of the DebugInfo as possible -- at the point when it is
569 created. Use these when deciding what to spew out; do not use
570 the global VG_(clo_blah) flags. */
572 Bool trace_symtab; /* symbols, our style */
573 Bool trace_cfi; /* dwarf frame unwind, our style */
574 Bool ddump_syms; /* mimic /usr/bin/readelf --syms */
575 Bool ddump_line; /* mimic /usr/bin/readelf --debug-dump=line */
576 Bool ddump_frames; /* mimic /usr/bin/readelf --debug-dump=frames */
578 /* The "decide when it is time to read debuginfo" state machine.
579 This structure must get filled in before we can start reading
580 anything from the ELF/MachO file. This structure is filled in
581 by VG_(di_notify_mmap) and its immediate helpers. */
582 struct _DebugInfoFSM fsm;
584 /* Once the ::fsm has reached an accept state -- typically, when
585 both a rw? and r?x mapping for .filename have been observed --
586 we can go on to read the symbol tables and debug info.
587 .have_dinfo changes from False to True when the debug info has
588 been completely read in and postprocessed (canonicalised) and is
589 now suitable for querying. */
590 /* If have_dinfo is False, then all fields below this point are
591 invalid and should not be consulted. */
592 Bool have_dinfo; /* initially False */
594 /* All the rest of the fields in this structure are filled in once
595 we have committed to reading the symbols and debug info (that
596 is, at the point where .have_dinfo is set to True). */
598 /* The file's soname. */
599 HChar* soname;
601 /* Description of some important mapped segments. The presence or
602 absence of the mapping is denoted by the _present field, since
603 in some obscure circumstances (to do with data/sdata/bss) it is
604 possible for the mapping to be present but have zero size.
605 Certainly text_ is mandatory on all platforms; not sure about
606 the rest though.
608 --------------------------------------------------------
610 Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS: we require that
612 either (size of all rx maps == 0 && cfsi == NULL) (the degenerate case)
614 or the normal case, which is the AND of the following:
615 (0) size of at least one rx mapping > 0
616 (1) no two DebugInfos with some rx mapping of size > 0
617 have overlapping rx mappings
618 (2) [cfsi_minavma,cfsi_maxavma] does not extend beyond
619 [avma,+size) of one rx mapping; that is, the former
620 is a subrange or equal to the latter.
621 (3) all DiCfSI in the cfsi array have ranges that fall within
622 [avma,+size) of that rx mapping.
623 (4) all DiCfSI in the cfsi array are non-overlapping
625 The cumulative effect of these restrictions is to ensure that
626 all the DiCfSI records in the entire system are non overlapping.
627 Hence any address falls into either exactly one DiCfSI record,
628 or none. Hence it is safe to cache the results of searches for
629 DiCfSI records. This is the whole point of these restrictions.
630 The caching of DiCfSI searches is done in VG_(use_CF_info). The
631 cache is flushed after any change to debugInfo_list. DiCfSI
632 searches are cached because they are central to stack unwinding
633 on amd64-linux.
635 Where are these invariants imposed and checked?
637 They are checked after a successful read of debuginfo into
638 a DebugInfo*, in check_CFSI_related_invariants.
640 (1) is not really imposed anywhere. We simply assume that the
641 kernel will not map the text segments from two different objects
642 into the same space. Sounds reasonable.
644 (2) follows from (4) and (3). It is ensured by canonicaliseCFI.
645 (3) is ensured by ML_(addDiCfSI).
646 (4) is ensured by canonicaliseCFI.
648 --------------------------------------------------------
650 Comment_on_DEBUG_SVMA_and_DEBUG_BIAS_fields:
652 The _debug_{svma,bias} fields were added as part of a fix to
653 #185816. The problem encompassed in that bug report was that it
654 wasn't correct to apply the bias values deduced for a
655 primary object to its associated debuginfo object, because the
656 debuginfo object (or the primary) could have been prelinked to a
657 different SVMA. Hence debuginfo and primary objects need to
658 have their own biases.
660 ------ JRS: (referring to r9329): ------
661 Let me see if I understand the workings correctly. Initially
662 the _debug_ values are set to the same values as the "normal"
663 ones, as there's a bunch of bits of code like this (in
664 readelf.c)
666 di->text_svma = svma;
668 di->text_bias = rx_bias;
669 di->text_debug_svma = svma;
670 di->text_debug_bias = rx_bias;
672 If a debuginfo object subsequently shows up then the
673 _debug_svma/bias are set for the debuginfo object. Result is
674 that if there's no debuginfo object then the values are the same
675 as the primary-object values, and if there is a debuginfo object
676 then they will (or at least may) be different.
678 Then when we need to actually bias something, we'll have to
679 decide whether to use the primary bias or the debuginfo bias.
680 And the strategy is to use the primary bias for ELF symbols but
681 the debuginfo bias for anything pulled out of Dwarf.
683 ------ THH: ------
684 Correct - the debug_svma and bias values apply to any address
685 read from the debug data regardless of where that debug data is
686 stored and the other values are used for addresses from other
687 places (primarily the symbol table).
689 ------ JRS: ------
690 Ok; so this was my only area of concern. Are there any
691 corner-case scenarios where this wouldn't be right? It sounds
692 like we're assuming the ELF symbols come from the primary object
693 and, if there is a debug object, then all the Dwarf comes from
694 there. But what if (eg) both symbols and Dwarf come from the
695 debug object? Is that even possible or allowable?
697 ------ THH: ------
698 You may have a point...
700 The current logic is to try and take any one set of data from
701 either the base object or the debug object. There are four sets
702 of data we consider:
704 - Symbol Table
705 - Stabs
706 - DWARF1
707 - DWARF2
709 If we see the primary section for a given set in the base object
710 then we ignore all sections relating to that set in the debug
711 object.
713 Now in principle if we saw a secondary section (like debug_line
714 say) in the base object, but not the main section (debug_info in
715 this case) then we would take debug_info from the debug object
716 but would use the debug_line from the base object unless we saw
717 a replacement copy in the debug object. That's probably unlikely
718 however.
720 A bigger issue might be, as you say, the symbol table as we will
721 pick that up from the debug object if it isn't in the base. The
722 dynamic symbol table will always have to be in the base object
723 though so we will have to be careful when processing symbols to
724 know which table we are reading in that case.
726 What we probably need to do is tell read_elf_symtab which object
727 the symbols it is being asked to read came from.
729 (A followup patch to deal with this was committed in r9469).
731 /* .text */
732 Bool text_present;
733 Addr text_avma;
734 Addr text_svma;
735 SizeT text_size;
736 PtrdiffT text_bias;
737 Addr text_debug_svma;
738 PtrdiffT text_debug_bias;
739 /* .data */
740 Bool data_present;
741 Addr data_svma;
742 Addr data_avma;
743 SizeT data_size;
744 PtrdiffT data_bias;
745 Addr data_debug_svma;
746 PtrdiffT data_debug_bias;
747 /* .sdata */
748 Bool sdata_present;
749 Addr sdata_svma;
750 Addr sdata_avma;
751 SizeT sdata_size;
752 PtrdiffT sdata_bias;
753 Addr sdata_debug_svma;
754 PtrdiffT sdata_debug_bias;
755 /* .rodata */
756 Bool rodata_present;
757 Addr rodata_svma;
758 Addr rodata_avma;
759 SizeT rodata_size;
760 PtrdiffT rodata_bias;
761 Addr rodata_debug_svma;
762 PtrdiffT rodata_debug_bias;
763 /* .bss */
764 Bool bss_present;
765 Addr bss_svma;
766 Addr bss_avma;
767 SizeT bss_size;
768 PtrdiffT bss_bias;
769 Addr bss_debug_svma;
770 PtrdiffT bss_debug_bias;
771 /* .sbss */
772 Bool sbss_present;
773 Addr sbss_svma;
774 Addr sbss_avma;
775 SizeT sbss_size;
776 PtrdiffT sbss_bias;
777 Addr sbss_debug_svma;
778 PtrdiffT sbss_debug_bias;
779 /* .plt */
780 Bool plt_present;
781 Addr plt_avma;
782 SizeT plt_size;
783 /* .got */
784 Bool got_present;
785 Addr got_avma;
786 SizeT got_size;
787 /* .got.plt */
788 Bool gotplt_present;
789 Addr gotplt_avma;
790 SizeT gotplt_size;
791 /* .opd -- needed on ppc64-linux for finding symbols */
792 Bool opd_present;
793 Addr opd_avma;
794 SizeT opd_size;
795 /* .ehframe -- needed on amd64-linux for stack unwinding. We might
796 see more than one, hence the arrays. */
797 UInt n_ehframe; /* 0 .. N_EHFRAME_SECTS */
798 Addr ehframe_avma[N_EHFRAME_SECTS];
799 SizeT ehframe_size[N_EHFRAME_SECTS];
801 /* Sorted tables of stuff we snarfed from the file. This is the
802 eventual product of reading the debug info. All this stuff
803 lives in VG_AR_DINFO. */
805 /* An expandable array of symbols. */
806 DiSym* symtab;
807 UWord symtab_used;
808 UWord symtab_size;
809 /* An expandable array of locations. */
810 DiLoc* loctab;
811 UWord loctab_used;
812 UWord loctab_size;
813 /* An expandable array of inlined fn info.
814 maxinl_codesz is the biggest inlined piece of code
815 in inltab (i.e. the max of 'addr_hi - addr_lo'). */
816 DiInlLoc* inltab;
817 UWord inltab_used;
818 UWord inltab_size;
819 SizeT maxinl_codesz;
821 /* A set of expandable arrays to store CFI summary info records.
822 The machine specific information (i.e. the DiCfSI_m struct)
823 are stored in cfsi_m_pool, as these are highly duplicated.
824 The DiCfSI_m are allocated in cfsi_m_pool and identified using
825 a (we hope) small integer : often one byte is enough, sometimes
826 2 bytes are needed.
828 cfsi_base contains the bases of the code address ranges.
829 cfsi_size is the size of the cfsi_base array.
830 The elements cfsi_base[0] till cfsi_base[cfsi_used-1] are used.
831 Following elements are not used (yet).
833 For each base in cfsi_base, an index into cfsi_m_pool is stored
834 in cfsi_m_ix array. The size of cfsi_m_ix is equal to
835 cfsi_size*sizeof_ix. The used portion of cfsi_m_ix is
836 cfsi_m_ix[0] till cfsi_m_ix[(cfsi_used-1)*sizeof_ix].
838 cfsi_base[i] gives the base address of a code range covered by
839 some CF Info. The corresponding CF Info is identified by an index
840 in cfsi_m_pool. The DiCfSI_m index in cfsi_m_pool corresponding to
841 cfsi_base[i] is given
842 by ((UChar*) cfsi_m_ix)[i] if sizeof_ix == 1
843 by ((UShort*)cfsi_m_ix)[i] if sizeof_ix == 2
844 by ((UInt*) cfsi_m_ix)[i] if sizeof_ix == 4.
846 The end of the code range starting at cfsi_base[i] is given by
847 cfsi_base[i+1]-1 (or cfsi_maxavma for cfsi_base[cfsi_used-1]).
848 Some code ranges between cfsi_minavma and cfsi_maxavma might not
849 be covered by cfi information. Such not covered ranges are stored by
850 a base in cfsi_base and a corresponding 0 index in cfsi_m_ix.
852 A variable size representation has been chosen for the elements of
853 cfsi_m_ix as in many cases, one byte is good enough. For big
854 objects, 2 bytes are needed. No object has yet been found where
855 4 bytes are needed (but the code is ready to handle this case).
856 Not covered ranges ('cfi holes') are stored explicitly in
857 cfsi_base/cfsi_m_ix as this is more memory efficient than storing
858 a length for each covered range : on x86 or amd64, we typically have
859 a hole every 8 covered ranges. On arm64, we have very few holes
860 (1 every 50 or 100 ranges).
862 The cfsi information is read and prepared in the cfsi_rd array.
863 Once all the information has been read, the cfsi_base and cfsi_m_ix
864 arrays will be filled in from cfsi_rd. cfsi_rd will then be freed.
865 This is all done by ML_(finish_CFSI_arrays).
867 Also includes summary address bounds, showing the min and max address
868 covered by any of the records, as an aid to fast searching. And, if the
869 records require any expression nodes, they are stored in
870 cfsi_exprs. */
871 Addr* cfsi_base;
872 UInt sizeof_ix; /* size in byte of the indexes stored in cfsi_m_ix. */
873 void* cfsi_m_ix; /* Each index occupies sizeof_ix bytes. */
875 DiCfSI* cfsi_rd; /* Only used during reading, NULL once info is read. */
877 UWord cfsi_used;
878 UWord cfsi_size;
880 DedupPoolAlloc *cfsi_m_pool;
881 Addr cfsi_minavma;
882 Addr cfsi_maxavma;
883 XArray* cfsi_exprs; /* XArray of CfiExpr */
885 /* Optimized code under Wine x86: MSVC++ PDB FramePointerOmitted
886 data. Non-expandable array, hence .size == .used. */
887 FPO_DATA* fpo;
888 UWord fpo_size;
889 Addr fpo_minavma;
890 Addr fpo_maxavma;
891 Addr fpo_base_avma;
893 /* Pool of strings -- the string table. Pointers
894 into this are stable (the memory is not reallocated). */
895 DedupPoolAlloc *strpool;
897 /* Variable scope information, as harvested from Dwarf3 files.
899 In short it's an
901 array of (array of PC address ranges and variables)
903 The outer array indexes over scopes, with Entry 0 containing
904 information on variables which exist for any value of the program
905 counter (PC) -- that is, the outermost scope. Entries 1, 2, 3,
906 etc contain information on increasingly deeply nested variables.
908 Each inner array is an array of (an address range, and a set
909 of variables that are in scope over that address range).
911 The address ranges may not overlap.
913 Since Entry 0 in the outer array holds information on variables
914 that exist for any value of the PC (that is, global vars), it
915 follows that Entry 0's inner array can only have one address
916 range pair, one that covers the entire address space.
918 XArray* /* of OSet of DiAddrRange */varinfo;
920 /* These are arrays of the relevant typed objects, held here
921 partially for the purposes of visiting each object exactly once
922 when we need to delete them. */
924 /* An array of TyEnts. These are needed to make sense of any types
925 in the .varinfo. Also, when deleting this DebugInfo, we must
926 first traverse this array and throw away malloc'd stuff hanging
927 off it -- by calling ML_(TyEnt__make_EMPTY) on each entry. */
928 XArray* /* of TyEnt */ admin_tyents;
930 /* An array of guarded DWARF3 expressions. */
931 XArray* admin_gexprs;
933 /* Cached last rx mapping matched and returned by ML_(find_rx_mapping).
934 This helps performance a lot during ML_(addLineInfo) etc., which can
935 easily be invoked hundreds of thousands of times. */
936 struct _DebugInfoMapping* last_rx_map;
939 /* --------------------- functions --------------------- */
941 /* ------ Adding ------ */
943 /* Add a symbol to si's symbol table. The contents of 'sym' are
944 copied. It is assumed (and checked) that 'sym' only contains one
945 name, so there is no auxiliary ::sec_names vector to duplicate.
946 IOW, the copy is a shallow copy, and there are assertions in place
947 to ensure that's OK. */
948 extern void ML_(addSym) ( struct _DebugInfo* di, DiSym* sym );
950 /* Add a line-number record to a DebugInfo. */
951 extern
952 void ML_(addLineInfo) ( struct _DebugInfo* di,
953 const HChar* filename,
954 const HChar* dirname, /* NULL is allowable */
955 Addr this, Addr next, Int lineno, Int entry);
957 /* Add a call inlined record to a DebugInfo.
958 A call to the below means that inlinedfn code has been
959 inlined, resulting in code from [addr_lo, addr_hi[.
960 Note that addr_hi is excluded, i.e. is not part of the inlined code.
961 The call that caused this inlining is in filename/dirname/lineno
962 In case of nested inlining, a small level indicates the call
963 is closer to main that a call with a higher level. */
964 extern
965 void ML_(addInlInfo) ( struct _DebugInfo* di,
966 Addr addr_lo, Addr addr_hi,
967 const HChar* inlinedfn,
968 const HChar* filename,
969 const HChar* dirname, /* NULL is allowable */
970 Int lineno, UShort level);
972 /* Add a CFI summary record. The supplied DiCfSI_m is copied. */
973 extern void ML_(addDiCfSI) ( struct _DebugInfo* di,
974 Addr base, UInt len, DiCfSI_m* cfsi_m );
976 /* Given a position in the di->cfsi_base/cfsi_m_ix arrays, return
977 the corresponding cfsi_m*. Return NULL if the position corresponds
978 to a cfsi hole. */
979 DiCfSI_m* ML_(get_cfsi_m) (struct _DebugInfo* di, UInt pos);
981 /* Add a string to the string table of a DebugInfo. If len==-1,
982 ML_(addStr) will itself measure the length of the string. */
983 extern HChar* ML_(addStr) ( struct _DebugInfo* di, const HChar* str, Int len );
985 /* Add a string to the string table of a DebugInfo, by copying the
986 string from the given DiCursor. Measures the length of the string
987 itself. */
988 extern HChar* ML_(addStrFromCursor)( struct _DebugInfo* di, DiCursor c );
990 extern void ML_(addVar)( struct _DebugInfo* di,
991 Int level,
992 Addr aMin,
993 Addr aMax,
994 HChar* name,
995 UWord typeR, /* a cuOff */
996 GExpr* gexpr,
997 GExpr* fbGX, /* SHARED. */
998 HChar* fileName, /* where decl'd - may be NULL */
999 Int lineNo, /* where decl'd - may be zero */
1000 Bool show );
1002 /* Canonicalise the tables held by 'di', in preparation for use. Call
1003 this after finishing adding entries to these tables. */
1004 extern void ML_(canonicaliseTables) ( struct _DebugInfo* di );
1006 /* Canonicalise the call-frame-info table held by 'di', in preparation
1007 for use. This is called by ML_(canonicaliseTables) but can also be
1008 called on it's own to sort just this table. */
1009 extern void ML_(canonicaliseCFI) ( struct _DebugInfo* di );
1011 /* ML_(finish_CFSI_arrays) fills in the cfsi_base and cfsi_m_ix arrays
1012 from cfsi_rd array. cfsi_rd is then freed. */
1013 extern void ML_(finish_CFSI_arrays) ( struct _DebugInfo* di );
1015 /* ------ Searching ------ */
1017 /* Find a symbol-table index containing the specified pointer, or -1
1018 if not found. Binary search. */
1019 extern Word ML_(search_one_symtab) ( struct _DebugInfo* di, Addr ptr,
1020 Bool match_anywhere_in_sym,
1021 Bool findText );
1023 /* Find a location-table index containing the specified pointer, or -1
1024 if not found. Binary search. */
1025 extern Word ML_(search_one_loctab) ( struct _DebugInfo* di, Addr ptr );
1027 /* Find a CFI-table index containing the specified pointer, or -1 if
1028 not found. Binary search. */
1029 extern Word ML_(search_one_cfitab) ( struct _DebugInfo* di, Addr ptr );
1031 /* Find a FPO-table index containing the specified pointer, or -1
1032 if not found. Binary search. */
1033 extern Word ML_(search_one_fpotab) ( struct _DebugInfo* di, Addr ptr );
1035 /* Helper function for the most often needed searching for an rx
1036 mapping containing the specified address range. The range must
1037 fall entirely within the mapping to be considered to be within it.
1038 Asserts if lo > hi; caller must ensure this doesn't happen. */
1039 extern struct _DebugInfoMapping* ML_(find_rx_mapping) ( struct _DebugInfo* di,
1040 Addr lo, Addr hi );
1042 /* ------ Misc ------ */
1044 /* Show a non-fatal debug info reading error. Use vg_panic if
1045 terminal. 'serious' errors are always shown, not 'serious' ones
1046 are shown only at verbosity level 2 and above. */
1047 extern
1048 void ML_(symerr) ( struct _DebugInfo* di, Bool serious, const HChar* msg );
1050 /* Print a symbol. */
1051 extern void ML_(ppSym) ( Int idx, DiSym* sym );
1053 /* Print a call-frame-info summary. */
1054 extern void ML_(ppDiCfSI) ( XArray* /* of CfiExpr */ exprs,
1055 Addr base, UInt len,
1056 DiCfSI_m* si_m );
/* Symbol-table tracing helpers.

   Both macros refer to an identifier named 'di' that must be visible
   at the point of use and must give access to a 'trace_symtab' member
   through '->'.

   TRACE_SYMTAB_ENABLED evaluates to di->trace_symtab.

   TRACE_SYMTAB(format, args...) calls VG_(printf)(format, args...)
   when TRACE_SYMTAB_ENABLED is true, and does nothing otherwise.  The
   expansion is wrapped in do { ... } while (0) so that the macro acts
   as exactly one statement: it has to be followed by a semicolon, it
   can be the unbraced body of an 'if' that has an 'else', and a
   following 'else' can never attach to the 'if' hidden inside the
   macro. */
#define TRACE_SYMTAB_ENABLED (di->trace_symtab)
#define TRACE_SYMTAB(format, args...) \
   do { \
      if (TRACE_SYMTAB_ENABLED) { VG_(printf)(format, ## args); } \
   } while (0)
1064 #endif /* ndef __PRIV_STORAGE_H */
1066 /*--------------------------------------------------------------------*/
1067 /*--- end ---*/
1068 /*--------------------------------------------------------------------*/