drd/tests/Makefile.am: Fix indentation
[valgrind.git] / coregrind / m_debuginfo / priv_storage.h
blob39456eccbd3cb3e7e10ca02379b7ad613e55bd35
2 /*--------------------------------------------------------------------*/
3 /*--- Format-neutral storage of and querying of info acquired from ---*/
4 /*--- ELF/XCOFF stabs/dwarf1/dwarf2 debug info. ---*/
5 /*--- priv_storage.h ---*/
6 /*--------------------------------------------------------------------*/
8 /*
9 This file is part of Valgrind, a dynamic binary instrumentation
10 framework.
12 Copyright (C) 2000-2017 Julian Seward
13 jseward@acm.org
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, see <http://www.gnu.org/licenses/>.
28 The GNU General Public License is contained in the file COPYING.
31 Stabs reader greatly improved by Nick Nethercote, Apr 02.
32 This module was also extensively hacked on by Jeremy Fitzhardinge
33 and Tom Hughes.
35 /* See comment at top of debuginfo.c for explanation of
36 the _svma / _avma / _image / _bias naming scheme.
38 /* Note this is not freestanding; needs pub_core_xarray.h and
39 priv_tytypes.h to be included before it. */
41 #ifndef __PRIV_STORAGE_H
42 #define __PRIV_STORAGE_H
44 #include "pub_core_basics.h" // Addr
45 #include "pub_core_xarray.h" // XArray
46 #include "pub_core_deduppoolalloc.h" // DedupPoolAlloc
47 #include "priv_d3basics.h" // GExpr et al.
48 #include "priv_image.h" // DiCursor
50 /* --------------------- SYMBOLS --------------------- */
52 /* A structure to hold an ELF/MachO symbol (very crudely). Usually
53 the symbol only has one name, which is stored in ::pri_name, and
54 ::sec_names is NULL. If there are other names, these are stored in
55 ::sec_names, which is a NULL terminated vector holding the names.
56 The vector is allocated in VG_AR_DINFO, the names themselves live
57 in DebugInfo::strpool.
59 From the point of view of ELF, the primary vs secondary distinction
60 is artificial: they are all just names associated with the address,
61 none of which has higher precedence than any other. However, from
62 the point of view of mapping an address to a name to display to the
63 user, we need to choose one "preferred" name, and so that might as
64 well be installed as the pri_name, whilst all others can live in
65 sec_names[]. This has the convenient side effect that, in the
66 common case where there is only one name for the address,
67 sec_names[] does not need to be allocated.
69 typedef
70 struct {
71 SymAVMAs avmas; /* Symbol Actual VMAs: lowest address of entity,
72 + platform specific fields, to access with
73 the macros defined in pub_core_debuginfo.h */
74 const HChar* pri_name; /* primary name, never NULL */
75 const HChar** sec_names; /* NULL, or a NULL term'd array of other names */
76 // XXX: DiSym could be shrunk (on 32-bit platforms to exactly 16
77 // bytes, on 64-bit platforms the first 3 pointers already add
78 // up to 24 bytes, so size plus bits will extend to 32 bytes
79 // anyway) by using 29 bits for the size and 1 bit each for
80 // isText, isIFunc and isGlobal. If you do this, make sure that
81 // all assignments to the latter two use 0 or 1 (or True or
82 // False), and that a positive number larger than 1 is never
83 // used to represent True.
84 UInt size; /* size in bytes */
85 Bool isText;
86 Bool isIFunc; /* symbol is an indirect function? */
87 Bool isGlobal; /* Is this symbol globally visible? */
89 DiSym;
91 /* --------------------- SRCLOCS --------------------- */
/* Line count at which overflow happens, due to line numbers being
   stored as shorts in `struct nlist' in a.out.h. */
#define LINENO_OVERFLOW (1 << (sizeof(short) * 8))

/* A DiLoc packs a line number and a size into one 32-bit word:
   LINENO_BITS bits of line number, the remainder for the size. */
#define LINENO_BITS     20
#define LOC_SIZE_BITS  (32 - LINENO_BITS)
#define MAX_LINENO     ((1 << LINENO_BITS) - 1)

/* Unlikely to have any lines with instruction ranges > 4096 bytes */
#define MAX_LOC_SIZE   ((1 << LOC_SIZE_BITS) - 1)

/* Number used to detect line number overflows; if one line is
   60000-odd smaller than the previous, it was probably an overflow.
 */
#define OVERFLOW_DIFFERENCE     (LINENO_OVERFLOW - 5000)
109 /* Filename and Dirname pair. FnDn are stored in di->fndnpool
110 and are allocated using VG_(allocFixedEltDedupPA).
111 The filename/dirname strings are themselves stored in di->strpool. */
112 typedef
113 struct {
114 const HChar* filename; /* source filename */
115 const HChar* dirname; /* source directory name */
116 } FnDn;
118 /* A structure to hold addr-to-source info for a single line. There
119 can be a lot of these, hence the dense packing. */
120 typedef
121 struct {
122 /* Word 1 */
123 Addr addr; /* lowest address for this line */
124 /* Word 2 */
125 UShort size:LOC_SIZE_BITS; /* # bytes; we catch overflows of this */
126 UInt lineno:LINENO_BITS; /* source line number, or zero */
128 DiLoc;
130 #define LEVEL_BITS (32 - LINENO_BITS)
131 #define MAX_LEVEL ((1 << LEVEL_BITS) - 1)
133 /* A structure to hold addr-to-inlined fn info. There
134 can be a lot of these, hence the dense packing.
135 Only caller source filename and lineno are stored.
136 Handling dirname should be done using fndn_ix technique
137 similar to ML_(addLineInfo). */
138 typedef
139 struct {
140 /* Word 1 */
141 Addr addr_lo; /* lowest address for inlined fn */
142 /* Word 2 */
143 Addr addr_hi; /* highest address following the inlined fn */
144 /* Word 3 */
145 const HChar* inlinedfn; /* inlined function name */
146 /* Word 4 and 5 */
147 UInt fndn_ix; /* index in di->fndnpool of caller source
148 dirname/filename */
149 UInt lineno:LINENO_BITS; /* caller line number */
150 UShort level:LEVEL_BITS; /* level of inlining */
152 DiInlLoc;
154 /* --------------------- CF INFO --------------------- */
156 /* DiCfSI: a structure to summarise DWARF2/3 CFA info for the code
157 address range [base .. base+len-1].
159 On x86 and amd64 ("IA"), if you know ({e,r}sp, {e,r}bp, {e,r}ip) at
160 some point and {e,r}ip is in the range [base .. base+len-1], it
161 tells you how to calculate ({e,r}sp, {e,r}bp) for the caller of the
162 current frame and also ra, the return address of the current frame.
164 First off, calculate CFA, the Canonical Frame Address, thusly:
166 cfa = case cfa_how of
167 CFIC_IA_SPREL -> {e,r}sp + cfa_off
168 CFIC_IA_BPREL -> {e,r}bp + cfa_off
169 CFIC_EXPR -> expr whose index is in cfa_off
171 Once that is done, the previous frame's {e,r}sp/{e,r}bp values and
172 this frame's {e,r}ra value can be calculated like this:
174 old_{e,r}sp/{e,r}bp/ra
175 = case {e,r}sp/{e,r}bp/ra_how of
176 CFIR_UNKNOWN -> we don't know, sorry
177 CFIR_SAME -> same as it was before (sp/fp only)
178 CFIR_CFAREL -> cfa + sp/bp/ra_off
179 CFIR_MEMCFAREL -> *( cfa + sp/bp/ra_off )
180 CFIR_EXPR -> expr whose index is in sp/bp/ra_off
182 On ARM it's pretty much the same, except we have more registers to
183 keep track of:
     cfa = case cfa_how of
              CFIC_ARM_R13REL -> r13 + cfa_off
              CFIC_ARM_R12REL -> r12 + cfa_off
              CFIC_ARM_R11REL -> r11 + cfa_off
              CFIC_ARM_R7REL  -> r7  + cfa_off
              CFIC_EXPR       -> expr whose index is in cfa_off
192 old_r14/r13/r12/r11/r7/ra
193 = case r14/r13/r12/r11/r7/ra_how of
194 CFIR_UNKNOWN -> we don't know, sorry
195 CFIR_SAME -> same as it was before (r14/r13/r12/r11/r7 only)
196 CFIR_CFAREL -> cfa + r14/r13/r12/r11/r7/ra_off
197 CFIR_MEMCFAREL -> *( cfa + r14/r13/r12/r11/r7/ra_off )
198 CFIR_EXPR -> expr whose index is in r14/r13/r12/r11/r7/ra_off
200 On ARM64:
202 cfa = case cfa_how of
203 CFIC_ARM64_SPREL -> sp + cfa_off
204 CFIC_ARM64_X29REL -> x29 + cfa_off
205 CFIC_EXPR -> expr whose index is in cfa_off
     old_sp/x30/x29/ra
         = case sp/x30/x29/ra_how of
              CFIR_UNKNOWN   -> we don't know, sorry
              CFIR_SAME      -> same as it was before
              CFIR_CFAREL    -> cfa + sp/x30/x29/ra_off
              CFIR_MEMCFAREL -> *( cfa + sp/x30/x29/ra_off )
              CFIR_EXPR      -> expr whose index is in sp/x30/x29/ra_off
215 On s390x we have a similar logic as x86 or amd64. We need the stack pointer
216 (r15), the frame pointer r11 (like BP) and together with the instruction
217 address in the PSW we can calculate the previous values:
218 cfa = case cfa_how of
219 CFIC_IA_SPREL -> r15 + cfa_off
220 CFIC_IA_BPREL -> r11 + cfa_off
221 CFIC_EXPR -> expr whose index is in cfa_off
     old_sp/fp/ra
         = case sp/fp/ra_how of
              CFIR_UNKNOWN   -> we don't know, sorry
              CFIR_SAME      -> same as it was before (sp/fp only)
              CFIR_CFAREL    -> cfa + sp/fp/ra_off
              CFIR_MEMCFAREL -> *( cfa + sp/fp/ra_off )
              CFIR_EXPR      -> expr whose index is in sp/fp/ra_off
              CFIR_S390X_F0  -> old value of %f0
              CFIR_S390X_F1  -> old value of %f1
              CFIR_S390X_F2  -> old value of %f2
              CFIR_S390X_F3  -> old value of %f3
              CFIR_S390X_F4  -> old value of %f4
              CFIR_S390X_F5  -> old value of %f5
              CFIR_S390X_F6  -> old value of %f6
              CFIR_S390X_F7  -> old value of %f7
*/
/* CFIC_ values: how to compute the CFA (stored in DiCfSI_m.cfa_how). */
#define CFIC_IA_SPREL     ((UChar)1)
#define CFIC_IA_BPREL     ((UChar)2)
#define CFIC_ARM_R13REL   ((UChar)3)
#define CFIC_ARM_R12REL   ((UChar)4)
#define CFIC_ARM_R11REL   ((UChar)5)
#define CFIC_ARM_R7REL    ((UChar)6)
#define CFIC_ARM64_SPREL  ((UChar)7)
#define CFIC_ARM64_X29REL ((UChar)8)
#define CFIC_EXPR         ((UChar)9)  /* all targets */

/* CFIR_ values: how to recover a register of the previous frame
   (stored in the various DiCfSI_m.*_how fields other than cfa_how). */
#define CFIR_UNKNOWN      ((UChar)64)
#define CFIR_SAME         ((UChar)65)
#define CFIR_CFAREL       ((UChar)66)
#define CFIR_MEMCFAREL    ((UChar)67)
#define CFIR_EXPR         ((UChar)68)
#define CFIR_S390X_F0     ((UChar)69)
#define CFIR_S390X_F1     ((UChar)70)
#define CFIR_S390X_F2     ((UChar)71)
#define CFIR_S390X_F3     ((UChar)72)
#define CFIR_S390X_F4     ((UChar)73)
#define CFIR_S390X_F5     ((UChar)74)
#define CFIR_S390X_F6     ((UChar)75)
#define CFIR_S390X_F7     ((UChar)76)
264 /* Definition of the DiCfSI_m DiCfSI machine dependent part.
265 These are highly duplicated, and are stored in a pool. */
266 #if defined(VGA_x86) || defined(VGA_amd64)
267 typedef
268 struct {
269 UChar cfa_how; /* a CFIC_IA value */
270 UChar ra_how; /* a CFIR_ value */
271 UChar sp_how; /* a CFIR_ value */
272 UChar bp_how; /* a CFIR_ value */
273 Int cfa_off;
274 Int ra_off;
275 Int sp_off;
276 Int bp_off;
278 DiCfSI_m;
279 #elif defined(VGA_arm)
280 typedef
281 struct {
282 UChar cfa_how; /* a CFIC_ value */
283 UChar ra_how; /* a CFIR_ value */
284 UChar r14_how; /* a CFIR_ value */
285 UChar r13_how; /* a CFIR_ value */
286 UChar r12_how; /* a CFIR_ value */
287 UChar r11_how; /* a CFIR_ value */
288 UChar r7_how; /* a CFIR_ value */
289 Int cfa_off;
290 Int ra_off;
291 Int r14_off;
292 Int r13_off;
293 Int r12_off;
294 Int r11_off;
295 Int r7_off;
296 // If you add additional fields, don't forget to update the
297 // initialisation of this in readexidx.c accordingly.
299 DiCfSI_m;
300 #elif defined(VGA_arm64)
301 typedef
302 struct {
303 UChar cfa_how; /* a CFIC_ value */
304 UChar ra_how; /* a CFIR_ value */
305 UChar sp_how; /* a CFIR_ value */ /*dw31=SP*/
306 UChar x30_how; /* a CFIR_ value */ /*dw30=LR*/
307 UChar x29_how; /* a CFIR_ value */ /*dw29=FP*/
308 Int cfa_off;
309 Int ra_off;
310 Int sp_off;
311 Int x30_off;
312 Int x29_off;
314 DiCfSI_m;
315 #elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
316 /* Just have a struct with the common fields in, so that code that
317 processes the common fields doesn't have to be ifdef'd against
318 VGP_/VGA_ symbols. These are not used in any way on ppc32/64-linux
319 at the moment. */
320 typedef
321 struct {
322 UChar cfa_how; /* a CFIC_ value */
323 UChar ra_how; /* a CFIR_ value */
324 Int cfa_off;
325 Int ra_off;
327 DiCfSI_m;
328 #elif defined(VGA_s390x)
329 typedef
330 struct {
331 UChar cfa_how; /* a CFIC_ value */
332 UChar sp_how; /* a CFIR_ value */
333 UChar ra_how; /* a CFIR_ value */
334 UChar fp_how; /* a CFIR_ value */
335 UChar f0_how; /* a CFIR_ value */
336 UChar f1_how; /* a CFIR_ value */
337 UChar f2_how; /* a CFIR_ value */
338 UChar f3_how; /* a CFIR_ value */
339 UChar f4_how; /* a CFIR_ value */
340 UChar f5_how; /* a CFIR_ value */
341 UChar f6_how; /* a CFIR_ value */
342 UChar f7_how; /* a CFIR_ value */
343 Int cfa_off;
344 Int sp_off;
345 Int ra_off;
346 Int fp_off;
347 Int f0_off;
348 Int f1_off;
349 Int f2_off;
350 Int f3_off;
351 Int f4_off;
352 Int f5_off;
353 Int f6_off;
354 Int f7_off;
356 DiCfSI_m;
357 #elif defined(VGA_mips32) || defined(VGA_mips64) || defined(VGA_nanomips)
358 typedef
359 struct {
360 UChar cfa_how; /* a CFIC_ value */
361 UChar ra_how; /* a CFIR_ value */
362 UChar sp_how; /* a CFIR_ value */
363 UChar fp_how; /* a CFIR_ value */
364 Int cfa_off;
365 Int ra_off;
366 Int sp_off;
367 Int fp_off;
369 DiCfSI_m;
370 #else
371 # error "Unknown arch"
372 #endif
374 typedef
375 struct {
376 Addr base;
377 UInt len;
378 UInt cfsi_m_ix;
380 DiCfSI;
/* Unary operators of a CfiExpr Unop node.  Numbering starts at 0x231. */
typedef
   enum {
      Cunop_Abs=0x231,
      Cunop_Neg,
      Cunop_Not
   }
   CfiUnop;
/* Binary operators of a CfiExpr Binop node.  Numbering starts at 0x321. */
typedef
   enum {
      Cbinop_Add=0x321,
      Cbinop_Sub,
      Cbinop_And,
      Cbinop_Mul,
      Cbinop_Shl,
      Cbinop_Shr,
      Cbinop_Eq,
      Cbinop_Ge,
      Cbinop_Gt,
      Cbinop_Le,
      Cbinop_Lt,
      Cbinop_Ne
   }
   CfiBinop;
/* Machine registers that a CfiExpr CfiReg node can name.
   Numbering starts at 0x213 with Creg_INVALID. */
typedef
   enum {
      Creg_INVALID=0x213,
      Creg_IA_SP,
      Creg_IA_BP,
      Creg_IA_IP,
      Creg_ARM_R13,
      Creg_ARM_R12,
      Creg_ARM_R15,
      Creg_ARM_R14,
      Creg_ARM_R7,
      Creg_ARM64_X30,
      Creg_S390_IA,
      Creg_S390_SP,
      Creg_S390_FP,
      Creg_S390_LR,
      Creg_MIPS_RA
   }
   CfiReg;
/* Discriminant of a CfiExpr: says which member of CfiExpr.Cex is live.
   Numbering starts at 0x123. */
typedef
   enum {
      Cex_Undef=0x123,
      Cex_Deref,
      Cex_Const,
      Cex_Unop,
      Cex_Binop,
      Cex_CfiReg,
      Cex_DwReg
   }
   CfiExprTag;
439 typedef
440 struct {
441 CfiExprTag tag;
442 union {
443 struct {
444 } Undef;
445 struct {
446 Int ixAddr;
447 } Deref;
448 struct {
449 UWord con;
450 } Const;
451 struct {
452 CfiUnop op;
453 Int ix;
454 } Unop;
455 struct {
456 CfiBinop op;
457 Int ixL;
458 Int ixR;
459 } Binop;
460 struct {
461 CfiReg reg;
462 } CfiReg;
463 struct {
464 Int reg;
465 } DwReg;
467 Cex;
469 CfiExpr;
471 extern Int ML_(CfiExpr_Undef) ( XArray* dst );
472 extern Int ML_(CfiExpr_Deref) ( XArray* dst, Int ixAddr );
473 extern Int ML_(CfiExpr_Const) ( XArray* dst, UWord con );
474 extern Int ML_(CfiExpr_Unop) ( XArray* dst, CfiUnop op, Int ix );
475 extern Int ML_(CfiExpr_Binop) ( XArray* dst, CfiBinop op, Int ixL, Int ixR );
476 extern Int ML_(CfiExpr_CfiReg)( XArray* dst, CfiReg reg );
477 extern Int ML_(CfiExpr_DwReg) ( XArray* dst, Int reg );
479 extern void ML_(ppCfiExpr)( const XArray* src, Int ix );
481 /* ---------------- FPO INFO (Windows PE) -------------- */
483 /* for apps using Wine: MSVC++ PDB FramePointerOmitted: somewhat like
484 a primitive CFI */
485 typedef
486 struct _FPO_DATA { /* 16 bytes */
487 UInt ulOffStart; /* offset of 1st byte of function code */
488 UInt cbProcSize; /* # bytes in function */
489 UInt cdwLocals; /* # bytes/4 in locals */
490 UShort cdwParams; /* # bytes/4 in params */
491 UChar cbProlog; /* # bytes in prolog */
492 UChar cbRegs :3; /* # regs saved */
493 UChar fHasSEH:1; /* Structured Exception Handling */
494 UChar fUseBP :1; /* EBP has been used */
495 UChar reserved:1;
496 UChar cbFrame:2; /* frame type */
498 FPO_DATA;
/* Frame type codes.  NOTE(review): presumably the values held in
   FPO_DATA.cbFrame -- confirm against the PDB reader. */
#define PDB_FRAME_FPO  0
#define PDB_FRAME_TRAP 1
#define PDB_FRAME_TSS  2
504 /* --------------------- VARIABLES --------------------- */
506 typedef
507 struct {
508 Addr aMin;
509 Addr aMax;
510 XArray* /* of DiVariable */ vars;
512 DiAddrRange;
514 typedef
515 struct {
516 const HChar* name; /* in DebugInfo.strpool */
517 UWord typeR; /* a cuOff */
518 const GExpr* gexpr; /* on DebugInfo.gexprs list */
519 const GExpr* fbGX; /* SHARED. */
520 UInt fndn_ix; /* where declared; may be zero. index
521 in DebugInfo.fndnpool */
522 Int lineNo; /* where declared; may be zero. */
524 DiVariable;
526 Word
527 ML_(cmp_for_DiAddrRange_range) ( const void* keyV, const void* elemV );
529 /* --------------------- DEBUGINFO --------------------- */
531 /* This is the top-level data type. It's a structure which contains
532 information pertaining to one mapped ELF object. This type is
533 exported only abstractly - in pub_tool_debuginfo.h. */
535 /* First though, here's an auxiliary data structure. It is only ever
536 used as part of a struct _DebugInfo. We use it to record
537 observations about mappings and permission changes to the
538 associated file, so as to decide when to read debug info. It's
539 essentially an ultra-trivial finite state machine which, when it
540 reaches an accept state, signals that we should now read debug info
541 from the object into the associated struct _DebugInfo. The accept
542 state is arrived at when have_rx_map and have_rw_map both become
543 true. The initial state is one in which we have no observations,
544 so have_rx_map and have_rw_map are both false.
546 This all started as a rather ad-hoc solution, but was further
547 expanded to handle weird object layouts, e.g. more than one rw
548 or rx mapping for one binary.
550 The normal sequence of events is one of
552 start --> r-x mapping --> rw- mapping --> accept
553 start --> rw- mapping --> r-x mapping --> accept
555 that is, take the first r-x and rw- mapping we see, and we're done.
557 On MacOSX >= 10.7, 32-bit, there appears to be a new variant:
559 start --> r-- mapping --> rw- mapping
560 --> upgrade r-- mapping to r-x mapping --> accept
562 where the upgrade is done by a call to mach_vm_protect (OSX 10.7)
563 or kernelrpc_mach_vm_protect_trap (OSX 10.9 and possibly 10.8).
564 Hence we need to also track this possibility.
   From perusal of dyld sources, it appears that this scheme could
   also be used for 64 bit libraries, although that doesn't seem to
   happen in practice.  dyld uses this scheme when the text section
   requires relocation, which only appears to be the case for 32 bit
   objects.
*/
572 typedef struct
574 Addr avma; /* these fields record the file offset, length */
575 SizeT size; /* and map address of each mapping */
576 OffT foff;
577 Bool rx, rw, ro; /* memory access flags for this mapping */
578 } DebugInfoMapping;
580 struct _DebugInfoFSM
582 HChar* filename; /* in mallocville (VG_AR_DINFO) */
583 HChar* dbgname; /* in mallocville (VG_AR_DINFO) */
584 XArray* maps; /* XArray of DebugInfoMapping structs */
585 Bool have_rx_map; /* did we see a r?x mapping yet for the file? */
586 Bool have_rw_map; /* did we see a rw? mapping yet for the file? */
587 Bool have_ro_map; /* did we see a r-- mapping yet for the file? */
/* To do with the string table in struct _DebugInfo (::strpool) */
#define SEGINFO_STRPOOLSIZE (64*1024)
/* We may encounter more than one .eh_frame section in an object --
   unusual but apparently allowed by ELF.  See
   http://sourceware.org/bugzilla/show_bug.cgi?id=12675
*/
#define N_EHFRAME_SECTS 2
602 /* So, the main structure for holding debug info for one object. */
604 struct _DebugInfo {
606 /* Admin stuff */
608 struct _DebugInfo* next; /* list of DebugInfos */
609 Bool mark; /* marked for deletion? */
611 /* An abstract handle, which can be used by entities outside of
612 m_debuginfo to (in an abstract datatype sense) refer to this
613 struct _DebugInfo. A .handle of zero is invalid; valid handles
614 are 1 and above. The same handle is never issued twice (in any
615 given run of Valgrind), so a handle becomes invalid when the
616 associated struct _DebugInfo is discarded, and remains invalid
617 forever thereafter. The .handle field is set as soon as this
618 structure is allocated. */
619 ULong handle;
621 /* The range of epochs for which this DebugInfo is valid. These also
622 divide the DebugInfo's lifetime into three parts:
624 (1) Allocated: but with only .fsm holding useful info -- in
625 particular, not yet holding any debug info.
626 .first_epoch == DebugInfoEpoch_INVALID
627 .last_epoch == DebugInfoEpoch_INVALID
629 (2) Active: containing debug info, and current.
630 .first_epoch != DebugInfoEpoch_INVALID
631 .last_epoch == DebugInfoEpoch_INVALID
633 (3) Archived: containing debug info, but no longer current.
634 .first_epoch != DebugInfoEpoch_INVALID
635 .last_epoch != DebugInfoEpoch_INVALID
637 State (2) corresponds to an object which is currently mapped. When
638 the object is unmapped, what happens depends on the setting of
639 --keep-debuginfo:
641 * when =no, the DebugInfo is removed from debugInfo_list and
642 deleted.
644 * when =yes, the DebugInfo is retained in debugInfo_list, but its
645 .last_epoch field is filled in, and current_epoch is advanced. This
646 effectively moves the DebugInfo into state (3).
648 DiEpoch first_epoch;
649 DiEpoch last_epoch;
651 /* Used for debugging only - indicate what stuff to dump whilst
652 reading stuff into the seginfo. Are computed as early in the
653 lifetime of the DebugInfo as possible -- at the point when it is
654 created. Use these when deciding what to spew out; do not use
655 the global VG_(clo_blah) flags. */
657 Bool trace_symtab; /* symbols, our style */
658 Bool trace_cfi; /* dwarf frame unwind, our style */
659 Bool ddump_syms; /* mimic /usr/bin/readelf --syms */
660 Bool ddump_line; /* mimic /usr/bin/readelf --debug-dump=line */
661 Bool ddump_frames; /* mimic /usr/bin/readelf --debug-dump=frames */
663 /* The "decide when it is time to read debuginfo" state machine.
664 This structure must get filled in before we can start reading
665 anything from the ELF/MachO file. This structure is filled in
666 by VG_(di_notify_mmap) and its immediate helpers. */
667 struct _DebugInfoFSM fsm;
669 /* Once the ::fsm has reached an accept state -- typically, when
670 both a rw? and r?x mapping for .filename have been observed --
671 we can go on to read the symbol tables and debug info.
672 .have_dinfo changes from False to True when the debug info has
673 been completely read in and postprocessed (canonicalised) and is
674 now suitable for querying. */
675 /* If have_dinfo is False, then all fields below this point are
676 invalid and should not be consulted. */
677 Bool have_dinfo; /* initially False */
679 /* All the rest of the fields in this structure are filled in once
680 we have committed to reading the symbols and debug info (that
681 is, at the point where .have_dinfo is set to True). */
683 /* The file's soname. */
684 HChar* soname;
686 /* Description of some important mapped segments. The presence or
687 absence of the mapping is denoted by the _present field, since
688 in some obscure circumstances (to do with data/sdata/bss) it is
689 possible for the mapping to be present but have zero size.
690 Certainly text_ is mandatory on all platforms; not sure about
691 the rest though.
693 --------------------------------------------------------
695 Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS: we require that
697 either (size of all rx maps == 0 && cfsi == NULL) (the degenerate case)
699 or the normal case, which is the AND of the following:
700 (0) size of at least one rx mapping > 0
701 (1) no two non-archived DebugInfos with some rx mapping of size > 0
702 have overlapping rx mappings
      (2) Each address in [cfsi_minavma,cfsi_maxavma] is in an rx mapping
          or else no cfsi can cover this address.
          The typical case is a single rx mapping covering the full range.
          In some cases, the union of several rx mappings covers the range,
          with possibly some holes between the rx mappings, and no cfsi
          falls within such a hole.
      (3) all DiCfSI in the cfsi array have ranges that fall within
          [avma,+size) of that rx mapping.
711 (4) all DiCfSI in the cfsi array are non-overlapping
713 The cumulative effect of these restrictions is to ensure that
714 all the DiCfSI records in the entire system are non overlapping.
715 Hence any address falls into either exactly one DiCfSI record,
716 or none. Hence it is safe to cache the results of searches for
717 DiCfSI records. This is the whole point of these restrictions.
718 The caching of DiCfSI searches is done in VG_(use_CF_info). The
719 cache is flushed after any change to debugInfo_list. DiCfSI
720 searches are cached because they are central to stack unwinding
721 on amd64-linux.
723 Where are these invariants imposed and checked?
725 They are checked after a successful read of debuginfo into
726 a DebugInfo*, in check_CFSI_related_invariants.
728 (1) is not really imposed anywhere. We simply assume that the
729 kernel will not map the text segments from two different objects
730 into the same space. Sounds reasonable.
732 (2) follows from (4) and (3). It is ensured by canonicaliseCFI.
733 (3) is ensured by ML_(addDiCfSI).
734 (4) is ensured by canonicaliseCFI.
736 --------------------------------------------------------
738 Comment_on_DEBUG_SVMA_and_DEBUG_BIAS_fields:
      The _debug_{svma,bias} fields were added as part of a fix to
      #185816.  The problem encompassed in that bug report was that it
      wasn't correct to apply the bias values deduced for a
      primary object to its associated debuginfo object, because the
      debuginfo object (or the primary) could have been prelinked to a
      different SVMA.  Hence debuginfo and primary objects need to
      have their own biases.
748 ------ JRS: (referring to r9329): ------
749 Let me see if I understand the workings correctly. Initially
750 the _debug_ values are set to the same values as the "normal"
751 ones, as there's a bunch of bits of code like this (in
752 readelf.c)
754 di->text_svma = svma;
756 di->text_bias = rx_bias;
757 di->text_debug_svma = svma;
758 di->text_debug_bias = rx_bias;
760 If a debuginfo object subsequently shows up then the
761 _debug_svma/bias are set for the debuginfo object. Result is
762 that if there's no debuginfo object then the values are the same
763 as the primary-object values, and if there is a debuginfo object
764 then they will (or at least may) be different.
766 Then when we need to actually bias something, we'll have to
767 decide whether to use the primary bias or the debuginfo bias.
768 And the strategy is to use the primary bias for ELF symbols but
769 the debuginfo bias for anything pulled out of Dwarf.
771 ------ THH: ------
772 Correct - the debug_svma and bias values apply to any address
773 read from the debug data regardless of where that debug data is
774 stored and the other values are used for addresses from other
775 places (primarily the symbol table).
777 ------ JRS: ------
778 Ok; so this was my only area of concern. Are there any
779 corner-case scenarios where this wouldn't be right? It sounds
780 like we're assuming the ELF symbols come from the primary object
781 and, if there is a debug object, then all the Dwarf comes from
782 there. But what if (eg) both symbols and Dwarf come from the
783 debug object? Is that even possible or allowable?
785 ------ THH: ------
786 You may have a point...
788 The current logic is to try and take any one set of data from
789 either the base object or the debug object. There are four sets
790 of data we consider:
792 - Symbol Table
793 - Stabs
794 - DWARF1
795 - DWARF2
797 If we see the primary section for a given set in the base object
798 then we ignore all sections relating to that set in the debug
799 object.
801 Now in principle if we saw a secondary section (like debug_line
802 say) in the base object, but not the main section (debug_info in
803 this case) then we would take debug_info from the debug object
804 but would use the debug_line from the base object unless we saw
805 a replacement copy in the debug object. That's probably unlikely
806 however.
808 A bigger issue might be, as you say, the symbol table as we will
809 pick that up from the debug object if it isn't in the base. The
810 dynamic symbol table will always have to be in the base object
811 though so we will have to be careful when processing symbols to
812 know which table we are reading in that case.
814 What we probably need to do is tell read_elf_symtab which object
815 the symbols it is being asked to read came from.
817 (A followup patch to deal with this was committed in r9469).
819 /* .text */
820 Bool text_present;
821 Addr text_avma;
822 Addr text_svma;
823 SizeT text_size;
824 PtrdiffT text_bias;
825 Addr text_debug_svma;
826 PtrdiffT text_debug_bias;
827 /* .data */
828 Bool data_present;
829 Addr data_svma;
830 Addr data_avma;
831 SizeT data_size;
832 PtrdiffT data_bias;
833 Addr data_debug_svma;
834 PtrdiffT data_debug_bias;
835 /* .sdata */
836 Bool sdata_present;
837 Addr sdata_svma;
838 Addr sdata_avma;
839 SizeT sdata_size;
840 PtrdiffT sdata_bias;
841 Addr sdata_debug_svma;
842 PtrdiffT sdata_debug_bias;
843 /* .rodata */
844 Bool rodata_present;
845 Addr rodata_svma;
846 Addr rodata_avma;
847 SizeT rodata_size;
848 PtrdiffT rodata_bias;
849 Addr rodata_debug_svma;
850 PtrdiffT rodata_debug_bias;
851 /* .bss */
852 Bool bss_present;
853 Addr bss_svma;
854 Addr bss_avma;
855 SizeT bss_size;
856 PtrdiffT bss_bias;
857 Addr bss_debug_svma;
858 PtrdiffT bss_debug_bias;
859 /* .sbss */
860 Bool sbss_present;
861 Addr sbss_svma;
862 Addr sbss_avma;
863 SizeT sbss_size;
864 PtrdiffT sbss_bias;
865 Addr sbss_debug_svma;
866 PtrdiffT sbss_debug_bias;
867 /* .ARM.exidx -- sometimes present on arm32, containing unwind info. */
868 Bool exidx_present;
869 Addr exidx_avma;
870 Addr exidx_svma;
871 SizeT exidx_size;
872 PtrdiffT exidx_bias;
873 /* .ARM.extab -- sometimes present on arm32, containing unwind info. */
874 Bool extab_present;
875 Addr extab_avma;
876 Addr extab_svma;
877 SizeT extab_size;
878 PtrdiffT extab_bias;
879 /* .plt */
880 Bool plt_present;
881 Addr plt_avma;
882 SizeT plt_size;
883 /* .got */
884 Bool got_present;
885 Addr got_avma;
886 SizeT got_size;
887 /* .got.plt */
888 Bool gotplt_present;
889 Addr gotplt_avma;
890 SizeT gotplt_size;
891 /* .opd -- needed on ppc64be-linux for finding symbols */
892 Bool opd_present;
893 Addr opd_avma;
894 SizeT opd_size;
895 /* .ehframe -- needed on amd64-linux for stack unwinding. We might
896 see more than one, hence the arrays. */
897 UInt n_ehframe; /* 0 .. N_EHFRAME_SECTS */
898 Addr ehframe_avma[N_EHFRAME_SECTS];
899 SizeT ehframe_size[N_EHFRAME_SECTS];
901 /* Sorted tables of stuff we snarfed from the file. This is the
902 eventual product of reading the debug info. All this stuff
903 lives in VG_AR_DINFO. */
905 /* An expandable array of symbols. */
906 DiSym* symtab;
907 UWord symtab_used;
908 UWord symtab_size;
909 /* Two expandable arrays, storing locations and their filename/dirname. */
910 DiLoc* loctab;
911 UInt sizeof_fndn_ix; /* Similar use as sizeof_cfsi_m_ix below. */
912 void* loctab_fndn_ix; /* loctab[i] filename/dirname is identified by
                               loctab_fndn_ix[i] (an index in di->fndnpool)
                               0 means filename/dirname unknown.
                               The void* is a UChar* or UShort* or UInt*
                               depending on sizeof_fndn_ix. */
917 UWord loctab_used;
918 UWord loctab_size;
   /* An expandable array of inlined fn info.
      maxinl_codesz is the biggest inlined piece of code
      in inltab (i.e. the max of 'addr_hi - addr_lo'). */
922 DiInlLoc* inltab;
923 UWord inltab_used;
924 UWord inltab_size;
925 SizeT maxinl_codesz;
927 /* A set of expandable arrays to store CFI summary info records.
928 The machine specific information (i.e. the DiCfSI_m struct)
929 are stored in cfsi_m_pool, as these are highly duplicated.
930 The DiCfSI_m are allocated in cfsi_m_pool and identified using
931 a (we hope) small integer : often one byte is enough, sometimes
932 2 bytes are needed.
934 cfsi_base contains the bases of the code address ranges.
935 cfsi_size is the size of the cfsi_base array.
936 The elements cfsi_base[0] till cfsi_base[cfsi_used-1] are used.
937 Following elements are not used (yet).
939 For each base in cfsi_base, an index into cfsi_m_pool is stored
940 in cfsi_m_ix array. The size of cfsi_m_ix is equal to
941 cfsi_size*sizeof_cfsi_m_ix. The used portion of cfsi_m_ix is
942 cfsi_m_ix[0] till cfsi_m_ix[(cfsi_used-1)*sizeof_cfsi_m_ix].
944 cfsi_base[i] gives the base address of a code range covered by
945 some CF Info. The corresponding CF Info is identified by an index
946 in cfsi_m_pool. The DiCfSI_m index in cfsi_m_pool corresponding to
947 cfsi_base[i] is given
948 by ((UChar*) cfsi_m_ix)[i] if sizeof_cfsi_m_ix == 1
949 by ((UShort*)cfsi_m_ix)[i] if sizeof_cfsi_m_ix == 2
950 by ((UInt*) cfsi_m_ix)[i] if sizeof_cfsi_m_ix == 4.
952 The end of the code range starting at cfsi_base[i] is given by
953 cfsi_base[i+1]-1 (or cfsi_maxavma for cfsi_base[cfsi_used-1]).
954 Some code ranges between cfsi_minavma and cfsi_maxavma might not
955 be covered by cfi information. Such not covered ranges are stored by
956 a base in cfsi_base and a corresponding 0 index in cfsi_m_ix.
958 A variable size representation has been chosen for the elements of
959 cfsi_m_ix as in many cases, one byte is good enough. For big
960 objects, 2 bytes are needed. No object has yet been found where
961 4 bytes are needed (but the code is ready to handle this case).
962 Not covered ranges ('cfi holes') are stored explicitly in
963 cfsi_base/cfsi_m_ix as this is more memory efficient than storing
964 a length for each covered range : on x86 or amd64, we typically have
965 a hole every 8 covered ranges. On arm64, we have very few holes
966 (1 every 50 or 100 ranges).
968 The cfsi information is read and prepared in the cfsi_rd array.
969 Once all the information has been read, the cfsi_base and cfsi_m_ix
970 arrays will be filled in from cfsi_rd. cfsi_rd will then be freed.
971 This is all done by ML_(finish_CFSI_arrays).
973 Also includes summary address bounds, showing the min and max address
974 covered by any of the records, as an aid to fast searching. And, if the
975 records require any expression nodes, they are stored in
976 cfsi_exprs. */
977 Addr* cfsi_base;
978 UInt sizeof_cfsi_m_ix; /* size in byte of indexes stored in cfsi_m_ix. */
979 void* cfsi_m_ix; /* Each index occupies sizeof_cfsi_m_ix bytes.
980 The void* is an UChar* or UShort* or UInt*
981 depending on sizeof_cfsi_m_ix. */
983 DiCfSI* cfsi_rd; /* Only used during reading, NULL once info is read. */
985 UWord cfsi_used;
986 UWord cfsi_size;
988 DedupPoolAlloc *cfsi_m_pool;
989 Addr cfsi_minavma;
990 Addr cfsi_maxavma;
991 XArray* cfsi_exprs; /* XArray of CfiExpr */
993 /* Optimized code under Wine x86: MSVC++ PDB FramePointerOmitted
994 data. Non-expandable array, hence .size == .used. */
995 FPO_DATA* fpo;
996 UWord fpo_size;
997 Addr fpo_minavma;
998 Addr fpo_maxavma;
999 Addr fpo_base_avma;
1001 /* Pool of strings -- the string table. Pointers
1002 into this are stable (the memory is not reallocated). */
1003 DedupPoolAlloc *strpool;
1005 /* Pool of FnDn -- filename and dirname.
1006 Elements in the pool are allocated using VG_(allocFixedEltDedupPA). */
1007 DedupPoolAlloc *fndnpool;
1009 /* Variable scope information, as harvested from Dwarf3 files.
1011 In short it's an
1013 array of (array of PC address ranges and variables)
1015 The outer array indexes over scopes, with Entry 0 containing
1016 information on variables which exist for any value of the program
1017 counter (PC) -- that is, the outermost scope. Entries 1, 2, 3,
1018 etc contain information on increasingly deeply nested variables.
1020 Each inner array is an array of (an address range, and a set
1021 of variables that are in scope over that address range).
1023 The address ranges may not overlap.
1025 Since Entry 0 in the outer array holds information on variables
1026 that exist for any value of the PC (that is, global vars), it
1027 follows that Entry 0's inner array can only have one address
1028 range pair, one that covers the entire address space.
1030 XArray* /* of OSet of DiAddrRange */varinfo;
1032 /* These are arrays of the relevant typed objects, held here
1033 partially for the purposes of visiting each object exactly once
1034 when we need to delete them. */
1036 /* An array of TyEnts. These are needed to make sense of any types
1037 in the .varinfo. Also, when deleting this DebugInfo, we must
1038 first traverse this array and throw away malloc'd stuff hanging
1039 off it -- by calling ML_(TyEnt__make_EMPTY) on each entry. */
1040 XArray* /* of TyEnt */ admin_tyents;
1042 /* An array of guarded DWARF3 expressions. */
1043 XArray* admin_gexprs;
1045 /* Cached last rx mapping matched and returned by ML_(find_rx_mapping).
1046 This helps performance a lot during ML_(addLineInfo) etc., which can
1047 easily be invoked hundreds of thousands of times. */
1048 DebugInfoMapping* last_rx_map;
1051 /* --------------------- functions --------------------- */
1053 /* ------ Adding ------ */
1055 /* Add a symbol to si's symbol table. The contents of 'sym' are
1056 copied. It is assumed (and checked) that 'sym' only contains one
1057 name, so there is no auxiliary ::sec_names vector to duplicate.
1058 IOW, the copy is a shallow copy, and there are assertions in place
1059 to ensure that's OK. */
1060 extern void ML_(addSym) ( struct _DebugInfo* di, DiSym* sym );
1062 /* Add a filename/dirname pair to a DebugInfo and returns the index
1063 in the fndnpool fixed pool. */
1064 extern UInt ML_(addFnDn) (struct _DebugInfo* di,
1065 const HChar* filename,
1066 const HChar* dirname); /* NULL is allowable */
1068 /* Returns the filename of the fndn pair identified by fndn_ix.
1069 Returns "???" if fndn_ix is 0. */
1070 extern const HChar* ML_(fndn_ix2filename) (const DebugInfo* di,
1071 UInt fndn_ix);
1073 /* Returns the dirname of the fndn pair identified by fndn_ix.
1074 Returns "" if fndn_ix is 0 or fndn->dirname is NULL. */
1075 extern const HChar* ML_(fndn_ix2dirname) (const DebugInfo* di,
1076 UInt fndn_ix);
1078 /* Returns the fndn_ix for the LineInfo locno in di->loctab.
1079 0 if filename/dirname are unknown. */
1080 extern UInt ML_(fndn_ix) (const DebugInfo* di, Word locno);
1082 /* Add a line-number record to a DebugInfo.
1083 fndn_ix is an index in di->fndnpool, allocated using ML_(addFnDn).
1084 Give a 0 index for an unknown filename/dirname pair.
NOTE(review): 'this' and 'next' presumably delimit the code address
range that the line covers, and 'lineno' is the source line number;
the meaning of 'entry' is not visible from this header -- confirm
all of these against the definition. */
1085 extern
1086 void ML_(addLineInfo) ( struct _DebugInfo* di,
1087 UInt fndn_ix,
1088 Addr this, Addr next, Int lineno, Int entry);
1090 /* Add a call inlined record to a DebugInfo.
1091 A call to the below means that inlinedfn code has been
1092 inlined, resulting in code from [addr_lo, addr_hi[.
1093 Note that addr_hi is excluded, i.e. is not part of the inlined code.
1094 fndn_ix and lineno identifies the location of the call that caused
1095 this inlining.
1096 fndn_ix is an index in di->fndnpool, allocated using ML_(addFnDn).
1097 Give a 0 index for an unknown filename/dirname pair.
1098 In case of nested inlining, a small level indicates the call
1099 is closer to main than a call with a higher level. */
1100 extern
1101 void ML_(addInlInfo) ( struct _DebugInfo* di,
1102 Addr addr_lo, Addr addr_hi,
1103 const HChar* inlinedfn,
1104 UInt fndn_ix,
1105 Int lineno, UShort level);
1107 /* Add a CFI summary record. The supplied DiCfSI_m is copied. */
1108 extern void ML_(addDiCfSI) ( struct _DebugInfo* di,
1109 Addr base, UInt len, DiCfSI_m* cfsi_m );
1111 /* Given a position in the di->cfsi_base/cfsi_m_ix arrays, return
1112 the corresponding cfsi_m*. Return NULL if the position corresponds
1113 to a cfsi hole. */
1114 DiCfSI_m* ML_(get_cfsi_m) (const DebugInfo* di, UInt pos);
1116 /* Add a string to the string table of a DebugInfo. If len==-1,
1117 ML_(addStr) will itself measure the length of the string. */
1118 extern const HChar* ML_(addStr) ( DebugInfo* di, const HChar* str, Int len );
1120 /* Add a string to the string table of a DebugInfo, by copying the
1121 string from the given DiCursor. Measures the length of the string
1122 itself. */
1123 extern const HChar* ML_(addStrFromCursor)( DebugInfo* di, DiCursor c );
/* Add a variable description to a DebugInfo.
   NOTE(review): the argument meanings below that are not stated by an
   inline comment are inferred from the parameter names only; confirm
   them against the definition.
     level       -- presumably the scope nesting level of the variable
                    (TODO confirm).
     aMin, aMax  -- presumably the address range over which the variable
                    is in scope (TODO confirm, including whether aMax is
                    inclusive).
     typeR       -- a cuOff, per the inline comment.
     fbGX        -- marked SHARED in the declaration; presumably not owned
                    by the new record (TODO confirm).
     fndn_ix,
     lineNo      -- where the variable was declared; either may be zero. */
1125 extern void ML_(addVar)( struct _DebugInfo* di,
1126 Int level,
1127 Addr aMin,
1128 Addr aMax,
1129 const HChar* name,
1130 UWord typeR, /* a cuOff */
1131 const GExpr* gexpr,
1132 const GExpr* fbGX, /* SHARED. */
1133 UInt fndn_ix, /* where decl'd - may be zero */
1134 Int lineNo, /* where decl'd - may be zero */
1135 Bool show );
1136 /* Note: fndn_ix identifies a filename/dirname pair similarly to
1137 ML_(addInlInfo) and ML_(addLineInfo): it is an index in
di->fndnpool, allocated using ML_(addFnDn). */
1139 /* Canonicalise the tables held by 'di', in preparation for use. Call
1140 this after finishing adding entries to these tables. */
1141 extern void ML_(canonicaliseTables) ( struct _DebugInfo* di );
1143 /* Canonicalise the call-frame-info table held by 'di', in preparation
1144 for use. This is called by ML_(canonicaliseTables) but can also be
1145 called on its own to sort just this table. */
1146 extern void ML_(canonicaliseCFI) ( struct _DebugInfo* di );
1148 /* ML_(finish_CFSI_arrays) fills in the cfsi_base and cfsi_m_ix arrays
1149 from cfsi_rd array. cfsi_rd is then freed. */
1150 extern void ML_(finish_CFSI_arrays) ( struct _DebugInfo* di );
1152 /* ------ Searching ------ */
1154 /* Find a symbol-table index containing the specified pointer, or -1
1155 if not found. Binary search. */
1156 extern Word ML_(search_one_symtab) ( const DebugInfo* di, Addr ptr,
1157 Bool findText );
1159 /* Find a location-table index containing the specified pointer, or -1
1160 if not found. Binary search. */
1161 extern Word ML_(search_one_loctab) ( const DebugInfo* di, Addr ptr );
1163 /* Find a CFI-table index containing the specified pointer, or -1 if
1164 not found. Binary search. */
1165 extern Word ML_(search_one_cfitab) ( const DebugInfo* di, Addr ptr );
1167 /* Find a FPO-table index containing the specified pointer, or -1
1168 if not found. Binary search. */
1169 extern Word ML_(search_one_fpotab) ( const DebugInfo* di, Addr ptr );
1171 /* Helper function for the most often needed searching for an rx
1172 mapping containing the specified address range. The range must
1173 fall entirely within the mapping to be considered to be within it.
1174 Asserts if lo > hi; caller must ensure this doesn't happen. */
1175 extern DebugInfoMapping* ML_(find_rx_mapping) ( DebugInfo* di,
1176 Addr lo, Addr hi );
1178 /* ------ Misc ------ */
1180 /* Show a non-fatal debug info reading error. Use VG_(core_panic) for
1181 fatal errors. 'serious' errors are always shown; non-'serious' ones
1182 are shown only at verbosity level 2 and above. */
1183 extern
1184 void ML_(symerr) ( const DebugInfo* di, Bool serious, const HChar* msg );
1186 /* Print a symbol. */
1187 extern void ML_(ppSym) ( Int idx, const DiSym* sym );
1189 /* Print a call-frame-info summary. */
1190 extern void ML_(ppDiCfSI) ( const XArray* /* of CfiExpr */ exprs,
1191 Addr base, UInt len,
1192 const DiCfSI_m* si_m );
/* Tracing helpers for the debug-info readers.

   Both macros refer to a variable named 'di', so a pointer called 'di'
   to an object with a 'trace_symtab' field (presumably the DebugInfo
   currently being read -- confirm at the use sites) must be in scope
   wherever they are expanded.

   TRACE_SYMTAB_ENABLED   evaluates to di->trace_symtab.
   TRACE_SYMTAB(fmt, ...) forwards its arguments to VG_(printf), but only
                          when TRACE_SYMTAB_ENABLED is true.

   The body is wrapped in do { } while (0) so that the macro expands to
   exactly one statement.  With a bare 'if (...) { ... }' expansion,
      if (c) TRACE_SYMTAB(...); else ...
   fails to parse, because the trailing ';' ends the outer 'if' before
   the 'else' is reached. */
#define TRACE_SYMTAB_ENABLED (di->trace_symtab)
#define TRACE_SYMTAB(format, args...) \
   do { \
      if (TRACE_SYMTAB_ENABLED) { VG_(printf)(format, ## args); } \
   } while (0)
1200 #endif /* ndef __PRIV_STORAGE_H */
1202 /*--------------------------------------------------------------------*/
1203 /*--- end ---*/
1204 /*--------------------------------------------------------------------*/