1 /* -*- mode: C; c-basic-offset: 3; -*- */
3 /*--------------------------------------------------------------------*/
4 /*--- Read DWARF3/4 ".debug_info" sections (DIE trees). ---*/
5 /*--- readdwarf3.c ---*/
6 /*--------------------------------------------------------------------*/
9 This file is part of Valgrind, a dynamic binary instrumentation
12 Copyright (C) 2008-2017 OpenWorks LLP
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, see <http://www.gnu.org/licenses/>.
28 The GNU General Public License is contained in the file COPYING.
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
36 #if defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_solaris) || defined(VGO_freebsd)
38 /* REFERENCE (without which this code will not make much sense):
40 DWARF Debugging Information Format, Version 3,
41 dated 20 December 2005 (the "D3 spec").
43 Available at http://www.dwarfstd.org/Dwarf3.pdf. There's also a
44 .doc (MS Word) version, but for some reason the section numbers
45 between the Word and PDF versions differ by 1 in the first digit.
46 All section references in this code are to the PDF version.
50 DW_TAG_{const,volatile}_type no DW_AT_type is allowed; it is
51 assumed to mean "const void" or "volatile void" respectively.
52 GDB appears to interpret them like this, anyway.
54 In many cases it is important to know the svma of a CU (the "base
55 address of the CU", as the D3 spec calls it). There are some
56 situations in which the spec implies this value is unknown, but the
57 Dwarf3 produced by gcc-4.1 seems to assume it is not unknown but
58 merely zero when not explicitly stated. So we too have to make
61 POTENTIAL BUG? Spotted 6 Sept 08. Why doesn't
62 unitary_range_list() bias the resulting range list in the same way
63 that its more general cousin, get_range_list(), does? I don't
68 get rid of cu_svma_known and document the assumed-zero svma hack.
70 ML_(sizeOfType): differentiate between zero sized types and types
71 for which the size is unknown. Is this important? I don't know.
73 DW_TAG_array_types: deal with explicit sizes (currently we compute
74 the size from the bounds and the element size, although that's
75 fragile, if the bounds are incompletely specified, or completely
78 Document reason for difference (by 1) of stack preening depth in
79 parse_var_DIE vs parse_type_DIE.
81 Don't hand to ML_(addVars), vars whose locations are entirely in
82 registers (DW_OP_reg*). This is merely a space-saving
83 optimisation, as ML_(evaluate_Dwarf3_Expr) should handle these
84 expressions correctly, by failing to evaluate them and hence
85 effectively ignoring the variable with which they are associated.
87 Deal with DW_TAG_array_types which have element size != stride
89 In some cases, the info for a variable is split between two
90 different DIEs (generally a declarer and a definer). We punt on
91 these. Could do better here.
93 The 'data_bias' argument passed to the expression evaluator
94 (ML_(evaluate_Dwarf3_Expr)) should really be changed to a
95 MaybeUWord, to make it clear when we do vs don't know what it is
96 for the evaluation of an expression. At the moment zero is passed
97 for this parameter in the don't know case. That's a bit fragile
98 and obscure; using a MaybeUWord would be clearer.
100 POTENTIAL PERFORMANCE IMPROVEMENTS:
102 Currently, duplicate removal and all other queries for the type
103 entities array are done using cuOffset-based pointing, which
104 involves a binary search (VG_(lookupXA)) for each access. This is
105 wildly inefficient, although simple. It would be better to
106 translate all the cuOffset-based references (iow, all the "R" and
107 "Rs" fields in the TyEnts in 'tyents') to direct index numbers in
108 'tyents' right at the start of dedup_types(), and use direct
109 indexing (VG_(indexXA)) wherever possible after that.
111 cmp__XArrays_of_AddrRange is also a performance bottleneck. Move
112 VG_(indexXA) into pub_tool_xarray.h so it can be inlined at all use
113 points, and possibly also make an _UNCHECKED version which skips
114 the range checks in performance-critical situations such as this.
116 Handle interaction between read_DIE and parse_{var,type}_DIE
117 better. Currently read_DIE reads the entire DIE just to find where
118 the end is (and for debug printing), so that it can later reliably
119 move the cursor to the end regardless of what parse_{var,type}_DIE
120 do. This means many DIEs (most, even?) are read twice. It would
121 be smarter to make parse_{var,type}_DIE return a Bool indicating
122 whether or not they advanced the DIE cursor, and only if they
123 didn't should read_DIE itself read through the DIE.
125 ML_(addVar) and add_var_to_arange: quite a lot of DiAddrRanges have
126 zero variables in their .vars XArray. Rather than have an XArray
127 with zero elements (which uses 2 malloc'd blocks), allow the .vars
128 pointer to be NULL in this case.
130 More generally, reduce the amount of memory allocated and freed
131 while reading Dwarf3 type/variable information. Even modest (20MB)
132 objects cause this module to allocate and free hundreds of
133 thousands of small blocks, and ML_(arena_malloc) and its various
134 groupies always show up at the top of performance profiles. */
136 #include "pub_core_basics.h"
137 #include "pub_core_debuginfo.h"
138 #include "pub_core_libcbase.h"
139 #include "pub_core_libcassert.h"
140 #include "pub_core_libcprint.h"
141 #include "pub_core_libcsetjmp.h" // setjmp facilities
142 #include "pub_core_hashtable.h"
143 #include "pub_core_options.h"
144 #include "pub_core_tooliface.h" /* VG_(needs) */
145 #include "pub_core_xarray.h"
146 #include "pub_core_wordfm.h"
147 #include "priv_misc.h" /* dinfo_zalloc/free */
148 #include "priv_image.h"
149 #include "priv_tytypes.h"
150 #include "priv_d3basics.h"
151 #include "priv_storage.h"
152 #include "priv_readdwarf3.h" /* self */
155 /*------------------------------------------------------------*/
157 /*--- Basic machinery for parsing DIEs. ---*/
159 /*------------------------------------------------------------*/
/* Debug tracing helper: forwards its arguments to VG_(printf), but only
   when the flag 'td3' is set.  'td3' is not a macro parameter, so it must
   be in scope at every use site. */
161 #define TRACE_D3(format, args...) \
162 if (UNLIKELY(td3)) { VG_(printf)(format, ## args); }
/* Bare test of the same 'td3' flag, for guarding larger trace blocks. */
163 #define TD3 (UNLIKELY(td3))
/* Reserved cuOffset values: all-ones and all-ones-minus-one, as UWord. */
165 #define D3_INVALID_CUOFF ((UWord)(-1UL))
166 #define D3_FAKEVOID_CUOFF ((UWord)(-2UL))
170 DiSlice sli
; // to which this cursor applies
171 DiOffT sli_next
; // offset in underlying DiImage; must be >= sli.ioff
172 void (*barf
)( const HChar
* ) __attribute__((noreturn
));
173 const HChar
* barfstr
;
/* Consistency check for a Cursor.  Yields False when: the cursor pointer
   is NULL; the barf callback or barfstr is NULL; the slice fails
   ML_(sli_is_valid); the slice's image offset is DiOffT_INVALID; or the
   read position (sli_next) lies before the start of the slice.
   Note that a position beyond the END of the slice is not rejected here;
   the readers do their own upper-bound checks. */
177 static inline Bool
is_sane_Cursor ( const Cursor
* c
) {
178 if (!c
) return False
;
179 if (!c
->barf
) return False
;
180 if (!c
->barfstr
) return False
;
181 if (!ML_(sli_is_valid
)(c
->sli
)) return False
;
182 if (c
->sli
.ioff
== DiOffT_INVALID
) return False
;
/* sli_next is an absolute image offset, hence compared against sli.ioff. */
183 if (c
->sli_next
< c
->sli
.ioff
) return False
;
187 // Initialise a cursor from a DiSlice (ELF section, really) so as to
188 // start reading at offset |sli_initial_offset| from the start of the
// slice.
//
// c                   cursor to initialise; it is zeroed first.
// sli_initial_offset  starting position, relative to the slice start.
// barf                noreturn error callback to associate with the cursor.
// barfstr             string stored in c->barfstr.
//
// The result must satisfy is_sane_Cursor (asserted), so |barf| and
// |barfstr| must be non-NULL and the slice valid.
190 static void init_Cursor ( /*OUT*/Cursor
* c
,
192 ULong sli_initial_offset
,
193 __attribute__((noreturn
)) void (*barf
)(const HChar
*),
194 const HChar
* barfstr
)
197 VG_(bzero_inline
)(c
, sizeof(*c
));
// sli_next is kept as an absolute image offset, not a slice-relative one.
199 c
->sli_next
= c
->sli
.ioff
+ sli_initial_offset
;
201 c
->barfstr
= barfstr
;
202 vg_assert(is_sane_Cursor(c
));
/* True iff the read position is at or beyond the end of the slice
   (sli.ioff + sli.szB).  Asserts that the cursor is sane. */
205 static Bool
is_at_end_Cursor ( const Cursor
* c
) {
206 vg_assert(is_sane_Cursor(c
));
207 return c
->sli_next
>= c
->sli
.ioff
+ c
->sli
.szB
;
/* Current read position expressed relative to the start of the slice
   (absolute image offset minus sli.ioff).  Asserts cursor sanity. */
210 static inline ULong
get_position_of_Cursor ( const Cursor
* c
) {
211 vg_assert(is_sane_Cursor(c
));
212 return c
->sli_next
- c
->sli
.ioff
;
/* Move the read position to slice-relative offset 'pos'.  The sanity
   assertion runs after the update, so it checks the new position. */
214 static inline void set_position_of_Cursor ( Cursor
* c
, ULong pos
) {
215 c
->sli_next
= c
->sli
.ioff
+ pos
;
216 vg_assert(is_sane_Cursor(c
));
/* Advance the read position by 'delta' bytes.  The sanity assertion runs
   after the update, so it checks the new position. */
218 static inline void advance_position_of_Cursor ( Cursor
* c
, ULong delta
) {
219 c
->sli_next
+= delta
;
220 vg_assert(is_sane_Cursor(c
));
/* Number of bytes between the read position and the end of the slice.
   The return type is signed: the value is negative if the position has
   moved past the end of the slice. */
223 static /*signed*/Long
get_remaining_length_Cursor ( const Cursor
* c
) {
224 vg_assert(is_sane_Cursor(c
));
225 return c
->sli
.ioff
+ c
->sli
.szB
- c
->sli_next
;
228 //static void* get_address_of_Cursor ( Cursor* c ) {
229 // vg_assert(is_sane_Cursor(c));
230 // return &c->region_start_img[ c->region_next ];
/* Build a DiCursor that refers to the cursor's image at the cursor's
   current (absolute) read position.  Does not modify the cursor. */
233 static DiCursor
get_DiCursor_from_Cursor ( const Cursor
* c
) {
234 return mk_DiCursor(c
->sli
.img
, c
->sli_next
);
237 /* FIXME: document assumptions on endianness for
238 get_UShort/UInt/ULong. */
/* Read one byte at the current position and advance past it.
   The read is bounds-checked against the end of the slice first.
   NOTE(review): the body of the overrun branch is not shown here;
   presumably it reports the error through c->barf -- confirm. */
239 static inline UChar
get_UChar ( Cursor
* c
) {
241 vg_assert(is_sane_Cursor(c
));
/* Overrun test: would the read extend past sli.ioff + sli.szB? */
242 if (c
->sli_next
+ sizeof(UChar
) > c
->sli
.ioff
+ c
->sli
.szB
) {
247 r
= ML_(img_get_UChar
)(c
->sli
.img
, c
->sli_next
);
248 c
->sli_next
+= sizeof(UChar
);
/* Read a UShort at the current position via ML_(img_get_UShort) and
   advance by sizeof(UShort).  Bounds-checked against the slice end.
   NOTE(review): the overrun branch body is not shown here; presumably
   it reports the error through c->barf -- confirm. */
251 static UShort
get_UShort ( Cursor
* c
) {
253 vg_assert(is_sane_Cursor(c
));
/* Overrun test: would the read extend past sli.ioff + sli.szB? */
254 if (c
->sli_next
+ sizeof(UShort
) > c
->sli
.ioff
+ c
->sli
.szB
) {
259 r
= ML_(img_get_UShort
)(c
->sli
.img
, c
->sli_next
);
260 c
->sli_next
+= sizeof(UShort
);
/* Read a UInt at the current position via ML_(img_get_UInt) and advance
   by sizeof(UInt).  Bounds-checked against the slice end.
   NOTE(review): the overrun branch body is not shown here; presumably
   it reports the error through c->barf -- confirm. */
263 static UInt
get_UInt ( Cursor
* c
) {
265 vg_assert(is_sane_Cursor(c
));
/* Overrun test: would the read extend past sli.ioff + sli.szB? */
266 if (c
->sli_next
+ sizeof(UInt
) > c
->sli
.ioff
+ c
->sli
.szB
) {
271 r
= ML_(img_get_UInt
)(c
->sli
.img
, c
->sli_next
);
272 c
->sli_next
+= sizeof(UInt
);
/* Read a ULong at the current position via ML_(img_get_ULong) and
   advance by sizeof(ULong).  Bounds-checked against the slice end.
   NOTE(review): the overrun branch body is not shown here; presumably
   it reports the error through c->barf -- confirm. */
275 static ULong
get_ULong ( Cursor
* c
) {
277 vg_assert(is_sane_Cursor(c
));
/* Overrun test: would the read extend past sli.ioff + sli.szB? */
278 if (c
->sli_next
+ sizeof(ULong
) > c
->sli
.ioff
+ c
->sli
.szB
) {
283 r
= ML_(img_get_ULong
)(c
->sli
.img
, c
->sli_next
);
284 c
->sli_next
+= sizeof(ULong
);
/* Decode an unsigned LEB128 value at the cursor, consuming its bytes.
   Each byte contributes its low 7 bits to the result; bit 0x80 set means
   another byte follows.  The first byte is handled ahead of the loop so
   that the common single-byte encoding returns immediately.
   NOTE(review): the updates to 'shift' are not shown here; the loop
   relies on it growing by 7 per byte -- confirm. */
287 static ULong
get_ULEB128 ( Cursor
* c
) {
291 /* unroll first iteration */
292 byte
= get_UChar( c
);
293 result
= (ULong
)(byte
& 0x7f);
/* Fast path: no continuation bit, so the value fits in this one byte. */
294 if (LIKELY(!(byte
& 0x80))) return result
;
296 /* end unroll first iteration */
298 byte
= get_UChar( c
);
/* Merge the next 7 payload bits above those already accumulated. */
299 result
|= ((ULong
)(byte
& 0x7f)) << shift
;
301 } while (byte
& 0x80);
/* Decode a signed LEB128 value at the cursor, consuming its bytes.
   Payload bits are accumulated 7 at a time while bit 0x80 of the current
   byte is set.  After the final byte, if its bit 0x40 (the sign bit of
   the encoding) is set and fewer than 64 bits were accumulated, the
   result is sign-extended. */
304 static Long
get_SLEB128 ( Cursor
* c
) {
310 result
|= ((ULong
)(byte
& 0x7f)) << shift
;
312 } while (byte
& 0x80);
/* The 'shift < 64' guard keeps the shift below the width of the type. */
313 if (shift
< 64 && (byte
& 0x40))
/* -(1ULL << shift) sets every bit from position 'shift' upwards. */
314 result
|= -(1ULL << shift
);
/* Read a 3-byte unsigned quantity at the cursor and return it as a UInt.
   The three bytes are fetched individually from the image.  When
   VG_BIGENDIAN is defined the first byte is the most significant; the
   other return treats the first byte as the least significant.
   NOTE(review): neither the overrun branch body nor the advance of
   sli_next by 3 is shown here -- confirm both exist. */
317 static UInt
get_UInt3 ( Cursor
* c
) {
319 vg_assert(is_sane_Cursor(c
));
/* Overrun test for a 3-byte read. */
320 if (c
->sli_next
+ 3 > c
->sli
.ioff
+ c
->sli
.szB
) {
325 c1
= ML_(img_get_UChar
)(c
->sli
.img
, c
->sli_next
);
326 c2
= ML_(img_get_UChar
)(c
->sli
.img
, c
->sli_next
+1);
327 c3
= ML_(img_get_UChar
)(c
->sli
.img
, c
->sli_next
+2);
329 #if defined(VG_BIGENDIAN)
330 return c1
<< 16 | c2
<< 8 | c3
;
/* Little-endian assembly: c1 is the low byte. */
332 return c1
| c2
<< 8 | c3
<< 16;
337 /* Assume 'c' points to the start of a string. Return a DiCursor of
338 whatever it points at, and advance it past the terminating zero.
339 This makes it safe for the caller to then copy the string with
340 ML_(addStr), since (w.r.t. image overruns) the process of advancing
341 past the terminating zero will already have "vetted" the string. */
/* Capture a DiCursor for the string starting at the current position,
   then consume bytes one at a time up to and including the first zero
   byte.  Each byte goes through get_UChar, so the scan is bounds-checked.
   NOTE(review): the return is not shown here; presumably the captured
   DiCursor 'res' is returned -- confirm. */
342 static DiCursor
get_AsciiZ ( Cursor
* c
) {
344 DiCursor res
= get_DiCursor_from_Cursor(c
);
345 do { uc
= get_UChar(c
); } while (uc
!= 0);
/* Decode the ULEB128 value at the current position.  The position is
   saved in 'here' before the read.
   NOTE(review): the restore of sli_next from 'here' and the return are
   not shown here; presumably the cursor is left unmoved -- confirm. */
349 static ULong
peek_ULEB128 ( Cursor
* c
) {
350 DiOffT here
= c
->sli_next
;
351 ULong r
= get_ULEB128( c
);
/* Read the byte at the current position.  The position is saved in
   'here' before the read.
   NOTE(review): the restore of sli_next from 'here' and the return are
   not shown here; presumably the cursor is left unmoved -- confirm. */
355 static UChar
peek_UChar ( Cursor
* c
) {
356 DiOffT here
= c
->sli_next
;
357 UChar r
= get_UChar( c
);
/* Read an offset-sized field whose width depends on the DWARF format:
   a ULong when is_dw64 is true, otherwise a UInt widened to ULong.
   The cursor advances by the number of bytes read. */
362 static ULong
get_Dwarfish_UWord ( Cursor
* c
, Bool is_dw64
) {
363 return is_dw64
? get_ULong(c
) : (ULong
) get_UInt(c
);
/* Read one host-word-sized value: dispatches on sizeof(UWord) to
   get_UInt (4 bytes) or get_ULong (8 bytes).  Asserts that UWord has the
   same size as a pointer. */
366 static UWord
get_UWord ( Cursor
* c
) {
367 vg_assert(sizeof(UWord
) == sizeof(void*));
368 if (sizeof(UWord
) == 4) return get_UInt(c
);
369 if (sizeof(UWord
) == 8) return get_ULong(c
);
373 /* Read a DWARF3 'Initial Length' field */
/* Reads an 'Initial Length' field and reports through *is64 whether the
   64-bit DWARF format is in use.
   The first 32-bit word (w32) is classified three ways:
     - 0xFFFFFFF0 .. 0xFFFFFFFE: handled as a distinct case.
       NOTE(review): body not shown; presumably rejected using barfMsg.
     - 0xFFFFFFFF: escape value; the real length follows as a ULong.
     - anything else: NOTE(review): presumably w32 itself is the length. */
374 static ULong
get_Initial_Length ( /*OUT*/Bool
* is64
,
376 const HChar
* barfMsg
)
382 if (w32
>= 0xFFFFFFF0 && w32
< 0xFFFFFFFF) {
385 else if (w32
== 0xFFFFFFFF) {
387 w64
= get_ULong( c
);
396 /*------------------------------------------------------------*/
398 /*--- "CUConst" structure ---*/
400 /*------------------------------------------------------------*/
404 ULong at_name
; // Dwarf Attribute name
405 ULong at_form
; // Dwarf Attribute form
406 Long at_val
; // Dwarf Attribute value (for implicit_const)
407 UInt skip_szB
; // Nr of bytes skippable from here ...
408 UInt next_nf
; // ... to reach this attr/form index in the g_abbv.nf
410 /* skip_szB and next_nf are used to optimise the skipping of uninteresting DIEs.
411 Each name_form maintains how many (fixed) nr of bytes can be skipped from
412 the beginning of this form till the next attr/form to look at.
413 The next form to look can be:
414 an 'interesting' attr/form to read while skipping a DIE
415 (currently, this is only DW_AT_sibling)
417 a variable length form which must be read to be skipped.
418 For a variable length form, the skip_szB will be equal to VARSZ_FORM.
420 Note: this technique could also be used to speed up the parsing
421 of DIEs : for each parser kind, we could have the nr of bytes
422 to skip to directly reach the interesting form(s) for the parser. */
426 struct _g_abbv
*next
; // read/write by hash table.
427 UWord abbv_code
; // key, read by hash table
431 /* Variable-length array of name/form pairs, terminated
433 The skip_szB/next_nf fields allow efficient skipping of a DIE
434 described by this g_abbv; */
437 /* Holds information about the .debug_abbrev section for this CU. The current
438 Cursor into the abbrev section, the known abbrev codes are put into a hash
439 table. The (starting) offset into the abbrev_offset can be used to check
440 whether the abbv can be shared between CUs. The done boolean is set when all
441 known codes have been read. Initialize a new abbv_state with init_ht_abbvs.
442 To read any new abbrev codes not yet in the hash table call find_ht_abbvs
443 (get_abbv will first query the ht_abbvs, then if not done, call
447 Cursor c
; /* Current cursor into .debug_abbrev. */
448 VgHashTable
*ht_abbvs
; /* Hash table mapping codes to abbrevs. */
449 ULong debug_abbrev_offset
; /* Starting offset into .debug_abbrev. */
450 Bool done
; /* Whether there (might) still be new abbrev codes not yet
454 /* Holds information that is constant through the parsing of a
455 Compilation Unit. This is basically plumbed through to
459 /* Call here if anything goes wrong */
460 void (*barf
)( const HChar
* ) __attribute__((noreturn
));
461 /* Is this 64-bit DWARF ? */
463 /* Which DWARF version ? (2, 3, 4 or 5) */
465 /* Length of this Compilation Unit, as stated in the
466 .unit_length :: InitialLength field of the CU Header.
467 However, this size (as specified by the D3 spec) does not
468 include the size of the .unit_length field itself, which is
469 either 4 or 12 bytes (32-bit or 64-bit Dwarf3). That value
470 can be obtained through the expression ".is_dw64 ? 12 : 4". */
472 /* Offset of start of this unit in .debug_info */
473 UWord cu_start_offset
;
474 /* SVMA for this CU. In the D3 spec, is known as the "base
475 address of the compilation unit" (last para sec 3.1.1).
476 Needed for (amongst things) interpretation of location-list
481 /* The debug_abbreviations table to be used for this Unit */
483 /* Upper bound on size thereof (an overestimate, in general) */
484 //UWord debug_abbv_maxszB;
485 /* A bounded area of the image, to be used as the
486 debug_abbreviations table to be used for this Unit. */
489 /* Image information for various sections. */
490 DiSlice escn_debug_str
;
491 DiSlice escn_debug_ranges
;
492 DiSlice escn_debug_rnglists
;
493 DiSlice escn_debug_loclists
;
494 DiSlice escn_debug_loc
;
495 DiSlice escn_debug_line
;
496 DiSlice escn_debug_info
;
497 DiSlice escn_debug_types
;
498 DiSlice escn_debug_info_alt
;
499 DiSlice escn_debug_str_alt
;
500 DiSlice escn_debug_line_str
;
501 DiSlice escn_debug_addr
;
502 DiSlice escn_debug_str_offsets
;
503 /* How much to add to .debug_types resp. alternate .debug_info offsets
505 UWord types_cuOff_bias
;
506 UWord alt_cuOff_bias
;
507 /* DW_AT_addr_base */
509 Bool cu_has_addr_base
;
510 /* DW_AT_str_offsets_base */
511 Addr cu_str_offsets_base
;
512 Bool cu_has_str_offsets_base
;
513 /* DW_AT_rnglists_base */
514 Addr cu_rnglists_base
;
515 Bool cu_has_rnglists_base
;
516 /* DW_AT_loclists_base */
517 Addr cu_loclists_base
;
518 Bool cu_has_loclists_base
;
519 /* --- Needed so we can add stuff to the string table. --- */
520 struct _DebugInfo
* di
;
521 /* --- State of the hash table of g_abbv (i.e. parsed abbreviations)
522 technically makes this struct not const. --- */
525 /* True if this came from .debug_types; otherwise it came from
528 /* For a unit coming from .debug_types, these hold the TU's type
529 signature and the uncooked DIE offset of the TU's signatured
530 type. For a unit coming from .debug_info, these are unused. */
531 ULong type_signature
;
534 /* Signatured type hash; computed once and then shared by all
536 VgHashTable
*signature_types
;
538 /* True if this came from alternate .debug_info; otherwise
539 it came from normal .debug_info or .debug_types. */
545 /* Return the cooked value of DIE depending on whether CC represents a
546 .debug_types unit. To cook a DIE, we pretend that the .debug_info,
547 .debug_types and optional alternate .debug_info sections form
548 a contiguous whole, so that DIEs coming from .debug_types are numbered
549 starting at the end of .debug_info and DIEs coming from alternate
550 .debug_info are numbered starting at the end of .debug_types. */
/* Bias a DIE offset according to the kind of unit 'cc' describes:
   adds cc->types_cuOff_bias for a type unit, otherwise adds
   cc->alt_cuOff_bias for an alternate-.debug_info unit.  For any other
   unit neither bias is added. */
551 static UWord
cook_die( const CUConst
* cc
, UWord die
)
553 if (cc
->is_type_unit
)
554 die
+= cc
->types_cuOff_bias
;
555 else if (cc
->is_alt_info
)
556 die
+= cc
->alt_cuOff_bias
;
560 /* Like cook_die, but understand that DIEs coming from a
561 DW_FORM_ref_sig8 reference are already cooked. Also, handle
562 DW_FORM_GNU_ref_alt from within primary .debug_info or .debug_types
563 as reference to alternate .debug_info. */
/* Form-aware variant of cook_die.
     - DW_FORM_ref_sig8 is tested first.  NOTE(review): its branch body is
       not shown here; presumably 'die' is returned unchanged.
     - DW_FORM_GNU_ref_alt: the offset refers to the alternate
       .debug_info, so alt_cuOff_bias is added regardless of the kind of
       the current unit.
     - any other form: defer to cook_die. */
564 static UWord
cook_die_using_form( const CUConst
*cc
, UWord die
, DW_FORM form
)
566 if (form
== DW_FORM_ref_sig8
)
568 if (form
== DW_FORM_GNU_ref_alt
)
569 return die
+ cc
->alt_cuOff_bias
;
570 return cook_die( cc
, die
);
573 /* Return the uncooked offset of DIE and set *TYPE_FLAG to true if the DIE
574 came from the .debug_types section and *ALT_FLAG to true if the DIE
575 came from alternate .debug_info section. */
/* Map a cooked DIE offset back to an offset within its own section.
   Offsets below the size of .debug_info are left as they are.  Offsets at
   or above size(.debug_info) + size(.debug_types) have both sizes
   subtracted; offsets in between have only size(.debug_info) subtracted.
   NOTE(review): the assignments to the OUT flags are not shown here;
   presumably each range sets the matching flag -- confirm. */
576 static UWord
uncook_die( const CUConst
*cc
, UWord die
, /*OUT*/Bool
*type_flag
,
581 /* The use of escn_debug_{info,types}.szB seems safe to me even if
582 escn_debug_{info,types} are DiSlice_INVALID (meaning the
583 sections were not found), because DiSlice_INVALID.szB is always
584 zero. That said, it seems unlikely we'd ever get here if
585 .debug_info or .debug_types were missing. */
586 if (die
>= cc
->escn_debug_info
.szB
) {
/* Beyond both .debug_info and .debug_types. */
587 if (die
>= cc
->escn_debug_info
.szB
+ cc
->escn_debug_types
.szB
) {
589 die
-= cc
->escn_debug_info
.szB
+ cc
->escn_debug_types
.szB
;
/* Otherwise only .debug_info precedes it. */
592 die
-= cc
->escn_debug_info
.szB
;
598 /* Return an entry from .debug_addr with the given index.
599 Call one of the variants below that do error-checking. */
/* Fetch entry number 'index' from .debug_addr for the unit 'cc'.
   The entry is read as one host word from slice-relative position
   cc->cu_addr_base + index * sizeof(UWord).  The cursor is created with
   cc->barf and the message below as its error callback and string.
   Precondition (asserted): cc->cu_has_addr_base is set. */
600 static ULong
get_debug_addr_entry_common( ULong index
, const CUConst
* cc
)
602 vg_assert(cc
->cu_has_addr_base
);
603 /* We make the same word-size assumption as DW_FORM_addr. */
604 UWord addr_pos
= cc
->cu_addr_base
+ index
* sizeof(UWord
);
606 init_Cursor( &cur
, cc
->escn_debug_addr
, addr_pos
, cc
->barf
,
607 "get_debug_addr_entry_common: index points outside .debug_addr" );
608 return (ULong
)(UWord
)get_UWord(&cur
);
/* Checked .debug_addr lookup for an attribute form.  If the unit has no
   address base, the offending form is formatted (number and name) into a
   diagnostic and cc->barf is called; otherwise the lookup is delegated
   to get_debug_addr_entry_common. */
611 static ULong
get_debug_addr_entry_form( ULong index
, const CUConst
* cc
,
614 if(!cc
->cu_has_addr_base
) {
616 "get_debug_addr_entry_form: %u (%s) without DW_AT_addr_base\n",
617 form
, ML_(pp_DW_FORM
)(form
));
618 cc
->barf("get_debug_addr_entry_form: DW_AT_addr_base not set");
620 return get_debug_addr_entry_common( index
, cc
);
/* Checked .debug_addr lookup for a location-list entry kind.  If the
   unit has no address base, the offending entry kind is formatted
   (number and name) into a diagnostic and cc->barf is called; otherwise
   the lookup is delegated to get_debug_addr_entry_common. */
623 static ULong
get_debug_addr_entry_lle( ULong index
, const CUConst
* cc
,
626 if(!cc
->cu_has_addr_base
) {
628 "get_debug_addr_entry_lle: %u (%s) without DW_AT_addr_base\n",
629 entry
, ML_(pp_DW_LLE
)(entry
));
630 cc
->barf("get_debug_addr_entry_lle: DW_AT_addr_base not set");
632 return get_debug_addr_entry_common( index
, cc
);
/* Checked .debug_addr lookup for a range-list entry kind.  If the unit
   has no address base, the offending entry kind is formatted (number and
   name) into a diagnostic and cc->barf is called; otherwise the lookup
   is delegated to get_debug_addr_entry_common. */
635 static ULong
get_debug_addr_entry_rle( ULong index
, const CUConst
* cc
,
638 if(!cc
->cu_has_addr_base
) {
640 "get_debug_addr_entry_rle: %u (%s) without DW_AT_addr_base\n",
641 entry
, ML_(pp_DW_RLE
)(entry
));
642 cc
->barf("get_debug_addr_entry_rle: DW_AT_addr_base not set");
644 return get_debug_addr_entry_common( index
, cc
);
647 /*------------------------------------------------------------*/
649 /*--- Helper functions for Guarded Expressions ---*/
651 /*------------------------------------------------------------*/
653 /* Parse the location list starting at img-offset 'debug_loc_offset'
654 in .debug_loc. Results are biased with 'svma_of_referencing_CU'
655 and so I believe are correct SVMAs for the object as a whole. This
656 function allocates the UChar*, and the caller must deallocate it.
657 The resulting block is in so-called Guarded-Expression format.
659 Guarded-Expression format is similar but not identical to the DWARF3
660 location-list format. The format of each returned block is:
664 followed by zero or more of
666 (Addr aMin; Addr aMax; UShort nbytes; ..bytes..; UChar isEnd)
668 '..bytes..' is a standard DWARF3 location expression which is
669 valid when aMin <= pc <= aMax (possibly after suitable biasing).
671 The number of bytes in '..bytes..' is nbytes.
673 The end of the sequence is marked by an isEnd == 1 value. All
674 previous isEnd values must be zero.
676 biasMe is 1 if the aMin/aMax fields need this DebugInfo's
677 text_bias added before use, and 0 if this is not
678 necessary (is ready to go).
680 Hence the block can be quickly parsed and is self-describing. Note
681 that aMax is 1 less than the corresponding value in a DWARF3
682 location list. Zero length ranges, with aMax == aMin-1, are not
685 /* 2008-sept-12: moved ML_(pp_GX) from here to d3basics.c, where
686 it more logically belongs. */
689 /* Apply a text bias to a GX. */
/* Walk the payload of 'gx' in place.  The leading biasMe byte is read
   and then overwritten with 0 to mark the GX as done.  Address fields in
   the payload are rewritten with di->text_debug_bias added, and the
   UShort 'nbytes' field is read to step over each expression.
   NOTE(review): the loop structure, the test on the biasMe value and the
   isEnd handling are not shown here; the two address updates presumably
   correspond to aMin and aMax of each guard -- confirm. */
690 static void bias_GX ( /*MOD*/GExpr
* gx
, const DebugInfo
* di
)
693 UChar
* p
= &gx
->payload
[0];
696 uc
= *p
++; /*biasMe*/
/* p has already moved past biasMe, hence the index of -1. */
700 p
[-1] = 0; /* mark it as done */
708 ML_(write_Addr
)(pA
, ML_(read_Addr
)(pA
) + di
->text_debug_bias
);
712 ML_(write_Addr
)(pA
, ML_(read_Addr
)(pA
) + di
->text_debug_bias
);
714 /* nbytes, and actual expression */
715 nbytes
= ML_(read_UShort
)(p
); p
+= sizeof(UShort
);
/* Build a GExpr holding exactly one guarded expression whose guard
   covers the whole address space (aMin = 0, aMax = ~0).

   block   image cursor at the start of the expression bytes
   nbytes  length of the expression; must be <= 0xFFFF because it is
           stored in a UShort field (asserted)

   Payload layout written below, in order:
     biasMe (0), !isEnd (0), aMin, aMax, nbytes, the expression bytes,
     isEnd (1).
   The payload is allocated with ML_(dinfo_zalloc) directly after the
   GExpr header; the final assertions check that the byte count written
   matches the size computed up front. */
720 __attribute__((noinline
))
721 static GExpr
* make_singleton_GX ( DiCursor block
, ULong nbytes
)
727 vg_assert(sizeof(UWord
) == sizeof(Addr
));
728 vg_assert(nbytes
<= 0xFFFF); /* else we overflow the nbytes field */
730 = sizeof(UChar
) /*biasMe*/ + sizeof(UChar
) /*!isEnd*/
731 + sizeof(UWord
) /*aMin*/ + sizeof(UWord
) /*aMax*/
732 + sizeof(UShort
) /*nbytes*/ + (SizeT
)nbytes
733 + sizeof(UChar
); /*isEnd*/
735 gx
= ML_(dinfo_zalloc
)( "di.readdwarf3.msGX.1",
736 sizeof(GExpr
) + bytesReqd
);
738 p
= pstart
= &gx
->payload
[0];
/* biasMe is 0: the guard is the full address range, so no bias applies. */
740 p
= ML_(write_UChar
)(p
, 0); /*biasMe*/
741 p
= ML_(write_UChar
)(p
, 0); /*!isEnd*/
742 p
= ML_(write_Addr
)(p
, 0); /*aMin*/
743 p
= ML_(write_Addr
)(p
, ~0); /*aMax*/
744 p
= ML_(write_UShort
)(p
, nbytes
); /*nbytes*/
/* Copy the expression bytes out of the image into the payload. */
745 ML_(cur_read_get
)(p
, block
, nbytes
); p
+= nbytes
;
746 p
= ML_(write_UChar
)(p
, 1); /*isEnd*/
748 vg_assert( (SizeT
)(p
- pstart
) == bytesReqd
);
749 vg_assert( &gx
->payload
[bytesReqd
]
750 == ((UChar
*)gx
) + sizeof(GExpr
) + bytesReqd
);
/* Convert a location list into a GExpr.

   The source section depends on the DWARF version of the unit:
   .debug_loc for version < 5, .debug_loclists for version >= 5.  A
   missing or empty section is reported through cc->barf.

   The payload is accumulated in a temporary XArray of bytes, starting
   with biasMe = 1.  For every emitted range the guard addresses have
   svma_of_referencing_CU added, and 'base' is added as well when
   addBase is set (version < 5 only).  aMax is stored as w2 - 1, i.e.
   inclusive.  The expression bytes are copied one at a time, and a
   final isEnd = 1 byte terminates the payload before it is copied into
   a freshly allocated GExpr.

   NOTE(review): several statements of this function (loop headers,
   switch, some branches, the temporary's release and the return) are
   not shown here; the comments describe only what is visible. */
755 __attribute__((noinline
))
756 static GExpr
* make_general_GX ( const CUConst
* cc
,
759 Addr svma_of_referencing_CU
)
764 XArray
* xa
; /* XArray of UChar */
/* 'base' is added at emission time only for pre-DWARF5 lists. */
767 Bool addBase
= cc
->version
< 5;
769 vg_assert(sizeof(UWord
) == sizeof(Addr
));
770 if (cc
->version
< 5 && (!ML_(sli_is_valid
)(cc
->escn_debug_loc
)
771 || cc
->escn_debug_loc
.szB
== 0))
772 cc
->barf("make_general_GX: .debug_loc is empty/missing");
773 if (cc
->version
>= 5 && (!ML_(sli_is_valid
)(cc
->escn_debug_loclists
)
774 || cc
->escn_debug_loclists
.szB
== 0))
775 cc
->barf("make_general_GX: .debug_loclists is empty/missing");
778 init_Cursor( &loc
, cc
->escn_debug_loc
, 0, cc
->barf
,
779 "Overrun whilst reading .debug_loc section(2)" );
781 init_Cursor( &loc
, cc
->escn_debug_loclists
, 0, cc
->barf
,
782 "Overrun whilst reading .debug_loclists section(2)" );
783 set_position_of_Cursor( &loc
, offset
);
785 TRACE_D3("make_general_GX (offset = %llu, ioff = %llu) {\n",
786 offset
, get_DiCursor_from_Cursor(&loc
).ioff
);
788 /* Who frees this xa? It is freed before this fn exits. */
789 xa
= VG_(newXA
)( ML_(dinfo_zalloc
), "di.readdwarf3.mgGX.1",
793 { UChar c
= 1; /*biasMe*/ VG_(addBytesToXA
)( xa
, &c
, sizeof(c
) ); }
/* Pre-DWARF5 encoding: a sequence of (word, word) pairs. */
802 if (cc
->version
< 5) {
803 /* Read a (host-)word pair. This is something of a hack since
804 the word size to read is really dictated by the ELF file;
805 however, we assume we're reading a file with the same
806 word-sizeness as the host. Reasonably enough. */
807 w1
= get_UWord( &loc
);
808 w2
= get_UWord( &loc
);
810 TRACE_D3(" %08lx %08lx\n", w1
, w2
);
811 if (w1
== 0 && w2
== 0) {
813 break; /* end of list */
817 /* new value for 'base' */
821 /* else a location expression follows */
822 len
= (UWord
)get_UShort( &loc
);
/* DWARF5 encoding: a one-byte entry kind followed by its operands. */
827 DW_LLE r
= get_UChar( &loc
);
829 case DW_LLE_end_of_list
:
832 case DW_LLE_base_address
:
833 base
= get_UWord( &loc
);
/* start + ULEB128 length, then the ULEB128 expression length. */
835 case DW_LLE_start_length
:
836 w1
= get_UWord( &loc
);
837 w2
= w1
+ get_ULEB128( &loc
);
838 len
= get_ULEB128( &loc
);
/* Two ULEB128 offsets, each relative to the current 'base'. */
840 case DW_LLE_offset_pair
:
841 w1
= base
+ get_ULEB128( &loc
);
842 w2
= base
+ get_ULEB128( &loc
);
843 len
= get_ULEB128( &loc
);
845 case DW_LLE_start_end
:
846 w1
= get_UWord ( &loc
);
847 w2
= get_UWord ( &loc
);
848 len
= get_ULEB128( &loc
);
850 case DW_LLE_GNU_view_pair
:
/* The ...x kinds take .debug_addr indices instead of literal addresses. */
854 case DW_LLE_base_addressx
:
855 base
= get_debug_addr_entry_lle( get_ULEB128( &loc
), cc
,
856 DW_LLE_base_addressx
);
858 case DW_LLE_startx_endx
:
859 w1
= get_debug_addr_entry_lle( get_ULEB128( &loc
), cc
,
860 DW_LLE_startx_endx
);
861 w2
= get_debug_addr_entry_lle( get_ULEB128( &loc
), cc
,
862 DW_LLE_startx_endx
);
863 len
= get_ULEB128( &loc
);
865 case DW_LLE_startx_length
:
866 w1
= get_debug_addr_entry_lle( get_ULEB128( &loc
), cc
,
867 DW_LLE_startx_length
);
868 w2
= w1
+ get_ULEB128( &loc
);
869 len
= get_ULEB128( &loc
);
871 case DW_LLE_default_location
:
873 cc
->barf( "Unhandled or unknown loclists entry" );
878 /* else enumerate [w1+base, w2+base) */
879 /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
882 TRACE_D3("negative range is for .debug_loc expr at "
883 "file offset %llu\n",
885 cc
->barf( "negative range in .debug_loc section" );
888 /* ignore zero length ranges */
896 VG_(addBytesToXA
)( xa
, &c
, sizeof(c
) );
/* Guard start (aMin). */
897 w
= w1
+ (addBase
? base
: 0) + svma_of_referencing_CU
;
898 VG_(addBytesToXA
)( xa
, &w
, sizeof(w
) );
/* Guard end (aMax), made inclusive by the -1. */
899 w
= w2
-1 + (addBase
? base
: 0) + svma_of_referencing_CU
;
900 VG_(addBytesToXA
)( xa
, &w
, sizeof(w
) );
902 VG_(addBytesToXA
)( xa
, &s
, sizeof(s
) );
/* Copy the expression bytes, one at a time, into the payload. */
906 UChar byte
= get_UChar( &loc
);
907 TRACE_D3("%02x", (UInt
)byte
);
909 VG_(addBytesToXA
)( xa
, &byte
, 1 );
915 { UChar c
= 1; /*isEnd*/ VG_(addBytesToXA
)( xa
, &c
, sizeof(c
) ); }
917 nbytes
= VG_(sizeXA
)( xa
);
918 vg_assert(nbytes
>= 1);
/* Copy the accumulated bytes into a GExpr allocated to the exact size. */
920 gx
= ML_(dinfo_zalloc
)( "di.readdwarf3.mgGX.2", sizeof(GExpr
) + nbytes
);
921 VG_(memcpy
)( &gx
->payload
[0], (UChar
*)VG_(indexXA
)(xa
,0), nbytes
);
922 vg_assert( &gx
->payload
[nbytes
]
923 == ((UChar
*)gx
) + sizeof(GExpr
) + nbytes
);
933 /*------------------------------------------------------------*/
935 /*--- Helper functions for range lists and CU headers ---*/
937 /*------------------------------------------------------------*/
939 /* Denotes an address range. Both aMin and aMax are included in the
940 range; hence a complete range is (0, ~0) and an empty range is any
941 (X, X-1) for X > 0.*/
943 struct { Addr aMin
; Addr aMax
; }
947 /* Generate an arbitrary structural total ordering on
948 XArray* of AddrRange. */
/* Three-way comparison of two XArrays of AddrRange.  Both arguments
   must be non-NULL (asserted).  Ordering rule: the array with fewer
   elements sorts first; arrays of equal length are compared element by
   element, by aMin and then by aMax, and the first difference decides.
   NOTE(review): the final return for fully equal arrays is not shown
   here; presumably 0 -- confirm. */
949 static Word
cmp__XArrays_of_AddrRange ( const XArray
* rngs1
,
950 const XArray
* rngs2
)
953 vg_assert(rngs1
&& rngs2
);
954 n1
= VG_(sizeXA
)( rngs1
);
955 n2
= VG_(sizeXA
)( rngs2
);
/* Length is the primary key. */
956 if (n1
< n2
) return -1;
957 if (n1
> n2
) return 1;
/* Equal lengths: n1 bounds the walk over both arrays. */
958 for (i
= 0; i
< n1
; i
++) {
959 AddrRange
* rng1
= (AddrRange
*)VG_(indexXA
)( rngs1
, i
);
960 AddrRange
* rng2
= (AddrRange
*)VG_(indexXA
)( rngs2
, i
);
961 if (rng1
->aMin
< rng2
->aMin
) return -1;
962 if (rng1
->aMin
> rng2
->aMin
) return 1;
963 if (rng1
->aMax
< rng2
->aMax
) return -1;
964 if (rng1
->aMax
> rng2
->aMax
) return 1;
/* Allocate a new XArray intended to hold AddrRange elements, using
   ML_(dinfo_zalloc) as the allocator.
   NOTE(review): the remaining VG_(newXA) arguments and the return are
   not shown here; presumably the array is returned with no elements. */
970 __attribute__((noinline
))
971 static XArray
* /* of AddrRange */ empty_range_list ( void )
973 XArray
* xa
; /* XArray of AddrRange */
974 /* Who frees this xa? varstack_preen() does. */
975 xa
= VG_(newXA
)( ML_(dinfo_zalloc
), "di.readdwarf3.erl.1",
/* Allocate a new XArray and add a single element 'pair' to it.
   Precondition (asserted): aMin <= aMax.
   NOTE(review): the initialisation of 'pair' and the return are not
   shown here; presumably pair = (aMin, aMax) and the array is returned. */
982 __attribute__((noinline
))
983 static XArray
* unitary_range_list ( Addr aMin
, Addr aMax
)
987 vg_assert(aMin
<= aMax
);
988 /* Who frees this xa? varstack_preen() does. */
989 xa
= VG_(newXA
)( ML_(dinfo_zalloc
), "di.readdwarf3.url.1",
994 VG_(addToXA
)( xa
, &pair
);
999 /* Enumerate the address ranges starting at img-offset
1000 'debug_ranges_offset' in .debug_ranges. Results are biased with
1001 'svma_of_referencing_CU' and so I believe are correct SVMAs for the
1002 object as a whole. This function allocates the XArray, and the
1003 caller must deallocate it. */
/* Build an XArray of AddrRange from a range list.

   The source section depends on the DWARF version of the unit:
   .debug_ranges for version < 5, .debug_rnglists for version >= 5.  A
   missing or empty section is reported through cc->barf.  Reading
   starts at slice-relative offset debug_ranges_offset.

   Version < 5: entries are (word, word) pairs; a (0, 0) pair ends the
   list.  Emitted ranges are [w1 + base, w2 - 1 + base], each with
   svma_of_referencing_CU added.

   Version >= 5: each entry starts with a one-byte DW_RLE kind.  The
   offset_pair kind adds 'base' while decoding, and the ...x kinds fetch
   addresses from .debug_addr.  Emitted ranges are [w1, w2 - 1], each
   with svma_of_referencing_CU added.

   In both encodings aMax is inclusive and aMin <= aMax is asserted
   before the range is appended.

   NOTE(review): loop headers, the switch statement, the tests that
   lead to the "negative range" reports, and the return are not shown
   here; this comment describes only what is visible. */
1004 __attribute__((noinline
))
1005 static XArray
* /* of AddrRange */
1006 get_range_list ( const CUConst
* cc
,
1008 UWord debug_ranges_offset
,
1009 Addr svma_of_referencing_CU
)
1013 XArray
* xa
; /* XArray of AddrRange */
1016 if (cc
->version
< 5 && (!ML_(sli_is_valid
)(cc
->escn_debug_ranges
)
1017 || cc
->escn_debug_ranges
.szB
== 0))
1018 cc
->barf("get_range_list: .debug_ranges is empty/missing");
1019 if (cc
->version
>= 5 && (!ML_(sli_is_valid
)(cc
->escn_debug_rnglists
)
1020 || cc
->escn_debug_rnglists
.szB
== 0))
1021 cc
->barf("get_range_list: .debug_rnglists is empty/missing");
1023 if (cc
->version
< 5)
1024 init_Cursor( &ranges
, cc
->escn_debug_ranges
, 0, cc
->barf
,
1025 "Overrun whilst reading .debug_ranges section(2)" );
1027 init_Cursor( &ranges
, cc
->escn_debug_rnglists
, 0, cc
->barf
,
1028 "Overrun whilst reading .debug_rnglists section(2)" );
1030 set_position_of_Cursor( &ranges
, debug_ranges_offset
);
1032 /* Who frees this xa? varstack_preen() does. */
1033 xa
= VG_(newXA
)( ML_(dinfo_zalloc
), "di.readdwarf3.grl.1", ML_(dinfo_free
),
1034 sizeof(AddrRange
) );
1036 if (cc
->version
< 5) {
1038 /* Read a (host-)word pair. This is something of a hack since
1039 the word size to read is really dictated by the ELF file;
1040 however, we assume we're reading a file with the same
1041 word-sizeness as the host. Reasonably enough. */
1042 UWord w1
= get_UWord( &ranges
);
1043 UWord w2
= get_UWord( &ranges
);
1045 if (w1
== 0 && w2
== 0)
1046 break; /* end of list. */
1049 /* new value for 'base' */
1054 /* else enumerate [w1+base, w2+base) */
1055 /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
1058 cc
->barf( "negative range in .debug_ranges section" );
1060 pair
.aMin
= w1
+ base
+ svma_of_referencing_CU
;
1061 pair
.aMax
= w2
- 1 + base
+ svma_of_referencing_CU
;
1062 vg_assert(pair
.aMin
<= pair
.aMax
);
1063 VG_(addToXA
)( xa
, &pair
);
1071 DW_RLE r
= get_UChar( &ranges
);
1073 case DW_RLE_end_of_list
:
1076 case DW_RLE_base_address
:
1077 base
= get_UWord( &ranges
);
1079 case DW_RLE_start_length
:
1080 w1
= get_UWord( &ranges
);
1081 w2
= w1
+ get_ULEB128( &ranges
);
1083 case DW_RLE_offset_pair
:
1084 w1
= base
+ get_ULEB128( &ranges
);
1085 w2
= base
+ get_ULEB128( &ranges
);
1087 case DW_RLE_start_end
:
1088 w1
= get_UWord ( &ranges
);
1089 w2
= get_UWord ( &ranges
);
1091 case DW_RLE_base_addressx
:
1092 base
= get_debug_addr_entry_rle( get_ULEB128( &ranges
), cc
,
1093 DW_RLE_base_addressx
);
1095 case DW_RLE_startx_endx
:
1096 w1
= get_debug_addr_entry_rle( get_ULEB128( &ranges
), cc
,
1097 DW_RLE_startx_endx
);
1098 w2
= get_debug_addr_entry_rle( get_ULEB128( &ranges
), cc
,
1099 DW_RLE_startx_endx
);
1101 case DW_RLE_startx_length
:
1102 w1
= get_debug_addr_entry_rle( get_ULEB128( &ranges
), cc
,
1103 DW_RLE_startx_length
);
1104 w2
= w1
+ get_ULEB128( &ranges
);
1107 cc
->barf( "Unhandled or unknown range list entry" );
1111 cc
->barf( "negative range in .debug_rnglists section" );
1113 pair
.aMin
= w1
+ svma_of_referencing_CU
;
1114 pair
.aMax
= w2
- 1 + svma_of_referencing_CU
;
1115 vg_assert(pair
.aMin
<= pair
.aMax
);
1116 VG_(addToXA
)( xa
, &pair
);
/* VARSZ_FORM is the marker value get_Form_szB returns for a form whose
   encoded size is not fixed; find_ht_abbvs and skip_DIE compare
   skip_szB against it.  get_Form_szB is forward-declared here because
   find_ht_abbvs calls it before its definition further down the file. */
1123 #define VARSZ_FORM 0xffffffff
1124 static UInt
get_Form_szB (const CUConst
* cc
, DW_FORM form
);
1126 /* Initialises the hash table of abbreviations. This only sets up the abbv
1127 Cursor and hash table, but does not try to read any abbrevs yet. The actual
1128 reading of abbrevs will be done by get_abbv by calling find_ht_abbvs on
1129 demand if a requested abbrev code isn't in the hash table yet. When using the
1130 inline parser a lot of abbrevs will not be needed so reading everything
1131 upfront will often waste time and memory. */
1132 static void init_ht_abbvs (CUConst
* cc
, ULong debug_abbrev_offset
,
1135 Cursor
*c
= &cc
->abbv
.c
;
1136 init_Cursor( c
, cc
->debug_abbv
, 0, cc
->barf
,
1137 "Overrun whilst parsing .debug_abbrev section(2)" );
1138 cc
->abbv
.ht_abbvs
= VG_(HT_construct
) ("di.readdwarf3.ht_abbvs");
1139 cc
->abbv
.debug_abbrev_offset
= debug_abbrev_offset
;
1140 cc
->abbv
.done
= False
;
1143 static g_abbv
*find_ht_abbvs (CUConst
* cc
, ULong abbv_code
,
1147 g_abbv
*ta
; // temporary abbreviation, reallocated if needed.
1148 UInt ta_nf_maxE
; // max nr of pairs in ta.nf[], doubled when reallocated.
1149 UInt ta_nf_n
; // nr of pairs in ta->nf that are initialised.
1150 g_abbv
*ht_ta
; // abbv to insert in hash table.
1153 #define SZ_G_ABBV(_nf_szE) (sizeof(g_abbv) + _nf_szE * sizeof(name_form))
1155 ta_nf_maxE
= 10; // starting with enough for 9 pairs+terminating pair.
1156 ta
= ML_(dinfo_zalloc
) ("di.readdwarf3.ht_ta_nf", SZ_G_ABBV(ta_nf_maxE
));
1161 ta
->abbv_code
= get_ULEB128( c
);
1162 if (ta
->abbv_code
== 0) {
1163 cc
->abbv
.done
= True
;
1164 break; /* end of the table */
1167 ta
->atag
= get_ULEB128( c
);
1168 ta
->has_children
= get_UChar( c
);
1171 if (ta_nf_n
>= ta_nf_maxE
) {
1172 g_abbv
*old_ta
= ta
;
1173 ta
= ML_(dinfo_zalloc
) ("di.readdwarf3.ht_ta_nf",
1174 SZ_G_ABBV(2 * ta_nf_maxE
));
1175 ta_nf_maxE
= 2 * ta_nf_maxE
;
1176 VG_(memcpy
) (ta
, old_ta
, SZ_G_ABBV(ta_nf_n
));
1177 ML_(dinfo_free
) (old_ta
);
1179 ta
->nf
[ta_nf_n
].at_name
= get_ULEB128( c
);
1180 ta
->nf
[ta_nf_n
].at_form
= get_ULEB128( c
);
1181 if (ta
->nf
[ta_nf_n
].at_form
== DW_FORM_implicit_const
)
1182 ta
->nf
[ta_nf_n
].at_val
= get_SLEB128( c
);
1183 if (ta
->nf
[ta_nf_n
].at_name
== 0 && ta
->nf
[ta_nf_n
].at_form
== 0) {
1190 // Initialises the skip_szB/next_nf elements : an element at position
1191 // i must contain the sum of its own size + the sizes of all elements
1192 // following i till either the next variable size element, the next
1193 // sibling element or the end of the DIE.
1194 ta
->nf
[ta_nf_n
- 1].skip_szB
= 0;
1195 ta
->nf
[ta_nf_n
- 1].next_nf
= 0;
1196 for (i
= ta_nf_n
- 2; i
>= 0; i
--) {
1197 const UInt form_szB
= get_Form_szB (cc
, (DW_FORM
)ta
->nf
[i
].at_form
);
1199 if (ta
->nf
[i
+1].at_name
== DW_AT_sibling
1200 || ta
->nf
[i
+1].skip_szB
== VARSZ_FORM
) {
1201 ta
->nf
[i
].skip_szB
= form_szB
;
1202 ta
->nf
[i
].next_nf
= i
+1;
1203 } else if (form_szB
== VARSZ_FORM
) {
1204 ta
->nf
[i
].skip_szB
= form_szB
;
1205 ta
->nf
[i
].next_nf
= i
+1;
1207 ta
->nf
[i
].skip_szB
= ta
->nf
[i
+1].skip_szB
+ form_szB
;
1208 ta
->nf
[i
].next_nf
= ta
->nf
[i
+1].next_nf
;
1212 ht_ta
= ML_(dinfo_zalloc
) ("di.readdwarf3.ht_ta", SZ_G_ABBV(ta_nf_n
));
1213 VG_(memcpy
) (ht_ta
, ta
, SZ_G_ABBV(ta_nf_n
));
1214 VG_(HT_add_node
) ( cc
->abbv
.ht_abbvs
, ht_ta
);
1216 TRACE_D3(" Adding abbv_code %lu TAG %s [%s] nf %u ",
1217 ht_ta
->abbv_code
, ML_(pp_DW_TAG
)(ht_ta
->atag
),
1218 ML_(pp_DW_children
)(ht_ta
->has_children
),
1221 for (i
= 0; i
< ta_nf_n
; i
++)
1222 TRACE_D3("[%u,%u] ", ta
->nf
[i
].skip_szB
, ta
->nf
[i
].next_nf
);
1225 if (ht_ta
->abbv_code
== abbv_code
)
1229 ML_(dinfo_free
) (ta
);
1235 static g_abbv
* get_abbv (CUConst
* cc
, ULong abbv_code
,
1240 abbv
= VG_(HT_lookup
) (cc
->abbv
.ht_abbvs
, abbv_code
);
1241 if (!abbv
&& !cc
->abbv
.done
)
1242 abbv
= find_ht_abbvs (cc
, abbv_code
, td3
);
1244 cc
->barf ("abbv_code not found in ht_abbvs table");
1249 /* Parse the Compilation Unit header indicated at 'c' and
1250 initialise 'cc' accordingly. */
1251 static __attribute__((noinline
))
1252 void parse_CU_Header ( /*OUT*/CUConst
* cc
,
1255 DiSlice escn_debug_abbv
,
1256 abbv_state last_abbv
,
1260 UChar address_size
, unit_type
;
1261 ULong debug_abbrev_offset
;
1263 VG_(memset
)(cc
, 0, sizeof(*cc
));
1264 vg_assert(c
&& c
->barf
);
1267 /* initial_length field */
1269 = get_Initial_Length( &cc
->is_dw64
, c
,
1270 "parse_CU_Header: invalid initial-length field" );
1272 TRACE_D3(" Length: %llu\n", cc
->unit_length
);
1275 cc
->version
= get_UShort( c
);
1276 if (cc
->version
!= 2 && cc
->version
!= 3 && cc
->version
!= 4
1277 && cc
->version
!= 5)
1278 cc
->barf( "parse_CU_Header: "
1279 "is neither DWARF2 nor DWARF3 nor DWARF4 nor DWARF5" );
1280 TRACE_D3(" Version: %d\n", (Int
)cc
->version
);
1283 if (cc
->version
>= 5) {
1284 unit_type
= get_UChar( c
);
1285 address_size
= get_UChar( c
);
1287 unit_type
= type_unit
? DW_UT_type
: DW_UT_compile
;
1288 address_size
= 0; /* Will be read later. */
1291 /* debug_abbrev_offset */
1292 debug_abbrev_offset
= get_Dwarfish_UWord( c
, cc
->is_dw64
);
1293 if (debug_abbrev_offset
>= escn_debug_abbv
.szB
)
1294 cc
->barf( "parse_CU_Header: invalid debug_abbrev_offset" );
1295 TRACE_D3(" Abbrev Offset: %llu\n", debug_abbrev_offset
);
1297 /* address size. If this isn't equal to the host word size, just
1298 give up. This makes it safe to assume elsewhere that
1299 DW_FORM_addr and DW_FORM_ref_addr can be treated as a host
1301 if (cc
->version
< 5)
1302 address_size
= get_UChar( c
);
1304 if (address_size
!= sizeof(void*))
1305 cc
->barf( "parse_CU_Header: invalid address_size" );
1306 TRACE_D3(" Pointer Size: %d\n", (Int
)address_size
);
1308 cc
->is_type_unit
= type_unit
;
1309 cc
->is_alt_info
= alt_info
;
1311 if (type_unit
|| (cc
->version
>= 5 && (unit_type
== DW_UT_type
1312 || unit_type
== DW_UT_split_type
))) {
1313 cc
->type_signature
= get_ULong( c
);
1314 cc
->type_offset
= get_Dwarfish_UWord( c
, cc
->is_dw64
);
1317 if (cc
->version
>= 5 && (unit_type
== DW_UT_skeleton
1318 || unit_type
== DW_UT_split_compile
)) {
1319 /* dwo_id = */ get_ULong( c
);
1322 /* Set up cc->debug_abbv to point to the relevant table for this
1323 CU. Set its .szB so that at least we can't read off the end of
1324 the debug_abbrev section -- potentially (and quite likely) too
1325 big, if this isn't the last table in the section, but at least
1328 This amounts to taking escn_debug_abbv and moving the start
1329 position along by debug_abbrev_offset bytes, hence forming a
1330 smaller DiSlice which has the same end point. Since we checked
1331 just above that debug_abbrev_offset is less than the size of
1332 escn_debug_abbv, this should leave us with a nonempty slice. */
1333 vg_assert(debug_abbrev_offset
< escn_debug_abbv
.szB
);
1334 cc
->debug_abbv
= escn_debug_abbv
;
1335 cc
->debug_abbv
.ioff
+= debug_abbrev_offset
;
1336 cc
->debug_abbv
.szB
-= debug_abbrev_offset
;
1338 if (last_abbv
.ht_abbvs
!= NULL
1339 && debug_abbrev_offset
== last_abbv
.debug_abbrev_offset
) {
1340 cc
->abbv
= last_abbv
;
1342 if (last_abbv
.ht_abbvs
!= NULL
)
1343 VG_(HT_destruct
) (last_abbv
.ht_abbvs
, ML_(dinfo_free
));
1344 init_ht_abbvs(cc
, debug_abbrev_offset
, td3
);
1348 /* This represents a single signatured type. It maps a type signature
1349 (a ULong) to a cooked DIE offset. Objects of this type are stored
1350 in the type signature hash table. */
1352 struct D3SignatureType
{
1353 struct D3SignatureType
*next
;
1355 ULong type_signature
;
1360 /* Record a signatured type in the hash table. */
1361 static void record_signatured_type ( VgHashTable
*tab
,
1362 ULong type_signature
,
1365 D3SignatureType
*dstype
= ML_(dinfo_zalloc
) ( "di.readdwarf3.sigtype",
1366 sizeof(D3SignatureType
) );
1367 dstype
->data
= (UWord
) type_signature
;
1368 dstype
->type_signature
= type_signature
;
1370 VG_(HT_add_node
) ( tab
, dstype
);
1373 /* Given a type signature hash table and a type signature, return the
1374 cooked DIE offset of the type. If the type cannot be found, call
1376 static UWord
lookup_signatured_type ( const VgHashTable
*tab
,
1377 ULong type_signature
,
1378 void (*barf
)( const HChar
* ) __attribute__((noreturn
)) )
1380 D3SignatureType
*dstype
= VG_(HT_lookup
) ( tab
, (UWord
) type_signature
);
1381 /* This may be unwarranted chumminess with the hash table
1383 while ( dstype
!= NULL
&& dstype
->type_signature
!= type_signature
)
1384 dstype
= dstype
->next
;
1385 if (dstype
== NULL
) {
1386 barf("lookup_signatured_type: could not find signatured type");
1394 /* Represents Form data. If szB is 1/2/4/8 then the result is in the
1395 lowest 1/2/4/8 bytes of u.val. If szB is zero or negative then the
1396 result is an image section beginning at u.cur and with size -szB.
1397 No other szB values are allowed. */
1400 Long szB
; // 1, 2, 4, 8 or non-positive values only.
1401 union { ULong val
; DiCursor cur
; } u
;
1405 // Read data for get_Form_contents() from .debug_addr for the 'index' entry.
1406 static void get_Form_contents_addr( /*OUT*/FormContents
* cts
, DW_FORM form
,
1407 ULong index
, const CUConst
* cc
, Bool td3
)
1409 cts
->u
.val
= get_debug_addr_entry_form( index
, cc
, form
);
1410 cts
->szB
= sizeof(UWord
);
1411 TRACE_D3("0x%lx", (UWord
)cts
->u
.val
);
1414 // Read data for get_Form_contents() from .debug_str for the given offset.
1415 static void get_Form_contents_str( /*OUT*/FormContents
* cts
, DW_FORM form
,
1416 UWord offset
, const CUConst
* cc
, Bool td3
)
1418 if (!ML_(sli_is_valid
)(cc
->escn_debug_str
)
1419 || offset
>= cc
->escn_debug_str
.szB
) {
1421 "get_Form_contents_str: %u (%s) points outside .debug_str\n",
1422 form
, ML_(pp_DW_FORM
)(form
));
1423 cc
->barf("get_Form_contents_str: index points outside .debug_str");
1425 /* FIXME: check the entire string lies inside debug_str,
1426 not just the first byte of it. */
1428 = ML_(cur_plus
)( ML_(cur_from_sli
)(cc
->escn_debug_str
), offset
);
1430 HChar
* tmp
= ML_(cur_read_strdup
)(str
, "di.getFC.1");
1431 TRACE_D3("(indirect string, offset: 0x%lx): %s", offset
, tmp
);
1432 ML_(dinfo_free
)(tmp
);
1435 cts
->szB
= - (Long
)(1 + (ULong
)ML_(cur_strlen
)(str
));
1438 static inline UInt
sizeof_Dwarfish_UWord (Bool is_dw64
)
1441 return sizeof(ULong
);
1443 return sizeof(UInt
);
1446 // Read data for get_Form_contents() from .debug_str_offsets for the 'index' entry.
1447 static void get_Form_contents_str_offsets( /*OUT*/FormContents
* cts
, DW_FORM form
,
1448 ULong index
, const CUConst
* cc
, Bool td3
)
1450 if(!cc
->cu_has_str_offsets_base
) {
1452 "get_Form_contents_str_offsets: %u (%s) without DW_AT_str_offsets_base\n",
1453 form
, ML_(pp_DW_FORM
)(form
));
1454 cc
->barf("get_Form_contents_str_offsets: DW_AT_str_offsets_base not set");
1456 UWord str_offsets_pos
= cc
->cu_str_offsets_base
1457 + index
* sizeof_Dwarfish_UWord (cc
->is_dw64
);
1459 init_Cursor( &cur
, cc
->escn_debug_str_offsets
, str_offsets_pos
, cc
->barf
,
1460 "get_Form_contents_str_offsets: index "
1461 "points outside .debug_str_offsets" );
1463 HChar
* tmp
= ML_(cur_read_strdup
)(get_DiCursor_from_Cursor(&cur
), "di.getFC.1");
1464 TRACE_D3("(indirect string offset, offset: 0x%lx): %s", str_offsets_pos
, tmp
);
1465 ML_(dinfo_free
)(tmp
);
1467 get_Form_contents_str( cts
, form
, get_Dwarfish_UWord(&cur
, cc
->is_dw64
), cc
, td3
);
1470 /* From 'c', get the Form data into 'cts'. Either it gets a 1/2/4/8
1471 byte scalar value, or (a reference to) zero or more bytes starting
1474 void get_Form_contents ( /*OUT*/FormContents
* cts
,
1475 const CUConst
* cc
, Cursor
* c
,
1476 Bool td3
, const name_form
*abbv
)
1478 DW_FORM form
= abbv
->at_form
;
1479 VG_(bzero_inline
)(cts
, sizeof(*cts
));
1480 // !!! keep switch in sync with get_Form_szB. The nr of characters read below
1481 // must be computed similarly in get_Form_szB.
1482 // The consistency is verified in trace_DIE.
1485 cts
->u
.val
= (ULong
)(UChar
)get_UChar(c
);
1487 TRACE_D3("%u", (UInt
)cts
->u
.val
);
1490 cts
->u
.val
= (ULong
)(UShort
)get_UShort(c
);
1492 TRACE_D3("%u", (UInt
)cts
->u
.val
);
1495 cts
->u
.val
= (ULong
)(UInt
)get_UInt(c
);
1497 TRACE_D3("%u", (UInt
)cts
->u
.val
);
1500 cts
->u
.val
= get_ULong(c
);
1502 TRACE_D3("%llu", cts
->u
.val
);
1504 case DW_FORM_data16
: {
1505 /* This is more like a block than an integral value. */
1507 DiCursor data16
= get_DiCursor_from_Cursor(c
);
1508 TRACE_D3("data16: ");
1509 for (u64b
= 16; u64b
> 0; u64b
--) {
1510 UChar u8
= get_UChar(c
);
1511 TRACE_D3("%x ", (UInt
)u8
);
1513 cts
->u
.cur
= data16
;
1514 cts
->szB
= - (Long
)16;
1517 case DW_FORM_sec_offset
:
1518 cts
->u
.val
= (ULong
)get_Dwarfish_UWord( c
, cc
->is_dw64
);
1519 cts
->szB
= cc
->is_dw64
? 8 : 4;
1520 TRACE_D3("%llu", cts
->u
.val
);
1522 case DW_FORM_rnglistx
: {
1523 if(!cc
->cu_has_rnglists_base
) {
1524 cc
->barf("get_Form_contents: DW_FORM_rnglistsx"
1525 " without DW_AT_rnglists_base");
1527 /* Convert index to offset pointing to the offsets list. */
1528 ULong index
= get_ULEB128(c
);
1529 ULong offset_to_offset
= cc
->cu_rnglists_base
+ index
* sizeof_Dwarfish_UWord( cc
->is_dw64
);
1530 /* And read the offset value from there. */
1532 init_Cursor( &cur
, cc
->escn_debug_rnglists
, offset_to_offset
, cc
->barf
,
1533 "get_Form_contents: index points outside .debug_rnglists" );
1534 cts
->u
.val
= cc
->cu_rnglists_base
+ get_Dwarfish_UWord(&cur
, cc
->is_dw64
);
1536 TRACE_D3("%llu", cts
->u
.val
);
1539 case DW_FORM_loclistx
: {
1540 if(!cc
->cu_has_loclists_base
) {
1541 cc
->barf("get_Form_contents: DW_FORM_loclistsx"
1542 " without DW_AT_loclists_base");
1544 /* Convert index to offset pointing to the offsets list. */
1545 ULong index
= get_ULEB128(c
);
1546 ULong offset_to_offset
= cc
->cu_loclists_base
+ index
* sizeof_Dwarfish_UWord( cc
->is_dw64
);
1547 /* And read the offset value from there. */
1549 init_Cursor( &cur
, cc
->escn_debug_loclists
, offset_to_offset
, cc
->barf
,
1550 "get_Form_contents: index points outside .debug_loclists" );
1551 cts
->u
.val
= cc
->cu_loclists_base
+ get_Dwarfish_UWord(&cur
, cc
->is_dw64
);
1553 TRACE_D3("%llu", cts
->u
.val
);
1557 cts
->u
.val
= (ULong
)(Long
)get_SLEB128(c
);
1559 TRACE_D3("%llu", cts
->u
.val
);
1562 cts
->u
.val
= (ULong
)(Long
)get_ULEB128(c
);
1564 TRACE_D3("%llu", cts
->u
.val
);
1567 /* note, this is a hack. DW_FORM_addr is defined as getting
1568 a word the size of the target machine as defined by the
1569 address_size field in the CU Header. However,
1570 parse_CU_Header() rejects all inputs except those for
1571 which address_size == sizeof(Word), hence we can just
1572 treat it as a (host) Word. */
1573 cts
->u
.val
= (ULong
)(UWord
)get_UWord(c
);
1574 cts
->szB
= sizeof(UWord
);
1575 TRACE_D3("0x%lx", (UWord
)cts
->u
.val
);
1578 case DW_FORM_ref_addr
:
1579 /* We make the same word-size assumption as DW_FORM_addr. */
1580 /* What does this really mean? From D3 Sec 7.5.4,
1581 description of "reference", it would appear to reference
1582 some other DIE, by specifying the offset from the
1583 beginning of a .debug_info section. The D3 spec mentions
1584 that this might be in some other shared object and
1585 executable. But I don't see how the name of the other
1586 object/exe is specified.
1588 At least for the DW_FORM_ref_addrs created by icc11, the
1589 references seem to be within the same object/executable.
1590 So for the moment we merely range-check, to see that they
1591 actually do specify a plausible offset within this
1592 object's .debug_info, and return the value unchanged.
1594 In DWARF 2, DW_FORM_ref_addr is address-sized, but in
1595 DWARF 3 and later, it is offset-sized.
1597 if (cc
->version
== 2) {
1598 cts
->u
.val
= (ULong
)(UWord
)get_UWord(c
);
1599 cts
->szB
= sizeof(UWord
);
1601 cts
->u
.val
= get_Dwarfish_UWord(c
, cc
->is_dw64
);
1602 cts
->szB
= cc
->is_dw64
? sizeof(ULong
) : sizeof(UInt
);
1604 TRACE_D3("0x%lx", (UWord
)cts
->u
.val
);
1605 if (0) VG_(printf
)("DW_FORM_ref_addr 0x%lx\n", (UWord
)cts
->u
.val
);
1606 if (/* the following is surely impossible, but ... */
1607 !ML_(sli_is_valid
)(cc
->escn_debug_info
)
1608 || cts
->u
.val
>= (ULong
)cc
->escn_debug_info
.szB
) {
1609 /* Hmm. Offset is nonsensical for this object's .debug_info
1610 section. Be safe and reject it. */
1611 cc
->barf("get_Form_contents: DW_FORM_ref_addr points "
1612 "outside .debug_info");
1616 case DW_FORM_strp
: {
1617 /* this is an offset into .debug_str */
1618 UWord uw
= (UWord
)get_Dwarfish_UWord( c
, cc
->is_dw64
);
1619 get_Form_contents_str( cts
, form
, uw
, cc
, td3
);
1622 case DW_FORM_line_strp
: {
1623 /* this is an offset into .debug_line_str */
1624 UWord uw
= (UWord
)get_Dwarfish_UWord( c
, cc
->is_dw64
);
1625 if (!ML_(sli_is_valid
)(cc
->escn_debug_line_str
)
1626 || uw
>= cc
->escn_debug_line_str
.szB
)
1627 cc
->barf("get_Form_contents: DW_FORM_line_strp "
1628 "points outside .debug_line_str");
1629 /* FIXME: check the entire string lies inside debug_line_str,
1630 not just the first byte of it. */
1632 = ML_(cur_plus
)( ML_(cur_from_sli
)(cc
->escn_debug_line_str
), uw
);
1634 HChar
* tmp
= ML_(cur_read_strdup
)(line_str
, "di.getFC.1.5");
1635 TRACE_D3("(indirect line string, offset: 0x%lx): %s", uw
, tmp
);
1636 ML_(dinfo_free
)(tmp
);
1638 cts
->u
.cur
= line_str
;
1639 cts
->szB
= - (Long
)(1 + (ULong
)ML_(cur_strlen
)(line_str
));
1642 case DW_FORM_string
: {
1643 DiCursor str
= get_AsciiZ(c
);
1645 HChar
* tmp
= ML_(cur_read_strdup
)(str
, "di.getFC.2");
1646 TRACE_D3("%s", tmp
);
1647 ML_(dinfo_free
)(tmp
);
1650 /* strlen is safe because get_AsciiZ already 'vetted' the
1652 cts
->szB
= - (Long
)(1 + (ULong
)ML_(cur_strlen
)(str
));
1655 case DW_FORM_ref1
: {
1656 UChar u8
= get_UChar(c
);
1657 UWord res
= cc
->cu_start_offset
+ (UWord
)u8
;
1658 cts
->u
.val
= (ULong
)res
;
1659 cts
->szB
= sizeof(UWord
);
1660 TRACE_D3("<%lx>", res
);
1663 case DW_FORM_ref2
: {
1664 UShort u16
= get_UShort(c
);
1665 UWord res
= cc
->cu_start_offset
+ (UWord
)u16
;
1666 cts
->u
.val
= (ULong
)res
;
1667 cts
->szB
= sizeof(UWord
);
1668 TRACE_D3("<%lx>", res
);
1671 case DW_FORM_ref4
: {
1672 UInt u32
= get_UInt(c
);
1673 UWord res
= cc
->cu_start_offset
+ (UWord
)u32
;
1674 cts
->u
.val
= (ULong
)res
;
1675 cts
->szB
= sizeof(UWord
);
1676 TRACE_D3("<%lx>", res
);
1679 case DW_FORM_ref8
: {
1680 ULong u64
= get_ULong(c
);
1681 UWord res
= cc
->cu_start_offset
+ (UWord
)u64
;
1682 cts
->u
.val
= (ULong
)res
;
1683 cts
->szB
= sizeof(UWord
);
1684 TRACE_D3("<%lx>", res
);
1687 case DW_FORM_ref_udata
: {
1688 ULong u64
= get_ULEB128(c
);
1689 UWord res
= cc
->cu_start_offset
+ (UWord
)u64
;
1690 cts
->u
.val
= (ULong
)res
;
1691 cts
->szB
= sizeof(UWord
);
1692 TRACE_D3("<%lx>", res
);
1695 case DW_FORM_flag
: {
1696 UChar u8
= get_UChar(c
);
1697 TRACE_D3("%u", (UInt
)u8
);
1698 cts
->u
.val
= (ULong
)u8
;
1702 case DW_FORM_flag_present
:
1707 case DW_FORM_implicit_const
:
1708 cts
->u
.val
= (ULong
)abbv
->at_val
;
1710 TRACE_D3("%llu", cts
->u
.val
);
1712 case DW_FORM_block1
: {
1714 ULong u64
= (ULong
)get_UChar(c
);
1715 DiCursor block
= get_DiCursor_from_Cursor(c
);
1716 TRACE_D3("%llu byte block: ", u64
);
1717 for (u64b
= u64
; u64b
> 0; u64b
--) {
1718 UChar u8
= get_UChar(c
);
1719 TRACE_D3("%x ", (UInt
)u8
);
1722 cts
->szB
= - (Long
)u64
;
1725 case DW_FORM_block2
: {
1727 ULong u64
= (ULong
)get_UShort(c
);
1728 DiCursor block
= get_DiCursor_from_Cursor(c
);
1729 TRACE_D3("%llu byte block: ", u64
);
1730 for (u64b
= u64
; u64b
> 0; u64b
--) {
1731 UChar u8
= get_UChar(c
);
1732 TRACE_D3("%x ", (UInt
)u8
);
1735 cts
->szB
= - (Long
)u64
;
1738 case DW_FORM_block4
: {
1740 ULong u64
= (ULong
)get_UInt(c
);
1741 DiCursor block
= get_DiCursor_from_Cursor(c
);
1742 TRACE_D3("%llu byte block: ", u64
);
1743 for (u64b
= u64
; u64b
> 0; u64b
--) {
1744 UChar u8
= get_UChar(c
);
1745 TRACE_D3("%x ", (UInt
)u8
);
1748 cts
->szB
= - (Long
)u64
;
1751 case DW_FORM_exprloc
:
1752 case DW_FORM_block
: {
1754 ULong u64
= (ULong
)get_ULEB128(c
);
1755 DiCursor block
= get_DiCursor_from_Cursor(c
);
1756 TRACE_D3("%llu byte block: ", u64
);
1757 for (u64b
= u64
; u64b
> 0; u64b
--) {
1758 UChar u8
= get_UChar(c
);
1759 TRACE_D3("%x ", (UInt
)u8
);
1762 cts
->szB
= - (Long
)u64
;
1765 case DW_FORM_ref_sig8
: {
1767 ULong signature
= get_ULong (c
);
1768 ULong work
= signature
;
1769 TRACE_D3("8 byte signature: ");
1770 for (u64b
= 8; u64b
> 0; u64b
--) {
1771 UChar u8
= work
& 0xff;
1772 TRACE_D3("%x ", (UInt
)u8
);
1776 /* cc->signature_types is only built/initialised when
1777 VG_(clo_read_var_info) is set. In this case,
1778 the DW_FORM_ref_sig8 can be looked up.
1779 But we can also arrive here when only reading inline info
1780 and VG_(clo_trace_symtab) is set. In such a case,
1781 we cannot lookup the DW_FORM_ref_sig8, we rather assign
1782 a dummy value. This is a kludge, but otherwise,
1783 the 'dwarf inline info reader' tracing would have to
1784 do type processing/reading. It is better to avoid
1785 adding significant 'real' processing only due to tracing. */
1786 if (VG_(clo_read_var_info
)) {
1787 /* Due to the way that the hash table is constructed, the
1788 resulting DIE offset here is already "cooked". See
1789 cook_die_using_form. */
1790 cts
->u
.val
= lookup_signatured_type (cc
->signature_types
, signature
,
1794 vg_assert (VG_(clo_read_inline_info
));
1795 TRACE_D3("<not dereferencing signature type>");
1796 cts
->u
.val
= 0; /* Assign a dummy/rubbish value */
1798 cts
->szB
= sizeof(UWord
);
1801 case DW_FORM_indirect
: {
1802 /* Urgh, this is ugly and somewhat unclear how it works
1803 with DW_FORM_implicit_const. HACK. */
1804 name_form nfi
= *abbv
;
1805 nfi
.at_form
= (DW_FORM
)get_ULEB128(c
);
1806 get_Form_contents (cts
, cc
, c
, td3
, &nfi
);
1810 case DW_FORM_GNU_ref_alt
:
1811 cts
->u
.val
= get_Dwarfish_UWord(c
, cc
->is_dw64
);
1812 cts
->szB
= cc
->is_dw64
? sizeof(ULong
) : sizeof(UInt
);
1813 TRACE_D3("0x%lx", (UWord
)cts
->u
.val
);
1814 if (0) VG_(printf
)("DW_FORM_GNU_ref_alt 0x%lx\n", (UWord
)cts
->u
.val
);
1815 if (/* the following is surely impossible, but ... */
1816 !ML_(sli_is_valid
)(cc
->escn_debug_info_alt
))
1817 cc
->barf("get_Form_contents: DW_FORM_GNU_ref_addr used, "
1818 "but no alternate .debug_info");
1819 else if (cts
->u
.val
>= (ULong
)cc
->escn_debug_info_alt
.szB
) {
1820 /* Hmm. Offset is nonsensical for this object's .debug_info
1821 section. Be safe and reject it. */
1822 cc
->barf("get_Form_contents: DW_FORM_GNU_ref_addr points "
1823 "outside alternate .debug_info");
1827 case DW_FORM_GNU_strp_alt
: {
1828 /* this is an offset into alternate .debug_str */
1829 SizeT uw
= (UWord
)get_Dwarfish_UWord( c
, cc
->is_dw64
);
1830 if (!ML_(sli_is_valid
)(cc
->escn_debug_str_alt
))
1831 cc
->barf("get_Form_contents: DW_FORM_GNU_strp_alt used, "
1832 "but no alternate .debug_str");
1833 else if (uw
>= cc
->escn_debug_str_alt
.szB
)
1834 cc
->barf("get_Form_contents: DW_FORM_GNU_strp_alt "
1835 "points outside alternate .debug_str");
1836 /* FIXME: check the entire string lies inside debug_str,
1837 not just the first byte of it. */
1839 = ML_(cur_plus
)( ML_(cur_from_sli
)(cc
->escn_debug_str_alt
), uw
);
1841 HChar
* tmp
= ML_(cur_read_strdup
)(str
, "di.getFC.3");
1842 TRACE_D3("(indirect alt string, offset: 0x%lx): %s", uw
, tmp
);
1843 ML_(dinfo_free
)(tmp
);
1846 cts
->szB
= - (Long
)(1 + (ULong
)ML_(cur_strlen
)(str
));
1850 case DW_FORM_addrx
: {
1851 /* this is an offset into .debug_addr */
1852 ULong index
= (ULong
)(Long
)get_ULEB128(c
);
1853 get_Form_contents_addr(cts
, form
, index
, cc
, td3
);
1856 case DW_FORM_addrx1
: {
1857 /* this is an offset into .debug_addr */
1858 ULong index
= (ULong
)get_UChar(c
);
1859 get_Form_contents_addr(cts
, form
, index
, cc
, td3
);
1862 case DW_FORM_addrx2
: {
1863 /* this is an offset into .debug_addr */
1864 ULong index
= (ULong
)get_UShort(c
);
1865 get_Form_contents_addr(cts
, form
, index
, cc
, td3
);
1868 case DW_FORM_addrx3
: {
1869 /* this is an offset into .debug_addr */
1870 ULong index
= (ULong
)get_UInt3(c
);
1871 get_Form_contents_addr(cts
, form
, index
, cc
, td3
);
1874 case DW_FORM_addrx4
: {
1875 /* this is an offset into .debug_addr */
1876 ULong index
= (ULong
)get_UInt(c
);
1877 get_Form_contents_addr(cts
, form
, index
, cc
, td3
);
1880 case DW_FORM_strx
: {
1881 /* this is an offset into .debug_str_offsets */
1882 ULong index
= (ULong
)(Long
)get_ULEB128(c
);
1883 get_Form_contents_str_offsets(cts
, form
, index
, cc
, td3
);
1886 case DW_FORM_strx1
: {
1887 /* this is an offset into .debug_str_offsets */
1888 ULong index
= get_UChar(c
);
1889 get_Form_contents_str_offsets(cts
, form
, index
, cc
, td3
);
1892 case DW_FORM_strx2
: {
1893 /* this is an offset into .debug_str_offsets */
1894 ULong index
= (ULong
)get_UShort(c
);
1895 get_Form_contents_str_offsets(cts
, form
, index
, cc
, td3
);
1898 case DW_FORM_strx3
: {
1899 /* this is an offset into .debug_str_offsets */
1900 ULong index
= (ULong
)get_UInt3(c
);
1901 get_Form_contents_str_offsets(cts
, form
, index
, cc
, td3
);
1904 case DW_FORM_strx4
: {
1905 /* this is an offset into .debug_str_offsets */
1906 ULong index
= (ULong
)get_UInt(c
);
1907 get_Form_contents_str_offsets(cts
, form
, index
, cc
, td3
);
1913 "get_Form_contents: unhandled %u (%s) at <%llx>\n",
1914 form
, ML_(pp_DW_FORM
)(form
), get_position_of_Cursor(c
));
1915 c
->barf("get_Form_contents: unhandled DW_FORM");
1919 #define VARSZ_FORM 0xffffffff
1920 /* If the form is a fixed length form, return the nr of bytes for this form.
1921 If the form is a variable length form, return VARSZ_FORM. */
1923 UInt
get_Form_szB (const CUConst
* cc
, DW_FORM form
)
1925 // !!! keep switch in sync with get_Form_contents : the nr of bytes
1926 // read from a cursor by get_Form_contents must be returned by
1927 // the below switch.
1928 // The consistency is verified in trace_DIE.
1930 case DW_FORM_data1
: return 1;
1931 case DW_FORM_data2
: return 2;
1932 case DW_FORM_data4
: return 4;
1933 case DW_FORM_data8
: return 8;
1934 case DW_FORM_data16
: return 16;
1935 case DW_FORM_sec_offset
:
1940 case DW_FORM_rnglistx
:
1941 case DW_FORM_loclistx
:
1947 case DW_FORM_addr
: // See hack in get_Form_contents
1948 return sizeof(UWord
);
1949 case DW_FORM_ref_addr
: // See hack in get_Form_contents
1950 if (cc
->version
== 2)
1951 return sizeof(UWord
);
1953 return sizeof_Dwarfish_UWord (cc
->is_dw64
);
1955 case DW_FORM_line_strp
:
1956 return sizeof_Dwarfish_UWord (cc
->is_dw64
);
1957 case DW_FORM_string
:
1967 case DW_FORM_ref_udata
:
1971 case DW_FORM_flag_present
:
1972 return 0; // !!! special case, no data.
1973 case DW_FORM_block1
:
1975 case DW_FORM_block2
:
1977 case DW_FORM_block4
:
1979 case DW_FORM_exprloc
:
1982 case DW_FORM_ref_sig8
:
1984 case DW_FORM_indirect
:
1986 case DW_FORM_GNU_ref_alt
:
1987 return sizeof_Dwarfish_UWord(cc
->is_dw64
);
1988 case DW_FORM_GNU_strp_alt
:
1989 return sizeof_Dwarfish_UWord(cc
->is_dw64
);
1990 case DW_FORM_implicit_const
:
1991 return 0; /* Value inside abbrev. */
1996 case DW_FORM_addrx1
:
1999 case DW_FORM_addrx2
:
2002 case DW_FORM_addrx3
:
2005 case DW_FORM_addrx4
:
2010 "get_Form_szB: unhandled %u (%s)\n",
2011 form
, ML_(pp_DW_FORM
)(form
));
2012 cc
->barf("get_Form_contents: unhandled DW_FORM");
2016 /* Skip a DIE as described by abbv.
2017 If the DIE has a sibling, *sibling is set to the skipped DIE sibling value. */
2019 void skip_DIE (UWord
*sibling
,
2028 if (abbv
->nf
[nf_i
].at_name
== DW_AT_sibling
) {
2029 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/,
2032 *sibling
= cts
.u
.val
;
2034 } else if (abbv
->nf
[nf_i
].skip_szB
== VARSZ_FORM
) {
2035 DW_FORM form
= abbv
->nf
[nf_i
].at_form
;
2036 if(form
== DW_FORM_addrx
|| form
== DW_FORM_strx
2037 || form
== DW_FORM_rnglistx
|| form
== DW_FORM_loclistx
) {
2038 /* Skip without interpreting them, they may depend on e.g.
2039 DW_AT_addr_base that has not been read yet. */
2040 (void) get_ULEB128(c_die
);
2042 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/,
2046 advance_position_of_Cursor (c_die
, (ULong
)abbv
->nf
[nf_i
].skip_szB
);
2047 nf_i
= abbv
->nf
[nf_i
].next_nf
;
2055 /*------------------------------------------------------------*/
2057 /*--- Parsing of variable-related DIEs ---*/
2059 /*------------------------------------------------------------*/
2063 const HChar
* name
; /* in DebugInfo's .strpool */
2064 /* Represent ranges economically. nRanges is the number of
2066 0: .rngOneMin .rngOneMax .rngMany are all zero
2067 1: .rngOneMin .rngOneMax hold the range; .rngMany is NULL
2068 2: .rngOneMin .rngOneMax are zero; .rngMany holds the ranges.
2069 This is merely an optimisation to avoid having to allocate
2070 and free the XArray in the common case (98%) where there
2071 is zero or one address range. */
2075 XArray
* rngMany
; /* of AddrRange. NON-UNIQUE PTR in AR_DINFO. */
2076 /* Do not free .rngMany, since many TempVars will have the same
2077 value. Instead the associated storage is to be freed by
2078 deleting 'rangetree', which stores a single copy of each
2082 UWord typeR
; /* a cuOff */
2083 GExpr
* gexpr
; /* for this variable */
2084 GExpr
* fbGX
; /* to find the frame base of the enclosing fn, if
2086 UInt fndn_ix
; /* declaring file/dirname index in fndnpool, or 0 */
2087 Int fLine
; /* declaring file line number, or zero */
2088 /* offset in .debug_info, so that abstract instances can be
2089 found to satisfy references from concrete instances. */
2091 UWord absOri
; /* so the absOri fields refer to dioff fields
2092 in some other, related TempVar. */
2098 /* Contains the range stack: a stack of address ranges, one
2099 stack entry for each nested scope.
2101 Some scope entries are created by function definitions
2102 (DW_TAG_subprogram), and for those, we also note the GExpr
2103 derived from its DW_AT_frame_base attribute, if any.
2104 Consequently it should be possible to find, for any
2105 variable's DIE, the GExpr for the containing function's
2106 DW_AT_frame_base by scanning back through the stack to find
2107 the nearest entry associated with a function. This somewhat
2108 elaborate scheme is provided so as to make it possible to
2109 obtain the correct DW_AT_frame_base expression even in the
2110 presence of nested functions (or to be more precise, in the
2111 presence of nested DW_TAG_subprogram DIEs).
2113 Int sp
; /* [sp] is innermost active entry; sp==-1 for empty
2116 XArray
**ranges
; /* XArray of AddrRange */
2117 Int
*level
; /* D3 DIE levels */
2118 Bool
*isFunc
; /* from DW_AT_subprogram? */
2119 GExpr
**fbGX
; /* if isFunc, contains the FB expr, else NULL */
2123 /* Completely initialise a variable parser object */
/* In the lines visible here, this sets the stack capacity
   (stack_size) to zero and sets the four per-entry arrays (ranges,
   level, isFunc, fbGX) to NULL.  All four arrays are indexed by the
   same stack position in varstack_push(), which also grows them with
   ML_(dinfo_realloc) when the stack is full, so a freshly initialised
   parser owns no array storage. */
2125 var_parser_init ( D3VarParser
*parser
)
2128 parser
->stack_size
= 0;
2129 parser
->ranges
= NULL
;
2130 parser
->level
= NULL
;
2131 parser
->isFunc
= NULL
;
2132 parser
->fbGX
= NULL
;
2135 /* Release any memory hanging off a variable parser object */
/* Hands each of the four per-entry arrays (ranges, level, isFunc,
   fbGX) to ML_(dinfo_free).  Only the arrays themselves are released
   in these lines: the XArrays stored in ranges[] are deleted by
   varstack_preen() as entries are popped, and the parser object
   itself is not freed here. */
2137 var_parser_release ( D3VarParser
*parser
)
2139 ML_(dinfo_free
)( parser
->ranges
);
2140 ML_(dinfo_free
)( parser
->level
);
2141 ML_(dinfo_free
)( parser
->isFunc
);
2142 ML_(dinfo_free
)( parser
->fbGX
);
2145 static void varstack_show ( const D3VarParser
* parser
, const HChar
* str
)
2148 VG_(printf
)(" varstack (%s) {\n", str
);
2149 for (i
= 0; i
<= parser
->sp
; i
++) {
2150 XArray
* xa
= parser
->ranges
[i
];
2152 VG_(printf
)(" [%ld] (level %d)", i
, parser
->level
[i
]);
2153 if (parser
->isFunc
[i
]) {
2154 VG_(printf
)(" (fbGX=%p)", parser
->fbGX
[i
]);
2156 vg_assert(parser
->fbGX
[i
] == NULL
);
2159 if (VG_(sizeXA
)( xa
) == 0) {
2160 VG_(printf
)("** empty PC range array **");
2162 for (j
= 0; j
< VG_(sizeXA
)( xa
); j
++) {
2163 AddrRange
* range
= (AddrRange
*) VG_(indexXA
)( xa
, j
);
2165 VG_(printf
)("[%#lx,%#lx] ", range
->aMin
, range
->aMax
);
2170 VG_(printf
)(" }\n");
2173 /* Remove from the stack, all entries with .level > 'level' */
2175 void varstack_preen ( D3VarParser
* parser
, Bool td3
, Int level
)
2177 Bool changed
= False
;
2178 vg_assert(parser
->sp
< parser
->stack_size
);
2180 vg_assert(parser
->sp
>= -1);
2181 if (parser
->sp
== -1) break;
2182 if (parser
->level
[parser
->sp
] <= level
) break;
2184 TRACE_D3("BBBBAAAA varstack_pop [newsp=%d]\n", parser
->sp
-1);
2185 vg_assert(parser
->ranges
[parser
->sp
]);
2186 /* Who allocated this xa? get_range_list() or
2187 unitary_range_list(). */
2188 VG_(deleteXA
)( parser
->ranges
[parser
->sp
] );
2193 varstack_show( parser
, "after preen" );
2196 static void varstack_push ( const CUConst
* cc
,
2197 D3VarParser
* parser
,
2199 XArray
* ranges
, Int level
,
2200 Bool isFunc
, GExpr
* fbGX
) {
2202 TRACE_D3("BBBBAAAA varstack_push[newsp=%d]: %d %p\n",
2203 parser
->sp
+1, level
, ranges
);
2205 /* First we need to zap everything >= 'level', as we are about to
2206 replace any previous entry at 'level', so .. */
2207 varstack_preen(parser
, /*td3*/False
, level
-1);
2209 vg_assert(parser
->sp
>= -1);
2210 vg_assert(parser
->sp
< parser
->stack_size
);
2211 if (parser
->sp
== parser
->stack_size
- 1) {
2212 parser
->stack_size
+= 48;
2214 ML_(dinfo_realloc
)("di.readdwarf3.varpush.1", parser
->ranges
,
2215 parser
->stack_size
* sizeof parser
->ranges
[0]);
2217 ML_(dinfo_realloc
)("di.readdwarf3.varpush.2", parser
->level
,
2218 parser
->stack_size
* sizeof parser
->level
[0]);
2220 ML_(dinfo_realloc
)("di.readdwarf3.varpush.3", parser
->isFunc
,
2221 parser
->stack_size
* sizeof parser
->isFunc
[0]);
2223 ML_(dinfo_realloc
)("di.readdwarf3.varpush.4", parser
->fbGX
,
2224 parser
->stack_size
* sizeof parser
->fbGX
[0]);
2226 if (parser
->sp
>= 0)
2227 vg_assert(parser
->level
[parser
->sp
] < level
);
2229 vg_assert(ranges
!= NULL
);
2230 if (!isFunc
) vg_assert(fbGX
== NULL
);
2231 parser
->ranges
[parser
->sp
] = ranges
;
2232 parser
->level
[parser
->sp
] = level
;
2233 parser
->isFunc
[parser
->sp
] = isFunc
;
2234 parser
->fbGX
[parser
->sp
] = fbGX
;
2236 varstack_show( parser
, "after push" );
2240 /* cts is derived from a DW_AT_location and so refers either to a
2241 location expression or to a location list. Figure out which, and
2242 in both cases bundle the expression or location list into a
2243 so-called GExpr (guarded expression). */
2244 __attribute__((noinline
))
2245 static GExpr
* get_GX ( const CUConst
* cc
, Bool td3
, const FormContents
* cts
)
2247 GExpr
* gexpr
= NULL
;
2249 /* represents a non-empty in-line location expression, and
2250 cts->u.cur points at the image bytes */
2251 gexpr
= make_singleton_GX( cts
->u
.cur
, (ULong
)(- cts
->szB
) );
2255 /* represents a location list. cts->u.val is the offset of it
2257 if (!cc
->cu_svma_known
)
2258 cc
->barf("get_GX: location list, but CU svma is unknown");
2259 gexpr
= make_general_GX( cc
, td3
, cts
->u
.val
, cc
->cu_svma
);
2262 vg_assert(0); /* else caller is bogus */
2268 HChar
* get_line_str (struct _DebugInfo
* di
, Bool is_dw64
,
2269 Cursor
*data
, const UInt form
,
2270 DiSlice debugstr_img
, DiSlice debuglinestr_img
)
2274 case DW_FORM_string
: {
2275 DiCursor distr
= get_AsciiZ(data
);
2276 str
= ML_(cur_step_strdup
)(&distr
, "di.gls.string");
2279 case DW_FORM_strp
: {
2280 UWord uw
= (UWord
)get_Dwarfish_UWord( data
, is_dw64
);
2282 = ML_(cur_plus
)( ML_(cur_from_sli
)(debugstr_img
), uw
);
2283 str
= ML_(cur_read_strdup
)(distr
, "di.gls.strp");
2286 case DW_FORM_line_strp
: {
2287 UWord uw
= (UWord
)get_Dwarfish_UWord( data
, is_dw64
);
2289 = ML_(cur_plus
)( ML_(cur_from_sli
)(debuglinestr_img
), uw
);
2290 str
= ML_(cur_read_strdup
)(distr
, "di.gls.line_strp");
2294 ML_(symerr
)(di
, True
,
2295 "Unknown path string FORM in .debug_line");
/* Reads one directory-index value from the cursor 'data', decoding it
   according to 'form'.  'di' is only used for error reporting via
   ML_(symerr).
   NOTE(review): the labels selecting each arm are not visible in this
   view; the three reads below presumably correspond to a 1-byte, a
   2-byte and a ULEB128-encoded form respectively -- confirm. */
2302 Int
get_line_ndx (struct _DebugInfo
* di
,
2303 Cursor
*data
, const UInt form
)
2308 res
= get_UChar(data
);
2311 res
= get_UShort(data
);
2314 res
= get_ULEB128(data
);
/* Presumably the fallback arm for any form not handled above: the
   problem is reported rather than silently ignored. */
2317 ML_(symerr
)(di
, True
,
2318 "Unknown directory_index value FORM in .debug_line");
2325 void skip_line_form (struct _DebugInfo
* di
, Bool is_dw64
,
2326 Cursor
*d
, const UInt form
)
2329 case DW_FORM_block
: {
2330 ULong len
= get_ULEB128(d
);
2331 advance_position_of_Cursor (d
, len
);
2334 case DW_FORM_block1
: {
2335 UChar len
= get_UChar(d
);
2336 advance_position_of_Cursor (d
, len
);
2339 case DW_FORM_block2
: {
2340 UShort len
= get_UShort(d
);
2341 advance_position_of_Cursor (d
, len
);
2344 case DW_FORM_block4
: {
2345 UInt len
= get_UInt(d
);
2346 advance_position_of_Cursor (d
, len
);
2351 advance_position_of_Cursor (d
, 1);
2354 advance_position_of_Cursor (d
, 2);
2357 advance_position_of_Cursor (d
, 4);
2360 advance_position_of_Cursor (d
, 8);
2362 case DW_FORM_data16
:
2363 advance_position_of_Cursor (d
, 16);
2365 case DW_FORM_string
:
2366 (void)get_AsciiZ (d
);
2369 case DW_FORM_line_strp
:
2370 case DW_FORM_sec_offset
:
2371 advance_position_of_Cursor (d
, is_dw64
? 8 : 4);
2374 (void)get_ULEB128(d
);
2377 (void)get_SLEB128(d
);
2380 ML_(symerr
)(di
, True
, "Unknown FORM in .debug_line");
2385 /* Returns an xarray* of directory names (indexed by the dwarf dirname
2387 If 'compdir' is NULL, entry [0] will be set to "."
2388 otherwise entry [0] is set to compdir.
2389 Entry [0] basically means "the current directory of the compilation",
2390 whatever that means, according to the DWARF3 spec.
2391 FIXME??? readdwarf3.c/readdwarf.c have a lot of duplicated code */
2393 XArray
* read_dirname_xa (DebugInfo
* di
, UShort version
, const HChar
*compdir
,
2394 Cursor
*c
, const CUConst
*cc
,
2397 XArray
* dirname_xa
; /* xarray of HChar* dirname */
2398 const HChar
* dirname
;
2401 dirname_xa
= VG_(newXA
) (ML_(dinfo_zalloc
), "di.rdxa.1", ML_(dinfo_free
),
2404 if (compdir
== NULL
) {
2409 compdir_len
= VG_(strlen
)(compdir
);
2412 /* For version 5, the compdir is the first (zero) entry. */
2414 VG_(addToXA
) (dirname_xa
, &dirname
);
2417 TRACE_D3("The Directory Table%s\n",
2418 peek_UChar(c
) == 0 ? " is empty." : ":" );
2420 while (peek_UChar(c
) != 0) {
2422 DiCursor cur
= get_AsciiZ(c
);
2423 HChar
* data_str
= ML_(cur_read_strdup
)( cur
, "dirname_xa.1" );
2424 TRACE_D3(" %s\n", data_str
);
2426 /* If data_str[0] is '/', then 'data' is an absolute path and we
2427 don't mess with it. Otherwise, construct the
2428 path 'compdir' ++ "/" ++ 'data'. */
2430 if (data_str
[0] != '/'
2431 /* not an absolute path */
2433 /* actually got something sensible for compdir */
2436 SizeT len
= compdir_len
+ 1 + VG_(strlen
)(data_str
);
2437 HChar
*buf
= ML_(dinfo_zalloc
)("dirname_xa.2", len
+ 1);
2439 VG_(strcpy
)(buf
, compdir
);
2440 VG_(strcat
)(buf
, "/");
2441 VG_(strcat
)(buf
, data_str
);
2443 dirname
= ML_(addStr
)(di
, buf
, len
);
2444 VG_(addToXA
) (dirname_xa
, &dirname
);
2445 if (0) VG_(printf
)("rel path %s\n", buf
);
2446 ML_(dinfo_free
)(buf
);
2448 /* just use 'data'. */
2449 dirname
= ML_(addStr
)(di
,data_str
,-1);
2450 VG_(addToXA
) (dirname_xa
, &dirname
);
2451 if (0) VG_(printf
)("abs path %s\n", data_str
);
2454 ML_(dinfo_free
)(data_str
);
2459 UInt directories_count
;
2460 UChar directory_entry_format_count
;
2462 DiSlice debugstr_img
= cc
->escn_debug_str
;
2463 DiSlice debuglinestr_img
= cc
->escn_debug_line_str
;
2465 directory_entry_format_count
= get_UChar(c
);
2466 for (n
= 0; n
< directory_entry_format_count
; n
++) {
2467 UInt lnct
= get_ULEB128(c
);
2468 UInt form
= get_ULEB128(c
);
2469 if (lnct
== DW_LNCT_path
)
2473 directories_count
= get_ULEB128(c
);
2474 TRACE_D3("The Directory Table%s\n",
2475 directories_count
== 0 ? " is empty." : ":" );
2477 for (n
= 0; n
< directories_count
; n
++) {
2479 for (f
= 0; f
< directory_entry_format_count
; f
++) {
2480 UInt form
= forms
[f
];
2482 HChar
*data_str
= get_line_str (di
, cc
->is_dw64
, c
, form
,
2485 TRACE_D3(" %s\n", data_str
);
2487 /* If data_str[0] is '/', then 'data' is an absolute path and we
2488 don't mess with it. Otherwise, construct the
2489 path 'compdir' ++ "/" ++ 'data'. */
2491 if (data_str
[0] != '/'
2492 /* not an absolute path */
2494 /* actually got something sensible for compdir */
2497 SizeT len
= compdir_len
+ 1 + VG_(strlen
)(data_str
);
2498 HChar
*buf
= ML_(dinfo_zalloc
)("dirname_xa.2", len
+ 1);
2500 VG_(strcpy
)(buf
, compdir
);
2501 VG_(strcat
)(buf
, "/");
2502 VG_(strcat
)(buf
, data_str
);
2504 dirname
= ML_(addStr
)(di
, buf
, len
);
2505 VG_(addToXA
) (dirname_xa
, &dirname
);
2506 if (0) VG_(printf
)("rel path %s\n", buf
);
2507 ML_(dinfo_free
)(buf
);
2509 /* just use 'data'. */
2510 dirname
= ML_(addStr
)(di
,data_str
,-1);
2511 VG_(addToXA
) (dirname_xa
, &dirname
);
2512 if (0) VG_(printf
)("abs path %s\n", data_str
);
2515 ML_(dinfo_free
)(data_str
);
2517 skip_line_form (di
, cc
->is_dw64
, c
, form
);
2525 if (version
< 5 && get_UChar (c
) != 0) {
2526 ML_(symerr
)(NULL
, True
,
2527 "could not get NUL at end of DWARF directory table");
2528 VG_(deleteXA
)(dirname_xa
);
2536 void read_filename_table( /*MOD*/XArray
* /* of UInt* */ fndn_ix_Table
,
2537 const HChar
* compdir
,
2538 const CUConst
* cc
, ULong debug_line_offset
,
2547 XArray
* dirname_xa
; /* xarray of HChar* dirname */
2548 ULong dir_xa_ix
; /* Index in dirname_xa, as read from dwarf info. */
2549 const HChar
* dirname
;
2552 vg_assert(fndn_ix_Table
&& cc
&& cc
->barf
);
2553 if (!ML_(sli_is_valid
)(cc
->escn_debug_line
)
2554 || cc
->escn_debug_line
.szB
<= debug_line_offset
) {
2555 cc
->barf("read_filename_table: .debug_line is missing?");
2558 init_Cursor( &c
, cc
->escn_debug_line
, debug_line_offset
, cc
->barf
,
2559 "Overrun whilst reading .debug_line section(1)" );
2562 get_Initial_Length( &is_dw64
, &c
,
2563 "read_filename_table: invalid initial-length field" );
2564 version
= get_UShort( &c
);
2565 if (version
!= 2 && version
!= 3 && version
!= 4 && version
!= 5)
2566 cc
->barf("read_filename_table: Only DWARF version 2, 3, 4 and 5 "
2567 "line info is currently supported.");
2569 /* addrs_size = */ get_UChar( &c
);
2570 /* seg_size = */ get_UChar( &c
);
2572 /*header_length = (ULong)*/ get_Dwarfish_UWord( &c
, is_dw64
);
2573 /*minimum_instruction_length = */ get_UChar( &c
);
2575 /*maximum_operations_per_insn = */ get_UChar( &c
);
2576 /*default_is_stmt = */ get_UChar( &c
);
2577 /*line_base = (Char)*/ get_UChar( &c
);
2578 /*line_range = */ get_UChar( &c
);
2579 opcode_base
= get_UChar( &c
);
2580 /* skip over "standard_opcode_lengths" */
2581 for (i
= 1; i
< (Word
)opcode_base
; i
++)
2582 (void)get_UChar( &c
);
2584 dirname_xa
= read_dirname_xa(cc
->di
, version
, compdir
, &c
, cc
, td3
);
2586 /* Read and record the file names table */
2587 vg_assert( VG_(sizeXA
)( fndn_ix_Table
) == 0 );
2589 /* Add a dummy index-zero entry. DWARF3 numbers its files
2590 from 1, for some reason. */
2591 fndn_ix
= ML_(addFnDn
) ( cc
->di
, "<unknown_file>", NULL
);
2592 VG_(addToXA
)( fndn_ix_Table
, &fndn_ix
);
2593 while (peek_UChar(&c
) != 0) {
2594 DiCursor cur
= get_AsciiZ(&c
);
2595 str
= ML_(addStrFromCursor
)( cc
->di
, cur
);
2596 dir_xa_ix
= get_ULEB128( &c
);
2597 if (dirname_xa
!= NULL
&& dir_xa_ix
< VG_(sizeXA
) (dirname_xa
))
2598 dirname
= *(HChar
**)VG_(indexXA
) ( dirname_xa
, dir_xa_ix
);
2601 fndn_ix
= ML_(addFnDn
)( cc
->di
, str
, dirname
);
2602 TRACE_D3(" read_filename_table: %ld fndn_ix %u %s %s\n",
2603 VG_(sizeXA
)(fndn_ix_Table
), fndn_ix
,
2605 VG_(addToXA
)( fndn_ix_Table
, &fndn_ix
);
2606 (void)get_ULEB128( &c
); /* skip last mod time */
2607 (void)get_ULEB128( &c
); /* file size */
2611 UChar p_ndx
= 0, d_ndx
= 0;
2612 UInt file_names_count
;
2613 UChar file_names_entry_format_count
;
2615 DiSlice debugstr_img
= cc
->escn_debug_str
;
2616 DiSlice debuglinestr_img
= cc
->escn_debug_line_str
;
2617 file_names_entry_format_count
= get_UChar( &c
);
2618 for (n
= 0; n
< file_names_entry_format_count
; n
++) {
2619 UInt lnct
= get_ULEB128( &c
);
2620 UInt form
= get_ULEB128( &c
);
2621 if (lnct
== DW_LNCT_path
)
2623 if (lnct
== DW_LNCT_directory_index
)
2627 file_names_count
= get_ULEB128( &c
);
2628 for (n
= 0; n
< file_names_count
; n
++) {
2632 for (f
= 0; f
< file_names_entry_format_count
; f
++) {
2633 UInt form
= forms
[f
];
2635 str
= get_line_str (cc
->di
, cc
->is_dw64
, &c
, form
,
2636 debugstr_img
, debuglinestr_img
);
2637 else if (f
== d_ndx
)
2638 dir_xa_ix
= get_line_ndx (cc
->di
, &c
, form
);
2640 skip_line_form (cc
->di
, cc
->is_dw64
, &c
, form
);
2643 if (dirname_xa
!= NULL
2644 && dir_xa_ix
>= 0 && dir_xa_ix
< VG_(sizeXA
) (dirname_xa
))
2645 dirname
= *(HChar
**)VG_(indexXA
) ( dirname_xa
, dir_xa_ix
);
2648 fndn_ix
= ML_(addFnDn
)( cc
->di
, str
, dirname
);
2649 TRACE_D3(" read_filename_table: %ld fndn_ix %u %s %s\n",
2650 VG_(sizeXA
)(fndn_ix_Table
), fndn_ix
,
2652 VG_(addToXA
)( fndn_ix_Table
, &fndn_ix
);
2655 /* We're done! The rest of it is not interesting. */
2656 if (dirname_xa
!= NULL
)
2657 VG_(deleteXA
)(dirname_xa
);
2660 /* setup_cu_svma to be called when a cu is found at level 0,
2661 to establish the cu_svma. */
2662 static void setup_cu_svma(CUConst
* cc
, Bool have_lo
, Addr ip_lo
, Bool td3
)
2665 /* We have potentially more than one type of parser parsing the
2666 dwarf information. At least currently, each parser establishes
2667 the cu_svma. So, in case cu_svma_known, we check that the same
2668 result is obtained by the 2nd parsing of the cu.
2670 Alternatively, we could reset cu_svma_known after each parsing
2671 and then check that we only see a single DW_TAG_compile_unit DIE
2672 at level 0, DWARF3 only allows exactly one top level DIE per
2678 /* Now, it may be that this DIE doesn't tell us the CU's
2679 SVMA, by way of not having a DW_AT_low_pc. That's OK --
2680 the CU doesn't *have* to have its SVMA specified.
2682 But as per last para D3 spec sec 3.1.1 ("Normal and
2683 Partial Compilation Unit Entries", "If the base address
2684 (viz, the SVMA) is undefined, then any DWARF entry of
2685 structure defined in terms of the base address of that
2686 compilation unit is not valid.". So that means, if whilst
2687 processing the children of this top level DIE (or their
2688 children, etc) we see a DW_AT_ranges, and cu_svma_known is
2689 False, then the DIE that contains it is (per the spec)
2690 invalid, and we can legitimately stop and complain. */
2691 /* .. whereas The Reality is, simply assume the SVMA is zero
2692 if it isn't specified. */
2696 if (cc
->cu_svma_known
) {
2697 vg_assert (cu_svma
== cc
->cu_svma
);
2699 cc
->cu_svma_known
= True
;
2700 cc
->cu_svma
= cu_svma
;
2702 TRACE_D3("setup_cu_svma: acquire CU_SVMA of %p\n", (void*) cc
->cu_svma
);
2706 /* Setup info from DW_AT_addr_base, DW_AT_str_offsets_base, DW_AT_rnglists_base
2707 and DW_AT_loclists_base. This needs to be done early, because other DW_AT_*
2708 info may depend on it. */
2709 static void setup_cu_bases(CUConst
* cc
, Cursor
* c_die
, const g_abbv
* abbv
)
2714 if(cc
->cu_has_addr_base
&& cc
->cu_has_str_offsets_base
2715 && cc
->cu_has_rnglists_base
&& cc
->cu_has_loclists_base
)
2717 saved_c_pos
= get_position_of_Cursor(c_die
);
2719 while (!cc
->cu_has_addr_base
|| !cc
->cu_has_str_offsets_base
2720 || !cc
->cu_has_rnglists_base
|| !cc
->cu_has_loclists_base
) {
2721 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
2722 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
2723 const name_form
*nf
= &abbv
->nf
[nf_i
];
2724 if (attr
== 0 && form
== 0)
2726 if (attr
!= DW_AT_addr_base
&& attr
!= DW_AT_str_offsets_base
2727 && attr
!= DW_AT_rnglists_base
&& attr
!= DW_AT_loclists_base
) {
2728 const UInt form_szB
= get_Form_szB (cc
, form
);
2729 if (form_szB
== VARSZ_FORM
) {
2730 if(form
== DW_FORM_addrx
|| form
== DW_FORM_strx
2731 || form
== DW_FORM_rnglistx
|| form
== DW_FORM_loclistx
) {
2732 /* Skip without interpreting them, they depend on *_base. */
2733 (void) get_ULEB128(c_die
);
2735 /* Need to read the contents of this one to skip it. */
2736 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/,
2740 /* Skip without even reading it, as it may depend on *_base. */
2741 advance_position_of_Cursor (c_die
, form_szB
);
2746 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
2747 if (attr
== DW_AT_addr_base
&& cts
.szB
> 0) {
2748 Addr addr_base
= cts
.u
.val
;
2749 if (cc
->cu_has_addr_base
)
2750 vg_assert (addr_base
== cc
->cu_addr_base
);
2752 cc
->cu_has_addr_base
= True
;
2753 cc
->cu_addr_base
= addr_base
;
2756 if (attr
== DW_AT_str_offsets_base
&& cts
.szB
> 0) {
2757 Addr str_offsets_base
= cts
.u
.val
;
2758 if (cc
->cu_has_str_offsets_base
)
2759 vg_assert (str_offsets_base
== cc
->cu_str_offsets_base
);
2761 cc
->cu_has_str_offsets_base
= True
;
2762 cc
->cu_str_offsets_base
= str_offsets_base
;
2765 if (attr
== DW_AT_rnglists_base
&& cts
.szB
> 0) {
2766 Addr rnglists_base
= cts
.u
.val
;
2767 if (cc
->cu_has_rnglists_base
)
2768 vg_assert (rnglists_base
== cc
->cu_rnglists_base
);
2770 cc
->cu_has_rnglists_base
= True
;
2771 cc
->cu_rnglists_base
= rnglists_base
;
2774 if (attr
== DW_AT_loclists_base
&& cts
.szB
> 0) {
2775 Addr loclists_base
= cts
.u
.val
;
2776 if (cc
->cu_has_loclists_base
)
2777 vg_assert (loclists_base
== cc
->cu_loclists_base
);
2779 cc
->cu_has_loclists_base
= True
;
2780 cc
->cu_loclists_base
= loclists_base
;
2785 set_position_of_Cursor(c_die
, saved_c_pos
);
2788 static void trace_DIE(
2792 UWord saved_die_c_offset
,
2800 Bool debug_types_flag
;
2803 UWord check_sibling
= 0;
2805 posn
= uncook_die( cc
, posn
, &debug_types_flag
, &alt_flag
);
2807 debug_types_flag
? cc
->escn_debug_types
:
2808 alt_flag
? cc
->escn_debug_info_alt
: cc
->escn_debug_info
,
2809 saved_die_c_offset
, cc
->barf
,
2810 "Overrun trace_DIE");
2812 VG_(printf
)(" <%d><%lx>: Abbrev Number: %llu (%s)%s%s\n",
2813 level
, posn
, (ULong
) abbv
->abbv_code
, ML_(pp_DW_TAG
)( dtag
),
2814 debug_types_flag
? " (in .debug_types)" : "",
2815 alt_flag
? " (in alternate .debug_info)" : "");
2818 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
2819 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
2820 const name_form
*nf
= &abbv
->nf
[nf_i
];
2822 if (attr
== 0 && form
== 0) break;
2823 VG_(printf
)(" %-18s: ", ML_(pp_DW_AT
)(attr
));
2824 /* Get the form contents, so as to print them */
2825 get_Form_contents( &cts
, cc
, &c
, True
, nf
);
2826 if (attr
== DW_AT_sibling
&& cts
.szB
> 0) {
2827 sibling
= cts
.u
.val
;
2829 VG_(printf
)("\t\n");
2832 /* Verify that skipping a DIE gives the same displacement as
2833 tracing (i.e. reading) a DIE. If there is an inconsistency in
2834 the nr of bytes read by get_Form_contents and get_Form_szB, this
2835 should be detected by the below. Using --trace-symtab=yes
2836 --read-var-info=yes will ensure all DIEs are systematically
2838 skip_DIE (&check_sibling
, &check_skip
, abbv
, cc
);
2839 vg_assert (check_sibling
== sibling
);
2840 vg_assert (get_position_of_Cursor (&check_skip
)
2841 == get_position_of_Cursor (&c
));
2844 __attribute__((noreturn
))
2845 static void dump_bad_die_and_barf(
2846 const HChar
*whichparser
,
2851 UWord saved_die_c_offset
,
2855 trace_DIE (dtag
, posn
, level
, saved_die_c_offset
, abbv
, cc
);
2856 VG_(printf
)("%s:\n", whichparser
);
2857 cc
->barf("confused by the above DIE");
/* Prints a banner announcing that the DIE parser got confused, tagged
   with the readdwarf3.c source line number supplied in 'linenr'.  The
   goto_bad_DIE macro below is the visible caller; it passes __LINE__.
   NOTE(review): marked noinline, presumably to keep this cold path out
   of the parser bodies -- confirm. */
2860 __attribute__((noinline
))
2861 static void bad_DIE_confusion(int linenr
)
2863 VG_(printf
)("\nparse DIE(readdwarf3.c:%d): confused by:\n", linenr
);
/* Reports the current source line via bad_DIE_confusion() and then
   jumps to the 'bad_DIE' label of the function in which the macro is
   used.  The do { } while (0) wrapper makes it act as one statement. */
2865 #define goto_bad_DIE do {bad_DIE_confusion(__LINE__); goto bad_DIE;} while (0)
2867 /* Reset the fndn_ix_Table. When we come across the top level DIE for a CU we
2868 will copy all the file names out of the .debug_line img area and use this
2869 table to look up the copies when we later see filename numbers in
2870 DW_TAG_variables etc. The table can be reused between parsers (var and
2871 inline) and between CUs. So we keep a copy of the last one parsed. Call
2872 reset_fndn_ix_table before reading a new one from a new offset. */
2874 void reset_fndn_ix_table (XArray
** fndn_ix_Table
, ULong
*debug_line_offset
,
2877 vg_assert (new_offset
== -1
2878 || *debug_line_offset
!= new_offset
);
2879 Int size
= *fndn_ix_Table
== NULL
? 0 : VG_(sizeXA
) (*fndn_ix_Table
);
2881 VG_(deleteXA
) (*fndn_ix_Table
);
2882 *fndn_ix_Table
= NULL
;
2884 if (*fndn_ix_Table
== NULL
)
2885 *fndn_ix_Table
= VG_(newXA
)( ML_(dinfo_zalloc
),
2886 "di.readdwarf3.reset_ix_table",
2889 *debug_line_offset
= new_offset
;
2892 __attribute__((noinline
))
2893 static void parse_var_DIE (
2894 /*MOD*/WordFM
* /* of (XArray* of AddrRange, void) */ rangestree
,
2895 /*MOD*/XArray
* /* of TempVar* */ tempvars
,
2896 /*MOD*/XArray
* /* of GExpr* */ gexprs
,
2897 /*MOD*/D3VarParser
* parser
,
2898 XArray
** fndn_ix_Table
,
2899 ULong
*debug_line_offset
,
2912 UWord saved_die_c_offset
= get_position_of_Cursor( c_die
);
2914 varstack_preen( parser
, td3
, level
-1 );
2916 if (dtag
== DW_TAG_compile_unit
2917 || dtag
== DW_TAG_type_unit
2918 || dtag
== DW_TAG_partial_unit
2919 || dtag
== DW_TAG_skeleton_unit
) {
2920 Bool have_lo
= False
;
2921 Bool have_hi1
= False
;
2922 Bool hiIsRelative
= False
;
2923 Bool have_range
= False
;
2927 const HChar
*compdir
= NULL
;
2930 setup_cu_bases(cc
, c_die
, abbv
);
2933 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
2934 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
2935 const name_form
*nf
= &abbv
->nf
[nf_i
];
2937 if (attr
== 0 && form
== 0) break;
2938 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
2939 if (attr
== DW_AT_low_pc
&& cts
.szB
> 0) {
2943 if (attr
== DW_AT_high_pc
&& cts
.szB
> 0) {
2946 if (form
!= DW_FORM_addr
)
2947 hiIsRelative
= True
;
2949 if (attr
== DW_AT_ranges
&& cts
.szB
> 0) {
2950 rangeoff
= cts
.u
.val
;
2953 if (attr
== DW_AT_comp_dir
) {
2955 cc
->barf("parse_var_DIE compdir: expecting indirect string");
2956 HChar
*str
= ML_(cur_read_strdup
)( cts
.u
.cur
,
2957 "parse_var_DIE.compdir" );
2958 compdir
= ML_(addStr
)(cc
->di
, str
, -1);
2959 ML_(dinfo_free
) (str
);
2961 if (attr
== DW_AT_stmt_list
&& cts
.szB
> 0) {
2962 if (cts
.u
.val
!= *debug_line_offset
) {
2963 reset_fndn_ix_table( fndn_ix_Table
, debug_line_offset
,
2965 read_filename_table( *fndn_ix_Table
, compdir
,
2966 cc
, cts
.u
.val
, td3
);
2970 if (have_lo
&& have_hi1
&& hiIsRelative
)
2973 /* Now, does this give us an opportunity to find this
2976 setup_cu_svma(cc
, have_lo
, ip_lo
, td3
);
2978 /* Do we have something that looks sane? */
2979 if (have_lo
&& have_hi1
&& (!have_range
)) {
2981 varstack_push( cc
, parser
, td3
,
2982 unitary_range_list(ip_lo
, ip_hi1
- 1),
2984 False
/*isFunc*/, NULL
/*fbGX*/ );
2985 else if (ip_lo
== 0 && ip_hi1
== 0)
2986 /* CU has no code, presumably?
2987 Such situations have been encountered for code
2988 compiled with -ffunction-sections -fdata-sections
2989 and linked with --gc-sections. Completely
2990 eliminated CU gives such 0 lo/hi pc. Similarly
2991 to a CU which has no lo/hi/range pc, we push
2992 an empty range list. */
2993 varstack_push( cc
, parser
, td3
,
2996 False
/*isFunc*/, NULL
/*fbGX*/ );
2998 if ((!have_lo
) && (!have_hi1
) && have_range
) {
2999 varstack_push( cc
, parser
, td3
,
3000 get_range_list( cc
, td3
,
3001 rangeoff
, cc
->cu_svma
),
3003 False
/*isFunc*/, NULL
/*fbGX*/ );
3005 if ((!have_lo
) && (!have_hi1
) && (!have_range
)) {
3006 /* CU has no code, presumably? */
3007 varstack_push( cc
, parser
, td3
,
3010 False
/*isFunc*/, NULL
/*fbGX*/ );
3012 if (have_lo
&& (!have_hi1
) && have_range
&& ip_lo
== 0) {
3013 /* broken DIE created by gcc-4.3.X ? Ignore the
3014 apparently-redundant DW_AT_low_pc and use the DW_AT_ranges
3016 varstack_push( cc
, parser
, td3
,
3017 get_range_list( cc
, td3
,
3018 rangeoff
, cc
->cu_svma
),
3020 False
/*isFunc*/, NULL
/*fbGX*/ );
3022 if (0) VG_(printf
)("I got hlo %d hhi1 %d hrange %d\n",
3023 (Int
)have_lo
, (Int
)have_hi1
, (Int
)have_range
);
3028 if (dtag
== DW_TAG_lexical_block
|| dtag
== DW_TAG_subprogram
) {
3029 Bool have_lo
= False
;
3030 Bool have_hi1
= False
;
3031 Bool have_range
= False
;
3032 Bool hiIsRelative
= False
;
3036 Bool isFunc
= dtag
== DW_TAG_subprogram
;
3040 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
3041 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
3042 const name_form
*nf
= &abbv
->nf
[nf_i
];
3044 if (attr
== 0 && form
== 0) break;
3045 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
3046 if (attr
== DW_AT_low_pc
&& cts
.szB
> 0) {
3050 if (attr
== DW_AT_high_pc
&& cts
.szB
> 0) {
3053 if (form
!= DW_FORM_addr
)
3054 hiIsRelative
= True
;
3056 if (attr
== DW_AT_ranges
&& cts
.szB
> 0) {
3057 rangeoff
= cts
.u
.val
;
3061 && attr
== DW_AT_frame_base
3062 && cts
.szB
!= 0 /* either scalar or nonempty block */) {
3063 fbGX
= get_GX( cc
, False
/*td3*/, &cts
);
3065 VG_(addToXA
)(gexprs
, &fbGX
);
3068 if (have_lo
&& have_hi1
&& hiIsRelative
)
3070 /* Do we have something that looks sane? */
3071 if (dtag
== DW_TAG_subprogram
3072 && (!have_lo
) && (!have_hi1
) && (!have_range
)) {
3073 /* This is legit - ignore it. Sec 3.3.3: "A subroutine entry
3074 representing a subroutine declaration that is not also a
3075 definition does not have code address or range
3078 if (dtag
== DW_TAG_lexical_block
3079 && (!have_lo
) && (!have_hi1
) && (!have_range
)) {
3080 /* I believe this is legit, and means the lexical block
3081 contains no insns (whatever that might mean). Ignore. */
3083 if (have_lo
&& have_hi1
&& (!have_range
)) {
3084 /* This scope supplies just a single address range. */
3086 varstack_push( cc
, parser
, td3
,
3087 unitary_range_list(ip_lo
, ip_hi1
- 1),
3088 level
, isFunc
, fbGX
);
3090 if ((!have_lo
) && (!have_hi1
) && have_range
) {
3091 /* This scope supplies multiple address ranges via the use of
3093 varstack_push( cc
, parser
, td3
,
3094 get_range_list( cc
, td3
,
3095 rangeoff
, cc
->cu_svma
),
3096 level
, isFunc
, fbGX
);
3098 if (have_lo
&& (!have_hi1
) && (!have_range
)) {
3099 /* This scope is bogus. The D3 spec sec 3.4 (Lexical Block
3100 Entries) says fairly clearly that a scope must have either
3101 _range or (_low_pc and _high_pc). */
3102 /* The spec is a bit ambiguous though. Perhaps a single byte
3103 range is intended? See sec 2.17 (Code Addresses And Ranges) */
3104 /* This case is here because icc9 produced this:
3105 <2><13bd>: DW_TAG_lexical_block
3106 DW_AT_decl_line : 5229
3107 DW_AT_decl_column : 37
3109 DW_AT_low_pc : 0x401b03
3111 /* Ignore (seems safer than pushing a single byte range) */
3116 if (dtag
== DW_TAG_variable
|| dtag
== DW_TAG_formal_parameter
) {
3117 const HChar
* name
= NULL
;
3118 UWord typeR
= D3_INVALID_CUOFF
;
3119 Bool global
= False
;
3120 GExpr
* gexpr
= NULL
;
3122 UWord abs_ori
= (UWord
)D3_INVALID_CUOFF
;
3127 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
3128 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
3129 const name_form
*nf
= &abbv
->nf
[nf_i
];
3131 if (attr
== 0 && form
== 0) break;
3132 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
3134 if (attr
== DW_AT_name
&& cts
.szB
< 0) {
3135 name
= ML_(addStrFromCursor
)( cc
->di
, cts
.u
.cur
);
3137 if (attr
== DW_AT_location
3138 && cts
.szB
!= 0 /* either scalar or nonempty block */) {
3139 gexpr
= get_GX( cc
, False
/*td3*/, &cts
);
3141 VG_(addToXA
)(gexprs
, &gexpr
);
3143 if (attr
== DW_AT_type
&& cts
.szB
> 0) {
3144 typeR
= cook_die_using_form( cc
, cts
.u
.val
, form
);
3146 if (attr
== DW_AT_external
&& cts
.szB
> 0 && cts
.u
.val
> 0) {
3149 if (attr
== DW_AT_abstract_origin
&& cts
.szB
> 0) {
3150 abs_ori
= (UWord
)cts
.u
.val
;
3152 if (attr
== DW_AT_declaration
&& cts
.szB
> 0 && cts
.u
.val
> 0) {
3153 /*declaration = True;*/
3155 if (attr
== DW_AT_decl_line
&& cts
.szB
> 0) {
3156 lineNo
= (Int
)cts
.u
.val
;
3158 if (attr
== DW_AT_decl_file
&& cts
.szB
> 0) {
3159 Int ftabIx
= (Int
)cts
.u
.val
;
3161 && ftabIx
< VG_(sizeXA
)( *fndn_ix_Table
)) {
3162 fndn_ix
= *(UInt
*)VG_(indexXA
)( *fndn_ix_Table
, ftabIx
);
3164 if (0) VG_(printf
)("XXX filename fndn_ix = %u %s\n", fndn_ix
,
3165 ML_(fndn_ix2filename
) (cc
->di
, fndn_ix
));
3168 if (!global
&& dtag
== DW_TAG_variable
&& level
== 1) {
3169 /* Case of a static variable. It is better to declare
3170 it global as the variable is not really related to
3171 a PC range, as its address can be used by program
3172 counters outside of the ranges where it is visible. */
3176 /* We'll collect it under if one of the following three
3178 (1) has location and type -> completed
3179 (2) has type only -> is an abstract instance
3180 (3) has location and abs_ori -> is a concrete instance
3181 Name, fndn_ix and line number are all optional frills.
3183 if ( /* 1 */ (gexpr
&& typeR
!= D3_INVALID_CUOFF
)
3184 /* 2 */ || (typeR
!= D3_INVALID_CUOFF
)
3185 /* 3 */ || (gexpr
&& abs_ori
!= (UWord
)D3_INVALID_CUOFF
) ) {
3187 /* Add this variable to the list of interesting looking
3188 variables. Crucially, note along with it the address
3189 range(s) associated with the variable, which for locals
3190 will be the address ranges at the top of the varparser's
3194 const XArray
* /* of AddrRange */ xa
;
3196 /* Stack can't be empty; we put a dummy entry on it for the
3197 entire address range before starting with the DIEs for
3199 vg_assert(parser
->sp
>= 0);
3201 /* If this is a local variable (non-global), try to find
3202 the GExpr for the DW_AT_frame_base of the containing
3203 function. It should have been pushed on the stack at the
3204 time we encountered its DW_TAG_subprogram DIE, so the way
3205 to find it is to scan back down the stack looking for it.
3206 If there isn't an enclosing stack entry marked 'isFunc'
3207 then we must be seeing variable or formal param DIEs
3208 outside of a function, so we deem the Dwarf to be
3209 malformed if that happens. Note that the fbGX may be NULL
3210 if the containing DW_TAG_subprogram didn't supply a
3211 DW_AT_frame_base -- that's OK, but there must actually be
3212 a containing DW_TAG_subprogram. */
3215 for (i
= parser
->sp
; i
>= 0; i
--) {
3216 if (parser
->isFunc
[i
]) {
3217 fbGX
= parser
->fbGX
[i
];
3223 if (0 && VG_(clo_verbosity
) >= 0) {
3224 VG_(message
)(Vg_DebugMsg
,
3225 "warning: parse_var_DIE: non-global variable "
3226 "outside DW_TAG_subprogram\n");
3229 /* This seems to happen a lot. Just ignore it -- if,
3230 when we come to evaluation of the location (guarded)
3231 expression, it requires a frame base value, and
3232 there's no expression for that, then evaluation as a
3233 whole will fail. Harmless - a bit of a waste of
3234 cycles but nothing more. */
3238 /* re "global ? 0 : parser->sp" (twice), if the var is
3239 marked 'global' then we must put it at the global scope,
3240 as only the global scope (level 0) covers the entire PC
3241 address space. It is asserted elsewhere that level 0
3242 always covers the entire address space. */
3243 xa
= parser
->ranges
[global
? 0 : parser
->sp
];
3244 nRanges
= VG_(sizeXA
)(xa
);
3245 vg_assert(nRanges
>= 0);
3247 tv
= ML_(dinfo_zalloc
)( "di.readdwarf3.pvD.1", sizeof(TempVar
) );
3249 tv
->level
= global
? 0 : parser
->sp
;
3253 tv
->fndn_ix
= fndn_ix
;
3256 tv
->absOri
= abs_ori
;
3258 /* See explanation on definition of type TempVar for the
3259 reason for this elaboration. */
3260 tv
->nRanges
= nRanges
;
3265 AddrRange
* range
= VG_(indexXA
)(xa
, 0);
3266 tv
->rngOneMin
= range
->aMin
;
3267 tv
->rngOneMax
= range
->aMax
;
3269 else if (nRanges
> 1) {
3270 /* See if we already have a range list which is
3271 structurally identical. If so, use that; if not, clone
3272 this one, and add it to our collection. */
3274 if (VG_(lookupFM
)( rangestree
, &keyW
, &valW
, (UWord
)xa
)) {
3275 XArray
* old
= (XArray
*)keyW
;
3276 vg_assert(valW
== 0);
3277 vg_assert(old
!= xa
);
3280 XArray
* cloned
= VG_(cloneXA
)( "di.readdwarf3.pvD.2", xa
);
3281 tv
->rngMany
= cloned
;
3282 VG_(addToFM
)( rangestree
, (UWord
)cloned
, 0 );
3286 VG_(addToXA
)( tempvars
, &tv
);
3288 TRACE_D3(" Recording this variable, with %ld PC range(s)\n",
3290 /* collect stats on how effective the ->ranges special
3293 static Int ntot
=0, ngt
=0;
3295 if (tv
->rngMany
) ngt
++;
3296 if (0 == (ntot
% 100000))
3297 VG_(printf
)("XXXX %d tot, %d cloned\n", ntot
, ngt
);
3302 /* Here are some other weird cases seen in the wild:
3304 We have a variable with a name and a type, but no
3305 location. I guess that's a sign that it has been
3306 optimised away. Ignore it. Here's an example:
3308 static Int lc_compar(void* n1, void* n2) {
3309 MC_Chunk* mc1 = *(MC_Chunk**)n1;
3310 MC_Chunk* mc2 = *(MC_Chunk**)n2;
3311 return (mc1->data < mc2->data ? -1 : 1);
3314 Both mc1 and mc2 are like this
3315 <2><5bc>: Abbrev Number: 21 (DW_TAG_variable)
3318 DW_AT_decl_line : 216
3321 whereas n1 and n2 do have locations specified.
3323 ---------------------------------------------
3325 We see a DW_TAG_formal_parameter with a type, but
3326 no name and no location. It's probably part of a function type
3327 construction, thusly, hence ignore it:
3328 <1><2b4>: Abbrev Number: 12 (DW_TAG_subroutine_type)
3329 DW_AT_sibling : <2c9>
3330 DW_AT_prototyped : 1
3332 <2><2be>: Abbrev Number: 13 (DW_TAG_formal_parameter)
3334 <2><2c3>: Abbrev Number: 13 (DW_TAG_formal_parameter)
3337 ---------------------------------------------
3339 Is very minimal, like this:
3340 <4><81d>: Abbrev Number: 44 (DW_TAG_variable)
3341 DW_AT_abstract_origin: <7ba>
3342 What that signifies I have no idea. Ignore.
3344 ----------------------------------------------
3346 Is very minimal, like this:
3347 <200f>: DW_TAG_formal_parameter
3348 DW_AT_abstract_ori: <1f4c>
3349 DW_AT_location : 13440
3350 What that signifies I have no idea. Ignore.
3351 It might be significant, though: the variable at least
3352 has a location and so might exist somewhere.
3353 Maybe we should handle this.
3355 ---------------------------------------------
3357 <22407>: DW_TAG_variable
3358 DW_AT_name : (indirect string, offset: 0x6579):
3359 vgPlain_trampoline_stuff_start
3360 DW_AT_decl_file : 29
3361 DW_AT_decl_line : 56
3363 DW_AT_declaration : 1
3365 Nameless and typeless variable that has a location? Who
3367 <2><3d178>: Abbrev Number: 22 (DW_TAG_variable)
3368 DW_AT_location : 9 byte block: 3 c0 c7 13 38 0 0 0 0
3369 (DW_OP_addr: 3813c7c0)
3371 No, really. Check it out. gcc is quite simply borked.
3372 <3><168cc>: Abbrev Number: 141 (DW_TAG_variable)
3373 // followed by no attributes, and the next DIE is a sibling,
3380 dump_bad_die_and_barf("parse_var_DIE", dtag
, posn
, level
,
3381 c_die
, saved_die_c_offset
,
3389 UWord sibling
; // sibling of the last read DIE (if it has a sibling).
3393 /* Return the name of the function designated by absori.
3395 absori is a 'cooked' reference to a DIE, i.e. absori can be either
3396 in cc->escn_debug_info or in cc->escn_debug_info_alt.
3397 get_inlFnName will uncook absori.
td3 is handed on to get_abbv and to the recursive call made when a
DW_AT_specification attribute is followed.
3399 The returned value is a (permanent) string in DebugInfo's .strchunks.
3401 LIMITATION: absori must point in the CU of cc. If absori points
3402 in another CU, returns "UnknownInlinedFun".
3404 The problems with retrieving the function name when absori is in
3405 another CU are: the DIE reading code cannot properly extract data from
3406 another CU, as the abbv code retrieved in the other CU cannot be
3407 translated into an abbreviation. Reading data from the alternate debug
3408 info also gives problems as the string reference is also in the alternate
3409 file, but when reading the alt DIE, the string form is a 'local' string,
3410 which cannot be read in the current CU, but must be read in the alt CU.
3411 See bug 338803 comment#3 and attachment for a failed attempt to handle
3412 these problems (failed because with the patch, only one alt abbrev hash
3413 table is kept, while we must handle all abbreviations in all CUs
3414 referenced by an absori (being a reference to an alt CU, or a previous
3415 or following CU). */
3416 static const HChar
* get_inlFnName (Int absori
, CUConst
* cc
, Bool td3
)
3420 ULong atag
, abbv_code
;
3423 Bool type_flag
, alt_flag
;
3424 const HChar
*ret
= NULL
;
3428 /* Some inlined subroutine call dwarf entries do not have the abstract
3429 origin attribute, resulting in absori being 0 (see callers of
3430 get_inlFnName). This is observed at least with gcc 6.3.0 when compiling
3431 valgrind with lto. So, in case we have a 0 absori, do not report an
3432 error; instead, return an unknown inlined function.  The warning
below is emitted at most once (absori0_reported) and only when
the verbosity is above 1. */
3434 static Bool absori0_reported
= False
;
3435 if (!absori0_reported
&& VG_(clo_verbosity
) > 1) {
3436 VG_(message
)(Vg_DebugMsg
,
3437 "Warning: inlined fn name without absori\n"
3438 "is shown as UnknownInlinedFun\n");
3439 absori0_reported
= True
;
3441 TRACE_D3(" <get_inlFnName>: absori is not set");
3442 return ML_(addStr
)(cc
->di
, "UnknownInlinedFun", -1);
/* Uncook the reference; uncook_die also fills in type_flag and
   alt_flag. */
3445 posn
= uncook_die( cc
, absori
, &type_flag
, &alt_flag
);
3447 cc
->barf("get_inlFnName: uncooked absori in type debug info");
3449 /* LIMITATION: check we are in the same CU.
3450 If not, return unknown inlined function name. */
3451 /* if crossing between alt debug info<>normal info
3452 or posn not in the cu range,
3453 then it is in another CU. */
3454 if (alt_flag
!= cc
->is_alt_info
3455 || posn
< cc
->cu_start_offset
3456 || posn
>= cc
->cu_start_offset
+ cc
->unit_length
) {
3457 static Bool reported
= False
;
3458 if (!reported
&& VG_(clo_verbosity
) > 1) {
3459 VG_(message
)(Vg_DebugMsg
,
3460 "Warning: cross-CU LIMITATION: some inlined fn names\n"
3461 "might be shown as UnknownInlinedFun\n");
3464 TRACE_D3(" <get_inlFnName><%lx>: cross-CU LIMITATION", posn
);
3465 return ML_(addStr
)(cc
->di
, "UnknownInlinedFun", -1);
/* Place a cursor at posn in cc->escn_debug_info, read the DIE's
   abbreviation code and look up the matching abbreviation. */
3468 init_Cursor (&c
, cc
->escn_debug_info
, posn
, cc
->barf
,
3469 "Overrun get_inlFnName absori");
3471 abbv_code
= get_ULEB128( &c
);
3472 abbv
= get_abbv ( cc
, abbv_code
, td3
);
3474 TRACE_D3(" <get_inlFnName><%lx>: Abbrev Number: %llu (%s)\n",
3475 posn
, abbv_code
, ML_(pp_DW_TAG
)( atag
) );
3478 cc
->barf("get_inlFnName: invalid zero tag on DIE");
3480 has_children
= abbv
->has_children
;
3481 if (has_children
!= DW_children_no
&& has_children
!= DW_children_yes
)
3482 cc
->barf("get_inlFnName: invalid has_children value");
3484 if (atag
!= DW_TAG_subprogram
)
3485 cc
->barf("get_inlFnName: absori not a subprogram");
/* Walk the abbreviation's (name, form) pairs; a (0, 0) pair ends
   the list. */
3489 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
3490 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
3491 const name_form
*nf
= &abbv
->nf
[nf_i
];
3493 if (attr
== 0 && form
== 0) break;
3494 get_Form_contents( &cts
, cc
, &c
, False
/*td3*/, nf
);
3495 if (attr
== DW_AT_name
) {
3498 cc
->barf("get_inlFnName: expecting indirect string");
/* Copy the name into the DebugInfo's string table, then free the
   temporary duplicate. */
3499 fnname
= ML_(cur_read_strdup
)( cts
.u
.cur
,
3500 "get_inlFnName.1" );
3501 ret
= ML_(addStr
)(cc
->di
, fnname
, -1);
3502 ML_(dinfo_free
) (fnname
);
3503 break; /* Name found, get out of the loop, as this has priority over
3504 DW_AT_specification. */
3506 if (attr
== DW_AT_specification
) {
3510 cc
->barf("get_inlFnName: AT specification missing");
3512 /* The recursive call to get_inlFnName will uncook its arg.
3513 So, we need to cook it here, so as to reference the
3514 correct section (e.g. the alt info). */
3515 cdie
= cook_die_using_form(cc
, (UWord
)cts
.u
.val
, form
);
3517 /* hoping that there is no loop */
3518 ret
= get_inlFnName (cdie
, cc
, td3
);
3519 /* Unclear if having both DW_AT_specification and DW_AT_name is
3520 possible but in any case, we do not break here.
3521 If we find later on a DW_AT_name, it will override the name found
3522 in the DW_AT_specification.*/
3529 TRACE_D3("AbsOriFnNameNotFound");
3530 return ML_(addStr
)(cc
->di
, "AbsOriFnNameNotFound", -1);
3534 /* Returns True if the (possible) children of the current DIE are interesting
3535 to parse. Returns False otherwise.
3536 If the current DIE has a sibling, the uninteresting children can
3537 maybe be skipped (if the DIE has a DW_AT_sibling); the sibling value,
when present, is stored in parser->sibling.
For a DW_TAG_inlined_subroutine DIE, each address range found is
recorded with ML_(addInlInfo), using the function name obtained from
get_inlFnName. */
3538 __attribute__((noinline
))
3539 static Bool
parse_inl_DIE (
3540 /*MOD*/D3InlParser
* parser
,
3541 XArray
** fndn_ix_Table
,
3542 ULong
*debug_line_offset
,
3555 UWord saved_die_c_offset
= get_position_of_Cursor( c_die
);
3557 /* Get info about DW_TAG_compile_unit and DW_TAG_partial_unit which in theory
3558 could also contain inlined fn calls, if they cover an address range. */
3559 Bool unit_has_addrs
= False
;
3560 if (dtag
== DW_TAG_compile_unit
|| dtag
== DW_TAG_partial_unit
3561 || dtag
== DW_TAG_skeleton_unit
) {
3562 Bool have_lo
= False
;
3564 const HChar
*compdir
= NULL
;
3565 Bool has_stmt_list
= False
;
3566 ULong cu_line_offset
= 0;
3569 setup_cu_bases(cc
, c_die
, abbv
);
/* Scan the unit DIE's attributes; a (0, 0) name/form pair ends the
   list. */
3572 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
3573 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
3574 const name_form
*nf
= &abbv
->nf
[nf_i
];
3576 if (attr
== 0 && form
== 0) break;
3577 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
3578 if (attr
== DW_AT_low_pc
&& cts
.szB
> 0) {
3581 unit_has_addrs
= True
;
3583 if (attr
== DW_AT_ranges
&& cts
.szB
> 0)
3584 unit_has_addrs
= True
;
3585 if (attr
== DW_AT_comp_dir
) {
3587 cc
->barf("parse_inl_DIE compdir: expecting indirect string");
3588 HChar
*str
= ML_(cur_read_strdup
)( cts
.u
.cur
,
3589 "parse_inl_DIE.compdir" );
3590 compdir
= ML_(addStr
)(cc
->di
, str
, -1);
3591 ML_(dinfo_free
) (str
);
3593 if (attr
== DW_AT_stmt_list
&& cts
.szB
> 0) {
3594 has_stmt_list
= True
;
3595 cu_line_offset
= cts
.u
.val
;
3597 if (attr
== DW_AT_sibling
&& cts
.szB
> 0) {
3598 parser
->sibling
= cts
.u
.val
;
3602 setup_cu_svma (cc
, have_lo
, ip_lo
, td3
);
/* The filename table is reset and re-read only when this unit has a
   statement list, covers addresses, and its line offset differs from
   *debug_line_offset. */
3603 if (has_stmt_list
&& unit_has_addrs
3604 && *debug_line_offset
!= cu_line_offset
) {
3605 reset_fndn_ix_table ( fndn_ix_Table
, debug_line_offset
,
3607 read_filename_table( *fndn_ix_Table
, compdir
,
3608 cc
, cu_line_offset
, td3
);
3613 if (dtag
== DW_TAG_inlined_subroutine
) {
3614 Bool have_lo
= False
;
3615 Bool have_hi1
= False
;
3616 Bool have_range
= False
;
3617 Bool hiIsRelative
= False
;
3621 UInt caller_fndn_ix
= 0;
3622 Int caller_lineno
= 0;
3623 Int inlinedfn_abstract_origin
= 0;
3624 // 0 will be interpreted as no abstract origin by get_inlFnName
/* Scan the inlined-subroutine DIE's attributes; a (0, 0) name/form
   pair ends the list. */
3628 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
3629 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
3630 const name_form
*nf
= &abbv
->nf
[nf_i
];
3632 if (attr
== 0 && form
== 0) break;
3633 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
3634 if (attr
== DW_AT_call_file
&& cts
.szB
> 0) {
3635 Int ftabIx
= (Int
)cts
.u
.val
;
3637 && ftabIx
< VG_(sizeXA
)( *fndn_ix_Table
)) {
3638 caller_fndn_ix
= *(UInt
*)
3639 VG_(indexXA
)( *fndn_ix_Table
, ftabIx
);
3641 if (0) VG_(printf
)("XXX caller_fndn_ix = %u %s\n", caller_fndn_ix
,
3642 ML_(fndn_ix2filename
) (cc
->di
, caller_fndn_ix
));
3644 if (attr
== DW_AT_call_line
&& cts
.szB
> 0) {
3645 caller_lineno
= cts
.u
.val
;
3648 if (attr
== DW_AT_abstract_origin
&& cts
.szB
> 0) {
3649 inlinedfn_abstract_origin
3650 = cook_die_using_form (cc
, (UWord
)cts
.u
.val
, form
);
3653 if (attr
== DW_AT_low_pc
&& cts
.szB
> 0) {
3657 if (attr
== DW_AT_high_pc
&& cts
.szB
> 0) {
3660 if (form
!= DW_FORM_addr
)
3661 hiIsRelative
= True
;
3663 if (attr
== DW_AT_ranges
&& cts
.szB
> 0) {
3664 rangeoff
= cts
.u
.val
;
3667 if (attr
== DW_AT_sibling
&& cts
.szB
> 0) {
3668 parser
->sibling
= cts
.u
.val
;
3671 if (have_lo
&& have_hi1
&& hiIsRelative
)
3673 /* Do we have something that looks sane? */
3674 if (dtag
== DW_TAG_inlined_subroutine
3675 && (!have_lo
) && (!have_hi1
) && (!have_range
)) {
3676 /* Seems strange. How can an inlined subroutine have
3680 if (have_lo
&& have_hi1
&& (!have_range
)) {
3681 /* This inlined call is just a single address range. */
3682 if (ip_lo
< ip_hi1
) {
3683 /* Apply text debug biasing */
3684 ip_lo
+= cc
->di
->text_debug_bias
;
3685 ip_hi1
+= cc
->di
->text_debug_bias
;
3686 ML_(addInlInfo
) (cc
->di
,
3688 get_inlFnName (inlinedfn_abstract_origin
, cc
, td3
),
3690 caller_lineno
, level
);
3692 } else if (have_range
) {
3693 /* This inlined call is several address ranges. */
3696 const HChar
*inlfnname
=
3697 get_inlFnName (inlinedfn_abstract_origin
, cc
, td3
);
3699 /* Ranges are biased for the inline info using the same logic
3700 as what is used for biasing ranges for the var info, for which
3701 ranges are read using cc->cu_svma (see parse_var_DIE).
3702 Then text_debug_bias is added when a (non global) var
3703 is recorded (see just before the call to ML_(addVar)) */
3704 ranges
= get_range_list( cc
, td3
,
3705 rangeoff
, cc
->cu_svma
);
3706 for (j
= 0; j
< VG_(sizeXA
)( ranges
); j
++) {
3707 AddrRange
* range
= (AddrRange
*) VG_(indexXA
)( ranges
, j
);
3708 ML_(addInlInfo
) (cc
->di
,
3709 range
->aMin
+ cc
->di
->text_debug_bias
,
3710 range
->aMax
+1 + cc
->di
->text_debug_bias
,
3711 // aMax+1 as range has its last bound included
3712 // while ML_(addInlInfo) expects last bound not
3716 caller_lineno
, level
);
/* The range list obtained from get_range_list is deleted once all
   of its ranges have been recorded. */
3718 VG_(deleteXA
)( ranges
);
3723 // Only recursively parse the (possible) children for the DIE which
3724 // might maybe contain a DW_TAG_inlined_subroutine:
3725 Bool ret
= (unit_has_addrs
3726 || dtag
== DW_TAG_lexical_block
|| dtag
== DW_TAG_subprogram
3727 || dtag
== DW_TAG_inlined_subroutine
|| dtag
== DW_TAG_namespace
);
3731 dump_bad_die_and_barf("parse_inl_DIE", dtag
, posn
, level
,
3732 c_die
, saved_die_c_offset
,
3739 /*------------------------------------------------------------*/
3741 /*--- Parsing of type-related DIEs ---*/
3743 /*------------------------------------------------------------*/
3747 /* What source language? 'A'=Ada83/95,
3751 Established once per compilation unit. */
3753 /* A stack of types which are currently under construction */
3754 Int sp
; /* [sp] is innermost active entry; sp==-1 for empty
3757 /* Note that the TyEnts in qparentE are temporary copies of the
3758 ones accumulating in the main tyent array. So it is not safe
3759 to free up anything on them when popping them off the stack
3760 (iow, it isn't safe to use TyEnt__make_EMPTY on them). Just
3761 memset them to zero when done. */
3762 TyEnt
*qparentE
; /* parent TyEnts */
3767 /* Completely initialise a type parser object.  The language is set
to '?' (the value tested for when the language is not known), and the
type stack starts with zero capacity and no backing arrays: qparentE
and qlevel are both NULL. */
3769 type_parser_init ( D3TypeParser
*parser
)
3772 parser
->language
= '?';
3773 parser
->stack_size
= 0;
3774 parser
->qparentE
= NULL
;
3775 parser
->qlevel
= NULL
;
3778 /* Release any memory hanging off a type parser object, namely the
qparentE and qlevel arrays, which are freed with ML_(dinfo_free). */
3780 type_parser_release ( D3TypeParser
*parser
)
3782 ML_(dinfo_free
)( parser
->qparentE
);
3783 ML_(dinfo_free
)( parser
->qlevel
);
/* Debug printing: dump the type stack.  Entries 0 .. parser->sp are
   printed in that order, each with its index, its level (qlevel[i]) and
   its TyEnt (qparentE[i], via ML_(pp_TyEnt)).  'str' is a caller-supplied
   label included in the opening line of the dump. */
3786 static void typestack_show ( const D3TypeParser
* parser
, const HChar
* str
)
3789 VG_(printf
)(" typestack (%s) {\n", str
);
3790 for (i
= 0; i
<= parser
->sp
; i
++) {
3791 VG_(printf
)(" [%ld] (level %d): ", i
, parser
->qlevel
[i
]);
3792 ML_(pp_TyEnt
)( &parser
->qparentE
[i
] );
3795 VG_(printf
)(" }\n");
3798 /* Remove from the stack all entries with .level > 'level'.
Popping stops as soon as the stack is empty (sp == -1) or the innermost
entry has a level <= 'level'.  Each entry examined is asserted to be a
type.  typestack_show may be called afterwards with the label
"after preen".
NOTE(review): presumably only when something was popped and td3 is
set -- confirm. */
3800 void typestack_preen ( D3TypeParser
* parser
, Bool td3
, Int level
)
3802 Bool changed
= False
;
3803 vg_assert(parser
->sp
< parser
->stack_size
);
3805 vg_assert(parser
->sp
>= -1);
3806 if (parser
->sp
== -1) break;
3807 if (parser
->qlevel
[parser
->sp
] <= level
) break;
3809 TRACE_D3("BBBBAAAA typestack_pop [newsp=%d]\n", parser
->sp
-1);
3810 vg_assert(ML_(TyEnt__is_type
)(&parser
->qparentE
[parser
->sp
]));
3815 typestack_show( parser
, "after preen" );
/* True iff the type stack holds no entries, i.e. parser->sp == -1.
   Also asserts that sp lies in the range -1 .. stack_size-1. */
3818 static Bool
typestack_is_empty ( const D3TypeParser
* parser
)
3820 vg_assert(parser
->sp
>= -1 && parser
->sp
< parser
->stack_size
);
3821 return parser
->sp
== -1;
/* Push a copy of *parentE, tagged with 'level', onto the type stack.
   Existing entries at 'level' or deeper are removed first.  When the
   stack is full, both backing arrays (qparentE and qlevel) are grown by
   16 slots.  parentE must be a type entity with a valid cuOff; both
   properties are asserted. */
3824 static void typestack_push ( const CUConst
* cc
,
3825 D3TypeParser
* parser
,
3827 const TyEnt
* parentE
, Int level
)
3830 TRACE_D3("BBBBAAAA typestack_push[newsp=%d]: %d %05lx\n",
3831 parser
->sp
+1, level
, parentE
->cuOff
);
3833 /* First we need to zap everything >= 'level', as we are about to
3834 replace any previous entry at 'level', so .. */
3835 typestack_preen(parser
, /*td3*/False
, level
-1);
3837 vg_assert(parser
->sp
>= -1);
3838 vg_assert(parser
->sp
< parser
->stack_size
);
/* Stack full: enlarge both arrays by 16 entries. */
3839 if (parser
->sp
== parser
->stack_size
- 1) {
3840 parser
->stack_size
+= 16;
3842 ML_(dinfo_realloc
)("di.readdwarf3.typush.1", parser
->qparentE
,
3843 parser
->stack_size
* sizeof parser
->qparentE
[0]);
3845 ML_(dinfo_realloc
)("di.readdwarf3.typush.2", parser
->qlevel
,
3846 parser
->stack_size
* sizeof parser
->qlevel
[0]);
/* After preening, any remaining innermost entry must be at a
   strictly shallower level than the one being pushed. */
3848 if (parser
->sp
>= 0)
3849 vg_assert(parser
->qlevel
[parser
->sp
] < level
);
3852 vg_assert(ML_(TyEnt__is_type
)(parentE
));
3853 vg_assert(parentE
->cuOff
!= D3_INVALID_CUOFF
);
3854 parser
->qparentE
[parser
->sp
] = *parentE
;
3855 parser
->qlevel
[parser
->sp
] = level
;
3857 typestack_show( parser
, "after push" );
3860 /* True if the subrange type being parsed gives the bounds of an array.
'dtag' must be DW_TAG_subrange_type (asserted).  The answer depends on
parser->language: '?' (unknown) and 'A' (Ada) are treated specially,
as described below. */
3861 static Bool
subrange_type_denotes_array_bounds ( const D3TypeParser
* parser
,
3863 vg_assert(dtag
== DW_TAG_subrange_type
);
3864 /* If we don't know the language, assume false. */
3865 if (parser
->language
== '?')
3867 /* For most languages, a subrange_type dtag always gives the
3869 For Ada, there are additional conditions as a subrange_type
3870 is also used for other purposes. */
3871 if (parser
->language
!= 'A')
3872 /* not Ada, so it definitely denotes an array bound. */
3875 /* Extra constraints for Ada: it only denotes an array bound if the
type stack is non-empty and its innermost entry is an array type. */
3876 return (! typestack_is_empty(parser
)
3877 && parser
->qparentE
[parser
->sp
].tag
== Te_TyArray
);
3880 /* True if the form is one of the forms supported to give an array bound.
3881 For some arrays (scope local arrays with variable size),
3882 a DW_FORM_ref4 was used, and was wrongly taken as the bound value.
3883 So, refuse the forms that are known to give a problem: the reference
forms DW_FORM_ref1, DW_FORM_ref2, DW_FORM_ref4 and DW_FORM_ref8 are
the ones singled out by the test below. */
3884 static Bool
form_expected_for_bound ( DW_FORM form
) {
3885 if (form
== DW_FORM_ref1
3886 || form
== DW_FORM_ref2
3887 || form
== DW_FORM_ref4
3888 || form
== DW_FORM_ref8
)
3894 /* Parse a type-related DIE. 'parser' holds the current parser state.
3895 'admin' is where the completed types are dumped. 'dtag' is the tag
3896 for this DIE. 'c_die' points to the start of the data fields (FORM
3897 stuff) for the DIE. abbv is the parsed abbreviation which describes
3900 We may find the DIE uninteresting, in which case we should ignore
3903 What happens: the DIE is examined. If uninteresting, it is ignored.
3904 Otherwise, the DIE gives rise to two things:
3906 (1) the offset of this DIE in the CU -- the cuOffset, a UWord
3907 (2) a TyAdmin structure, which holds the type, or related stuff
3909 (2) is added at the end of 'tyadmins', at some index, say 'i'.
3911 A pair (cuOffset, i) is added to 'tydict'.
3913 Hence 'tyadmins' holds the actual type entities, and 'tydict' holds
3914 a mapping from cuOffset to the index of the corresponding entry in
3917 When resolving a cuOffset to a TyAdmin, first look up the cuOffset
3918 in the tydict (by binary search). This gives an index into
3919 tyadmins, and the required entity lives in tyadmins at that index.
3921 __attribute__((noinline
))
3922 static void parse_type_DIE ( /*MOD*/XArray
* /* of TyEnt */ tyents
,
3923 /*MOD*/D3TypeParser
* parser
,
3939 UWord saved_die_c_offset
= get_position_of_Cursor( c_die
);
3941 VG_(memset
)( &typeE
, 0xAA, sizeof(typeE
) );
3942 VG_(memset
)( &atomE
, 0xAA, sizeof(atomE
) );
3943 VG_(memset
)( &fieldE
, 0xAA, sizeof(fieldE
) );
3944 VG_(memset
)( &boundE
, 0xAA, sizeof(boundE
) );
3946 /* If we've returned to a level at or above any previously noted
3947 parent, un-note it, so we don't believe we're still collecting
3949 typestack_preen( parser
, td3
, level
-1 );
3951 if (dtag
== DW_TAG_compile_unit
3952 || dtag
== DW_TAG_type_unit
3953 || dtag
== DW_TAG_partial_unit
3954 || dtag
== DW_TAG_skeleton_unit
) {
3956 setup_cu_bases(cc
, c_die
, abbv
);
3957 /* See if we can find DW_AT_language, since it is important for
3958 establishing array bounds (see DW_TAG_subrange_type below in
3962 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
3963 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
3964 const name_form
*nf
= &abbv
->nf
[nf_i
];
3966 if (attr
== 0 && form
== 0) break;
3967 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
3968 if (attr
!= DW_AT_language
)
3972 switch (cts
.u
.val
) {
3973 case DW_LANG_C89
: case DW_LANG_C
:
3974 case DW_LANG_C_plus_plus
: case DW_LANG_ObjC
:
3975 case DW_LANG_ObjC_plus_plus
: case DW_LANG_UPC
:
3976 case DW_LANG_Upc
: case DW_LANG_C99
: case DW_LANG_C11
:
3977 case DW_LANG_C_plus_plus_11
: case DW_LANG_C_plus_plus_14
:
3978 parser
->language
= 'C'; break;
3979 case DW_LANG_Fortran77
: case DW_LANG_Fortran90
:
3980 case DW_LANG_Fortran95
: case DW_LANG_Fortran03
:
3981 case DW_LANG_Fortran08
:
3982 parser
->language
= 'F'; break;
3983 case DW_LANG_Ada83
: case DW_LANG_Ada95
:
3984 parser
->language
= 'A'; break;
3985 case DW_LANG_Cobol74
:
3986 case DW_LANG_Cobol85
: case DW_LANG_Pascal83
:
3987 case DW_LANG_Modula2
: case DW_LANG_Java
:
3989 case DW_LANG_D
: case DW_LANG_Python
: case DW_LANG_Go
:
3990 case DW_LANG_Mips_Assembler
:
3991 parser
->language
= '?'; break;
3998 if (dtag
== DW_TAG_base_type
) {
3999 /* We can pick up a new base type any time. */
4000 VG_(memset
)(&typeE
, 0, sizeof(typeE
));
4001 typeE
.cuOff
= D3_INVALID_CUOFF
;
4002 typeE
.tag
= Te_TyBase
;
4005 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
4006 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
4007 const name_form
*nf
= &abbv
->nf
[nf_i
];
4009 if (attr
== 0 && form
== 0) break;
4010 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
4011 if (attr
== DW_AT_name
&& cts
.szB
< 0) {
4012 typeE
.Te
.TyBase
.name
4013 = ML_(cur_read_strdup
)( cts
.u
.cur
,
4014 "di.readdwarf3.ptD.base_type.1" );
4016 if (attr
== DW_AT_byte_size
&& cts
.szB
> 0) {
4017 typeE
.Te
.TyBase
.szB
= cts
.u
.val
;
4019 if (attr
== DW_AT_encoding
&& cts
.szB
> 0) {
4020 switch (cts
.u
.val
) {
4021 case DW_ATE_unsigned
: case DW_ATE_unsigned_char
:
4022 case DW_ATE_UTF
: /* since DWARF4, e.g. char16_t from C++ */
4023 case DW_ATE_boolean
:/* FIXME - is this correct? */
4024 case DW_ATE_unsigned_fixed
:
4025 typeE
.Te
.TyBase
.enc
= 'U'; break;
4026 case DW_ATE_signed
: case DW_ATE_signed_char
:
4027 case DW_ATE_signed_fixed
:
4028 typeE
.Te
.TyBase
.enc
= 'S'; break;
4030 typeE
.Te
.TyBase
.enc
= 'F'; break;
4031 case DW_ATE_complex_float
:
4032 typeE
.Te
.TyBase
.enc
= 'C'; break;
4039 /* Invent a name if it doesn't have one. gcc-4.3
4040 -ftree-vectorize is observed to emit nameless base types. */
4041 if (!typeE
.Te
.TyBase
.name
)
4042 typeE
.Te
.TyBase
.name
4043 = ML_(dinfo_strdup
)( "di.readdwarf3.ptD.base_type.2",
4044 "<anon_base_type>" );
4046 /* Do we have something that looks sane? */
4047 if (/* must have a name */
4048 typeE
.Te
.TyBase
.name
== NULL
4049 /* and a plausible size. Yes, really 32: "complex long
4050 double" apparently has size=32 */
4051 || typeE
.Te
.TyBase
.szB
< 0 || typeE
.Te
.TyBase
.szB
> 32
4052 /* and a plausible encoding */
4053 || (typeE
.Te
.TyBase
.enc
!= 'U'
4054 && typeE
.Te
.TyBase
.enc
!= 'S'
4055 && typeE
.Te
.TyBase
.enc
!= 'F'
4056 && typeE
.Te
.TyBase
.enc
!= 'C'))
4058 /* Last minute hack: if we see this
4059 <1><515>: DW_TAG_base_type
4063 convert it into a real Void type. */
4064 if (typeE
.Te
.TyBase
.szB
== 0
4065 && 0 == VG_(strcmp
)("void", typeE
.Te
.TyBase
.name
)) {
4066 ML_(TyEnt__make_EMPTY
)(&typeE
);
4067 typeE
.tag
= Te_TyVoid
;
4068 typeE
.Te
.TyVoid
.isFake
= False
; /* it's a real one! */
4075 * An example of DW_TAG_rvalue_reference_type:
4077 * $ readelf --debug-dump /usr/lib/debug/usr/lib/libstdc++.so.6.0.16.debug
4078 * <1><1014>: Abbrev Number: 55 (DW_TAG_rvalue_reference_type)
4079 * <1015> DW_AT_byte_size : 4
4080 * <1016> DW_AT_type : <0xe52>
4082 if (dtag
== DW_TAG_pointer_type
|| dtag
== DW_TAG_reference_type
4083 || dtag
== DW_TAG_ptr_to_member_type
4084 || dtag
== DW_TAG_rvalue_reference_type
) {
4085 /* This seems legit for _pointer_type and _reference_type. I
4086 don't know if rolling _ptr_to_member_type in here really is
4087 legit, but it's better than not handling it at all. */
4088 VG_(memset
)(&typeE
, 0, sizeof(typeE
));
4089 typeE
.cuOff
= D3_INVALID_CUOFF
;
4091 case DW_TAG_pointer_type
:
4092 typeE
.tag
= Te_TyPtr
;
4094 case DW_TAG_reference_type
:
4095 typeE
.tag
= Te_TyRef
;
4097 case DW_TAG_ptr_to_member_type
:
4098 typeE
.tag
= Te_TyPtrMbr
;
4100 case DW_TAG_rvalue_reference_type
:
4101 typeE
.tag
= Te_TyRvalRef
;
4106 /* target type defaults to void */
4107 typeE
.Te
.TyPorR
.typeR
= D3_FAKEVOID_CUOFF
;
4108 /* These four type kinds don't *have* to specify their size, in
4109 which case we assume it's a machine word. But if they do
4110 specify it, it must be a machine word :-) This probably
4111 assumes that the word size of the Dwarf3 we're reading is the
4112 same size as that on the machine. gcc appears to give a size
4113 whereas icc9 doesn't. */
4114 typeE
.Te
.TyPorR
.szB
= sizeof(UWord
);
4117 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
4118 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
4119 const name_form
*nf
= &abbv
->nf
[nf_i
];
4121 if (attr
== 0 && form
== 0) break;
4122 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
4123 if (attr
== DW_AT_byte_size
&& cts
.szB
> 0) {
4124 typeE
.Te
.TyPorR
.szB
= cts
.u
.val
;
4126 if (attr
== DW_AT_type
&& cts
.szB
> 0) {
4127 typeE
.Te
.TyPorR
.typeR
4128 = cook_die_using_form( cc
, (UWord
)cts
.u
.val
, form
);
4131 /* Do we have something that looks sane? */
4132 if (typeE
.Te
.TyPorR
.szB
!= sizeof(UWord
))
4138 if (dtag
== DW_TAG_enumeration_type
) {
4139 /* Create a new Type to hold the results. */
4140 VG_(memset
)(&typeE
, 0, sizeof(typeE
));
4142 typeE
.tag
= Te_TyEnum
;
4143 Bool is_decl
= False
;
4144 typeE
.Te
.TyEnum
.atomRs
4145 = VG_(newXA
)( ML_(dinfo_zalloc
), "di.readdwarf3.ptD.enum_type.1",
4150 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
4151 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
4152 const name_form
*nf
= &abbv
->nf
[nf_i
];
4154 if (attr
== 0 && form
== 0) break;
4155 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
4156 if (attr
== DW_AT_name
&& cts
.szB
< 0) {
4157 typeE
.Te
.TyEnum
.name
4158 = ML_(cur_read_strdup
)( cts
.u
.cur
,
4159 "di.readdwarf3.pTD.enum_type.2" );
4161 if (attr
== DW_AT_byte_size
&& cts
.szB
> 0) {
4162 typeE
.Te
.TyEnum
.szB
= cts
.u
.val
;
4164 if (attr
== DW_AT_declaration
) {
4169 if (!typeE
.Te
.TyEnum
.name
)
4170 typeE
.Te
.TyEnum
.name
4171 = ML_(dinfo_strdup
)( "di.readdwarf3.pTD.enum_type.3",
4172 "<anon_enum_type>" );
4174 /* Do we have something that looks sane? */
4175 if (typeE
.Te
.TyEnum
.szB
== 0
4176 /* we must know the size */
4177 /* but not for Ada, which uses such dummy
4178 enumerations as helper for gdb ada mode.
4179 Also GCC allows incomplete enums as GNU extension.
4180 http://gcc.gnu.org/onlinedocs/gcc/Incomplete-Enums.html
4181 These are marked as DW_AT_declaration and won't have
4182 a size. They can only be used in declaration or as
4183 pointer types. You can't allocate variables or storage
4184 using such an enum type. (Also GCC seems to have a bug
4185 that will put such an enumeration_type into a .debug_types
4186 unit which should only contain complete types.) */
4187 && (parser
->language
!= 'A' && !is_decl
)) {
4192 typestack_push( cc
, parser
, td3
, &typeE
, level
);
4196 /* gcc (GCC) 4.4.0 20081017 (experimental) occasionally produces
4197 DW_TAG_enumerator with only a DW_AT_name but no
4198 DW_AT_const_value. This is in violation of the Dwarf3 standard,
4199 and appears to be a new "feature" of gcc - versions 4.3.x and
4200 earlier do not appear to do this. So accept DW_TAG_enumerator
4201 which only have a name but no value. An example:
4203 <1><180>: Abbrev Number: 6 (DW_TAG_enumeration_type)
4204 <181> DW_AT_name : (indirect string, offset: 0xda70):
4206 <185> DW_AT_byte_size : 4
4207 <186> DW_AT_decl_file : 14
4208 <187> DW_AT_decl_line : 1480
4209 <189> DW_AT_sibling : <0x1a7>
4210 <2><18d>: Abbrev Number: 7 (DW_TAG_enumerator)
4211 <18e> DW_AT_name : (indirect string, offset: 0x9e18):
4213 <2><192>: Abbrev Number: 7 (DW_TAG_enumerator)
4214 <193> DW_AT_name : (indirect string, offset: 0x1505f):
4216 <2><197>: Abbrev Number: 7 (DW_TAG_enumerator)
4217 <198> DW_AT_name : (indirect string, offset: 0x16f4a):
4219 <2><19c>: Abbrev Number: 7 (DW_TAG_enumerator)
4220 <19d> DW_AT_name : (indirect string, offset: 0x156dd):
4222 <2><1a1>: Abbrev Number: 7 (DW_TAG_enumerator)
4223 <1a2> DW_AT_name : (indirect string, offset: 0x13660):
4226 if (dtag
== DW_TAG_enumerator
) {
4227 VG_(memset
)( &atomE
, 0, sizeof(atomE
) );
4229 atomE
.tag
= Te_Atom
;
4232 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
4233 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
4234 const name_form
*nf
= &abbv
->nf
[nf_i
];
4236 if (attr
== 0 && form
== 0) break;
4237 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
4238 if (attr
== DW_AT_name
&& cts
.szB
< 0) {
4240 = ML_(cur_read_strdup
)( cts
.u
.cur
,
4241 "di.readdwarf3.pTD.enumerator.1" );
4243 if (attr
== DW_AT_const_value
&& cts
.szB
> 0) {
4244 atomE
.Te
.Atom
.value
= cts
.u
.val
;
4245 atomE
.Te
.Atom
.valueKnown
= True
;
4248 /* Do we have something that looks sane? */
4249 if (atomE
.Te
.Atom
.name
== NULL
)
4251 /* Do we have a plausible parent? */
4252 if (typestack_is_empty(parser
)) goto_bad_DIE
;
4253 vg_assert(ML_(TyEnt__is_type
)(&parser
->qparentE
[parser
->sp
]));
4254 vg_assert(parser
->qparentE
[parser
->sp
].cuOff
!= D3_INVALID_CUOFF
);
4255 if (level
!= parser
->qlevel
[parser
->sp
]+1) goto_bad_DIE
;
4256 if (parser
->qparentE
[parser
->sp
].tag
!= Te_TyEnum
) goto_bad_DIE
;
4257 /* Record this child in the parent */
4258 vg_assert(parser
->qparentE
[parser
->sp
].Te
.TyEnum
.atomRs
);
4259 VG_(addToXA
)( parser
->qparentE
[parser
->sp
].Te
.TyEnum
.atomRs
,
4261 /* And record the child itself */
4265 /* Treat DW_TAG_class_type as if it was a DW_TAG_structure_type. I
4266 don't know if this is correct, but it at least makes this reader
4267 usable for gcc-4.3 produced Dwarf3. */
4268 if (dtag
== DW_TAG_structure_type
|| dtag
== DW_TAG_class_type
4269 || dtag
== DW_TAG_union_type
) {
4270 Bool have_szB
= False
;
4271 Bool is_decl
= False
;
4272 Bool is_spec
= False
;
4273 /* Create a new Type to hold the results. */
4274 VG_(memset
)(&typeE
, 0, sizeof(typeE
));
4276 typeE
.tag
= Te_TyStOrUn
;
4277 typeE
.Te
.TyStOrUn
.name
= NULL
;
4278 typeE
.Te
.TyStOrUn
.typeR
= D3_INVALID_CUOFF
;
4279 typeE
.Te
.TyStOrUn
.fieldRs
4280 = VG_(newXA
)( ML_(dinfo_zalloc
), "di.readdwarf3.pTD.struct_type.1",
4283 typeE
.Te
.TyStOrUn
.complete
= True
;
4284 typeE
.Te
.TyStOrUn
.isStruct
= dtag
== DW_TAG_structure_type
4285 || dtag
== DW_TAG_class_type
;
4288 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
4289 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
4290 const name_form
*nf
= &abbv
->nf
[nf_i
];
4292 if (attr
== 0 && form
== 0) break;
4293 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
4294 if (attr
== DW_AT_name
&& cts
.szB
< 0) {
4295 typeE
.Te
.TyStOrUn
.name
4296 = ML_(cur_read_strdup
)( cts
.u
.cur
,
4297 "di.readdwarf3.ptD.struct_type.2" );
4299 if (attr
== DW_AT_byte_size
&& cts
.szB
>= 0) {
4300 typeE
.Te
.TyStOrUn
.szB
= cts
.u
.val
;
4303 if (attr
== DW_AT_declaration
&& cts
.szB
> 0 && cts
.u
.val
> 0) {
4306 if (attr
== DW_AT_specification
&& cts
.szB
> 0 && cts
.u
.val
> 0) {
4309 if (attr
== DW_AT_signature
&& form
== DW_FORM_ref_sig8
4312 typeE
.Te
.TyStOrUn
.szB
= 8;
4313 typeE
.Te
.TyStOrUn
.typeR
4314 = cook_die_using_form( cc
, (UWord
)cts
.u
.val
, form
);
4317 /* Do we have something that looks sane? */
4318 if (is_decl
&& (!is_spec
)) {
4319 /* It's a DW_AT_declaration. We require the name but
4321 /* JRS 2012-06-28: following discussion w/ tromey, if the
4322 type doesn't have name, just make one up, and accept it.
4323 It might be referred to by other DIEs, so ignoring it
4324 doesn't seem like a safe option. */
4325 if (typeE
.Te
.TyStOrUn
.name
== NULL
)
4326 typeE
.Te
.TyStOrUn
.name
4327 = ML_(dinfo_strdup
)( "di.readdwarf3.ptD.struct_type.3",
4328 "<anon_struct_type>" );
4329 typeE
.Te
.TyStOrUn
.complete
= False
;
4330 /* JRS 2009 Aug 10: <possible kludge>? */
4331 /* Push this tyent on the stack, even though it's incomplete.
4332 It appears that gcc-4.4 on Fedora 11 will sometimes create
4333 DW_TAG_member entries for it, and so we need to have a
4334 plausible parent present in order for that to work. See
4335 #200029 comments 8 and 9. */
4336 typestack_push( cc
, parser
, td3
, &typeE
, level
);
4337 /* </possible kludge> */
4340 if ((!is_decl
) /* && (!is_spec) */) {
4341 /* this is the common, ordinary case */
4342 /* The name can be present, or not */
4344 /* We must know the size.
4345 But in Ada, record with discriminants might have no size.
4346 But in C, VLA in the middle of a struct (gcc extension)
4348 Instead, some GNAT dwarf extensions and/or dwarf entries
4349 allow to calculate the struct size at runtime.
4350 We cannot do that (yet?) so, the temporary kludge is to use
4352 typeE
.Te
.TyStOrUn
.szB
= 1;
4355 typestack_push( cc
, parser
, td3
, &typeE
, level
);
4359 /* don't know how to handle any other variants just now */
4364 if (dtag
== DW_TAG_member
) {
4365 /* Acquire member entries for both DW_TAG_structure_type and
4366 DW_TAG_union_type. They differ minorly, in that struct
4367 members must have a DW_AT_data_member_location expression
4368 whereas union members must not. */
4369 Bool parent_is_struct
;
4370 Bool is_artificial
= False
;
4371 VG_(memset
)( &fieldE
, 0, sizeof(fieldE
) );
4372 fieldE
.cuOff
= posn
;
4373 fieldE
.tag
= Te_Field
;
4374 fieldE
.Te
.Field
.typeR
= D3_INVALID_CUOFF
;
4377 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
4378 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
4379 const name_form
*nf
= &abbv
->nf
[nf_i
];
4381 if (attr
== 0 && form
== 0) break;
4382 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
4383 if (attr
== DW_AT_name
&& cts
.szB
< 0) {
4384 fieldE
.Te
.Field
.name
4385 = ML_(cur_read_strdup
)( cts
.u
.cur
,
4386 "di.readdwarf3.ptD.member.1" );
4388 if (attr
== DW_AT_type
&& cts
.szB
> 0) {
4389 fieldE
.Te
.Field
.typeR
4390 = cook_die_using_form( cc
, (UWord
)cts
.u
.val
, form
);
4392 /* There are 2 different cases for DW_AT_data_member_location.
4393 If it is a constant class attribute, it contains byte offset
4394 from the beginning of the containing entity.
4395 Otherwise it is a location expression. */
4396 if (attr
== DW_AT_data_member_location
&& cts
.szB
> 0) {
4397 fieldE
.Te
.Field
.nLoc
= -1;
4398 fieldE
.Te
.Field
.pos
.offset
= cts
.u
.val
;
4400 if (attr
== DW_AT_data_member_location
&& cts
.szB
<= 0) {
4401 fieldE
.Te
.Field
.nLoc
= (UWord
)(-cts
.szB
);
4402 fieldE
.Te
.Field
.pos
.loc
4403 = ML_(cur_read_memdup
)( cts
.u
.cur
,
4404 (SizeT
)fieldE
.Te
.Field
.nLoc
,
4405 "di.readdwarf3.ptD.member.2" );
4407 if (attr
== DW_AT_artificial
&& cts
.u
.val
== 1)
4408 is_artificial
= True
;
4410 /* Skip artificial members, they might not behave as expected. */
4413 /* Do we have a plausible parent? */
4414 if (typestack_is_empty(parser
)) goto_bad_DIE
;
4415 vg_assert(ML_(TyEnt__is_type
)(&parser
->qparentE
[parser
->sp
]));
4416 vg_assert(parser
->qparentE
[parser
->sp
].cuOff
!= D3_INVALID_CUOFF
);
4417 if (level
!= parser
->qlevel
[parser
->sp
]+1) goto_bad_DIE
;
4418 if (parser
->qparentE
[parser
->sp
].tag
!= Te_TyStOrUn
) goto_bad_DIE
;
4419 /* Do we have something that looks sane? If this a member of a
4420 struct, we must have a location expression; but if a member
4421 of a union that is irrelevant (D3 spec sec 5.6.6). We ought
4422 to reject in the latter case, but some compilers have been
4423 observed to emit constant-zero expressions. So just ignore
4426 = parser
->qparentE
[parser
->sp
].Te
.TyStOrUn
.isStruct
;
4427 if (!fieldE
.Te
.Field
.name
)
4428 fieldE
.Te
.Field
.name
4429 = ML_(dinfo_strdup
)( "di.readdwarf3.ptD.member.3",
4431 if (fieldE
.Te
.Field
.typeR
== D3_INVALID_CUOFF
)
4433 if (fieldE
.Te
.Field
.nLoc
) {
4434 if (!parent_is_struct
) {
4435 /* If this is a union type, pretend we haven't seen the data
4436 member location expression, as it is by definition
4437 redundant (it must be zero). */
4438 if (fieldE
.Te
.Field
.nLoc
> 0)
4439 ML_(dinfo_free
)(fieldE
.Te
.Field
.pos
.loc
);
4440 fieldE
.Te
.Field
.pos
.loc
= NULL
;
4441 fieldE
.Te
.Field
.nLoc
= 0;
4443 /* Record this child in the parent */
4444 fieldE
.Te
.Field
.isStruct
= parent_is_struct
;
4445 vg_assert(parser
->qparentE
[parser
->sp
].Te
.TyStOrUn
.fieldRs
);
4446 VG_(addToXA
)( parser
->qparentE
[parser
->sp
].Te
.TyStOrUn
.fieldRs
,
4448 /* And record the child itself */
4451 /* Member with no location - this can happen with static
4452 const members in C++ code which are compile time constants
4453 that do no exist in the class. They're not of any interest
4454 to us so we ignore them. */
4456 ML_(TyEnt__make_EMPTY
)(&fieldE
);
4460 if (dtag
== DW_TAG_array_type
) {
4461 VG_(memset
)(&typeE
, 0, sizeof(typeE
));
4463 typeE
.tag
= Te_TyArray
;
4464 typeE
.Te
.TyArray
.typeR
= D3_INVALID_CUOFF
;
4465 typeE
.Te
.TyArray
.boundRs
4466 = VG_(newXA
)( ML_(dinfo_zalloc
), "di.readdwarf3.ptD.array_type.1",
4471 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
4472 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
4473 const name_form
*nf
= &abbv
->nf
[nf_i
];
4475 if (attr
== 0 && form
== 0) break;
4476 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
4477 if (attr
== DW_AT_type
&& cts
.szB
> 0) {
4478 typeE
.Te
.TyArray
.typeR
4479 = cook_die_using_form( cc
, (UWord
)cts
.u
.val
, form
);
4482 if (typeE
.Te
.TyArray
.typeR
== D3_INVALID_CUOFF
)
4485 typestack_push( cc
, parser
, td3
, &typeE
, level
);
4489 /* this is a subrange type defining the bounds of an array. */
4490 if (dtag
== DW_TAG_subrange_type
4491 && subrange_type_denotes_array_bounds(parser
, dtag
)) {
4492 Bool have_lower
= False
;
4493 Bool have_upper
= False
;
4494 Bool have_count
= False
;
4499 switch (parser
->language
) {
4500 case 'C': have_lower
= True
; lower
= 0; break;
4501 case 'F': have_lower
= True
; lower
= 1; break;
4502 case '?': have_lower
= False
; break;
4503 case 'A': have_lower
= False
; break;
4504 default: vg_assert(0); /* assured us by handling of
4505 DW_TAG_compile_unit in this fn */
4508 VG_(memset
)( &boundE
, 0, sizeof(boundE
) );
4509 boundE
.cuOff
= D3_INVALID_CUOFF
;
4510 boundE
.tag
= Te_Bound
;
4513 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
4514 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
4515 const name_form
*nf
= &abbv
->nf
[nf_i
];
4517 if (attr
== 0 && form
== 0) break;
4518 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
4519 if (attr
== DW_AT_lower_bound
&& cts
.szB
> 0
4520 && form_expected_for_bound (form
)) {
4521 lower
= (Long
)cts
.u
.val
;
4524 if (attr
== DW_AT_upper_bound
&& cts
.szB
> 0
4525 && form_expected_for_bound (form
)) {
4526 upper
= (Long
)cts
.u
.val
;
4529 if (attr
== DW_AT_count
&& cts
.szB
> 0) {
4530 count
= (Long
)cts
.u
.val
;
4534 /* FIXME: potentially skip the rest if no parent present, since
4535 it could be the case that this subrange type is free-standing
4536 (not being used to describe the bounds of a containing array
4538 /* Do we have a plausible parent? */
4539 if (typestack_is_empty(parser
)) goto_bad_DIE
;
4540 vg_assert(ML_(TyEnt__is_type
)(&parser
->qparentE
[parser
->sp
]));
4541 vg_assert(parser
->qparentE
[parser
->sp
].cuOff
!= D3_INVALID_CUOFF
);
4542 if (level
!= parser
->qlevel
[parser
->sp
]+1) goto_bad_DIE
;
4543 if (parser
->qparentE
[parser
->sp
].tag
!= Te_TyArray
) goto_bad_DIE
;
4545 /* Figure out if we have a definite range or not */
4546 if (have_lower
&& have_upper
&& (!have_count
)) {
4547 boundE
.Te
.Bound
.knownL
= True
;
4548 boundE
.Te
.Bound
.knownU
= True
;
4549 boundE
.Te
.Bound
.boundL
= lower
;
4550 boundE
.Te
.Bound
.boundU
= upper
;
4552 else if (have_lower
&& (!have_upper
) && (!have_count
)) {
4553 boundE
.Te
.Bound
.knownL
= True
;
4554 boundE
.Te
.Bound
.knownU
= False
;
4555 boundE
.Te
.Bound
.boundL
= lower
;
4556 boundE
.Te
.Bound
.boundU
= 0;
4558 else if ((!have_lower
) && have_upper
&& (!have_count
)) {
4559 boundE
.Te
.Bound
.knownL
= False
;
4560 boundE
.Te
.Bound
.knownU
= True
;
4561 boundE
.Te
.Bound
.boundL
= 0;
4562 boundE
.Te
.Bound
.boundU
= upper
;
4564 else if ((!have_lower
) && (!have_upper
) && (!have_count
)) {
4565 boundE
.Te
.Bound
.knownL
= False
;
4566 boundE
.Te
.Bound
.knownU
= False
;
4567 boundE
.Te
.Bound
.boundL
= 0;
4568 boundE
.Te
.Bound
.boundU
= 0;
4569 } else if (have_lower
&& (!have_upper
) && (have_count
)) {
4570 boundE
.Te
.Bound
.knownL
= True
;
4571 boundE
.Te
.Bound
.knownU
= True
;
4572 boundE
.Te
.Bound
.boundL
= lower
;
4573 boundE
.Te
.Bound
.boundU
= lower
+ count
- 1;
4575 /* FIXME: handle more cases */
4579 /* Record this bound in the parent */
4580 boundE
.cuOff
= posn
;
4581 vg_assert(parser
->qparentE
[parser
->sp
].Te
.TyArray
.boundRs
);
4582 VG_(addToXA
)( parser
->qparentE
[parser
->sp
].Te
.TyArray
.boundRs
,
4584 /* And record the child itself */
4588 /* typedef or subrange_type other than array bounds. */
4589 if (dtag
== DW_TAG_typedef
4590 || (dtag
== DW_TAG_subrange_type
4591 && !subrange_type_denotes_array_bounds(parser
, dtag
))) {
4592 /* subrange_type other than array bound is only for Ada. */
4593 vg_assert (dtag
== DW_TAG_typedef
|| (parser
->language
== 'A'
4594 || parser
->language
== '?'));
4595 /* We can pick up a new typedef/subrange_type any time. */
4596 VG_(memset
)(&typeE
, 0, sizeof(typeE
));
4597 typeE
.cuOff
= D3_INVALID_CUOFF
;
4598 typeE
.tag
= Te_TyTyDef
;
4599 typeE
.Te
.TyTyDef
.name
= NULL
;
4600 typeE
.Te
.TyTyDef
.typeR
= D3_INVALID_CUOFF
;
4603 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
4604 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
4605 const name_form
*nf
= &abbv
->nf
[nf_i
];
4607 if (attr
== 0 && form
== 0) break;
4608 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
4609 if (attr
== DW_AT_name
&& cts
.szB
< 0) {
4610 typeE
.Te
.TyTyDef
.name
4611 = ML_(cur_read_strdup
)( cts
.u
.cur
,
4612 "di.readdwarf3.ptD.typedef.1" );
4614 if (attr
== DW_AT_type
&& cts
.szB
> 0) {
4615 typeE
.Te
.TyTyDef
.typeR
4616 = cook_die_using_form( cc
, (UWord
)cts
.u
.val
, form
);
4619 /* Do we have something that looks sane?
4620 gcc gnat Ada generates minimal typedef
4622 <6><91cc>: DW_TAG_typedef
4623 DW_AT_abstract_ori: <9066>
4624 g++ for OMP can generate artificial functions that have
4625 parameters that refer to pointers to unnamed typedefs.
4626 See https://bugs.kde.org/show_bug.cgi?id=273475
4627 So we cannot require a name for a DW_TAG_typedef.
4632 if (dtag
== DW_TAG_subroutine_type
) {
4633 /* function type? just record that one fact and ask no
4634 further questions. */
4635 VG_(memset
)(&typeE
, 0, sizeof(typeE
));
4636 typeE
.cuOff
= D3_INVALID_CUOFF
;
4637 typeE
.tag
= Te_TyFn
;
4641 if (dtag
== DW_TAG_volatile_type
|| dtag
== DW_TAG_const_type
4642 || dtag
== DW_TAG_restrict_type
|| dtag
== DW_TAG_atomic_type
) {
4644 VG_(memset
)(&typeE
, 0, sizeof(typeE
));
4645 typeE
.cuOff
= D3_INVALID_CUOFF
;
4646 typeE
.tag
= Te_TyQual
;
4647 typeE
.Te
.TyQual
.qual
4648 = (dtag
== DW_TAG_volatile_type
? 'V'
4649 : (dtag
== DW_TAG_const_type
? 'C'
4650 : (dtag
== DW_TAG_restrict_type
? 'R' : 'A')));
4651 /* target type defaults to 'void' */
4652 typeE
.Te
.TyQual
.typeR
= D3_FAKEVOID_CUOFF
;
4655 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
4656 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
4657 const name_form
*nf
= &abbv
->nf
[nf_i
];
4659 if (attr
== 0 && form
== 0) break;
4660 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
4661 if (attr
== DW_AT_type
&& cts
.szB
> 0) {
4662 typeE
.Te
.TyQual
.typeR
4663 = cook_die_using_form( cc
, (UWord
)cts
.u
.val
, form
);
4667 /* gcc sometimes generates DW_TAG_const/volatile_type without
4668 DW_AT_type and GDB appears to interpret the type as 'const
4669 void' (resp. 'volatile void'). So just allow it .. */
4670 if (have_ty
== 1 || have_ty
== 0)
4677 * Treat DW_TAG_unspecified_type as type void. An example of DW_TAG_unspecified_type:
4679 * $ readelf --debug-dump /usr/lib/debug/usr/lib/libstdc++.so.6.0.16.debug
4680 * <1><10d4>: Abbrev Number: 53 (DW_TAG_unspecified_type)
4681 * <10d5> DW_AT_name : (indirect string, offset: 0xdb7): decltype(nullptr)
4683 if (dtag
== DW_TAG_unspecified_type
) {
4684 VG_(memset
)(&typeE
, 0, sizeof(typeE
));
4685 typeE
.cuOff
= D3_INVALID_CUOFF
;
4686 typeE
.tag
= Te_TyQual
;
4687 typeE
.Te
.TyQual
.typeR
= D3_FAKEVOID_CUOFF
;
4691 /* else ignore this DIE */
4696 if (0) VG_(printf
)("YYYY Acquire Type\n");
4697 vg_assert(ML_(TyEnt__is_type
)( &typeE
));
4698 vg_assert(typeE
.cuOff
== D3_INVALID_CUOFF
|| typeE
.cuOff
== posn
);
4700 VG_(addToXA
)( tyents
, &typeE
);
4705 if (0) VG_(printf
)("YYYY Acquire Atom\n");
4706 vg_assert(atomE
.tag
== Te_Atom
);
4707 vg_assert(atomE
.cuOff
== D3_INVALID_CUOFF
|| atomE
.cuOff
== posn
);
4709 VG_(addToXA
)( tyents
, &atomE
);
4714 /* For union members, Expr should be absent */
4715 if (0) VG_(printf
)("YYYY Acquire Field\n");
4716 vg_assert(fieldE
.tag
== Te_Field
);
4717 vg_assert(fieldE
.Te
.Field
.nLoc
<= 0 || fieldE
.Te
.Field
.pos
.loc
!= NULL
);
4718 vg_assert(fieldE
.Te
.Field
.nLoc
!= 0 || fieldE
.Te
.Field
.pos
.loc
== NULL
);
4719 if (fieldE
.Te
.Field
.isStruct
) {
4720 vg_assert(fieldE
.Te
.Field
.nLoc
!= 0);
4722 vg_assert(fieldE
.Te
.Field
.nLoc
== 0);
4724 vg_assert(fieldE
.cuOff
== D3_INVALID_CUOFF
|| fieldE
.cuOff
== posn
);
4725 fieldE
.cuOff
= posn
;
4726 VG_(addToXA
)( tyents
, &fieldE
);
4731 if (0) VG_(printf
)("YYYY Acquire Bound\n");
4732 vg_assert(boundE
.tag
== Te_Bound
);
4733 vg_assert(boundE
.cuOff
== D3_INVALID_CUOFF
|| boundE
.cuOff
== posn
);
4734 boundE
.cuOff
= posn
;
4735 VG_(addToXA
)( tyents
, &boundE
);
4740 dump_bad_die_and_barf("parse_type_DIE", dtag
, posn
, level
,
4741 c_die
, saved_die_c_offset
,
4748 /*------------------------------------------------------------*/
4750 /*--- Compression of type DIE information ---*/
4752 /*------------------------------------------------------------*/
4754 static UWord
chase_cuOff ( Bool
* changed
,
4755 const XArray
* /* of TyEnt */ ents
,
4756 TyEntIndexCache
* ents_cache
,
4760 ent
= ML_(TyEnts__index_by_cuOff
)( ents
, ents_cache
, cuOff
);
4763 if (VG_(clo_verbosity
) > 1)
4764 VG_(printf
)("chase_cuOff: no entry for 0x%05lx\n", cuOff
);
4769 vg_assert(ent
->tag
!= Te_EMPTY
);
4770 if (ent
->tag
!= Te_INDIR
) {
4774 vg_assert(ent
->Te
.INDIR
.indR
< cuOff
);
4776 return ent
->Te
.INDIR
.indR
;
4781 void chase_cuOffs_in_XArray ( Bool
* changed
,
4782 const XArray
* /* of TyEnt */ ents
,
4783 TyEntIndexCache
* ents_cache
,
4784 /*MOD*/XArray
* /* of UWord */ cuOffs
)
4787 Word i
, n
= VG_(sizeXA
)( cuOffs
);
4788 for (i
= 0; i
< n
; i
++) {
4790 UWord
* p
= VG_(indexXA
)( cuOffs
, i
);
4791 *p
= chase_cuOff( &b
, ents
, ents_cache
, *p
);
4798 static Bool
TyEnt__subst_R_fields ( const XArray
* /* of TyEnt */ ents
,
4799 TyEntIndexCache
* ents_cache
,
4802 Bool b
, changed
= False
;
4808 = chase_cuOff( &b
, ents
, ents_cache
, te
->Te
.INDIR
.indR
);
4809 if (b
) changed
= True
;
4817 = chase_cuOff( &b
, ents
, ents_cache
, te
->Te
.Field
.typeR
);
4818 if (b
) changed
= True
;
4829 = chase_cuOff( &b
, ents
, ents_cache
, te
->Te
.TyPorR
.typeR
);
4830 if (b
) changed
= True
;
4833 te
->Te
.TyTyDef
.typeR
4834 = chase_cuOff( &b
, ents
, ents_cache
, te
->Te
.TyTyDef
.typeR
);
4835 if (b
) changed
= True
;
4838 chase_cuOffs_in_XArray( &b
, ents
, ents_cache
, te
->Te
.TyStOrUn
.fieldRs
);
4839 if (b
) changed
= True
;
4842 chase_cuOffs_in_XArray( &b
, ents
, ents_cache
, te
->Te
.TyEnum
.atomRs
);
4843 if (b
) changed
= True
;
4846 te
->Te
.TyArray
.typeR
4847 = chase_cuOff( &b
, ents
, ents_cache
, te
->Te
.TyArray
.typeR
);
4848 if (b
) changed
= True
;
4849 chase_cuOffs_in_XArray( &b
, ents
, ents_cache
, te
->Te
.TyArray
.boundRs
);
4850 if (b
) changed
= True
;
4856 = chase_cuOff( &b
, ents
, ents_cache
, te
->Te
.TyQual
.typeR
);
4857 if (b
) changed
= True
;
4868 /* Make a pass over 'ents'. For each tyent, inspect the target of any
4869 'R' or 'Rs' fields (those which refer to other tyents), and replace
4870 any which point to INDIR nodes with the target of the indirection
4871 (which should not itself be an indirection). In summary, this
4872 routine shorts out all references to indirection nodes. */
4874 Word
dedup_types_substitution_pass ( /*MOD*/XArray
* /* of TyEnt */ ents
,
4875 TyEntIndexCache
* ents_cache
)
4877 Word i
, n
, nChanged
= 0;
4879 n
= VG_(sizeXA
)( ents
);
4880 for (i
= 0; i
< n
; i
++) {
4881 TyEnt
* ent
= VG_(indexXA
)( ents
, i
);
4882 vg_assert(ent
->tag
!= Te_EMPTY
);
4883 /* We have to substitute everything, even indirections, so as to
4884 ensure that chains of indirections don't build up. */
4885 b
= TyEnt__subst_R_fields( ents
, ents_cache
, ent
);
4894 /* Make a pass over 'ents', building a dictionary of TyEnts as we go.
4895 Look up each new tyent in the dictionary in turn. If it is already
4896 in the dictionary, replace this tyent with an indirection to the
4897 existing one, and delete any malloc'd stuff hanging off this one.
4898 In summary, this routine commons up all tyents that are identical
4899 as defined by TyEnt__cmp_by_all_except_cuOff. */
4901 Word
dedup_types_commoning_pass ( /*MOD*/XArray
* /* of TyEnt */ ents
)
4903 Word n
, i
, nDeleted
;
4904 WordFM
* dict
; /* TyEnt* -> void */
4909 ML_(dinfo_zalloc
), "di.readdwarf3.dtcp.1",
4911 (Word(*)(UWord
,UWord
)) ML_(TyEnt__cmp_by_all_except_cuOff
)
4915 n
= VG_(sizeXA
)( ents
);
4916 for (i
= 0; i
< n
; i
++) {
4917 ent
= VG_(indexXA
)( ents
, i
);
4918 vg_assert(ent
->tag
!= Te_EMPTY
);
4920 /* Ignore indirections, although check that they are
4921 not forming a cycle. */
4922 if (ent
->tag
== Te_INDIR
) {
4923 vg_assert(ent
->Te
.INDIR
.indR
< ent
->cuOff
);
4928 if (VG_(lookupFM
)( dict
, &keyW
, &valW
, (UWord
)ent
)) {
4929 /* it's already in the dictionary. */
4930 TyEnt
* old
= (TyEnt
*)keyW
;
4931 vg_assert(valW
== 0);
4932 vg_assert(old
!= ent
);
4933 vg_assert(old
->tag
!= Te_INDIR
);
4934 /* since we are traversing the array in increasing order of
4936 vg_assert(old
->cuOff
< ent
->cuOff
);
4937 /* So anyway, dump this entry and replace it with an
4938 indirection to the one in the dictionary. Note that the
4939 assertion above guarantees that we cannot create cycles of
4940 indirections, since we are always creating an indirection
4941 to a tyent with a cuOff lower than this one. */
4942 ML_(TyEnt__make_EMPTY
)( ent
);
4943 ent
->tag
= Te_INDIR
;
4944 ent
->Te
.INDIR
.indR
= old
->cuOff
;
4947 /* not in dictionary; add it and keep going. */
4948 VG_(addToFM
)( dict
, (UWord
)ent
, 0 );
4952 VG_(deleteFM
)( dict
, NULL
, NULL
);
4959 void dedup_types ( Bool td3
,
4960 /*MOD*/XArray
* /* of TyEnt */ ents
,
4961 TyEntIndexCache
* ents_cache
)
4963 Word m
, n
, i
, nDel
, nSubst
, nThresh
;
4966 n
= VG_(sizeXA
)( ents
);
4968 /* If a commoning pass and a substitution pass both make fewer than
4969 this many changes, just stop. It's pointless to burn up CPU
4970 time trying to compress the last 1% or so out of the array. */
4973 /* First we must sort .ents by its .cuOff fields, so we
4974 can index into it. */
4975 VG_(setCmpFnXA
)( ents
, (XACmpFn_t
) ML_(TyEnt__cmp_by_cuOff_only
) );
4976 VG_(sortXA
)( ents
);
4978 /* Now repeatedly do commoning and substitution passes over
4979 the array, until there are no more changes. */
4981 nDel
= dedup_types_commoning_pass ( ents
);
4982 nSubst
= dedup_types_substitution_pass ( ents
, ents_cache
);
4983 vg_assert(nDel
>= 0 && nSubst
>= 0);
4984 TRACE_D3(" %ld deletions, %ld substitutions\n", nDel
, nSubst
);
4985 } while (nDel
> nThresh
|| nSubst
> nThresh
);
4987 /* Sanity check: all INDIR nodes should point at a non-INDIR thing.
4988 In fact this should be true at the end of every loop iteration
4989 above (a commoning pass followed by a substitution pass), but
4990 checking it on every iteration is excessively expensive. Note,
4991 this loop also computes 'm' for the stats printing below it. */
4993 n
= VG_(sizeXA
)( ents
);
4994 for (i
= 0; i
< n
; i
++) {
4996 ent
= VG_(indexXA
)( ents
, i
);
4997 if (ent
->tag
!= Te_INDIR
) continue;
4999 ind
= ML_(TyEnts__index_by_cuOff
)( ents
, ents_cache
,
5000 ent
->Te
.INDIR
.indR
);
5002 vg_assert(ind
->tag
!= Te_INDIR
);
5005 TRACE_D3("Overall: %ld before, %ld after\n", n
, n
-m
);
5009 /*------------------------------------------------------------*/
5011 /*--- Resolution of references to type DIEs ---*/
5013 /*------------------------------------------------------------*/
5015 /* Make a pass through the (temporary) variables array. Examine the
5016 type of each variable, check is it found, and chase any Te_INDIRs.
5017 Postcondition is: each variable has a typeR field that refers to a
5018 valid type in tyents, or a Te_UNKNOWN, and is certainly guaranteed
5019 not to refer to a Te_INDIR. (This is so that we can throw all the
5020 Te_INDIRs away later). */
5022 __attribute__((noinline
))
5023 static void resolve_variable_types (
5024 void (*barf
)( const HChar
* ) __attribute__((noreturn
)),
5025 /*R-O*/XArray
* /* of TyEnt */ ents
,
5026 /*MOD*/TyEntIndexCache
* ents_cache
,
5027 /*MOD*/XArray
* /* of TempVar* */ vars
5031 n
= VG_(sizeXA
)( vars
);
5032 for (i
= 0; i
< n
; i
++) {
5033 TempVar
* var
= *(TempVar
**)VG_(indexXA
)( vars
, i
);
5034 /* This is the stated type of the variable. But it might be
5035 an indirection, so be careful. */
5036 TyEnt
* ent
= ML_(TyEnts__index_by_cuOff
)( ents
, ents_cache
,
5038 if (ent
&& ent
->tag
== Te_INDIR
) {
5039 ent
= ML_(TyEnts__index_by_cuOff
)( ents
, ents_cache
,
5040 ent
->Te
.INDIR
.indR
);
5042 vg_assert(ent
->tag
!= Te_INDIR
);
5045 /* Deal first with "normal" cases */
5046 if (ent
&& ML_(TyEnt__is_type
)(ent
)) {
5047 var
->typeR
= ent
->cuOff
;
5051 /* If there's no ent, it probably we did not manage to read a
5052 type at the cuOffset which is stated as being this variable's
5053 type. Maybe a deficiency in parse_type_DIE. Complain. */
5055 VG_(printf
)("\n: Invalid cuOff = 0x%05lx\n", var
->typeR
);
5056 barf("resolve_variable_types: "
5057 "cuOff does not refer to a known type");
5060 /* If ent has any other tag, something bad happened, along the
5061 lines of var->typeR not referring to a type at all. */
5062 vg_assert(ent
->tag
== Te_UNKNOWN
);
5063 /* Just accept it; the type will be useless, but at least keep
5065 var
->typeR
= ent
->cuOff
;
5070 /*------------------------------------------------------------*/
5072 /*--- Parsing of Compilation Units ---*/
5074 /*------------------------------------------------------------*/
5076 static Int
cmp_TempVar_by_dioff ( const void* v1
, const void* v2
) {
5077 const TempVar
* t1
= *(const TempVar
*const *)v1
;
5078 const TempVar
* t2
= *(const TempVar
*const *)v2
;
5079 if (t1
->dioff
< t2
->dioff
) return -1;
5080 if (t1
->dioff
> t2
->dioff
) return 1;
5084 static void read_DIE (
5085 /*MOD*/WordFM
* /* of (XArray* of AddrRange, void) */ rangestree
,
5086 /*MOD*/XArray
* /* of TyEnt */ tyents
,
5087 /*MOD*/XArray
* /* of TempVar* */ tempvars
,
5088 /*MOD*/XArray
* /* of GExpr* */ gexprs
,
5089 /*MOD*/D3TypeParser
* typarser
,
5090 /*MOD*/D3VarParser
* varparser
,
5091 /*MOD*/D3InlParser
* inlparser
,
5092 XArray
** fndn_ix_Table
,
5093 ULong
*debug_line_offset
,
5094 Cursor
* c
, Bool td3
, CUConst
* cc
, Int level
5098 ULong atag
, abbv_code
;
5101 UWord start_die_c_offset
;
5102 UWord after_die_c_offset
;
5103 // If the DIE we will parse has a sibling and the parser(s) are
5104 // all indicating that parse_children is not necessary, then
5105 // we will skip the children by jumping to the sibling of this DIE
5106 // (if it has a sibling).
5108 Bool parse_children
= False
;
5110 /* --- Deal with this DIE --- */
5111 posn
= cook_die( cc
, get_position_of_Cursor( c
) );
5112 abbv_code
= get_ULEB128( c
);
5113 abbv
= get_abbv(cc
, abbv_code
, td3
);
5118 trace_DIE ((DW_TAG
)atag
, posn
, level
,
5119 get_position_of_Cursor( c
), abbv
, cc
);
5123 cc
->barf("read_DIE: invalid zero tag on DIE");
5125 has_children
= abbv
->has_children
;
5126 if (has_children
!= DW_children_no
&& has_children
!= DW_children_yes
)
5127 cc
->barf("read_DIE: invalid has_children value");
5129 /* We're set up to look at the fields of this DIE. Hand it off to
5130 any parser(s) that want to see it. Since they will in general
5131 advance the DIE cursor, remember the current settings so that we
5132 can then back up. . */
5133 start_die_c_offset
= get_position_of_Cursor( c
);
5134 after_die_c_offset
= 0; // set to c position if a parser has read the DIE.
5136 if (VG_(clo_read_var_info
)) {
5137 parse_type_DIE( tyents
,
5146 if (get_position_of_Cursor( c
) != start_die_c_offset
) {
5147 after_die_c_offset
= get_position_of_Cursor( c
);
5148 set_position_of_Cursor( c
, start_die_c_offset
);
5151 parse_var_DIE( rangestree
,
5164 if (get_position_of_Cursor( c
) != start_die_c_offset
) {
5165 after_die_c_offset
= get_position_of_Cursor( c
);
5166 set_position_of_Cursor( c
, start_die_c_offset
);
5169 parse_children
= True
;
5170 // type and var parsers do not have logic to skip childrens and establish
5171 // the value of sibling.
5174 if (VG_(clo_read_inline_info
)) {
5175 inlparser
->sibling
= 0;
5177 parse_inl_DIE( inlparser
,
5188 if (get_position_of_Cursor( c
) != start_die_c_offset
) {
5189 after_die_c_offset
= get_position_of_Cursor( c
);
5190 // Last parser, no need to reset the cursor to start_die_c_offset.
5193 sibling
= inlparser
->sibling
;
5194 vg_assert (inlparser
->sibling
== 0 || inlparser
->sibling
== sibling
);
5197 /* Top level CU DIE, but we don't want to read anything else, just skip
5198 to the end and return. */
5199 if (level
== 0 && !parse_children
) {
5200 UWord cu_size_including_IniLen
= (cc
->unit_length
5201 + (cc
->is_dw64
? 12 : 4));
5202 set_position_of_Cursor( c
, (cc
->cu_start_offset
5203 + cu_size_including_IniLen
));
5207 if (after_die_c_offset
> 0) {
5208 // DIE was read by a parser above, so we know where the DIE ends.
5209 set_position_of_Cursor( c
, after_die_c_offset
);
5211 /* No parser has parsed this DIE. So, we need to skip the DIE,
5212 in order to read the next DIE.
5213 At the same time, establish sibling value if the DIE has one. */
5214 TRACE_D3(" uninteresting DIE -> skipping ...\n");
5215 skip_DIE (&sibling
, c
, abbv
, cc
);
5218 /* --- Now recurse into its children, if any
5219 and the parsing of the children is requested by a parser --- */
5220 if (has_children
== DW_children_yes
) {
5221 if (parse_children
|| sibling
== 0) {
5222 if (0) TRACE_D3("BEGIN children of level %d\n", level
);
5224 atag
= peek_ULEB128( c
);
5225 if (atag
== 0) break;
5226 if (parse_children
) {
5227 read_DIE( rangestree
, tyents
, tempvars
, gexprs
,
5228 typarser
, varparser
, inlparser
,
5229 fndn_ix_Table
, debug_line_offset
,
5230 c
, td3
, cc
, level
+1 );
5232 Int skip_level
= level
+ 1;
5234 atag
= peek_ULEB128( c
);
5237 if (skip_level
== level
) break;
5238 /* Eat the terminating zero and continue skipping the
5239 children one level up. */
5240 atag
= get_ULEB128( c
);
5241 vg_assert(atag
== 0);
5245 abbv_code
= get_ULEB128( c
);
5246 abbv
= get_abbv(cc
, abbv_code
, td3
);
5248 skip_DIE (&sibling
, c
, abbv
, cc
);
5249 if (abbv
->has_children
) {
5253 set_position_of_Cursor( c
, sibling
);
5258 /* Now we need to eat the terminating zero */
5259 atag
= get_ULEB128( c
);
5260 vg_assert(atag
== 0);
5261 if (0) TRACE_D3("END children of level %d\n", level
);
5263 // We can skip the childrens, by jumping to the sibling
5264 TRACE_D3(" SKIPPING DIE's children,"
5265 "jumping to sibling <%d><%lx>\n",
5267 set_position_of_Cursor( c
, sibling
);
5273 static void trace_debug_loc (const DebugInfo
* di
,
5274 __attribute__((noreturn
)) void (*barf
)( const HChar
* ),
5275 DiSlice escn_debug_loc
)
5278 /* This doesn't work properly because it assumes all entries are
5279 packed end to end, with no holes. But that doesn't always
5280 appear to be the case, so it loses sync. And the D3 spec
5281 doesn't appear to require a no-hole situation either. */
5282 /* Display .debug_loc */
5285 Cursor loc
; /* for showing .debug_loc */
5286 Bool td3
= di
->trace_symtab
;
5289 TRACE_SYMTAB("\n------ The contents of .debug_loc ------\n");
5290 TRACE_SYMTAB(" Offset Begin End Expression\n");
5291 if (ML_(sli_is_valid
)(escn_debug_loc
)) {
5292 init_Cursor( &loc
, escn_debug_loc
, 0, barf
,
5293 "Overrun whilst reading .debug_loc section(1)" );
5299 if (is_at_end_Cursor( &loc
))
5302 /* Read a (host-)word pair. This is something of a hack since
5303 the word size to read is really dictated by the ELF file;
5304 however, we assume we're reading a file with the same
5305 word-sizeness as the host. Reasonably enough. */
5306 w1
= get_UWord( &loc
);
5307 w2
= get_UWord( &loc
);
5309 if (w1
== 0 && w2
== 0) {
5310 /* end of list. reset 'base' */
5311 TRACE_D3(" %08lx <End of list>\n", dl_offset
);
5313 dl_offset
= get_position_of_Cursor( &loc
);
5318 /* new value for 'base' */
5319 TRACE_D3(" %08lx %16lx %08lx (base address)\n",
5325 /* else a location expression follows */
5326 TRACE_D3(" %08lx %08lx %08lx ",
5327 dl_offset
, w1
+ dl_base
, w2
+ dl_base
);
5328 len
= (UWord
)get_UShort( &loc
);
5330 UChar byte
= get_UChar( &loc
);
5331 TRACE_D3("%02x", (UInt
)byte
);
5340 static void trace_debug_ranges (const DebugInfo
* di
,
5341 __attribute__((noreturn
)) void (*barf
)( const HChar
* ),
5342 DiSlice escn_debug_ranges
)
5344 Cursor ranges
; /* for showing .debug_ranges */
5347 Bool td3
= di
->trace_symtab
;
5349 /* Display .debug_ranges */
5351 TRACE_SYMTAB("\n------ The contents of .debug_ranges ------\n");
5352 TRACE_SYMTAB(" Offset Begin End\n");
5353 if (ML_(sli_is_valid
)(escn_debug_ranges
)) {
5354 init_Cursor( &ranges
, escn_debug_ranges
, 0, barf
,
5355 "Overrun whilst reading .debug_ranges section(1)" );
5361 if (is_at_end_Cursor( &ranges
))
5364 /* Read a (host-)word pair. This is something of a hack since
5365 the word size to read is really dictated by the ELF file;
5366 however, we assume we're reading a file with the same
5367 word-sizeness as the host. Reasonably enough. */
5368 w1
= get_UWord( &ranges
);
5369 w2
= get_UWord( &ranges
);
5371 if (w1
== 0 && w2
== 0) {
5372 /* end of list. reset 'base' */
5373 TRACE_D3(" %08lx <End of list>\n", dr_offset
);
5375 dr_offset
= get_position_of_Cursor( &ranges
);
5380 /* new value for 'base' */
5381 TRACE_D3(" %08lx %16lx %08lx (base address)\n",
5387 /* else a range [w1+base, w2+base) is denoted */
5388 TRACE_D3(" %08lx %08lx %08lx\n",
5389 dr_offset
, w1
+ dr_base
, w2
+ dr_base
);
/* Trace-only helper: show the contents of the .debug_abbrev section.

   di supplies the trace_symtab flag (copied into td3), barf is the
   no-return error callback handed to init_Cursor for overruns, and
   escn_debug_abbv is the slice to display.  The section is only
   walked if the slice is valid.  Each abbreviation is read as a
   ULEB128 code (0 ends the table), a ULEB128 tag and a one-byte
   has_children flag, followed by ULEB128 (name, form) pairs that end
   with a (0, 0) pair. */
5394 static void trace_debug_abbrev (const DebugInfo
* di
,
5395 __attribute__((noreturn
)) void (*barf
)( const HChar
* ),
5396 DiSlice escn_debug_abbv
)
5398 Cursor abbv
; /* for showing .debug_abbrev */
5399 Bool td3
= di
->trace_symtab
;
5401 /* Display .debug_abbrev */
5403 TRACE_SYMTAB("\n------ The contents of .debug_abbrev ------\n");
5404 if (ML_(sli_is_valid
)(escn_debug_abbv
)) {
5405 init_Cursor( &abbv
, escn_debug_abbv
, 0, barf
,
5406 "Overrun whilst reading .debug_abbrev section" );
5408 if (is_at_end_Cursor( &abbv
))
5410 /* Read one abbreviation table */
5411 TRACE_D3(" Number TAG\n");
5415 ULong acode
= get_ULEB128( &abbv
);
5416 if (acode
== 0) break; /* end of the table */
5417 atag
= get_ULEB128( &abbv
);
5418 has_children
= get_UChar( &abbv
);
5419 TRACE_D3(" %llu %s [%s]\n",
5420 acode
, ML_(pp_DW_TAG
)(atag
),
5421 ML_(pp_DW_children
)(has_children
));
5423 ULong at_name
= get_ULEB128( &abbv
);
5424 ULong at_form
= get_ULEB128( &abbv
);
/* A DW_FORM_implicit_const attribute is followed by an SLEB128
   value in the abbreviation itself; it is read here and discarded. */
5425 if (at_form
== DW_FORM_implicit_const
) {
5426 /* Long at_val = */ get_SLEB128 ( &abbv
);
5428 if (at_name
== 0 && at_form
== 0) break;
5429 TRACE_D3(" %-18s %s\n",
5430 ML_(pp_DW_AT
)(at_name
), ML_(pp_DW_FORM
)(at_form
));
/* Worker that does the actual DWARF3 reading for one DebugInfo.

   barf is the no-return error callback: it is handed to every
   init_Cursor call (to report overruns) and is called directly when a
   CU's measured size disagrees with the section or with its header.
   The escn_* parameters are the slices of the individual debug
   sections; the *_alt ones belong to the alternate debug file.

   Outline, as far as the steps are visible here:
     - trace .debug_loc, .debug_ranges and .debug_abbrev;
     - set up the accumulators: tyents (seeded with a fake "void"
       entry and an UNKNOWN entry), rangestree, tempvars, gexprs, the
       type and variable parsers and the signature_types hash table;
     - when variable info is wanted and .debug_types is valid, scan it
       once up front to record each unit's type signature;
     - run three DIE-reading passes, over the alternate .debug_info,
       .debug_info and .debug_types, in that order;
     - when variable info is wanted, post-process: compress the type
       entries, resolve variable types, keep only the non-INDIR
       entries (stored in di->admin_tyents), bias the location
       expressions, walk the collected variables, and finally free the
       temporaries and store gexprs in di->admin_gexprs. */
5438 void new_dwarf3_reader_wrk (
5440 __attribute__((noreturn
)) void (*barf
)( const HChar
* ),
5441 DiSlice escn_debug_info
, DiSlice escn_debug_types
,
5442 DiSlice escn_debug_abbv
, DiSlice escn_debug_line
,
5443 DiSlice escn_debug_str
, DiSlice escn_debug_ranges
,
5444 DiSlice escn_debug_rnglists
, DiSlice escn_debug_loclists
,
5445 DiSlice escn_debug_loc
, DiSlice escn_debug_info_alt
,
5446 DiSlice escn_debug_abbv_alt
, DiSlice escn_debug_line_alt
,
5447 DiSlice escn_debug_str_alt
, DiSlice escn_debug_line_str
,
5448 DiSlice escn_debug_addr
, DiSlice escn_debug_str_offsets
5451 XArray
* /* of TyEnt */ tyents
= NULL
;
5452 XArray
* /* of TyEnt */ tyents_to_keep
= NULL
;
5453 XArray
* /* of GExpr* */ gexprs
= NULL
;
5454 XArray
* /* of TempVar* */ tempvars
= NULL
;
5455 WordFM
* /* of (XArray* of AddrRange, void) */ rangestree
= NULL
;
5456 TyEntIndexCache
* tyents_cache
= NULL
;
5457 TyEntIndexCache
* tyents_to_keep_cache
= NULL
;
5458 TempVar
*varp
, *varp2
;
5460 Cursor info
; /* primary cursor for parsing .debug_info */
5461 D3TypeParser typarser
;
5462 D3VarParser varparser
;
5463 D3InlParser inlparser
;
5464 XArray
* /* of UInt */ fndn_ix_Table
= NULL
;
5465 ULong debug_line_offset
= (ULong
) -1;
5467 Bool td3
= di
->trace_symtab
;
5468 XArray
* /* of TempVar* */ dioff_lookup_tab
;
5470 VgHashTable
*signature_types
= NULL
;
5472 /* Display/trace various information, if requested. */
5474 trace_debug_loc (di
, barf
, escn_debug_loc
);
5475 trace_debug_ranges (di
, barf
, escn_debug_ranges
);
5476 trace_debug_abbrev (di
, barf
, escn_debug_abbv
);
5480 /* Zero out all parsers. Parsers will really be initialised
5481 according to VG_(clo_read_*_info). */
5482 VG_(memset
)( &inlparser
, 0, sizeof(inlparser
) );
5484 if (VG_(clo_read_var_info
)) {
5485 /* We'll park the harvested type information in here. Also create
5486 a fake "void" entry with offset D3_FAKEVOID_CUOFF, so we always
5487 have at least one type entry to refer to. D3_FAKEVOID_CUOFF is
5488 huge and presumably will not occur in any valid DWARF3 file --
5489 it would need to have a .debug_info section 4GB long for that to
5490 happen. These type entries end up in the DebugInfo. */
5491 tyents
= VG_(newXA
)( ML_(dinfo_zalloc
),
5492 "di.readdwarf3.ndrw.1 (TyEnt temp array)",
5493 ML_(dinfo_free
), sizeof(TyEnt
) );
/* First seed entry: the fake "void" type described above. */
5495 VG_(memset
)(&tyent
, 0, sizeof(tyent
));
5496 tyent
.tag
= Te_TyVoid
;
5497 tyent
.cuOff
= D3_FAKEVOID_CUOFF
;
5498 tyent
.Te
.TyVoid
.isFake
= True
;
5499 VG_(addToXA
)( tyents
, &tyent
);
/* Second seed entry: an UNKNOWN-tagged TyEnt at D3_INVALID_CUOFF. */
5502 VG_(memset
)(&tyent
, 0, sizeof(tyent
));
5503 tyent
.tag
= Te_UNKNOWN
;
5504 tyent
.cuOff
= D3_INVALID_CUOFF
;
5505 VG_(addToXA
)( tyents
, &tyent
);
5508 /* This is a tree used to unique-ify the range lists that are
5509 manufactured by parse_var_DIE. References to the keys in the
5510 tree wind up in .rngMany fields in TempVars. We'll need to
5511 delete this tree, and the XArrays attached to it, at the end of
5513 rangestree
= VG_(newFM
)( ML_(dinfo_zalloc
),
5514 "di.readdwarf3.ndrw.2 (rangestree)",
5516 (Word(*)(UWord
,UWord
))cmp__XArrays_of_AddrRange
);
5518 /* List of variables we're accumulating. These don't end up in the
5519 DebugInfo; instead their contents are handed to ML_(addVar) and
5520 the list elements are then deleted. */
5521 tempvars
= VG_(newXA
)( ML_(dinfo_zalloc
),
5522 "di.readdwarf3.ndrw.3 (TempVar*s array)",
5526 /* List of GExprs we're accumulating. These wind up in the
5528 gexprs
= VG_(newXA
)( ML_(dinfo_zalloc
), "di.readdwarf3.ndrw.4",
5529 ML_(dinfo_free
), sizeof(GExpr
*) );
5531 /* We need a D3TypeParser to keep track of partially constructed
5532 types. It'll be discarded as soon as we've completed the CU,
5533 since the resulting information is tipped in to 'tyents' as it
5535 type_parser_init(&typarser
);
5537 var_parser_init(&varparser
);
5539 signature_types
= VG_(HT_construct
) ("signature_types");
5542 /* Do an initial pass to scan the .debug_types section, if any, and
5543 fill in the signatured types hash table. This lets us handle
5544 mapping from a type signature to a (cooked) DIE offset directly
5545 in get_Form_contents. */
5546 if (VG_(clo_read_var_info
) && ML_(sli_is_valid
)(escn_debug_types
)) {
5547 init_Cursor( &info
, escn_debug_types
, 0, barf
,
5548 "Overrun whilst reading .debug_types section" );
5549 TRACE_D3("\n------ Collecting signatures from "
5550 ".debug_types section ------\n");
5552 abbv_state last_abbv
;
5553 last_abbv
.debug_abbrev_offset
= (ULong
) -1;
5554 last_abbv
.ht_abbvs
= NULL
;
5556 UWord cu_start_offset
, cu_offset_now
;
5559 cu_start_offset
= get_position_of_Cursor( &info
);
5561 TRACE_D3(" Compilation Unit @ offset 0x%lx:\n", cu_start_offset
);
5562 /* parse_CU_header initialises the CU's abbv hash table. */
5563 parse_CU_Header( &cc
, td3
, &info
, escn_debug_abbv
,
5564 last_abbv
, True
, False
);
5566 /* Needed by cook_die. */
5567 cc
.types_cuOff_bias
= escn_debug_info
.szB
;
5569 record_signatured_type( signature_types
, cc
.type_signature
,
5570 cook_die( &cc
, cc
.type_offset
));
5572 /* Until proven otherwise we assume we don't need the icc9
5573 workaround in this case; see the DIE-reading loop below
5575 cu_offset_now
= (cu_start_offset
+ cc
.unit_length
5576 + (cc
.is_dw64
? 12 : 4));
5578 last_abbv
= cc
.abbv
;
5580 if (cu_offset_now
>= escn_debug_types
.szB
) {
5584 set_position_of_Cursor ( &info
, cu_offset_now
);
5586 if (last_abbv
.ht_abbvs
!= NULL
)
5587 VG_(HT_destruct
) (last_abbv
.ht_abbvs
, ML_(dinfo_free
));
5590 /* Perform three DIE-reading passes. The first pass reads DIEs from
5591 alternate .debug_info (if any), the second pass reads DIEs from
5592 .debug_info, and the third pass reads DIEs from .debug_types.
5593 Moving the body of this loop into a separate function would
5594 require a large number of arguments to be passed in, so it is
5595 kept inline instead. */
5596 for (pass
= 0; pass
< 3; ++pass
) {
5600 if (!ML_(sli_is_valid
)(escn_debug_info_alt
))
5602 /* Now loop over the Compilation Units listed in the alternate
5603 .debug_info section (see D3SPEC sec 7.5) paras 1 and 2.
5604 Each compilation unit contains a Compilation Unit Header
5605 followed by precisely one DW_TAG_compile_unit or
5606 DW_TAG_partial_unit DIE. */
5607 init_Cursor( &info
, escn_debug_info_alt
, 0, barf
,
5608 "Overrun whilst reading alternate .debug_info section" );
5609 section_size
= escn_debug_info_alt
.szB
;
5611 /* Keep track of the last line table we have seen,
5612 it might turn up again. */
5613 reset_fndn_ix_table(&fndn_ix_Table
, &debug_line_offset
, (ULong
) -1);
5615 TRACE_D3("\n------ Parsing alternate .debug_info section ------\n");
5616 } else if (pass
== 1) {
5617 /* Now loop over the Compilation Units listed in the .debug_info
5618 section (see D3SPEC sec 7.5) paras 1 and 2. Each compilation
5619 unit contains a Compilation Unit Header followed by precisely
5620 one DW_TAG_compile_unit or DW_TAG_partial_unit DIE. */
5621 init_Cursor( &info
, escn_debug_info
, 0, barf
,
5622 "Overrun whilst reading .debug_info section" );
5623 section_size
= escn_debug_info
.szB
;
5625 /* Keep track of the last line table we have seen,
5626 it might turn up again. */
5627 reset_fndn_ix_table(&fndn_ix_Table
, &debug_line_offset
, (ULong
) -1);
5629 TRACE_D3("\n------ Parsing .debug_info section ------\n");
5631 if (!ML_(sli_is_valid
)(escn_debug_types
))
5633 if (!VG_(clo_read_var_info
))
5634 continue; // Types not needed when only reading inline info.
5635 init_Cursor( &info
, escn_debug_types
, 0, barf
,
5636 "Overrun whilst reading .debug_types section" );
5637 section_size
= escn_debug_types
.szB
;
5639 /* Keep track of the last line table we have seen,
5640 it might turn up again. */
5641 reset_fndn_ix_table(&fndn_ix_Table
, &debug_line_offset
, (ULong
) -1);
5643 TRACE_D3("\n------ Parsing .debug_types section ------\n");
5646 abbv_state last_abbv
;
5647 last_abbv
.debug_abbrev_offset
= (ULong
) -1;
5648 last_abbv
.ht_abbvs
= NULL
;
5650 ULong cu_start_offset
, cu_offset_now
;
5652 /* It may be that the stated size of this CU is larger than the
5653 amount of stuff actually in it. icc9 seems to generate CUs
5654 thusly. We use these variables to figure out if this is
5655 indeed the case, and if so how many bytes we need to skip to
5656 get to the start of the next CU. Not skipping those bytes
5657 causes us to misidentify the start of the next CU, and it all
5658 goes badly wrong after that (not surprisingly). */
5659 UWord cu_size_including_IniLen
, cu_amount_used
;
5661 /* It seems icc9 finishes the DIE info before debug_info_sz
5662 bytes have been used up. So be flexible, and declare the
5663 sequence complete if there is not enough remaining bytes to
5664 hold even the smallest conceivable CU header. (11 bytes I
5666 /* JRS 23Jan09: I suspect this is no longer necessary now that
5667 the code below contains a 'while (cu_amount_used <
5668 cu_size_including_IniLen ...' style loop, which skips over
5669 any leftover bytes at the end of a CU in the case where the
5670 CU's stated size is larger than its actual size (as
5671 determined by reading all its DIEs). However, for prudence,
5672 I'll leave the following test in place. I can't see that a
5673 CU header can be smaller than 11 bytes, so I don't think
5674 there's any harm possible through the test -- it just adds
5676 Word avail
= get_remaining_length_Cursor( &info
);
5679 TRACE_D3("new_dwarf3_reader_wrk: warning: "
5680 "%ld unused bytes after end of DIEs\n", avail
);
5684 if (VG_(clo_read_var_info
)) {
5685 /* Check the varparser's stack is in a sane state. */
5686 vg_assert(varparser
.sp
== -1);
5687 /* Check the typarser's stack is in a sane state. */
5688 vg_assert(typarser
.sp
== -1);
5691 cu_start_offset
= get_position_of_Cursor( &info
);
5693 TRACE_D3(" Compilation Unit @ offset 0x%llx:\n", cu_start_offset
);
5694 /* parse_CU_header initialises the CU's hashtable of abbvs ht_abbvs */
/* Two variants of the call: this one reads abbreviations from the
   alternate abbrev section and passes True as the final flag. */
5696 parse_CU_Header( &cc
, td3
, &info
, escn_debug_abbv_alt
,
5697 last_abbv
, False
, True
);
/* This one uses the main abbrev section; 'pass == 2' is true only
   for the .debug_types pass. */
5699 parse_CU_Header( &cc
, td3
, &info
, escn_debug_abbv
,
5700 last_abbv
, pass
== 2, False
);
5702 cc
.escn_debug_str
= pass
== 0 ? escn_debug_str_alt
5704 cc
.escn_debug_ranges
= escn_debug_ranges
;
5705 cc
.escn_debug_rnglists
= escn_debug_rnglists
;
5706 cc
.escn_debug_loclists
= escn_debug_loclists
;
5707 cc
.escn_debug_loc
= escn_debug_loc
;
5708 cc
.escn_debug_line
= pass
== 0 ? escn_debug_line_alt
5710 cc
.escn_debug_info
= pass
== 0 ? escn_debug_info_alt
5712 cc
.escn_debug_types
= escn_debug_types
;
5713 cc
.escn_debug_info_alt
= escn_debug_info_alt
;
5714 cc
.escn_debug_str_alt
= escn_debug_str_alt
;
5715 cc
.escn_debug_line_str
= escn_debug_line_str
;
5716 cc
.escn_debug_addr
= escn_debug_addr
;
5717 cc
.escn_debug_str_offsets
= escn_debug_str_offsets
;
/* Offset biases: the .debug_types bias is the size of .debug_info,
   and the alternate-file bias is the size of .debug_info plus
   .debug_types.  Presumably this keeps cooked DIE offsets from the
   three sections in disjoint ranges. */
5718 cc
.types_cuOff_bias
= escn_debug_info
.szB
;
5719 cc
.alt_cuOff_bias
= escn_debug_info
.szB
+ escn_debug_types
.szB
;
5720 cc
.cu_start_offset
= cu_start_offset
;
5721 cc
.cu_addr_base
= 0;
5722 cc
.cu_has_addr_base
= False
;
5723 cc
.cu_str_offsets_base
= 0;
5724 cc
.cu_has_str_offsets_base
= False
;
5725 cc
.cu_rnglists_base
= 0;
5726 cc
.cu_has_rnglists_base
= False
;
5727 cc
.cu_loclists_base
= 0;
5728 cc
.cu_has_loclists_base
= False
;
5730 /* The CU's svma can be deduced by looking at the AT_low_pc
5731 value in the top level TAG_compile_unit, which is the topmost
5732 DIE. We'll leave it for the 'varparser' to acquire that info
5733 and fill it in -- since it is the only party to want to know
5735 cc
.cu_svma_known
= False
;
5738 if (VG_(clo_read_var_info
)) {
5739 cc
.signature_types
= signature_types
;
5741 /* Create a fake outermost-level range covering the entire
5742 address range. So we always have *something* to catch all
5743 variable declarations. */
5744 varstack_push( &cc
, &varparser
, td3
,
5745 unitary_range_list(0UL, ~0UL),
5746 -1, False
/*isFunc*/, NULL
/*fbGX*/ );
5750 /* Now read the one-and-only top-level DIE for this CU. */
5751 vg_assert(!VG_(clo_read_var_info
) || varparser
.sp
== 0);
5752 read_DIE( rangestree
,
5753 tyents
, tempvars
, gexprs
,
5754 &typarser
, &varparser
, &inlparser
,
5755 &fndn_ix_Table
, &debug_line_offset
,
5756 &info
, td3
, &cc
, 0 );
5758 cu_offset_now
= get_position_of_Cursor( &info
);
5760 if (0) VG_(printf
)("Travelled: %llu size %llu\n",
5761 cu_offset_now
- cc
.cu_start_offset
,
5762 cc
.unit_length
+ (cc
.is_dw64
? 12 : 4));
5764 /* How big the CU claims it is .. */
5765 cu_size_including_IniLen
= cc
.unit_length
+ (cc
.is_dw64
? 12 : 4);
5766 /* .. vs how big we have found it to be */
5767 cu_amount_used
= cu_offset_now
- cc
.cu_start_offset
;
5769 if (1) TRACE_D3("offset now %llu, d-i-size %llu\n",
5770 cu_offset_now
, section_size
);
5771 if (cu_offset_now
> section_size
)
5772 barf("toplevel DIEs beyond end of CU");
5774 /* If the CU is bigger than it claims to be, we've got a serious
5776 if (cu_amount_used
> cu_size_including_IniLen
)
5777 barf("CU's actual size appears to be larger than it claims it is");
5779 /* If the CU is smaller than it claims to be, we need to skip some
5780 bytes. Loop updates cu_offset_new and cu_amount_used. */
5781 while (cu_amount_used
< cu_size_including_IniLen
5782 && get_remaining_length_Cursor( &info
) > 0) {
5783 if (0) VG_(printf
)("SKIP\n");
5784 (void)get_UChar( &info
);
5785 cu_offset_now
= get_position_of_Cursor( &info
);
5786 cu_amount_used
= cu_offset_now
- cc
.cu_start_offset
;
5789 if (VG_(clo_read_var_info
)) {
5790 /* Preen to level -2. DIEs have level >= 0 so -2 cannot occur
5791 anywhere else at all. Our fake the-entire-address-space
5792 range is at level -1, so preening to -2 should completely
5793 empty the stack out. */
5795 varstack_preen( &varparser
, td3
, -2 );
5796 /* Similarly, empty the type stack out. */
5797 typestack_preen( &typarser
, td3
, -2 );
5800 last_abbv
= cc
.abbv
;
5802 if (cu_offset_now
== section_size
)
5804 /* else keep going */
5806 if (last_abbv
.ht_abbvs
!= NULL
)
5807 VG_(HT_destruct
) (last_abbv
.ht_abbvs
, ML_(dinfo_free
));
5810 if (fndn_ix_Table
!= NULL
)
5811 VG_(deleteXA
)(fndn_ix_Table
);
5813 if (VG_(clo_read_var_info
)) {
5814 /* From here on we're post-processing the stuff we got
5815 out of the .debug_info section. */
5818 ML_(pp_TyEnts
)(tyents
, "Initial type entity (TyEnt) array");
5820 TRACE_D3("------ Compressing type entries ------\n");
5823 tyents_cache
= ML_(dinfo_zalloc
)( "di.readdwarf3.ndrw.6",
5824 sizeof(TyEntIndexCache
) );
5825 ML_(TyEntIndexCache__invalidate
)( tyents_cache
);
5826 dedup_types( td3
, tyents
, tyents_cache
);
5829 ML_(pp_TyEnts
)(tyents
, "After type entity (TyEnt) compression");
5833 TRACE_D3("------ Resolving the types of variables ------\n" );
5834 resolve_variable_types( barf
, tyents
, tyents_cache
, tempvars
);
5836 /* Copy all the non-INDIR tyents into a new table. For large
5837 .so's, about 90% of the tyents will by now have been resolved to
5838 INDIRs, and we no longer need them, and so don't need to store
5841 = VG_(newXA
)( ML_(dinfo_zalloc
),
5842 "di.readdwarf3.ndrw.7 (TyEnt to-keep array)",
5843 ML_(dinfo_free
), sizeof(TyEnt
) );
5844 n
= VG_(sizeXA
)( tyents
);
5845 for (i
= 0; i
< n
; i
++) {
5846 TyEnt
* ent
= VG_(indexXA
)( tyents
, i
);
5847 if (ent
->tag
!= Te_INDIR
)
5848 VG_(addToXA
)( tyents_to_keep
, ent
);
5851 VG_(deleteXA
)( tyents
);
5853 ML_(dinfo_free
)( tyents_cache
);
5854 tyents_cache
= NULL
;
5856 /* Sort tyents_to_keep so we can lookup in it. A complete (if
5857 minor) waste of time, since tyents itself is sorted, but
5858 necessary since VG_(lookupXA) refuses to cooperate if we
5860 VG_(setCmpFnXA
)( tyents_to_keep
, (XACmpFn_t
) ML_(TyEnt__cmp_by_cuOff_only
) );
5861 VG_(sortXA
)( tyents_to_keep
);
5863 /* Enable cacheing on tyents_to_keep */
5864 tyents_to_keep_cache
5865 = ML_(dinfo_zalloc
)( "di.readdwarf3.ndrw.8",
5866 sizeof(TyEntIndexCache
) );
5867 ML_(TyEntIndexCache__invalidate
)( tyents_to_keep_cache
);
5869 /* And record the tyents in the DebugInfo. We do this before
5870 starting to hand variables to ML_(addVar), since if ML_(addVar)
5871 wants to do debug printing (of the types of said vars) then it
5872 will need the tyents.*/
5873 vg_assert(!di
->admin_tyents
);
5874 di
->admin_tyents
= tyents_to_keep
;
5876 /* Bias all the location expressions. */
5878 TRACE_D3("------ Biasing the location expressions ------\n" );
5880 n
= VG_(sizeXA
)( gexprs
);
5881 for (i
= 0; i
< n
; i
++) {
5882 gexpr
= *(GExpr
**)VG_(indexXA
)( gexprs
, i
);
5883 bias_GX( gexpr
, di
);
5887 TRACE_D3("------ Acquired the following variables: ------\n\n");
5889 /* Park (pointers to) all the vars in an XArray, so we can look up
5890 abstract origins quickly. The array is sorted (hence, looked-up
5891 by) the .dioff fields. Since the .dioffs should be in strictly
5892 ascending order, there is no need to sort the array after
5893 construction. The ascendingness is however asserted for. */
5895 = VG_(newXA
)( ML_(dinfo_zalloc
), "di.readdwarf3.ndrw.9",
5899 n
= VG_(sizeXA
)( tempvars
);
/* Scan for the first variable whose dioff lies below
   escn_debug_info.szB + escn_debug_types.szB (the same sum that is
   used as alt_cuOff_bias).  The scan only advances when
   escn_debug_info_alt.szB is nonzero. */
5900 Word first_primary_var
= 0;
5901 for (first_primary_var
= 0;
5902 escn_debug_info_alt
.szB
/*really?*/ && first_primary_var
< n
;
5903 first_primary_var
++) {
5904 varp
= *(TempVar
**)VG_(indexXA
)( tempvars
, first_primary_var
);
5905 if (varp
->dioff
< escn_debug_info
.szB
+ escn_debug_types
.szB
)
/* Fill dioff_lookup_tab in rotated order, starting at
   first_primary_var and wrapping modulo n, asserting that
   neighbouring dioffs ascend. */
5908 for (i
= 0; i
< n
; i
++) {
5909 varp
= *(TempVar
**)VG_(indexXA
)( tempvars
, (i
+ first_primary_var
) % n
);
5910 if (i
> first_primary_var
) {
5911 varp2
= *(TempVar
**)VG_(indexXA
)( tempvars
,
5912 (i
+ first_primary_var
- 1) % n
);
5913 /* why should this hold? Only, I think, because we've
5914 constructed the array by reading .debug_info sequentially,
5915 and so the array .dioff fields should reflect that, and be
5916 strictly ascending. */
5917 vg_assert(varp2
->dioff
< varp
->dioff
);
5919 VG_(addToXA
)( dioff_lookup_tab
, &varp
);
5921 VG_(setCmpFnXA
)( dioff_lookup_tab
, cmp_TempVar_by_dioff
);
5922 VG_(sortXA
)( dioff_lookup_tab
); /* POINTLESS; FIXME: rm */
5924 /* Now visit each var. Collect up as much info as possible for
5925 each var and hand it to ML_(addVar). */
5926 n
= VG_(sizeXA
)( tempvars
);
5927 for (j
= 0; j
< n
; j
++) {
5929 varp
= *(TempVar
**)VG_(indexXA
)( tempvars
, j
);
5931 /* Possibly show .. */
5933 VG_(printf
)("<%lx> addVar: level %d: %s :: ",
5936 varp
->name
? varp
->name
: "<anon_var>" );
5938 ML_(pp_TyEnt_C_ishly
)( tyents_to_keep
, varp
->typeR
);
5940 VG_(printf
)("NULL");
5942 VG_(printf
)("\n Loc=");
5944 ML_(pp_GX
)(varp
->gexpr
);
5946 VG_(printf
)("NULL");
5950 VG_(printf
)(" FrB=");
5951 ML_(pp_GX
)( varp
->fbGX
);
5954 VG_(printf
)(" FrB=none\n");
5956 VG_(printf
)(" declared at: %u %s:%d\n",
5958 ML_(fndn_ix2filename
) (di
, varp
->fndn_ix
),
5960 if (varp
->absOri
!= (UWord
)D3_INVALID_CUOFF
)
5961 VG_(printf
)(" abstract origin: <%lx>\n", varp
->absOri
);
5964 /* Skip variables which have no location. These must be
5965 abstract instances; they are useless as-is since with no
5966 location they have no specified memory location. They will
5967 presumably be referred to via the absOri fields of other
5970 TRACE_D3(" SKIP (no location)\n\n");
5974 /* So it has a location, at least. If it refers to some other
5975 entry through its absOri field, pull in further info through
5977 if (varp
->absOri
!= (UWord
)D3_INVALID_CUOFF
) {
5979 Word ixFirst
, ixLast
;
5981 TempVar
* keyp
= &key
;
5983 VG_(memset
)(&key
, 0, sizeof(key
)); /* not necessary */
5984 key
.dioff
= varp
->absOri
; /* this is what we want to find */
5985 found
= VG_(lookupXA
)( dioff_lookup_tab
, &keyp
,
5986 &ixFirst
, &ixLast
);
5988 /* barf("DW_AT_abstract_origin can't be resolved"); */
5989 TRACE_D3(" SKIP (DW_AT_abstract_origin can't be resolved)\n\n");
5992 /* If the following fails, there is more than one entry with
5993 the same dioff. Which can't happen. */
5994 vg_assert(ixFirst
== ixLast
);
5995 varAI
= *(TempVar
**)VG_(indexXA
)( dioff_lookup_tab
, ixFirst
);
5998 vg_assert(varAI
->dioff
== varp
->absOri
);
6000 /* Copy what useful info we can. */
6001 if (varAI
->typeR
&& !varp
->typeR
)
6002 varp
->typeR
= varAI
->typeR
;
6003 if (varAI
->name
&& !varp
->name
)
6004 varp
->name
= varAI
->name
;
6005 if (varAI
->fndn_ix
&& !varp
->fndn_ix
)
6006 varp
->fndn_ix
= varAI
->fndn_ix
;
6007 if (varAI
->fLine
> 0 && varp
->fLine
== 0)
6008 varp
->fLine
= varAI
->fLine
;
6011 /* Give it a name if it doesn't have one. */
6013 varp
->name
= ML_(addStr
)( di
, "<anon_var>", -1 );
6015 /* So now does it have enough info to be useful? */
6016 /* NOTE: re typeR: this is a hack. If typeR is Te_UNKNOWN then
6017 the type didn't get resolved. Really, in that case
6018 something's broken earlier on, and should be fixed, rather
6019 than just skipping the variable. */
6020 ent
= ML_(TyEnts__index_by_cuOff
)( tyents_to_keep
,
6021 tyents_to_keep_cache
,
6023 /* The next two assertions should be guaranteed by
6024 our previous call to resolve_variable_types. */
6026 vg_assert(ML_(TyEnt__is_type
)(ent
) || ent
->tag
== Te_UNKNOWN
);
6028 if (ent
->tag
== Te_UNKNOWN
) continue;
6030 vg_assert(varp
->gexpr
);
6031 vg_assert(varp
->name
);
6032 vg_assert(varp
->typeR
);
6033 vg_assert(varp
->level
>= 0);
6035 /* Ok. So we're going to keep it. Call ML_(addVar) once for
6036 each address range in which the variable exists. */
6037 TRACE_D3(" ACQUIRE for range(s) ");
6038 { AddrRange oneRange
;
6039 AddrRange
* varPcRanges
;
6041 /* Set up to iterate over address ranges, however
6043 if (varp
->nRanges
== 0 || varp
->nRanges
== 1) {
6044 vg_assert(!varp
->rngMany
);
6045 if (varp
->nRanges
== 0) {
6046 vg_assert(varp
->rngOneMin
== 0);
6047 vg_assert(varp
->rngOneMax
== 0);
6049 nVarPcRanges
= varp
->nRanges
;
6050 oneRange
.aMin
= varp
->rngOneMin
;
6051 oneRange
.aMax
= varp
->rngOneMax
;
6052 varPcRanges
= &oneRange
;
6054 vg_assert(varp
->rngMany
);
6055 vg_assert(varp
->rngOneMin
== 0);
6056 vg_assert(varp
->rngOneMax
== 0);
6057 nVarPcRanges
= VG_(sizeXA
)(varp
->rngMany
);
6058 vg_assert(nVarPcRanges
>= 2);
6059 vg_assert(nVarPcRanges
== (Word
)varp
->nRanges
);
6060 varPcRanges
= VG_(indexXA
)(varp
->rngMany
, 0);
6062 if (varp
->level
== 0)
6063 vg_assert( nVarPcRanges
== 1 );
6065 for (i
= 0; i
< nVarPcRanges
; i
++) {
6066 Addr pcMin
= varPcRanges
[i
].aMin
;
6067 Addr pcMax
= varPcRanges
[i
].aMax
;
6068 vg_assert(pcMin
<= pcMax
);
6069 /* Level 0 is the global address range. So at level 0 we
6070 don't want to bias pcMin/pcMax; but at all other levels
6071 we do since those are derived from svmas in the Dwarf
6072 we're reading. Be paranoid ... */
6073 if (varp
->level
== 0) {
6074 vg_assert(pcMin
== (Addr
)0);
6075 vg_assert(pcMax
== ~(Addr
)0);
6077 /* vg_assert(pcMin > (Addr)0);
6078 No .. we can legitimately expect to see ranges like
6079 0x0-0x11D (pre-biasing, of course). */
6080 vg_assert(pcMax
< ~(Addr
)0);
6083 /* Apply text biasing, for non-global variables. */
6084 if (varp
->level
> 0) {
6085 pcMin
+= di
->text_debug_bias
;
6086 pcMax
+= di
->text_debug_bias
;
6089 if (i
> 0 && (i
%2) == 0)
6091 TRACE_D3("[%#lx,%#lx] ", pcMin
, pcMax
);
6096 varp
->name
, varp
->typeR
,
6097 varp
->gexpr
, varp
->fbGX
,
6098 varp
->fndn_ix
, varp
->fLine
, td3
6104 /* and move on to the next var */
6107 /* Now free all the TempVars */
6108 n
= VG_(sizeXA
)( tempvars
);
6109 for (i
= 0; i
< n
; i
++) {
6110 varp
= *(TempVar
**)VG_(indexXA
)( tempvars
, i
);
6111 ML_(dinfo_free
)(varp
);
6113 VG_(deleteXA
)( tempvars
);
6116 /* and the temp lookup table */
6117 VG_(deleteXA
)( dioff_lookup_tab
);
6119 /* and the ranges tree. Note that we need to also free the XArrays
6120 which constitute the keys, hence pass VG_(deleteXA) as a
6122 VG_(deleteFM
)( rangestree
, (void(*)(UWord
))VG_(deleteXA
), NULL
);
6124 /* and the tyents_to_keep cache */
6125 ML_(dinfo_free
)( tyents_to_keep_cache
);
6126 tyents_to_keep_cache
= NULL
;
6128 /* And the signatured type hash. */
6129 VG_(HT_destruct
) ( signature_types
, ML_(dinfo_free
) );
6131 /* record the GExprs in di so they can be freed later */
6132 vg_assert(!di
->admin_gexprs
);
6133 di
->admin_gexprs
= gexprs
;
6136 // Free up dynamically allocated memory
6137 if (VG_(clo_read_var_info
)) {
6138 type_parser_release(&typarser
);
6139 var_parser_release(&varparser
);
6144 /*------------------------------------------------------------*/
6146 /*--- The "new" DWARF3 reader -- top level control logic ---*/
6148 /*------------------------------------------------------------*/
/* State for the longjmp-based error exit of the reader:
     d3rd_jmpbuf_valid  - set to True just before the setjmp and back
                          to False once the worker has returned or
                          jumped; barf asserts it is set.
     d3rd_jmpbuf_reason - message stored by barf before it jumps;
                          reset to NULL when the reader is done.
     d3rd_jmpbuf        - the jump buffer itself. */
6150 static Bool d3rd_jmpbuf_valid
= False
;
6151 static const HChar
* d3rd_jmpbuf_reason
= NULL
;
6152 static VG_MINIMAL_JMP_BUF(d3rd_jmpbuf
);
/* Error exit passed around as the 'barf' callback.  Asserts that a
   jump target is armed, records 'reason' in d3rd_jmpbuf_reason and
   longjmps to d3rd_jmpbuf.  Declared noreturn. */
6154 static __attribute__((noreturn
)) void barf ( const HChar
* reason
) {
6155 vg_assert(d3rd_jmpbuf_valid
);
6156 d3rd_jmpbuf_reason
= reason
;
6157 VG_MINIMAL_LONGJMP(d3rd_jmpbuf
);
/* Top-level entry point of the DWARF3 reader.  Arms d3rd_jmpbuf with
   a setjmp and runs new_dwarf3_reader_wrk with barf as its error
   callback, forwarding all section slices unchanged.
     - If the worker returns normally, success is traced.
     - If it bailed out via barf, the stored reason is asserted to be
       non-NULL, failure is traced and the reason is reported through
       ML_(symerr).
   In both cases d3rd_jmpbuf_valid is cleared, and d3rd_jmpbuf_reason
   is reset to NULL at the end. */
6164 ML_(new_dwarf3_reader
) (
6166 DiSlice escn_debug_info
, DiSlice escn_debug_types
,
6167 DiSlice escn_debug_abbv
, DiSlice escn_debug_line
,
6168 DiSlice escn_debug_str
, DiSlice escn_debug_ranges
,
6169 DiSlice escn_debug_rnglists
, DiSlice escn_debug_loclists
,
6170 DiSlice escn_debug_loc
, DiSlice escn_debug_info_alt
,
6171 DiSlice escn_debug_abbv_alt
, DiSlice escn_debug_line_alt
,
6172 DiSlice escn_debug_str_alt
, DiSlice escn_debug_line_str
,
6173 DiSlice escn_debug_addr
, DiSlice escn_debug_str_offsets
/* NOTE(review): both locals are volatile, presumably because they
   are live across the setjmp/longjmp below -- confirm. */
6176 volatile Int jumped
;
6177 volatile Bool td3
= di
->trace_symtab
;
6179 /* Run the _wrk function to read the dwarf3. If it succeeds, it
6180 just returns normally. If there is any failure, it longjmp's
6181 back here, having first set d3rd_jmpbuf_reason to something
6183 vg_assert(d3rd_jmpbuf_valid
== False
);
6184 vg_assert(d3rd_jmpbuf_reason
== NULL
);
6186 d3rd_jmpbuf_valid
= True
;
6187 jumped
= VG_MINIMAL_SETJMP(d3rd_jmpbuf
);
6190 new_dwarf3_reader_wrk( di
, barf
,
6191 escn_debug_info
, escn_debug_types
,
6192 escn_debug_abbv
, escn_debug_line
,
6193 escn_debug_str
, escn_debug_ranges
,
6194 escn_debug_rnglists
, escn_debug_loclists
,
6195 escn_debug_loc
, escn_debug_info_alt
,
6196 escn_debug_abbv_alt
, escn_debug_line_alt
,
6197 escn_debug_str_alt
, escn_debug_line_str
,
6198 escn_debug_addr
, escn_debug_str_offsets
);
6199 d3rd_jmpbuf_valid
= False
;
6200 TRACE_D3("\n------ .debug_info reading was successful ------\n");
6203 d3rd_jmpbuf_valid
= False
;
6204 /* Can't longjump without giving some sort of reason. */
6205 vg_assert(d3rd_jmpbuf_reason
!= NULL
);
6207 TRACE_D3("\n------ .debug_info reading failed ------\n");
6209 ML_(symerr
)(di
, True
, d3rd_jmpbuf_reason
);
6212 d3rd_jmpbuf_valid
= False
;
6213 d3rd_jmpbuf_reason
= NULL
;
6218 /* --- Unused code fragments which might be useful one day. --- */
/* The fragment below dumps .debug_aranges: for each table it reads
   the initial length, version, .debug_info offset, address size and
   segment size, pads the cursor to a multiple of 2 * asize, and then
   prints (address, length) pairs up to a (0, 0) terminator.
   NOTE(review): init_Cursor is called here with an image pointer and
   a size, whereas the other init_Cursor calls in this file pass a
   DiSlice; the fragment would presumably need updating before it
   could be revived. */
6221 /* Read the arange tables */
6223 TRACE_SYMTAB("\n------ The contents of .debug_arange ------\n");
6224 init_Cursor( &aranges
, debug_aranges_img
,
6225 debug_aranges_sz
, 0, barf
,
6226 "Overrun whilst reading .debug_aranges section" );
6228 ULong len
, d_i_offset
;
6231 UChar asize
, segsize
;
6233 if (is_at_end_Cursor( &aranges
))
6235 /* Read one arange thingy */
6236 /* initial_length field */
6237 len
= get_Initial_Length( &is64
, &aranges
,
6238 "in .debug_aranges: invalid initial-length field" );
6239 version
= get_UShort( &aranges
);
6240 d_i_offset
= get_Dwarfish_UWord( &aranges
, is64
);
6241 asize
= get_UChar( &aranges
);
6242 segsize
= get_UChar( &aranges
);
6243 TRACE_D3(" Length: %llu\n", len
);
6244 TRACE_D3(" Version: %d\n", (Int
)version
);
6245 TRACE_D3(" Offset into .debug_info: %llx\n", d_i_offset
);
6246 TRACE_D3(" Pointer Size: %d\n", (Int
)asize
);
6247 TRACE_D3(" Segment Size: %d\n", (Int
)segsize
);
6249 TRACE_D3(" Address Length\n");
/* NOTE(review): asize is taken straight from the section with no
   visible check; a value of 0 would make the modulus below a
   division by zero. */
6251 while ((get_position_of_Cursor( &aranges
) % (2 * asize
)) > 0) {
6252 (void)get_UChar( & aranges
);
6255 ULong address
= get_Dwarfish_UWord( &aranges
, asize
==8 );
6256 ULong length
= get_Dwarfish_UWord( &aranges
, asize
==8 );
6257 TRACE_D3(" 0x%016llx 0x%llx\n", address
, length
);
6258 if (address
== 0 && length
== 0) break;
6264 #endif // defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_solaris) || defined(VGO_freebsd)
6266 /*--------------------------------------------------------------------*/
6268 /*--------------------------------------------------------------------*/