1 /* -*- mode: C; c-basic-offset: 3; -*- */
3 /*--------------------------------------------------------------------*/
4 /*--- Read DWARF3/4 ".debug_info" sections (DIE trees). ---*/
5 /*--- readdwarf3.c ---*/
6 /*--------------------------------------------------------------------*/
9 This file is part of Valgrind, a dynamic binary instrumentation
12 Copyright (C) 2008-2017 OpenWorks LLP
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, see <http://www.gnu.org/licenses/>.
28 The GNU General Public License is contained in the file COPYING.
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
36 #if defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_solaris) || defined(VGO_freebsd)
38 /* REFERENCE (without which this code will not make much sense):
40 DWARF Debugging Information Format, Version 3,
41 dated 20 December 2005 (the "D3 spec").
43 Available at http://www.dwarfstd.org/Dwarf3.pdf. There's also a
44 .doc (MS Word) version, but for some reason the section numbers
45 between the Word and PDF versions differ by 1 in the first digit.
46 All section references in this code are to the PDF version.
50 DW_TAG_{const,volatile}_type no DW_AT_type is allowed; it is
51 assumed to mean "const void" or "volatile void" respectively.
52 GDB appears to interpret them like this, anyway.
54 In many cases it is important to know the svma of a CU (the "base
55 address of the CU", as the D3 spec calls it). There are some
56 situations in which the spec implies this value is unknown, but the
57 Dwarf3 produced by gcc-4.1 seems to assume it is not unknown but
58 merely zero when not explicitly stated. So we too have to make
61 POTENTIAL BUG? Spotted 6 Sept 08. Why doesn't
62 unitary_range_list() bias the resulting range list in the same way
63 that its more general cousin, get_range_list(), does? I don't
68 get rid of cu_svma_known and document the assumed-zero svma hack.
70 ML_(sizeOfType): differentiate between zero sized types and types
71 for which the size is unknown. Is this important? I don't know.
73 DW_TAG_array_types: deal with explicit sizes (currently we compute
74 the size from the bounds and the element size, although that's
75 fragile, if the bounds are incompletely specified, or completely
78 Document reason for difference (by 1) of stack preening depth in
79 parse_var_DIE vs parse_type_DIE.
81 Don't hand to ML_(addVars), vars whose locations are entirely in
82 registers (DW_OP_reg*). This is merely a space-saving
83 optimisation, as ML_(evaluate_Dwarf3_Expr) should handle these
84 expressions correctly, by failing to evaluate them and hence
85 effectively ignoring the variable with which they are associated.
87 Deal with DW_TAG_array_types which have element size != stride
89 In some cases, the info for a variable is split between two
90 different DIEs (generally a declarer and a definer). We punt on
91 these. Could do better here.
93 The 'data_bias' argument passed to the expression evaluator
94 (ML_(evaluate_Dwarf3_Expr)) should really be changed to a
95 MaybeUWord, to make it clear when we do vs don't know what it is
96 for the evaluation of an expression. At the moment zero is passed
97 for this parameter in the don't know case. That's a bit fragile
98 and obscure; using a MaybeUWord would be clearer.
100 POTENTIAL PERFORMANCE IMPROVEMENTS:
102 Currently, duplicate removal and all other queries for the type
103 entities array are done using cuOffset-based pointing, which
104 involves a binary search (VG_(lookupXA)) for each access. This is
105 wildly inefficient, although simple. It would be better to
106 translate all the cuOffset-based references (iow, all the "R" and
107 "Rs" fields in the TyEnts in 'tyents') to direct index numbers in
108 'tyents' right at the start of dedup_types(), and use direct
109 indexing (VG_(indexXA)) wherever possible after that.
111 cmp__XArrays_of_AddrRange is also a performance bottleneck. Move
112 VG_(indexXA) into pub_tool_xarray.h so it can be inlined at all use
113 points, and possibly also make an _UNCHECKED version which skips
114 the range checks in performance-critical situations such as this.
116 Handle interaction between read_DIE and parse_{var,type}_DIE
117 better. Currently read_DIE reads the entire DIE just to find where
118 the end is (and for debug printing), so that it can later reliably
119 move the cursor to the end regardless of what parse_{var,type}_DIE
120 do. This means many DIEs (most, even?) are read twice. It would
121 be smarter to make parse_{var,type}_DIE return a Bool indicating
122 whether or not they advanced the DIE cursor, and only if they
123 didn't should read_DIE itself read through the DIE.
125 ML_(addVar) and add_var_to_arange: quite a lot of DiAddrRanges have
126 zero variables in their .vars XArray. Rather than have an XArray
127 with zero elements (which uses 2 malloc'd blocks), allow the .vars
128 pointer to be NULL in this case.
130 More generally, reduce the amount of memory allocated and freed
131 while reading Dwarf3 type/variable information. Even modest (20MB)
132 objects cause this module to allocate and free hundreds of
133 thousands of small blocks, and ML_(arena_malloc) and its various
134 groupies always show up at the top of performance profiles. */
136 #include "pub_core_basics.h"
137 #include "pub_core_debuginfo.h"
138 #include "pub_core_libcbase.h"
139 #include "pub_core_libcassert.h"
140 #include "pub_core_libcprint.h"
141 #include "pub_core_libcsetjmp.h" // setjmp facilities
142 #include "pub_core_hashtable.h"
143 #include "pub_core_options.h"
144 #include "pub_core_tooliface.h" /* VG_(needs) */
145 #include "pub_core_xarray.h"
146 #include "pub_core_wordfm.h"
147 #include "priv_misc.h" /* dinfo_zalloc/free */
148 #include "priv_image.h"
149 #include "priv_tytypes.h"
150 #include "priv_d3basics.h"
151 #include "priv_storage.h"
152 #include "priv_readdwarf3.h" /* self */
155 /*------------------------------------------------------------*/
157 /*--- Basic machinery for parsing DIEs. ---*/
159 /*------------------------------------------------------------*/
161 #define TRACE_D3(format, args...) \
162 if (UNLIKELY(td3)) { VG_(printf)(format, ## args); }
163 #define TD3 (UNLIKELY(td3))
165 #define D3_INVALID_CUOFF ((UWord)(-1UL))
166 #define D3_FAKEVOID_CUOFF ((UWord)(-2UL))
170 DiSlice sli
; // to which this cursor applies
171 DiOffT sli_next
; // offset in underlying DiImage; must be >= sli.ioff
172 void (*barf
)( const HChar
* ) __attribute__((noreturn
));
173 const HChar
* barfstr
;
177 static inline Bool
is_sane_Cursor ( const Cursor
* c
) {
178 if (!c
) return False
;
179 if (!c
->barf
) return False
;
180 if (!c
->barfstr
) return False
;
181 if (!ML_(sli_is_valid
)(c
->sli
)) return False
;
182 if (c
->sli
.ioff
== DiOffT_INVALID
) return False
;
183 if (c
->sli_next
< c
->sli
.ioff
) return False
;
187 // Initialise a cursor from a DiSlice (ELF section, really) so as to
188 // start reading at offset |sli_initial_offset| from the start of the
190 static void init_Cursor ( /*OUT*/Cursor
* c
,
192 ULong sli_initial_offset
,
193 __attribute__((noreturn
)) void (*barf
)(const HChar
*),
194 const HChar
* barfstr
)
197 VG_(bzero_inline
)(c
, sizeof(*c
));
199 c
->sli_next
= c
->sli
.ioff
+ sli_initial_offset
;
201 c
->barfstr
= barfstr
;
202 vg_assert(is_sane_Cursor(c
));
205 static Bool
is_at_end_Cursor ( const Cursor
* c
) {
206 vg_assert(is_sane_Cursor(c
));
207 return c
->sli_next
>= c
->sli
.ioff
+ c
->sli
.szB
;
210 static inline ULong
get_position_of_Cursor ( const Cursor
* c
) {
211 vg_assert(is_sane_Cursor(c
));
212 return c
->sli_next
- c
->sli
.ioff
;
214 static inline void set_position_of_Cursor ( Cursor
* c
, ULong pos
) {
215 c
->sli_next
= c
->sli
.ioff
+ pos
;
216 vg_assert(is_sane_Cursor(c
));
218 static inline void advance_position_of_Cursor ( Cursor
* c
, ULong delta
) {
219 c
->sli_next
+= delta
;
220 vg_assert(is_sane_Cursor(c
));
223 static /*signed*/Long
get_remaining_length_Cursor ( const Cursor
* c
) {
224 vg_assert(is_sane_Cursor(c
));
225 return c
->sli
.ioff
+ c
->sli
.szB
- c
->sli_next
;
228 //static void* get_address_of_Cursor ( Cursor* c ) {
229 // vg_assert(is_sane_Cursor(c));
230 // return &c->region_start_img[ c->region_next ];
233 static DiCursor
get_DiCursor_from_Cursor ( const Cursor
* c
) {
234 return mk_DiCursor(c
->sli
.img
, c
->sli_next
);
237 /* FIXME: document assumptions on endianness for
238 get_UShort/UInt/ULong. */
239 static inline UChar
get_UChar ( Cursor
* c
) {
241 vg_assert(is_sane_Cursor(c
));
242 if (c
->sli_next
+ sizeof(UChar
) > c
->sli
.ioff
+ c
->sli
.szB
) {
247 r
= ML_(img_get_UChar
)(c
->sli
.img
, c
->sli_next
);
248 c
->sli_next
+= sizeof(UChar
);
251 static UShort
get_UShort ( Cursor
* c
) {
253 vg_assert(is_sane_Cursor(c
));
254 if (c
->sli_next
+ sizeof(UShort
) > c
->sli
.ioff
+ c
->sli
.szB
) {
259 r
= ML_(img_get_UShort
)(c
->sli
.img
, c
->sli_next
);
260 c
->sli_next
+= sizeof(UShort
);
263 static UInt
get_UInt ( Cursor
* c
) {
265 vg_assert(is_sane_Cursor(c
));
266 if (c
->sli_next
+ sizeof(UInt
) > c
->sli
.ioff
+ c
->sli
.szB
) {
271 r
= ML_(img_get_UInt
)(c
->sli
.img
, c
->sli_next
);
272 c
->sli_next
+= sizeof(UInt
);
275 static ULong
get_ULong ( Cursor
* c
) {
277 vg_assert(is_sane_Cursor(c
));
278 if (c
->sli_next
+ sizeof(ULong
) > c
->sli
.ioff
+ c
->sli
.szB
) {
283 r
= ML_(img_get_ULong
)(c
->sli
.img
, c
->sli_next
);
284 c
->sli_next
+= sizeof(ULong
);
287 static ULong
get_ULEB128 ( Cursor
* c
) {
291 /* unroll first iteration */
292 byte
= get_UChar( c
);
293 result
= (ULong
)(byte
& 0x7f);
294 if (LIKELY(!(byte
& 0x80))) return result
;
296 /* end unroll first iteration */
298 byte
= get_UChar( c
);
299 result
|= ((ULong
)(byte
& 0x7f)) << shift
;
301 } while (byte
& 0x80);
304 static Long
get_SLEB128 ( Cursor
* c
) {
310 result
|= ((ULong
)(byte
& 0x7f)) << shift
;
312 } while (byte
& 0x80);
313 if (shift
< 64 && (byte
& 0x40))
314 result
|= -(1ULL << shift
);
317 static UInt
get_UInt3 ( Cursor
* c
) {
319 vg_assert(is_sane_Cursor(c
));
320 if (c
->sli_next
+ 3 > c
->sli
.ioff
+ c
->sli
.szB
) {
325 c1
= ML_(img_get_UChar
)(c
->sli
.img
, c
->sli_next
);
326 c2
= ML_(img_get_UChar
)(c
->sli
.img
, c
->sli_next
+1);
327 c3
= ML_(img_get_UChar
)(c
->sli
.img
, c
->sli_next
+2);
329 #if defined(VG_BIGENDIAN)
330 return c1
<< 16 | c2
<< 8 | c3
;
332 return c1
| c2
<< 8 | c3
<< 16;
337 /* Assume 'c' points to the start of a string. Return a DiCursor of
338 whatever it points at, and advance it past the terminating zero.
339 This makes it safe for the caller to then copy the string with
340 ML_(addStr), since (w.r.t. image overruns) the process of advancing
341 past the terminating zero will already have "vetted" the string. */
342 static DiCursor
get_AsciiZ ( Cursor
* c
) {
344 DiCursor res
= get_DiCursor_from_Cursor(c
);
345 do { uc
= get_UChar(c
); } while (uc
!= 0);
349 static ULong
peek_ULEB128 ( Cursor
* c
) {
350 DiOffT here
= c
->sli_next
;
351 ULong r
= get_ULEB128( c
);
355 static UChar
peek_UChar ( Cursor
* c
) {
356 DiOffT here
= c
->sli_next
;
357 UChar r
= get_UChar( c
);
362 static ULong
get_Dwarfish_UWord ( Cursor
* c
, Bool is_dw64
) {
363 return is_dw64
? get_ULong(c
) : (ULong
) get_UInt(c
);
366 static UWord
get_UWord ( Cursor
* c
) {
367 vg_assert(sizeof(UWord
) == sizeof(void*));
368 if (sizeof(UWord
) == 4) return get_UInt(c
);
369 if (sizeof(UWord
) == 8) return get_ULong(c
);
373 /* Read a DWARF3 'Initial Length' field */
374 static ULong
get_Initial_Length ( /*OUT*/Bool
* is64
,
376 const HChar
* barfMsg
)
382 if (w32
>= 0xFFFFFFF0 && w32
< 0xFFFFFFFF) {
385 else if (w32
== 0xFFFFFFFF) {
387 w64
= get_ULong( c
);
396 /*------------------------------------------------------------*/
398 /*--- "CUConst" structure ---*/
400 /*------------------------------------------------------------*/
404 ULong at_name
; // Dwarf Attribute name
405 ULong at_form
; // Dwarf Attribute form
406 Long at_val
; // Dwarf Attribute value (for implicit_const)
407 UInt skip_szB
; // Nr of bytes skippable from here ...
408 UInt next_nf
; // ... to reach this attr/form index in the g_abbv.nf
410 /* skip_szB and next_nf are used to optimise the skipping of uninteresting DIEs.
411 Each name_form maintains how many (fixed) nr of bytes can be skipped from
412 the beginning of this form till the next attr/form to look at.
413 The next form to look at can be:
414 an 'interesting' attr/form to read while skipping a DIE
415 (currently, this is only DW_AT_sibling)
417 a variable length form which must be read to be skipped.
418 For a variable length form, the skip_szB will be equal to VARSZ_FORM.
420 Note: this technique could also be used to speed up the parsing
421 of DIEs : for each parser kind, we could have the nr of bytes
422 to skip to directly reach the interesting form(s) for the parser. */
426 struct _g_abbv
*next
; // read/write by hash table.
427 UWord abbv_code
; // key, read by hash table
431 /* Variable-length array of name/form pairs, terminated
433 The skip_szB/next_nf allows to skip efficiently a DIE
434 described by this g_abbv; */
437 /* Holds information about the .debug_abbrev section for this CU. The current
438 Cursor into the abbrev section, the known abbrev codes are put into a hash
439 table. The (starting) offset into the abbrev_offset can be used to check
440 whether the abbv can be shared between CUs. The done boolean is set when all
441 known codes have been read. Initialize a new abbv_state with init_ht_abbvs.
442 To read any new abbrev codes not yet in the hash table call find_ht_abbvs
443 (get_abbv will first query the ht_abbvs, then if not done, call
447 Cursor c
; /* Current cursor into .debug_abbrev. */
448 VgHashTable
*ht_abbvs
; /* Hash table mapping codes to abbrevs. */
449 ULong debug_abbrev_offset
; /* Starting offset into .debug_abbrev. */
450 Bool done
; /* Whether there (might) still be new abbrev codes not yet
454 /* Holds information that is constant through the parsing of a
455 Compilation Unit. This is basically plumbed through to
459 /* Call here if anything goes wrong */
460 void (*barf
)( const HChar
* ) __attribute__((noreturn
));
461 /* Is this 64-bit DWARF ? */
463 /* Which DWARF version ? (2, 3, 4 or 5) */
465 /* Length of this Compilation Unit, as stated in the
466 .unit_length :: InitialLength field of the CU Header.
467 However, this size (as specified by the D3 spec) does not
468 include the size of the .unit_length field itself, which is
469 either 4 or 12 bytes (32-bit or 64-bit Dwarf3). That value
470 can be obtained through the expression ".is_dw64 ? 12 : 4". */
472 /* Offset of start of this unit in .debug_info */
473 UWord cu_start_offset
;
474 /* SVMA for this CU. In the D3 spec, is known as the "base
475 address of the compilation unit" (last para sec 3.1.1).
476 Needed for (amongst things) interpretation of location-list
481 /* The debug_abbreviations table to be used for this Unit */
483 /* Upper bound on size thereof (an overestimate, in general) */
484 //UWord debug_abbv_maxszB;
485 /* A bounded area of the image, to be used as the
486 debug_abbreviations table to be used for this Unit. */
489 /* Image information for various sections. */
490 DiSlice escn_debug_str
;
491 DiSlice escn_debug_ranges
;
492 DiSlice escn_debug_rnglists
;
493 DiSlice escn_debug_loclists
;
494 DiSlice escn_debug_loc
;
495 DiSlice escn_debug_line
;
496 DiSlice escn_debug_info
;
497 DiSlice escn_debug_types
;
498 DiSlice escn_debug_info_alt
;
499 DiSlice escn_debug_str_alt
;
500 DiSlice escn_debug_line_str
;
501 DiSlice escn_debug_addr
;
502 DiSlice escn_debug_str_offsets
;
503 /* How much to add to .debug_types resp. alternate .debug_info offsets
505 UWord types_cuOff_bias
;
506 UWord alt_cuOff_bias
;
507 /* DW_AT_addr_base */
509 Bool cu_has_addr_base
;
510 /* DW_AT_str_offsets_base */
511 Addr cu_str_offsets_base
;
512 Bool cu_has_str_offsets_base
;
513 /* DW_AT_rnglists_base */
514 Addr cu_rnglists_base
;
515 Bool cu_has_rnglists_base
;
516 /* DW_AT_loclists_base */
517 Addr cu_loclists_base
;
518 Bool cu_has_loclists_base
;
519 /* --- Needed so we can add stuff to the string table. --- */
520 struct _DebugInfo
* di
;
521 /* --- State of the hash table of g_abbv (i.e. parsed abbreviations)
522 technically makes this struct not const. --- */
525 /* True if this came from .debug_types; otherwise it came from
528 /* For a unit coming from .debug_types, these hold the TU's type
529 signature and the uncooked DIE offset of the TU's signatured
530 type. For a unit coming from .debug_info, these are unused. */
531 ULong type_signature
;
534 /* Signatured type hash; computed once and then shared by all
536 VgHashTable
*signature_types
;
538 /* True if this came from alternate .debug_info; otherwise
539 it came from normal .debug_info or .debug_types. */
545 /* Return the cooked value of DIE depending on whether CC represents a
546 .debug_types unit. To cook a DIE, we pretend that the .debug_info,
547 .debug_types and optional alternate .debug_info sections form
548 a contiguous whole, so that DIEs coming from .debug_types are numbered
549 starting at the end of .debug_info and DIEs coming from alternate
550 .debug_info are numbered starting at the end of .debug_types. */
551 static UWord
cook_die( const CUConst
* cc
, UWord die
)
553 if (cc
->is_type_unit
)
554 die
+= cc
->types_cuOff_bias
;
555 else if (cc
->is_alt_info
)
556 die
+= cc
->alt_cuOff_bias
;
560 /* Like cook_die, but understand that DIEs coming from a
561 DW_FORM_ref_sig8 reference are already cooked. Also, handle
562 DW_FORM_GNU_ref_alt from within primary .debug_info or .debug_types
563 as reference to alternate .debug_info. */
564 static UWord
cook_die_using_form( const CUConst
*cc
, UWord die
, DW_FORM form
)
566 if (form
== DW_FORM_ref_sig8
)
568 if (form
== DW_FORM_GNU_ref_alt
)
569 return die
+ cc
->alt_cuOff_bias
;
570 return cook_die( cc
, die
);
573 /* Return the uncooked offset of DIE and set *TYPE_FLAG to true if the DIE
574 came from the .debug_types section and *ALT_FLAG to true if the DIE
575 came from alternate .debug_info section. */
576 static UWord
uncook_die( const CUConst
*cc
, UWord die
, /*OUT*/Bool
*type_flag
,
581 /* The use of escn_debug_{info,types}.szB seems safe to me even if
582 escn_debug_{info,types} are DiSlice_INVALID (meaning the
583 sections were not found), because DiSlice_INVALID.szB is always
584 zero. That said, it seems unlikely we'd ever get here if
585 .debug_info or .debug_types were missing. */
586 if (die
>= cc
->escn_debug_info
.szB
) {
587 if (die
>= cc
->escn_debug_info
.szB
+ cc
->escn_debug_types
.szB
) {
589 die
-= cc
->escn_debug_info
.szB
+ cc
->escn_debug_types
.szB
;
592 die
-= cc
->escn_debug_info
.szB
;
598 /* Return an entry from .debug_addr with the given index.
599 Call one of the variants below that do error-checking. */
600 static ULong
get_debug_addr_entry_common( ULong index
, const CUConst
* cc
)
602 vg_assert(cc
->cu_has_addr_base
);
603 /* We make the same word-size assumption as DW_FORM_addr. */
604 UWord addr_pos
= cc
->cu_addr_base
+ index
* sizeof(UWord
);
606 init_Cursor( &cur
, cc
->escn_debug_addr
, addr_pos
, cc
->barf
,
607 "get_debug_addr_entry_common: index points outside .debug_addr" );
608 return (ULong
)(UWord
)get_UWord(&cur
);
611 static ULong
get_debug_addr_entry_form( ULong index
, const CUConst
* cc
,
614 if(!cc
->cu_has_addr_base
) {
616 "get_debug_addr_entry_form: %u (%s) without DW_AT_addr_base\n",
617 form
, ML_(pp_DW_FORM
)(form
));
618 cc
->barf("get_debug_addr_entry_form: DW_AT_addr_base not set");
620 return get_debug_addr_entry_common( index
, cc
);
623 static ULong
get_debug_addr_entry_lle( ULong index
, const CUConst
* cc
,
626 if(!cc
->cu_has_addr_base
) {
628 "get_debug_addr_entry_lle: %u (%s) without DW_AT_addr_base\n",
629 entry
, ML_(pp_DW_LLE
)(entry
));
630 cc
->barf("get_debug_addr_entry_lle: DW_AT_addr_base not set");
632 return get_debug_addr_entry_common( index
, cc
);
635 static ULong
get_debug_addr_entry_rle( ULong index
, const CUConst
* cc
,
638 if(!cc
->cu_has_addr_base
) {
640 "get_debug_addr_entry_rle: %u (%s) without DW_AT_addr_base\n",
641 entry
, ML_(pp_DW_RLE
)(entry
));
642 cc
->barf("get_debug_addr_entry_rle: DW_AT_addr_base not set");
644 return get_debug_addr_entry_common( index
, cc
);
647 /*------------------------------------------------------------*/
649 /*--- Helper functions for Guarded Expressions ---*/
651 /*------------------------------------------------------------*/
653 /* Parse the location list starting at img-offset 'debug_loc_offset'
654 in .debug_loc. Results are biased with 'svma_of_referencing_CU'
655 and so I believe are correct SVMAs for the object as a whole. This
656 function allocates the UChar*, and the caller must deallocate it.
657 The resulting block is in so-called Guarded-Expression format.
659 Guarded-Expression format is similar but not identical to the DWARF3
660 location-list format. The format of each returned block is:
664 followed by zero or more of
666 (Addr aMin; Addr aMax; UShort nbytes; ..bytes..; UChar isEnd)
668 '..bytes..' is a standard DWARF3 location expression which is
669 valid when aMin <= pc <= aMax (possibly after suitable biasing).
671 The number of bytes in '..bytes..' is nbytes.
673 The end of the sequence is marked by an isEnd == 1 value. All
674 previous isEnd values must be zero.
676 biasMe is 1 if the aMin/aMax fields need this DebugInfo's
677 text_bias added before use, and 0 if for this GX that is not
678 necessary (is ready to go).
680 Hence the block can be quickly parsed and is self-describing. Note
681 that aMax is 1 less than the corresponding value in a DWARF3
682 location list. Zero length ranges, with aMax == aMin-1, are not
685 /* 2008-sept-12: moved ML_(pp_GX) from here to d3basics.c, where
686 it more logically belongs. */
689 /* Apply a text bias to a GX. */
690 static void bias_GX ( /*MOD*/GExpr
* gx
, const DebugInfo
* di
)
693 UChar
* p
= &gx
->payload
[0];
696 uc
= *p
++; /*biasMe*/
700 p
[-1] = 0; /* mark it as done */
708 ML_(write_Addr
)(pA
, ML_(read_Addr
)(pA
) + di
->text_debug_bias
);
712 ML_(write_Addr
)(pA
, ML_(read_Addr
)(pA
) + di
->text_debug_bias
);
714 /* nbytes, and actual expression */
715 nbytes
= ML_(read_UShort
)(p
); p
+= sizeof(UShort
);
720 __attribute__((noinline
))
721 static GExpr
* make_singleton_GX ( DiCursor block
, ULong nbytes
)
727 vg_assert(sizeof(UWord
) == sizeof(Addr
));
728 vg_assert(nbytes
<= 0xFFFF); /* else we overflow the nbytes field */
730 = sizeof(UChar
) /*biasMe*/ + sizeof(UChar
) /*!isEnd*/
731 + sizeof(UWord
) /*aMin*/ + sizeof(UWord
) /*aMax*/
732 + sizeof(UShort
) /*nbytes*/ + (SizeT
)nbytes
733 + sizeof(UChar
); /*isEnd*/
735 gx
= ML_(dinfo_zalloc
)( "di.readdwarf3.msGX.1",
736 sizeof(GExpr
) + bytesReqd
);
738 p
= pstart
= &gx
->payload
[0];
740 p
= ML_(write_UChar
)(p
, 0); /*biasMe*/
741 p
= ML_(write_UChar
)(p
, 0); /*!isEnd*/
742 p
= ML_(write_Addr
)(p
, 0); /*aMin*/
743 p
= ML_(write_Addr
)(p
, ~0); /*aMax*/
744 p
= ML_(write_UShort
)(p
, nbytes
); /*nbytes*/
745 ML_(cur_read_get
)(p
, block
, nbytes
); p
+= nbytes
;
746 p
= ML_(write_UChar
)(p
, 1); /*isEnd*/
748 vg_assert( (SizeT
)(p
- pstart
) == bytesReqd
);
749 vg_assert( &gx
->payload
[bytesReqd
]
750 == ((UChar
*)gx
) + sizeof(GExpr
) + bytesReqd
);
755 __attribute__((noinline
))
756 static GExpr
* make_general_GX ( const CUConst
* cc
,
759 Addr svma_of_referencing_CU
)
764 XArray
* xa
; /* XArray of UChar */
767 Bool addBase
= cc
->version
< 5;
769 vg_assert(sizeof(UWord
) == sizeof(Addr
));
770 if (cc
->version
< 5 && (!ML_(sli_is_valid
)(cc
->escn_debug_loc
)
771 || cc
->escn_debug_loc
.szB
== 0))
772 cc
->barf("make_general_GX: .debug_loc is empty/missing");
773 if (cc
->version
>= 5 && (!ML_(sli_is_valid
)(cc
->escn_debug_loclists
)
774 || cc
->escn_debug_loclists
.szB
== 0))
775 cc
->barf("make_general_GX: .debug_loclists is empty/missing");
778 init_Cursor( &loc
, cc
->escn_debug_loc
, 0, cc
->barf
,
779 "Overrun whilst reading .debug_loc section(2)" );
781 init_Cursor( &loc
, cc
->escn_debug_loclists
, 0, cc
->barf
,
782 "Overrun whilst reading .debug_loclists section(2)" );
783 set_position_of_Cursor( &loc
, offset
);
785 TRACE_D3("make_general_GX (offset = %llu, ioff = %llu) {\n",
786 offset
, get_DiCursor_from_Cursor(&loc
).ioff
);
788 /* Who frees this xa? It is freed before this fn exits. */
789 xa
= VG_(newXA
)( ML_(dinfo_zalloc
), "di.readdwarf3.mgGX.1",
793 { UChar c
= 1; /*biasMe*/ VG_(addBytesToXA
)( xa
, &c
, sizeof(c
) ); }
802 if (cc
->version
< 5) {
803 /* Read a (host-)word pair. This is something of a hack since
804 the word size to read is really dictated by the ELF file;
805 however, we assume we're reading a file with the same
806 word-sizeness as the host. Reasonably enough. */
807 w1
= get_UWord( &loc
);
808 w2
= get_UWord( &loc
);
810 TRACE_D3(" %08lx %08lx\n", w1
, w2
);
811 if (w1
== 0 && w2
== 0) {
813 break; /* end of list */
817 /* new value for 'base' */
821 /* else a location expression follows */
822 len
= (UWord
)get_UShort( &loc
);
827 DW_LLE r
= get_UChar( &loc
);
829 case DW_LLE_end_of_list
:
832 case DW_LLE_base_address
:
833 base
= get_UWord( &loc
);
835 case DW_LLE_start_length
:
836 w1
= get_UWord( &loc
);
837 w2
= w1
+ get_ULEB128( &loc
);
838 len
= get_ULEB128( &loc
);
840 case DW_LLE_offset_pair
:
841 w1
= base
+ get_ULEB128( &loc
);
842 w2
= base
+ get_ULEB128( &loc
);
843 len
= get_ULEB128( &loc
);
845 case DW_LLE_start_end
:
846 w1
= get_UWord ( &loc
);
847 w2
= get_UWord ( &loc
);
848 len
= get_ULEB128( &loc
);
850 case DW_LLE_GNU_view_pair
:
854 case DW_LLE_base_addressx
:
855 base
= get_debug_addr_entry_lle( get_ULEB128( &loc
), cc
,
856 DW_LLE_base_addressx
);
858 case DW_LLE_startx_endx
:
859 w1
= get_debug_addr_entry_lle( get_ULEB128( &loc
), cc
,
860 DW_LLE_startx_endx
);
861 w2
= get_debug_addr_entry_lle( get_ULEB128( &loc
), cc
,
862 DW_LLE_startx_endx
);
863 len
= get_ULEB128( &loc
);
865 case DW_LLE_startx_length
:
866 w1
= get_debug_addr_entry_lle( get_ULEB128( &loc
), cc
,
867 DW_LLE_startx_length
);
868 w2
= w1
+ get_ULEB128( &loc
);
869 len
= get_ULEB128( &loc
);
871 case DW_LLE_default_location
:
873 cc
->barf( "Unhandled or unknown loclists entry" );
878 /* else enumerate [w1+base, w2+base) */
879 /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
882 TRACE_D3("negative range is for .debug_loc expr at "
883 "file offset %llu\n",
885 cc
->barf( "negative range in .debug_loc section" );
888 /* ignore zero length ranges */
896 VG_(addBytesToXA
)( xa
, &c
, sizeof(c
) );
897 w
= w1
+ (addBase
? base
: 0) + svma_of_referencing_CU
;
898 VG_(addBytesToXA
)( xa
, &w
, sizeof(w
) );
899 w
= w2
-1 + (addBase
? base
: 0) + svma_of_referencing_CU
;
900 VG_(addBytesToXA
)( xa
, &w
, sizeof(w
) );
902 VG_(addBytesToXA
)( xa
, &s
, sizeof(s
) );
906 UChar byte
= get_UChar( &loc
);
907 TRACE_D3("%02x", (UInt
)byte
);
909 VG_(addBytesToXA
)( xa
, &byte
, 1 );
915 { UChar c
= 1; /*isEnd*/ VG_(addBytesToXA
)( xa
, &c
, sizeof(c
) ); }
917 nbytes
= VG_(sizeXA
)( xa
);
918 vg_assert(nbytes
>= 1);
920 gx
= ML_(dinfo_zalloc
)( "di.readdwarf3.mgGX.2", sizeof(GExpr
) + nbytes
);
921 VG_(memcpy
)( &gx
->payload
[0], (UChar
*)VG_(indexXA
)(xa
,0), nbytes
);
922 vg_assert( &gx
->payload
[nbytes
]
923 == ((UChar
*)gx
) + sizeof(GExpr
) + nbytes
);
933 /*------------------------------------------------------------*/
935 /*--- Helper functions for range lists and CU headers ---*/
937 /*------------------------------------------------------------*/
939 /* Denotes an address range. Both aMin and aMax are included in the
940 range; hence a complete range is (0, ~0) and an empty range is any
941 (X, X-1) for X > 0.*/
943 struct { Addr aMin
; Addr aMax
; }
947 /* Generate an arbitrary structural total ordering on
948 XArray* of AddrRange. */
949 static Word
cmp__XArrays_of_AddrRange ( const XArray
* rngs1
,
950 const XArray
* rngs2
)
953 vg_assert(rngs1
&& rngs2
);
954 n1
= VG_(sizeXA
)( rngs1
);
955 n2
= VG_(sizeXA
)( rngs2
);
956 if (n1
< n2
) return -1;
957 if (n1
> n2
) return 1;
958 for (i
= 0; i
< n1
; i
++) {
959 AddrRange
* rng1
= (AddrRange
*)VG_(indexXA
)( rngs1
, i
);
960 AddrRange
* rng2
= (AddrRange
*)VG_(indexXA
)( rngs2
, i
);
961 if (rng1
->aMin
< rng2
->aMin
) return -1;
962 if (rng1
->aMin
> rng2
->aMin
) return 1;
963 if (rng1
->aMax
< rng2
->aMax
) return -1;
964 if (rng1
->aMax
> rng2
->aMax
) return 1;
970 __attribute__((noinline
))
971 static XArray
* /* of AddrRange */ empty_range_list ( void )
973 XArray
* xa
; /* XArray of AddrRange */
974 /* Who frees this xa? varstack_preen() does. */
975 xa
= VG_(newXA
)( ML_(dinfo_zalloc
), "di.readdwarf3.erl.1",
982 __attribute__((noinline
))
983 static XArray
* unitary_range_list ( Addr aMin
, Addr aMax
)
987 vg_assert(aMin
<= aMax
);
988 /* Who frees this xa? varstack_preen() does. */
989 xa
= VG_(newXA
)( ML_(dinfo_zalloc
), "di.readdwarf3.url.1",
994 VG_(addToXA
)( xa
, &pair
);
999 /* Enumerate the address ranges starting at img-offset
1000 'debug_ranges_offset' in .debug_ranges. Results are biased with
1001 'svma_of_referencing_CU' and so I believe are correct SVMAs for the
1002 object as a whole. This function allocates the XArray, and the
1003 caller must deallocate it. */
1004 __attribute__((noinline
))
1005 static XArray
* /* of AddrRange */
1006 get_range_list ( const CUConst
* cc
,
1008 UWord debug_ranges_offset
,
1009 Addr svma_of_referencing_CU
)
1013 XArray
* xa
; /* XArray of AddrRange */
1016 if (cc
->version
< 5 && (!ML_(sli_is_valid
)(cc
->escn_debug_ranges
)
1017 || cc
->escn_debug_ranges
.szB
== 0))
1018 cc
->barf("get_range_list: .debug_ranges is empty/missing");
1019 if (cc
->version
>= 5 && (!ML_(sli_is_valid
)(cc
->escn_debug_rnglists
)
1020 || cc
->escn_debug_rnglists
.szB
== 0))
1021 cc
->barf("get_range_list: .debug_rnglists is empty/missing");
1023 if (cc
->version
< 5)
1024 init_Cursor( &ranges
, cc
->escn_debug_ranges
, 0, cc
->barf
,
1025 "Overrun whilst reading .debug_ranges section(2)" );
1027 init_Cursor( &ranges
, cc
->escn_debug_rnglists
, 0, cc
->barf
,
1028 "Overrun whilst reading .debug_rnglists section(2)" );
1030 set_position_of_Cursor( &ranges
, debug_ranges_offset
);
1032 /* Who frees this xa? varstack_preen() does. */
1033 xa
= VG_(newXA
)( ML_(dinfo_zalloc
), "di.readdwarf3.grl.1", ML_(dinfo_free
),
1034 sizeof(AddrRange
) );
1036 if (cc
->version
< 5) {
1038 /* Read a (host-)word pair. This is something of a hack since
1039 the word size to read is really dictated by the ELF file;
1040 however, we assume we're reading a file with the same
1041 word-sizeness as the host. Reasonably enough. */
1042 UWord w1
= get_UWord( &ranges
);
1043 UWord w2
= get_UWord( &ranges
);
1045 if (w1
== 0 && w2
== 0)
1046 break; /* end of list. */
1049 /* new value for 'base' */
1054 /* else enumerate [w1+base, w2+base) */
1055 /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
1058 cc
->barf( "negative range in .debug_ranges section" );
1060 pair
.aMin
= w1
+ base
+ svma_of_referencing_CU
;
1061 pair
.aMax
= w2
- 1 + base
+ svma_of_referencing_CU
;
1062 vg_assert(pair
.aMin
<= pair
.aMax
);
1063 VG_(addToXA
)( xa
, &pair
);
1071 DW_RLE r
= get_UChar( &ranges
);
1073 case DW_RLE_end_of_list
:
1076 case DW_RLE_base_address
:
1077 base
= get_UWord( &ranges
);
1079 case DW_RLE_start_length
:
1080 w1
= get_UWord( &ranges
);
1081 w2
= w1
+ get_ULEB128( &ranges
);
1083 case DW_RLE_offset_pair
:
1084 w1
= base
+ get_ULEB128( &ranges
);
1085 w2
= base
+ get_ULEB128( &ranges
);
1087 case DW_RLE_start_end
:
1088 w1
= get_UWord ( &ranges
);
1089 w2
= get_UWord ( &ranges
);
1091 case DW_RLE_base_addressx
:
1092 base
= get_debug_addr_entry_rle( get_ULEB128( &ranges
), cc
,
1093 DW_RLE_base_addressx
);
1095 case DW_RLE_startx_endx
:
1096 w1
= get_debug_addr_entry_rle( get_ULEB128( &ranges
), cc
,
1097 DW_RLE_startx_endx
);
1098 w2
= get_debug_addr_entry_rle( get_ULEB128( &ranges
), cc
,
1099 DW_RLE_startx_endx
);
1101 case DW_RLE_startx_length
:
1102 w1
= get_debug_addr_entry_rle( get_ULEB128( &ranges
), cc
,
1103 DW_RLE_startx_length
);
1104 w2
= w1
+ get_ULEB128( &ranges
);
1107 cc
->barf( "Unhandled or unknown range list entry" );
1111 cc
->barf( "negative range in .debug_rnglists section" );
1113 pair
.aMin
= w1
+ svma_of_referencing_CU
;
1114 pair
.aMax
= w2
- 1 + svma_of_referencing_CU
;
1115 vg_assert(pair
.aMin
<= pair
.aMax
);
1116 VG_(addToXA
)( xa
, &pair
);
1123 #define VARSZ_FORM 0xffffffff
1124 static UInt
get_Form_szB (const CUConst
* cc
, DW_FORM form
);
1126 /* Initialises the hash table of abbreviations. This only sets up the abbv
1127 Cursor and hash table, but does not try to read any abbrevs yes. The actual
1128 reading of abbrevs will be done by get_abbv by calling find_ht_abbvs on
1129 demand if a requested abbrev code isn't in the hash table yet. When using the
1130 inline parser a lot of abbrevs will not be needed so reading everything
1131 upfront will often waste time and memory. */
1132 static void init_ht_abbvs (CUConst
* cc
, ULong debug_abbrev_offset
,
1135 Cursor
*c
= &cc
->abbv
.c
;
1136 init_Cursor( c
, cc
->debug_abbv
, 0, cc
->barf
,
1137 "Overrun whilst parsing .debug_abbrev section(2)" );
1138 cc
->abbv
.ht_abbvs
= VG_(HT_construct
) ("di.readdwarf3.ht_abbvs");
1139 cc
->abbv
.debug_abbrev_offset
= debug_abbrev_offset
;
1140 cc
->abbv
.done
= False
;
1143 static g_abbv
*find_ht_abbvs (CUConst
* cc
, ULong abbv_code
,
1147 g_abbv
*ta
; // temporary abbreviation, reallocated if needed.
1148 UInt ta_nf_maxE
; // max nr of pairs in ta.nf[], doubled when reallocated.
1149 UInt ta_nf_n
; // nr of pairs in ta->nf that are initialised.
1150 g_abbv
*ht_ta
; // abbv to insert in hash table.
1153 #define SZ_G_ABBV(_nf_szE) (sizeof(g_abbv) + _nf_szE * sizeof(name_form))
1155 ta_nf_maxE
= 10; // starting with enough for 9 pairs+terminating pair.
1156 ta
= ML_(dinfo_zalloc
) ("di.readdwarf3.ht_ta_nf", SZ_G_ABBV(ta_nf_maxE
));
1161 ta
->abbv_code
= get_ULEB128( c
);
1162 if (ta
->abbv_code
== 0) {
1163 cc
->abbv
.done
= True
;
1164 break; /* end of the table */
1167 ta
->atag
= get_ULEB128( c
);
1168 ta
->has_children
= get_UChar( c
);
1171 if (ta_nf_n
>= ta_nf_maxE
) {
1172 g_abbv
*old_ta
= ta
;
1173 ta
= ML_(dinfo_zalloc
) ("di.readdwarf3.ht_ta_nf",
1174 SZ_G_ABBV(2 * ta_nf_maxE
));
1175 ta_nf_maxE
= 2 * ta_nf_maxE
;
1176 VG_(memcpy
) (ta
, old_ta
, SZ_G_ABBV(ta_nf_n
));
1177 ML_(dinfo_free
) (old_ta
);
1179 ta
->nf
[ta_nf_n
].at_name
= get_ULEB128( c
);
1180 ta
->nf
[ta_nf_n
].at_form
= get_ULEB128( c
);
1181 if (ta
->nf
[ta_nf_n
].at_form
== DW_FORM_implicit_const
)
1182 ta
->nf
[ta_nf_n
].at_val
= get_SLEB128( c
);
1183 if (ta
->nf
[ta_nf_n
].at_name
== 0 && ta
->nf
[ta_nf_n
].at_form
== 0) {
1190 // Initialises the skip_szB/next_nf elements : an element at position
1191 // i must contain the sum of its own size + the sizes of all elements
1192 // following i till either the next variable size element, the next
1193 // sibling element or the end of the DIE.
1194 ta
->nf
[ta_nf_n
- 1].skip_szB
= 0;
1195 ta
->nf
[ta_nf_n
- 1].next_nf
= 0;
1196 for (i
= ta_nf_n
- 2; i
>= 0; i
--) {
1197 const UInt form_szB
= get_Form_szB (cc
, (DW_FORM
)ta
->nf
[i
].at_form
);
1199 if (ta
->nf
[i
+1].at_name
== DW_AT_sibling
1200 || ta
->nf
[i
+1].skip_szB
== VARSZ_FORM
) {
1201 ta
->nf
[i
].skip_szB
= form_szB
;
1202 ta
->nf
[i
].next_nf
= i
+1;
1203 } else if (form_szB
== VARSZ_FORM
) {
1204 ta
->nf
[i
].skip_szB
= form_szB
;
1205 ta
->nf
[i
].next_nf
= i
+1;
1207 ta
->nf
[i
].skip_szB
= ta
->nf
[i
+1].skip_szB
+ form_szB
;
1208 ta
->nf
[i
].next_nf
= ta
->nf
[i
+1].next_nf
;
1212 ht_ta
= ML_(dinfo_zalloc
) ("di.readdwarf3.ht_ta", SZ_G_ABBV(ta_nf_n
));
1213 VG_(memcpy
) (ht_ta
, ta
, SZ_G_ABBV(ta_nf_n
));
1214 VG_(HT_add_node
) ( cc
->abbv
.ht_abbvs
, ht_ta
);
1216 TRACE_D3(" Adding abbv_code %lu TAG %s [%s] nf %u ",
1217 ht_ta
->abbv_code
, ML_(pp_DW_TAG
)(ht_ta
->atag
),
1218 ML_(pp_DW_children
)(ht_ta
->has_children
),
1221 for (i
= 0; i
< ta_nf_n
; i
++)
1222 TRACE_D3("[%u,%u] ", ta
->nf
[i
].skip_szB
, ta
->nf
[i
].next_nf
);
1225 if (ht_ta
->abbv_code
== abbv_code
)
1229 ML_(dinfo_free
) (ta
);
1235 static g_abbv
* get_abbv (CUConst
* cc
, ULong abbv_code
,
1240 abbv
= VG_(HT_lookup
) (cc
->abbv
.ht_abbvs
, abbv_code
);
1241 if (!abbv
&& !cc
->abbv
.done
)
1242 abbv
= find_ht_abbvs (cc
, abbv_code
, td3
);
1244 cc
->barf ("abbv_code not found in ht_abbvs table");
1249 /* Parse the Compilation Unit header indicated at 'c' and
1250 initialise 'cc' accordingly. */
1251 static __attribute__((noinline
))
1252 void parse_CU_Header ( /*OUT*/CUConst
* cc
,
1255 DiSlice escn_debug_abbv
,
1256 abbv_state last_abbv
,
1260 UChar address_size
, unit_type
;
1261 ULong debug_abbrev_offset
;
1263 VG_(memset
)(cc
, 0, sizeof(*cc
));
1264 vg_assert(c
&& c
->barf
);
1267 /* initial_length field */
1269 = get_Initial_Length( &cc
->is_dw64
, c
,
1270 "parse_CU_Header: invalid initial-length field" );
1272 TRACE_D3(" Length: %llu\n", cc
->unit_length
);
1275 cc
->version
= get_UShort( c
);
1276 if (cc
->version
!= 2 && cc
->version
!= 3 && cc
->version
!= 4
1277 && cc
->version
!= 5)
1278 cc
->barf( "parse_CU_Header: "
1279 "is neither DWARF2 nor DWARF3 nor DWARF4 nor DWARF5" );
1280 TRACE_D3(" Version: %d\n", (Int
)cc
->version
);
1283 if (cc
->version
>= 5) {
1284 unit_type
= get_UChar( c
);
1285 address_size
= get_UChar( c
);
1287 unit_type
= type_unit
? DW_UT_type
: DW_UT_compile
;
1288 address_size
= 0; /* Will be read later. */
1291 /* debug_abbrev_offset */
1292 debug_abbrev_offset
= get_Dwarfish_UWord( c
, cc
->is_dw64
);
1293 if (debug_abbrev_offset
>= escn_debug_abbv
.szB
)
1294 cc
->barf( "parse_CU_Header: invalid debug_abbrev_offset" );
1295 TRACE_D3(" Abbrev Offset: %llu\n", debug_abbrev_offset
);
1297 /* address size. If this isn't equal to the host word size, just
1298 give up. This makes it safe to assume elsewhere that
1299 DW_FORM_addr and DW_FORM_ref_addr can be treated as a host
1301 if (cc
->version
< 5)
1302 address_size
= get_UChar( c
);
1304 if (address_size
!= sizeof(void*))
1305 cc
->barf( "parse_CU_Header: invalid address_size" );
1306 TRACE_D3(" Pointer Size: %d\n", (Int
)address_size
);
1308 cc
->is_type_unit
= type_unit
;
1309 cc
->is_alt_info
= alt_info
;
1311 if (type_unit
|| (cc
->version
>= 5 && (unit_type
== DW_UT_type
1312 || unit_type
== DW_UT_split_type
))) {
1313 cc
->type_signature
= get_ULong( c
);
1314 cc
->type_offset
= get_Dwarfish_UWord( c
, cc
->is_dw64
);
1317 if (cc
->version
>= 5 && (unit_type
== DW_UT_skeleton
1318 || unit_type
== DW_UT_split_compile
)) {
1319 /* dwo_id = */ get_ULong( c
);
1322 /* Set up cc->debug_abbv to point to the relevant table for this
1323 CU. Set its .szB so that at least we can't read off the end of
1324 the debug_abbrev section -- potentially (and quite likely) too
1325 big, if this isn't the last table in the section, but at least
1328 This amounts to taking debug_abbv_escn and moving the start
1329 position along by debug_abbrev_offset bytes, hence forming a
1330 smaller DiSlice which has the same end point. Since we checked
1331 just above that debug_abbrev_offset is less than the size of
1332 debug_abbv_escn, this should leave us with a nonempty slice. */
1333 vg_assert(debug_abbrev_offset
< escn_debug_abbv
.szB
);
1334 cc
->debug_abbv
= escn_debug_abbv
;
1335 cc
->debug_abbv
.ioff
+= debug_abbrev_offset
;
1336 cc
->debug_abbv
.szB
-= debug_abbrev_offset
;
1338 if (last_abbv
.ht_abbvs
!= NULL
1339 && debug_abbrev_offset
== last_abbv
.debug_abbrev_offset
) {
1340 cc
->abbv
= last_abbv
;
1342 if (last_abbv
.ht_abbvs
!= NULL
)
1343 VG_(HT_destruct
) (last_abbv
.ht_abbvs
, ML_(dinfo_free
));
1344 init_ht_abbvs(cc
, debug_abbrev_offset
, td3
);
1348 /* This represents a single signatured type. It maps a type signature
1349 (a ULong) to a cooked DIE offset. Objects of this type are stored
1350 in the type signature hash table. */
1352 struct D3SignatureType
{
1353 struct D3SignatureType
*next
;
1355 ULong type_signature
;
1360 /* Record a signatured type in the hash table. */
1361 static void record_signatured_type ( VgHashTable
*tab
,
1362 ULong type_signature
,
1365 D3SignatureType
*dstype
= ML_(dinfo_zalloc
) ( "di.readdwarf3.sigtype",
1366 sizeof(D3SignatureType
) );
1367 dstype
->data
= (UWord
) type_signature
;
1368 dstype
->type_signature
= type_signature
;
1370 VG_(HT_add_node
) ( tab
, dstype
);
1373 /* Given a type signature hash table and a type signature, return the
1374 cooked DIE offset of the type. If the type cannot be found, call
1376 static UWord
lookup_signatured_type ( const VgHashTable
*tab
,
1377 ULong type_signature
,
1378 void (*barf
)( const HChar
* ) __attribute__((noreturn
)) )
1380 D3SignatureType
*dstype
= VG_(HT_lookup
) ( tab
, (UWord
) type_signature
);
1381 /* This may be unwarranted chumminess with the hash table
1383 while ( dstype
!= NULL
&& dstype
->type_signature
!= type_signature
)
1384 dstype
= dstype
->next
;
1385 if (dstype
== NULL
) {
1386 barf("lookup_signatured_type: could not find signatured type");
1394 /* Represents Form data. If szB is 1/2/4/8 then the result is in the
1395 lowest 1/2/4/8 bytes of u.val. If szB is zero or negative then the
1396 result is an image section beginning at u.cur and with size -szB.
1397 No other szB values are allowed. */
1400 Long szB
; // 1, 2, 4, 8 or non-positive values only.
1401 union { ULong val
; DiCursor cur
; } u
;
1405 // Read data for get_Form_contents() from .debug_addr for the 'index' entry.
1406 static void get_Form_contents_addr( /*OUT*/FormContents
* cts
, DW_FORM form
,
1407 ULong index
, const CUConst
* cc
, Bool td3
)
1409 cts
->u
.val
= get_debug_addr_entry_form( index
, cc
, form
);
1410 cts
->szB
= sizeof(UWord
);
1411 TRACE_D3("0x%lx", (UWord
)cts
->u
.val
);
1414 // Read data for get_Form_contents() from .debug_str for the given offset.
1415 static void get_Form_contents_str( /*OUT*/FormContents
* cts
, DW_FORM form
,
1416 UWord offset
, const CUConst
* cc
, Bool td3
)
1418 if (!ML_(sli_is_valid
)(cc
->escn_debug_str
)
1419 || offset
>= cc
->escn_debug_str
.szB
) {
1421 "get_Form_contents_str: %u (%s) points outside .debug_str\n",
1422 form
, ML_(pp_DW_FORM
)(form
));
1423 cc
->barf("get_Form_contents_str: index points outside .debug_str");
1425 /* FIXME: check the entire string lies inside debug_str,
1426 not just the first byte of it. */
1428 = ML_(cur_plus
)( ML_(cur_from_sli
)(cc
->escn_debug_str
), offset
);
1430 HChar
* tmp
= ML_(cur_read_strdup
)(str
, "di.getFC.1");
1431 TRACE_D3("(indirect string, offset: 0x%lx): %s", offset
, tmp
);
1432 ML_(dinfo_free
)(tmp
);
1435 cts
->szB
= - (Long
)(1 + (ULong
)ML_(cur_strlen
)(str
));
1438 static inline UInt
sizeof_Dwarfish_UWord (Bool is_dw64
)
1441 return sizeof(ULong
);
1443 return sizeof(UInt
);
1446 // Read data for get_Form_contents() from .debug_str_offsets for the 'index' entry.
1447 static void get_Form_contents_str_offsets( /*OUT*/FormContents
* cts
, DW_FORM form
,
1448 ULong index
, const CUConst
* cc
, Bool td3
)
1450 if(!cc
->cu_has_str_offsets_base
) {
1452 "get_Form_contents_str_offsets: %u (%s) without DW_AT_str_offsets_base\n",
1453 form
, ML_(pp_DW_FORM
)(form
));
1454 cc
->barf("get_Form_contents_str_offsets: DW_AT_str_offsets_base not set");
1456 UWord str_offsets_pos
= cc
->cu_str_offsets_base
1457 + index
* sizeof_Dwarfish_UWord (cc
->is_dw64
);
1459 init_Cursor( &cur
, cc
->escn_debug_str_offsets
, str_offsets_pos
, cc
->barf
,
1460 "get_Form_contents_str_offsets: index "
1461 "points outside .debug_str_offsets" );
1463 HChar
* tmp
= ML_(cur_read_strdup
)(get_DiCursor_from_Cursor(&cur
), "di.getFC.1");
1464 TRACE_D3("(indirect string offset, offset: 0x%lx): %s", str_offsets_pos
, tmp
);
1465 ML_(dinfo_free
)(tmp
);
1467 get_Form_contents_str( cts
, form
, get_Dwarfish_UWord(&cur
, cc
->is_dw64
), cc
, td3
);
1470 /* From 'c', get the Form data into 'cts'. Either it gets a 1/2/4/8
1471 byte scalar value, or (a reference to) zero or more bytes starting
1474 void get_Form_contents ( /*OUT*/FormContents
* cts
,
1475 const CUConst
* cc
, Cursor
* c
,
1476 Bool td3
, const name_form
*abbv
)
1478 DW_FORM form
= abbv
->at_form
;
1479 VG_(bzero_inline
)(cts
, sizeof(*cts
));
1480 // !!! keep switch in sync with get_Form_szB. The nr of characters read below
1481 // must be computed similarly in get_Form_szB.
1482 // The consistency is verified in trace_DIE.
1485 cts
->u
.val
= (ULong
)(UChar
)get_UChar(c
);
1487 TRACE_D3("%u", (UInt
)cts
->u
.val
);
1490 cts
->u
.val
= (ULong
)(UShort
)get_UShort(c
);
1492 TRACE_D3("%u", (UInt
)cts
->u
.val
);
1495 cts
->u
.val
= (ULong
)(UInt
)get_UInt(c
);
1497 TRACE_D3("%u", (UInt
)cts
->u
.val
);
1500 cts
->u
.val
= get_ULong(c
);
1502 TRACE_D3("%llu", cts
->u
.val
);
1504 case DW_FORM_data16
: {
1505 /* This is more like a block than an integral value. */
1507 DiCursor data16
= get_DiCursor_from_Cursor(c
);
1508 TRACE_D3("data16: ");
1509 for (u64b
= 16; u64b
> 0; u64b
--) {
1510 UChar u8
= get_UChar(c
);
1511 TRACE_D3("%x ", (UInt
)u8
);
1513 cts
->u
.cur
= data16
;
1514 cts
->szB
= - (Long
)16;
1517 case DW_FORM_sec_offset
:
1518 cts
->u
.val
= (ULong
)get_Dwarfish_UWord( c
, cc
->is_dw64
);
1519 cts
->szB
= cc
->is_dw64
? 8 : 4;
1520 TRACE_D3("%llu", cts
->u
.val
);
1522 case DW_FORM_rnglistx
: {
1523 if(!cc
->cu_has_rnglists_base
) {
1524 cc
->barf("get_Form_contents: DW_FORM_rnglistsx"
1525 " without DW_AT_rnglists_base");
1527 /* Convert index to offset pointing to the offsets list. */
1528 ULong index
= get_ULEB128(c
);
1529 ULong offset_to_offset
= cc
->cu_rnglists_base
+ index
* sizeof_Dwarfish_UWord( cc
->is_dw64
);
1530 /* And read the offset value from there. */
1532 init_Cursor( &cur
, cc
->escn_debug_rnglists
, offset_to_offset
, cc
->barf
,
1533 "get_Form_contents: index points outside .debug_rnglists" );
1534 cts
->u
.val
= cc
->cu_rnglists_base
+ get_Dwarfish_UWord(&cur
, cc
->is_dw64
);
1536 TRACE_D3("%llu", cts
->u
.val
);
1539 case DW_FORM_loclistx
: {
1540 if(!cc
->cu_has_loclists_base
) {
1541 cc
->barf("get_Form_contents: DW_FORM_loclistsx"
1542 " without DW_AT_loclists_base");
1544 /* Convert index to offset pointing to the offsets list. */
1545 ULong index
= get_ULEB128(c
);
1546 ULong offset_to_offset
= cc
->cu_loclists_base
+ index
* sizeof_Dwarfish_UWord( cc
->is_dw64
);
1547 /* And read the offset value from there. */
1549 init_Cursor( &cur
, cc
->escn_debug_loclists
, offset_to_offset
, cc
->barf
,
1550 "get_Form_contents: index points outside .debug_loclists" );
1551 cts
->u
.val
= cc
->cu_loclists_base
+ get_Dwarfish_UWord(&cur
, cc
->is_dw64
);
1553 TRACE_D3("%llu", cts
->u
.val
);
1557 cts
->u
.val
= (ULong
)(Long
)get_SLEB128(c
);
1559 TRACE_D3("%llu", cts
->u
.val
);
1562 cts
->u
.val
= (ULong
)(Long
)get_ULEB128(c
);
1564 TRACE_D3("%llu", cts
->u
.val
);
1567 /* note, this is a hack. DW_FORM_addr is defined as getting
1568 a word the size of the target machine as defined by the
1569 address_size field in the CU Header. However,
1570 parse_CU_Header() rejects all inputs except those for
1571 which address_size == sizeof(Word), hence we can just
1572 treat it as a (host) Word. */
1573 cts
->u
.val
= (ULong
)(UWord
)get_UWord(c
);
1574 cts
->szB
= sizeof(UWord
);
1575 TRACE_D3("0x%lx", (UWord
)cts
->u
.val
);
1578 case DW_FORM_ref_addr
:
1579 /* We make the same word-size assumption as DW_FORM_addr. */
1580 /* What does this really mean? From D3 Sec 7.5.4,
1581 description of "reference", it would appear to reference
1582 some other DIE, by specifying the offset from the
1583 beginning of a .debug_info section. The D3 spec mentions
1584 that this might be in some other shared object and
1585 executable. But I don't see how the name of the other
1586 object/exe is specified.
1588 At least for the DW_FORM_ref_addrs created by icc11, the
1589 references seem to be within the same object/executable.
1590 So for the moment we merely range-check, to see that they
1591 actually do specify a plausible offset within this
1592 object's .debug_info, and return the value unchanged.
1594 In DWARF 2, DW_FORM_ref_addr is address-sized, but in
1595 DWARF 3 and later, it is offset-sized.
1597 if (cc
->version
== 2) {
1598 cts
->u
.val
= (ULong
)(UWord
)get_UWord(c
);
1599 cts
->szB
= sizeof(UWord
);
1601 cts
->u
.val
= get_Dwarfish_UWord(c
, cc
->is_dw64
);
1602 cts
->szB
= cc
->is_dw64
? sizeof(ULong
) : sizeof(UInt
);
1604 TRACE_D3("0x%lx", (UWord
)cts
->u
.val
);
1605 if (0) VG_(printf
)("DW_FORM_ref_addr 0x%lx\n", (UWord
)cts
->u
.val
);
1606 if (/* the following is surely impossible, but ... */
1607 !ML_(sli_is_valid
)(cc
->escn_debug_info
)
1608 || cts
->u
.val
>= (ULong
)cc
->escn_debug_info
.szB
) {
1609 /* Hmm. Offset is nonsensical for this object's .debug_info
1610 section. Be safe and reject it. */
1611 cc
->barf("get_Form_contents: DW_FORM_ref_addr points "
1612 "outside .debug_info");
1616 case DW_FORM_strp
: {
1617 /* this is an offset into .debug_str */
1618 UWord uw
= (UWord
)get_Dwarfish_UWord( c
, cc
->is_dw64
);
1619 get_Form_contents_str( cts
, form
, uw
, cc
, td3
);
1622 case DW_FORM_line_strp
: {
1623 /* this is an offset into .debug_line_str */
1624 UWord uw
= (UWord
)get_Dwarfish_UWord( c
, cc
->is_dw64
);
1625 if (!ML_(sli_is_valid
)(cc
->escn_debug_line_str
)
1626 || uw
>= cc
->escn_debug_line_str
.szB
)
1627 cc
->barf("get_Form_contents: DW_FORM_line_strp "
1628 "points outside .debug_line_str");
1629 /* FIXME: check the entire string lies inside debug_line_str,
1630 not just the first byte of it. */
1632 = ML_(cur_plus
)( ML_(cur_from_sli
)(cc
->escn_debug_line_str
), uw
);
1634 HChar
* tmp
= ML_(cur_read_strdup
)(line_str
, "di.getFC.1.5");
1635 TRACE_D3("(indirect line string, offset: 0x%lx): %s", uw
, tmp
);
1636 ML_(dinfo_free
)(tmp
);
1638 cts
->u
.cur
= line_str
;
1639 cts
->szB
= - (Long
)(1 + (ULong
)ML_(cur_strlen
)(line_str
));
1642 case DW_FORM_string
: {
1643 DiCursor str
= get_AsciiZ(c
);
1645 HChar
* tmp
= ML_(cur_read_strdup
)(str
, "di.getFC.2");
1646 TRACE_D3("%s", tmp
);
1647 ML_(dinfo_free
)(tmp
);
1650 /* strlen is safe because get_AsciiZ already 'vetted' the
1652 cts
->szB
= - (Long
)(1 + (ULong
)ML_(cur_strlen
)(str
));
1655 case DW_FORM_ref1
: {
1656 UChar u8
= get_UChar(c
);
1657 UWord res
= cc
->cu_start_offset
+ (UWord
)u8
;
1658 cts
->u
.val
= (ULong
)res
;
1659 cts
->szB
= sizeof(UWord
);
1660 TRACE_D3("<%lx>", res
);
1663 case DW_FORM_ref2
: {
1664 UShort u16
= get_UShort(c
);
1665 UWord res
= cc
->cu_start_offset
+ (UWord
)u16
;
1666 cts
->u
.val
= (ULong
)res
;
1667 cts
->szB
= sizeof(UWord
);
1668 TRACE_D3("<%lx>", res
);
1671 case DW_FORM_ref4
: {
1672 UInt u32
= get_UInt(c
);
1673 UWord res
= cc
->cu_start_offset
+ (UWord
)u32
;
1674 cts
->u
.val
= (ULong
)res
;
1675 cts
->szB
= sizeof(UWord
);
1676 TRACE_D3("<%lx>", res
);
1679 case DW_FORM_ref8
: {
1680 ULong u64
= get_ULong(c
);
1681 UWord res
= cc
->cu_start_offset
+ (UWord
)u64
;
1682 cts
->u
.val
= (ULong
)res
;
1683 cts
->szB
= sizeof(UWord
);
1684 TRACE_D3("<%lx>", res
);
1687 case DW_FORM_ref_udata
: {
1688 ULong u64
= get_ULEB128(c
);
1689 UWord res
= cc
->cu_start_offset
+ (UWord
)u64
;
1690 cts
->u
.val
= (ULong
)res
;
1691 cts
->szB
= sizeof(UWord
);
1692 TRACE_D3("<%lx>", res
);
1695 case DW_FORM_flag
: {
1696 UChar u8
= get_UChar(c
);
1697 TRACE_D3("%u", (UInt
)u8
);
1698 cts
->u
.val
= (ULong
)u8
;
1702 case DW_FORM_flag_present
:
1707 case DW_FORM_implicit_const
:
1708 cts
->u
.val
= (ULong
)abbv
->at_val
;
1710 TRACE_D3("%llu", cts
->u
.val
);
1712 case DW_FORM_block1
: {
1714 ULong u64
= (ULong
)get_UChar(c
);
1715 DiCursor block
= get_DiCursor_from_Cursor(c
);
1716 TRACE_D3("%llu byte block: ", u64
);
1717 for (u64b
= u64
; u64b
> 0; u64b
--) {
1718 UChar u8
= get_UChar(c
);
1719 TRACE_D3("%x ", (UInt
)u8
);
1722 cts
->szB
= - (Long
)u64
;
1725 case DW_FORM_block2
: {
1727 ULong u64
= (ULong
)get_UShort(c
);
1728 DiCursor block
= get_DiCursor_from_Cursor(c
);
1729 TRACE_D3("%llu byte block: ", u64
);
1730 for (u64b
= u64
; u64b
> 0; u64b
--) {
1731 UChar u8
= get_UChar(c
);
1732 TRACE_D3("%x ", (UInt
)u8
);
1735 cts
->szB
= - (Long
)u64
;
1738 case DW_FORM_block4
: {
1740 ULong u64
= (ULong
)get_UInt(c
);
1741 DiCursor block
= get_DiCursor_from_Cursor(c
);
1742 TRACE_D3("%llu byte block: ", u64
);
1743 for (u64b
= u64
; u64b
> 0; u64b
--) {
1744 UChar u8
= get_UChar(c
);
1745 TRACE_D3("%x ", (UInt
)u8
);
1748 cts
->szB
= - (Long
)u64
;
1751 case DW_FORM_exprloc
:
1752 case DW_FORM_block
: {
1754 ULong u64
= (ULong
)get_ULEB128(c
);
1755 DiCursor block
= get_DiCursor_from_Cursor(c
);
1756 TRACE_D3("%llu byte block: ", u64
);
1757 for (u64b
= u64
; u64b
> 0; u64b
--) {
1758 UChar u8
= get_UChar(c
);
1759 TRACE_D3("%x ", (UInt
)u8
);
1762 cts
->szB
= - (Long
)u64
;
1765 case DW_FORM_ref_sig8
: {
1767 ULong signature
= get_ULong (c
);
1768 ULong work
= signature
;
1769 TRACE_D3("8 byte signature: ");
1770 for (u64b
= 8; u64b
> 0; u64b
--) {
1771 UChar u8
= work
& 0xff;
1772 TRACE_D3("%x ", (UInt
)u8
);
1776 /* cc->signature_types is only built/initialised when
1777 VG_(clo_read_var_info) is set. In this case,
1778 the DW_FORM_ref_sig8 can be looked up.
1779 But we can also arrive here when only reading inline info
1780 and VG_(clo_trace_symtab) is set. In such a case,
1781 we cannot lookup the DW_FORM_ref_sig8, we rather assign
1782 a dummy value. This is a kludge, but otherwise,
1783 the 'dwarf inline info reader' tracing would have to
1784 do type processing/reading. It is better to avoid
1785 adding significant 'real' processing only due to tracing. */
1786 if (VG_(clo_read_var_info
)) {
1787 /* Due to the way that the hash table is constructed, the
1788 resulting DIE offset here is already "cooked". See
1789 cook_die_using_form. */
1790 cts
->u
.val
= lookup_signatured_type (cc
->signature_types
, signature
,
1794 vg_assert (VG_(clo_read_inline_info
));
1795 TRACE_D3("<not dereferencing signature type>");
1796 cts
->u
.val
= 0; /* Assign a dummy/rubbish value */
1798 cts
->szB
= sizeof(UWord
);
1801 case DW_FORM_indirect
: {
1802 /* Urgh, this is ugly and somewhat unclear how it works
1803 with DW_FORM_implicit_const. HACK. */
1804 name_form nfi
= *abbv
;
1805 nfi
.at_form
= (DW_FORM
)get_ULEB128(c
);
1806 get_Form_contents (cts
, cc
, c
, td3
, &nfi
);
1810 case DW_FORM_GNU_ref_alt
:
1811 cts
->u
.val
= get_Dwarfish_UWord(c
, cc
->is_dw64
);
1812 cts
->szB
= cc
->is_dw64
? sizeof(ULong
) : sizeof(UInt
);
1813 TRACE_D3("0x%lx", (UWord
)cts
->u
.val
);
1814 if (0) VG_(printf
)("DW_FORM_GNU_ref_alt 0x%lx\n", (UWord
)cts
->u
.val
);
1815 if (/* the following is surely impossible, but ... */
1816 !ML_(sli_is_valid
)(cc
->escn_debug_info_alt
))
1817 cc
->barf("get_Form_contents: DW_FORM_GNU_ref_addr used, "
1818 "but no alternate .debug_info");
1819 else if (cts
->u
.val
>= (ULong
)cc
->escn_debug_info_alt
.szB
) {
1820 /* Hmm. Offset is nonsensical for this object's .debug_info
1821 section. Be safe and reject it. */
1822 cc
->barf("get_Form_contents: DW_FORM_GNU_ref_addr points "
1823 "outside alternate .debug_info");
1827 case DW_FORM_GNU_strp_alt
: {
1828 /* this is an offset into alternate .debug_str */
1829 SizeT uw
= (UWord
)get_Dwarfish_UWord( c
, cc
->is_dw64
);
1830 if (!ML_(sli_is_valid
)(cc
->escn_debug_str_alt
))
1831 cc
->barf("get_Form_contents: DW_FORM_GNU_strp_alt used, "
1832 "but no alternate .debug_str");
1833 else if (uw
>= cc
->escn_debug_str_alt
.szB
)
1834 cc
->barf("get_Form_contents: DW_FORM_GNU_strp_alt "
1835 "points outside alternate .debug_str");
1836 /* FIXME: check the entire string lies inside debug_str,
1837 not just the first byte of it. */
1839 = ML_(cur_plus
)( ML_(cur_from_sli
)(cc
->escn_debug_str_alt
), uw
);
1841 HChar
* tmp
= ML_(cur_read_strdup
)(str
, "di.getFC.3");
1842 TRACE_D3("(indirect alt string, offset: 0x%lx): %s", uw
, tmp
);
1843 ML_(dinfo_free
)(tmp
);
1846 cts
->szB
= - (Long
)(1 + (ULong
)ML_(cur_strlen
)(str
));
1850 case DW_FORM_addrx
: {
1851 /* this is an offset into .debug_addr */
1852 ULong index
= (ULong
)(Long
)get_ULEB128(c
);
1853 get_Form_contents_addr(cts
, form
, index
, cc
, td3
);
1856 case DW_FORM_addrx1
: {
1857 /* this is an offset into .debug_addr */
1858 ULong index
= (ULong
)get_UChar(c
);
1859 get_Form_contents_addr(cts
, form
, index
, cc
, td3
);
1862 case DW_FORM_addrx2
: {
1863 /* this is an offset into .debug_addr */
1864 ULong index
= (ULong
)get_UShort(c
);
1865 get_Form_contents_addr(cts
, form
, index
, cc
, td3
);
1868 case DW_FORM_addrx3
: {
1869 /* this is an offset into .debug_addr */
1870 ULong index
= (ULong
)get_UInt3(c
);
1871 get_Form_contents_addr(cts
, form
, index
, cc
, td3
);
1874 case DW_FORM_addrx4
: {
1875 /* this is an offset into .debug_addr */
1876 ULong index
= (ULong
)get_UInt(c
);
1877 get_Form_contents_addr(cts
, form
, index
, cc
, td3
);
1880 case DW_FORM_strx
: {
1881 /* this is an offset into .debug_str_offsets */
1882 ULong index
= (ULong
)(Long
)get_ULEB128(c
);
1883 get_Form_contents_str_offsets(cts
, form
, index
, cc
, td3
);
1886 case DW_FORM_strx1
: {
1887 /* this is an offset into .debug_str_offsets */
1888 ULong index
= get_UChar(c
);
1889 get_Form_contents_str_offsets(cts
, form
, index
, cc
, td3
);
1892 case DW_FORM_strx2
: {
1893 /* this is an offset into .debug_str_offsets */
1894 ULong index
= (ULong
)get_UShort(c
);
1895 get_Form_contents_str_offsets(cts
, form
, index
, cc
, td3
);
1898 case DW_FORM_strx3
: {
1899 /* this is an offset into .debug_str_offsets */
1900 ULong index
= (ULong
)get_UInt3(c
);
1901 get_Form_contents_str_offsets(cts
, form
, index
, cc
, td3
);
1904 case DW_FORM_strx4
: {
1905 /* this is an offset into .debug_str_offsets */
1906 ULong index
= (ULong
)get_UInt(c
);
1907 get_Form_contents_str_offsets(cts
, form
, index
, cc
, td3
);
1913 "get_Form_contents: unhandled %u (%s) at <%llx>\n",
1914 form
, ML_(pp_DW_FORM
)(form
), get_position_of_Cursor(c
));
1915 c
->barf("get_Form_contents: unhandled DW_FORM");
1919 #define VARSZ_FORM 0xffffffff
1920 /* If the form is a fixed length form, return the nr of bytes for this form.
1921 If the form is a variable length form, return VARSZ_FORM. */
1923 UInt
get_Form_szB (const CUConst
* cc
, DW_FORM form
)
1925 // !!! keep switch in sync with get_Form_contents : the nr of bytes
1926 // read from a cursor by get_Form_contents must be returned by
1927 // the below switch.
1928 // The consistency is verified in trace_DIE.
1930 case DW_FORM_data1
: return 1;
1931 case DW_FORM_data2
: return 2;
1932 case DW_FORM_data4
: return 4;
1933 case DW_FORM_data8
: return 8;
1934 case DW_FORM_data16
: return 16;
1935 case DW_FORM_sec_offset
:
1940 case DW_FORM_rnglistx
:
1941 case DW_FORM_loclistx
:
1947 case DW_FORM_addr
: // See hack in get_Form_contents
1948 return sizeof(UWord
);
1949 case DW_FORM_ref_addr
: // See hack in get_Form_contents
1950 if (cc
->version
== 2)
1951 return sizeof(UWord
);
1953 return sizeof_Dwarfish_UWord (cc
->is_dw64
);
1955 case DW_FORM_line_strp
:
1956 return sizeof_Dwarfish_UWord (cc
->is_dw64
);
1957 case DW_FORM_string
:
1967 case DW_FORM_ref_udata
:
1971 case DW_FORM_flag_present
:
1972 return 0; // !!! special case, no data.
1973 case DW_FORM_block1
:
1975 case DW_FORM_block2
:
1977 case DW_FORM_block4
:
1979 case DW_FORM_exprloc
:
1982 case DW_FORM_ref_sig8
:
1984 case DW_FORM_indirect
:
1986 case DW_FORM_GNU_ref_alt
:
1987 return sizeof_Dwarfish_UWord(cc
->is_dw64
);
1988 case DW_FORM_GNU_strp_alt
:
1989 return sizeof_Dwarfish_UWord(cc
->is_dw64
);
1990 case DW_FORM_implicit_const
:
1991 return 0; /* Value inside abbrev. */
1996 case DW_FORM_addrx1
:
1999 case DW_FORM_addrx2
:
2002 case DW_FORM_addrx3
:
2005 case DW_FORM_addrx4
:
2010 "get_Form_szB: unhandled %u (%s)\n",
2011 form
, ML_(pp_DW_FORM
)(form
));
2012 cc
->barf("get_Form_contents: unhandled DW_FORM");
2016 /* Skip a DIE as described by abbv.
2017 If the DIE has a sibling, *sibling is set to the skipped DIE sibling value. */
2019 void skip_DIE (UWord
*sibling
,
2028 if (abbv
->nf
[nf_i
].at_name
== DW_AT_sibling
) {
2029 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/,
2032 *sibling
= cts
.u
.val
;
2034 } else if (abbv
->nf
[nf_i
].skip_szB
== VARSZ_FORM
) {
2035 DW_FORM form
= abbv
->nf
[nf_i
].at_form
;
2036 if(form
== DW_FORM_addrx
|| form
== DW_FORM_strx
2037 || form
== DW_FORM_rnglistx
|| form
== DW_FORM_loclistx
) {
2038 /* Skip without interpreting them, they may depend on e.g.
2039 DW_AT_addr_base that has not been read yet. */
2040 (void) get_ULEB128(c_die
);
2042 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/,
2046 advance_position_of_Cursor (c_die
, (ULong
)abbv
->nf
[nf_i
].skip_szB
);
2047 nf_i
= abbv
->nf
[nf_i
].next_nf
;
2055 /*------------------------------------------------------------*/
2057 /*--- Parsing of variable-related DIEs ---*/
2059 /*------------------------------------------------------------*/
2063 const HChar
* name
; /* in DebugInfo's .strpool */
2064 /* Represent ranges economically. nRanges is the number of
2066 0: .rngOneMin .rngOneMax .rngMany are all zero
2067 1: .rngOneMin .rngOneMax hold the range; .rngMany is NULL
2068 2: .rngOneMin .rngOneMax are zero; .rngMany holds the ranges.
2069 This is merely an optimisation to avoid having to allocate
2070 and free the XArray in the common case (98%) where there
2071 are zero or one address ranges. */
2075 XArray
* rngMany
; /* of AddrRange. NON-UNIQUE PTR in AR_DINFO. */
2076 /* Do not free .rngMany, since many TempVars will have the same
2077 value. Instead the associated storage is to be freed by
2078 deleting 'rangetree', which stores a single copy of each
2082 UWord typeR
; /* a cuOff */
2083 GExpr
* gexpr
; /* for this variable */
2084 GExpr
* fbGX
; /* to find the frame base of the enclosing fn, if
2086 UInt fndn_ix
; /* declaring file/dirname index in fndnpool, or 0 */
2087 Int fLine
; /* declaring file line number, or zero */
2088 /* offset in .debug_info, so that abstract instances can be
2089 found to satisfy references from concrete instances. */
2091 UWord absOri
; /* so the absOri fields refer to dioff fields
2092 in some other, related TempVar. */
2098 /* Contains the range stack: a stack of address ranges, one
2099 stack entry for each nested scope.
2101 Some scope entries are created by function definitions
2102 (DW_TAG_subprogram), and for those, we also note the GExpr
2103 derived from its DW_AT_frame_base attribute, if any.
2104 Consequently it should be possible to find, for any
2105 variable's DIE, the GExpr for the containing function's
2106 DW_AT_frame_base by scanning back through the stack to find
2107 the nearest entry associated with a function. This somewhat
2108 elaborate scheme is provided so as to make it possible to
2109 obtain the correct DW_AT_frame_base expression even in the
2110 presence of nested functions (or to be more precise, in the
2111 presence of nested DW_TAG_subprogram DIEs).
2113 Int sp
; /* [sp] is innermost active entry; sp==-1 for empty
2116 XArray
**ranges
; /* XArray of AddrRange */
2117 Int
*level
; /* D3 DIE levels */
2118 Bool
*isFunc
; /* from DW_AT_subprogram? */
2119 GExpr
**fbGX
; /* if isFunc, contains the FB expr, else NULL */
2123 /* Completely initialise a variable parser object */
2125 var_parser_init ( D3VarParser
*parser
)
2128 parser
->stack_size
= 0;
2129 parser
->ranges
= NULL
;
2130 parser
->level
= NULL
;
2131 parser
->isFunc
= NULL
;
2132 parser
->fbGX
= NULL
;
2135 /* Release any memory hanging off a variable parser object */
2137 var_parser_release ( D3VarParser
*parser
)
2139 ML_(dinfo_free
)( parser
->ranges
);
2140 ML_(dinfo_free
)( parser
->level
);
2141 ML_(dinfo_free
)( parser
->isFunc
);
2142 ML_(dinfo_free
)( parser
->fbGX
);
2145 static void varstack_show ( const D3VarParser
* parser
, const HChar
* str
)
2148 VG_(printf
)(" varstack (%s) {\n", str
);
2149 for (i
= 0; i
<= parser
->sp
; i
++) {
2150 XArray
* xa
= parser
->ranges
[i
];
2152 VG_(printf
)(" [%ld] (level %d)", i
, parser
->level
[i
]);
2153 if (parser
->isFunc
[i
]) {
2154 VG_(printf
)(" (fbGX=%p)", parser
->fbGX
[i
]);
2156 vg_assert(parser
->fbGX
[i
] == NULL
);
2159 if (VG_(sizeXA
)( xa
) == 0) {
2160 VG_(printf
)("** empty PC range array **");
2162 for (j
= 0; j
< VG_(sizeXA
)( xa
); j
++) {
2163 AddrRange
* range
= (AddrRange
*) VG_(indexXA
)( xa
, j
);
2165 VG_(printf
)("[%#lx,%#lx] ", range
->aMin
, range
->aMax
);
2170 VG_(printf
)(" }\n");
2173 /* Remove from the stack, all entries with .level > 'level' */
2175 void varstack_preen ( D3VarParser
* parser
, Bool td3
, Int level
)
2177 Bool changed
= False
;
2178 vg_assert(parser
->sp
< parser
->stack_size
);
2180 vg_assert(parser
->sp
>= -1);
2181 if (parser
->sp
== -1) break;
2182 if (parser
->level
[parser
->sp
] <= level
) break;
2184 TRACE_D3("BBBBAAAA varstack_pop [newsp=%d]\n", parser
->sp
-1);
2185 vg_assert(parser
->ranges
[parser
->sp
]);
2186 /* Who allocated this xa? get_range_list() or
2187 unitary_range_list(). */
2188 VG_(deleteXA
)( parser
->ranges
[parser
->sp
] );
2193 varstack_show( parser
, "after preen" );
2196 static void varstack_push ( const CUConst
* cc
,
2197 D3VarParser
* parser
,
2199 XArray
* ranges
, Int level
,
2200 Bool isFunc
, GExpr
* fbGX
) {
2202 TRACE_D3("BBBBAAAA varstack_push[newsp=%d]: %d %p\n",
2203 parser
->sp
+1, level
, ranges
);
2205 /* First we need to zap everything >= 'level', as we are about to
2206 replace any previous entry at 'level', so .. */
2207 varstack_preen(parser
, /*td3*/False
, level
-1);
2209 vg_assert(parser
->sp
>= -1);
2210 vg_assert(parser
->sp
< parser
->stack_size
);
2211 if (parser
->sp
== parser
->stack_size
- 1) {
2212 parser
->stack_size
+= 48;
2214 ML_(dinfo_realloc
)("di.readdwarf3.varpush.1", parser
->ranges
,
2215 parser
->stack_size
* sizeof parser
->ranges
[0]);
2217 ML_(dinfo_realloc
)("di.readdwarf3.varpush.2", parser
->level
,
2218 parser
->stack_size
* sizeof parser
->level
[0]);
2220 ML_(dinfo_realloc
)("di.readdwarf3.varpush.3", parser
->isFunc
,
2221 parser
->stack_size
* sizeof parser
->isFunc
[0]);
2223 ML_(dinfo_realloc
)("di.readdwarf3.varpush.4", parser
->fbGX
,
2224 parser
->stack_size
* sizeof parser
->fbGX
[0]);
2226 if (parser
->sp
>= 0)
2227 vg_assert(parser
->level
[parser
->sp
] < level
);
2229 vg_assert(ranges
!= NULL
);
2230 if (!isFunc
) vg_assert(fbGX
== NULL
);
2231 parser
->ranges
[parser
->sp
] = ranges
;
2232 parser
->level
[parser
->sp
] = level
;
2233 parser
->isFunc
[parser
->sp
] = isFunc
;
2234 parser
->fbGX
[parser
->sp
] = fbGX
;
2236 varstack_show( parser
, "after push" );
2240 /* cts is derived from a DW_AT_location and so refers either to a
2241 location expression or to a location list. Figure out which, and
2242 in both cases bundle the expression or location list into a
2243 so-called GExpr (guarded expression). */
2244 __attribute__((noinline
))
2245 static GExpr
* get_GX ( const CUConst
* cc
, Bool td3
, const FormContents
* cts
)
2247 GExpr
* gexpr
= NULL
;
2249 /* represents a non-empty in-line location expression, and
2250 cts->u.cur points at the image bytes */
2251 gexpr
= make_singleton_GX( cts
->u
.cur
, (ULong
)(- cts
->szB
) );
2255 /* represents a location list. cts->u.val is the offset of it
2257 if (!cc
->cu_svma_known
)
2258 cc
->barf("get_GX: location list, but CU svma is unknown");
2259 gexpr
= make_general_GX( cc
, td3
, cts
->u
.val
, cc
->cu_svma
);
2262 vg_assert(0); /* else caller is bogus */
2268 HChar
* get_line_str (struct _DebugInfo
* di
, Bool is_dw64
,
2269 Cursor
*data
, const UInt form
,
2270 DiSlice debugstr_img
, DiSlice debuglinestr_img
)
2274 case DW_FORM_string
: {
2275 DiCursor distr
= get_AsciiZ(data
);
2276 str
= ML_(cur_step_strdup
)(&distr
, "di.gls.string");
2279 case DW_FORM_strp
: {
2280 UWord uw
= (UWord
)get_Dwarfish_UWord( data
, is_dw64
);
2282 = ML_(cur_plus
)( ML_(cur_from_sli
)(debugstr_img
), uw
);
2283 str
= ML_(cur_read_strdup
)(distr
, "di.gls.strp");
2286 case DW_FORM_line_strp
: {
2287 UWord uw
= (UWord
)get_Dwarfish_UWord( data
, is_dw64
);
2289 = ML_(cur_plus
)( ML_(cur_from_sli
)(debuglinestr_img
), uw
);
2290 str
= ML_(cur_read_strdup
)(distr
, "di.gls.line_strp");
2294 ML_(symerr
)(di
, True
,
2295 "Unknown path string FORM in .debug_line");
2302 Int
get_line_ndx (struct _DebugInfo
* di
,
2303 Cursor
*data
, const UInt form
)
2308 res
= get_UChar(data
);
2311 res
= get_UShort(data
);
2314 res
= get_ULEB128(data
);
2317 ML_(symerr
)(di
, True
,
2318 "Unknown directory_index value FORM in .debug_line");
2325 void skip_line_form (struct _DebugInfo
* di
, Bool is_dw64
,
2326 Cursor
*d
, const UInt form
)
2329 case DW_FORM_block
: {
2330 ULong len
= get_ULEB128(d
);
2331 advance_position_of_Cursor (d
, len
);
2334 case DW_FORM_block1
: {
2335 UChar len
= get_UChar(d
);
2336 advance_position_of_Cursor (d
, len
);
2339 case DW_FORM_block2
: {
2340 UShort len
= get_UShort(d
);
2341 advance_position_of_Cursor (d
, len
);
2344 case DW_FORM_block4
: {
2345 UInt len
= get_UInt(d
);
2346 advance_position_of_Cursor (d
, len
);
2351 advance_position_of_Cursor (d
, 1);
2354 advance_position_of_Cursor (d
, 2);
2357 advance_position_of_Cursor (d
, 4);
2360 advance_position_of_Cursor (d
, 8);
2362 case DW_FORM_data16
:
2363 advance_position_of_Cursor (d
, 16);
2365 case DW_FORM_string
:
2366 (void)get_AsciiZ (d
);
2369 case DW_FORM_line_strp
:
2370 case DW_FORM_sec_offset
:
2371 advance_position_of_Cursor (d
, is_dw64
? 8 : 4);
2374 (void)get_ULEB128(d
);
2377 (void)get_SLEB128(d
);
2380 ML_(symerr
)(di
, True
, "Unknown FORM in .debug_line");
2385 /* Returns an xarray* of directory names (indexed by the dwarf dirname
2387 If 'compdir' is NULL, entry [0] will be set to "."
2388 otherwise entry [0] is set to compdir.
2389 Entry [0] basically means "the current directory of the compilation",
2390 whatever that means, according to the DWARF3 spec.
2391 FIXME??? readdwarf3.c/readdwarf.c have a lot of duplicated code */
2393 XArray
* read_dirname_xa (DebugInfo
* di
, UShort version
, const HChar
*compdir
,
2394 Cursor
*c
, const CUConst
*cc
,
2397 XArray
* dirname_xa
; /* xarray of HChar* dirname */
2398 const HChar
* dirname
;
2401 dirname_xa
= VG_(newXA
) (ML_(dinfo_zalloc
), "di.rdxa.1", ML_(dinfo_free
),
2404 if (compdir
== NULL
) {
2409 compdir_len
= VG_(strlen
)(compdir
);
2412 /* For version 5, the compdir is the first (zero) entry. */
2414 VG_(addToXA
) (dirname_xa
, &dirname
);
2417 TRACE_D3("The Directory Table%s\n",
2418 peek_UChar(c
) == 0 ? " is empty." : ":" );
2420 while (peek_UChar(c
) != 0) {
2422 DiCursor cur
= get_AsciiZ(c
);
2423 HChar
* data_str
= ML_(cur_read_strdup
)( cur
, "dirname_xa.1" );
2424 TRACE_D3(" %s\n", data_str
);
2426 /* If data_str[0] is '/', then 'data_str' is an absolute path and we
2427 don't mess with it. Otherwise, construct the
2428 path 'compdir' ++ "/" ++ 'data_str'. */
2430 if (data_str
[0] != '/'
2431 /* not an absolute path */
2433 /* actually got something sensible for compdir */
2436 SizeT len
= compdir_len
+ 1 + VG_(strlen
)(data_str
);
2437 HChar
*buf
= ML_(dinfo_zalloc
)("dirname_xa.2", len
+ 1);
2439 VG_(strcpy
)(buf
, compdir
);
2440 VG_(strcat
)(buf
, "/");
2441 VG_(strcat
)(buf
, data_str
);
2443 dirname
= ML_(addStr
)(di
, buf
, len
);
2444 VG_(addToXA
) (dirname_xa
, &dirname
);
2445 if (0) VG_(printf
)("rel path %s\n", buf
);
2446 ML_(dinfo_free
)(buf
);
2448 /* just use 'data_str'. */
2449 dirname
= ML_(addStr
)(di
,data_str
,-1);
2450 VG_(addToXA
) (dirname_xa
, &dirname
);
2451 if (0) VG_(printf
)("abs path %s\n", data_str
);
2454 ML_(dinfo_free
)(data_str
);
2459 UInt directories_count
;
2460 UChar directory_entry_format_count
;
2462 DiSlice debugstr_img
= cc
->escn_debug_str
;
2463 DiSlice debuglinestr_img
= cc
->escn_debug_line_str
;
2465 directory_entry_format_count
= get_UChar(c
);
2466 for (n
= 0; n
< directory_entry_format_count
; n
++) {
2467 UInt lnct
= get_ULEB128(c
);
2468 UInt form
= get_ULEB128(c
);
2469 if (lnct
== DW_LNCT_path
)
2473 directories_count
= get_ULEB128(c
);
2474 TRACE_D3("The Directory Table%s\n",
2475 directories_count
== 0 ? " is empty." : ":" );
2477 for (n
= 0; n
< directories_count
; n
++) {
2479 for (f
= 0; f
< directory_entry_format_count
; f
++) {
2480 UInt form
= forms
[f
];
2482 HChar
*data_str
= get_line_str (di
, cc
->is_dw64
, c
, form
,
2485 TRACE_D3(" %s\n", data_str
);
2487 /* If data_str[0] is '/', then 'data_str' is an absolute path and we
2488 don't mess with it. Otherwise, construct the
2489 path 'compdir' ++ "/" ++ 'data_str'. */
2491 if (data_str
[0] != '/'
2492 /* not an absolute path */
2494 /* actually got something sensible for compdir */
2497 SizeT len
= compdir_len
+ 1 + VG_(strlen
)(data_str
);
2498 HChar
*buf
= ML_(dinfo_zalloc
)("dirname_xa.2", len
+ 1);
2500 VG_(strcpy
)(buf
, compdir
);
2501 VG_(strcat
)(buf
, "/");
2502 VG_(strcat
)(buf
, data_str
);
2504 dirname
= ML_(addStr
)(di
, buf
, len
);
2505 VG_(addToXA
) (dirname_xa
, &dirname
);
2506 if (0) VG_(printf
)("rel path %s\n", buf
);
2507 ML_(dinfo_free
)(buf
);
2509 /* just use 'data_str'. */
2510 dirname
= ML_(addStr
)(di
,data_str
,-1);
2511 VG_(addToXA
) (dirname_xa
, &dirname
);
2512 if (0) VG_(printf
)("abs path %s\n", data_str
);
2515 ML_(dinfo_free
)(data_str
);
2517 skip_line_form (di
, cc
->is_dw64
, c
, form
);
2525 if (version
< 5 && get_UChar (c
) != 0) {
2526 ML_(symerr
)(NULL
, True
,
2527 "could not get NUL at end of DWARF directory table");
2528 VG_(deleteXA
)(dirname_xa
);
2536 void read_filename_table( /*MOD*/XArray
* /* of UInt* */ fndn_ix_Table
,
2537 const HChar
* compdir
,
2538 const CUConst
* cc
, ULong debug_line_offset
,
2547 XArray
* dirname_xa
; /* xarray of HChar* dirname */
2548 ULong dir_xa_ix
; /* Index in dirname_xa, as read from dwarf info. */
2549 const HChar
* dirname
;
2552 vg_assert(fndn_ix_Table
&& cc
&& cc
->barf
);
2553 if (!ML_(sli_is_valid
)(cc
->escn_debug_line
)
2554 || cc
->escn_debug_line
.szB
<= debug_line_offset
) {
2555 cc
->barf("read_filename_table: .debug_line is missing?");
2558 init_Cursor( &c
, cc
->escn_debug_line
, debug_line_offset
, cc
->barf
,
2559 "Overrun whilst reading .debug_line section(1)" );
2562 get_Initial_Length( &is_dw64
, &c
,
2563 "read_filename_table: invalid initial-length field" );
2564 version
= get_UShort( &c
);
2565 if (version
!= 2 && version
!= 3 && version
!= 4 && version
!= 5)
2566 cc
->barf("read_filename_table: Only DWARF version 2, 3, 4 and 5 "
2567 "line info is currently supported.");
2569 /* addrs_size = */ get_UChar( &c
);
2570 /* seg_size = */ get_UChar( &c
);
2572 /*header_length = (ULong)*/ get_Dwarfish_UWord( &c
, is_dw64
);
2573 /*minimum_instruction_length = */ get_UChar( &c
);
2575 /*maximum_operations_per_insn = */ get_UChar( &c
);
2576 /*default_is_stmt = */ get_UChar( &c
);
2577 /*line_base = (Char)*/ get_UChar( &c
);
2578 /*line_range = */ get_UChar( &c
);
2579 opcode_base
= get_UChar( &c
);
2580 /* skip over "standard_opcode_lengths" */
2581 for (i
= 1; i
< (Word
)opcode_base
; i
++)
2582 (void)get_UChar( &c
);
2584 dirname_xa
= read_dirname_xa(cc
->di
, version
, compdir
, &c
, cc
, td3
);
2586 /* Read and record the file names table */
2587 vg_assert( VG_(sizeXA
)( fndn_ix_Table
) == 0 );
2589 /* Add a dummy index-zero entry. DWARF3 numbers its files
2590 from 1, for some reason. */
2591 fndn_ix
= ML_(addFnDn
) ( cc
->di
, "<unknown_file>", NULL
);
2592 VG_(addToXA
)( fndn_ix_Table
, &fndn_ix
);
2593 while (peek_UChar(&c
) != 0) {
2594 DiCursor cur
= get_AsciiZ(&c
);
2595 str
= ML_(addStrFromCursor
)( cc
->di
, cur
);
2596 dir_xa_ix
= get_ULEB128( &c
);
2597 if (dirname_xa
!= NULL
&& dir_xa_ix
< VG_(sizeXA
) (dirname_xa
))
2598 dirname
= *(HChar
**)VG_(indexXA
) ( dirname_xa
, dir_xa_ix
);
2601 fndn_ix
= ML_(addFnDn
)( cc
->di
, str
, dirname
);
2602 TRACE_D3(" read_filename_table: %ld fndn_ix %u %s %s\n",
2603 VG_(sizeXA
)(fndn_ix_Table
), fndn_ix
,
2605 VG_(addToXA
)( fndn_ix_Table
, &fndn_ix
);
2606 (void)get_ULEB128( &c
); /* skip last mod time */
2607 (void)get_ULEB128( &c
); /* file size */
2611 UChar p_ndx
= 0, d_ndx
= 0;
2612 UInt file_names_count
;
2613 UChar file_names_entry_format_count
;
2615 DiSlice debugstr_img
= cc
->escn_debug_str
;
2616 DiSlice debuglinestr_img
= cc
->escn_debug_line_str
;
2617 file_names_entry_format_count
= get_UChar( &c
);
2618 for (n
= 0; n
< file_names_entry_format_count
; n
++) {
2619 UInt lnct
= get_ULEB128( &c
);
2620 UInt form
= get_ULEB128( &c
);
2621 if (lnct
== DW_LNCT_path
)
2623 if (lnct
== DW_LNCT_directory_index
)
2627 file_names_count
= get_ULEB128( &c
);
2628 for (n
= 0; n
< file_names_count
; n
++) {
2632 for (f
= 0; f
< file_names_entry_format_count
; f
++) {
2633 UInt form
= forms
[f
];
2635 str
= get_line_str (cc
->di
, cc
->is_dw64
, &c
, form
,
2636 debugstr_img
, debuglinestr_img
);
2637 else if (f
== d_ndx
)
2638 dir_xa_ix
= get_line_ndx (cc
->di
, &c
, form
);
2640 skip_line_form (cc
->di
, cc
->is_dw64
, &c
, form
);
2643 if (dirname_xa
!= NULL
2644 && dir_xa_ix
>= 0 && dir_xa_ix
< VG_(sizeXA
) (dirname_xa
))
2645 dirname
= *(HChar
**)VG_(indexXA
) ( dirname_xa
, dir_xa_ix
);
2648 fndn_ix
= ML_(addFnDn
)( cc
->di
, str
, dirname
);
2649 TRACE_D3(" read_filename_table: %ld fndn_ix %u %s %s\n",
2650 VG_(sizeXA
)(fndn_ix_Table
), fndn_ix
,
2652 VG_(addToXA
)( fndn_ix_Table
, &fndn_ix
);
2655 /* We're done! The rest of it is not interesting. */
2656 if (dirname_xa
!= NULL
)
2657 VG_(deleteXA
)(dirname_xa
);
2660 /* setup_cu_svma to be called when a cu is found at level 0,
2661 to establish the cu_svma. */
2662 static void setup_cu_svma(CUConst
* cc
, Bool have_lo
, Addr ip_lo
, Bool td3
)
2665 /* We have potentially more than one type of parser parsing the
2666 dwarf information. At least currently, each parser establishes
2667 the cu_svma. So, in case cu_svma_known, we check that the same
2668 result is obtained by the 2nd parsing of the cu.
2670 Alternatively, we could reset cu_svma_known after each parsing
2671 and then check that we only see a single DW_TAG_compile_unit DIE
2672 at level 0, DWARF3 only allows exactly one top level DIE per
2678 /* Now, it may be that this DIE doesn't tell us the CU's
2679 SVMA, by way of not having a DW_AT_low_pc. That's OK --
2680 the CU doesn't *have* to have its SVMA specified.
2682 But as per last para D3 spec sec 3.1.1 ("Normal and
2683 Partial Compilation Unit Entries", "If the base address
2684 (viz, the SVMA) is undefined, then any DWARF entry or
2685 structure defined in terms of the base address of that
2686 compilation unit is not valid.". So that means, if whilst
2687 processing the children of this top level DIE (or their
2688 children, etc) we see a DW_AT_ranges, and cu_svma_known is
2689 False, then the DIE that contains it is (per the spec)
2690 invalid, and we can legitimately stop and complain. */
2691 /* .. whereas The Reality is, simply assume the SVMA is zero
2692 if it isn't specified. */
2696 if (cc
->cu_svma_known
) {
2697 vg_assert (cu_svma
== cc
->cu_svma
);
2699 cc
->cu_svma_known
= True
;
2700 cc
->cu_svma
= cu_svma
;
2702 TRACE_D3("setup_cu_svma: acquire CU_SVMA of %p\n", (void*) cc
->cu_svma
);
2706 /* Setup info from DW_AT_addr_base, DW_AT_str_offsets_base, DW_AT_rnglists_base
2707 and DW_AT_loclists_base. This needs to be done early, because other DW_AT_*
2708 info may depend on it. */
2709 static void setup_cu_bases(CUConst
* cc
, Cursor
* c_die
, const g_abbv
* abbv
)
2714 if(cc
->cu_has_addr_base
&& cc
->cu_has_str_offsets_base
2715 && cc
->cu_has_rnglists_base
&& cc
->cu_has_loclists_base
)
2717 saved_c_pos
= get_position_of_Cursor(c_die
);
2719 while (!cc
->cu_has_addr_base
|| !cc
->cu_has_str_offsets_base
2720 || !cc
->cu_has_rnglists_base
|| !cc
->cu_has_loclists_base
) {
2721 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
2722 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
2723 const name_form
*nf
= &abbv
->nf
[nf_i
];
2724 if (attr
== 0 && form
== 0)
2726 if (attr
!= DW_AT_addr_base
&& attr
!= DW_AT_str_offsets_base
2727 && attr
!= DW_AT_rnglists_base
&& attr
!= DW_AT_loclists_base
) {
2728 const UInt form_szB
= get_Form_szB (cc
, form
);
2729 if (form_szB
== VARSZ_FORM
) {
2730 if(form
== DW_FORM_addrx
|| form
== DW_FORM_strx
2731 || form
== DW_FORM_rnglistx
|| form
== DW_FORM_loclistx
) {
2732 /* Skip without interpreting them, they depend on *_base. */
2733 (void) get_ULEB128(c_die
);
2735 /* Need to read the contents of this one to skip it. */
2736 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/,
2740 /* Skip without even reading it, as it may depend on *_base. */
2741 advance_position_of_Cursor (c_die
, form_szB
);
2746 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
2747 if (attr
== DW_AT_addr_base
&& cts
.szB
> 0) {
2748 Addr addr_base
= cts
.u
.val
;
2749 if (cc
->cu_has_addr_base
)
2750 vg_assert (addr_base
== cc
->cu_addr_base
);
2752 cc
->cu_has_addr_base
= True
;
2753 cc
->cu_addr_base
= addr_base
;
2756 if (attr
== DW_AT_str_offsets_base
&& cts
.szB
> 0) {
2757 Addr str_offsets_base
= cts
.u
.val
;
2758 if (cc
->cu_has_str_offsets_base
)
2759 vg_assert (str_offsets_base
== cc
->cu_str_offsets_base
);
2761 cc
->cu_has_str_offsets_base
= True
;
2762 cc
->cu_str_offsets_base
= str_offsets_base
;
2765 if (attr
== DW_AT_rnglists_base
&& cts
.szB
> 0) {
2766 Addr rnglists_base
= cts
.u
.val
;
2767 if (cc
->cu_has_rnglists_base
)
2768 vg_assert (rnglists_base
== cc
->cu_rnglists_base
);
2770 cc
->cu_has_rnglists_base
= True
;
2771 cc
->cu_rnglists_base
= rnglists_base
;
2774 if (attr
== DW_AT_loclists_base
&& cts
.szB
> 0) {
2775 Addr loclists_base
= cts
.u
.val
;
2776 if (cc
->cu_has_loclists_base
)
2777 vg_assert (loclists_base
== cc
->cu_loclists_base
);
2779 cc
->cu_has_loclists_base
= True
;
2780 cc
->cu_loclists_base
= loclists_base
;
2785 set_position_of_Cursor(c_die
, saved_c_pos
);
2788 static void trace_DIE(
2792 UWord saved_die_c_offset
,
2800 Bool debug_types_flag
;
2803 UWord check_sibling
= 0;
2805 posn
= uncook_die( cc
, posn
, &debug_types_flag
, &alt_flag
);
2807 debug_types_flag
? cc
->escn_debug_types
:
2808 alt_flag
? cc
->escn_debug_info_alt
: cc
->escn_debug_info
,
2809 saved_die_c_offset
, cc
->barf
,
2810 "Overrun trace_DIE");
2812 VG_(printf
)(" <%d><%lx>: Abbrev Number: %llu (%s)%s%s\n",
2813 level
, posn
, (ULong
) abbv
->abbv_code
, ML_(pp_DW_TAG
)( dtag
),
2814 debug_types_flag
? " (in .debug_types)" : "",
2815 alt_flag
? " (in alternate .debug_info)" : "");
2818 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
2819 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
2820 const name_form
*nf
= &abbv
->nf
[nf_i
];
2822 if (attr
== 0 && form
== 0) break;
2823 VG_(printf
)(" %-18s: ", ML_(pp_DW_AT
)(attr
));
2824 /* Get the form contents, so as to print them */
2825 get_Form_contents( &cts
, cc
, &c
, True
, nf
);
2826 if (attr
== DW_AT_sibling
&& cts
.szB
> 0) {
2827 sibling
= cts
.u
.val
;
2829 VG_(printf
)("\t\n");
2832 /* Verify that skipping a DIE gives the same displacement as
2833 tracing (i.e. reading) a DIE. If there is an inconsistency in
2834 the nr of bytes read by get_Form_contents and get_Form_szB, this
2835 should be detected by the below. Using --trace-symtab=yes
2836 --read-var-info=yes will ensure all DIEs are systematically
2838 skip_DIE (&check_sibling
, &check_skip
, abbv
, cc
);
2839 vg_assert (check_sibling
== sibling
);
2840 vg_assert (get_position_of_Cursor (&check_skip
)
2841 == get_position_of_Cursor (&c
));
2844 __attribute__((noreturn
))
2845 static void dump_bad_die_and_barf(
2846 const HChar
*whichparser
,
2851 UWord saved_die_c_offset
,
2855 trace_DIE (dtag
, posn
, level
, saved_die_c_offset
, abbv
, cc
);
2856 VG_(printf
)("%s:\n", whichparser
);
2857 cc
->barf("confused by the above DIE");
2860 __attribute__((noinline
))
2861 static void bad_DIE_confusion(int linenr
)
2863 VG_(printf
)("\nparse DIE(readdwarf3.c:%d): confused by:\n", linenr
);
2865 #define goto_bad_DIE do {bad_DIE_confusion(__LINE__); goto bad_DIE;} while (0)
2867 /* Reset the fndn_ix_Table. When we come across the top level DIE for a CU we
2868 will copy all the file names out of the .debug_line img area and use this
2869 table to look up the copies when we later see filename numbers in
2870 DW_TAG_variables etc. The table can be reused between parsers (var and
2871 inline) and between CUs. So we keep a copy of the last one parsed. Call
2872 reset_fndn_ix_table before reading a new one from a new offset. */
2874 void reset_fndn_ix_table (XArray
** fndn_ix_Table
, ULong
*debug_line_offset
,
2877 vg_assert (new_offset
== -1
2878 || *debug_line_offset
!= new_offset
);
2879 Int size
= *fndn_ix_Table
== NULL
? 0 : VG_(sizeXA
) (*fndn_ix_Table
);
2881 VG_(deleteXA
) (*fndn_ix_Table
);
2882 *fndn_ix_Table
= NULL
;
2884 if (*fndn_ix_Table
== NULL
)
2885 *fndn_ix_Table
= VG_(newXA
)( ML_(dinfo_zalloc
),
2886 "di.readdwarf3.reset_ix_table",
2889 *debug_line_offset
= new_offset
;
2892 __attribute__((noinline
))
2893 static void parse_var_DIE (
2894 /*MOD*/WordFM
* /* of (XArray* of AddrRange, void) */ rangestree
,
2895 /*MOD*/XArray
* /* of TempVar* */ tempvars
,
2896 /*MOD*/XArray
* /* of GExpr* */ gexprs
,
2897 /*MOD*/D3VarParser
* parser
,
2898 XArray
** fndn_ix_Table
,
2899 ULong
*debug_line_offset
,
2912 UWord saved_die_c_offset
= get_position_of_Cursor( c_die
);
2914 varstack_preen( parser
, td3
, level
-1 );
2916 if (dtag
== DW_TAG_compile_unit
2917 || dtag
== DW_TAG_type_unit
2918 || dtag
== DW_TAG_partial_unit
2919 || dtag
== DW_TAG_skeleton_unit
) {
2920 Bool have_lo
= False
;
2921 Bool have_hi1
= False
;
2922 Bool hiIsRelative
= False
;
2923 Bool have_range
= False
;
2927 const HChar
*compdir
= NULL
;
2930 setup_cu_bases(cc
, c_die
, abbv
);
2933 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
2934 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
2935 const name_form
*nf
= &abbv
->nf
[nf_i
];
2937 if (attr
== 0 && form
== 0) break;
2938 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
2939 if (attr
== DW_AT_low_pc
&& cts
.szB
> 0) {
2943 if (attr
== DW_AT_high_pc
&& cts
.szB
> 0) {
2946 if (form
!= DW_FORM_addr
)
2947 hiIsRelative
= True
;
2949 if (attr
== DW_AT_ranges
&& cts
.szB
> 0) {
2950 rangeoff
= cts
.u
.val
;
2953 if (attr
== DW_AT_comp_dir
) {
2955 cc
->barf("parse_var_DIE compdir: expecting indirect string");
2956 HChar
*str
= ML_(cur_read_strdup
)( cts
.u
.cur
,
2957 "parse_var_DIE.compdir" );
2958 compdir
= ML_(addStr
)(cc
->di
, str
, -1);
2959 ML_(dinfo_free
) (str
);
2961 if (attr
== DW_AT_stmt_list
&& cts
.szB
> 0) {
2962 if (cts
.u
.val
!= *debug_line_offset
) {
2963 reset_fndn_ix_table( fndn_ix_Table
, debug_line_offset
,
2965 read_filename_table( *fndn_ix_Table
, compdir
,
2966 cc
, cts
.u
.val
, td3
);
2970 if (have_lo
&& have_hi1
&& hiIsRelative
)
2973 /* Now, does this give us an opportunity to find this
2976 setup_cu_svma(cc
, have_lo
, ip_lo
, td3
);
2978 /* Do we have something that looks sane? */
2979 if (have_lo
&& have_hi1
&& (!have_range
)) {
2981 varstack_push( cc
, parser
, td3
,
2982 unitary_range_list(ip_lo
, ip_hi1
- 1),
2984 False
/*isFunc*/, NULL
/*fbGX*/ );
2985 else if (ip_lo
== 0 && ip_hi1
== 0)
2986 /* CU has no code, presumably?
2987 Such situations have been encountered for code
2988 compiled with -ffunction-sections -fdata-sections
2989 and linked with --gc-sections. Completely
2990 eliminated CU gives such 0 lo/hi pc. Similarly
2991 to a CU which has no lo/hi/range pc, we push
2992 an empty range list. */
2993 varstack_push( cc
, parser
, td3
,
2996 False
/*isFunc*/, NULL
/*fbGX*/ );
2998 if ((!have_lo
) && (!have_hi1
) && have_range
) {
2999 varstack_push( cc
, parser
, td3
,
3000 get_range_list( cc
, td3
,
3001 rangeoff
, cc
->cu_svma
),
3003 False
/*isFunc*/, NULL
/*fbGX*/ );
3005 if ((!have_lo
) && (!have_hi1
) && (!have_range
)) {
3006 /* CU has no code, presumably? */
3007 varstack_push( cc
, parser
, td3
,
3010 False
/*isFunc*/, NULL
/*fbGX*/ );
3012 if (have_lo
&& (!have_hi1
) && have_range
&& ip_lo
== 0) {
3013 /* broken DIE created by gcc-4.3.X ? Ignore the
3014 apparently-redundant DW_AT_low_pc and use the DW_AT_ranges
3016 varstack_push( cc
, parser
, td3
,
3017 get_range_list( cc
, td3
,
3018 rangeoff
, cc
->cu_svma
),
3020 False
/*isFunc*/, NULL
/*fbGX*/ );
3022 if (0) VG_(printf
)("I got hlo %d hhi1 %d hrange %d\n",
3023 (Int
)have_lo
, (Int
)have_hi1
, (Int
)have_range
);
3028 if (dtag
== DW_TAG_lexical_block
|| dtag
== DW_TAG_subprogram
) {
3029 Bool have_lo
= False
;
3030 Bool have_hi1
= False
;
3031 Bool have_range
= False
;
3032 Bool hiIsRelative
= False
;
3036 Bool isFunc
= dtag
== DW_TAG_subprogram
;
3040 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
3041 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
3042 const name_form
*nf
= &abbv
->nf
[nf_i
];
3044 if (attr
== 0 && form
== 0) break;
3045 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
3046 if (attr
== DW_AT_low_pc
&& cts
.szB
> 0) {
3050 if (attr
== DW_AT_high_pc
&& cts
.szB
> 0) {
3053 if (form
!= DW_FORM_addr
)
3054 hiIsRelative
= True
;
3056 if (attr
== DW_AT_ranges
&& cts
.szB
> 0) {
3057 rangeoff
= cts
.u
.val
;
3061 && attr
== DW_AT_frame_base
3062 && cts
.szB
!= 0 /* either scalar or nonempty block */) {
3063 fbGX
= get_GX( cc
, False
/*td3*/, &cts
);
3065 VG_(addToXA
)(gexprs
, &fbGX
);
3068 if (have_lo
&& have_hi1
&& hiIsRelative
)
3070 /* Do we have something that looks sane? */
3071 if (dtag
== DW_TAG_subprogram
3072 && (!have_lo
) && (!have_hi1
) && (!have_range
)) {
3073 /* This is legit - ignore it. Sec 3.3.3: "A subroutine entry
3074 representing a subroutine declaration that is not also a
3075 definition does not have code address or range
3078 if (dtag
== DW_TAG_lexical_block
3079 && (!have_lo
) && (!have_hi1
) && (!have_range
)) {
3080 /* I believe this is legit, and means the lexical block
3081 contains no insns (whatever that might mean). Ignore. */
3083 if (have_lo
&& have_hi1
&& (!have_range
)) {
3084 /* This scope supplies just a single address range. */
3086 varstack_push( cc
, parser
, td3
,
3087 unitary_range_list(ip_lo
, ip_hi1
- 1),
3088 level
, isFunc
, fbGX
);
3090 if ((!have_lo
) && (!have_hi1
) && have_range
) {
3091 /* This scope supplies multiple address ranges via the use of
3093 varstack_push( cc
, parser
, td3
,
3094 get_range_list( cc
, td3
,
3095 rangeoff
, cc
->cu_svma
),
3096 level
, isFunc
, fbGX
);
3098 if (have_lo
&& (!have_hi1
) && (!have_range
)) {
3099 /* This scope is bogus. The D3 spec sec 3.4 (Lexical Block
3100 Entries) says fairly clearly that a scope must have either
3101 _range or (_low_pc and _high_pc). */
3102 /* The spec is a bit ambiguous though. Perhaps a single byte
3103 range is intended? See sec 2.17 (Code Addresses And Ranges) */
3104 /* This case is here because icc9 produced this:
3105 <2><13bd>: DW_TAG_lexical_block
3106 DW_AT_decl_line : 5229
3107 DW_AT_decl_column : 37
3109 DW_AT_low_pc : 0x401b03
3111 /* Ignore (seems safer than pushing a single byte range) */
3116 if (dtag
== DW_TAG_variable
|| dtag
== DW_TAG_formal_parameter
) {
3117 const HChar
* name
= NULL
;
3118 UWord typeR
= D3_INVALID_CUOFF
;
3119 Bool global
= False
;
3120 GExpr
* gexpr
= NULL
;
3121 UWord abs_ori
= (UWord
)D3_INVALID_CUOFF
;
3126 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
3127 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
3128 const name_form
*nf
= &abbv
->nf
[nf_i
];
3130 if (attr
== 0 && form
== 0) break;
3131 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
3132 if (attr
== DW_AT_name
&& cts
.szB
< 0) {
3133 name
= ML_(addStrFromCursor
)( cc
->di
, cts
.u
.cur
);
3135 if (attr
== DW_AT_location
3136 && cts
.szB
!= 0 /* either scalar or nonempty block */) {
3137 gexpr
= get_GX( cc
, False
/*td3*/, &cts
);
3139 VG_(addToXA
)(gexprs
, &gexpr
);
3141 if (attr
== DW_AT_type
&& cts
.szB
> 0) {
3142 typeR
= cook_die_using_form( cc
, cts
.u
.val
, form
);
3144 if (attr
== DW_AT_external
&& cts
.szB
> 0 && cts
.u
.val
> 0) {
3147 if (attr
== DW_AT_abstract_origin
&& cts
.szB
> 0) {
3148 abs_ori
= (UWord
)cts
.u
.val
;
3150 if (attr
== DW_AT_declaration
&& cts
.szB
> 0 && cts
.u
.val
> 0) {
3151 /*declaration = True;*/
3153 if (attr
== DW_AT_decl_line
&& cts
.szB
> 0) {
3154 lineNo
= (Int
)cts
.u
.val
;
3156 if (attr
== DW_AT_decl_file
&& cts
.szB
> 0) {
3157 Int ftabIx
= (Int
)cts
.u
.val
;
3159 && ftabIx
< VG_(sizeXA
)( *fndn_ix_Table
)) {
3160 fndn_ix
= *(UInt
*)VG_(indexXA
)( *fndn_ix_Table
, ftabIx
);
3162 if (0) VG_(printf
)("XXX filename fndn_ix = %u %s\n", fndn_ix
,
3163 ML_(fndn_ix2filename
) (cc
->di
, fndn_ix
));
3166 if (!global
&& dtag
== DW_TAG_variable
&& level
== 1) {
3167 /* Case of a static variable. It is better to declare
3168 it global as the variable is not really related to
3169 a PC range, as its address can be used by program
3170 counters outside of the ranges where it is visible. */
3174 /* We'll collect it if one of the following three
3176 (1) has location and type -> completed
3177 (2) has type only -> is an abstract instance
3178 (3) has location and abs_ori -> is a concrete instance
3179 Name, fndn_ix and line number are all optional frills.
3181 if ( /* 1 */ (gexpr
&& typeR
!= D3_INVALID_CUOFF
)
3182 /* 2 */ || (typeR
!= D3_INVALID_CUOFF
)
3183 /* 3 */ || (gexpr
&& abs_ori
!= (UWord
)D3_INVALID_CUOFF
) ) {
3185 /* Add this variable to the list of interesting looking
3186 variables. Crucially, note along with it the address
3187 range(s) associated with the variable, which for locals
3188 will be the address ranges at the top of the varparser's
3192 const XArray
* /* of AddrRange */ xa
;
3194 /* Stack can't be empty; we put a dummy entry on it for the
3195 entire address range before starting with the DIEs for
3197 vg_assert(parser
->sp
>= 0);
3199 /* If this is a local variable (non-global), try to find
3200 the GExpr for the DW_AT_frame_base of the containing
3201 function. It should have been pushed on the stack at the
3202 time we encountered its DW_TAG_subprogram DIE, so the way
3203 to find it is to scan back down the stack looking for it.
3204 If there isn't an enclosing stack entry marked 'isFunc'
3205 then we must be seeing variable or formal param DIEs
3206 outside of a function, so we deem the Dwarf to be
3207 malformed if that happens. Note that the fbGX may be NULL
3208 if the containing DW_TAG_subprogram didn't supply a
3209 DW_AT_frame_base -- that's OK, but there must actually be
3210 a containing DW_TAG_subprogram. */
3213 for (i
= parser
->sp
; i
>= 0; i
--) {
3214 if (parser
->isFunc
[i
]) {
3215 fbGX
= parser
->fbGX
[i
];
3221 if (0 && VG_(clo_verbosity
) >= 0) {
3222 VG_(message
)(Vg_DebugMsg
,
3223 "warning: parse_var_DIE: non-global variable "
3224 "outside DW_TAG_subprogram\n");
3227 /* This seems to happen a lot. Just ignore it -- if,
3228 when we come to evaluation of the location (guarded)
3229 expression, it requires a frame base value, and
3230 there's no expression for that, then evaluation as a
3231 whole will fail. Harmless - a bit of a waste of
3232 cycles but nothing more. */
3236 /* re "global ? 0 : parser->sp" (twice), if the var is
3237 marked 'global' then we must put it at the global scope,
3238 as only the global scope (level 0) covers the entire PC
3239 address space. It is asserted elsewhere that level 0
3240 always covers the entire address space. */
3241 xa
= parser
->ranges
[global
? 0 : parser
->sp
];
3242 nRanges
= VG_(sizeXA
)(xa
);
3243 vg_assert(nRanges
>= 0);
3245 tv
= ML_(dinfo_zalloc
)( "di.readdwarf3.pvD.1", sizeof(TempVar
) );
3247 tv
->level
= global
? 0 : parser
->sp
;
3251 tv
->fndn_ix
= fndn_ix
;
3254 tv
->absOri
= abs_ori
;
3256 /* See explanation on definition of type TempVar for the
3257 reason for this elaboration. */
3258 tv
->nRanges
= nRanges
;
3263 AddrRange
* range
= VG_(indexXA
)(xa
, 0);
3264 tv
->rngOneMin
= range
->aMin
;
3265 tv
->rngOneMax
= range
->aMax
;
3267 else if (nRanges
> 1) {
3268 /* See if we already have a range list which is
3269 structurally identical. If so, use that; if not, clone
3270 this one, and add it to our collection. */
3272 if (VG_(lookupFM
)( rangestree
, &keyW
, &valW
, (UWord
)xa
)) {
3273 XArray
* old
= (XArray
*)keyW
;
3274 vg_assert(valW
== 0);
3275 vg_assert(old
!= xa
);
3278 XArray
* cloned
= VG_(cloneXA
)( "di.readdwarf3.pvD.2", xa
);
3279 tv
->rngMany
= cloned
;
3280 VG_(addToFM
)( rangestree
, (UWord
)cloned
, 0 );
3284 VG_(addToXA
)( tempvars
, &tv
);
3286 TRACE_D3(" Recording this variable, with %ld PC range(s)\n",
3288 /* collect stats on how effective the ->ranges special
3291 static Int ntot
=0, ngt
=0;
3293 if (tv
->rngMany
) ngt
++;
3294 if (0 == (ntot
% 100000))
3295 VG_(printf
)("XXXX %d tot, %d cloned\n", ntot
, ngt
);
3300 /* Here are some other weird cases seen in the wild:
3302 We have a variable with a name and a type, but no
3303 location. I guess that's a sign that it has been
3304 optimised away. Ignore it. Here's an example:
3306 static Int lc_compar(void* n1, void* n2) {
3307 MC_Chunk* mc1 = *(MC_Chunk**)n1;
3308 MC_Chunk* mc2 = *(MC_Chunk**)n2;
3309 return (mc1->data < mc2->data ? -1 : 1);
3312 Both mc1 and mc2 are like this
3313 <2><5bc>: Abbrev Number: 21 (DW_TAG_variable)
3316 DW_AT_decl_line : 216
3319 whereas n1 and n2 do have locations specified.
3321 ---------------------------------------------
3323 We see a DW_TAG_formal_parameter with a type, but
3324 no name and no location. It's probably part of a function type
3325 construction, thusly, hence ignore it:
3326 <1><2b4>: Abbrev Number: 12 (DW_TAG_subroutine_type)
3327 DW_AT_sibling : <2c9>
3328 DW_AT_prototyped : 1
3330 <2><2be>: Abbrev Number: 13 (DW_TAG_formal_parameter)
3332 <2><2c3>: Abbrev Number: 13 (DW_TAG_formal_parameter)
3335 ---------------------------------------------
3337 Is very minimal, like this:
3338 <4><81d>: Abbrev Number: 44 (DW_TAG_variable)
3339 DW_AT_abstract_origin: <7ba>
3340 What that signifies I have no idea. Ignore.
3342 ----------------------------------------------
3344 Is very minimal, like this:
3345 <200f>: DW_TAG_formal_parameter
3346 DW_AT_abstract_ori: <1f4c>
3347 DW_AT_location : 13440
3348 What that signifies I have no idea. Ignore.
3349 It might be significant, though: the variable at least
3350 has a location and so might exist somewhere.
3351 Maybe we should handle this.
3353 ---------------------------------------------
3355 <22407>: DW_TAG_variable
3356 DW_AT_name : (indirect string, offset: 0x6579):
3357 vgPlain_trampoline_stuff_start
3358 DW_AT_decl_file : 29
3359 DW_AT_decl_line : 56
3361 DW_AT_declaration : 1
3363 Nameless and typeless variable that has a location? Who
3365 <2><3d178>: Abbrev Number: 22 (DW_TAG_variable)
3366 DW_AT_location : 9 byte block: 3 c0 c7 13 38 0 0 0 0
3367 (DW_OP_addr: 3813c7c0)
3369 No, really. Check it out. gcc is quite simply borked.
3370 <3><168cc>: Abbrev Number: 141 (DW_TAG_variable)
3371 // followed by no attributes, and the next DIE is a sibling,
3378 dump_bad_die_and_barf("parse_var_DIE", dtag
, posn
, level
,
3379 c_die
, saved_die_c_offset
,
3387 UWord sibling
; // sibling of the last read DIE (if it has a sibling).
3391 /* Return the function name corresponding to absori.
3393 absori is a 'cooked' reference to a DIE, i.e. absori can be either
3394 in cc->escn_debug_info or in cc->escn_debug_info_alt.
3395 get_inlFnName will uncook absori.
3397 The returned value is a (permanent) string in DebugInfo's .strchunks.
3399 LIMITATION: absori must point in the CU of cc. If absori points
3400 in another CU, returns "UnknownInlinedFun".
3402 The problems with retrieving the function name when absori is in
3403 another CU are: the DIE reading code cannot properly extract data from
3404 another CU, as the abbv code retrieved in the other CU cannot be
3405 translated into an abbreviation. Reading data from the alternate debug
3406 info also gives problems as the string reference is also in the alternate
3407 file, but when reading the alt DIE, the string form is a 'local' string,
3408 but cannot be read in the current CU, but must be read in the alt CU.
3409 See bug 338803 comment#3 and attachment for a failed attempt to handle
3410 these problems (failed because with the patch, only one alt abbrev hash
3411 table is kept, while we must handle all abbreviations in all CUs
3412 referenced by an absori (being a reference to an alt CU, or a previous
3413 or following CU).

   Other results visible below: an unset (0) absori also gives
   "UnknownInlinedFun", with a warning printed once when verbosity > 1.
   A DW_AT_name on the DIE takes priority over a name obtained by
   following DW_AT_specification (which recurses into get_inlFnName).
   The last statement returns "AbsOriFnNameNotFound" -- presumably only
   reached when neither attribute produced a name; TODO confirm. */
3414 static const HChar
* get_inlFnName (Int absori
, CUConst
* cc
, Bool td3
)
3418 ULong atag
, abbv_code
;
3421 Bool type_flag
, alt_flag
;
3422 const HChar
*ret
= NULL
;
3426 /* Some inlined subroutine call dwarf entries do not have the abstract
3427 origin attribute, resulting in absori being 0 (see callers of
3428 get_inlFnName). This is observed at least with gcc 6.3.0 when compiling
3429 valgrind with lto. So, in case we have a 0 absori, do not report an
3430 error, instead, rather return an unknown inlined function. */
3432 static Bool absori0_reported
= False
;
3433 if (!absori0_reported
&& VG_(clo_verbosity
) > 1) {
3434 VG_(message
)(Vg_DebugMsg
,
3435 "Warning: inlined fn name without absori\n"
3436 "is shown as UnknownInlinedFun\n");
3437 absori0_reported
= True
;
3439 TRACE_D3(" <get_inlFnName>: absori is not set");
3440 return ML_(addStr
)(cc
->di
, "UnknownInlinedFun", -1);
3443 posn
= uncook_die( cc
, absori
, &type_flag
, &alt_flag
);
3445 cc
->barf("get_inlFnName: uncooked absori in type debug info");
3447 /* LIMITATION: check we are in the same CU.
3448 If not, return unknown inlined function name. */
3449 /* if crossing between alt debug info<>normal info
3450 or posn not in the cu range,
3451 then it is in another CU. */
3452 if (alt_flag
!= cc
->is_alt_info
3453 || posn
< cc
->cu_start_offset
3454 || posn
>= cc
->cu_start_offset
+ cc
->unit_length
) {
3455 static Bool reported
= False
;
3456 if (!reported
&& VG_(clo_verbosity
) > 1) {
3457 VG_(message
)(Vg_DebugMsg
,
3458 "Warning: cross-CU LIMITATION: some inlined fn names\n"
3459 "might be shown as UnknownInlinedFun\n");
3462 TRACE_D3(" <get_inlFnName><%lx>: cross-CU LIMITATION", posn
);
3463 return ML_(addStr
)(cc
->di
, "UnknownInlinedFun", -1);
/* Point a cursor at the referenced DIE in cc->escn_debug_info and
   decode the abbreviation code that starts it. */
3466 init_Cursor (&c
, cc
->escn_debug_info
, posn
, cc
->barf
,
3467 "Overrun get_inlFnName absori");
3469 abbv_code
= get_ULEB128( &c
);
3470 abbv
= get_abbv ( cc
, abbv_code
, td3
);
3472 TRACE_D3(" <get_inlFnName><%lx>: Abbrev Number: %llu (%s)\n",
3473 posn
, abbv_code
, ML_(pp_DW_TAG
)( atag
) );
3476 cc
->barf("get_inlFnName: invalid zero tag on DIE");
3478 has_children
= abbv
->has_children
;
3479 if (has_children
!= DW_children_no
&& has_children
!= DW_children_yes
)
3480 cc
->barf("get_inlFnName: invalid has_children value");
3482 if (atag
!= DW_TAG_subprogram
)
3483 cc
->barf("get_inlFnName: absori not a subprogram");
3487 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
3488 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
3489 const name_form
*nf
= &abbv
->nf
[nf_i
];
3491 if (attr
== 0 && form
== 0) break;
3492 get_Form_contents( &cts
, cc
, &c
, False
/*td3*/, nf
);
3493 if (attr
== DW_AT_name
) {
3496 cc
->barf("get_inlFnName: expecting indirect string");
3497 fnname
= ML_(cur_read_strdup
)( cts
.u
.cur
,
3498 "get_inlFnName.1" );
3499 ret
= ML_(addStr
)(cc
->di
, fnname
, -1);
3500 ML_(dinfo_free
) (fnname
);
3501 break; /* Name found, get out of the loop, as this has priority over
3502 DW_AT_specification. */
3504 if (attr
== DW_AT_specification
) {
3508 cc
->barf("get_inlFnName: AT specification missing");
3510 /* The recursive call to get_inlFnName will uncook its arg.
3511 So, we need to cook it here, so as to reference the
3512 correct section (e.g. the alt info). */
3513 cdie
= cook_die_using_form(cc
, (UWord
)cts
.u
.val
, form
);
3515 /* hoping that there is no loop */
3516 ret
= get_inlFnName (cdie
, cc
, td3
);
3517 /* Unclear if having both DW_AT_specification and DW_AT_name is
3518 possible but in any case, we do not break here.
3519 If we find later on a DW_AT_name, it will override the name found
3520 in the DW_AT_specification.*/
3527 TRACE_D3("AbsOriFnNameNotFound");
3528 return ML_(addStr
)(cc
->di
, "AbsOriFnNameNotFound", -1);
3532 /* Returns True if the children (if any) of the current DIE are interesting
3533 to parse. Returns False otherwise.
3534 If the children are not interesting they can maybe be skipped, provided
3535 the DIE has a DW_AT_sibling; its value is noted in parser->sibling.

   Unit DIEs (compile_unit, partial_unit, skeleton_unit) are scanned for
   address coverage (DW_AT_low_pc / DW_AT_ranges), DW_AT_comp_dir and
   DW_AT_stmt_list.  When such a unit covers addresses and its line-table
   offset differs from *debug_line_offset, the filename table is reset
   and re-read.

   For a DW_TAG_inlined_subroutine, ML_(addInlInfo) is called once for a
   single lo/hi range, or once per entry when the DIE has a range list.
   In both cases text_debug_bias is added to the addresses, and the
   inlined function's name comes from get_inlFnName on the abstract
   origin. */
3536 __attribute__((noinline
))
3537 static Bool
parse_inl_DIE (
3538 /*MOD*/D3InlParser
* parser
,
3539 XArray
** fndn_ix_Table
,
3540 ULong
*debug_line_offset
,
3553 UWord saved_die_c_offset
= get_position_of_Cursor( c_die
);
3555 /* Get info about DW_TAG_compile_unit and DW_TAG_partial_unit which in theory
3556 could also contain inlined fn calls, if they cover an address range. */
3557 Bool unit_has_addrs
= False
;
3558 if (dtag
== DW_TAG_compile_unit
|| dtag
== DW_TAG_partial_unit
3559 || dtag
== DW_TAG_skeleton_unit
) {
3560 Bool have_lo
= False
;
3562 const HChar
*compdir
= NULL
;
3563 Bool has_stmt_list
= False
;
3564 ULong cu_line_offset
= 0;
3567 setup_cu_bases(cc
, c_die
, abbv
);
3570 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
3571 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
3572 const name_form
*nf
= &abbv
->nf
[nf_i
];
3574 if (attr
== 0 && form
== 0) break;
3575 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
3576 if (attr
== DW_AT_low_pc
&& cts
.szB
> 0) {
3579 unit_has_addrs
= True
;
3581 if (attr
== DW_AT_ranges
&& cts
.szB
> 0)
3582 unit_has_addrs
= True
;
3583 if (attr
== DW_AT_comp_dir
) {
3585 cc
->barf("parse_inl_DIE compdir: expecting indirect string");
3586 HChar
*str
= ML_(cur_read_strdup
)( cts
.u
.cur
,
3587 "parse_inl_DIE.compdir" );
3588 compdir
= ML_(addStr
)(cc
->di
, str
, -1);
3589 ML_(dinfo_free
) (str
);
3591 if (attr
== DW_AT_stmt_list
&& cts
.szB
> 0) {
3592 has_stmt_list
= True
;
3593 cu_line_offset
= cts
.u
.val
;
3595 if (attr
== DW_AT_sibling
&& cts
.szB
> 0) {
3596 parser
->sibling
= cts
.u
.val
;
3600 setup_cu_svma (cc
, have_lo
, ip_lo
, td3
);
3601 if (has_stmt_list
&& unit_has_addrs
3602 && *debug_line_offset
!= cu_line_offset
) {
3603 reset_fndn_ix_table ( fndn_ix_Table
, debug_line_offset
,
3605 read_filename_table( *fndn_ix_Table
, compdir
,
3606 cc
, cu_line_offset
, td3
);
3611 if (dtag
== DW_TAG_inlined_subroutine
) {
3612 Bool have_lo
= False
;
3613 Bool have_hi1
= False
;
3614 Bool have_range
= False
;
3615 Bool hiIsRelative
= False
;
3619 UInt caller_fndn_ix
= 0;
3620 Int caller_lineno
= 0;
3621 Int inlinedfn_abstract_origin
= 0;
3622 // 0 will be interpreted as no abstract origin by get_inlFnName
3626 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
3627 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
3628 const name_form
*nf
= &abbv
->nf
[nf_i
];
3630 if (attr
== 0 && form
== 0) break;
3631 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
3632 if (attr
== DW_AT_call_file
&& cts
.szB
> 0) {
3633 Int ftabIx
= (Int
)cts
.u
.val
;
3635 && ftabIx
< VG_(sizeXA
)( *fndn_ix_Table
)) {
3636 caller_fndn_ix
= *(UInt
*)
3637 VG_(indexXA
)( *fndn_ix_Table
, ftabIx
);
3639 if (0) VG_(printf
)("XXX caller_fndn_ix = %u %s\n", caller_fndn_ix
,
3640 ML_(fndn_ix2filename
) (cc
->di
, caller_fndn_ix
));
3642 if (attr
== DW_AT_call_line
&& cts
.szB
> 0) {
3643 caller_lineno
= cts
.u
.val
;
3646 if (attr
== DW_AT_abstract_origin
&& cts
.szB
> 0) {
3647 inlinedfn_abstract_origin
3648 = cook_die_using_form (cc
, (UWord
)cts
.u
.val
, form
);
3651 if (attr
== DW_AT_low_pc
&& cts
.szB
> 0) {
3655 if (attr
== DW_AT_high_pc
&& cts
.szB
> 0) {
3658 if (form
!= DW_FORM_addr
)
3659 hiIsRelative
= True
;
3661 if (attr
== DW_AT_ranges
&& cts
.szB
> 0) {
3662 rangeoff
= cts
.u
.val
;
3665 if (attr
== DW_AT_sibling
&& cts
.szB
> 0) {
3666 parser
->sibling
= cts
.u
.val
;
3669 if (have_lo
&& have_hi1
&& hiIsRelative
)
3671 /* Do we have something that looks sane? */
3672 if (dtag
== DW_TAG_inlined_subroutine
3673 && (!have_lo
) && (!have_hi1
) && (!have_range
)) {
3674 /* Seems strange. How can an inlined subroutine have
3678 if (have_lo
&& have_hi1
&& (!have_range
)) {
3679 /* This inlined call is just a single address range. */
3680 if (ip_lo
< ip_hi1
) {
3681 /* Apply text debug biasing */
3682 ip_lo
+= cc
->di
->text_debug_bias
;
3683 ip_hi1
+= cc
->di
->text_debug_bias
;
3684 ML_(addInlInfo
) (cc
->di
,
3686 get_inlFnName (inlinedfn_abstract_origin
, cc
, td3
),
3688 caller_lineno
, level
);
3690 } else if (have_range
) {
3691 /* This inlined call is several address ranges. */
3694 const HChar
*inlfnname
=
3695 get_inlFnName (inlinedfn_abstract_origin
, cc
, td3
);
3697 /* Ranges are biased for the inline info using the same logic
3698 as what is used for biasing ranges for the var info, for which
3699 ranges are read using cc->cu_svma (see parse_var_DIE).
3700 Then text_debug_bias is added when a (non global) var
3701 is recorded (see just before the call to ML_(addVar)) */
3702 ranges
= get_range_list( cc
, td3
,
3703 rangeoff
, cc
->cu_svma
);
3704 for (j
= 0; j
< VG_(sizeXA
)( ranges
); j
++) {
3705 AddrRange
* range
= (AddrRange
*) VG_(indexXA
)( ranges
, j
);
3706 ML_(addInlInfo
) (cc
->di
,
3707 range
->aMin
+ cc
->di
->text_debug_bias
,
3708 range
->aMax
+1 + cc
->di
->text_debug_bias
,
3709 // aMax+1 as range has its last bound included
3710 // while ML_(addInlInfo) expects last bound not
3714 caller_lineno
, level
);
3716 VG_(deleteXA
)( ranges
);
3721 // Only recursively parse the (possible) children for the DIE which
3722 // might maybe contain a DW_TAG_inlined_subroutine:
3723 Bool ret
= (unit_has_addrs
3724 || dtag
== DW_TAG_lexical_block
|| dtag
== DW_TAG_subprogram
3725 || dtag
== DW_TAG_inlined_subroutine
|| dtag
== DW_TAG_namespace
);
3729 dump_bad_die_and_barf("parse_inl_DIE", dtag
, posn
, level
,
3730 c_die
, saved_die_c_offset
,
3737 /*------------------------------------------------------------*/
3739 /*--- Parsing of type-related DIEs ---*/
3741 /*------------------------------------------------------------*/
3745 /* What source language? 'A'=Ada83/95,
3749 Established once per compilation unit. */
3751 /* A stack of types which are currently under construction */
3752 Int sp
; /* [sp] is innermost active entry; sp==-1 for empty
3755 /* Note that the TyEnts in qparentE are temporary copies of the
3756 ones accumulating in the main tyent array. So it is not safe
3757 to free up anything on them when popping them off the stack
3758 (iow, it isn't safe to use TyEnt__make_EMPTY on them). Just
3759 memset them to zero when done. */
3760 TyEnt
*qparentE
; /* parent TyEnts */
3765 /* Completely initialise a type parser object.  The language is set
   to '?' (not yet known), the stack capacity (stack_size) to zero, and
   both stack arrays (qparentE and qlevel) to NULL. */
3767 type_parser_init ( D3TypeParser
*parser
)
3770 parser
->language
= '?';
3771 parser
->stack_size
= 0;
3772 parser
->qparentE
= NULL
;
3773 parser
->qlevel
= NULL
;
3776 /* Release any memory hanging off a type parser object: the two
   stack arrays, qparentE and qlevel, are handed to ML_(dinfo_free).
   The parser structure itself is not freed by the calls shown. */
3778 type_parser_release ( D3TypeParser
*parser
)
3780 ML_(dinfo_free
)( parser
->qparentE
);
3781 ML_(dinfo_free
)( parser
->qlevel
);
/* Debugging aid: print the type stack, labelled with 'str'.  Entries
   0 .. parser->sp are printed in that order, each with its index, its
   level (qlevel[i]) and the entry itself via ML_(pp_TyEnt). */
3784 static void typestack_show ( const D3TypeParser
* parser
, const HChar
* str
)
3787 VG_(printf
)(" typestack (%s) {\n", str
);
3788 for (i
= 0; i
<= parser
->sp
; i
++) {
3789 VG_(printf
)(" [%ld] (level %d): ", i
, parser
->qlevel
[i
]);
3790 ML_(pp_TyEnt
)( &parser
->qparentE
[i
] );
3793 VG_(printf
)(" }\n");
3796 /* Remove from the stack, all entries with .level > 'level'.
   Working down from the top, the scan stops as soon as the stack is
   empty (sp == -1) or the top entry's level is <= 'level'.  Every
   entry that gets past those checks is asserted to be a type.
   typestack_show(.., "after preen") dumps the result -- presumably
   only when tracing ('td3') and something was removed; TODO confirm. */
3798 void typestack_preen ( D3TypeParser
* parser
, Bool td3
, Int level
)
3800 Bool changed
= False
;
3801 vg_assert(parser
->sp
< parser
->stack_size
);
3803 vg_assert(parser
->sp
>= -1);
3804 if (parser
->sp
== -1) break;
3805 if (parser
->qlevel
[parser
->sp
] <= level
) break;
3807 TRACE_D3("BBBBAAAA typestack_pop [newsp=%d]\n", parser
->sp
-1);
3808 vg_assert(ML_(TyEnt__is_type
)(&parser
->qparentE
[parser
->sp
]));
3813 typestack_show( parser
, "after preen" );
/* True iff the type stack holds no entries, that is, parser->sp == -1.
   Also asserts that sp lies in the range -1 .. stack_size-1. */
3816 static Bool
typestack_is_empty ( const D3TypeParser
* parser
)
3818 vg_assert(parser
->sp
>= -1 && parser
->sp
< parser
->stack_size
);
3819 return parser
->sp
== -1;
/* Record *parentE (copied by value) on the type stack as the parent at
   nesting depth 'level'.  Existing entries at 'level' or deeper are
   removed first, by preening down to level-1.  parentE must be a type
   with a valid cuOff, and any entry left beneath it must have a level
   strictly less than 'level'. */
3822 static void typestack_push ( const CUConst
* cc
,
3823 D3TypeParser
* parser
,
3825 const TyEnt
* parentE
, Int level
)
3828 TRACE_D3("BBBBAAAA typestack_push[newsp=%d]: %d %05lx\n",
3829 parser
->sp
+1, level
, parentE
->cuOff
);
3831 /* First we need to zap everything >= 'level', as we are about to
3832 replace any previous entry at 'level', so .. */
3833 typestack_preen(parser
, /*td3*/False
, level
-1);
3835 vg_assert(parser
->sp
>= -1);
3836 vg_assert(parser
->sp
< parser
->stack_size
);
/* Stack full: grow the capacity by 16 entries and reallocate both
   parallel arrays (qparentE and qlevel) to the new size. */
3837 if (parser
->sp
== parser
->stack_size
- 1) {
3838 parser
->stack_size
+= 16;
3840 ML_(dinfo_realloc
)("di.readdwarf3.typush.1", parser
->qparentE
,
3841 parser
->stack_size
* sizeof parser
->qparentE
[0]);
3843 ML_(dinfo_realloc
)("di.readdwarf3.typush.2", parser
->qlevel
,
3844 parser
->stack_size
* sizeof parser
->qlevel
[0]);
3846 if (parser
->sp
>= 0)
3847 vg_assert(parser
->qlevel
[parser
->sp
] < level
);
3850 vg_assert(ML_(TyEnt__is_type
)(parentE
));
3851 vg_assert(parentE
->cuOff
!= D3_INVALID_CUOFF
);
3852 parser
->qparentE
[parser
->sp
] = *parentE
;
3853 parser
->qlevel
[parser
->sp
] = level
;
3855 typestack_show( parser
, "after push" );
3858 /* True if the subrange type being parsed gives the bounds of an array.
   'dtag' must be DW_TAG_subrange_type.  The answer depends on
   parser->language: an unknown language ('?') and any non-Ada language
   are decided from the language alone, whereas for Ada ('A') the
   innermost entry on the type stack must also be an array type. */
3859 static Bool
subrange_type_denotes_array_bounds ( const D3TypeParser
* parser
,
3861 vg_assert(dtag
== DW_TAG_subrange_type
);
3862 /* If we don't know the language, assume false. */
3863 if (parser
->language
== '?')
3865 /* For most languages, a subrange_type dtag always gives the
3867 For Ada, there are additional conditions as a subrange_type
3868 is also used for other purposes. */
3869 if (parser
->language
!= 'A')
3870 /* not Ada, so it definitely denotes an array bound. */
3873 /* Extra constraints for Ada: it only denotes an array bound if the
   type stack is non-empty and its top entry has tag Te_TyArray. */
3874 return (! typestack_is_empty(parser
)
3875 && parser
->qparentE
[parser
->sp
].tag
== Te_TyArray
);
3878 /* True if the form is one of the forms supported to give an array bound.
3879 For some arrays (scope local arrays with variable size),
3880 a DW_FORM_ref4 was used, and was wrongly used as the bound value.
3881 So, refuse the forms that are known to give a problem.
   The forms singled out by the test below are the reference forms
   DW_FORM_ref1, DW_FORM_ref2, DW_FORM_ref4 and DW_FORM_ref8. */
3882 static Bool
form_expected_for_bound ( DW_FORM form
) {
3883 if (form
== DW_FORM_ref1
3884 || form
== DW_FORM_ref2
3885 || form
== DW_FORM_ref4
3886 || form
== DW_FORM_ref8
)
3892 /* Parse a type-related DIE. 'parser' holds the current parser state.
3893 'admin' is where the completed types are dumped. 'dtag' is the tag
3894 for this DIE. 'c_die' points to the start of the data fields (FORM
3895 stuff) for the DIE. abbv is the parsed abbreviation which describes
3898 We may find the DIE uninteresting, in which case we should ignore
3901 What happens: the DIE is examined. If uninteresting, it is ignored.
3902 Otherwise, the DIE gives rise to two things:
3904 (1) the offset of this DIE in the CU -- the cuOffset, a UWord
3905 (2) a TyAdmin structure, which holds the type, or related stuff
3907 (2) is added at the end of 'tyadmins', at some index, say 'i'.
3909 A pair (cuOffset, i) is added to 'tydict'.
3911 Hence 'tyadmins' holds the actual type entities, and 'tydict' holds
3912 a mapping from cuOffset to the index of the corresponding entry in
3915 When resolving a cuOffset to a TyAdmin, first look up the cuOffset
3916 in the tydict (by binary search). This gives an index into
3917 tyadmins, and the required entity lives in tyadmins at that index.
3919 __attribute__((noinline
))
3920 static void parse_type_DIE ( /*MOD*/XArray
* /* of TyEnt */ tyents
,
3921 /*MOD*/D3TypeParser
* parser
,
3937 UWord saved_die_c_offset
= get_position_of_Cursor( c_die
);
3939 VG_(memset
)( &typeE
, 0xAA, sizeof(typeE
) );
3940 VG_(memset
)( &atomE
, 0xAA, sizeof(atomE
) );
3941 VG_(memset
)( &fieldE
, 0xAA, sizeof(fieldE
) );
3942 VG_(memset
)( &boundE
, 0xAA, sizeof(boundE
) );
3944 /* If we've returned to a level at or above any previously noted
3945 parent, un-note it, so we don't believe we're still collecting
3947 typestack_preen( parser
, td3
, level
-1 );
3949 if (dtag
== DW_TAG_compile_unit
3950 || dtag
== DW_TAG_type_unit
3951 || dtag
== DW_TAG_partial_unit
3952 || dtag
== DW_TAG_skeleton_unit
) {
3954 setup_cu_bases(cc
, c_die
, abbv
);
3955 /* See if we can find DW_AT_language, since it is important for
3956 establishing array bounds (see DW_TAG_subrange_type below in
3960 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
3961 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
3962 const name_form
*nf
= &abbv
->nf
[nf_i
];
3964 if (attr
== 0 && form
== 0) break;
3965 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
3966 if (attr
!= DW_AT_language
)
3970 switch (cts
.u
.val
) {
3971 case DW_LANG_C89
: case DW_LANG_C
:
3972 case DW_LANG_C_plus_plus
: case DW_LANG_ObjC
:
3973 case DW_LANG_ObjC_plus_plus
: case DW_LANG_UPC
:
3974 case DW_LANG_Upc
: case DW_LANG_C99
: case DW_LANG_C11
:
3975 case DW_LANG_C17
: case DW_LANG_C23
:
3976 case DW_LANG_C_plus_plus_11
: case DW_LANG_C_plus_plus_14
:
3977 case DW_LANG_C_plus_plus_17
: case DW_LANG_C_plus_plus_20
:
3978 case DW_LANG_C_plus_plus_23
:
3979 parser
->language
= 'C'; break;
3980 case DW_LANG_Fortran77
: case DW_LANG_Fortran90
:
3981 case DW_LANG_Fortran95
: case DW_LANG_Fortran03
:
3982 case DW_LANG_Fortran08
: case DW_LANG_Fortran18
:
3983 case DW_LANG_Fortran23
:
3984 parser
->language
= 'F'; break;
3985 case DW_LANG_Ada83
: case DW_LANG_Ada95
:
3986 case DW_LANG_Ada2005
: case DW_LANG_Ada2012
:
3987 parser
->language
= 'A'; break;
3988 case DW_LANG_Cobol74
:
3989 case DW_LANG_Cobol85
: case DW_LANG_Pascal83
:
3990 case DW_LANG_Modula2
: case DW_LANG_Java
:
3992 case DW_LANG_D
: case DW_LANG_Python
:
3993 case DW_LANG_OpenCL
: case DW_LANG_Go
:
3994 case DW_LANG_Modula3
: case DW_LANG_Haskell
:
3995 case DW_LANG_OCaml
: case DW_LANG_Rust
: case DW_LANG_Swift
:
3996 case DW_LANG_Julia
: case DW_LANG_Dylan
:
3997 case DW_LANG_RenderScript
: case DW_LANG_BLISS
:
3998 case DW_LANG_Kotlin
: case DW_LANG_Zig
:
3999 case DW_LANG_Crystal
: case DW_LANG_HIP
:
4000 case DW_LANG_Assembly
: case DW_LANG_C_sharp
:
4001 case DW_LANG_Mojo
: case DW_LANG_GLSL
:
4002 case DW_LANG_GLSL_ES
: case DW_LANG_HLSL
:
4003 case DW_LANG_OpenCL_CPP
: case DW_LANG_CPP_for_OpenCL
:
4011 case DW_LANG_Mips_Assembler
:
4012 parser
->language
= '?'; break;
4019 if (dtag
== DW_TAG_base_type
) {
4020 /* We can pick up a new base type any time. */
4021 VG_(memset
)(&typeE
, 0, sizeof(typeE
));
4022 typeE
.cuOff
= D3_INVALID_CUOFF
;
4023 typeE
.tag
= Te_TyBase
;
4026 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
4027 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
4028 const name_form
*nf
= &abbv
->nf
[nf_i
];
4030 if (attr
== 0 && form
== 0) break;
4031 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
4032 if (attr
== DW_AT_name
&& cts
.szB
< 0) {
4033 typeE
.Te
.TyBase
.name
4034 = ML_(cur_read_strdup
)( cts
.u
.cur
,
4035 "di.readdwarf3.ptD.base_type.1" );
4037 if (attr
== DW_AT_byte_size
&& cts
.szB
> 0) {
4038 typeE
.Te
.TyBase
.szB
= cts
.u
.val
;
4040 if (attr
== DW_AT_encoding
&& cts
.szB
> 0) {
4041 switch (cts
.u
.val
) {
4042 case DW_ATE_unsigned
: case DW_ATE_unsigned_char
:
4043 case DW_ATE_UTF
: /* since DWARF4, e.g. char16_t from C++ */
4044 case DW_ATE_boolean
:/* FIXME - is this correct? */
4045 case DW_ATE_unsigned_fixed
:
4046 typeE
.Te
.TyBase
.enc
= 'U'; break;
4047 case DW_ATE_signed
: case DW_ATE_signed_char
:
4048 case DW_ATE_signed_fixed
:
4049 typeE
.Te
.TyBase
.enc
= 'S'; break;
4051 typeE
.Te
.TyBase
.enc
= 'F'; break;
4052 case DW_ATE_complex_float
:
4053 typeE
.Te
.TyBase
.enc
= 'C'; break;
4060 /* Invent a name if it doesn't have one. gcc-4.3
4061 -ftree-vectorize is observed to emit nameless base types. */
4062 if (!typeE
.Te
.TyBase
.name
)
4063 typeE
.Te
.TyBase
.name
4064 = ML_(dinfo_strdup
)( "di.readdwarf3.ptD.base_type.2",
4065 "<anon_base_type>" );
4067 /* Do we have something that looks sane? */
4068 if (/* must have a name */
4069 typeE
.Te
.TyBase
.name
== NULL
4070 /* and a plausible size. Yes, really 32: "complex long
4071 double" apparently has size=32 */
4072 || typeE
.Te
.TyBase
.szB
< 0 || typeE
.Te
.TyBase
.szB
> 32
4073 /* and a plausible encoding */
4074 || (typeE
.Te
.TyBase
.enc
!= 'U'
4075 && typeE
.Te
.TyBase
.enc
!= 'S'
4076 && typeE
.Te
.TyBase
.enc
!= 'F'
4077 && typeE
.Te
.TyBase
.enc
!= 'C'))
4079 /* Last minute hack: if we see this
4080 <1><515>: DW_TAG_base_type
4084 convert it into a real Void type. */
4085 if (typeE
.Te
.TyBase
.szB
== 0
4086 && 0 == VG_(strcmp
)("void", typeE
.Te
.TyBase
.name
)) {
4087 ML_(TyEnt__make_EMPTY
)(&typeE
);
4088 typeE
.tag
= Te_TyVoid
;
4089 typeE
.Te
.TyVoid
.isFake
= False
; /* it's a real one! */
4096 * An example of DW_TAG_rvalue_reference_type:
4098 * $ readelf --debug-dump /usr/lib/debug/usr/lib/libstdc++.so.6.0.16.debug
4099 * <1><1014>: Abbrev Number: 55 (DW_TAG_rvalue_reference_type)
4100 * <1015> DW_AT_byte_size : 4
4101 * <1016> DW_AT_type : <0xe52>
4103 if (dtag
== DW_TAG_pointer_type
|| dtag
== DW_TAG_reference_type
4104 || dtag
== DW_TAG_ptr_to_member_type
4105 || dtag
== DW_TAG_rvalue_reference_type
) {
4106 /* This seems legit for _pointer_type and _reference_type. I
4107 don't know if rolling _ptr_to_member_type in here really is
4108 legit, but it's better than not handling it at all. */
4109 VG_(memset
)(&typeE
, 0, sizeof(typeE
));
4110 typeE
.cuOff
= D3_INVALID_CUOFF
;
4112 case DW_TAG_pointer_type
:
4113 typeE
.tag
= Te_TyPtr
;
4115 case DW_TAG_reference_type
:
4116 typeE
.tag
= Te_TyRef
;
4118 case DW_TAG_ptr_to_member_type
:
4119 typeE
.tag
= Te_TyPtrMbr
;
4121 case DW_TAG_rvalue_reference_type
:
4122 typeE
.tag
= Te_TyRvalRef
;
4127 /* target type defaults to void */
4128 typeE
.Te
.TyPorR
.typeR
= D3_FAKEVOID_CUOFF
;
4129 /* These four type kinds don't *have* to specify their size, in
4130 which case we assume it's a machine word. But if they do
4131 specify it, it must be a machine word :-) This probably
4132 assumes that the word size of the Dwarf3 we're reading is the
4133 same size as that on the machine. gcc appears to give a size
4134 whereas icc9 doesn't. */
4135 typeE
.Te
.TyPorR
.szB
= sizeof(UWord
);
4138 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
4139 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
4140 const name_form
*nf
= &abbv
->nf
[nf_i
];
4142 if (attr
== 0 && form
== 0) break;
4143 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
4144 if (attr
== DW_AT_byte_size
&& cts
.szB
> 0) {
4145 typeE
.Te
.TyPorR
.szB
= cts
.u
.val
;
4147 if (attr
== DW_AT_type
&& cts
.szB
> 0) {
4148 typeE
.Te
.TyPorR
.typeR
4149 = cook_die_using_form( cc
, (UWord
)cts
.u
.val
, form
);
4152 /* Do we have something that looks sane? */
4153 if (typeE
.Te
.TyPorR
.szB
!= sizeof(UWord
))
4159 if (dtag
== DW_TAG_enumeration_type
) {
4160 /* Create a new Type to hold the results. */
4161 VG_(memset
)(&typeE
, 0, sizeof(typeE
));
4163 typeE
.tag
= Te_TyEnum
;
4164 Bool is_decl
= False
;
4165 typeE
.Te
.TyEnum
.atomRs
4166 = VG_(newXA
)( ML_(dinfo_zalloc
), "di.readdwarf3.ptD.enum_type.1",
4171 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
4172 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
4173 const name_form
*nf
= &abbv
->nf
[nf_i
];
4175 if (attr
== 0 && form
== 0) break;
4176 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
4177 if (attr
== DW_AT_name
&& cts
.szB
< 0) {
4178 typeE
.Te
.TyEnum
.name
4179 = ML_(cur_read_strdup
)( cts
.u
.cur
,
4180 "di.readdwarf3.pTD.enum_type.2" );
4182 if (attr
== DW_AT_byte_size
&& cts
.szB
> 0) {
4183 typeE
.Te
.TyEnum
.szB
= cts
.u
.val
;
4185 if (attr
== DW_AT_declaration
) {
4190 if (!typeE
.Te
.TyEnum
.name
)
4191 typeE
.Te
.TyEnum
.name
4192 = ML_(dinfo_strdup
)( "di.readdwarf3.pTD.enum_type.3",
4193 "<anon_enum_type>" );
4195 /* Do we have something that looks sane? */
4196 if (typeE
.Te
.TyEnum
.szB
== 0
4197 /* we must know the size */
4198 /* but not for Ada, which uses such dummy
4199 enumerations as helper for gdb ada mode.
4200 Also GCC allows incomplete enums as GNU extension.
4201 http://gcc.gnu.org/onlinedocs/gcc/Incomplete-Enums.html
4202 These are marked as DW_AT_declaration and won't have
4203 a size. They can only be used in declaration or as
4204 pointer types. You can't allocate variables or storage
4205 using such an enum type. (Also GCC seems to have a bug
4206 that will put such an enumeration_type into a .debug_types
4207 unit which should only contain complete types.) */
4208 && (parser
->language
!= 'A' && !is_decl
)) {
4213 typestack_push( cc
, parser
, td3
, &typeE
, level
);
4217 /* gcc (GCC) 4.4.0 20081017 (experimental) occasionally produces
4218 DW_TAG_enumerator with only a DW_AT_name but no
4219 DW_AT_const_value. This is in violation of the Dwarf3 standard,
4220 and appears to be a new "feature" of gcc - versions 4.3.x and
4221 earlier do not appear to do this. So accept DW_TAG_enumerator
4222 which only have a name but no value. An example:
4224 <1><180>: Abbrev Number: 6 (DW_TAG_enumeration_type)
4225 <181> DW_AT_name : (indirect string, offset: 0xda70):
4227 <185> DW_AT_byte_size : 4
4228 <186> DW_AT_decl_file : 14
4229 <187> DW_AT_decl_line : 1480
4230 <189> DW_AT_sibling : <0x1a7>
4231 <2><18d>: Abbrev Number: 7 (DW_TAG_enumerator)
4232 <18e> DW_AT_name : (indirect string, offset: 0x9e18):
4234 <2><192>: Abbrev Number: 7 (DW_TAG_enumerator)
4235 <193> DW_AT_name : (indirect string, offset: 0x1505f):
4237 <2><197>: Abbrev Number: 7 (DW_TAG_enumerator)
4238 <198> DW_AT_name : (indirect string, offset: 0x16f4a):
4240 <2><19c>: Abbrev Number: 7 (DW_TAG_enumerator)
4241 <19d> DW_AT_name : (indirect string, offset: 0x156dd):
4243 <2><1a1>: Abbrev Number: 7 (DW_TAG_enumerator)
4244 <1a2> DW_AT_name : (indirect string, offset: 0x13660):
4247 if (dtag
== DW_TAG_enumerator
) {
4248 VG_(memset
)( &atomE
, 0, sizeof(atomE
) );
4250 atomE
.tag
= Te_Atom
;
4253 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
4254 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
4255 const name_form
*nf
= &abbv
->nf
[nf_i
];
4257 if (attr
== 0 && form
== 0) break;
4258 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
4259 if (attr
== DW_AT_name
&& cts
.szB
< 0) {
4261 = ML_(cur_read_strdup
)( cts
.u
.cur
,
4262 "di.readdwarf3.pTD.enumerator.1" );
4264 if (attr
== DW_AT_const_value
&& cts
.szB
> 0) {
4265 atomE
.Te
.Atom
.value
= cts
.u
.val
;
4266 atomE
.Te
.Atom
.valueKnown
= True
;
4269 /* Do we have something that looks sane? */
4270 if (atomE
.Te
.Atom
.name
== NULL
)
4272 /* Do we have a plausible parent? */
4273 if (typestack_is_empty(parser
)) goto_bad_DIE
;
4274 vg_assert(ML_(TyEnt__is_type
)(&parser
->qparentE
[parser
->sp
]));
4275 vg_assert(parser
->qparentE
[parser
->sp
].cuOff
!= D3_INVALID_CUOFF
);
4276 if (level
!= parser
->qlevel
[parser
->sp
]+1) goto_bad_DIE
;
4277 if (parser
->qparentE
[parser
->sp
].tag
!= Te_TyEnum
) goto_bad_DIE
;
4278 /* Record this child in the parent */
4279 vg_assert(parser
->qparentE
[parser
->sp
].Te
.TyEnum
.atomRs
);
4280 VG_(addToXA
)( parser
->qparentE
[parser
->sp
].Te
.TyEnum
.atomRs
,
4282 /* And record the child itself */
4286 /* Treat DW_TAG_class_type as if it was a DW_TAG_structure_type. I
4287 don't know if this is correct, but it at least makes this reader
4288 usable for gcc-4.3 produced Dwarf3. */
4289 if (dtag
== DW_TAG_structure_type
|| dtag
== DW_TAG_class_type
4290 || dtag
== DW_TAG_union_type
) {
4291 Bool have_szB
= False
;
4292 Bool is_decl
= False
;
4293 Bool is_spec
= False
;
4294 /* Create a new Type to hold the results. */
4295 VG_(memset
)(&typeE
, 0, sizeof(typeE
));
4297 typeE
.tag
= Te_TyStOrUn
;
4298 typeE
.Te
.TyStOrUn
.name
= NULL
;
4299 typeE
.Te
.TyStOrUn
.typeR
= D3_INVALID_CUOFF
;
4300 typeE
.Te
.TyStOrUn
.fieldRs
4301 = VG_(newXA
)( ML_(dinfo_zalloc
), "di.readdwarf3.pTD.struct_type.1",
4304 typeE
.Te
.TyStOrUn
.complete
= True
;
4305 typeE
.Te
.TyStOrUn
.isStruct
= dtag
== DW_TAG_structure_type
4306 || dtag
== DW_TAG_class_type
;
4309 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
4310 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
4311 const name_form
*nf
= &abbv
->nf
[nf_i
];
4313 if (attr
== 0 && form
== 0) break;
4314 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
4315 if (attr
== DW_AT_name
&& cts
.szB
< 0) {
4316 typeE
.Te
.TyStOrUn
.name
4317 = ML_(cur_read_strdup
)( cts
.u
.cur
,
4318 "di.readdwarf3.ptD.struct_type.2" );
4320 if (attr
== DW_AT_byte_size
&& cts
.szB
>= 0) {
4321 typeE
.Te
.TyStOrUn
.szB
= cts
.u
.val
;
4324 if (attr
== DW_AT_declaration
&& cts
.szB
> 0 && cts
.u
.val
> 0) {
4327 if (attr
== DW_AT_specification
&& cts
.szB
> 0 && cts
.u
.val
> 0) {
4330 if (attr
== DW_AT_signature
&& form
== DW_FORM_ref_sig8
4333 typeE
.Te
.TyStOrUn
.szB
= 8;
4334 typeE
.Te
.TyStOrUn
.typeR
4335 = cook_die_using_form( cc
, (UWord
)cts
.u
.val
, form
);
4338 /* Do we have something that looks sane? */
4339 if (is_decl
&& (!is_spec
)) {
4340 /* It's a DW_AT_declaration. We require the name but
4342 /* JRS 2012-06-28: following discussion w/ tromey, if the
4343 type doesn't have name, just make one up, and accept it.
4344 It might be referred to by other DIEs, so ignoring it
4345 doesn't seem like a safe option. */
4346 if (typeE
.Te
.TyStOrUn
.name
== NULL
)
4347 typeE
.Te
.TyStOrUn
.name
4348 = ML_(dinfo_strdup
)( "di.readdwarf3.ptD.struct_type.3",
4349 "<anon_struct_type>" );
4350 typeE
.Te
.TyStOrUn
.complete
= False
;
4351 /* JRS 2009 Aug 10: <possible kludge>? */
4352 /* Push this tyent on the stack, even though it's incomplete.
4353 It appears that gcc-4.4 on Fedora 11 will sometimes create
4354 DW_TAG_member entries for it, and so we need to have a
4355 plausible parent present in order for that to work. See
4356 #200029 comments 8 and 9. */
4357 typestack_push( cc
, parser
, td3
, &typeE
, level
);
4358 /* </possible kludge> */
4361 if ((!is_decl
) /* && (!is_spec) */) {
4362 /* this is the common, ordinary case */
4363 /* The name can be present, or not */
4365 /* We must know the size.
4366 But in Ada, record with discriminants might have no size.
4367 But in C, VLA in the middle of a struct (gcc extension)
4369 Instead, some GNAT DWARF extensions and/or DWARF entries
4370 allow the struct size to be calculated at runtime.
4371 We cannot do that (yet?) so, the temporary kludge is to use
4373 typeE
.Te
.TyStOrUn
.szB
= 1;
4376 typestack_push( cc
, parser
, td3
, &typeE
, level
);
4380 /* don't know how to handle any other variants just now */
4385 if (dtag
== DW_TAG_member
) {
4386 /* Acquire member entries for both DW_TAG_structure_type and
4387 DW_TAG_union_type. They differ minorly, in that struct
4388 members must have a DW_AT_data_member_location expression
4389 whereas union members must not. */
4390 Bool parent_is_struct
;
4391 Bool is_artificial
= False
;
4392 VG_(memset
)( &fieldE
, 0, sizeof(fieldE
) );
4393 fieldE
.cuOff
= posn
;
4394 fieldE
.tag
= Te_Field
;
4395 fieldE
.Te
.Field
.typeR
= D3_INVALID_CUOFF
;
4398 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
4399 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
4400 const name_form
*nf
= &abbv
->nf
[nf_i
];
4402 if (attr
== 0 && form
== 0) break;
4403 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
4404 if (attr
== DW_AT_name
&& cts
.szB
< 0) {
4405 fieldE
.Te
.Field
.name
4406 = ML_(cur_read_strdup
)( cts
.u
.cur
,
4407 "di.readdwarf3.ptD.member.1" );
4409 if (attr
== DW_AT_type
&& cts
.szB
> 0) {
4410 fieldE
.Te
.Field
.typeR
4411 = cook_die_using_form( cc
, (UWord
)cts
.u
.val
, form
);
4413 /* There are 2 different cases for DW_AT_data_member_location.
4414 If it is a constant class attribute, it contains byte offset
4415 from the beginning of the containing entity.
4416 Otherwise it is a location expression. */
4417 if (attr
== DW_AT_data_member_location
&& cts
.szB
> 0) {
4418 fieldE
.Te
.Field
.nLoc
= -1;
4419 fieldE
.Te
.Field
.pos
.offset
= cts
.u
.val
;
4421 if (attr
== DW_AT_data_member_location
&& cts
.szB
<= 0) {
4422 fieldE
.Te
.Field
.nLoc
= (UWord
)(-cts
.szB
);
4423 fieldE
.Te
.Field
.pos
.loc
4424 = ML_(cur_read_memdup
)( cts
.u
.cur
,
4425 (SizeT
)fieldE
.Te
.Field
.nLoc
,
4426 "di.readdwarf3.ptD.member.2" );
4428 if (attr
== DW_AT_artificial
&& cts
.u
.val
== 1)
4429 is_artificial
= True
;
4431 /* Skip artificial members, they might not behave as expected. */
4434 /* Do we have a plausible parent? */
4435 if (typestack_is_empty(parser
)) goto_bad_DIE
;
4436 vg_assert(ML_(TyEnt__is_type
)(&parser
->qparentE
[parser
->sp
]));
4437 vg_assert(parser
->qparentE
[parser
->sp
].cuOff
!= D3_INVALID_CUOFF
);
4438 if (level
!= parser
->qlevel
[parser
->sp
]+1) goto_bad_DIE
;
4439 if (parser
->qparentE
[parser
->sp
].tag
!= Te_TyStOrUn
) goto_bad_DIE
;
4440 /* Do we have something that looks sane? If this is a member of a
4441 struct, we must have a location expression; but if a member
4442 of a union that is irrelevant (D3 spec sec 5.6.6). We ought
4443 to reject in the latter case, but some compilers have been
4444 observed to emit constant-zero expressions. So just ignore
4447 = parser
->qparentE
[parser
->sp
].Te
.TyStOrUn
.isStruct
;
4448 if (!fieldE
.Te
.Field
.name
)
4449 fieldE
.Te
.Field
.name
4450 = ML_(dinfo_strdup
)( "di.readdwarf3.ptD.member.3",
4452 if (fieldE
.Te
.Field
.typeR
== D3_INVALID_CUOFF
)
4454 if (fieldE
.Te
.Field
.nLoc
) {
4455 if (!parent_is_struct
) {
4456 /* If this is a union type, pretend we haven't seen the data
4457 member location expression, as it is by definition
4458 redundant (it must be zero). */
4459 if (fieldE
.Te
.Field
.nLoc
> 0)
4460 ML_(dinfo_free
)(fieldE
.Te
.Field
.pos
.loc
);
4461 fieldE
.Te
.Field
.pos
.loc
= NULL
;
4462 fieldE
.Te
.Field
.nLoc
= 0;
4464 /* Record this child in the parent */
4465 fieldE
.Te
.Field
.isStruct
= parent_is_struct
;
4466 vg_assert(parser
->qparentE
[parser
->sp
].Te
.TyStOrUn
.fieldRs
);
4467 VG_(addToXA
)( parser
->qparentE
[parser
->sp
].Te
.TyStOrUn
.fieldRs
,
4469 /* And record the child itself */
4472 /* Member with no location - this can happen with static
4473 const members in C++ code which are compile-time constants
4474 that do not exist in the class. They're not of any interest
4475 to us so we ignore them. */
4477 ML_(TyEnt__make_EMPTY
)(&fieldE
);
4481 if (dtag
== DW_TAG_array_type
) {
4482 VG_(memset
)(&typeE
, 0, sizeof(typeE
));
4484 typeE
.tag
= Te_TyArray
;
4485 typeE
.Te
.TyArray
.typeR
= D3_INVALID_CUOFF
;
4486 typeE
.Te
.TyArray
.boundRs
4487 = VG_(newXA
)( ML_(dinfo_zalloc
), "di.readdwarf3.ptD.array_type.1",
4492 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
4493 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
4494 const name_form
*nf
= &abbv
->nf
[nf_i
];
4496 if (attr
== 0 && form
== 0) break;
4497 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
4498 if (attr
== DW_AT_type
&& cts
.szB
> 0) {
4499 typeE
.Te
.TyArray
.typeR
4500 = cook_die_using_form( cc
, (UWord
)cts
.u
.val
, form
);
4503 if (typeE
.Te
.TyArray
.typeR
== D3_INVALID_CUOFF
)
4506 typestack_push( cc
, parser
, td3
, &typeE
, level
);
4510 /* this is a subrange type defining the bounds of an array. */
4511 if (dtag
== DW_TAG_subrange_type
4512 && subrange_type_denotes_array_bounds(parser
, dtag
)) {
4513 Bool have_lower
= False
;
4514 Bool have_upper
= False
;
4515 Bool have_count
= False
;
4520 switch (parser
->language
) {
4521 case 'C': have_lower
= True
; lower
= 0; break;
4522 case 'F': have_lower
= True
; lower
= 1; break;
4523 case '?': have_lower
= False
; break;
4524 case 'A': have_lower
= False
; break;
4525 default: vg_assert(0); /* assured us by handling of
4526 DW_TAG_compile_unit in this fn */
4529 VG_(memset
)( &boundE
, 0, sizeof(boundE
) );
4530 boundE
.cuOff
= D3_INVALID_CUOFF
;
4531 boundE
.tag
= Te_Bound
;
4534 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
4535 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
4536 const name_form
*nf
= &abbv
->nf
[nf_i
];
4538 if (attr
== 0 && form
== 0) break;
4539 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
4540 if (attr
== DW_AT_lower_bound
&& cts
.szB
> 0
4541 && form_expected_for_bound (form
)) {
4542 lower
= (Long
)cts
.u
.val
;
4545 if (attr
== DW_AT_upper_bound
&& cts
.szB
> 0
4546 && form_expected_for_bound (form
)) {
4547 upper
= (Long
)cts
.u
.val
;
4550 if (attr
== DW_AT_count
&& cts
.szB
> 0) {
4551 count
= (Long
)cts
.u
.val
;
4555 /* FIXME: potentially skip the rest if no parent present, since
4556 it could be the case that this subrange type is free-standing
4557 (not being used to describe the bounds of a containing array
4559 /* Do we have a plausible parent? */
4560 if (typestack_is_empty(parser
)) goto_bad_DIE
;
4561 vg_assert(ML_(TyEnt__is_type
)(&parser
->qparentE
[parser
->sp
]));
4562 vg_assert(parser
->qparentE
[parser
->sp
].cuOff
!= D3_INVALID_CUOFF
);
4563 if (level
!= parser
->qlevel
[parser
->sp
]+1) goto_bad_DIE
;
4564 if (parser
->qparentE
[parser
->sp
].tag
!= Te_TyArray
) goto_bad_DIE
;
4566 /* Figure out if we have a definite range or not */
4567 if (have_lower
&& have_upper
&& (!have_count
)) {
4568 boundE
.Te
.Bound
.knownL
= True
;
4569 boundE
.Te
.Bound
.knownU
= True
;
4570 boundE
.Te
.Bound
.boundL
= lower
;
4571 boundE
.Te
.Bound
.boundU
= upper
;
4573 else if (have_lower
&& (!have_upper
) && (!have_count
)) {
4574 boundE
.Te
.Bound
.knownL
= True
;
4575 boundE
.Te
.Bound
.knownU
= False
;
4576 boundE
.Te
.Bound
.boundL
= lower
;
4577 boundE
.Te
.Bound
.boundU
= 0;
4579 else if ((!have_lower
) && have_upper
&& (!have_count
)) {
4580 boundE
.Te
.Bound
.knownL
= False
;
4581 boundE
.Te
.Bound
.knownU
= True
;
4582 boundE
.Te
.Bound
.boundL
= 0;
4583 boundE
.Te
.Bound
.boundU
= upper
;
4585 else if ((!have_lower
) && (!have_upper
) && (!have_count
)) {
4586 boundE
.Te
.Bound
.knownL
= False
;
4587 boundE
.Te
.Bound
.knownU
= False
;
4588 boundE
.Te
.Bound
.boundL
= 0;
4589 boundE
.Te
.Bound
.boundU
= 0;
4590 } else if (have_lower
&& (!have_upper
) && (have_count
)) {
4591 boundE
.Te
.Bound
.knownL
= True
;
4592 boundE
.Te
.Bound
.knownU
= True
;
4593 boundE
.Te
.Bound
.boundL
= lower
;
4594 boundE
.Te
.Bound
.boundU
= lower
+ count
- 1;
4596 /* FIXME: handle more cases */
4600 /* Record this bound in the parent */
4601 boundE
.cuOff
= posn
;
4602 vg_assert(parser
->qparentE
[parser
->sp
].Te
.TyArray
.boundRs
);
4603 VG_(addToXA
)( parser
->qparentE
[parser
->sp
].Te
.TyArray
.boundRs
,
4605 /* And record the child itself */
4609 /* typedef or subrange_type other than array bounds. */
4610 if (dtag
== DW_TAG_typedef
4611 || (dtag
== DW_TAG_subrange_type
4612 && !subrange_type_denotes_array_bounds(parser
, dtag
))) {
4613 /* subrange_type other than array bound is only for Ada. */
4614 vg_assert (dtag
== DW_TAG_typedef
|| (parser
->language
== 'A'
4615 || parser
->language
== '?'));
4616 /* We can pick up a new typedef/subrange_type any time. */
4617 VG_(memset
)(&typeE
, 0, sizeof(typeE
));
4618 typeE
.cuOff
= D3_INVALID_CUOFF
;
4619 typeE
.tag
= Te_TyTyDef
;
4620 typeE
.Te
.TyTyDef
.name
= NULL
;
4621 typeE
.Te
.TyTyDef
.typeR
= D3_INVALID_CUOFF
;
4624 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
4625 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
4626 const name_form
*nf
= &abbv
->nf
[nf_i
];
4628 if (attr
== 0 && form
== 0) break;
4629 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
4630 if (attr
== DW_AT_name
&& cts
.szB
< 0) {
4631 typeE
.Te
.TyTyDef
.name
4632 = ML_(cur_read_strdup
)( cts
.u
.cur
,
4633 "di.readdwarf3.ptD.typedef.1" );
4635 if (attr
== DW_AT_type
&& cts
.szB
> 0) {
4636 typeE
.Te
.TyTyDef
.typeR
4637 = cook_die_using_form( cc
, (UWord
)cts
.u
.val
, form
);
4640 /* Do we have something that looks sane?
4641 gcc gnat Ada generates minimal typedef
4643 <6><91cc>: DW_TAG_typedef
4644 DW_AT_abstract_ori: <9066>
4645 g++ for OMP can generate artificial functions that have
4646 parameters that refer to pointers to unnamed typedefs.
4647 See https://bugs.kde.org/show_bug.cgi?id=273475
4648 So we cannot require a name for a DW_TAG_typedef.
4653 if (dtag
== DW_TAG_subroutine_type
) {
4654 /* function type? just record that one fact and ask no
4655 further questions. */
4656 VG_(memset
)(&typeE
, 0, sizeof(typeE
));
4657 typeE
.cuOff
= D3_INVALID_CUOFF
;
4658 typeE
.tag
= Te_TyFn
;
4662 if (dtag
== DW_TAG_volatile_type
|| dtag
== DW_TAG_const_type
4663 || dtag
== DW_TAG_restrict_type
|| dtag
== DW_TAG_atomic_type
) {
4665 VG_(memset
)(&typeE
, 0, sizeof(typeE
));
4666 typeE
.cuOff
= D3_INVALID_CUOFF
;
4667 typeE
.tag
= Te_TyQual
;
4668 typeE
.Te
.TyQual
.qual
4669 = (dtag
== DW_TAG_volatile_type
? 'V'
4670 : (dtag
== DW_TAG_const_type
? 'C'
4671 : (dtag
== DW_TAG_restrict_type
? 'R' : 'A')));
4672 /* target type defaults to 'void' */
4673 typeE
.Te
.TyQual
.typeR
= D3_FAKEVOID_CUOFF
;
4676 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
4677 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
4678 const name_form
*nf
= &abbv
->nf
[nf_i
];
4680 if (attr
== 0 && form
== 0) break;
4681 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
4682 if (attr
== DW_AT_type
&& cts
.szB
> 0) {
4683 typeE
.Te
.TyQual
.typeR
4684 = cook_die_using_form( cc
, (UWord
)cts
.u
.val
, form
);
4688 /* gcc sometimes generates DW_TAG_const/volatile_type without
4689 DW_AT_type and GDB appears to interpret the type as 'const
4690 void' (resp. 'volatile void'). So just allow it .. */
4691 if (have_ty
== 1 || have_ty
== 0)
4698 * Treat DW_TAG_unspecified_type as type void. An example of DW_TAG_unspecified_type:
4700 * $ readelf --debug-dump /usr/lib/debug/usr/lib/libstdc++.so.6.0.16.debug
4701 * <1><10d4>: Abbrev Number: 53 (DW_TAG_unspecified_type)
4702 * <10d5> DW_AT_name : (indirect string, offset: 0xdb7): decltype(nullptr)
4704 if (dtag
== DW_TAG_unspecified_type
) {
4705 VG_(memset
)(&typeE
, 0, sizeof(typeE
));
4706 typeE
.cuOff
= D3_INVALID_CUOFF
;
4707 typeE
.tag
= Te_TyQual
;
4708 typeE
.Te
.TyQual
.typeR
= D3_FAKEVOID_CUOFF
;
4712 /* else ignore this DIE */
4717 if (0) VG_(printf
)("YYYY Acquire Type\n");
4718 vg_assert(ML_(TyEnt__is_type
)( &typeE
));
4719 vg_assert(typeE
.cuOff
== D3_INVALID_CUOFF
|| typeE
.cuOff
== posn
);
4721 VG_(addToXA
)( tyents
, &typeE
);
4726 if (0) VG_(printf
)("YYYY Acquire Atom\n");
4727 vg_assert(atomE
.tag
== Te_Atom
);
4728 vg_assert(atomE
.cuOff
== D3_INVALID_CUOFF
|| atomE
.cuOff
== posn
);
4730 VG_(addToXA
)( tyents
, &atomE
);
4735 /* For union members, Expr should be absent */
4736 if (0) VG_(printf
)("YYYY Acquire Field\n");
4737 vg_assert(fieldE
.tag
== Te_Field
);
4738 vg_assert(fieldE
.Te
.Field
.nLoc
<= 0 || fieldE
.Te
.Field
.pos
.loc
!= NULL
);
4739 vg_assert(fieldE
.Te
.Field
.nLoc
!= 0 || fieldE
.Te
.Field
.pos
.loc
== NULL
);
4740 if (fieldE
.Te
.Field
.isStruct
) {
4741 vg_assert(fieldE
.Te
.Field
.nLoc
!= 0);
4743 vg_assert(fieldE
.Te
.Field
.nLoc
== 0);
4745 vg_assert(fieldE
.cuOff
== D3_INVALID_CUOFF
|| fieldE
.cuOff
== posn
);
4746 fieldE
.cuOff
= posn
;
4747 VG_(addToXA
)( tyents
, &fieldE
);
4752 if (0) VG_(printf
)("YYYY Acquire Bound\n");
4753 vg_assert(boundE
.tag
== Te_Bound
);
4754 vg_assert(boundE
.cuOff
== D3_INVALID_CUOFF
|| boundE
.cuOff
== posn
);
4755 boundE
.cuOff
= posn
;
4756 VG_(addToXA
)( tyents
, &boundE
);
4761 dump_bad_die_and_barf("parse_type_DIE", dtag
, posn
, level
,
4762 c_die
, saved_die_c_offset
,
4769 /*------------------------------------------------------------*/
4771 /*--- Compression of type DIE information ---*/
4773 /*------------------------------------------------------------*/
4775 static UWord
chase_cuOff ( Bool
* changed
,
4776 const XArray
* /* of TyEnt */ ents
,
4777 TyEntIndexCache
* ents_cache
,
4781 ent
= ML_(TyEnts__index_by_cuOff
)( ents
, ents_cache
, cuOff
);
4784 if (VG_(clo_verbosity
) > 1)
4785 VG_(printf
)("chase_cuOff: no entry for 0x%05lx\n", cuOff
);
4790 vg_assert(ent
->tag
!= Te_EMPTY
);
4791 if (ent
->tag
!= Te_INDIR
) {
4795 vg_assert(ent
->Te
.INDIR
.indR
< cuOff
);
4797 return ent
->Te
.INDIR
.indR
;
4802 void chase_cuOffs_in_XArray ( Bool
* changed
,
4803 const XArray
* /* of TyEnt */ ents
,
4804 TyEntIndexCache
* ents_cache
,
4805 /*MOD*/XArray
* /* of UWord */ cuOffs
)
4808 Word i
, n
= VG_(sizeXA
)( cuOffs
);
4809 for (i
= 0; i
< n
; i
++) {
4811 UWord
* p
= VG_(indexXA
)( cuOffs
, i
);
4812 *p
= chase_cuOff( &b
, ents
, ents_cache
, *p
);
4819 static Bool
TyEnt__subst_R_fields ( const XArray
* /* of TyEnt */ ents
,
4820 TyEntIndexCache
* ents_cache
,
4823 Bool b
, changed
= False
;
4829 = chase_cuOff( &b
, ents
, ents_cache
, te
->Te
.INDIR
.indR
);
4830 if (b
) changed
= True
;
4838 = chase_cuOff( &b
, ents
, ents_cache
, te
->Te
.Field
.typeR
);
4839 if (b
) changed
= True
;
4850 = chase_cuOff( &b
, ents
, ents_cache
, te
->Te
.TyPorR
.typeR
);
4851 if (b
) changed
= True
;
4854 te
->Te
.TyTyDef
.typeR
4855 = chase_cuOff( &b
, ents
, ents_cache
, te
->Te
.TyTyDef
.typeR
);
4856 if (b
) changed
= True
;
4859 chase_cuOffs_in_XArray( &b
, ents
, ents_cache
, te
->Te
.TyStOrUn
.fieldRs
);
4860 if (b
) changed
= True
;
4863 chase_cuOffs_in_XArray( &b
, ents
, ents_cache
, te
->Te
.TyEnum
.atomRs
);
4864 if (b
) changed
= True
;
4867 te
->Te
.TyArray
.typeR
4868 = chase_cuOff( &b
, ents
, ents_cache
, te
->Te
.TyArray
.typeR
);
4869 if (b
) changed
= True
;
4870 chase_cuOffs_in_XArray( &b
, ents
, ents_cache
, te
->Te
.TyArray
.boundRs
);
4871 if (b
) changed
= True
;
4877 = chase_cuOff( &b
, ents
, ents_cache
, te
->Te
.TyQual
.typeR
);
4878 if (b
) changed
= True
;
4889 /* Make a pass over 'ents'. For each tyent, inspect the target of any
4890 'R' or 'Rs' fields (those which refer to other tyents), and replace
4891 any which point to INDIR nodes with the target of the indirection
4892 (which should not itself be an indirection). In summary, this
4893 routine shorts out all references to indirection nodes. */
4895 Word
dedup_types_substitution_pass ( /*MOD*/XArray
* /* of TyEnt */ ents
,
4896 TyEntIndexCache
* ents_cache
)
4898 Word i
, n
, nChanged
= 0;
4900 n
= VG_(sizeXA
)( ents
);
4901 for (i
= 0; i
< n
; i
++) {
4902 TyEnt
* ent
= VG_(indexXA
)( ents
, i
);
4903 vg_assert(ent
->tag
!= Te_EMPTY
);
4904 /* We have to substitute everything, even indirections, so as to
4905 ensure that chains of indirections don't build up. */
4906 b
= TyEnt__subst_R_fields( ents
, ents_cache
, ent
);
4915 /* Make a pass over 'ents', building a dictionary of TyEnts as we go.
4916 Look up each new tyent in the dictionary in turn. If it is already
4917 in the dictionary, replace this tyent with an indirection to the
4918 existing one, and delete any malloc'd stuff hanging off this one.
4919 In summary, this routine commons up all tyents that are identical
4920 as defined by TyEnt__cmp_by_all_except_cuOff. */
4922 Word
dedup_types_commoning_pass ( /*MOD*/XArray
* /* of TyEnt */ ents
)
4924 Word n
, i
, nDeleted
;
4925 WordFM
* dict
; /* TyEnt* -> void */
4930 ML_(dinfo_zalloc
), "di.readdwarf3.dtcp.1",
4932 (Word(*)(UWord
,UWord
)) ML_(TyEnt__cmp_by_all_except_cuOff
)
4936 n
= VG_(sizeXA
)( ents
);
4937 for (i
= 0; i
< n
; i
++) {
4938 ent
= VG_(indexXA
)( ents
, i
);
4939 vg_assert(ent
->tag
!= Te_EMPTY
);
4941 /* Ignore indirections, although check that they are
4942 not forming a cycle. */
4943 if (ent
->tag
== Te_INDIR
) {
4944 vg_assert(ent
->Te
.INDIR
.indR
< ent
->cuOff
);
4949 if (VG_(lookupFM
)( dict
, &keyW
, &valW
, (UWord
)ent
)) {
4950 /* it's already in the dictionary. */
4951 TyEnt
* old
= (TyEnt
*)keyW
;
4952 vg_assert(valW
== 0);
4953 vg_assert(old
!= ent
);
4954 vg_assert(old
->tag
!= Te_INDIR
);
4955 /* since we are traversing the array in increasing order of
4957 vg_assert(old
->cuOff
< ent
->cuOff
);
4958 /* So anyway, dump this entry and replace it with an
4959 indirection to the one in the dictionary. Note that the
4960 assertion above guarantees that we cannot create cycles of
4961 indirections, since we are always creating an indirection
4962 to a tyent with a cuOff lower than this one. */
4963 ML_(TyEnt__make_EMPTY
)( ent
);
4964 ent
->tag
= Te_INDIR
;
4965 ent
->Te
.INDIR
.indR
= old
->cuOff
;
4968 /* not in dictionary; add it and keep going. */
4969 VG_(addToFM
)( dict
, (UWord
)ent
, 0 );
4973 VG_(deleteFM
)( dict
, NULL
, NULL
);
4980 void dedup_types ( Bool td3
,
4981 /*MOD*/XArray
* /* of TyEnt */ ents
,
4982 TyEntIndexCache
* ents_cache
)
4984 Word m
, n
, i
, nDel
, nSubst
, nThresh
;
4987 n
= VG_(sizeXA
)( ents
);
4989 /* If a commoning pass and a substitution pass both make fewer than
4990 this many changes, just stop. It's pointless to burn up CPU
4991 time trying to compress the last 1% or so out of the array. */
4994 /* First we must sort .ents by its .cuOff fields, so we
4995 can index into it. */
4996 VG_(setCmpFnXA
)( ents
, (XACmpFn_t
) ML_(TyEnt__cmp_by_cuOff_only
) );
4997 VG_(sortXA
)( ents
);
4999 /* Now repeatedly do commoning and substitution passes over
5000 the array, until there are no more changes. */
5002 nDel
= dedup_types_commoning_pass ( ents
);
5003 nSubst
= dedup_types_substitution_pass ( ents
, ents_cache
);
5004 vg_assert(nDel
>= 0 && nSubst
>= 0);
5005 TRACE_D3(" %ld deletions, %ld substitutions\n", nDel
, nSubst
);
5006 } while (nDel
> nThresh
|| nSubst
> nThresh
);
5008 /* Sanity check: all INDIR nodes should point at a non-INDIR thing.
5009 In fact this should be true at the end of every loop iteration
5010 above (a commoning pass followed by a substitution pass), but
5011 checking it on every iteration is excessively expensive. Note,
5012 this loop also computes 'm' for the stats printing below it. */
5014 n
= VG_(sizeXA
)( ents
);
5015 for (i
= 0; i
< n
; i
++) {
5017 ent
= VG_(indexXA
)( ents
, i
);
5018 if (ent
->tag
!= Te_INDIR
) continue;
5020 ind
= ML_(TyEnts__index_by_cuOff
)( ents
, ents_cache
,
5021 ent
->Te
.INDIR
.indR
);
5023 vg_assert(ind
->tag
!= Te_INDIR
);
5026 TRACE_D3("Overall: %ld before, %ld after\n", n
, n
-m
);
5030 /*------------------------------------------------------------*/
5032 /*--- Resolution of references to type DIEs ---*/
5034 /*------------------------------------------------------------*/
5036 /* Make a pass through the (temporary) variables array. Examine the
5037 type of each variable, check is it found, and chase any Te_INDIRs.
5038 Postcondition is: each variable has a typeR field that refers to a
5039 valid type in tyents, or a Te_UNKNOWN, and is certainly guaranteed
5040 not to refer to a Te_INDIR. (This is so that we can throw all the
5041 Te_INDIRs away later). */
5043 __attribute__((noinline
))
5044 static void resolve_variable_types (
5045 void (*barf
)( const HChar
* ) __attribute__((noreturn
)),
5046 /*R-O*/XArray
* /* of TyEnt */ ents
,
5047 /*MOD*/TyEntIndexCache
* ents_cache
,
5048 /*MOD*/XArray
* /* of TempVar* */ vars
5052 n
= VG_(sizeXA
)( vars
);
5053 for (i
= 0; i
< n
; i
++) {
5054 TempVar
* var
= *(TempVar
**)VG_(indexXA
)( vars
, i
);
5055 /* This is the stated type of the variable. But it might be
5056 an indirection, so be careful. */
5057 TyEnt
* ent
= ML_(TyEnts__index_by_cuOff
)( ents
, ents_cache
,
5059 if (ent
&& ent
->tag
== Te_INDIR
) {
5060 ent
= ML_(TyEnts__index_by_cuOff
)( ents
, ents_cache
,
5061 ent
->Te
.INDIR
.indR
);
5063 vg_assert(ent
->tag
!= Te_INDIR
);
5066 /* Deal first with "normal" cases */
5067 if (ent
&& ML_(TyEnt__is_type
)(ent
)) {
5068 var
->typeR
= ent
->cuOff
;
5072 /* If there's no ent, it probably we did not manage to read a
5073 type at the cuOffset which is stated as being this variable's
5074 type. Maybe a deficiency in parse_type_DIE. Complain. */
5076 VG_(printf
)("\n: Invalid cuOff = 0x%05lx\n", var
->typeR
);
5077 barf("resolve_variable_types: "
5078 "cuOff does not refer to a known type");
5081 /* If ent has any other tag, something bad happened, along the
5082 lines of var->typeR not referring to a type at all. */
5083 vg_assert(ent
->tag
== Te_UNKNOWN
);
5084 /* Just accept it; the type will be useless, but at least keep
5086 var
->typeR
= ent
->cuOff
;
5091 /*------------------------------------------------------------*/
5093 /*--- Parsing of Compilation Units ---*/
5095 /*------------------------------------------------------------*/
5097 static Int
cmp_TempVar_by_dioff ( const void* v1
, const void* v2
) {
5098 const TempVar
* t1
= *(const TempVar
*const *)v1
;
5099 const TempVar
* t2
= *(const TempVar
*const *)v2
;
5100 if (t1
->dioff
< t2
->dioff
) return -1;
5101 if (t1
->dioff
> t2
->dioff
) return 1;
5105 static void read_DIE (
5106 /*MOD*/WordFM
* /* of (XArray* of AddrRange, void) */ rangestree
,
5107 /*MOD*/XArray
* /* of TyEnt */ tyents
,
5108 /*MOD*/XArray
* /* of TempVar* */ tempvars
,
5109 /*MOD*/XArray
* /* of GExpr* */ gexprs
,
5110 /*MOD*/D3TypeParser
* typarser
,
5111 /*MOD*/D3VarParser
* varparser
,
5112 /*MOD*/D3InlParser
* inlparser
,
5113 XArray
** fndn_ix_Table
,
5114 ULong
*debug_line_offset
,
5115 Cursor
* c
, Bool td3
, CUConst
* cc
, Int level
5119 ULong atag
, abbv_code
;
5122 UWord start_die_c_offset
;
5123 UWord after_die_c_offset
;
5124 // If the DIE we will parse has a sibling and the parser(s) are
5125 // all indicating that parse_children is not necessary, then
5126 // we will skip the children by jumping to the sibling of this DIE
5127 // (if it has a sibling).
5129 Bool parse_children
= False
;
5131 /* --- Deal with this DIE --- */
5132 posn
= cook_die( cc
, get_position_of_Cursor( c
) );
5133 abbv_code
= get_ULEB128( c
);
5134 abbv
= get_abbv(cc
, abbv_code
, td3
);
5139 trace_DIE ((DW_TAG
)atag
, posn
, level
,
5140 get_position_of_Cursor( c
), abbv
, cc
);
5144 cc
->barf("read_DIE: invalid zero tag on DIE");
5146 has_children
= abbv
->has_children
;
5147 if (has_children
!= DW_children_no
&& has_children
!= DW_children_yes
)
5148 cc
->barf("read_DIE: invalid has_children value");
5150 /* We're set up to look at the fields of this DIE. Hand it off to
5151 any parser(s) that want to see it. Since they will in general
5152 advance the DIE cursor, remember the current settings so that we
5153 can then back up. */
5154 start_die_c_offset
= get_position_of_Cursor( c
);
5155 after_die_c_offset
= 0; // set to c position if a parser has read the DIE.
5157 if (VG_(clo_read_var_info
)) {
5158 parse_type_DIE( tyents
,
5167 if (get_position_of_Cursor( c
) != start_die_c_offset
) {
5168 after_die_c_offset
= get_position_of_Cursor( c
);
5169 set_position_of_Cursor( c
, start_die_c_offset
);
5172 parse_var_DIE( rangestree
,
5185 if (get_position_of_Cursor( c
) != start_die_c_offset
) {
5186 after_die_c_offset
= get_position_of_Cursor( c
);
5187 set_position_of_Cursor( c
, start_die_c_offset
);
5190 parse_children
= True
;
5191 // type and var parsers do not have logic to skip children and establish
5192 // the value of sibling.
5195 if (VG_(clo_read_inline_info
)) {
5196 inlparser
->sibling
= 0;
5198 parse_inl_DIE( inlparser
,
5209 if (get_position_of_Cursor( c
) != start_die_c_offset
) {
5210 after_die_c_offset
= get_position_of_Cursor( c
);
5211 // Last parser, no need to reset the cursor to start_die_c_offset.
5214 sibling
= inlparser
->sibling
;
5215 vg_assert (inlparser
->sibling
== 0 || inlparser
->sibling
== sibling
);
5218 /* Top level CU DIE, but we don't want to read anything else, just skip
5219 to the end and return. */
5220 if (level
== 0 && !parse_children
) {
5221 UWord cu_size_including_IniLen
= (cc
->unit_length
5222 + (cc
->is_dw64
? 12 : 4));
5223 set_position_of_Cursor( c
, (cc
->cu_start_offset
5224 + cu_size_including_IniLen
));
5228 if (after_die_c_offset
> 0) {
5229 // DIE was read by a parser above, so we know where the DIE ends.
5230 set_position_of_Cursor( c
, after_die_c_offset
);
5232 /* No parser has parsed this DIE. So, we need to skip the DIE,
5233 in order to read the next DIE.
5234 At the same time, establish sibling value if the DIE has one. */
5235 TRACE_D3(" uninteresting DIE -> skipping ...\n");
5236 skip_DIE (&sibling
, c
, abbv
, cc
);
5239 /* --- Now recurse into its children, if any
5240 and the parsing of the children is requested by a parser --- */
5241 if (has_children
== DW_children_yes
) {
5242 if (parse_children
|| sibling
== 0) {
5243 if (0) TRACE_D3("BEGIN children of level %d\n", level
);
5245 atag
= peek_ULEB128( c
);
5246 if (atag
== 0) break;
5247 if (parse_children
) {
5248 read_DIE( rangestree
, tyents
, tempvars
, gexprs
,
5249 typarser
, varparser
, inlparser
,
5250 fndn_ix_Table
, debug_line_offset
,
5251 c
, td3
, cc
, level
+1 );
5253 Int skip_level
= level
+ 1;
5255 atag
= peek_ULEB128( c
);
5258 if (skip_level
== level
) break;
5259 /* Eat the terminating zero and continue skipping the
5260 children one level up. */
5261 atag
= get_ULEB128( c
);
5262 vg_assert(atag
== 0);
5266 abbv_code
= get_ULEB128( c
);
5267 abbv
= get_abbv(cc
, abbv_code
, td3
);
5269 skip_DIE (&sibling
, c
, abbv
, cc
);
5270 if (abbv
->has_children
) {
5274 set_position_of_Cursor( c
, sibling
);
5279 /* Now we need to eat the terminating zero */
5280 atag
= get_ULEB128( c
);
5281 vg_assert(atag
== 0);
5282 if (0) TRACE_D3("END children of level %d\n", level
);
5284 // We can skip the children by jumping to the sibling
5285 TRACE_D3(" SKIPPING DIE's children,"
5286 "jumping to sibling <%d><%lx>\n",
5288 set_position_of_Cursor( c
, sibling
);
5294 static void trace_debug_loc (const DebugInfo
* di
,
5295 __attribute__((noreturn
)) void (*barf
)( const HChar
* ),
5296 DiSlice escn_debug_loc
)
5299 /* This doesn't work properly because it assumes all entries are
5300 packed end to end, with no holes. But that doesn't always
5301 appear to be the case, so it loses sync. And the D3 spec
5302 doesn't appear to require a no-hole situation either. */
5303 /* Display .debug_loc */
5306 Cursor loc
; /* for showing .debug_loc */
5307 Bool td3
= di
->trace_symtab
;
5310 TRACE_SYMTAB("\n------ The contents of .debug_loc ------\n");
5311 TRACE_SYMTAB(" Offset Begin End Expression\n");
5312 if (ML_(sli_is_valid
)(escn_debug_loc
)) {
5313 init_Cursor( &loc
, escn_debug_loc
, 0, barf
,
5314 "Overrun whilst reading .debug_loc section(1)" );
5320 if (is_at_end_Cursor( &loc
))
5323 /* Read a (host-)word pair. This is something of a hack since
5324 the word size to read is really dictated by the ELF file;
5325 however, we assume we're reading a file with the same
5326 word-sizeness as the host. Reasonably enough. */
5327 w1
= get_UWord( &loc
);
5328 w2
= get_UWord( &loc
);
5330 if (w1
== 0 && w2
== 0) {
5331 /* end of list. reset 'base' */
5332 TRACE_D3(" %08lx <End of list>\n", dl_offset
);
5334 dl_offset
= get_position_of_Cursor( &loc
);
5339 /* new value for 'base' */
5340 TRACE_D3(" %08lx %16lx %08lx (base address)\n",
5346 /* else a location expression follows */
5347 TRACE_D3(" %08lx %08lx %08lx ",
5348 dl_offset
, w1
+ dl_base
, w2
+ dl_base
);
5349 len
= (UWord
)get_UShort( &loc
);
5351 UChar byte
= get_UChar( &loc
);
5352 TRACE_D3("%02x", (UInt
)byte
);
/* Trace-print the contents of the .debug_ranges section.
   Nothing is read unless ML_(sli_is_valid) accepts
   escn_debug_ranges.  A Cursor is then set up over the slice (with
   'barf' and an overrun message handed to init_Cursor) and entries
   are read as pairs of host words via get_UWord.  Three kinds of
   line are printed with TRACE_D3: "<End of list>" for a (0,0) pair,
   "(base address)" for a new base, and otherwise the pair with
   dr_base added to both words.
   NOTE(review): this listing looks incomplete (w1, w2, dr_base and
   dr_offset are used without a visible declaration, and the loop
   and closing braces are not shown) -- check against the upstream
   file before editing. */
5361 static void trace_debug_ranges (const DebugInfo
* di
,
5362 __attribute__((noreturn
)) void (*barf
)( const HChar
* ),
5363 DiSlice escn_debug_ranges
)
5365 Cursor ranges
; /* for showing .debug_ranges */
5368 Bool td3
= di
->trace_symtab
;
5370 /* Display .debug_ranges */
/* The two header lines go through TRACE_SYMTAB; the per-entry
   lines further down go through TRACE_D3. */
5372 TRACE_SYMTAB("\n------ The contents of .debug_ranges ------\n");
5373 TRACE_SYMTAB(" Offset Begin End\n");
5374 if (ML_(sli_is_valid
)(escn_debug_ranges
)) {
5375 init_Cursor( &ranges
, escn_debug_ranges
, 0, barf
,
5376 "Overrun whilst reading .debug_ranges section(1)" );
5382 if (is_at_end_Cursor( &ranges
))
5385 /* Read a (host-)word pair. This is something of a hack since
5386 the word size to read is really dictated by the ELF file;
5387 however, we assume we're reading a file with the same
5388 word-sizeness as the host. Reasonably enough. */
/* w1 and w2 are the two words of one entry; the tests that follow
   classify the entry. */
5389 w1
= get_UWord( &ranges
);
5390 w2
= get_UWord( &ranges
);
5392 if (w1
== 0 && w2
== 0) {
5393 /* end of list. reset 'base' */
5394 TRACE_D3(" %08lx <End of list>\n", dr_offset
);
5396 dr_offset
= get_position_of_Cursor( &ranges
);
5401 /* new value for 'base' */
5402 TRACE_D3(" %08lx %16lx %08lx (base address)\n",
5408 /* else a range [w1+base, w2+base) is denoted */
/* Both words are printed with dr_base added. */
5409 TRACE_D3(" %08lx %08lx %08lx\n",
5410 dr_offset
, w1
+ dr_base
, w2
+ dr_base
);
/* Trace-print the contents of the .debug_abbrev section.
   Nothing is read unless ML_(sli_is_valid) accepts escn_debug_abbv.
   A Cursor is then set up over the slice (with 'barf' and an overrun
   message handed to init_Cursor) and abbreviation tables are read
   until is_at_end_Cursor reports the end.  Each abbreviation and
   each of its attribute specs is printed with TRACE_D3.
   NOTE(review): this listing looks incomplete (atag and has_children
   are used without a visible declaration, and the loop headers and
   closing braces are not shown) -- check against the upstream file
   before editing. */
5415 static void trace_debug_abbrev (const DebugInfo
* di
,
5416 __attribute__((noreturn
)) void (*barf
)( const HChar
* ),
5417 DiSlice escn_debug_abbv
)
5419 Cursor abbv
; /* for showing .debug_abbrev */
5420 Bool td3
= di
->trace_symtab
;
5422 /* Display .debug_abbrev */
5424 TRACE_SYMTAB("\n------ The contents of .debug_abbrev ------\n");
5425 if (ML_(sli_is_valid
)(escn_debug_abbv
)) {
5426 init_Cursor( &abbv
, escn_debug_abbv
, 0, barf
,
5427 "Overrun whilst reading .debug_abbrev section" );
5429 if (is_at_end_Cursor( &abbv
))
5431 /* Read one abbreviation table */
/* Each entry is a ULEB128 code (0 ends the table), a ULEB128 tag
   and a one-byte has-children flag; the tag and flag are printed
   through ML_(pp_DW_TAG) and ML_(pp_DW_children). */
5432 TRACE_D3(" Number TAG\n");
5436 ULong acode
= get_ULEB128( &abbv
);
5437 if (acode
== 0) break; /* end of the table */
5438 atag
= get_ULEB128( &abbv
);
5439 has_children
= get_UChar( &abbv
);
5440 TRACE_D3(" %llu %s [%s]\n",
5441 acode
, ML_(pp_DW_TAG
)(atag
),
5442 ML_(pp_DW_children
)(has_children
));
/* Attribute specs are (name, form) ULEB128 pairs.  A form of
   DW_FORM_implicit_const is followed by one SLEB128 value, which is
   read and discarded.  A (0,0) pair ends the list. */
5444 ULong at_name
= get_ULEB128( &abbv
);
5445 ULong at_form
= get_ULEB128( &abbv
);
5446 if (at_form
== DW_FORM_implicit_const
) {
5447 /* Long at_val = */ get_SLEB128 ( &abbv
);
5449 if (at_name
== 0 && at_form
== 0) break;
5450 TRACE_D3(" %-18s %s\n",
5451 ML_(pp_DW_AT
)(at_name
), ML_(pp_DW_FORM
)(at_form
));
/* Worker that reads the DWARF3 DIE trees for one DebugInfo.
   As far as this listing shows, it:
   - traces .debug_loc, .debug_ranges and .debug_abbrev through the
     three trace_debug_* helpers;
   - zeroes inlparser and, when VG_(clo_read_var_info) is set,
     creates tyents, rangestree, tempvars, gexprs, the type and var
     parsers and the signature_types hash table;
   - pre-scans .debug_types to record type signatures;
   - makes three DIE-reading passes (alternate .debug_info,
     .debug_info, .debug_types), with a read_DIE call per CU;
   - when VG_(clo_read_var_info) is set, post-processes the result:
     dedup_types, resolve_variable_types, copying the non-INDIR
     tyents into di->admin_tyents, bias_GX on every GExpr, and a
     walk over tempvars which, per the comments, hands each variable
     to ML_(addVar);
   - frees the temporary structures and stores gexprs in
     di->admin_gexprs.
   Failures are reported through 'barf', which is declared noreturn.
   NOTE(review): this listing looks incomplete (locals such as cc,
   tyent, pass, section_size, i, j and n are used without a visible
   declaration, and many braces are not shown) -- check against the
   upstream file before editing. */
5459 void new_dwarf3_reader_wrk (
5461 __attribute__((noreturn
)) void (*barf
)( const HChar
* ),
5462 DiSlice escn_debug_info
, DiSlice escn_debug_types
,
5463 DiSlice escn_debug_abbv
, DiSlice escn_debug_line
,
5464 DiSlice escn_debug_str
, DiSlice escn_debug_ranges
,
5465 DiSlice escn_debug_rnglists
, DiSlice escn_debug_loclists
,
5466 DiSlice escn_debug_loc
, DiSlice escn_debug_info_alt
,
5467 DiSlice escn_debug_abbv_alt
, DiSlice escn_debug_line_alt
,
5468 DiSlice escn_debug_str_alt
, DiSlice escn_debug_line_str
,
5469 DiSlice escn_debug_addr
, DiSlice escn_debug_str_offsets
5472 XArray
* /* of TyEnt */ tyents
= NULL
;
5473 XArray
* /* of TyEnt */ tyents_to_keep
= NULL
;
5474 XArray
* /* of GExpr* */ gexprs
= NULL
;
5475 XArray
* /* of TempVar* */ tempvars
= NULL
;
5476 WordFM
* /* of (XArray* of AddrRange, void) */ rangestree
= NULL
;
5477 TyEntIndexCache
* tyents_cache
= NULL
;
5478 TyEntIndexCache
* tyents_to_keep_cache
= NULL
;
5479 TempVar
*varp
, *varp2
;
5481 Cursor info
; /* primary cursor for parsing .debug_info */
5482 D3TypeParser typarser
;
5483 D3VarParser varparser
;
5484 D3InlParser inlparser
;
5485 XArray
* /* of UInt */ fndn_ix_Table
= NULL
;
5486 ULong debug_line_offset
= (ULong
) -1;
5488 Bool td3
= di
->trace_symtab
;
5489 XArray
* /* of TempVar* */ dioff_lookup_tab
;
5491 VgHashTable
*signature_types
= NULL
;
5493 /* Display/trace various information, if requested. */
5495 trace_debug_loc (di
, barf
, escn_debug_loc
);
5496 trace_debug_ranges (di
, barf
, escn_debug_ranges
);
5497 trace_debug_abbrev (di
, barf
, escn_debug_abbv
);
5501 /* Zero out all parsers. Parsers will really be initialised
5502 according to VG_(clo_read_*_info). */
5503 VG_(memset
)( &inlparser
, 0, sizeof(inlparser
) );
5505 if (VG_(clo_read_var_info
)) {
5506 /* We'll park the harvested type information in here. Also create
5507 a fake "void" entry with offset D3_FAKEVOID_CUOFF, so we always
5508 have at least one type entry to refer to. D3_FAKEVOID_CUOFF is
5509 huge and presumably will not occur in any valid DWARF3 file --
5510 it would need to have a .debug_info section 4GB long for that to
5511 happen. These type entries end up in the DebugInfo. */
/* Two entries are added to the new array below: tag Te_TyVoid at
   D3_FAKEVOID_CUOFF with Te.TyVoid.isFake set, then tag Te_UNKNOWN
   at D3_INVALID_CUOFF.  tyent is zeroed with VG_(memset) before
   each one is filled in. */
5512 tyents
= VG_(newXA
)( ML_(dinfo_zalloc
),
5513 "di.readdwarf3.ndrw.1 (TyEnt temp array)",
5514 ML_(dinfo_free
), sizeof(TyEnt
) );
5516 VG_(memset
)(&tyent
, 0, sizeof(tyent
));
5517 tyent
.tag
= Te_TyVoid
;
5518 tyent
.cuOff
= D3_FAKEVOID_CUOFF
;
5519 tyent
.Te
.TyVoid
.isFake
= True
;
5520 VG_(addToXA
)( tyents
, &tyent
);
5523 VG_(memset
)(&tyent
, 0, sizeof(tyent
));
5524 tyent
.tag
= Te_UNKNOWN
;
5525 tyent
.cuOff
= D3_INVALID_CUOFF
;
5526 VG_(addToXA
)( tyents
, &tyent
);
5529 /* This is a tree used to unique-ify the range lists that are
5530 manufactured by parse_var_DIE. References to the keys in the
5531 tree wind up in .rngMany fields in TempVars. We'll need to
5532 delete this tree, and the XArrays attached to it, at the end of
5534 rangestree
= VG_(newFM
)( ML_(dinfo_zalloc
),
5535 "di.readdwarf3.ndrw.2 (rangestree)",
5537 (Word(*)(UWord
,UWord
))cmp__XArrays_of_AddrRange
);
5539 /* List of variables we're accumulating. These don't end up in the
5540 DebugInfo; instead their contents are handed to ML_(addVar) and
5541 the list elements are then deleted. */
5542 tempvars
= VG_(newXA
)( ML_(dinfo_zalloc
),
5543 "di.readdwarf3.ndrw.3 (TempVar*s array)",
5547 /* List of GExprs we're accumulating. These wind up in the
5549 gexprs
= VG_(newXA
)( ML_(dinfo_zalloc
), "di.readdwarf3.ndrw.4",
5550 ML_(dinfo_free
), sizeof(GExpr
*) );
5552 /* We need a D3TypeParser to keep track of partially constructed
5553 types. It'll be discarded as soon as we've completed the CU,
5554 since the resulting information is tipped in to 'tyents' as it
5556 type_parser_init(&typarser
);
5558 var_parser_init(&varparser
);
5560 signature_types
= VG_(HT_construct
) ("signature_types");
5563 /* Do an initial pass to scan the .debug_types section, if any, and
5564 fill in the signatured types hash table. This lets us handle
5565 mapping from a type signature to a (cooked) DIE offset directly
5566 in get_Form_contents. */
/* For each CU header in .debug_types, cc.type_signature is mapped
   to cook_die(&cc, cc.type_offset) in signature_types, and the
   cursor is then moved to cu_start_offset + cc.unit_length + (12 if
   cc.is_dw64, else 4). */
5567 if (VG_(clo_read_var_info
) && ML_(sli_is_valid
)(escn_debug_types
)) {
5568 init_Cursor( &info
, escn_debug_types
, 0, barf
,
5569 "Overrun whilst reading .debug_types section" );
5570 TRACE_D3("\n------ Collecting signatures from "
5571 ".debug_types section ------\n");
5573 abbv_state last_abbv
;
5574 last_abbv
.debug_abbrev_offset
= (ULong
) -1;
5575 last_abbv
.ht_abbvs
= NULL
;
5577 UWord cu_start_offset
, cu_offset_now
;
5580 cu_start_offset
= get_position_of_Cursor( &info
);
5582 TRACE_D3(" Compilation Unit @ offset 0x%lx:\n", cu_start_offset
);
5583 /* parse_CU_Header initialises the CU's abbv hash table. */
5584 parse_CU_Header( &cc
, td3
, &info
, escn_debug_abbv
,
5585 last_abbv
, True
, False
);
5587 /* Needed by cook_die. */
5588 cc
.types_cuOff_bias
= escn_debug_info
.szB
;
5590 record_signatured_type( signature_types
, cc
.type_signature
,
5591 cook_die( &cc
, cc
.type_offset
));
5593 /* Until proven otherwise we assume we don't need the icc9
5594 workaround in this case; see the DIE-reading loop below
5596 cu_offset_now
= (cu_start_offset
+ cc
.unit_length
5597 + (cc
.is_dw64
? 12 : 4));
5599 last_abbv
= cc
.abbv
;
5601 if (cu_offset_now
>= escn_debug_types
.szB
) {
5605 set_position_of_Cursor ( &info
, cu_offset_now
);
5607 if (last_abbv
.ht_abbvs
!= NULL
)
5608 VG_(HT_destruct
) (last_abbv
.ht_abbvs
, ML_(dinfo_free
));
5611 /* Perform three DIE-reading passes. The first pass reads DIEs from
5612 alternate .debug_info (if any), the second pass reads DIEs from
5613 .debug_info, and the third pass reads DIEs from .debug_types.
5614 Moving the body of this loop into a separate function would
5615 require a large number of arguments to be passed in, so it is
5616 kept inline instead. */
/* Each pass points 'info' at its section, sets section_size to that
   section's szB, and calls reset_fndn_ix_table on fndn_ix_Table and
   debug_line_offset. */
5617 for (pass
= 0; pass
< 3; ++pass
) {
5621 if (!ML_(sli_is_valid
)(escn_debug_info_alt
))
5623 /* Now loop over the Compilation Units listed in the alternate
5624 .debug_info section (see D3SPEC sec 7.5) paras 1 and 2.
5625 Each compilation unit contains a Compilation Unit Header
5626 followed by precisely one DW_TAG_compile_unit or
5627 DW_TAG_partial_unit DIE. */
5628 init_Cursor( &info
, escn_debug_info_alt
, 0, barf
,
5629 "Overrun whilst reading alternate .debug_info section" );
5630 section_size
= escn_debug_info_alt
.szB
;
5632 /* Keep track of the last line table we have seen,
5633 it might turn up again. */
5634 reset_fndn_ix_table(&fndn_ix_Table
, &debug_line_offset
, (ULong
) -1);
5636 TRACE_D3("\n------ Parsing alternate .debug_info section ------\n");
5637 } else if (pass
== 1) {
5638 /* Now loop over the Compilation Units listed in the .debug_info
5639 section (see D3SPEC sec 7.5) paras 1 and 2. Each compilation
5640 unit contains a Compilation Unit Header followed by precisely
5641 one DW_TAG_compile_unit or DW_TAG_partial_unit DIE. */
5642 init_Cursor( &info
, escn_debug_info
, 0, barf
,
5643 "Overrun whilst reading .debug_info section" );
5644 section_size
= escn_debug_info
.szB
;
5646 /* Keep track of the last line table we have seen,
5647 it might turn up again. */
5648 reset_fndn_ix_table(&fndn_ix_Table
, &debug_line_offset
, (ULong
) -1);
5650 TRACE_D3("\n------ Parsing .debug_info section ------\n");
5652 if (!ML_(sli_is_valid
)(escn_debug_types
))
5654 if (!VG_(clo_read_var_info
))
5655 continue; // Types not needed when only reading inline info.
5656 init_Cursor( &info
, escn_debug_types
, 0, barf
,
5657 "Overrun whilst reading .debug_types section" );
5658 section_size
= escn_debug_types
.szB
;
5660 /* Keep track of the last line table we have seen,
5661 it might turn up again. */
5662 reset_fndn_ix_table(&fndn_ix_Table
, &debug_line_offset
, (ULong
) -1);
5664 TRACE_D3("\n------ Parsing .debug_types section ------\n");
5667 abbv_state last_abbv
;
5668 last_abbv
.debug_abbrev_offset
= (ULong
) -1;
5669 last_abbv
.ht_abbvs
= NULL
;
5671 ULong cu_start_offset
, cu_offset_now
;
5673 /* It may be that the stated size of this CU is larger than the
5674 amount of stuff actually in it. icc9 seems to generate CUs
5675 thusly. We use these variables to figure out if this is
5676 indeed the case, and if so how many bytes we need to skip to
5677 get to the start of the next CU. Not skipping those bytes
5678 causes us to misidentify the start of the next CU, and it all
5679 goes badly wrong after that (not surprisingly). */
5680 UWord cu_size_including_IniLen
, cu_amount_used
;
5682 /* It seems icc9 finishes the DIE info before debug_info_sz
5683 bytes have been used up. So be flexible, and declare the
5684 sequence complete if there is not enough remaining bytes to
5685 hold even the smallest conceivable CU header. (11 bytes I
5687 /* JRS 23Jan09: I suspect this is no longer necessary now that
5688 the code below contains a 'while (cu_amount_used <
5689 cu_size_including_IniLen ...' style loop, which skips over
5690 any leftover bytes at the end of a CU in the case where the
5691 CU's stated size is larger than its actual size (as
5692 determined by reading all its DIEs). However, for prudence,
5693 I'll leave the following test in place. I can't see that a
5694 CU header can be smaller than 11 bytes, so I don't think
5695 there's any harm possible through the test -- it just adds
5697 Word avail
= get_remaining_length_Cursor( &info
);
5700 TRACE_D3("new_dwarf3_reader_wrk: warning: "
5701 "%ld unused bytes after end of DIEs\n", avail
);
5705 if (VG_(clo_read_var_info
)) {
5706 /* Check the varparser's stack is in a sane state. */
5707 vg_assert(varparser
.sp
== -1);
5708 /* Check the typarser's stack is in a sane state. */
5709 vg_assert(typarser
.sp
== -1);
5712 cu_start_offset
= get_position_of_Cursor( &info
);
5714 TRACE_D3(" Compilation Unit @ offset 0x%llx:\n", cu_start_offset
);
5715 /* parse_CU_Header initialises the CU's hashtable of abbvs ht_abbvs */
5717 parse_CU_Header( &cc
, td3
, &info
, escn_debug_abbv_alt
,
5718 last_abbv
, False
, True
);
5720 parse_CU_Header( &cc
, td3
, &info
, escn_debug_abbv
,
5721 last_abbv
, pass
== 2, False
);
5723 cc
.escn_debug_str
= pass
== 0 ? escn_debug_str_alt
5725 cc
.escn_debug_ranges
= escn_debug_ranges
;
5726 cc
.escn_debug_rnglists
= escn_debug_rnglists
;
5727 cc
.escn_debug_loclists
= escn_debug_loclists
;
5728 cc
.escn_debug_loc
= escn_debug_loc
;
5729 cc
.escn_debug_line
= pass
== 0 ? escn_debug_line_alt
5731 cc
.escn_debug_info
= pass
== 0 ? escn_debug_info_alt
5733 cc
.escn_debug_types
= escn_debug_types
;
5734 cc
.escn_debug_info_alt
= escn_debug_info_alt
;
5735 cc
.escn_debug_str_alt
= escn_debug_str_alt
;
5736 cc
.escn_debug_line_str
= escn_debug_line_str
;
5737 cc
.escn_debug_addr
= escn_debug_addr
;
5738 cc
.escn_debug_str_offsets
= escn_debug_str_offsets
;
5739 cc
.types_cuOff_bias
= escn_debug_info
.szB
;
5740 cc
.alt_cuOff_bias
= escn_debug_info
.szB
+ escn_debug_types
.szB
;
5741 cc
.cu_start_offset
= cu_start_offset
;
5742 cc
.cu_addr_base
= 0;
5743 cc
.cu_has_addr_base
= False
;
5744 cc
.cu_str_offsets_base
= 0;
5745 cc
.cu_has_str_offsets_base
= False
;
5746 cc
.cu_rnglists_base
= 0;
5747 cc
.cu_has_rnglists_base
= False
;
5748 cc
.cu_loclists_base
= 0;
5749 cc
.cu_has_loclists_base
= False
;
5751 /* The CU's svma can be deduced by looking at the AT_low_pc
5752 value in the top level TAG_compile_unit, which is the topmost
5753 DIE. We'll leave it for the 'varparser' to acquire that info
5754 and fill it in -- since it is the only party to want to know
5756 cc
.cu_svma_known
= False
;
5759 if (VG_(clo_read_var_info
)) {
5760 cc
.signature_types
= signature_types
;
5762 /* Create a fake outermost-level range covering the entire
5763 address range. So we always have *something* to catch all
5764 variable declarations. */
5765 varstack_push( &cc
, &varparser
, td3
,
5766 unitary_range_list(0UL, ~0UL),
5767 -1, False
/*isFunc*/, NULL
/*fbGX*/ );
5771 /* Now read the one-and-only top-level DIE for this CU. */
5772 vg_assert(!VG_(clo_read_var_info
) || varparser
.sp
== 0);
5773 read_DIE( rangestree
,
5774 tyents
, tempvars
, gexprs
,
5775 &typarser
, &varparser
, &inlparser
,
5776 &fndn_ix_Table
, &debug_line_offset
,
5777 &info
, td3
, &cc
, 0 );
5779 cu_offset_now
= get_position_of_Cursor( &info
);
5781 if (0) VG_(printf
)("Travelled: %llu size %llu\n",
5782 cu_offset_now
- cc
.cu_start_offset
,
5783 cc
.unit_length
+ (cc
.is_dw64
? 12 : 4));
5785 /* How big the CU claims it is .. */
5786 cu_size_including_IniLen
= cc
.unit_length
+ (cc
.is_dw64
? 12 : 4);
5787 /* .. vs how big we have found it to be */
5788 cu_amount_used
= cu_offset_now
- cc
.cu_start_offset
;
5790 if (1) TRACE_D3("offset now %llu, d-i-size %llu\n",
5791 cu_offset_now
, section_size
);
5792 if (cu_offset_now
> section_size
)
5793 barf("toplevel DIEs beyond end of CU");
5795 /* If the CU is bigger than it claims to be, we've got a serious
5797 if (cu_amount_used
> cu_size_including_IniLen
)
5798 barf("CU's actual size appears to be larger than it claims it is");
5800 /* If the CU is smaller than it claims to be, we need to skip some
5801 bytes. Loop updates cu_offset_now and cu_amount_used. */
5802 while (cu_amount_used
< cu_size_including_IniLen
5803 && get_remaining_length_Cursor( &info
) > 0) {
5804 if (0) VG_(printf
)("SKIP\n");
5805 (void)get_UChar( &info
);
5806 cu_offset_now
= get_position_of_Cursor( &info
);
5807 cu_amount_used
= cu_offset_now
- cc
.cu_start_offset
;
5810 if (VG_(clo_read_var_info
)) {
5811 /* Preen to level -2. DIEs have level >= 0 so -2 cannot occur
5812 anywhere else at all. Our fake the-entire-address-space
5813 range is at level -1, so preening to -2 should completely
5814 empty the stack out. */
5816 varstack_preen( &varparser
, td3
, -2 );
5817 /* Similarly, empty the type stack out. */
5818 typestack_preen( &typarser
, td3
, -2 );
5821 last_abbv
= cc
.abbv
;
5823 if (cu_offset_now
== section_size
)
5825 /* else keep going */
5827 if (last_abbv
.ht_abbvs
!= NULL
)
5828 VG_(HT_destruct
) (last_abbv
.ht_abbvs
, ML_(dinfo_free
));
5831 if (fndn_ix_Table
!= NULL
)
5832 VG_(deleteXA
)(fndn_ix_Table
);
5834 if (VG_(clo_read_var_info
)) {
5835 /* From here on we're post-processing the stuff we got
5836 out of the .debug_info section. */
5839 ML_(pp_TyEnts
)(tyents
, "Initial type entity (TyEnt) array");
5841 TRACE_D3("------ Compressing type entries ------\n");
5844 tyents_cache
= ML_(dinfo_zalloc
)( "di.readdwarf3.ndrw.6",
5845 sizeof(TyEntIndexCache
) );
5846 ML_(TyEntIndexCache__invalidate
)( tyents_cache
);
5847 dedup_types( td3
, tyents
, tyents_cache
);
5850 ML_(pp_TyEnts
)(tyents
, "After type entity (TyEnt) compression");
5854 TRACE_D3("------ Resolving the types of variables ------\n" );
5855 resolve_variable_types( barf
, tyents
, tyents_cache
, tempvars
);
5857 /* Copy all the non-INDIR tyents into a new table. For large
5858 .so's, about 90% of the tyents will by now have been resolved to
5859 INDIRs, and we no longer need them, and so don't need to store
5862 = VG_(newXA
)( ML_(dinfo_zalloc
),
5863 "di.readdwarf3.ndrw.7 (TyEnt to-keep array)",
5864 ML_(dinfo_free
), sizeof(TyEnt
) );
5865 n
= VG_(sizeXA
)( tyents
);
5866 for (i
= 0; i
< n
; i
++) {
5867 TyEnt
* ent
= VG_(indexXA
)( tyents
, i
);
5868 if (ent
->tag
!= Te_INDIR
)
5869 VG_(addToXA
)( tyents_to_keep
, ent
);
5872 VG_(deleteXA
)( tyents
);
5874 ML_(dinfo_free
)( tyents_cache
);
5875 tyents_cache
= NULL
;
5877 /* Sort tyents_to_keep so we can lookup in it. A complete (if
5878 minor) waste of time, since tyents itself is sorted, but
5879 necessary since VG_(lookupXA) refuses to cooperate if we
5881 VG_(setCmpFnXA
)( tyents_to_keep
, (XACmpFn_t
) ML_(TyEnt__cmp_by_cuOff_only
) );
5882 VG_(sortXA
)( tyents_to_keep
);
5884 /* Enable cacheing on tyents_to_keep */
5885 tyents_to_keep_cache
5886 = ML_(dinfo_zalloc
)( "di.readdwarf3.ndrw.8",
5887 sizeof(TyEntIndexCache
) );
5888 ML_(TyEntIndexCache__invalidate
)( tyents_to_keep_cache
);
5890 /* And record the tyents in the DebugInfo. We do this before
5891 starting to hand variables to ML_(addVar), since if ML_(addVar)
5892 wants to do debug printing (of the types of said vars) then it
5893 will need the tyents.*/
5894 vg_assert(!di
->admin_tyents
);
5895 di
->admin_tyents
= tyents_to_keep
;
5897 /* Bias all the location expressions. */
5899 TRACE_D3("------ Biasing the location expressions ------\n" );
5901 n
= VG_(sizeXA
)( gexprs
);
5902 for (i
= 0; i
< n
; i
++) {
5903 gexpr
= *(GExpr
**)VG_(indexXA
)( gexprs
, i
);
5904 bias_GX( gexpr
, di
);
5908 TRACE_D3("------ Acquired the following variables: ------\n\n");
5910 /* Park (pointers to) all the vars in an XArray, so we can look up
5911 abstract origins quickly. The array is sorted (hence, looked-up
5912 by) the .dioff fields. Since the .dioffs should be in strictly
5913 ascending order, there is no need to sort the array after
5914 construction. The ascendingness is however asserted for. */
/* When escn_debug_info_alt.szB is nonzero, first_primary_var is
   advanced over tempvars, testing each dioff against
   escn_debug_info.szB + escn_debug_types.szB.  The copy loop that
   follows starts at that index and wraps modulo n. */
5916 = VG_(newXA
)( ML_(dinfo_zalloc
), "di.readdwarf3.ndrw.9",
5920 n
= VG_(sizeXA
)( tempvars
);
5921 Word first_primary_var
= 0;
5922 for (first_primary_var
= 0;
5923 escn_debug_info_alt
.szB
/*really?*/ && first_primary_var
< n
;
5924 first_primary_var
++) {
5925 varp
= *(TempVar
**)VG_(indexXA
)( tempvars
, first_primary_var
);
5926 if (varp
->dioff
< escn_debug_info
.szB
+ escn_debug_types
.szB
)
5929 for (i
= 0; i
< n
; i
++) {
5930 varp
= *(TempVar
**)VG_(indexXA
)( tempvars
, (i
+ first_primary_var
) % n
);
5931 if (i
> first_primary_var
) {
5932 varp2
= *(TempVar
**)VG_(indexXA
)( tempvars
,
5933 (i
+ first_primary_var
- 1) % n
);
5934 /* why should this hold? Only, I think, because we've
5935 constructed the array by reading .debug_info sequentially,
5936 and so the array .dioff fields should reflect that, and be
5937 strictly ascending. */
5938 vg_assert(varp2
->dioff
< varp
->dioff
);
5940 VG_(addToXA
)( dioff_lookup_tab
, &varp
);
5942 VG_(setCmpFnXA
)( dioff_lookup_tab
, cmp_TempVar_by_dioff
);
5943 VG_(sortXA
)( dioff_lookup_tab
); /* POINTLESS; FIXME: rm */
5945 /* Now visit each var. Collect up as much info as possible for
5946 each var and hand it to ML_(addVar). */
5947 n
= VG_(sizeXA
)( tempvars
);
5948 for (j
= 0; j
< n
; j
++) {
5950 varp
= *(TempVar
**)VG_(indexXA
)( tempvars
, j
);
5952 /* Possibly show .. */
5954 VG_(printf
)("<%lx> addVar: level %d: %s :: ",
5957 varp
->name
? varp
->name
: "<anon_var>" );
5959 ML_(pp_TyEnt_C_ishly
)( tyents_to_keep
, varp
->typeR
);
5961 VG_(printf
)("NULL");
5963 VG_(printf
)("\n Loc=");
5965 ML_(pp_GX
)(varp
->gexpr
);
5967 VG_(printf
)("NULL");
5971 VG_(printf
)(" FrB=");
5972 ML_(pp_GX
)( varp
->fbGX
);
5975 VG_(printf
)(" FrB=none\n");
5977 VG_(printf
)(" declared at: %u %s:%d\n",
5979 ML_(fndn_ix2filename
) (di
, varp
->fndn_ix
),
5981 if (varp
->absOri
!= (UWord
)D3_INVALID_CUOFF
)
5982 VG_(printf
)(" abstract origin: <%lx>\n", varp
->absOri
);
5985 /* Skip variables which have no location. These must be
5986 abstract instances; they are useless as-is since with no
5987 location they have no specified memory location. They will
5988 presumably be referred to via the absOri fields of other
5991 TRACE_D3(" SKIP (no location)\n\n");
5995 /* So it has a location, at least. If it refers to some other
5996 entry through its absOri field, pull in further info through
5998 if (varp
->absOri
!= (UWord
)D3_INVALID_CUOFF
) {
6000 Word ixFirst
, ixLast
;
6002 TempVar
* keyp
= &key
;
6004 VG_(memset
)(&key
, 0, sizeof(key
)); /* not necessary */
6005 key
.dioff
= varp
->absOri
; /* this is what we want to find */
6006 found
= VG_(lookupXA
)( dioff_lookup_tab
, &keyp
,
6007 &ixFirst
, &ixLast
);
6009 /* barf("DW_AT_abstract_origin can't be resolved"); */
6010 TRACE_D3(" SKIP (DW_AT_abstract_origin can't be resolved)\n\n");
6013 /* If the following fails, there is more than one entry with
6014 the same dioff. Which can't happen. */
6015 vg_assert(ixFirst
== ixLast
);
6016 varAI
= *(TempVar
**)VG_(indexXA
)( dioff_lookup_tab
, ixFirst
);
6019 vg_assert(varAI
->dioff
== varp
->absOri
);
6021 /* Copy what useful info we can. */
/* typeR, name, fndn_ix and fLine are each taken from varAI only
   when varAI has one and varp does not. */
6022 if (varAI
->typeR
&& !varp
->typeR
)
6023 varp
->typeR
= varAI
->typeR
;
6024 if (varAI
->name
&& !varp
->name
)
6025 varp
->name
= varAI
->name
;
6026 if (varAI
->fndn_ix
&& !varp
->fndn_ix
)
6027 varp
->fndn_ix
= varAI
->fndn_ix
;
6028 if (varAI
->fLine
> 0 && varp
->fLine
== 0)
6029 varp
->fLine
= varAI
->fLine
;
6032 /* Give it a name if it doesn't have one. */
6034 varp
->name
= ML_(addStr
)( di
, "<anon_var>", -1 );
6036 /* So now does it have enough info to be useful? */
6037 /* NOTE: re typeR: this is a hack. If typeR is Te_UNKNOWN then
6038 the type didn't get resolved. Really, in that case
6039 something's broken earlier on, and should be fixed, rather
6040 than just skipping the variable. */
6041 ent
= ML_(TyEnts__index_by_cuOff
)( tyents_to_keep
,
6042 tyents_to_keep_cache
,
6044 /* The next two assertions should be guaranteed by
6045 our previous call to resolve_variable_types. */
6047 vg_assert(ML_(TyEnt__is_type
)(ent
) || ent
->tag
== Te_UNKNOWN
);
6049 if (ent
->tag
== Te_UNKNOWN
) continue;
6051 vg_assert(varp
->gexpr
);
6052 vg_assert(varp
->name
);
6053 vg_assert(varp
->typeR
);
6054 vg_assert(varp
->level
>= 0);
6056 /* Ok. So we're going to keep it. Call ML_(addVar) once for
6057 each address range in which the variable exists. */
/* An nRanges of 0 or 1 uses the inline rngOneMin/rngOneMax pair via
   oneRange; otherwise the ranges come from the rngMany XArray,
   which is asserted to hold at least two entries. */
6058 TRACE_D3(" ACQUIRE for range(s) ");
6059 { AddrRange oneRange
;
6060 AddrRange
* varPcRanges
;
6062 /* Set up to iterate over address ranges, however
6064 if (varp
->nRanges
== 0 || varp
->nRanges
== 1) {
6065 vg_assert(!varp
->rngMany
);
6066 if (varp
->nRanges
== 0) {
6067 vg_assert(varp
->rngOneMin
== 0);
6068 vg_assert(varp
->rngOneMax
== 0);
6070 nVarPcRanges
= varp
->nRanges
;
6071 oneRange
.aMin
= varp
->rngOneMin
;
6072 oneRange
.aMax
= varp
->rngOneMax
;
6073 varPcRanges
= &oneRange
;
6075 vg_assert(varp
->rngMany
);
6076 vg_assert(varp
->rngOneMin
== 0);
6077 vg_assert(varp
->rngOneMax
== 0);
6078 nVarPcRanges
= VG_(sizeXA
)(varp
->rngMany
);
6079 vg_assert(nVarPcRanges
>= 2);
6080 vg_assert(nVarPcRanges
== (Word
)varp
->nRanges
);
6081 varPcRanges
= VG_(indexXA
)(varp
->rngMany
, 0);
6083 if (varp
->level
== 0)
6084 vg_assert( nVarPcRanges
== 1 );
6086 for (i
= 0; i
< nVarPcRanges
; i
++) {
6087 Addr pcMin
= varPcRanges
[i
].aMin
;
6088 Addr pcMax
= varPcRanges
[i
].aMax
;
6089 vg_assert(pcMin
<= pcMax
);
6090 /* Level 0 is the global address range. So at level 0 we
6091 don't want to bias pcMin/pcMax; but at all other levels
6092 we do since those are derived from svmas in the Dwarf
6093 we're reading. Be paranoid ... */
6094 if (varp
->level
== 0) {
6095 vg_assert(pcMin
== (Addr
)0);
6096 vg_assert(pcMax
== ~(Addr
)0);
6098 /* vg_assert(pcMin > (Addr)0);
6099 No .. we can legitimately expect to see ranges like
6100 0x0-0x11D (pre-biasing, of course). */
6101 vg_assert(pcMax
< ~(Addr
)0);
6104 /* Apply text biasing, for non-global variables. */
6105 if (varp
->level
> 0) {
6106 pcMin
+= di
->text_debug_bias
;
6107 pcMax
+= di
->text_debug_bias
;
6110 if (i
> 0 && (i
%2) == 0)
6112 TRACE_D3("[%#lx,%#lx] ", pcMin
, pcMax
);
6117 varp
->name
, varp
->typeR
,
6118 varp
->gexpr
, varp
->fbGX
,
6119 varp
->fndn_ix
, varp
->fLine
, td3
6125 /* and move on to the next var */
6128 /* Now free all the TempVars */
6129 n
= VG_(sizeXA
)( tempvars
);
6130 for (i
= 0; i
< n
; i
++) {
6131 varp
= *(TempVar
**)VG_(indexXA
)( tempvars
, i
);
6132 ML_(dinfo_free
)(varp
);
6134 VG_(deleteXA
)( tempvars
);
6137 /* and the temp lookup table */
6138 VG_(deleteXA
)( dioff_lookup_tab
);
6140 /* and the ranges tree. Note that we need to also free the XArrays
6141 which constitute the keys, hence pass VG_(deleteXA) as a
6143 VG_(deleteFM
)( rangestree
, (void(*)(UWord
))VG_(deleteXA
), NULL
);
6145 /* and the tyents_to_keep cache */
6146 ML_(dinfo_free
)( tyents_to_keep_cache
);
6147 tyents_to_keep_cache
= NULL
;
6149 /* And the signatured type hash. */
6150 VG_(HT_destruct
) ( signature_types
, ML_(dinfo_free
) );
6152 /* record the GExprs in di so they can be freed later */
6153 vg_assert(!di
->admin_gexprs
);
6154 di
->admin_gexprs
= gexprs
;
6157 // Free up dynamically allocated memory
6158 if (VG_(clo_read_var_info
)) {
6159 type_parser_release(&typarser
);
6160 var_parser_release(&varparser
);
6165 /*------------------------------------------------------------*/
6167 /*--- The "new" DWARF3 reader -- top level control logic ---*/
6169 /*------------------------------------------------------------*/
/* State shared between ML_(new_dwarf3_reader) and barf. */
/* True only between the VG_MINIMAL_SETJMP in ML_(new_dwarf3_reader)
   and the point where that function clears it; barf asserts it. */
6171 static Bool d3rd_jmpbuf_valid
= False
;
/* Message stored by barf just before it jumps; asserted NULL on
   entry to ML_(new_dwarf3_reader) and reset to NULL there. */
6172 static const HChar
* d3rd_jmpbuf_reason
= NULL
;
/* Jump buffer filled by VG_MINIMAL_SETJMP and used by
   VG_MINIMAL_LONGJMP in barf. */
6173 static VG_MINIMAL_JMP_BUF(d3rd_jmpbuf
);
/* Failure exit for the DWARF3 reader.  Asserts that
   d3rd_jmpbuf_valid is set, records 'reason' in d3rd_jmpbuf_reason,
   and transfers control with VG_MINIMAL_LONGJMP(d3rd_jmpbuf).
   Declared noreturn.
   NOTE(review): the end of the body is not visible in this listing
   -- check against the upstream file before editing. */
6175 static __attribute__((noreturn
)) void barf ( const HChar
* reason
) {
6176 vg_assert(d3rd_jmpbuf_valid
);
6177 d3rd_jmpbuf_reason
= reason
;
6178 VG_MINIMAL_LONGJMP(d3rd_jmpbuf
);
/* Top-level entry point for reading the DWARF3 DIE trees.
   Asserts that no jump is armed and no reason is pending, sets
   d3rd_jmpbuf_valid, does VG_MINIMAL_SETJMP(d3rd_jmpbuf), and calls
   new_dwarf3_reader_wrk with 'barf' as the failure callback and
   every section slice passed through unchanged.  Two outcomes are
   traced: "reading was successful", or "reading failed", in which
   case d3rd_jmpbuf_reason (asserted non-NULL) is handed to
   ML_(symerr).  d3rd_jmpbuf_valid is cleared on both paths, and
   both statics are reset at the end.
   NOTE(review): the return type, the 'di' parameter and the branch
   on 'jumped' are not visible in this listing -- check against the
   upstream file before editing. */
6185 ML_(new_dwarf3_reader
) (
6187 DiSlice escn_debug_info
, DiSlice escn_debug_types
,
6188 DiSlice escn_debug_abbv
, DiSlice escn_debug_line
,
6189 DiSlice escn_debug_str
, DiSlice escn_debug_ranges
,
6190 DiSlice escn_debug_rnglists
, DiSlice escn_debug_loclists
,
6191 DiSlice escn_debug_loc
, DiSlice escn_debug_info_alt
,
6192 DiSlice escn_debug_abbv_alt
, DiSlice escn_debug_line_alt
,
6193 DiSlice escn_debug_str_alt
, DiSlice escn_debug_line_str
,
6194 DiSlice escn_debug_addr
, DiSlice escn_debug_str_offsets
6197 volatile Int jumped
;
6198 volatile Bool td3
= di
->trace_symtab
;
6200 /* Run the _wrk function to read the dwarf3. If it succeeds, it
6201 just returns normally. If there is any failure, it longjmp's
6202 back here, having first set d3rd_jmpbuf_reason to something
6204 vg_assert(d3rd_jmpbuf_valid
== False
);
6205 vg_assert(d3rd_jmpbuf_reason
== NULL
);
6207 d3rd_jmpbuf_valid
= True
;
6208 jumped
= VG_MINIMAL_SETJMP(d3rd_jmpbuf
);
6211 new_dwarf3_reader_wrk( di
, barf
,
6212 escn_debug_info
, escn_debug_types
,
6213 escn_debug_abbv
, escn_debug_line
,
6214 escn_debug_str
, escn_debug_ranges
,
6215 escn_debug_rnglists
, escn_debug_loclists
,
6216 escn_debug_loc
, escn_debug_info_alt
,
6217 escn_debug_abbv_alt
, escn_debug_line_alt
,
6218 escn_debug_str_alt
, escn_debug_line_str
,
6219 escn_debug_addr
, escn_debug_str_offsets
);
6220 d3rd_jmpbuf_valid
= False
;
6221 TRACE_D3("\n------ .debug_info reading was successful ------\n");
6224 d3rd_jmpbuf_valid
= False
;
6225 /* Can't longjump without giving some sort of reason. */
/* barf stores its argument in d3rd_jmpbuf_reason before jumping. */
6226 vg_assert(d3rd_jmpbuf_reason
!= NULL
);
6228 TRACE_D3("\n------ .debug_info reading failed ------\n");
6230 ML_(symerr
)(di
, True
, d3rd_jmpbuf_reason
);
6233 d3rd_jmpbuf_valid
= False
;
6234 d3rd_jmpbuf_reason
= NULL
;
6239 /* --- Unused code fragments which might be useful one day. --- */
/* Sketch of a .debug_aranges dumper.  Per set it reads the initial
   length, a UShort version, the .debug_info offset, and one byte
   each for address size and segment size, and prints them.  It then
   skips bytes until the cursor position is a multiple of 2*asize
   and prints (address, length) pairs until a (0,0) pair.
   NOTE(review): init_Cursor is called here with an image/size pair
   (debug_aranges_img, debug_aranges_sz) rather than the DiSlice
   form used by the trace_debug_* functions in this file; the
   fragment would presumably need updating before it could be
   enabled -- confirm against the current init_Cursor signature. */
6242 /* Read the arange tables */
6244 TRACE_SYMTAB("\n------ The contents of .debug_arange ------\n");
6245 init_Cursor( &aranges
, debug_aranges_img
,
6246 debug_aranges_sz
, 0, barf
,
6247 "Overrun whilst reading .debug_aranges section" );
6249 ULong len
, d_i_offset
;
6252 UChar asize
, segsize
;
6254 if (is_at_end_Cursor( &aranges
))
6256 /* Read one arange thingy */
6257 /* initial_length field */
6258 len
= get_Initial_Length( &is64
, &aranges
,
6259 "in .debug_aranges: invalid initial-length field" );
6260 version
= get_UShort( &aranges
);
6261 d_i_offset
= get_Dwarfish_UWord( &aranges
, is64
);
6262 asize
= get_UChar( &aranges
);
6263 segsize
= get_UChar( &aranges
);
6264 TRACE_D3(" Length: %llu\n", len
);
6265 TRACE_D3(" Version: %d\n", (Int
)version
);
6266 TRACE_D3(" Offset into .debug_info: %llx\n", d_i_offset
);
6267 TRACE_D3(" Pointer Size: %d\n", (Int
)asize
);
6268 TRACE_D3(" Segment Size: %d\n", (Int
)segsize
);
6270 TRACE_D3(" Address Length\n");
6272 while ((get_position_of_Cursor( &aranges
) % (2 * asize
)) > 0) {
6273 (void)get_UChar( & aranges
);
6276 ULong address
= get_Dwarfish_UWord( &aranges
, asize
==8 );
6277 ULong length
= get_Dwarfish_UWord( &aranges
, asize
==8 );
6278 TRACE_D3(" 0x%016llx 0x%llx\n", address
, length
);
6279 if (address
== 0 && length
== 0) break;
6285 #endif // defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_solaris) || defined(VGO_freebsd)
6287 /*--------------------------------------------------------------------*/
6289 /*--------------------------------------------------------------------*/