1 /* -*- mode: C; c-basic-offset: 3; -*- */
3 /*--------------------------------------------------------------------*/
4 /*--- Read DWARF3/4 ".debug_info" sections (DIE trees). ---*/
5 /*--- readdwarf3.c ---*/
6 /*--------------------------------------------------------------------*/
9 This file is part of Valgrind, a dynamic binary instrumentation
12 Copyright (C) 2008-2017 OpenWorks LLP
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, see <http://www.gnu.org/licenses/>.
28 The GNU General Public License is contained in the file COPYING.
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
36 #if defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_solaris)
38 /* REFERENCE (without which this code will not make much sense):
40 DWARF Debugging Information Format, Version 3,
41 dated 20 December 2005 (the "D3 spec").
43 Available at http://www.dwarfstd.org/Dwarf3.pdf. There's also a
44 .doc (MS Word) version, but for some reason the section numbers
45 between the Word and PDF versions differ by 1 in the first digit.
46 All section references in this code are to the PDF version.
50 DW_TAG_{const,volatile}_type no DW_AT_type is allowed; it is
51 assumed to mean "const void" or "volatile void" respectively.
52 GDB appears to interpret them like this, anyway.
54 In many cases it is important to know the svma of a CU (the "base
55 address of the CU", as the D3 spec calls it). There are some
56 situations in which the spec implies this value is unknown, but the
57 Dwarf3 produced by gcc-4.1 seems to assume it is not unknown but
58 merely zero when not explicitly stated. So we too have to make
61 POTENTIAL BUG? Spotted 6 Sept 08. Why doesn't
62 unitary_range_list() bias the resulting range list in the same way
63 that its more general cousin, get_range_list(), does? I don't
68 get rid of cu_svma_known and document the assumed-zero svma hack.
70 ML_(sizeOfType): differentiate between zero sized types and types
71 for which the size is unknown. Is this important? I don't know.
73 DW_TAG_array_types: deal with explicit sizes (currently we compute
74 the size from the bounds and the element size, although that's
75 fragile, if the bounds are incompletely specified, or completely
78 Document reason for difference (by 1) of stack preening depth in
79 parse_var_DIE vs parse_type_DIE.
81 Don't hand to ML_(addVars), vars whose locations are entirely in
82 registers (DW_OP_reg*). This is merely a space-saving
83 optimisation, as ML_(evaluate_Dwarf3_Expr) should handle these
84 expressions correctly, by failing to evaluate them and hence
85 effectively ignoring the variable with which they are associated.
87 Deal with DW_TAG_array_types which have element size != stride
89 In some cases, the info for a variable is split between two
90 different DIEs (generally a declarer and a definer). We punt on
91 these. Could do better here.
93 The 'data_bias' argument passed to the expression evaluator
94 (ML_(evaluate_Dwarf3_Expr)) should really be changed to a
95 MaybeUWord, to make it clear when we do vs don't know what it is
96 for the evaluation of an expression. At the moment zero is passed
97 for this parameter in the don't know case. That's a bit fragile
98 and obscure; using a MaybeUWord would be clearer.
100 POTENTIAL PERFORMANCE IMPROVEMENTS:
102 Currently, duplicate removal and all other queries for the type
103 entities array are done using cuOffset-based pointing, which
104 involves a binary search (VG_(lookupXA)) for each access. This is
105 wildly inefficient, although simple. It would be better to
106 translate all the cuOffset-based references (iow, all the "R" and
107 "Rs" fields in the TyEnts in 'tyents') to direct index numbers in
108 'tyents' right at the start of dedup_types(), and use direct
109 indexing (VG_(indexXA)) wherever possible after that.
111 cmp__XArrays_of_AddrRange is also a performance bottleneck. Move
112 VG_(indexXA) into pub_tool_xarray.h so it can be inlined at all use
113 points, and possibly also make an _UNCHECKED version which skips
114 the range checks in performance-critical situations such as this.
116 Handle interaction between read_DIE and parse_{var,type}_DIE
117 better. Currently read_DIE reads the entire DIE just to find where
118 the end is (and for debug printing), so that it can later reliably
119 move the cursor to the end regardless of what parse_{var,type}_DIE
120 do. This means many DIEs (most, even?) are read twice. It would
121 be smarter to make parse_{var,type}_DIE return a Bool indicating
122 whether or not they advanced the DIE cursor, and only if they
123 didn't should read_DIE itself read through the DIE.
125 ML_(addVar) and add_var_to_arange: quite a lot of DiAddrRanges have
126 zero variables in their .vars XArray. Rather than have an XArray
127 with zero elements (which uses 2 malloc'd blocks), allow the .vars
128 pointer to be NULL in this case.
130 More generally, reduce the amount of memory allocated and freed
131 while reading Dwarf3 type/variable information. Even modest (20MB)
132 objects cause this module to allocate and free hundreds of
133 thousands of small blocks, and ML_(arena_malloc) and its various
134 groupies always show up at the top of performance profiles. */
136 #include "pub_core_basics.h"
137 #include "pub_core_debuginfo.h"
138 #include "pub_core_libcbase.h"
139 #include "pub_core_libcassert.h"
140 #include "pub_core_libcprint.h"
141 #include "pub_core_libcsetjmp.h" // setjmp facilities
142 #include "pub_core_hashtable.h"
143 #include "pub_core_options.h"
144 #include "pub_core_tooliface.h" /* VG_(needs) */
145 #include "pub_core_xarray.h"
146 #include "pub_core_wordfm.h"
147 #include "priv_misc.h" /* dinfo_zalloc/free */
148 #include "priv_image.h"
149 #include "priv_tytypes.h"
150 #include "priv_d3basics.h"
151 #include "priv_storage.h"
152 #include "priv_readdwarf3.h" /* self */
155 /*------------------------------------------------------------*/
157 /*--- Basic machinery for parsing DIEs. ---*/
159 /*------------------------------------------------------------*/
/* Debug tracing for the DWARF3 reader.  TRACE_D3 calls VG_(printf)
   with the given format and arguments, but only when the flag 'td3'
   (which must be in scope at the point of use) is set.  The
   do { } while (0) wrapper makes an invocation behave as exactly one
   statement, so it can be used safely as the unbraced body of an
   if/else without capturing a following 'else'. */
#define TRACE_D3(format, args...) \
   do { if (UNLIKELY(td3)) { VG_(printf)(format, ## args); } } while (0)

/* The same test as used by TRACE_D3, usable as a plain condition. */
#define TD3 (UNLIKELY(td3))

/* Two reserved UWord values near the top of the range: all-ones and
   all-ones-minus-one. */
#define D3_INVALID_CUOFF  ((UWord)(-1UL))
#define D3_FAKEVOID_CUOFF ((UWord)(-2UL))
170 DiSlice sli
; // to which this cursor applies
171 DiOffT sli_next
; // offset in underlying DiImage; must be >= sli.ioff
172 void (*barf
)( const HChar
* ) __attribute__((noreturn
));
173 const HChar
* barfstr
;
177 static inline Bool
is_sane_Cursor ( const Cursor
* c
) {
178 if (!c
) return False
;
179 if (!c
->barf
) return False
;
180 if (!c
->barfstr
) return False
;
181 if (!ML_(sli_is_valid
)(c
->sli
)) return False
;
182 if (c
->sli
.ioff
== DiOffT_INVALID
) return False
;
183 if (c
->sli_next
< c
->sli
.ioff
) return False
;
187 // Initialise a cursor from a DiSlice (ELF section, really) so as to
188 // start reading at offset |sli_initial_offset| from the start of the
190 static void init_Cursor ( /*OUT*/Cursor
* c
,
192 ULong sli_initial_offset
,
193 __attribute__((noreturn
)) void (*barf
)(const HChar
*),
194 const HChar
* barfstr
)
197 VG_(bzero_inline
)(c
, sizeof(*c
));
199 c
->sli_next
= c
->sli
.ioff
+ sli_initial_offset
;
201 c
->barfstr
= barfstr
;
202 vg_assert(is_sane_Cursor(c
));
205 static Bool
is_at_end_Cursor ( const Cursor
* c
) {
206 vg_assert(is_sane_Cursor(c
));
207 return c
->sli_next
>= c
->sli
.ioff
+ c
->sli
.szB
;
210 static inline ULong
get_position_of_Cursor ( const Cursor
* c
) {
211 vg_assert(is_sane_Cursor(c
));
212 return c
->sli_next
- c
->sli
.ioff
;
214 static inline void set_position_of_Cursor ( Cursor
* c
, ULong pos
) {
215 c
->sli_next
= c
->sli
.ioff
+ pos
;
216 vg_assert(is_sane_Cursor(c
));
218 static inline void advance_position_of_Cursor ( Cursor
* c
, ULong delta
) {
219 c
->sli_next
+= delta
;
220 vg_assert(is_sane_Cursor(c
));
223 static /*signed*/Long
get_remaining_length_Cursor ( const Cursor
* c
) {
224 vg_assert(is_sane_Cursor(c
));
225 return c
->sli
.ioff
+ c
->sli
.szB
- c
->sli_next
;
228 //static void* get_address_of_Cursor ( Cursor* c ) {
229 // vg_assert(is_sane_Cursor(c));
230 // return &c->region_start_img[ c->region_next ];
233 static DiCursor
get_DiCursor_from_Cursor ( const Cursor
* c
) {
234 return mk_DiCursor(c
->sli
.img
, c
->sli_next
);
237 /* FIXME: document assumptions on endianness for
238 get_UShort/UInt/ULong. */
239 static inline UChar
get_UChar ( Cursor
* c
) {
241 vg_assert(is_sane_Cursor(c
));
242 if (c
->sli_next
+ sizeof(UChar
) > c
->sli
.ioff
+ c
->sli
.szB
) {
247 r
= ML_(img_get_UChar
)(c
->sli
.img
, c
->sli_next
);
248 c
->sli_next
+= sizeof(UChar
);
251 static UShort
get_UShort ( Cursor
* c
) {
253 vg_assert(is_sane_Cursor(c
));
254 if (c
->sli_next
+ sizeof(UShort
) > c
->sli
.ioff
+ c
->sli
.szB
) {
259 r
= ML_(img_get_UShort
)(c
->sli
.img
, c
->sli_next
);
260 c
->sli_next
+= sizeof(UShort
);
263 static UInt
get_UInt ( Cursor
* c
) {
265 vg_assert(is_sane_Cursor(c
));
266 if (c
->sli_next
+ sizeof(UInt
) > c
->sli
.ioff
+ c
->sli
.szB
) {
271 r
= ML_(img_get_UInt
)(c
->sli
.img
, c
->sli_next
);
272 c
->sli_next
+= sizeof(UInt
);
275 static ULong
get_ULong ( Cursor
* c
) {
277 vg_assert(is_sane_Cursor(c
));
278 if (c
->sli_next
+ sizeof(ULong
) > c
->sli
.ioff
+ c
->sli
.szB
) {
283 r
= ML_(img_get_ULong
)(c
->sli
.img
, c
->sli_next
);
284 c
->sli_next
+= sizeof(ULong
);
287 static ULong
get_ULEB128 ( Cursor
* c
) {
291 /* unroll first iteration */
292 byte
= get_UChar( c
);
293 result
= (ULong
)(byte
& 0x7f);
294 if (LIKELY(!(byte
& 0x80))) return result
;
296 /* end unroll first iteration */
298 byte
= get_UChar( c
);
299 result
|= ((ULong
)(byte
& 0x7f)) << shift
;
301 } while (byte
& 0x80);
304 static Long
get_SLEB128 ( Cursor
* c
) {
310 result
|= ((ULong
)(byte
& 0x7f)) << shift
;
312 } while (byte
& 0x80);
313 if (shift
< 64 && (byte
& 0x40))
314 result
|= -(1ULL << shift
);
318 /* Assume 'c' points to the start of a string. Return a DiCursor of
319 whatever it points at, and advance it past the terminating zero.
320 This makes it safe for the caller to then copy the string with
321 ML_(addStr), since (w.r.t. image overruns) the process of advancing
322 past the terminating zero will already have "vetted" the string. */
323 static DiCursor
get_AsciiZ ( Cursor
* c
) {
325 DiCursor res
= get_DiCursor_from_Cursor(c
);
326 do { uc
= get_UChar(c
); } while (uc
!= 0);
330 static ULong
peek_ULEB128 ( Cursor
* c
) {
331 DiOffT here
= c
->sli_next
;
332 ULong r
= get_ULEB128( c
);
336 static UChar
peek_UChar ( Cursor
* c
) {
337 DiOffT here
= c
->sli_next
;
338 UChar r
= get_UChar( c
);
343 static ULong
get_Dwarfish_UWord ( Cursor
* c
, Bool is_dw64
) {
344 return is_dw64
? get_ULong(c
) : (ULong
) get_UInt(c
);
347 static UWord
get_UWord ( Cursor
* c
) {
348 vg_assert(sizeof(UWord
) == sizeof(void*));
349 if (sizeof(UWord
) == 4) return get_UInt(c
);
350 if (sizeof(UWord
) == 8) return get_ULong(c
);
354 /* Read a DWARF3 'Initial Length' field */
355 static ULong
get_Initial_Length ( /*OUT*/Bool
* is64
,
357 const HChar
* barfMsg
)
363 if (w32
>= 0xFFFFFFF0 && w32
< 0xFFFFFFFF) {
366 else if (w32
== 0xFFFFFFFF) {
368 w64
= get_ULong( c
);
377 /*------------------------------------------------------------*/
379 /*--- "CUConst" structure ---*/
381 /*------------------------------------------------------------*/
385 ULong at_name
; // Dwarf Attribute name
386 ULong at_form
; // Dwarf Attribute form
387 Long at_val
; // Dwarf Attribute value (for implicit_const)
388 UInt skip_szB
; // Nr of bytes skippable from here ...
389 UInt next_nf
; // ... to reach this attr/form index in the g_abbv.nf
391 /* skip_szB and next_nf are used to optimise the skipping of uninteresting DIEs.
392 Each name_form maintains how many (fixed) nr of bytes can be skipped from
393 the beginning of this form till the next attr/form to look at.
394 The next form to look can be:
395 an 'interesting' attr/form to read while skipping a DIE
396 (currently, this is only DW_AT_sibling)
398 a variable length form which must be read to be skipped.
399 For a variable length form, the skip_szB will be equal to VARSZ_FORM.
401 Note: this technique could also be used to speed up the parsing
402 of DIEs : for each parser kind, we could have the nr of bytes
403 to skip to directly reach the interesting form(s) for the parser. */
407 struct _g_abbv
*next
; // read/write by hash table.
408 UWord abbv_code
; // key, read by hash table
412 /* Variable-length array of name/form pairs, terminated
414 The skip_szB/next_nf fields allow a DIE described by
415 this g_abbv to be skipped efficiently; */
418 /* Holds information that is constant through the parsing of a
419 Compilation Unit. This is basically plumbed through to
423 /* Call here if anything goes wrong */
424 void (*barf
)( const HChar
* ) __attribute__((noreturn
));
425 /* Is this 64-bit DWARF ? */
427 /* Which DWARF version ? (2, 3, 4 or 5) */
429 /* Length of this Compilation Unit, as stated in the
430 .unit_length :: InitialLength field of the CU Header.
431 However, this size (as specified by the D3 spec) does not
432 include the size of the .unit_length field itself, which is
433 either 4 or 12 bytes (32-bit or 64-bit Dwarf3). That value
434 can be obtained through the expression ".is_dw64 ? 12 : 4". */
436 /* Offset of start of this unit in .debug_info */
437 UWord cu_start_offset
;
438 /* SVMA for this CU. In the D3 spec, it is known as the "base
439 address of the compilation unit" (last para sec 3.1.1).
440 Needed for (amongst things) interpretation of location-list
445 /* The debug_abbreviations table to be used for this Unit */
447 /* Upper bound on size thereof (an overestimate, in general) */
448 //UWord debug_abbv_maxszB;
449 /* A bounded area of the image, to be used as the
450 debug_abbreviations table to be used for this Unit. */
453 /* Image information for various sections. */
454 DiSlice escn_debug_str
;
455 DiSlice escn_debug_ranges
;
456 DiSlice escn_debug_rnglists
;
457 DiSlice escn_debug_loclists
;
458 DiSlice escn_debug_loc
;
459 DiSlice escn_debug_line
;
460 DiSlice escn_debug_info
;
461 DiSlice escn_debug_types
;
462 DiSlice escn_debug_info_alt
;
463 DiSlice escn_debug_str_alt
;
464 DiSlice escn_debug_line_str
;
465 /* How much to add to .debug_types resp. alternate .debug_info offsets
467 UWord types_cuOff_bias
;
468 UWord alt_cuOff_bias
;
469 /* --- Needed so we can add stuff to the string table. --- */
470 struct _DebugInfo
* di
;
471 /* --- a hash table of g_abbv (i.e. parsed abbreviations) --- */
472 VgHashTable
*ht_abbvs
;
474 /* True if this came from .debug_types; otherwise it came from
477 /* For a unit coming from .debug_types, these hold the TU's type
478 signature and the uncooked DIE offset of the TU's signatured
479 type. For a unit coming from .debug_info, these are unused. */
480 ULong type_signature
;
483 /* Signatured type hash; computed once and then shared by all
485 VgHashTable
*signature_types
;
487 /* True if this came from alternate .debug_info; otherwise
488 it came from normal .debug_info or .debug_types. */
494 /* Return the cooked value of DIE depending on whether CC represents a
495 .debug_types unit. To cook a DIE, we pretend that the .debug_info,
496 .debug_types and optional alternate .debug_info sections form
497 a contiguous whole, so that DIEs coming from .debug_types are numbered
498 starting at the end of .debug_info and DIEs coming from alternate
499 .debug_info are numbered starting at the end of .debug_types. */
500 static UWord
cook_die( const CUConst
* cc
, UWord die
)
502 if (cc
->is_type_unit
)
503 die
+= cc
->types_cuOff_bias
;
504 else if (cc
->is_alt_info
)
505 die
+= cc
->alt_cuOff_bias
;
509 /* Like cook_die, but understand that DIEs coming from a
510 DW_FORM_ref_sig8 reference are already cooked. Also, handle
511 DW_FORM_GNU_ref_alt from within primary .debug_info or .debug_types
512 as reference to alternate .debug_info. */
513 static UWord
cook_die_using_form( const CUConst
*cc
, UWord die
, DW_FORM form
)
515 if (form
== DW_FORM_ref_sig8
)
517 if (form
== DW_FORM_GNU_ref_alt
)
518 return die
+ cc
->alt_cuOff_bias
;
519 return cook_die( cc
, die
);
522 /* Return the uncooked offset of DIE and set *TYPE_FLAG to true if the DIE
523 came from the .debug_types section and *ALT_FLAG to true if the DIE
524 came from alternate .debug_info section. */
525 static UWord
uncook_die( const CUConst
*cc
, UWord die
, /*OUT*/Bool
*type_flag
,
530 /* The use of escn_debug_{info,types}.szB seems safe to me even if
531 escn_debug_{info,types} are DiSlice_INVALID (meaning the
532 sections were not found), because DiSlice_INVALID.szB is always
533 zero. That said, it seems unlikely we'd ever get here if
534 .debug_info or .debug_types were missing. */
535 if (die
>= cc
->escn_debug_info
.szB
) {
536 if (die
>= cc
->escn_debug_info
.szB
+ cc
->escn_debug_types
.szB
) {
538 die
-= cc
->escn_debug_info
.szB
+ cc
->escn_debug_types
.szB
;
541 die
-= cc
->escn_debug_info
.szB
;
547 /*------------------------------------------------------------*/
549 /*--- Helper functions for Guarded Expressions ---*/
551 /*------------------------------------------------------------*/
553 /* Parse the location list starting at img-offset 'debug_loc_offset'
554 in .debug_loc. Results are biased with 'svma_of_referencing_CU'
555 and so I believe are correct SVMAs for the object as a whole. This
556 function allocates the UChar*, and the caller must deallocate it.
557 The resulting block is in so-called Guarded-Expression format.
559 Guarded-Expression format is similar but not identical to the DWARF3
560 location-list format. The format of each returned block is:
564 followed by zero or more of
566 (Addr aMin; Addr aMax; UShort nbytes; ..bytes..; UChar isEnd)
568 '..bytes..' is a standard DWARF3 location expression which is
569 valid when aMin <= pc <= aMax (possibly after suitable biasing).
571 The number of bytes in '..bytes..' is nbytes.
573 The end of the sequence is marked by an isEnd == 1 value. All
574 previous isEnd values must be zero.
576 biasMe is 1 if the aMin/aMax fields need this DebugInfo's
577 text_bias added before use, and 0 if this is not
578 necessary (the GX is ready to go).
580 Hence the block can be quickly parsed and is self-describing. Note
581 that aMax is 1 less than the corresponding value in a DWARF3
582 location list. Zero length ranges, with aMax == aMin-1, are not
585 /* 2008-sept-12: moved ML_(pp_GX) from here to d3basics.c, where
586 it more logically belongs. */
589 /* Apply a text bias to a GX. */
590 static void bias_GX ( /*MOD*/GExpr
* gx
, const DebugInfo
* di
)
593 UChar
* p
= &gx
->payload
[0];
596 uc
= *p
++; /*biasMe*/
600 p
[-1] = 0; /* mark it as done */
608 ML_(write_Addr
)(pA
, ML_(read_Addr
)(pA
) + di
->text_debug_bias
);
612 ML_(write_Addr
)(pA
, ML_(read_Addr
)(pA
) + di
->text_debug_bias
);
614 /* nbytes, and actual expression */
615 nbytes
= ML_(read_UShort
)(p
); p
+= sizeof(UShort
);
620 __attribute__((noinline
))
621 static GExpr
* make_singleton_GX ( DiCursor block
, ULong nbytes
)
627 vg_assert(sizeof(UWord
) == sizeof(Addr
));
628 vg_assert(nbytes
<= 0xFFFF); /* else we overflow the nbytes field */
630 = sizeof(UChar
) /*biasMe*/ + sizeof(UChar
) /*!isEnd*/
631 + sizeof(UWord
) /*aMin*/ + sizeof(UWord
) /*aMax*/
632 + sizeof(UShort
) /*nbytes*/ + (SizeT
)nbytes
633 + sizeof(UChar
); /*isEnd*/
635 gx
= ML_(dinfo_zalloc
)( "di.readdwarf3.msGX.1",
636 sizeof(GExpr
) + bytesReqd
);
638 p
= pstart
= &gx
->payload
[0];
640 p
= ML_(write_UChar
)(p
, 0); /*biasMe*/
641 p
= ML_(write_UChar
)(p
, 0); /*!isEnd*/
642 p
= ML_(write_Addr
)(p
, 0); /*aMin*/
643 p
= ML_(write_Addr
)(p
, ~0); /*aMax*/
644 p
= ML_(write_UShort
)(p
, nbytes
); /*nbytes*/
645 ML_(cur_read_get
)(p
, block
, nbytes
); p
+= nbytes
;
646 p
= ML_(write_UChar
)(p
, 1); /*isEnd*/
648 vg_assert( (SizeT
)(p
- pstart
) == bytesReqd
);
649 vg_assert( &gx
->payload
[bytesReqd
]
650 == ((UChar
*)gx
) + sizeof(GExpr
) + bytesReqd
);
655 __attribute__((noinline
))
656 static GExpr
* make_general_GX ( const CUConst
* cc
,
659 Addr svma_of_referencing_CU
)
664 XArray
* xa
; /* XArray of UChar */
667 Bool addBase
= cc
->version
< 5;
669 vg_assert(sizeof(UWord
) == sizeof(Addr
));
670 if (cc
->version
< 5 && (!ML_(sli_is_valid
)(cc
->escn_debug_loc
)
671 || cc
->escn_debug_loc
.szB
== 0))
672 cc
->barf("make_general_GX: .debug_loc is empty/missing");
673 if (cc
->version
>= 5 && (!ML_(sli_is_valid
)(cc
->escn_debug_loclists
)
674 || cc
->escn_debug_loclists
.szB
== 0))
675 cc
->barf("make_general_GX: .debug_loclists is empty/missing");
678 init_Cursor( &loc
, cc
->escn_debug_loc
, 0, cc
->barf
,
679 "Overrun whilst reading .debug_loc section(2)" );
681 init_Cursor( &loc
, cc
->escn_debug_loclists
, 0, cc
->barf
,
682 "Overrun whilst reading .debug_loclists section(2)" );
683 set_position_of_Cursor( &loc
, offset
);
685 TRACE_D3("make_general_GX (offset = %llu, ioff = %llu) {\n",
686 offset
, get_DiCursor_from_Cursor(&loc
).ioff
);
688 /* Who frees this xa? It is freed before this fn exits. */
689 xa
= VG_(newXA
)( ML_(dinfo_zalloc
), "di.readdwarf3.mgGX.1",
693 { UChar c
= 1; /*biasMe*/ VG_(addBytesToXA
)( xa
, &c
, sizeof(c
) ); }
702 if (cc
->version
< 5) {
703 /* Read a (host-)word pair. This is something of a hack since
704 the word size to read is really dictated by the ELF file;
705 however, we assume we're reading a file with the same
706 word-sizeness as the host. Reasonably enough. */
707 w1
= get_UWord( &loc
);
708 w2
= get_UWord( &loc
);
710 TRACE_D3(" %08lx %08lx\n", w1
, w2
);
711 if (w1
== 0 && w2
== 0) {
713 break; /* end of list */
717 /* new value for 'base' */
721 /* else a location expression follows */
722 len
= (UWord
)get_UShort( &loc
);
727 DW_LLE r
= get_UChar( &loc
);
729 case DW_LLE_end_of_list
:
732 case DW_LLE_base_address
:
733 base
= get_UWord( &loc
);
735 case DW_LLE_start_length
:
736 w1
= get_UWord( &loc
);
737 w2
= w1
+ get_ULEB128( &loc
);
738 len
= get_ULEB128( &loc
);
740 case DW_LLE_offset_pair
:
741 w1
= base
+ get_ULEB128( &loc
);
742 w2
= base
+ get_ULEB128( &loc
);
743 len
= get_ULEB128( &loc
);
745 case DW_LLE_start_end
:
746 w1
= get_UWord ( &loc
);
747 w2
= get_UWord ( &loc
);
748 len
= get_ULEB128( &loc
);
750 case DW_LLE_GNU_view_pair
:
754 case DW_LLE_base_addressx
:
755 case DW_LLE_startx_endx
:
756 case DW_LLE_startx_length
:
757 case DW_LLE_default_location
:
759 cc
->barf( "Unhandled or unknown loclists entry" );
764 /* else enumerate [w1+base, w2+base) */
765 /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
768 TRACE_D3("negative range is for .debug_loc expr at "
769 "file offset %llu\n",
771 cc
->barf( "negative range in .debug_loc section" );
774 /* ignore zero length ranges */
782 VG_(addBytesToXA
)( xa
, &c
, sizeof(c
) );
783 w
= w1
+ (addBase
? base
: 0) + svma_of_referencing_CU
;
784 VG_(addBytesToXA
)( xa
, &w
, sizeof(w
) );
785 w
= w2
-1 + (addBase
? base
: 0) + svma_of_referencing_CU
;
786 VG_(addBytesToXA
)( xa
, &w
, sizeof(w
) );
788 VG_(addBytesToXA
)( xa
, &s
, sizeof(s
) );
792 UChar byte
= get_UChar( &loc
);
793 TRACE_D3("%02x", (UInt
)byte
);
795 VG_(addBytesToXA
)( xa
, &byte
, 1 );
801 { UChar c
= 1; /*isEnd*/ VG_(addBytesToXA
)( xa
, &c
, sizeof(c
) ); }
803 nbytes
= VG_(sizeXA
)( xa
);
804 vg_assert(nbytes
>= 1);
806 gx
= ML_(dinfo_zalloc
)( "di.readdwarf3.mgGX.2", sizeof(GExpr
) + nbytes
);
807 VG_(memcpy
)( &gx
->payload
[0], (UChar
*)VG_(indexXA
)(xa
,0), nbytes
);
808 vg_assert( &gx
->payload
[nbytes
]
809 == ((UChar
*)gx
) + sizeof(GExpr
) + nbytes
);
819 /*------------------------------------------------------------*/
821 /*--- Helper functions for range lists and CU headers ---*/
823 /*------------------------------------------------------------*/
825 /* Denotes an address range. Both aMin and aMax are included in the
826 range; hence a complete range is (0, ~0) and an empty range is any
827 (X, X-1) for X > 0.*/
829 struct { Addr aMin
; Addr aMax
; }
833 /* Generate an arbitrary structural total ordering on
834 XArray* of AddrRange. */
835 static Word
cmp__XArrays_of_AddrRange ( const XArray
* rngs1
,
836 const XArray
* rngs2
)
839 vg_assert(rngs1
&& rngs2
);
840 n1
= VG_(sizeXA
)( rngs1
);
841 n2
= VG_(sizeXA
)( rngs2
);
842 if (n1
< n2
) return -1;
843 if (n1
> n2
) return 1;
844 for (i
= 0; i
< n1
; i
++) {
845 AddrRange
* rng1
= (AddrRange
*)VG_(indexXA
)( rngs1
, i
);
846 AddrRange
* rng2
= (AddrRange
*)VG_(indexXA
)( rngs2
, i
);
847 if (rng1
->aMin
< rng2
->aMin
) return -1;
848 if (rng1
->aMin
> rng2
->aMin
) return 1;
849 if (rng1
->aMax
< rng2
->aMax
) return -1;
850 if (rng1
->aMax
> rng2
->aMax
) return 1;
856 __attribute__((noinline
))
857 static XArray
* /* of AddrRange */ empty_range_list ( void )
859 XArray
* xa
; /* XArray of AddrRange */
860 /* Who frees this xa? varstack_preen() does. */
861 xa
= VG_(newXA
)( ML_(dinfo_zalloc
), "di.readdwarf3.erl.1",
868 __attribute__((noinline
))
869 static XArray
* unitary_range_list ( Addr aMin
, Addr aMax
)
873 vg_assert(aMin
<= aMax
);
874 /* Who frees this xa? varstack_preen() does. */
875 xa
= VG_(newXA
)( ML_(dinfo_zalloc
), "di.readdwarf3.url.1",
880 VG_(addToXA
)( xa
, &pair
);
885 /* Enumerate the address ranges starting at img-offset
886 'debug_ranges_offset' in .debug_ranges. Results are biased with
887 'svma_of_referencing_CU' and so I believe are correct SVMAs for the
888 object as a whole. This function allocates the XArray, and the
889 caller must deallocate it. */
890 __attribute__((noinline
))
891 static XArray
* /* of AddrRange */
892 get_range_list ( const CUConst
* cc
,
894 UWord debug_ranges_offset
,
895 Addr svma_of_referencing_CU
)
899 XArray
* xa
; /* XArray of AddrRange */
902 if (cc
->version
< 5 && (!ML_(sli_is_valid
)(cc
->escn_debug_ranges
)
903 || cc
->escn_debug_ranges
.szB
== 0))
904 cc
->barf("get_range_list: .debug_ranges is empty/missing");
905 if (cc
->version
>= 5 && (!ML_(sli_is_valid
)(cc
->escn_debug_rnglists
)
906 || cc
->escn_debug_rnglists
.szB
== 0))
907 cc
->barf("get_range_list: .debug_rnglists is empty/missing");
910 init_Cursor( &ranges
, cc
->escn_debug_ranges
, 0, cc
->barf
,
911 "Overrun whilst reading .debug_ranges section(2)" );
913 init_Cursor( &ranges
, cc
->escn_debug_rnglists
, 0, cc
->barf
,
914 "Overrun whilst reading .debug_rnglists section(2)" );
916 set_position_of_Cursor( &ranges
, debug_ranges_offset
);
918 /* Who frees this xa? varstack_preen() does. */
919 xa
= VG_(newXA
)( ML_(dinfo_zalloc
), "di.readdwarf3.grl.1", ML_(dinfo_free
),
922 if (cc
->version
< 5) {
924 /* Read a (host-)word pair. This is something of a hack since
925 the word size to read is really dictated by the ELF file;
926 however, we assume we're reading a file with the same
927 word-sizeness as the host. Reasonably enough. */
928 UWord w1
= get_UWord( &ranges
);
929 UWord w2
= get_UWord( &ranges
);
931 if (w1
== 0 && w2
== 0)
932 break; /* end of list. */
935 /* new value for 'base' */
940 /* else enumerate [w1+base, w2+base) */
941 /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
944 cc
->barf( "negative range in .debug_ranges section" );
946 pair
.aMin
= w1
+ base
+ svma_of_referencing_CU
;
947 pair
.aMax
= w2
- 1 + base
+ svma_of_referencing_CU
;
948 vg_assert(pair
.aMin
<= pair
.aMax
);
949 VG_(addToXA
)( xa
, &pair
);
957 DW_RLE r
= get_UChar( &ranges
);
959 case DW_RLE_end_of_list
:
962 case DW_RLE_base_address
:
963 base
= get_UWord( &ranges
);
965 case DW_RLE_start_length
:
966 w1
= get_UWord( &ranges
);
967 w2
= w1
+ get_ULEB128( &ranges
);
969 case DW_RLE_offset_pair
:
970 w1
= base
+ get_ULEB128( &ranges
);
971 w2
= base
+ get_ULEB128( &ranges
);
973 case DW_RLE_start_end
:
974 w1
= get_UWord ( &ranges
);
975 w2
= get_UWord ( &ranges
);
977 case DW_RLE_base_addressx
:
978 case DW_RLE_startx_endx
:
979 case DW_RLE_startx_length
:
981 cc
->barf( "Unhandled or unknown range list entry" );
985 cc
->barf( "negative range in .debug_rnglists section" );
987 pair
.aMin
= w1
+ svma_of_referencing_CU
;
988 pair
.aMax
= w2
- 1 + svma_of_referencing_CU
;
989 vg_assert(pair
.aMin
<= pair
.aMax
);
990 VG_(addToXA
)( xa
, &pair
);
997 #define VARSZ_FORM 0xffffffff
998 static UInt
get_Form_szB (const CUConst
* cc
, DW_FORM form
);
1000 /* Initialises the hash table of abbreviations.
1001 We do a single scan of the abbv slice to parse and
1002 build all abbreviations, for the following reasons:
1003 * all or most abbreviations will be needed in any case
1004 (at least for var-info reading).
1005 * re-reading each time an abbreviation causes a lot of calls
1007 * a CU should not have many abbreviations. */
1008 static void init_ht_abbvs (CUConst
* cc
,
1012 g_abbv
*ta
; // temporary abbreviation, reallocated if needed.
1013 UInt ta_nf_maxE
; // max nr of pairs in ta.nf[], doubled when reallocated.
1014 UInt ta_nf_n
; // nr of pairs in ta->nf that are initialised.
1015 g_abbv
*ht_ta
; // abbv to insert in hash table.
1018 #define SZ_G_ABBV(_nf_szE) (sizeof(g_abbv) + _nf_szE * sizeof(name_form))
1020 ta_nf_maxE
= 10; // starting with enough for 9 pairs+terminating pair.
1021 ta
= ML_(dinfo_zalloc
) ("di.readdwarf3.ht_ta_nf", SZ_G_ABBV(ta_nf_maxE
));
1022 cc
->ht_abbvs
= VG_(HT_construct
) ("di.readdwarf3.ht_abbvs");
1024 init_Cursor( &c
, cc
->debug_abbv
, 0, cc
->barf
,
1025 "Overrun whilst parsing .debug_abbrev section(2)" );
1027 ta
->abbv_code
= get_ULEB128( &c
);
1028 if (ta
->abbv_code
== 0) break; /* end of the table */
1030 ta
->atag
= get_ULEB128( &c
);
1031 ta
->has_children
= get_UChar( &c
);
1034 if (ta_nf_n
>= ta_nf_maxE
) {
1035 g_abbv
*old_ta
= ta
;
1036 ta
= ML_(dinfo_zalloc
) ("di.readdwarf3.ht_ta_nf",
1037 SZ_G_ABBV(2 * ta_nf_maxE
));
1038 ta_nf_maxE
= 2 * ta_nf_maxE
;
1039 VG_(memcpy
) (ta
, old_ta
, SZ_G_ABBV(ta_nf_n
));
1040 ML_(dinfo_free
) (old_ta
);
1042 ta
->nf
[ta_nf_n
].at_name
= get_ULEB128( &c
);
1043 ta
->nf
[ta_nf_n
].at_form
= get_ULEB128( &c
);
1044 if (ta
->nf
[ta_nf_n
].at_form
== DW_FORM_implicit_const
)
1045 ta
->nf
[ta_nf_n
].at_val
= get_SLEB128( &c
);
1046 if (ta
->nf
[ta_nf_n
].at_name
== 0 && ta
->nf
[ta_nf_n
].at_form
== 0) {
1053 // Initialises the skip_szB/next_nf elements : an element at position
1054 // i must contain the sum of its own size + the sizes of all elements
1055 // following i till either the next variable size element, the next
1056 // sibling element or the end of the DIE.
1057 ta
->nf
[ta_nf_n
- 1].skip_szB
= 0;
1058 ta
->nf
[ta_nf_n
- 1].next_nf
= 0;
1059 for (i
= ta_nf_n
- 2; i
>= 0; i
--) {
1060 const UInt form_szB
= get_Form_szB (cc
, (DW_FORM
)ta
->nf
[i
].at_form
);
1062 if (ta
->nf
[i
+1].at_name
== DW_AT_sibling
1063 || ta
->nf
[i
+1].skip_szB
== VARSZ_FORM
) {
1064 ta
->nf
[i
].skip_szB
= form_szB
;
1065 ta
->nf
[i
].next_nf
= i
+1;
1066 } else if (form_szB
== VARSZ_FORM
) {
1067 ta
->nf
[i
].skip_szB
= form_szB
;
1068 ta
->nf
[i
].next_nf
= i
+1;
1070 ta
->nf
[i
].skip_szB
= ta
->nf
[i
+1].skip_szB
+ form_szB
;
1071 ta
->nf
[i
].next_nf
= ta
->nf
[i
+1].next_nf
;
1075 ht_ta
= ML_(dinfo_zalloc
) ("di.readdwarf3.ht_ta", SZ_G_ABBV(ta_nf_n
));
1076 VG_(memcpy
) (ht_ta
, ta
, SZ_G_ABBV(ta_nf_n
));
1077 VG_(HT_add_node
) ( cc
->ht_abbvs
, ht_ta
);
1079 TRACE_D3(" Adding abbv_code %lu TAG %s [%s] nf %u ",
1080 ht_ta
->abbv_code
, ML_(pp_DW_TAG
)(ht_ta
->atag
),
1081 ML_(pp_DW_children
)(ht_ta
->has_children
),
1084 for (i
= 0; i
< ta_nf_n
; i
++)
1085 TRACE_D3("[%u,%u] ", ta
->nf
[i
].skip_szB
, ta
->nf
[i
].next_nf
);
1090 ML_(dinfo_free
) (ta
);
1094 static g_abbv
* get_abbv (const CUConst
* cc
, ULong abbv_code
)
1098 abbv
= VG_(HT_lookup
) (cc
->ht_abbvs
, abbv_code
);
1100 cc
->barf ("abbv_code not found in ht_abbvs table");
1104 /* Free the memory allocated in CUConst. */
1105 static void clear_CUConst (CUConst
* cc
)
1107 VG_(HT_destruct
) ( cc
->ht_abbvs
, ML_(dinfo_free
));
1108 cc
->ht_abbvs
= NULL
;
1111 /* Parse the Compilation Unit header indicated at 'c' and
1112 initialise 'cc' accordingly. */
1113 static __attribute__((noinline
))
1114 void parse_CU_Header ( /*OUT*/CUConst
* cc
,
1117 DiSlice escn_debug_abbv
,
1121 UChar address_size
, unit_type
;
1122 ULong debug_abbrev_offset
;
1124 VG_(memset
)(cc
, 0, sizeof(*cc
));
1125 vg_assert(c
&& c
->barf
);
1128 /* initial_length field */
1130 = get_Initial_Length( &cc
->is_dw64
, c
,
1131 "parse_CU_Header: invalid initial-length field" );
1133 TRACE_D3(" Length: %llu\n", cc
->unit_length
);
1136 cc
->version
= get_UShort( c
);
1137 if (cc
->version
!= 2 && cc
->version
!= 3 && cc
->version
!= 4
1138 && cc
->version
!= 5)
1139 cc
->barf( "parse_CU_Header: "
1140 "is neither DWARF2 nor DWARF3 nor DWARF4 nor DWARF5" );
1141 TRACE_D3(" Version: %d\n", (Int
)cc
->version
);
1144 if (cc
->version
>= 5) {
1145 unit_type
= get_UChar( c
);
1146 address_size
= get_UChar( c
);
1148 unit_type
= type_unit
? DW_UT_type
: DW_UT_compile
;
1149 address_size
= 0; /* Will be read later. */
1152 /* debug_abbrev_offset */
1153 debug_abbrev_offset
= get_Dwarfish_UWord( c
, cc
->is_dw64
);
1154 if (debug_abbrev_offset
>= escn_debug_abbv
.szB
)
1155 cc
->barf( "parse_CU_Header: invalid debug_abbrev_offset" );
1156 TRACE_D3(" Abbrev Offset: %llu\n", debug_abbrev_offset
);
1158 /* address size. If this isn't equal to the host word size, just
1159 give up. This makes it safe to assume elsewhere that
1160 DW_FORM_addr and DW_FORM_ref_addr can be treated as a host
1162 if (cc
->version
< 5)
1163 address_size
= get_UChar( c
);
1165 if (address_size
!= sizeof(void*))
1166 cc
->barf( "parse_CU_Header: invalid address_size" );
1167 TRACE_D3(" Pointer Size: %d\n", (Int
)address_size
);
1169 cc
->is_type_unit
= type_unit
;
1170 cc
->is_alt_info
= alt_info
;
1172 if (type_unit
|| (cc
->version
>= 5 && unit_type
== DW_UT_type
)) {
1173 cc
->type_signature
= get_ULong( c
);
1174 cc
->type_offset
= get_Dwarfish_UWord( c
, cc
->is_dw64
);
1177 /* Set up cc->debug_abbv to point to the relevant table for this
1178 CU. Set its .szB so that at least we can't read off the end of
1179 the debug_abbrev section -- potentially (and quite likely) too
1180 big, if this isn't the last table in the section, but at least
1183 This amounts to taking debug_abbv_escn and moving the start
1184 position along by debug_abbrev_offset bytes, hence forming a
1185 smaller DiSlice which has the same end point. Since we checked
1186 just above that debug_abbrev_offset is less than the size of
1187 debug_abbv_escn, this should leave us with a nonempty slice. */
1188 vg_assert(debug_abbrev_offset
< escn_debug_abbv
.szB
);
1189 cc
->debug_abbv
= escn_debug_abbv
;
1190 cc
->debug_abbv
.ioff
+= debug_abbrev_offset
;
1191 cc
->debug_abbv
.szB
-= debug_abbrev_offset
;
1193 init_ht_abbvs(cc
, td3
);
1196 /* This represents a single signatured type. It maps a type signature
1197 (a ULong) to a cooked DIE offset. Objects of this type are stored
1198 in the type signature hash table. */
1200 struct D3SignatureType
{
1201 struct D3SignatureType
*next
;
1203 ULong type_signature
;
1208 /* Record a signatured type in the hash table. */
1209 static void record_signatured_type ( VgHashTable
*tab
,
1210 ULong type_signature
,
1213 D3SignatureType
*dstype
= ML_(dinfo_zalloc
) ( "di.readdwarf3.sigtype",
1214 sizeof(D3SignatureType
) );
1215 dstype
->data
= (UWord
) type_signature
;
1216 dstype
->type_signature
= type_signature
;
1218 VG_(HT_add_node
) ( tab
, dstype
);
1221 /* Given a type signature hash table and a type signature, return the
1222 cooked DIE offset of the type. If the type cannot be found, call
1224 static UWord
lookup_signatured_type ( const VgHashTable
*tab
,
1225 ULong type_signature
,
1226 void (*barf
)( const HChar
* ) __attribute__((noreturn
)) )
1228 D3SignatureType
*dstype
= VG_(HT_lookup
) ( tab
, (UWord
) type_signature
);
1229 /* This may be unwarranted chumminess with the hash table
1231 while ( dstype
!= NULL
&& dstype
->type_signature
!= type_signature
)
1232 dstype
= dstype
->next
;
1233 if (dstype
== NULL
) {
1234 barf("lookup_signatured_type: could not find signatured type");
1242 /* Represents Form data. If szB is 1/2/4/8 then the result is in the
1243 lowest 1/2/4/8 bytes of u.val. If szB is zero or negative then the
1244 result is an image section beginning at u.cur and with size -szB.
1245 No other szB values are allowed. */
1248 Long szB
; // 1, 2, 4, 8 or non-positive values only.
1249 union { ULong val
; DiCursor cur
; } u
;
1253 /* From 'c', get the Form data into 'cts'. Either it gets a 1/2/4/8
1254 byte scalar value, or (a reference to) zero or more bytes starting
1257 void get_Form_contents ( /*OUT*/FormContents
* cts
,
1258 const CUConst
* cc
, Cursor
* c
,
1259 Bool td3
, const name_form
*abbv
)
1261 DW_FORM form
= abbv
->at_form
;
1262 VG_(bzero_inline
)(cts
, sizeof(*cts
));
1263 // !!! keep switch in sync with get_Form_szB. The nr of characters read below
1264 // must be computed similarly in get_Form_szB.
1265 // The consistency is verified in trace_DIE.
1268 cts
->u
.val
= (ULong
)(UChar
)get_UChar(c
);
1270 TRACE_D3("%u", (UInt
)cts
->u
.val
);
1273 cts
->u
.val
= (ULong
)(UShort
)get_UShort(c
);
1275 TRACE_D3("%u", (UInt
)cts
->u
.val
);
1278 cts
->u
.val
= (ULong
)(UInt
)get_UInt(c
);
1280 TRACE_D3("%u", (UInt
)cts
->u
.val
);
1283 cts
->u
.val
= get_ULong(c
);
1285 TRACE_D3("%llu", cts
->u
.val
);
1287 case DW_FORM_data16
: {
1288 /* This is more like a block than an integral value. */
1290 DiCursor data16
= get_DiCursor_from_Cursor(c
);
1291 TRACE_D3("data16: ");
1292 for (u64b
= 16; u64b
> 0; u64b
--) {
1293 UChar u8
= get_UChar(c
);
1294 TRACE_D3("%x ", (UInt
)u8
);
1296 cts
->u
.cur
= data16
;
1297 cts
->szB
= - (Long
)16;
1300 case DW_FORM_sec_offset
:
1301 cts
->u
.val
= (ULong
)get_Dwarfish_UWord( c
, cc
->is_dw64
);
1302 cts
->szB
= cc
->is_dw64
? 8 : 4;
1303 TRACE_D3("%llu", cts
->u
.val
);
1306 cts
->u
.val
= (ULong
)(Long
)get_SLEB128(c
);
1308 TRACE_D3("%llu", cts
->u
.val
);
1311 cts
->u
.val
= (ULong
)(Long
)get_ULEB128(c
);
1313 TRACE_D3("%llu", cts
->u
.val
);
1316 /* note, this is a hack. DW_FORM_addr is defined as getting
1317 a word the size of the target machine as defined by the
1318 address_size field in the CU Header. However,
1319 parse_CU_Header() rejects all inputs except those for
1320 which address_size == sizeof(Word), hence we can just
1321 treat it as a (host) Word. */
1322 cts
->u
.val
= (ULong
)(UWord
)get_UWord(c
);
1323 cts
->szB
= sizeof(UWord
);
1324 TRACE_D3("0x%lx", (UWord
)cts
->u
.val
);
1327 case DW_FORM_ref_addr
:
1328 /* We make the same word-size assumption as DW_FORM_addr. */
1329 /* What does this really mean? From D3 Sec 7.5.4,
1330 description of "reference", it would appear to reference
1331 some other DIE, by specifying the offset from the
1332 beginning of a .debug_info section. The D3 spec mentions
1333 that this might be in some other shared object and
1334 executable. But I don't see how the name of the other
1335 object/exe is specified.
1337 At least for the DW_FORM_ref_addrs created by icc11, the
1338 references seem to be within the same object/executable.
1339 So for the moment we merely range-check, to see that they
1340 actually do specify a plausible offset within this
1341 object's .debug_info, and return the value unchanged.
1343 In DWARF 2, DW_FORM_ref_addr is address-sized, but in
1344 DWARF 3 and later, it is offset-sized.
1346 if (cc
->version
== 2) {
1347 cts
->u
.val
= (ULong
)(UWord
)get_UWord(c
);
1348 cts
->szB
= sizeof(UWord
);
1350 cts
->u
.val
= get_Dwarfish_UWord(c
, cc
->is_dw64
);
1351 cts
->szB
= cc
->is_dw64
? sizeof(ULong
) : sizeof(UInt
);
1353 TRACE_D3("0x%lx", (UWord
)cts
->u
.val
);
1354 if (0) VG_(printf
)("DW_FORM_ref_addr 0x%lx\n", (UWord
)cts
->u
.val
);
1355 if (/* the following is surely impossible, but ... */
1356 !ML_(sli_is_valid
)(cc
->escn_debug_info
)
1357 || cts
->u
.val
>= (ULong
)cc
->escn_debug_info
.szB
) {
1358 /* Hmm. Offset is nonsensical for this object's .debug_info
1359 section. Be safe and reject it. */
1360 cc
->barf("get_Form_contents: DW_FORM_ref_addr points "
1361 "outside .debug_info");
1365 case DW_FORM_strp
: {
1366 /* this is an offset into .debug_str */
1367 UWord uw
= (UWord
)get_Dwarfish_UWord( c
, cc
->is_dw64
);
1368 if (!ML_(sli_is_valid
)(cc
->escn_debug_str
)
1369 || uw
>= cc
->escn_debug_str
.szB
)
1370 cc
->barf("get_Form_contents: DW_FORM_strp "
1371 "points outside .debug_str");
1372 /* FIXME: check the entire string lies inside debug_str,
1373 not just the first byte of it. */
1375 = ML_(cur_plus
)( ML_(cur_from_sli
)(cc
->escn_debug_str
), uw
);
1377 HChar
* tmp
= ML_(cur_read_strdup
)(str
, "di.getFC.1");
1378 TRACE_D3("(indirect string, offset: 0x%lx): %s", uw
, tmp
);
1379 ML_(dinfo_free
)(tmp
);
1382 cts
->szB
= - (Long
)(1 + (ULong
)ML_(cur_strlen
)(str
));
1385 case DW_FORM_line_strp
: {
1386 /* this is an offset into .debug_line_str */
1387 UWord uw
= (UWord
)get_Dwarfish_UWord( c
, cc
->is_dw64
);
1388 if (!ML_(sli_is_valid
)(cc
->escn_debug_line_str
)
1389 || uw
>= cc
->escn_debug_line_str
.szB
)
1390 cc
->barf("get_Form_contents: DW_FORM_line_strp "
1391 "points outside .debug_line_str");
1392 /* FIXME: check the entire string lies inside debug_line_str,
1393 not just the first byte of it. */
1395 = ML_(cur_plus
)( ML_(cur_from_sli
)(cc
->escn_debug_line_str
), uw
);
1397 HChar
* tmp
= ML_(cur_read_strdup
)(line_str
, "di.getFC.1.5");
1398 TRACE_D3("(indirect line string, offset: 0x%lx): %s", uw
, tmp
);
1399 ML_(dinfo_free
)(tmp
);
1401 cts
->u
.cur
= line_str
;
1402 cts
->szB
= - (Long
)(1 + (ULong
)ML_(cur_strlen
)(line_str
));
1405 case DW_FORM_string
: {
1406 DiCursor str
= get_AsciiZ(c
);
1408 HChar
* tmp
= ML_(cur_read_strdup
)(str
, "di.getFC.2");
1409 TRACE_D3("%s", tmp
);
1410 ML_(dinfo_free
)(tmp
);
1413 /* strlen is safe because get_AsciiZ already 'vetted' the
1415 cts
->szB
= - (Long
)(1 + (ULong
)ML_(cur_strlen
)(str
));
1418 case DW_FORM_ref1
: {
1419 UChar u8
= get_UChar(c
);
1420 UWord res
= cc
->cu_start_offset
+ (UWord
)u8
;
1421 cts
->u
.val
= (ULong
)res
;
1422 cts
->szB
= sizeof(UWord
);
1423 TRACE_D3("<%lx>", res
);
1426 case DW_FORM_ref2
: {
1427 UShort u16
= get_UShort(c
);
1428 UWord res
= cc
->cu_start_offset
+ (UWord
)u16
;
1429 cts
->u
.val
= (ULong
)res
;
1430 cts
->szB
= sizeof(UWord
);
1431 TRACE_D3("<%lx>", res
);
1434 case DW_FORM_ref4
: {
1435 UInt u32
= get_UInt(c
);
1436 UWord res
= cc
->cu_start_offset
+ (UWord
)u32
;
1437 cts
->u
.val
= (ULong
)res
;
1438 cts
->szB
= sizeof(UWord
);
1439 TRACE_D3("<%lx>", res
);
1442 case DW_FORM_ref8
: {
1443 ULong u64
= get_ULong(c
);
1444 UWord res
= cc
->cu_start_offset
+ (UWord
)u64
;
1445 cts
->u
.val
= (ULong
)res
;
1446 cts
->szB
= sizeof(UWord
);
1447 TRACE_D3("<%lx>", res
);
1450 case DW_FORM_ref_udata
: {
1451 ULong u64
= get_ULEB128(c
);
1452 UWord res
= cc
->cu_start_offset
+ (UWord
)u64
;
1453 cts
->u
.val
= (ULong
)res
;
1454 cts
->szB
= sizeof(UWord
);
1455 TRACE_D3("<%lx>", res
);
1458 case DW_FORM_flag
: {
1459 UChar u8
= get_UChar(c
);
1460 TRACE_D3("%u", (UInt
)u8
);
1461 cts
->u
.val
= (ULong
)u8
;
1465 case DW_FORM_flag_present
:
1470 case DW_FORM_implicit_const
:
1471 cts
->u
.val
= (ULong
)abbv
->at_val
;
1473 TRACE_D3("%llu", cts
->u
.val
);
1475 case DW_FORM_block1
: {
1477 ULong u64
= (ULong
)get_UChar(c
);
1478 DiCursor block
= get_DiCursor_from_Cursor(c
);
1479 TRACE_D3("%llu byte block: ", u64
);
1480 for (u64b
= u64
; u64b
> 0; u64b
--) {
1481 UChar u8
= get_UChar(c
);
1482 TRACE_D3("%x ", (UInt
)u8
);
1485 cts
->szB
= - (Long
)u64
;
1488 case DW_FORM_block2
: {
1490 ULong u64
= (ULong
)get_UShort(c
);
1491 DiCursor block
= get_DiCursor_from_Cursor(c
);
1492 TRACE_D3("%llu byte block: ", u64
);
1493 for (u64b
= u64
; u64b
> 0; u64b
--) {
1494 UChar u8
= get_UChar(c
);
1495 TRACE_D3("%x ", (UInt
)u8
);
1498 cts
->szB
= - (Long
)u64
;
1501 case DW_FORM_block4
: {
1503 ULong u64
= (ULong
)get_UInt(c
);
1504 DiCursor block
= get_DiCursor_from_Cursor(c
);
1505 TRACE_D3("%llu byte block: ", u64
);
1506 for (u64b
= u64
; u64b
> 0; u64b
--) {
1507 UChar u8
= get_UChar(c
);
1508 TRACE_D3("%x ", (UInt
)u8
);
1511 cts
->szB
= - (Long
)u64
;
1514 case DW_FORM_exprloc
:
1515 case DW_FORM_block
: {
1517 ULong u64
= (ULong
)get_ULEB128(c
);
1518 DiCursor block
= get_DiCursor_from_Cursor(c
);
1519 TRACE_D3("%llu byte block: ", u64
);
1520 for (u64b
= u64
; u64b
> 0; u64b
--) {
1521 UChar u8
= get_UChar(c
);
1522 TRACE_D3("%x ", (UInt
)u8
);
1525 cts
->szB
= - (Long
)u64
;
1528 case DW_FORM_ref_sig8
: {
1530 ULong signature
= get_ULong (c
);
1531 ULong work
= signature
;
1532 TRACE_D3("8 byte signature: ");
1533 for (u64b
= 8; u64b
> 0; u64b
--) {
1534 UChar u8
= work
& 0xff;
1535 TRACE_D3("%x ", (UInt
)u8
);
1539 /* cc->signature_types is only built/initialised when
1540 VG_(clo_read_var_info) is set. In this case,
1541 the DW_FORM_ref_sig8 can be looked up.
1542 But we can also arrive here when only reading inline info
1543 and VG_(clo_trace_symtab) is set. In such a case,
1544 we cannot lookup the DW_FORM_ref_sig8, we rather assign
1545 a dummy value. This is a kludge, but otherwise,
1546 the 'dwarf inline info reader' tracing would have to
1547 do type processing/reading. It is better to avoid
1548 adding significant 'real' processing only due to tracing. */
1549 if (VG_(clo_read_var_info
)) {
1550 /* Due to the way that the hash table is constructed, the
1551 resulting DIE offset here is already "cooked". See
1552 cook_die_using_form. */
1553 cts
->u
.val
= lookup_signatured_type (cc
->signature_types
, signature
,
1557 vg_assert (VG_(clo_read_inline_info
));
1558 TRACE_D3("<not dereferencing signature type>");
1559 cts
->u
.val
= 0; /* Assign a dummy/rubbish value */
1561 cts
->szB
= sizeof(UWord
);
1564 case DW_FORM_indirect
: {
1565 /* Urgh, this is ugly and somewhat unclear how it works
1566 with DW_FORM_implicit_const. HACK. */
1567 name_form nfi
= *abbv
;
1568 nfi
.at_form
= (DW_FORM
)get_ULEB128(c
);
1569 get_Form_contents (cts
, cc
, c
, td3
, &nfi
);
1573 case DW_FORM_GNU_ref_alt
:
1574 cts
->u
.val
= get_Dwarfish_UWord(c
, cc
->is_dw64
);
1575 cts
->szB
= cc
->is_dw64
? sizeof(ULong
) : sizeof(UInt
);
1576 TRACE_D3("0x%lx", (UWord
)cts
->u
.val
);
1577 if (0) VG_(printf
)("DW_FORM_GNU_ref_alt 0x%lx\n", (UWord
)cts
->u
.val
);
1578 if (/* the following is surely impossible, but ... */
1579 !ML_(sli_is_valid
)(cc
->escn_debug_info_alt
))
1580 cc
->barf("get_Form_contents: DW_FORM_GNU_ref_addr used, "
1581 "but no alternate .debug_info");
1582 else if (cts
->u
.val
>= (ULong
)cc
->escn_debug_info_alt
.szB
) {
1583 /* Hmm. Offset is nonsensical for this object's .debug_info
1584 section. Be safe and reject it. */
1585 cc
->barf("get_Form_contents: DW_FORM_GNU_ref_addr points "
1586 "outside alternate .debug_info");
1590 case DW_FORM_GNU_strp_alt
: {
1591 /* this is an offset into alternate .debug_str */
1592 SizeT uw
= (UWord
)get_Dwarfish_UWord( c
, cc
->is_dw64
);
1593 if (!ML_(sli_is_valid
)(cc
->escn_debug_str_alt
))
1594 cc
->barf("get_Form_contents: DW_FORM_GNU_strp_alt used, "
1595 "but no alternate .debug_str");
1596 else if (uw
>= cc
->escn_debug_str_alt
.szB
)
1597 cc
->barf("get_Form_contents: DW_FORM_GNU_strp_alt "
1598 "points outside alternate .debug_str");
1599 /* FIXME: check the entire string lies inside debug_str,
1600 not just the first byte of it. */
1602 = ML_(cur_plus
)( ML_(cur_from_sli
)(cc
->escn_debug_str_alt
), uw
);
1604 HChar
* tmp
= ML_(cur_read_strdup
)(str
, "di.getFC.3");
1605 TRACE_D3("(indirect alt string, offset: 0x%lx): %s", uw
, tmp
);
1606 ML_(dinfo_free
)(tmp
);
1609 cts
->szB
= - (Long
)(1 + (ULong
)ML_(cur_strlen
)(str
));
1615 "get_Form_contents: unhandled %u (%s) at <%llx>\n",
1616 form
, ML_(pp_DW_FORM
)(form
), get_position_of_Cursor(c
));
1617 c
->barf("get_Form_contents: unhandled DW_FORM");
1621 static inline UInt
sizeof_Dwarfish_UWord (Bool is_dw64
)
1624 return sizeof(ULong
);
1626 return sizeof(UInt
);
1629 #define VARSZ_FORM 0xffffffff
1630 /* If the form is a fixed length form, return the nr of bytes for this form.
1631 If the form is a variable length form, return VARSZ_FORM. */
1633 UInt
get_Form_szB (const CUConst
* cc
, DW_FORM form
)
1635 // !!! keep switch in sync with get_Form_contents : the nr of bytes
1636 // read from a cursor by get_Form_contents must be returned by
1637 // the below switch.
1638 // The consistency is verified in trace_DIE.
1640 case DW_FORM_data1
: return 1;
1641 case DW_FORM_data2
: return 2;
1642 case DW_FORM_data4
: return 4;
1643 case DW_FORM_data8
: return 8;
1644 case DW_FORM_data16
: return 16;
1645 case DW_FORM_sec_offset
:
1654 case DW_FORM_addr
: // See hack in get_Form_contents
1655 return sizeof(UWord
);
1656 case DW_FORM_ref_addr
: // See hack in get_Form_contents
1657 if (cc
->version
== 2)
1658 return sizeof(UWord
);
1660 return sizeof_Dwarfish_UWord (cc
->is_dw64
);
1662 case DW_FORM_line_strp
:
1663 return sizeof_Dwarfish_UWord (cc
->is_dw64
);
1664 case DW_FORM_string
:
1674 case DW_FORM_ref_udata
:
1678 case DW_FORM_flag_present
:
1679 return 0; // !!! special case, no data.
1680 case DW_FORM_block1
:
1682 case DW_FORM_block2
:
1684 case DW_FORM_block4
:
1686 case DW_FORM_exprloc
:
1689 case DW_FORM_ref_sig8
:
1691 case DW_FORM_indirect
:
1693 case DW_FORM_GNU_ref_alt
:
1694 return sizeof_Dwarfish_UWord(cc
->is_dw64
);
1695 case DW_FORM_GNU_strp_alt
:
1696 return sizeof_Dwarfish_UWord(cc
->is_dw64
);
1697 case DW_FORM_implicit_const
:
1698 return 0; /* Value inside abbrev. */
1701 "get_Form_szB: unhandled %u (%s)\n",
1702 form
, ML_(pp_DW_FORM
)(form
));
1703 cc
->barf("get_Form_contents: unhandled DW_FORM");
1707 /* Skip a DIE as described by abbv.
1708 If the DIE has a sibling, *sibling is set to the skipped DIE sibling value. */
1710 void skip_DIE (UWord
*sibling
,
1719 if (abbv
->nf
[nf_i
].at_name
== DW_AT_sibling
) {
1720 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/,
1723 *sibling
= cts
.u
.val
;
1725 } else if (abbv
->nf
[nf_i
].skip_szB
== VARSZ_FORM
) {
1726 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/,
1730 advance_position_of_Cursor (c_die
, (ULong
)abbv
->nf
[nf_i
].skip_szB
);
1731 nf_i
= abbv
->nf
[nf_i
].next_nf
;
1739 /*------------------------------------------------------------*/
1741 /*--- Parsing of variable-related DIEs ---*/
1743 /*------------------------------------------------------------*/
1747 const HChar
* name
; /* in DebugInfo's .strpool */
1748 /* Represent ranges economically. nRanges is the number of
1750 0: .rngOneMin .rngOneMax .manyRanges are all zero
1751 1: .rngOneMin .rngOneMax hold the range; .rngMany is NULL
1752 2: .rngOneMin .rngOneMax are zero; .rngMany holds the ranges.
1753 This is merely an optimisation to avoid having to allocate
1754 and free the XArray in the common (98%) of cases where there
1755 is zero or one address ranges. */
1759 XArray
* rngMany
; /* of AddrRange. NON-UNIQUE PTR in AR_DINFO. */
1760 /* Do not free .rngMany, since many TempVars will have the same
1761 value. Instead the associated storage is to be freed by
1762 deleting 'rangetree', which stores a single copy of each
1766 UWord typeR
; /* a cuOff */
1767 GExpr
* gexpr
; /* for this variable */
1768 GExpr
* fbGX
; /* to find the frame base of the enclosing fn, if
1770 UInt fndn_ix
; /* declaring file/dirname index in fndnpool, or 0 */
1771 Int fLine
; /* declaring file line number, or zero */
1772 /* offset in .debug_info, so that abstract instances can be
1773 found to satisfy references from concrete instances. */
1775 UWord absOri
; /* so the absOri fields refer to dioff fields
1776 in some other, related TempVar. */
1782 /* Contains the range stack: a stack of address ranges, one
1783 stack entry for each nested scope.
1785 Some scope entries are created by function definitions
1786 (DW_AT_subprogram), and for those, we also note the GExpr
1787 derived from its DW_AT_frame_base attribute, if any.
1788 Consequently it should be possible to find, for any
1789 variable's DIE, the GExpr for the containing function's
1790 DW_AT_frame_base by scanning back through the stack to find
1791 the nearest entry associated with a function. This somewhat
1792 elaborate scheme is provided so as to make it possible to
1793 obtain the correct DW_AT_frame_base expression even in the
1794 presence of nested functions (or to be more precise, in the
1795 presence of nested DW_AT_subprogram DIEs).
1797 Int sp
; /* [sp] is innermost active entry; sp==-1 for empty
1800 XArray
**ranges
; /* XArray of AddrRange */
1801 Int
*level
; /* D3 DIE levels */
1802 Bool
*isFunc
; /* from DW_AT_subprogram? */
1803 GExpr
**fbGX
; /* if isFunc, contains the FB expr, else NULL */
1804 /* The fndn_ix file name/dirname table. Is a mapping from dwarf
1805 integer index to the index in di->fndnpool. */
1806 XArray
* /* of UInt* */ fndn_ix_Table
;
1810 /* Completely initialise a variable parser object */
1812 var_parser_init ( D3VarParser
*parser
)
1815 parser
->stack_size
= 0;
1816 parser
->ranges
= NULL
;
1817 parser
->level
= NULL
;
1818 parser
->isFunc
= NULL
;
1819 parser
->fbGX
= NULL
;
1820 parser
->fndn_ix_Table
= NULL
;
1823 /* Release any memory hanging off a variable parser object */
1825 var_parser_release ( D3VarParser
*parser
)
1827 ML_(dinfo_free
)( parser
->ranges
);
1828 ML_(dinfo_free
)( parser
->level
);
1829 ML_(dinfo_free
)( parser
->isFunc
);
1830 ML_(dinfo_free
)( parser
->fbGX
);
1833 static void varstack_show ( const D3VarParser
* parser
, const HChar
* str
)
1836 VG_(printf
)(" varstack (%s) {\n", str
);
1837 for (i
= 0; i
<= parser
->sp
; i
++) {
1838 XArray
* xa
= parser
->ranges
[i
];
1840 VG_(printf
)(" [%ld] (level %d)", i
, parser
->level
[i
]);
1841 if (parser
->isFunc
[i
]) {
1842 VG_(printf
)(" (fbGX=%p)", parser
->fbGX
[i
]);
1844 vg_assert(parser
->fbGX
[i
] == NULL
);
1847 if (VG_(sizeXA
)( xa
) == 0) {
1848 VG_(printf
)("** empty PC range array **");
1850 for (j
= 0; j
< VG_(sizeXA
)( xa
); j
++) {
1851 AddrRange
* range
= (AddrRange
*) VG_(indexXA
)( xa
, j
);
1853 VG_(printf
)("[%#lx,%#lx] ", range
->aMin
, range
->aMax
);
1858 VG_(printf
)(" }\n");
1861 /* Remove from the stack, all entries with .level > 'level' */
1863 void varstack_preen ( D3VarParser
* parser
, Bool td3
, Int level
)
1865 Bool changed
= False
;
1866 vg_assert(parser
->sp
< parser
->stack_size
);
1868 vg_assert(parser
->sp
>= -1);
1869 if (parser
->sp
== -1) break;
1870 if (parser
->level
[parser
->sp
] <= level
) break;
1872 TRACE_D3("BBBBAAAA varstack_pop [newsp=%d]\n", parser
->sp
-1);
1873 vg_assert(parser
->ranges
[parser
->sp
]);
1874 /* Who allocated this xa? get_range_list() or
1875 unitary_range_list(). */
1876 VG_(deleteXA
)( parser
->ranges
[parser
->sp
] );
1881 varstack_show( parser
, "after preen" );
1884 static void varstack_push ( const CUConst
* cc
,
1885 D3VarParser
* parser
,
1887 XArray
* ranges
, Int level
,
1888 Bool isFunc
, GExpr
* fbGX
) {
1890 TRACE_D3("BBBBAAAA varstack_push[newsp=%d]: %d %p\n",
1891 parser
->sp
+1, level
, ranges
);
1893 /* First we need to zap everything >= 'level', as we are about to
1894 replace any previous entry at 'level', so .. */
1895 varstack_preen(parser
, /*td3*/False
, level
-1);
1897 vg_assert(parser
->sp
>= -1);
1898 vg_assert(parser
->sp
< parser
->stack_size
);
1899 if (parser
->sp
== parser
->stack_size
- 1) {
1900 parser
->stack_size
+= 48;
1902 ML_(dinfo_realloc
)("di.readdwarf3.varpush.1", parser
->ranges
,
1903 parser
->stack_size
* sizeof parser
->ranges
[0]);
1905 ML_(dinfo_realloc
)("di.readdwarf3.varpush.2", parser
->level
,
1906 parser
->stack_size
* sizeof parser
->level
[0]);
1908 ML_(dinfo_realloc
)("di.readdwarf3.varpush.3", parser
->isFunc
,
1909 parser
->stack_size
* sizeof parser
->isFunc
[0]);
1911 ML_(dinfo_realloc
)("di.readdwarf3.varpush.4", parser
->fbGX
,
1912 parser
->stack_size
* sizeof parser
->fbGX
[0]);
1914 if (parser
->sp
>= 0)
1915 vg_assert(parser
->level
[parser
->sp
] < level
);
1917 vg_assert(ranges
!= NULL
);
1918 if (!isFunc
) vg_assert(fbGX
== NULL
);
1919 parser
->ranges
[parser
->sp
] = ranges
;
1920 parser
->level
[parser
->sp
] = level
;
1921 parser
->isFunc
[parser
->sp
] = isFunc
;
1922 parser
->fbGX
[parser
->sp
] = fbGX
;
1924 varstack_show( parser
, "after push" );
1928 /* cts is derived from a DW_AT_location and so refers either to a
1929 location expression or to a location list. Figure out which, and
1930 in both cases bundle the expression or location list into a
1931 so-called GExpr (guarded expression). */
1932 __attribute__((noinline
))
1933 static GExpr
* get_GX ( const CUConst
* cc
, Bool td3
, const FormContents
* cts
)
1935 GExpr
* gexpr
= NULL
;
1937 /* represents a non-empty in-line location expression, and
1938 cts->u.cur points at the image bytes */
1939 gexpr
= make_singleton_GX( cts
->u
.cur
, (ULong
)(- cts
->szB
) );
1943 /* represents a location list. cts->u.val is the offset of it
1945 if (!cc
->cu_svma_known
)
1946 cc
->barf("get_GX: location list, but CU svma is unknown");
1947 gexpr
= make_general_GX( cc
, td3
, cts
->u
.val
, cc
->cu_svma
);
1950 vg_assert(0); /* else caller is bogus */
1956 HChar
* get_line_str (struct _DebugInfo
* di
, Bool is_dw64
,
1957 Cursor
*data
, const UInt form
,
1958 DiSlice debugstr_img
, DiSlice debuglinestr_img
)
1962 case DW_FORM_string
: {
1963 DiCursor distr
= get_AsciiZ(data
);
1964 str
= ML_(cur_step_strdup
)(&distr
, "di.gls.string");
1967 case DW_FORM_strp
: {
1968 UWord uw
= (UWord
)get_Dwarfish_UWord( data
, is_dw64
);
1970 = ML_(cur_plus
)( ML_(cur_from_sli
)(debugstr_img
), uw
);
1971 str
= ML_(cur_read_strdup
)(distr
, "di.gls.strp");
1974 case DW_FORM_line_strp
: {
1975 UWord uw
= (UWord
)get_Dwarfish_UWord( data
, is_dw64
);
1977 = ML_(cur_plus
)( ML_(cur_from_sli
)(debuglinestr_img
), uw
);
1978 str
= ML_(cur_read_strdup
)(distr
, "di.gls.line_strp");
1982 ML_(symerr
)(di
, True
,
1983 "Unknown path string FORM in .debug_line");
1990 Int
get_line_ndx (struct _DebugInfo
* di
,
1991 Cursor
*data
, const UInt form
)
1996 res
= get_UChar(data
);
1999 res
= get_UShort(data
);
2002 res
= get_ULEB128(data
);
2005 ML_(symerr
)(di
, True
,
2006 "Unknown directory_index value FORM in .debug_line");
2013 void skip_line_form (struct _DebugInfo
* di
, Bool is_dw64
,
2014 Cursor
*d
, const UInt form
)
2017 case DW_FORM_block
: {
2018 ULong len
= get_ULEB128(d
);
2019 advance_position_of_Cursor (d
, len
);
2022 case DW_FORM_block1
: {
2023 UChar len
= get_UChar(d
);
2024 advance_position_of_Cursor (d
, len
);
2027 case DW_FORM_block2
: {
2028 UShort len
= get_UShort(d
);
2029 advance_position_of_Cursor (d
, len
);
2032 case DW_FORM_block4
: {
2033 UInt len
= get_UInt(d
);
2034 advance_position_of_Cursor (d
, len
);
2039 advance_position_of_Cursor (d
, 1);
2042 advance_position_of_Cursor (d
, 2);
2045 advance_position_of_Cursor (d
, 4);
2048 advance_position_of_Cursor (d
, 8);
2050 case DW_FORM_data16
:
2051 advance_position_of_Cursor (d
, 16);
2053 case DW_FORM_string
:
2054 (void)get_AsciiZ (d
);
2057 case DW_FORM_line_strp
:
2058 case DW_FORM_sec_offset
:
2059 advance_position_of_Cursor (d
, is_dw64
? 8 : 4);
2062 (void)get_ULEB128(d
);
2065 (void)get_SLEB128(d
);
2068 ML_(symerr
)(di
, True
, "Unknown FORM in .debug_line");
2073 /* Returns an xarray* of directory names (indexed by the dwarf dirname
2075 If 'compdir' is NULL, entry [0] will be set to "."
2076 otherwise entry [0] is set to compdir.
2077 Entry [0] basically means "the current directory of the compilation",
2078 whatever that means, according to the DWARF3 spec.
2079 FIXME??? readdwarf3.c/readdwarf.c have a lot of duplicated code */
2081 XArray
* read_dirname_xa (DebugInfo
* di
, UShort version
, const HChar
*compdir
,
2082 Cursor
*c
, const CUConst
*cc
,
2085 XArray
* dirname_xa
; /* xarray of HChar* dirname */
2086 const HChar
* dirname
;
2089 dirname_xa
= VG_(newXA
) (ML_(dinfo_zalloc
), "di.rdxa.1", ML_(dinfo_free
),
2092 if (compdir
== NULL
) {
2097 compdir_len
= VG_(strlen
)(compdir
);
2100 /* For version 5, the compdir is the first (zero) entry. */
2102 VG_(addToXA
) (dirname_xa
, &dirname
);
2105 TRACE_D3("The Directory Table%s\n",
2106 peek_UChar(c
) == 0 ? " is empty." : ":" );
2108 while (peek_UChar(c
) != 0) {
2110 DiCursor cur
= get_AsciiZ(c
);
2111 HChar
* data_str
= ML_(cur_read_strdup
)( cur
, "dirname_xa.1" );
2112 TRACE_D3(" %s\n", data_str
);
2114 /* If data_str[0] is '/', then 'data' is an absolute path and we
2115 don't mess with it. Otherwise, construct the
2116 path 'compdir' ++ "/" ++ 'data'. */
2118 if (data_str
[0] != '/'
2119 /* not an absolute path */
2121 /* actually got something sensible for compdir */
2124 SizeT len
= compdir_len
+ 1 + VG_(strlen
)(data_str
);
2125 HChar
*buf
= ML_(dinfo_zalloc
)("dirname_xa.2", len
+ 1);
2127 VG_(strcpy
)(buf
, compdir
);
2128 VG_(strcat
)(buf
, "/");
2129 VG_(strcat
)(buf
, data_str
);
2131 dirname
= ML_(addStr
)(di
, buf
, len
);
2132 VG_(addToXA
) (dirname_xa
, &dirname
);
2133 if (0) VG_(printf
)("rel path %s\n", buf
);
2134 ML_(dinfo_free
)(buf
);
2136 /* just use 'data'. */
2137 dirname
= ML_(addStr
)(di
,data_str
,-1);
2138 VG_(addToXA
) (dirname_xa
, &dirname
);
2139 if (0) VG_(printf
)("abs path %s\n", data_str
);
2142 ML_(dinfo_free
)(data_str
);
2147 UInt directories_count
;
2148 UChar directory_entry_format_count
;
2150 DiSlice debugstr_img
= cc
->escn_debug_str
;
2151 DiSlice debuglinestr_img
= cc
->escn_debug_line_str
;
2153 directory_entry_format_count
= get_UChar(c
);
2154 for (n
= 0; n
< directory_entry_format_count
; n
++) {
2155 UInt lnct
= get_ULEB128(c
);
2156 UInt form
= get_ULEB128(c
);
2157 if (lnct
== DW_LNCT_path
)
2161 directories_count
= get_ULEB128(c
);
2162 TRACE_D3("The Directory Table%s\n",
2163 directories_count
== 0 ? " is empty." : ":" );
2165 for (n
= 0; n
< directories_count
; n
++) {
2167 for (f
= 0; f
< directory_entry_format_count
; f
++) {
2168 UInt form
= forms
[f
];
2170 HChar
*data_str
= get_line_str (di
, cc
->is_dw64
, c
, form
,
2173 TRACE_D3(" %s\n", data_str
);
2175 /* If data_str[0] is '/', then 'data' is an absolute path and we
2176 don't mess with it. Otherwise, construct the
2177 path 'compdir' ++ "/" ++ 'data'. */
2179 if (data_str
[0] != '/'
2180 /* not an absolute path */
2182 /* actually got something sensible for compdir */
2185 SizeT len
= compdir_len
+ 1 + VG_(strlen
)(data_str
);
2186 HChar
*buf
= ML_(dinfo_zalloc
)("dirname_xa.2", len
+ 1);
2188 VG_(strcpy
)(buf
, compdir
);
2189 VG_(strcat
)(buf
, "/");
2190 VG_(strcat
)(buf
, data_str
);
2192 dirname
= ML_(addStr
)(di
, buf
, len
);
2193 VG_(addToXA
) (dirname_xa
, &dirname
);
2194 if (0) VG_(printf
)("rel path %s\n", buf
);
2195 ML_(dinfo_free
)(buf
);
2197 /* just use 'data'. */
2198 dirname
= ML_(addStr
)(di
,data_str
,-1);
2199 VG_(addToXA
) (dirname_xa
, &dirname
);
2200 if (0) VG_(printf
)("abs path %s\n", data_str
);
2203 ML_(dinfo_free
)(data_str
);
2205 skip_line_form (di
, cc
->is_dw64
, c
, form
);
2213 if (version
< 5 && get_UChar (c
) != 0) {
2214 ML_(symerr
)(NULL
, True
,
2215 "could not get NUL at end of DWARF directory table");
2216 VG_(deleteXA
)(dirname_xa
);
2224 void read_filename_table( /*MOD*/XArray
* /* of UInt* */ fndn_ix_Table
,
2225 const HChar
* compdir
,
2226 const CUConst
* cc
, ULong debug_line_offset
,
2235 XArray
* dirname_xa
; /* xarray of HChar* dirname */
2236 ULong dir_xa_ix
; /* Index in dirname_xa, as read from dwarf info. */
2237 const HChar
* dirname
;
2240 vg_assert(fndn_ix_Table
&& cc
&& cc
->barf
);
2241 if (!ML_(sli_is_valid
)(cc
->escn_debug_line
)
2242 || cc
->escn_debug_line
.szB
<= debug_line_offset
) {
2243 cc
->barf("read_filename_table: .debug_line is missing?");
2246 init_Cursor( &c
, cc
->escn_debug_line
, debug_line_offset
, cc
->barf
,
2247 "Overrun whilst reading .debug_line section(1)" );
2250 get_Initial_Length( &is_dw64
, &c
,
2251 "read_filename_table: invalid initial-length field" );
2252 version
= get_UShort( &c
);
2253 if (version
!= 2 && version
!= 3 && version
!= 4 && version
!= 5)
2254 cc
->barf("read_filename_table: Only DWARF version 2, 3, 4 and 5 "
2255 "line info is currently supported.");
2257 /* addrs_size = */ get_UChar( &c
);
2258 /* seg_size = */ get_UChar( &c
);
2260 /*header_length = (ULong)*/ get_Dwarfish_UWord( &c
, is_dw64
);
2261 /*minimum_instruction_length = */ get_UChar( &c
);
2263 /*maximum_operations_per_insn = */ get_UChar( &c
);
2264 /*default_is_stmt = */ get_UChar( &c
);
2265 /*line_base = (Char)*/ get_UChar( &c
);
2266 /*line_range = */ get_UChar( &c
);
2267 opcode_base
= get_UChar( &c
);
2268 /* skip over "standard_opcode_lengths" */
2269 for (i
= 1; i
< (Word
)opcode_base
; i
++)
2270 (void)get_UChar( &c
);
2272 dirname_xa
= read_dirname_xa(cc
->di
, version
, compdir
, &c
, cc
, td3
);
2274 /* Read and record the file names table */
2275 vg_assert( VG_(sizeXA
)( fndn_ix_Table
) == 0 );
2277 /* Add a dummy index-zero entry. DWARF3 numbers its files
2278 from 1, for some reason. */
2279 fndn_ix
= ML_(addFnDn
) ( cc
->di
, "<unknown_file>", NULL
);
2280 VG_(addToXA
)( fndn_ix_Table
, &fndn_ix
);
2281 while (peek_UChar(&c
) != 0) {
2282 DiCursor cur
= get_AsciiZ(&c
);
2283 str
= ML_(addStrFromCursor
)( cc
->di
, cur
);
2284 dir_xa_ix
= get_ULEB128( &c
);
2285 if (dirname_xa
!= NULL
2286 && dir_xa_ix
>= 0 && dir_xa_ix
< VG_(sizeXA
) (dirname_xa
))
2287 dirname
= *(HChar
**)VG_(indexXA
) ( dirname_xa
, dir_xa_ix
);
2290 fndn_ix
= ML_(addFnDn
)( cc
->di
, str
, dirname
);
2291 TRACE_D3(" read_filename_table: %ld fndn_ix %u %s %s\n",
2292 VG_(sizeXA
)(fndn_ix_Table
), fndn_ix
,
2294 VG_(addToXA
)( fndn_ix_Table
, &fndn_ix
);
2295 (void)get_ULEB128( &c
); /* skip last mod time */
2296 (void)get_ULEB128( &c
); /* file size */
2300 UChar p_ndx
= 0, d_ndx
= 0;
2301 UInt file_names_count
;
2302 UChar file_names_entry_format_count
;
2304 DiSlice debugstr_img
= cc
->escn_debug_str
;
2305 DiSlice debuglinestr_img
= cc
->escn_debug_line_str
;
2306 file_names_entry_format_count
= get_UChar( &c
);
2307 for (n
= 0; n
< file_names_entry_format_count
; n
++) {
2308 UInt lnct
= get_ULEB128( &c
);
2309 UInt form
= get_ULEB128( &c
);
2310 if (lnct
== DW_LNCT_path
)
2312 if (lnct
== DW_LNCT_directory_index
)
2316 file_names_count
= get_ULEB128( &c
);
2317 for (n
= 0; n
< file_names_count
; n
++) {
2321 for (f
= 0; f
< file_names_entry_format_count
; f
++) {
2322 UInt form
= forms
[f
];
2324 str
= get_line_str (cc
->di
, cc
->is_dw64
, &c
, form
,
2325 debugstr_img
, debuglinestr_img
);
2326 else if (f
== d_ndx
)
2327 dir_xa_ix
= get_line_ndx (cc
->di
, &c
, form
);
2329 skip_line_form (cc
->di
, cc
->is_dw64
, &c
, form
);
2332 if (dirname_xa
!= NULL
2333 && dir_xa_ix
>= 0 && dir_xa_ix
< VG_(sizeXA
) (dirname_xa
))
2334 dirname
= *(HChar
**)VG_(indexXA
) ( dirname_xa
, dir_xa_ix
);
2337 fndn_ix
= ML_(addFnDn
)( cc
->di
, str
, dirname
);
2338 TRACE_D3(" read_filename_table: %ld fndn_ix %u %s %s\n",
2339 VG_(sizeXA
)(fndn_ix_Table
), fndn_ix
,
2341 VG_(addToXA
)( fndn_ix_Table
, &fndn_ix
);
2344 /* We're done! The rest of it is not interesting. */
2345 if (dirname_xa
!= NULL
)
2346 VG_(deleteXA
)(dirname_xa
);
2349 /* setup_cu_svma to be called when a cu is found at level 0,
2350 to establish the cu_svma. */
2351 static void setup_cu_svma(CUConst
* cc
, Bool have_lo
, Addr ip_lo
, Bool td3
)
2354 /* We have potentially more than one type of parser parsing the
2355 dwarf information. At least currently, each parser establishes
2356 the cu_svma. So, in case cu_svma_known, we check that the same
2357 result is obtained by the 2nd parsing of the cu.
2359 Alternatively, we could reset cu_svma_known after each parsing
2360 and then check that we only see a single DW_TAG_compile_unit DIE
2361 at level 0, DWARF3 only allows exactly one top level DIE per
2367 /* Now, it may be that this DIE doesn't tell us the CU's
2368 SVMA, by way of not having a DW_AT_low_pc. That's OK --
2369 the CU doesn't *have* to have its SVMA specified.
2371 But as per last para D3 spec sec 3.1.1 ("Normal and
2372 Partial Compilation Unit Entries"), "If the base address
2373 (viz, the SVMA) is undefined, then any DWARF entry or
2374 structure defined in terms of the base address of that
2375 compilation unit is not valid." So that means, if whilst
2376 processing the children of this top level DIE (or their
2377 children, etc) we see a DW_AT_range, and cu_svma_known is
2378 False, then the DIE that contains it is (per the spec)
2379 invalid, and we can legitimately stop and complain. */
2380 /* .. whereas The Reality is, simply assume the SVMA is zero
2381 if it isn't specified. */
2385 if (cc
->cu_svma_known
) {
2386 vg_assert (cu_svma
== cc
->cu_svma
);
2388 cc
->cu_svma_known
= True
;
2389 cc
->cu_svma
= cu_svma
;
2391 TRACE_D3("setup_cu_svma: acquire CU_SVMA of %p\n", (void*) cc
->cu_svma
);
2395 static void trace_DIE(
2399 UWord saved_die_c_offset
,
2407 Bool debug_types_flag
;
2410 UWord check_sibling
= 0;
2412 posn
= uncook_die( cc
, posn
, &debug_types_flag
, &alt_flag
);
2414 debug_types_flag
? cc
->escn_debug_types
:
2415 alt_flag
? cc
->escn_debug_info_alt
: cc
->escn_debug_info
,
2416 saved_die_c_offset
, cc
->barf
,
2417 "Overrun trace_DIE");
2419 VG_(printf
)(" <%d><%lx>: Abbrev Number: %llu (%s)%s%s\n",
2420 level
, posn
, (ULong
) abbv
->abbv_code
, ML_(pp_DW_TAG
)( dtag
),
2421 debug_types_flag
? " (in .debug_types)" : "",
2422 alt_flag
? " (in alternate .debug_info)" : "");
2425 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
2426 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
2427 const name_form
*nf
= &abbv
->nf
[nf_i
];
2429 if (attr
== 0 && form
== 0) break;
2430 VG_(printf
)(" %-18s: ", ML_(pp_DW_AT
)(attr
));
2431 /* Get the form contents, so as to print them */
2432 get_Form_contents( &cts
, cc
, &c
, True
, nf
);
2433 if (attr
== DW_AT_sibling
&& cts
.szB
> 0) {
2434 sibling
= cts
.u
.val
;
2436 VG_(printf
)("\t\n");
2439 /* Verify that skipping a DIE gives the same displacement as
2440 tracing (i.e. reading) a DIE. If there is an inconsistency in
2441 the nr of bytes read by get_Form_contents and get_Form_szB, this
2442 should be detected by the below. Using --trace-symtab=yes
2443 --read-var-info=yes will ensure all DIEs are systematically
2445 skip_DIE (&check_sibling
, &check_skip
, abbv
, cc
);
2446 vg_assert (check_sibling
== sibling
);
2447 vg_assert (get_position_of_Cursor (&check_skip
)
2448 == get_position_of_Cursor (&c
));
2451 __attribute__((noreturn
))
2452 static void dump_bad_die_and_barf(
2453 const HChar
*whichparser
,
2458 UWord saved_die_c_offset
,
2462 trace_DIE (dtag
, posn
, level
, saved_die_c_offset
, abbv
, cc
);
2463 VG_(printf
)("%s:\n", whichparser
);
2464 cc
->barf("confused by the above DIE");
2467 __attribute__((noinline
))
2468 static void bad_DIE_confusion(int linenr
)
2470 VG_(printf
)("\nparse DIE(readdwarf3.c:%d): confused by:\n", linenr
);
2472 #define goto_bad_DIE do {bad_DIE_confusion(__LINE__); goto bad_DIE;} while (0)
2474 __attribute__((noinline
))
2475 static void parse_var_DIE (
2476 /*MOD*/WordFM
* /* of (XArray* of AddrRange, void) */ rangestree
,
2477 /*MOD*/XArray
* /* of TempVar* */ tempvars
,
2478 /*MOD*/XArray
* /* of GExpr* */ gexprs
,
2479 /*MOD*/D3VarParser
* parser
,
2492 UWord saved_die_c_offset
= get_position_of_Cursor( c_die
);
2494 varstack_preen( parser
, td3
, level
-1 );
2496 if (dtag
== DW_TAG_compile_unit
2497 || dtag
== DW_TAG_type_unit
2498 || dtag
== DW_TAG_partial_unit
) {
2499 Bool have_lo
= False
;
2500 Bool have_hi1
= False
;
2501 Bool hiIsRelative
= False
;
2502 Bool have_range
= False
;
2506 const HChar
*compdir
= NULL
;
2509 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
2510 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
2511 const name_form
*nf
= &abbv
->nf
[nf_i
];
2513 if (attr
== 0 && form
== 0) break;
2514 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
2515 if (attr
== DW_AT_low_pc
&& cts
.szB
> 0) {
2519 if (attr
== DW_AT_high_pc
&& cts
.szB
> 0) {
2522 if (form
!= DW_FORM_addr
)
2523 hiIsRelative
= True
;
2525 if (attr
== DW_AT_ranges
&& cts
.szB
> 0) {
2526 rangeoff
= cts
.u
.val
;
2529 if (attr
== DW_AT_comp_dir
) {
2531 cc
->barf("parse_var_DIE compdir: expecting indirect string");
2532 HChar
*str
= ML_(cur_read_strdup
)( cts
.u
.cur
,
2533 "parse_var_DIE.compdir" );
2534 compdir
= ML_(addStr
)(cc
->di
, str
, -1);
2535 ML_(dinfo_free
) (str
);
2537 if (attr
== DW_AT_stmt_list
&& cts
.szB
> 0) {
2538 read_filename_table( parser
->fndn_ix_Table
, compdir
,
2539 cc
, cts
.u
.val
, td3
);
2542 if (have_lo
&& have_hi1
&& hiIsRelative
)
2545 /* Now, does this give us an opportunity to find this
2548 setup_cu_svma(cc
, have_lo
, ip_lo
, td3
);
2550 /* Do we have something that looks sane? */
2551 if (have_lo
&& have_hi1
&& (!have_range
)) {
2553 varstack_push( cc
, parser
, td3
,
2554 unitary_range_list(ip_lo
, ip_hi1
- 1),
2556 False
/*isFunc*/, NULL
/*fbGX*/ );
2557 else if (ip_lo
== 0 && ip_hi1
== 0)
2558 /* CU has no code, presumably?
2559 Such situations have been encountered for code
2560 compiled with -ffunction-sections -fdata-sections
2561 and linked with --gc-sections. A completely
2562 eliminated CU gives such a 0 lo/hi pc. As for
2563 a CU which has no lo/hi/range pc, we push
2564 an empty range list. */
2565 varstack_push( cc
, parser
, td3
,
2568 False
/*isFunc*/, NULL
/*fbGX*/ );
2570 if ((!have_lo
) && (!have_hi1
) && have_range
) {
2571 varstack_push( cc
, parser
, td3
,
2572 get_range_list( cc
, td3
,
2573 rangeoff
, cc
->cu_svma
),
2575 False
/*isFunc*/, NULL
/*fbGX*/ );
2577 if ((!have_lo
) && (!have_hi1
) && (!have_range
)) {
2578 /* CU has no code, presumably? */
2579 varstack_push( cc
, parser
, td3
,
2582 False
/*isFunc*/, NULL
/*fbGX*/ );
2584 if (have_lo
&& (!have_hi1
) && have_range
&& ip_lo
== 0) {
2585 /* broken DIE created by gcc-4.3.X ? Ignore the
2586 apparently-redundant DW_AT_low_pc and use the DW_AT_ranges
2588 varstack_push( cc
, parser
, td3
,
2589 get_range_list( cc
, td3
,
2590 rangeoff
, cc
->cu_svma
),
2592 False
/*isFunc*/, NULL
/*fbGX*/ );
2594 if (0) VG_(printf
)("I got hlo %d hhi1 %d hrange %d\n",
2595 (Int
)have_lo
, (Int
)have_hi1
, (Int
)have_range
);
2600 if (dtag
== DW_TAG_lexical_block
|| dtag
== DW_TAG_subprogram
) {
2601 Bool have_lo
= False
;
2602 Bool have_hi1
= False
;
2603 Bool have_range
= False
;
2604 Bool hiIsRelative
= False
;
2608 Bool isFunc
= dtag
== DW_TAG_subprogram
;
2612 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
2613 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
2614 const name_form
*nf
= &abbv
->nf
[nf_i
];
2616 if (attr
== 0 && form
== 0) break;
2617 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
2618 if (attr
== DW_AT_low_pc
&& cts
.szB
> 0) {
2622 if (attr
== DW_AT_high_pc
&& cts
.szB
> 0) {
2625 if (form
!= DW_FORM_addr
)
2626 hiIsRelative
= True
;
2628 if (attr
== DW_AT_ranges
&& cts
.szB
> 0) {
2629 rangeoff
= cts
.u
.val
;
2633 && attr
== DW_AT_frame_base
2634 && cts
.szB
!= 0 /* either scalar or nonempty block */) {
2635 fbGX
= get_GX( cc
, False
/*td3*/, &cts
);
2637 VG_(addToXA
)(gexprs
, &fbGX
);
2640 if (have_lo
&& have_hi1
&& hiIsRelative
)
2642 /* Do we have something that looks sane? */
2643 if (dtag
== DW_TAG_subprogram
2644 && (!have_lo
) && (!have_hi1
) && (!have_range
)) {
2645 /* This is legit - ignore it. Sec 3.3.3: "A subroutine entry
2646 representing a subroutine declaration that is not also a
2647 definition does not have code address or range
2650 if (dtag
== DW_TAG_lexical_block
2651 && (!have_lo
) && (!have_hi1
) && (!have_range
)) {
2652 /* I believe this is legit, and means the lexical block
2653 contains no insns (whatever that might mean). Ignore. */
2655 if (have_lo
&& have_hi1
&& (!have_range
)) {
2656 /* This scope supplies just a single address range. */
2658 varstack_push( cc
, parser
, td3
,
2659 unitary_range_list(ip_lo
, ip_hi1
- 1),
2660 level
, isFunc
, fbGX
);
2662 if ((!have_lo
) && (!have_hi1
) && have_range
) {
2663 /* This scope supplies multiple address ranges via the use of
2665 varstack_push( cc
, parser
, td3
,
2666 get_range_list( cc
, td3
,
2667 rangeoff
, cc
->cu_svma
),
2668 level
, isFunc
, fbGX
);
2670 if (have_lo
&& (!have_hi1
) && (!have_range
)) {
2671 /* This scope is bogus. The D3 spec sec 3.4 (Lexical Block
2672 Entries) says fairly clearly that a scope must have either
2673 _range or (_low_pc and _high_pc). */
2674 /* The spec is a bit ambiguous though. Perhaps a single byte
2675 range is intended? See sec 2.17 (Code Addresses And Ranges) */
2676 /* This case is here because icc9 produced this:
2677 <2><13bd>: DW_TAG_lexical_block
2678 DW_AT_decl_line : 5229
2679 DW_AT_decl_column : 37
2681 DW_AT_low_pc : 0x401b03
2683 /* Ignore (seems safer than pushing a single byte range) */
2688 if (dtag
== DW_TAG_variable
|| dtag
== DW_TAG_formal_parameter
) {
2689 const HChar
* name
= NULL
;
2690 UWord typeR
= D3_INVALID_CUOFF
;
2691 Bool global
= False
;
2692 GExpr
* gexpr
= NULL
;
2694 UWord abs_ori
= (UWord
)D3_INVALID_CUOFF
;
2699 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
2700 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
2701 const name_form
*nf
= &abbv
->nf
[nf_i
];
2703 if (attr
== 0 && form
== 0) break;
2704 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
2706 if (attr
== DW_AT_name
&& cts
.szB
< 0) {
2707 name
= ML_(addStrFromCursor
)( cc
->di
, cts
.u
.cur
);
2709 if (attr
== DW_AT_location
2710 && cts
.szB
!= 0 /* either scalar or nonempty block */) {
2711 gexpr
= get_GX( cc
, False
/*td3*/, &cts
);
2713 VG_(addToXA
)(gexprs
, &gexpr
);
2715 if (attr
== DW_AT_type
&& cts
.szB
> 0) {
2716 typeR
= cook_die_using_form( cc
, cts
.u
.val
, form
);
2718 if (attr
== DW_AT_external
&& cts
.szB
> 0 && cts
.u
.val
> 0) {
2721 if (attr
== DW_AT_abstract_origin
&& cts
.szB
> 0) {
2722 abs_ori
= (UWord
)cts
.u
.val
;
2724 if (attr
== DW_AT_declaration
&& cts
.szB
> 0 && cts
.u
.val
> 0) {
2725 /*declaration = True;*/
2727 if (attr
== DW_AT_decl_line
&& cts
.szB
> 0) {
2728 lineNo
= (Int
)cts
.u
.val
;
2730 if (attr
== DW_AT_decl_file
&& cts
.szB
> 0) {
2731 Int ftabIx
= (Int
)cts
.u
.val
;
2733 && ftabIx
< VG_(sizeXA
)( parser
->fndn_ix_Table
)) {
2734 fndn_ix
= *(UInt
*)VG_(indexXA
)( parser
->fndn_ix_Table
, ftabIx
);
2736 if (0) VG_(printf
)("XXX filename fndn_ix = %u %s\n", fndn_ix
,
2737 ML_(fndn_ix2filename
) (cc
->di
, fndn_ix
));
2740 if (!global
&& dtag
== DW_TAG_variable
&& level
== 1) {
2741 /* Case of a static variable. It is better to declare
2742 it global as the variable is not really related to
2743 a PC range, as its address can be used by program
2744 counters outside of the ranges where it is visible. */
2748 /* We'll collect it under if one of the following three
2750 (1) has location and type -> completed
2751 (2) has type only -> is an abstract instance
2752 (3) has location and abs_ori -> is a concrete instance
2753 Name, fndn_ix and line number are all optional frills.
2755 if ( /* 1 */ (gexpr
&& typeR
!= D3_INVALID_CUOFF
)
2756 /* 2 */ || (typeR
!= D3_INVALID_CUOFF
)
2757 /* 3 */ || (gexpr
&& abs_ori
!= (UWord
)D3_INVALID_CUOFF
) ) {
2759 /* Add this variable to the list of interesting looking
2760 variables. Crucially, note along with it the address
2761 range(s) associated with the variable, which for locals
2762 will be the address ranges at the top of the varparser's
2766 const XArray
* /* of AddrRange */ xa
;
2768 /* Stack can't be empty; we put a dummy entry on it for the
2769 entire address range before starting with the DIEs for
2771 vg_assert(parser
->sp
>= 0);
2773 /* If this is a local variable (non-global), try to find
2774 the GExpr for the DW_AT_frame_base of the containing
2775 function. It should have been pushed on the stack at the
2776 time we encountered its DW_TAG_subprogram DIE, so the way
2777 to find it is to scan back down the stack looking for it.
2778 If there isn't an enclosing stack entry marked 'isFunc'
2779 then we must be seeing variable or formal param DIEs
2780 outside of a function, so we deem the Dwarf to be
2781 malformed if that happens. Note that the fbGX may be NULL
2782 if the containing DW_TAG_subprogram didn't supply a
2783 DW_AT_frame_base -- that's OK, but there must actually be
2784 a containing DW_TAG_subprogram. */
2787 for (i
= parser
->sp
; i
>= 0; i
--) {
2788 if (parser
->isFunc
[i
]) {
2789 fbGX
= parser
->fbGX
[i
];
2795 if (0 && VG_(clo_verbosity
) >= 0) {
2796 VG_(message
)(Vg_DebugMsg
,
2797 "warning: parse_var_DIE: non-global variable "
2798 "outside DW_TAG_subprogram\n");
2801 /* This seems to happen a lot. Just ignore it -- if,
2802 when we come to evaluation of the location (guarded)
2803 expression, it requires a frame base value, and
2804 there's no expression for that, then evaluation as a
2805 whole will fail. Harmless - a bit of a waste of
2806 cycles but nothing more. */
2810 /* re "global ? 0 : parser->sp" (twice), if the var is
2811 marked 'global' then we must put it at the global scope,
2812 as only the global scope (level 0) covers the entire PC
2813 address space. It is asserted elsewhere that level 0
2814 always covers the entire address space. */
2815 xa
= parser
->ranges
[global
? 0 : parser
->sp
];
2816 nRanges
= VG_(sizeXA
)(xa
);
2817 vg_assert(nRanges
>= 0);
2819 tv
= ML_(dinfo_zalloc
)( "di.readdwarf3.pvD.1", sizeof(TempVar
) );
2821 tv
->level
= global
? 0 : parser
->sp
;
2825 tv
->fndn_ix
= fndn_ix
;
2828 tv
->absOri
= abs_ori
;
2830 /* See explanation on definition of type TempVar for the
2831 reason for this elaboration. */
2832 tv
->nRanges
= nRanges
;
2837 AddrRange
* range
= VG_(indexXA
)(xa
, 0);
2838 tv
->rngOneMin
= range
->aMin
;
2839 tv
->rngOneMax
= range
->aMax
;
2841 else if (nRanges
> 1) {
2842 /* See if we already have a range list which is
2843 structurally identical. If so, use that; if not, clone
2844 this one, and add it to our collection. */
2846 if (VG_(lookupFM
)( rangestree
, &keyW
, &valW
, (UWord
)xa
)) {
2847 XArray
* old
= (XArray
*)keyW
;
2848 vg_assert(valW
== 0);
2849 vg_assert(old
!= xa
);
2852 XArray
* cloned
= VG_(cloneXA
)( "di.readdwarf3.pvD.2", xa
);
2853 tv
->rngMany
= cloned
;
2854 VG_(addToFM
)( rangestree
, (UWord
)cloned
, 0 );
2858 VG_(addToXA
)( tempvars
, &tv
);
2860 TRACE_D3(" Recording this variable, with %ld PC range(s)\n",
2862 /* collect stats on how effective the ->ranges special
2865 static Int ntot
=0, ngt
=0;
2867 if (tv
->rngMany
) ngt
++;
2868 if (0 == (ntot
% 100000))
2869 VG_(printf
)("XXXX %d tot, %d cloned\n", ntot
, ngt
);
2874 /* Here are some other weird cases seen in the wild:
2876 We have a variable with a name and a type, but no
2877 location. I guess that's a sign that it has been
2878 optimised away. Ignore it. Here's an example:
2880 static Int lc_compar(void* n1, void* n2) {
2881 MC_Chunk* mc1 = *(MC_Chunk**)n1;
2882 MC_Chunk* mc2 = *(MC_Chunk**)n2;
2883 return (mc1->data < mc2->data ? -1 : 1);
2886 Both mc1 and mc2 are like this
2887 <2><5bc>: Abbrev Number: 21 (DW_TAG_variable)
2890 DW_AT_decl_line : 216
2893 whereas n1 and n2 do have locations specified.
2895 ---------------------------------------------
2897 We see a DW_TAG_formal_parameter with a type, but
2898 no name and no location. It's probably part of a function type
2899 construction, thusly, hence ignore it:
2900 <1><2b4>: Abbrev Number: 12 (DW_TAG_subroutine_type)
2901 DW_AT_sibling : <2c9>
2902 DW_AT_prototyped : 1
2904 <2><2be>: Abbrev Number: 13 (DW_TAG_formal_parameter)
2906 <2><2c3>: Abbrev Number: 13 (DW_TAG_formal_parameter)
2909 ---------------------------------------------
2911 Is very minimal, like this:
2912 <4><81d>: Abbrev Number: 44 (DW_TAG_variable)
2913 DW_AT_abstract_origin: <7ba>
2914 What that signifies I have no idea. Ignore.
2916 ----------------------------------------------
2918 Is very minimal, like this:
2919 <200f>: DW_TAG_formal_parameter
2920 DW_AT_abstract_ori: <1f4c>
2921 DW_AT_location : 13440
2922 What that signifies I have no idea. Ignore.
2923 It might be significant, though: the variable at least
2924 has a location and so might exist somewhere.
2925 Maybe we should handle this.
2927 ---------------------------------------------
2929 <22407>: DW_TAG_variable
2930 DW_AT_name : (indirect string, offset: 0x6579):
2931 vgPlain_trampoline_stuff_start
2932 DW_AT_decl_file : 29
2933 DW_AT_decl_line : 56
2935 DW_AT_declaration : 1
2937 Nameless and typeless variable that has a location? Who
2939 <2><3d178>: Abbrev Number: 22 (DW_TAG_variable)
2940 DW_AT_location : 9 byte block: 3 c0 c7 13 38 0 0 0 0
2941 (DW_OP_addr: 3813c7c0)
2943 No, really. Check it out. gcc is quite simply borked.
2944 <3><168cc>: Abbrev Number: 141 (DW_TAG_variable)
2945 // followed by no attributes, and the next DIE is a sibling,
2952 dump_bad_die_and_barf("parse_var_DIE", dtag
, posn
, level
,
2953 c_die
, saved_die_c_offset
,
2961 /* The fndn_ix file name/dirname table. Is a mapping from dwarf
2962 integer index to the index in di->fndnpool. */
2963 XArray
* /* of UInt* */ fndn_ix_Table
;
2964 UWord sibling
; // sibling of the last read DIE (if it has a sibling).
2968 /* Return the function name corresponding to absori.
2970 absori is a 'cooked' reference to a DIE, i.e. absori can be either
2971 in cc->escn_debug_info or in cc->escn_debug_info_alt.
2972 get_inlFnName will uncook absori.
2974 The returned value is a (permanent) string in DebugInfo's .strchunks.
2976 LIMITATION: absori must point in the CU of cc. If absori points
2977 in another CU, returns "UnknownInlinedFun".
2979 Here are the problems with retrieving the function name if absori is in
2980 another CU: the DIE reading code cannot properly extract data from
2981 another CU, as the abbv code retrieved in the other CU cannot be
2982 translated into an abbreviation. Reading data from the alternate debug
2983 info also gives problems, as the string reference is also in the alternate
2984 file: when reading the alt DIE, the string form is a 'local' string,
2985 which cannot be read in the current CU but must be read in the alt CU.
2986 See bug 338803 comment#3 and attachment for a failed attempt to handle
2987 these problems (failed because with the patch, only one alt abbrev hash
2988 table is kept, while we must handle all abbreviations in all CUs
2989 referenced by an absori (being a reference to an alt CU, or a previous
2990 or following CU)). */
2991 static const HChar
* get_inlFnName (Int absori
, const CUConst
* cc
, Bool td3
)
2995 ULong atag
, abbv_code
;
2998 Bool type_flag
, alt_flag
;
2999 const HChar
*ret
= NULL
;
3003 /* Some inlined subroutine call dwarf entries do not have the abstract
3004 origin attribute, resulting in absori being 0 (see callers of
3005 get_inlFnName). This is observed at least with gcc 6.3.0 when compiling
3006 valgrind with lto. So, in case we have a 0 absori, do not report an
3007 error; instead, return an unknown inlined function name. */
3009 static Bool absori0_reported
= False
;
3010 if (!absori0_reported
&& VG_(clo_verbosity
) > 1) {
3011 VG_(message
)(Vg_DebugMsg
,
3012 "Warning: inlined fn name without absori\n"
3013 "is shown as UnknownInlinedFun\n");
3014 absori0_reported
= True
;
3016 TRACE_D3(" <get_inlFnName>: absori is not set");
3017 return ML_(addStr
)(cc
->di
, "UnknownInlinedFun", -1);
3020 posn
= uncook_die( cc
, absori
, &type_flag
, &alt_flag
);
3022 cc
->barf("get_inlFnName: uncooked absori in type debug info");
3024 /* LIMITATION: check we are in the same CU.
3025 If not, return unknown inlined function name. */
3026 /* if crossing between alt debug info<>normal info
3027 or posn not in the cu range,
3028 then it is in another CU. */
3029 if (alt_flag
!= cc
->is_alt_info
3030 || posn
< cc
->cu_start_offset
3031 || posn
>= cc
->cu_start_offset
+ cc
->unit_length
) {
3032 static Bool reported
= False
;
3033 if (!reported
&& VG_(clo_verbosity
) > 1) {
3034 VG_(message
)(Vg_DebugMsg
,
3035 "Warning: cross-CU LIMITATION: some inlined fn names\n"
3036 "might be shown as UnknownInlinedFun\n");
3039 TRACE_D3(" <get_inlFnName><%lx>: cross-CU LIMITATION", posn
);
3040 return ML_(addStr
)(cc
->di
, "UnknownInlinedFun", -1);
3043 init_Cursor (&c
, cc
->escn_debug_info
, posn
, cc
->barf
,
3044 "Overrun get_inlFnName absori");
3046 abbv_code
= get_ULEB128( &c
);
3047 abbv
= get_abbv ( cc
, abbv_code
);
3049 TRACE_D3(" <get_inlFnName><%lx>: Abbrev Number: %llu (%s)\n",
3050 posn
, abbv_code
, ML_(pp_DW_TAG
)( atag
) );
3053 cc
->barf("get_inlFnName: invalid zero tag on DIE");
3055 has_children
= abbv
->has_children
;
3056 if (has_children
!= DW_children_no
&& has_children
!= DW_children_yes
)
3057 cc
->barf("get_inlFnName: invalid has_children value");
3059 if (atag
!= DW_TAG_subprogram
)
3060 cc
->barf("get_inlFnName: absori not a subprogram");
3064 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
3065 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
3066 const name_form
*nf
= &abbv
->nf
[nf_i
];
3068 if (attr
== 0 && form
== 0) break;
3069 get_Form_contents( &cts
, cc
, &c
, False
/*td3*/, nf
);
3070 if (attr
== DW_AT_name
) {
3073 cc
->barf("get_inlFnName: expecting indirect string");
3074 fnname
= ML_(cur_read_strdup
)( cts
.u
.cur
,
3075 "get_inlFnName.1" );
3076 ret
= ML_(addStr
)(cc
->di
, fnname
, -1);
3077 ML_(dinfo_free
) (fnname
);
3078 break; /* Name found, get out of the loop, as this has priority over
3079 DW_AT_specification. */
3081 if (attr
== DW_AT_specification
) {
3085 cc
->barf("get_inlFnName: AT specification missing");
3087 /* The recursive call to get_inlFnName will uncook its arg.
3088 So, we need to cook it here, so as to reference the
3089 correct section (e.g. the alt info). */
3090 cdie
= cook_die_using_form(cc
, (UWord
)cts
.u
.val
, form
);
3092 /* hoping that there is no loop */
3093 ret
= get_inlFnName (cdie
, cc
, td3
);
3094 /* Unclear if having both DW_AT_specification and DW_AT_name is
3095 possible but in any case, we do not break here.
3096 If we find later on a DW_AT_name, it will override the name found
3097 in the DW_AT_specification.*/
3104 TRACE_D3("AbsOriFnNameNotFound");
3105 return ML_(addStr
)(cc
->di
, "AbsOriFnNameNotFound", -1);
3109 /* Returns True if the (possible) children of the current DIE are interesting
3110 to parse. Returns False otherwise.
3111 If the current DIE has a sibling, the uninteresting children can
3112 perhaps be skipped (if the DIE has a DW_AT_sibling). */
3113 __attribute__((noinline
))
3114 static Bool
parse_inl_DIE (
3115 /*MOD*/D3InlParser
* parser
,
3128 UWord saved_die_c_offset
= get_position_of_Cursor( c_die
);
3130 /* Get info about DW_TAG_compile_unit and DW_TAG_partial_unit (which
3131 in theory could also contain inlined fn calls). */
3132 if (dtag
== DW_TAG_compile_unit
|| dtag
== DW_TAG_partial_unit
) {
3133 Bool have_lo
= False
;
3135 const HChar
*compdir
= NULL
;
3139 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
3140 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
3141 const name_form
*nf
= &abbv
->nf
[nf_i
];
3143 if (attr
== 0 && form
== 0) break;
3144 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
3145 if (attr
== DW_AT_low_pc
&& cts
.szB
> 0) {
3149 if (attr
== DW_AT_comp_dir
) {
3151 cc
->barf("parse_inl_DIE compdir: expecting indirect string");
3152 HChar
*str
= ML_(cur_read_strdup
)( cts
.u
.cur
,
3153 "parse_inl_DIE.compdir" );
3154 compdir
= ML_(addStr
)(cc
->di
, str
, -1);
3155 ML_(dinfo_free
) (str
);
3157 if (attr
== DW_AT_stmt_list
&& cts
.szB
> 0) {
3158 read_filename_table( parser
->fndn_ix_Table
, compdir
,
3159 cc
, cts
.u
.val
, td3
);
3161 if (attr
== DW_AT_sibling
&& cts
.szB
> 0) {
3162 parser
->sibling
= cts
.u
.val
;
3166 setup_cu_svma (cc
, have_lo
, ip_lo
, td3
);
3169 if (dtag
== DW_TAG_inlined_subroutine
) {
3170 Bool have_lo
= False
;
3171 Bool have_hi1
= False
;
3172 Bool have_range
= False
;
3173 Bool hiIsRelative
= False
;
3177 UInt caller_fndn_ix
= 0;
3178 Int caller_lineno
= 0;
3179 Int inlinedfn_abstract_origin
= 0;
3180 // 0 will be interpreted as no abstract origin by get_inlFnName
3184 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
3185 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
3186 const name_form
*nf
= &abbv
->nf
[nf_i
];
3188 if (attr
== 0 && form
== 0) break;
3189 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
3190 if (attr
== DW_AT_call_file
&& cts
.szB
> 0) {
3191 Int ftabIx
= (Int
)cts
.u
.val
;
3193 && ftabIx
< VG_(sizeXA
)( parser
->fndn_ix_Table
)) {
3194 caller_fndn_ix
= *(UInt
*)
3195 VG_(indexXA
)( parser
->fndn_ix_Table
, ftabIx
);
3197 if (0) VG_(printf
)("XXX caller_fndn_ix = %u %s\n", caller_fndn_ix
,
3198 ML_(fndn_ix2filename
) (cc
->di
, caller_fndn_ix
));
3200 if (attr
== DW_AT_call_line
&& cts
.szB
> 0) {
3201 caller_lineno
= cts
.u
.val
;
3204 if (attr
== DW_AT_abstract_origin
&& cts
.szB
> 0) {
3205 inlinedfn_abstract_origin
3206 = cook_die_using_form (cc
, (UWord
)cts
.u
.val
, form
);
3209 if (attr
== DW_AT_low_pc
&& cts
.szB
> 0) {
3213 if (attr
== DW_AT_high_pc
&& cts
.szB
> 0) {
3216 if (form
!= DW_FORM_addr
)
3217 hiIsRelative
= True
;
3219 if (attr
== DW_AT_ranges
&& cts
.szB
> 0) {
3220 rangeoff
= cts
.u
.val
;
3223 if (attr
== DW_AT_sibling
&& cts
.szB
> 0) {
3224 parser
->sibling
= cts
.u
.val
;
3227 if (have_lo
&& have_hi1
&& hiIsRelative
)
3229 /* Do we have something that looks sane? */
3230 if (dtag
== DW_TAG_inlined_subroutine
3231 && (!have_lo
) && (!have_hi1
) && (!have_range
)) {
3232 /* Seems strange. How can an inlined subroutine have
3236 if (have_lo
&& have_hi1
&& (!have_range
)) {
3237 /* This inlined call is just a single address range. */
3238 if (ip_lo
< ip_hi1
) {
3239 /* Apply text debug biasing */
3240 ip_lo
+= cc
->di
->text_debug_bias
;
3241 ip_hi1
+= cc
->di
->text_debug_bias
;
3242 ML_(addInlInfo
) (cc
->di
,
3244 get_inlFnName (inlinedfn_abstract_origin
, cc
, td3
),
3246 caller_lineno
, level
);
3248 } else if (have_range
) {
3249 /* This inlined call is several address ranges. */
3252 const HChar
*inlfnname
=
3253 get_inlFnName (inlinedfn_abstract_origin
, cc
, td3
);
3255 /* Ranges are biased for the inline info using the same logic
3256 as what is used for biasing ranges for the var info, for which
3257 ranges are read using cc->cu_svma (see parse_var_DIE).
3258 Then text_debug_bias is added when a (non global) var
3259 is recorded (see just before the call to ML_(addVar)) */
3260 ranges
= get_range_list( cc
, td3
,
3261 rangeoff
, cc
->cu_svma
);
3262 for (j
= 0; j
< VG_(sizeXA
)( ranges
); j
++) {
3263 AddrRange
* range
= (AddrRange
*) VG_(indexXA
)( ranges
, j
);
3264 ML_(addInlInfo
) (cc
->di
,
3265 range
->aMin
+ cc
->di
->text_debug_bias
,
3266 range
->aMax
+1 + cc
->di
->text_debug_bias
,
3267 // aMax+1 as range has its last bound included
3268 // while ML_(addInlInfo) expects last bound not
3272 caller_lineno
, level
);
3274 VG_(deleteXA
)( ranges
);
3279 // Only recursively parse the (possible) children for the DIEs which
3280 // might contain a DW_TAG_inlined_subroutine:
3281 return dtag
== DW_TAG_lexical_block
|| dtag
== DW_TAG_subprogram
3282 || dtag
== DW_TAG_inlined_subroutine
3283 || dtag
== DW_TAG_compile_unit
|| dtag
== DW_TAG_partial_unit
;
3286 dump_bad_die_and_barf("parse_inl_DIE", dtag
, posn
, level
,
3287 c_die
, saved_die_c_offset
,
3294 /*------------------------------------------------------------*/
3296 /*--- Parsing of type-related DIEs ---*/
3298 /*------------------------------------------------------------*/
3302 /* What source language? 'A'=Ada83/95,
3306 Established once per compilation unit. */
3308 /* A stack of types which are currently under construction */
3309 Int sp
; /* [sp] is innermost active entry; sp==-1 for empty
3312 /* Note that the TyEnts in qparentE are temporary copies of the
3313 ones accumulating in the main tyent array. So it is not safe
3314 to free up anything on them when popping them off the stack
3315 (iow, it isn't safe to use TyEnt__make_EMPTY on them). Just
3316 memset them to zero when done. */
3317 TyEnt
*qparentE
; /* parent TyEnts */
3322 /* Completely initialise a type parser object */
3324 type_parser_init ( D3TypeParser
*parser
)
3327 parser
->language
= '?';
3328 parser
->stack_size
= 0;
3329 parser
->qparentE
= NULL
;
3330 parser
->qlevel
= NULL
;
3333 /* Release any memory hanging off a type parser object */
3335 type_parser_release ( D3TypeParser
*parser
)
3337 ML_(dinfo_free
)( parser
->qparentE
);
3338 ML_(dinfo_free
)( parser
->qlevel
);
3341 static void typestack_show ( const D3TypeParser
* parser
, const HChar
* str
)
3344 VG_(printf
)(" typestack (%s) {\n", str
);
3345 for (i
= 0; i
<= parser
->sp
; i
++) {
3346 VG_(printf
)(" [%ld] (level %d): ", i
, parser
->qlevel
[i
]);
3347 ML_(pp_TyEnt
)( &parser
->qparentE
[i
] );
3350 VG_(printf
)(" }\n");
3353 /* Remove from the stack, all entries with .level > 'level' */
3355 void typestack_preen ( D3TypeParser
* parser
, Bool td3
, Int level
)
3357 Bool changed
= False
;
3358 vg_assert(parser
->sp
< parser
->stack_size
);
3360 vg_assert(parser
->sp
>= -1);
3361 if (parser
->sp
== -1) break;
3362 if (parser
->qlevel
[parser
->sp
] <= level
) break;
3364 TRACE_D3("BBBBAAAA typestack_pop [newsp=%d]\n", parser
->sp
-1);
3365 vg_assert(ML_(TyEnt__is_type
)(&parser
->qparentE
[parser
->sp
]));
3370 typestack_show( parser
, "after preen" );
3373 static Bool
typestack_is_empty ( const D3TypeParser
* parser
)
3375 vg_assert(parser
->sp
>= -1 && parser
->sp
< parser
->stack_size
);
3376 return parser
->sp
== -1;
3379 static void typestack_push ( const CUConst
* cc
,
3380 D3TypeParser
* parser
,
3382 const TyEnt
* parentE
, Int level
)
3385 TRACE_D3("BBBBAAAA typestack_push[newsp=%d]: %d %05lx\n",
3386 parser
->sp
+1, level
, parentE
->cuOff
);
3388 /* First we need to zap everything >= 'level', as we are about to
3389 replace any previous entry at 'level', so .. */
3390 typestack_preen(parser
, /*td3*/False
, level
-1);
3392 vg_assert(parser
->sp
>= -1);
3393 vg_assert(parser
->sp
< parser
->stack_size
);
3394 if (parser
->sp
== parser
->stack_size
- 1) {
3395 parser
->stack_size
+= 16;
3397 ML_(dinfo_realloc
)("di.readdwarf3.typush.1", parser
->qparentE
,
3398 parser
->stack_size
* sizeof parser
->qparentE
[0]);
3400 ML_(dinfo_realloc
)("di.readdwarf3.typush.2", parser
->qlevel
,
3401 parser
->stack_size
* sizeof parser
->qlevel
[0]);
3403 if (parser
->sp
>= 0)
3404 vg_assert(parser
->qlevel
[parser
->sp
] < level
);
3407 vg_assert(ML_(TyEnt__is_type
)(parentE
));
3408 vg_assert(parentE
->cuOff
!= D3_INVALID_CUOFF
);
3409 parser
->qparentE
[parser
->sp
] = *parentE
;
3410 parser
->qlevel
[parser
->sp
] = level
;
3412 typestack_show( parser
, "after push" );
3415 /* True if the subrange type being parsed gives the bounds of an array. */
3416 static Bool
subrange_type_denotes_array_bounds ( const D3TypeParser
* parser
,
3418 vg_assert(dtag
== DW_TAG_subrange_type
);
3419 /* For most languages, a subrange_type dtag always gives the
3421 For Ada, there are additional conditions as a subrange_type
3422 is also used for other purposes. */
3423 if (parser
->language
!= 'A')
3424 /* not Ada, so it definitely denotes an array bound. */
3427 /* Extra constraints for Ada: it only denotes an array bound if .. */
3428 return (! typestack_is_empty(parser
)
3429 && parser
->qparentE
[parser
->sp
].tag
== Te_TyArray
);
3432 /* True if the form is one of the forms supported to give an array bound.
3433 For some arrays (scope local arrays with variable size),
3434 a DW_FORM_ref4 was used, and was wrongly used as the bound value.
3435 So, refuse the forms that are known to give a problem. */
3436 static Bool
form_expected_for_bound ( DW_FORM form
) {
3437 if (form
== DW_FORM_ref1
3438 || form
== DW_FORM_ref2
3439 || form
== DW_FORM_ref4
3440 || form
== DW_FORM_ref8
)
3446 /* Parse a type-related DIE. 'parser' holds the current parser state.
3447 'admin' is where the completed types are dumped. 'dtag' is the tag
3448 for this DIE. 'c_die' points to the start of the data fields (FORM
3449 stuff) for the DIE. abbv is the parsed abbreviation which describes
3452 We may find the DIE uninteresting, in which case we should ignore
3455 What happens: the DIE is examined. If uninteresting, it is ignored.
3456 Otherwise, the DIE gives rise to two things:
3458 (1) the offset of this DIE in the CU -- the cuOffset, a UWord
3459 (2) a TyAdmin structure, which holds the type, or related stuff
3461 (2) is added at the end of 'tyadmins', at some index, say 'i'.
3463 A pair (cuOffset, i) is added to 'tydict'.
3465 Hence 'tyadmins' holds the actual type entities, and 'tydict' holds
3466 a mapping from cuOffset to the index of the corresponding entry in
3469 When resolving a cuOffset to a TyAdmin, first look up the cuOffset
3470 in the tydict (by binary search). This gives an index into
3471 tyadmins, and the required entity lives in tyadmins at that index.
3473 __attribute__((noinline
))
3474 static void parse_type_DIE ( /*MOD*/XArray
* /* of TyEnt */ tyents
,
3475 /*MOD*/D3TypeParser
* parser
,
3491 UWord saved_die_c_offset
= get_position_of_Cursor( c_die
);
3493 VG_(memset
)( &typeE
, 0xAA, sizeof(typeE
) );
3494 VG_(memset
)( &atomE
, 0xAA, sizeof(atomE
) );
3495 VG_(memset
)( &fieldE
, 0xAA, sizeof(fieldE
) );
3496 VG_(memset
)( &boundE
, 0xAA, sizeof(boundE
) );
3498 /* If we've returned to a level at or above any previously noted
3499 parent, un-note it, so we don't believe we're still collecting
3501 typestack_preen( parser
, td3
, level
-1 );
3503 if (dtag
== DW_TAG_compile_unit
3504 || dtag
== DW_TAG_type_unit
3505 || dtag
== DW_TAG_partial_unit
) {
3506 /* See if we can find DW_AT_language, since it is important for
3507 establishing array bounds (see DW_TAG_subrange_type below in
3511 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
3512 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
3513 const name_form
*nf
= &abbv
->nf
[nf_i
];
3515 if (attr
== 0 && form
== 0) break;
3516 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
3517 if (attr
!= DW_AT_language
)
3521 switch (cts
.u
.val
) {
3522 case DW_LANG_C89
: case DW_LANG_C
:
3523 case DW_LANG_C_plus_plus
: case DW_LANG_ObjC
:
3524 case DW_LANG_ObjC_plus_plus
: case DW_LANG_UPC
:
3525 case DW_LANG_Upc
: case DW_LANG_C99
: case DW_LANG_C11
:
3526 case DW_LANG_C_plus_plus_11
: case DW_LANG_C_plus_plus_14
:
3527 parser
->language
= 'C'; break;
3528 case DW_LANG_Fortran77
: case DW_LANG_Fortran90
:
3529 case DW_LANG_Fortran95
: case DW_LANG_Fortran03
:
3530 case DW_LANG_Fortran08
:
3531 parser
->language
= 'F'; break;
3532 case DW_LANG_Ada83
: case DW_LANG_Ada95
:
3533 parser
->language
= 'A'; break;
3534 case DW_LANG_Cobol74
:
3535 case DW_LANG_Cobol85
: case DW_LANG_Pascal83
:
3536 case DW_LANG_Modula2
: case DW_LANG_Java
:
3538 case DW_LANG_D
: case DW_LANG_Python
: case DW_LANG_Go
:
3539 case DW_LANG_Mips_Assembler
:
3540 parser
->language
= '?'; break;
3547 if (dtag
== DW_TAG_base_type
) {
3548 /* We can pick up a new base type any time. */
3549 VG_(memset
)(&typeE
, 0, sizeof(typeE
));
3550 typeE
.cuOff
= D3_INVALID_CUOFF
;
3551 typeE
.tag
= Te_TyBase
;
3554 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
3555 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
3556 const name_form
*nf
= &abbv
->nf
[nf_i
];
3558 if (attr
== 0 && form
== 0) break;
3559 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
3560 if (attr
== DW_AT_name
&& cts
.szB
< 0) {
3561 typeE
.Te
.TyBase
.name
3562 = ML_(cur_read_strdup
)( cts
.u
.cur
,
3563 "di.readdwarf3.ptD.base_type.1" );
3565 if (attr
== DW_AT_byte_size
&& cts
.szB
> 0) {
3566 typeE
.Te
.TyBase
.szB
= cts
.u
.val
;
3568 if (attr
== DW_AT_encoding
&& cts
.szB
> 0) {
3569 switch (cts
.u
.val
) {
3570 case DW_ATE_unsigned
: case DW_ATE_unsigned_char
:
3571 case DW_ATE_UTF
: /* since DWARF4, e.g. char16_t from C++ */
3572 case DW_ATE_boolean
:/* FIXME - is this correct? */
3573 case DW_ATE_unsigned_fixed
:
3574 typeE
.Te
.TyBase
.enc
= 'U'; break;
3575 case DW_ATE_signed
: case DW_ATE_signed_char
:
3576 case DW_ATE_signed_fixed
:
3577 typeE
.Te
.TyBase
.enc
= 'S'; break;
3579 typeE
.Te
.TyBase
.enc
= 'F'; break;
3580 case DW_ATE_complex_float
:
3581 typeE
.Te
.TyBase
.enc
= 'C'; break;
3588 /* Invent a name if it doesn't have one. gcc-4.3
3589 -ftree-vectorize is observed to emit nameless base types. */
3590 if (!typeE
.Te
.TyBase
.name
)
3591 typeE
.Te
.TyBase
.name
3592 = ML_(dinfo_strdup
)( "di.readdwarf3.ptD.base_type.2",
3593 "<anon_base_type>" );
3595 /* Do we have something that looks sane? */
3596 if (/* must have a name */
3597 typeE
.Te
.TyBase
.name
== NULL
3598 /* and a plausible size. Yes, really 32: "complex long
3599 double" apparently has size=32 */
3600 || typeE
.Te
.TyBase
.szB
< 0 || typeE
.Te
.TyBase
.szB
> 32
3601 /* and a plausible encoding */
3602 || (typeE
.Te
.TyBase
.enc
!= 'U'
3603 && typeE
.Te
.TyBase
.enc
!= 'S'
3604 && typeE
.Te
.TyBase
.enc
!= 'F'
3605 && typeE
.Te
.TyBase
.enc
!= 'C'))
3607 /* Last minute hack: if we see this
3608 <1><515>: DW_TAG_base_type
3612 convert it into a real Void type. */
3613 if (typeE
.Te
.TyBase
.szB
== 0
3614 && 0 == VG_(strcmp
)("void", typeE
.Te
.TyBase
.name
)) {
3615 ML_(TyEnt__make_EMPTY
)(&typeE
);
3616 typeE
.tag
= Te_TyVoid
;
3617 typeE
.Te
.TyVoid
.isFake
= False
; /* it's a real one! */
3624 * An example of DW_TAG_rvalue_reference_type:
3626 * $ readelf --debug-dump /usr/lib/debug/usr/lib/libstdc++.so.6.0.16.debug
3627 * <1><1014>: Abbrev Number: 55 (DW_TAG_rvalue_reference_type)
3628 * <1015> DW_AT_byte_size : 4
3629 * <1016> DW_AT_type : <0xe52>
3631 if (dtag
== DW_TAG_pointer_type
|| dtag
== DW_TAG_reference_type
3632 || dtag
== DW_TAG_ptr_to_member_type
3633 || dtag
== DW_TAG_rvalue_reference_type
) {
3634 /* This seems legit for _pointer_type and _reference_type. I
3635 don't know if rolling _ptr_to_member_type in here really is
3636 legit, but it's better than not handling it at all. */
3637 VG_(memset
)(&typeE
, 0, sizeof(typeE
));
3638 typeE
.cuOff
= D3_INVALID_CUOFF
;
3640 case DW_TAG_pointer_type
:
3641 typeE
.tag
= Te_TyPtr
;
3643 case DW_TAG_reference_type
:
3644 typeE
.tag
= Te_TyRef
;
3646 case DW_TAG_ptr_to_member_type
:
3647 typeE
.tag
= Te_TyPtrMbr
;
3649 case DW_TAG_rvalue_reference_type
:
3650 typeE
.tag
= Te_TyRvalRef
;
3655 /* target type defaults to void */
3656 typeE
.Te
.TyPorR
.typeR
= D3_FAKEVOID_CUOFF
;
3657 /* These four type kinds don't *have* to specify their size, in
3658 which case we assume it's a machine word. But if they do
3659 specify it, it must be a machine word :-) This probably
3660 assumes that the word size of the Dwarf3 we're reading is the
3661 same size as that on the machine. gcc appears to give a size
3662 whereas icc9 doesn't. */
3663 typeE
.Te
.TyPorR
.szB
= sizeof(UWord
);
3666 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
3667 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
3668 const name_form
*nf
= &abbv
->nf
[nf_i
];
3670 if (attr
== 0 && form
== 0) break;
3671 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
3672 if (attr
== DW_AT_byte_size
&& cts
.szB
> 0) {
3673 typeE
.Te
.TyPorR
.szB
= cts
.u
.val
;
3675 if (attr
== DW_AT_type
&& cts
.szB
> 0) {
3676 typeE
.Te
.TyPorR
.typeR
3677 = cook_die_using_form( cc
, (UWord
)cts
.u
.val
, form
);
3680 /* Do we have something that looks sane? */
3681 if (typeE
.Te
.TyPorR
.szB
!= sizeof(UWord
))
3687 if (dtag
== DW_TAG_enumeration_type
) {
3688 /* Create a new Type to hold the results. */
3689 VG_(memset
)(&typeE
, 0, sizeof(typeE
));
3691 typeE
.tag
= Te_TyEnum
;
3692 Bool is_decl
= False
;
3693 typeE
.Te
.TyEnum
.atomRs
3694 = VG_(newXA
)( ML_(dinfo_zalloc
), "di.readdwarf3.ptD.enum_type.1",
3699 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
3700 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
3701 const name_form
*nf
= &abbv
->nf
[nf_i
];
3703 if (attr
== 0 && form
== 0) break;
3704 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
3705 if (attr
== DW_AT_name
&& cts
.szB
< 0) {
3706 typeE
.Te
.TyEnum
.name
3707 = ML_(cur_read_strdup
)( cts
.u
.cur
,
3708 "di.readdwarf3.pTD.enum_type.2" );
3710 if (attr
== DW_AT_byte_size
&& cts
.szB
> 0) {
3711 typeE
.Te
.TyEnum
.szB
= cts
.u
.val
;
3713 if (attr
== DW_AT_declaration
) {
3718 if (!typeE
.Te
.TyEnum
.name
)
3719 typeE
.Te
.TyEnum
.name
3720 = ML_(dinfo_strdup
)( "di.readdwarf3.pTD.enum_type.3",
3721 "<anon_enum_type>" );
3723 /* Do we have something that looks sane? */
3724 if (typeE
.Te
.TyEnum
.szB
== 0
3725 /* we must know the size */
3726 /* but not for Ada, which uses such dummy
3727 enumerations as helper for gdb ada mode.
3728 Also GCC allows incomplete enums as GNU extension.
3729 http://gcc.gnu.org/onlinedocs/gcc/Incomplete-Enums.html
3730 These are marked as DW_AT_declaration and won't have
3731 a size. They can only be used in declaration or as
3732 pointer types. You can't allocate variables or storage
3733 using such an enum type. (Also GCC seems to have a bug
3734 that will put such an enumeration_type into a .debug_types
3735 unit which should only contain complete types.) */
3736 && (parser
->language
!= 'A' && !is_decl
)) {
3741 typestack_push( cc
, parser
, td3
, &typeE
, level
);
3745 /* gcc (GCC) 4.4.0 20081017 (experimental) occasionally produces
3746 DW_TAG_enumerator with only a DW_AT_name but no
3747 DW_AT_const_value. This is in violation of the Dwarf3 standard,
3748 and appears to be a new "feature" of gcc - versions 4.3.x and
3749 earlier do not appear to do this. So accept DW_TAG_enumerator
3750 which only have a name but no value. An example:
3752 <1><180>: Abbrev Number: 6 (DW_TAG_enumeration_type)
3753 <181> DW_AT_name : (indirect string, offset: 0xda70):
3755 <185> DW_AT_byte_size : 4
3756 <186> DW_AT_decl_file : 14
3757 <187> DW_AT_decl_line : 1480
3758 <189> DW_AT_sibling : <0x1a7>
3759 <2><18d>: Abbrev Number: 7 (DW_TAG_enumerator)
3760 <18e> DW_AT_name : (indirect string, offset: 0x9e18):
3762 <2><192>: Abbrev Number: 7 (DW_TAG_enumerator)
3763 <193> DW_AT_name : (indirect string, offset: 0x1505f):
3765 <2><197>: Abbrev Number: 7 (DW_TAG_enumerator)
3766 <198> DW_AT_name : (indirect string, offset: 0x16f4a):
3768 <2><19c>: Abbrev Number: 7 (DW_TAG_enumerator)
3769 <19d> DW_AT_name : (indirect string, offset: 0x156dd):
3771 <2><1a1>: Abbrev Number: 7 (DW_TAG_enumerator)
3772 <1a2> DW_AT_name : (indirect string, offset: 0x13660):
3775 if (dtag
== DW_TAG_enumerator
) {
3776 VG_(memset
)( &atomE
, 0, sizeof(atomE
) );
3778 atomE
.tag
= Te_Atom
;
3781 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
3782 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
3783 const name_form
*nf
= &abbv
->nf
[nf_i
];
3785 if (attr
== 0 && form
== 0) break;
3786 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
3787 if (attr
== DW_AT_name
&& cts
.szB
< 0) {
3789 = ML_(cur_read_strdup
)( cts
.u
.cur
,
3790 "di.readdwarf3.pTD.enumerator.1" );
3792 if (attr
== DW_AT_const_value
&& cts
.szB
> 0) {
3793 atomE
.Te
.Atom
.value
= cts
.u
.val
;
3794 atomE
.Te
.Atom
.valueKnown
= True
;
3797 /* Do we have something that looks sane? */
3798 if (atomE
.Te
.Atom
.name
== NULL
)
3800 /* Do we have a plausible parent? */
3801 if (typestack_is_empty(parser
)) goto_bad_DIE
;
3802 vg_assert(ML_(TyEnt__is_type
)(&parser
->qparentE
[parser
->sp
]));
3803 vg_assert(parser
->qparentE
[parser
->sp
].cuOff
!= D3_INVALID_CUOFF
);
3804 if (level
!= parser
->qlevel
[parser
->sp
]+1) goto_bad_DIE
;
3805 if (parser
->qparentE
[parser
->sp
].tag
!= Te_TyEnum
) goto_bad_DIE
;
3806 /* Record this child in the parent */
3807 vg_assert(parser
->qparentE
[parser
->sp
].Te
.TyEnum
.atomRs
);
3808 VG_(addToXA
)( parser
->qparentE
[parser
->sp
].Te
.TyEnum
.atomRs
,
3810 /* And record the child itself */
3814 /* Treat DW_TAG_class_type as if it was a DW_TAG_structure_type. I
3815 don't know if this is correct, but it at least makes this reader
3816 usable for gcc-4.3 produced Dwarf3. */
3817 if (dtag
== DW_TAG_structure_type
|| dtag
== DW_TAG_class_type
3818 || dtag
== DW_TAG_union_type
) {
3819 Bool have_szB
= False
;
3820 Bool is_decl
= False
;
3821 Bool is_spec
= False
;
3822 /* Create a new Type to hold the results. */
3823 VG_(memset
)(&typeE
, 0, sizeof(typeE
));
3825 typeE
.tag
= Te_TyStOrUn
;
3826 typeE
.Te
.TyStOrUn
.name
= NULL
;
3827 typeE
.Te
.TyStOrUn
.typeR
= D3_INVALID_CUOFF
;
3828 typeE
.Te
.TyStOrUn
.fieldRs
3829 = VG_(newXA
)( ML_(dinfo_zalloc
), "di.readdwarf3.pTD.struct_type.1",
3832 typeE
.Te
.TyStOrUn
.complete
= True
;
3833 typeE
.Te
.TyStOrUn
.isStruct
= dtag
== DW_TAG_structure_type
3834 || dtag
== DW_TAG_class_type
;
3837 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
3838 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
3839 const name_form
*nf
= &abbv
->nf
[nf_i
];
3841 if (attr
== 0 && form
== 0) break;
3842 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
3843 if (attr
== DW_AT_name
&& cts
.szB
< 0) {
3844 typeE
.Te
.TyStOrUn
.name
3845 = ML_(cur_read_strdup
)( cts
.u
.cur
,
3846 "di.readdwarf3.ptD.struct_type.2" );
3848 if (attr
== DW_AT_byte_size
&& cts
.szB
>= 0) {
3849 typeE
.Te
.TyStOrUn
.szB
= cts
.u
.val
;
3852 if (attr
== DW_AT_declaration
&& cts
.szB
> 0 && cts
.u
.val
> 0) {
3855 if (attr
== DW_AT_specification
&& cts
.szB
> 0 && cts
.u
.val
> 0) {
3858 if (attr
== DW_AT_signature
&& form
== DW_FORM_ref_sig8
3861 typeE
.Te
.TyStOrUn
.szB
= 8;
3862 typeE
.Te
.TyStOrUn
.typeR
3863 = cook_die_using_form( cc
, (UWord
)cts
.u
.val
, form
);
3866 /* Do we have something that looks sane? */
3867 if (is_decl
&& (!is_spec
)) {
3868 /* It's a DW_AT_declaration. We require the name but
3870 /* JRS 2012-06-28: following discussion w/ tromey, if the
3871 type doesn't have name, just make one up, and accept it.
3872 It might be referred to by other DIEs, so ignoring it
3873 doesn't seem like a safe option. */
3874 if (typeE
.Te
.TyStOrUn
.name
== NULL
)
3875 typeE
.Te
.TyStOrUn
.name
3876 = ML_(dinfo_strdup
)( "di.readdwarf3.ptD.struct_type.3",
3877 "<anon_struct_type>" );
3878 typeE
.Te
.TyStOrUn
.complete
= False
;
3879 /* JRS 2009 Aug 10: <possible kludge>? */
3880 /* Push this tyent on the stack, even though it's incomplete.
3881 It appears that gcc-4.4 on Fedora 11 will sometimes create
3882 DW_TAG_member entries for it, and so we need to have a
3883 plausible parent present in order for that to work. See
3884 #200029 comments 8 and 9. */
3885 typestack_push( cc
, parser
, td3
, &typeE
, level
);
3886 /* </possible kludge> */
3889 if ((!is_decl
) /* && (!is_spec) */) {
3890 /* this is the common, ordinary case */
3891 /* The name can be present, or not */
3893 /* We must know the size.
3894 But in Ada, record with discriminants might have no size.
3895 But in C, VLA in the middle of a struct (gcc extension)
3897 Instead, some GNAT dwarf extensions and/or dwarf entries
3898 allow to calculate the struct size at runtime.
3899 We cannot do that (yet?) so, the temporary kludge is to use
3901 typeE
.Te
.TyStOrUn
.szB
= 1;
3904 typestack_push( cc
, parser
, td3
, &typeE
, level
);
3908 /* don't know how to handle any other variants just now */
3913 if (dtag
== DW_TAG_member
) {
3914 /* Acquire member entries for both DW_TAG_structure_type and
3915 DW_TAG_union_type. They differ minorly, in that struct
3916 members must have a DW_AT_data_member_location expression
3917 whereas union members must not. */
3918 Bool parent_is_struct
;
3919 VG_(memset
)( &fieldE
, 0, sizeof(fieldE
) );
3920 fieldE
.cuOff
= posn
;
3921 fieldE
.tag
= Te_Field
;
3922 fieldE
.Te
.Field
.typeR
= D3_INVALID_CUOFF
;
3925 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
3926 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
3927 const name_form
*nf
= &abbv
->nf
[nf_i
];
3929 if (attr
== 0 && form
== 0) break;
3930 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
3931 if (attr
== DW_AT_name
&& cts
.szB
< 0) {
3932 fieldE
.Te
.Field
.name
3933 = ML_(cur_read_strdup
)( cts
.u
.cur
,
3934 "di.readdwarf3.ptD.member.1" );
3936 if (attr
== DW_AT_type
&& cts
.szB
> 0) {
3937 fieldE
.Te
.Field
.typeR
3938 = cook_die_using_form( cc
, (UWord
)cts
.u
.val
, form
);
3940 /* There are 2 different cases for DW_AT_data_member_location.
3941 If it is a constant class attribute, it contains byte offset
3942 from the beginning of the containing entity.
3943 Otherwise it is a location expression. */
3944 if (attr
== DW_AT_data_member_location
&& cts
.szB
> 0) {
3945 fieldE
.Te
.Field
.nLoc
= -1;
3946 fieldE
.Te
.Field
.pos
.offset
= cts
.u
.val
;
3948 if (attr
== DW_AT_data_member_location
&& cts
.szB
<= 0) {
3949 fieldE
.Te
.Field
.nLoc
= (UWord
)(-cts
.szB
);
3950 fieldE
.Te
.Field
.pos
.loc
3951 = ML_(cur_read_memdup
)( cts
.u
.cur
,
3952 (SizeT
)fieldE
.Te
.Field
.nLoc
,
3953 "di.readdwarf3.ptD.member.2" );
3956 /* Do we have a plausible parent? */
3957 if (typestack_is_empty(parser
)) goto_bad_DIE
;
3958 vg_assert(ML_(TyEnt__is_type
)(&parser
->qparentE
[parser
->sp
]));
3959 vg_assert(parser
->qparentE
[parser
->sp
].cuOff
!= D3_INVALID_CUOFF
);
3960 if (level
!= parser
->qlevel
[parser
->sp
]+1) goto_bad_DIE
;
3961 if (parser
->qparentE
[parser
->sp
].tag
!= Te_TyStOrUn
) goto_bad_DIE
;
3962 /* Do we have something that looks sane? If this is a member of a
3963 struct, we must have a location expression; but if a member
3964 of a union that is irrelevant (D3 spec sec 5.6.6). We ought
3965 to reject in the latter case, but some compilers have been
3966 observed to emit constant-zero expressions. So just ignore
3969 = parser
->qparentE
[parser
->sp
].Te
.TyStOrUn
.isStruct
;
3970 if (!fieldE
.Te
.Field
.name
)
3971 fieldE
.Te
.Field
.name
3972 = ML_(dinfo_strdup
)( "di.readdwarf3.ptD.member.3",
3974 if (fieldE
.Te
.Field
.typeR
== D3_INVALID_CUOFF
)
3976 if (fieldE
.Te
.Field
.nLoc
) {
3977 if (!parent_is_struct
) {
3978 /* If this is a union type, pretend we haven't seen the data
3979 member location expression, as it is by definition
3980 redundant (it must be zero). */
3981 if (fieldE
.Te
.Field
.nLoc
> 0)
3982 ML_(dinfo_free
)(fieldE
.Te
.Field
.pos
.loc
);
3983 fieldE
.Te
.Field
.pos
.loc
= NULL
;
3984 fieldE
.Te
.Field
.nLoc
= 0;
3986 /* Record this child in the parent */
3987 fieldE
.Te
.Field
.isStruct
= parent_is_struct
;
3988 vg_assert(parser
->qparentE
[parser
->sp
].Te
.TyStOrUn
.fieldRs
);
3989 VG_(addToXA
)( parser
->qparentE
[parser
->sp
].Te
.TyStOrUn
.fieldRs
,
3991 /* And record the child itself */
3994 /* Member with no location - this can happen with static
3995 const members in C++ code which are compile time constants
3996 that do not exist in the class. They're not of any interest
3997 to us so we ignore them. */
3998 ML_(TyEnt__make_EMPTY
)(&fieldE
);
4002 if (dtag
== DW_TAG_array_type
) {
4003 VG_(memset
)(&typeE
, 0, sizeof(typeE
));
4005 typeE
.tag
= Te_TyArray
;
4006 typeE
.Te
.TyArray
.typeR
= D3_INVALID_CUOFF
;
4007 typeE
.Te
.TyArray
.boundRs
4008 = VG_(newXA
)( ML_(dinfo_zalloc
), "di.readdwarf3.ptD.array_type.1",
4013 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
4014 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
4015 const name_form
*nf
= &abbv
->nf
[nf_i
];
4017 if (attr
== 0 && form
== 0) break;
4018 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
4019 if (attr
== DW_AT_type
&& cts
.szB
> 0) {
4020 typeE
.Te
.TyArray
.typeR
4021 = cook_die_using_form( cc
, (UWord
)cts
.u
.val
, form
);
4024 if (typeE
.Te
.TyArray
.typeR
== D3_INVALID_CUOFF
)
4027 typestack_push( cc
, parser
, td3
, &typeE
, level
);
4031 /* this is a subrange type defining the bounds of an array. */
4032 if (dtag
== DW_TAG_subrange_type
4033 && subrange_type_denotes_array_bounds(parser
, dtag
)) {
4034 Bool have_lower
= False
;
4035 Bool have_upper
= False
;
4036 Bool have_count
= False
;
4041 switch (parser
->language
) {
4042 case 'C': have_lower
= True
; lower
= 0; break;
4043 case 'F': have_lower
= True
; lower
= 1; break;
4044 case '?': have_lower
= False
; break;
4045 case 'A': have_lower
= False
; break;
4046 default: vg_assert(0); /* assured us by handling of
4047 DW_TAG_compile_unit in this fn */
4050 VG_(memset
)( &boundE
, 0, sizeof(boundE
) );
4051 boundE
.cuOff
= D3_INVALID_CUOFF
;
4052 boundE
.tag
= Te_Bound
;
4055 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
4056 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
4057 const name_form
*nf
= &abbv
->nf
[nf_i
];
4059 if (attr
== 0 && form
== 0) break;
4060 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
4061 if (attr
== DW_AT_lower_bound
&& cts
.szB
> 0
4062 && form_expected_for_bound (form
)) {
4063 lower
= (Long
)cts
.u
.val
;
4066 if (attr
== DW_AT_upper_bound
&& cts
.szB
> 0
4067 && form_expected_for_bound (form
)) {
4068 upper
= (Long
)cts
.u
.val
;
4071 if (attr
== DW_AT_count
&& cts
.szB
> 0) {
4072 count
= (Long
)cts
.u
.val
;
4076 /* FIXME: potentially skip the rest if no parent present, since
4077 it could be the case that this subrange type is free-standing
4078 (not being used to describe the bounds of a containing array
4080 /* Do we have a plausible parent? */
4081 if (typestack_is_empty(parser
)) goto_bad_DIE
;
4082 vg_assert(ML_(TyEnt__is_type
)(&parser
->qparentE
[parser
->sp
]));
4083 vg_assert(parser
->qparentE
[parser
->sp
].cuOff
!= D3_INVALID_CUOFF
);
4084 if (level
!= parser
->qlevel
[parser
->sp
]+1) goto_bad_DIE
;
4085 if (parser
->qparentE
[parser
->sp
].tag
!= Te_TyArray
) goto_bad_DIE
;
4087 /* Figure out if we have a definite range or not */
4088 if (have_lower
&& have_upper
&& (!have_count
)) {
4089 boundE
.Te
.Bound
.knownL
= True
;
4090 boundE
.Te
.Bound
.knownU
= True
;
4091 boundE
.Te
.Bound
.boundL
= lower
;
4092 boundE
.Te
.Bound
.boundU
= upper
;
4094 else if (have_lower
&& (!have_upper
) && (!have_count
)) {
4095 boundE
.Te
.Bound
.knownL
= True
;
4096 boundE
.Te
.Bound
.knownU
= False
;
4097 boundE
.Te
.Bound
.boundL
= lower
;
4098 boundE
.Te
.Bound
.boundU
= 0;
4100 else if ((!have_lower
) && have_upper
&& (!have_count
)) {
4101 boundE
.Te
.Bound
.knownL
= False
;
4102 boundE
.Te
.Bound
.knownU
= True
;
4103 boundE
.Te
.Bound
.boundL
= 0;
4104 boundE
.Te
.Bound
.boundU
= upper
;
4106 else if ((!have_lower
) && (!have_upper
) && (!have_count
)) {
4107 boundE
.Te
.Bound
.knownL
= False
;
4108 boundE
.Te
.Bound
.knownU
= False
;
4109 boundE
.Te
.Bound
.boundL
= 0;
4110 boundE
.Te
.Bound
.boundU
= 0;
4111 } else if (have_lower
&& (!have_upper
) && (have_count
)) {
4112 boundE
.Te
.Bound
.knownL
= True
;
4113 boundE
.Te
.Bound
.knownU
= True
;
4114 boundE
.Te
.Bound
.boundL
= lower
;
4115 boundE
.Te
.Bound
.boundU
= lower
+ count
;
4117 /* FIXME: handle more cases */
4121 /* Record this bound in the parent */
4122 boundE
.cuOff
= posn
;
4123 vg_assert(parser
->qparentE
[parser
->sp
].Te
.TyArray
.boundRs
);
4124 VG_(addToXA
)( parser
->qparentE
[parser
->sp
].Te
.TyArray
.boundRs
,
4126 /* And record the child itself */
4130 /* typedef or subrange_type other than array bounds. */
4131 if (dtag
== DW_TAG_typedef
4132 || (dtag
== DW_TAG_subrange_type
4133 && !subrange_type_denotes_array_bounds(parser
, dtag
))) {
4134 /* subrange_type other than array bound is only for Ada. */
4135 vg_assert (dtag
== DW_TAG_typedef
|| parser
->language
== 'A');
4136 /* We can pick up a new typedef/subrange_type any time. */
4137 VG_(memset
)(&typeE
, 0, sizeof(typeE
));
4138 typeE
.cuOff
= D3_INVALID_CUOFF
;
4139 typeE
.tag
= Te_TyTyDef
;
4140 typeE
.Te
.TyTyDef
.name
= NULL
;
4141 typeE
.Te
.TyTyDef
.typeR
= D3_INVALID_CUOFF
;
4144 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
4145 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
4146 const name_form
*nf
= &abbv
->nf
[nf_i
];
4148 if (attr
== 0 && form
== 0) break;
4149 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
4150 if (attr
== DW_AT_name
&& cts
.szB
< 0) {
4151 typeE
.Te
.TyTyDef
.name
4152 = ML_(cur_read_strdup
)( cts
.u
.cur
,
4153 "di.readdwarf3.ptD.typedef.1" );
4155 if (attr
== DW_AT_type
&& cts
.szB
> 0) {
4156 typeE
.Te
.TyTyDef
.typeR
4157 = cook_die_using_form( cc
, (UWord
)cts
.u
.val
, form
);
4160 /* Do we have something that looks sane?
4161 gcc gnat Ada generates minimal typedef
4163 <6><91cc>: DW_TAG_typedef
4164 DW_AT_abstract_ori: <9066>
4165 g++ for OMP can generate artificial functions that have
4166 parameters that refer to pointers to unnamed typedefs.
4167 See https://bugs.kde.org/show_bug.cgi?id=273475
4168 So we cannot require a name for a DW_TAG_typedef.
4173 if (dtag
== DW_TAG_subroutine_type
) {
4174 /* function type? just record that one fact and ask no
4175 further questions. */
4176 VG_(memset
)(&typeE
, 0, sizeof(typeE
));
4177 typeE
.cuOff
= D3_INVALID_CUOFF
;
4178 typeE
.tag
= Te_TyFn
;
4182 if (dtag
== DW_TAG_volatile_type
|| dtag
== DW_TAG_const_type
4183 || dtag
== DW_TAG_restrict_type
) {
4185 VG_(memset
)(&typeE
, 0, sizeof(typeE
));
4186 typeE
.cuOff
= D3_INVALID_CUOFF
;
4187 typeE
.tag
= Te_TyQual
;
4188 typeE
.Te
.TyQual
.qual
4189 = (dtag
== DW_TAG_volatile_type
? 'V'
4190 : (dtag
== DW_TAG_const_type
? 'C' : 'R'));
4191 /* target type defaults to 'void' */
4192 typeE
.Te
.TyQual
.typeR
= D3_FAKEVOID_CUOFF
;
4195 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
4196 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
4197 const name_form
*nf
= &abbv
->nf
[nf_i
];
4199 if (attr
== 0 && form
== 0) break;
4200 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, nf
);
4201 if (attr
== DW_AT_type
&& cts
.szB
> 0) {
4202 typeE
.Te
.TyQual
.typeR
4203 = cook_die_using_form( cc
, (UWord
)cts
.u
.val
, form
);
4207 /* gcc sometimes generates DW_TAG_const/volatile_type without
4208 DW_AT_type and GDB appears to interpret the type as 'const
4209 void' (resp. 'volatile void'). So just allow it .. */
4210 if (have_ty
== 1 || have_ty
== 0)
4217 * Treat DW_TAG_unspecified_type as type void. An example of DW_TAG_unspecified_type:
4219 * $ readelf --debug-dump /usr/lib/debug/usr/lib/libstdc++.so.6.0.16.debug
4220 * <1><10d4>: Abbrev Number: 53 (DW_TAG_unspecified_type)
4221 * <10d5> DW_AT_name : (indirect string, offset: 0xdb7): decltype(nullptr)
4223 if (dtag
== DW_TAG_unspecified_type
) {
4224 VG_(memset
)(&typeE
, 0, sizeof(typeE
));
4225 typeE
.cuOff
= D3_INVALID_CUOFF
;
4226 typeE
.tag
= Te_TyQual
;
4227 typeE
.Te
.TyQual
.typeR
= D3_FAKEVOID_CUOFF
;
4231 /* else ignore this DIE */
4236 if (0) VG_(printf
)("YYYY Acquire Type\n");
4237 vg_assert(ML_(TyEnt__is_type
)( &typeE
));
4238 vg_assert(typeE
.cuOff
== D3_INVALID_CUOFF
|| typeE
.cuOff
== posn
);
4240 VG_(addToXA
)( tyents
, &typeE
);
4245 if (0) VG_(printf
)("YYYY Acquire Atom\n");
4246 vg_assert(atomE
.tag
== Te_Atom
);
4247 vg_assert(atomE
.cuOff
== D3_INVALID_CUOFF
|| atomE
.cuOff
== posn
);
4249 VG_(addToXA
)( tyents
, &atomE
);
4254 /* For union members, Expr should be absent */
4255 if (0) VG_(printf
)("YYYY Acquire Field\n");
4256 vg_assert(fieldE
.tag
== Te_Field
);
4257 vg_assert(fieldE
.Te
.Field
.nLoc
<= 0 || fieldE
.Te
.Field
.pos
.loc
!= NULL
);
4258 vg_assert(fieldE
.Te
.Field
.nLoc
!= 0 || fieldE
.Te
.Field
.pos
.loc
== NULL
);
4259 if (fieldE
.Te
.Field
.isStruct
) {
4260 vg_assert(fieldE
.Te
.Field
.nLoc
!= 0);
4262 vg_assert(fieldE
.Te
.Field
.nLoc
== 0);
4264 vg_assert(fieldE
.cuOff
== D3_INVALID_CUOFF
|| fieldE
.cuOff
== posn
);
4265 fieldE
.cuOff
= posn
;
4266 VG_(addToXA
)( tyents
, &fieldE
);
4271 if (0) VG_(printf
)("YYYY Acquire Bound\n");
4272 vg_assert(boundE
.tag
== Te_Bound
);
4273 vg_assert(boundE
.cuOff
== D3_INVALID_CUOFF
|| boundE
.cuOff
== posn
);
4274 boundE
.cuOff
= posn
;
4275 VG_(addToXA
)( tyents
, &boundE
);
4280 dump_bad_die_and_barf("parse_type_DIE", dtag
, posn
, level
,
4281 c_die
, saved_die_c_offset
,
4288 /*------------------------------------------------------------*/
4290 /*--- Compression of type DIE information ---*/
4292 /*------------------------------------------------------------*/
4294 static UWord
chase_cuOff ( Bool
* changed
,
4295 const XArray
* /* of TyEnt */ ents
,
4296 TyEntIndexCache
* ents_cache
,
4300 ent
= ML_(TyEnts__index_by_cuOff
)( ents
, ents_cache
, cuOff
);
4303 VG_(printf
)("chase_cuOff: no entry for 0x%05lx\n", cuOff
);
4308 vg_assert(ent
->tag
!= Te_EMPTY
);
4309 if (ent
->tag
!= Te_INDIR
) {
4313 vg_assert(ent
->Te
.INDIR
.indR
< cuOff
);
4315 return ent
->Te
.INDIR
.indR
;
4320 void chase_cuOffs_in_XArray ( Bool
* changed
,
4321 const XArray
* /* of TyEnt */ ents
,
4322 TyEntIndexCache
* ents_cache
,
4323 /*MOD*/XArray
* /* of UWord */ cuOffs
)
4326 Word i
, n
= VG_(sizeXA
)( cuOffs
);
4327 for (i
= 0; i
< n
; i
++) {
4329 UWord
* p
= VG_(indexXA
)( cuOffs
, i
);
4330 *p
= chase_cuOff( &b
, ents
, ents_cache
, *p
);
/* Rewrite the reference ('R' / 'Rs') fields held in the tyent 'te'.
   A field holding a single cuOff (INDIR.indR, Field.typeR,
   TyPorR.typeR, TyTyDef.typeR, TyArray.typeR, TyQual.typeR) is passed
   through chase_cuOff; a field holding an array of cuOffs
   (TyStOrUn.fieldRs, TyEnum.atomRs, TyArray.boundRs) is passed through
   chase_cuOffs_in_XArray.  After each call, 'changed' is set to True
   if that call reported a change through 'b'.
   NOTE(review): the selection of which union member is live for a
   given te->tag, and the return statement, are not visible in this
   listing; presumably the function returns 'changed' -- confirm. */
4337 static Bool
TyEnt__subst_R_fields ( const XArray
* /* of TyEnt */ ents
,
4338 TyEntIndexCache
* ents_cache
,
4341 Bool b
, changed
= False
;
4347 = chase_cuOff( &b
, ents
, ents_cache
, te
->Te
.INDIR
.indR
);
4348 if (b
) changed
= True
;
4356 = chase_cuOff( &b
, ents
, ents_cache
, te
->Te
.Field
.typeR
);
4357 if (b
) changed
= True
;
4368 = chase_cuOff( &b
, ents
, ents_cache
, te
->Te
.TyPorR
.typeR
);
4369 if (b
) changed
= True
;
4372 te
->Te
.TyTyDef
.typeR
4373 = chase_cuOff( &b
, ents
, ents_cache
, te
->Te
.TyTyDef
.typeR
);
4374 if (b
) changed
= True
;
4377 chase_cuOffs_in_XArray( &b
, ents
, ents_cache
, te
->Te
.TyStOrUn
.fieldRs
);
4378 if (b
) changed
= True
;
4381 chase_cuOffs_in_XArray( &b
, ents
, ents_cache
, te
->Te
.TyEnum
.atomRs
);
4382 if (b
) changed
= True
;
/* Arrays carry two kinds of reference: the element type and the
   list of bounds; both are chased. */
4385 te
->Te
.TyArray
.typeR
4386 = chase_cuOff( &b
, ents
, ents_cache
, te
->Te
.TyArray
.typeR
);
4387 if (b
) changed
= True
;
4388 chase_cuOffs_in_XArray( &b
, ents
, ents_cache
, te
->Te
.TyArray
.boundRs
);
4389 if (b
) changed
= True
;
4395 = chase_cuOff( &b
, ents
, ents_cache
, te
->Te
.TyQual
.typeR
);
4396 if (b
) changed
= True
;
4407 /* Make a pass over 'ents'. For each tyent, inspect the target of any
4408 'R' or 'Rs' fields (those which refer to other tyents), and replace
4409 any which point to INDIR nodes with the target of the indirection
4410 (which should not itself be an indirection). In summary, this
4411 routine shorts out all references to indirection nodes. */
4413 Word
dedup_types_substitution_pass ( /*MOD*/XArray
* /* of TyEnt */ ents
,
4414 TyEntIndexCache
* ents_cache
)
4416 Word i
, n
, nChanged
= 0;
4418 n
= VG_(sizeXA
)( ents
);
4419 for (i
= 0; i
< n
; i
++) {
4420 TyEnt
* ent
= VG_(indexXA
)( ents
, i
);
4421 vg_assert(ent
->tag
!= Te_EMPTY
);
4422 /* We have to substitute everything, even indirections, so as to
4423 ensure that chains of indirections don't build up. */
4424 b
= TyEnt__subst_R_fields( ents
, ents_cache
, ent
);
4433 /* Make a pass over 'ents', building a dictionary of TyEnts as we go.
4434 Look up each new tyent in the dictionary in turn. If it is already
4435 in the dictionary, replace this tyent with an indirection to the
4436 existing one, and delete any malloc'd stuff hanging off this one.
4437 In summary, this routine commons up all tyents that are identical
4438 as defined by TyEnt__cmp_by_all_except_cuOff. */
4440 Word
dedup_types_commoning_pass ( /*MOD*/XArray
* /* of TyEnt */ ents
)
4442 Word n
, i
, nDeleted
;
4443 WordFM
* dict
; /* TyEnt* -> void */
4448 ML_(dinfo_zalloc
), "di.readdwarf3.dtcp.1",
4450 (Word(*)(UWord
,UWord
)) ML_(TyEnt__cmp_by_all_except_cuOff
)
4454 n
= VG_(sizeXA
)( ents
);
4455 for (i
= 0; i
< n
; i
++) {
4456 ent
= VG_(indexXA
)( ents
, i
);
4457 vg_assert(ent
->tag
!= Te_EMPTY
);
4459 /* Ignore indirections, although check that they are
4460 not forming a cycle. */
4461 if (ent
->tag
== Te_INDIR
) {
4462 vg_assert(ent
->Te
.INDIR
.indR
< ent
->cuOff
);
4467 if (VG_(lookupFM
)( dict
, &keyW
, &valW
, (UWord
)ent
)) {
4468 /* it's already in the dictionary. */
4469 TyEnt
* old
= (TyEnt
*)keyW
;
4470 vg_assert(valW
== 0);
4471 vg_assert(old
!= ent
);
4472 vg_assert(old
->tag
!= Te_INDIR
);
4473 /* since we are traversing the array in increasing order of
4475 vg_assert(old
->cuOff
< ent
->cuOff
);
4476 /* So anyway, dump this entry and replace it with an
4477 indirection to the one in the dictionary. Note that the
4478 assertion above guarantees that we cannot create cycles of
4479 indirections, since we are always creating an indirection
4480 to a tyent with a cuOff lower than this one. */
4481 ML_(TyEnt__make_EMPTY
)( ent
);
4482 ent
->tag
= Te_INDIR
;
4483 ent
->Te
.INDIR
.indR
= old
->cuOff
;
4486 /* not in dictionary; add it and keep going. */
4487 VG_(addToFM
)( dict
, (UWord
)ent
, 0 );
4491 VG_(deleteFM
)( dict
, NULL
, NULL
);
/* Compress the tyent array 'ents'.  The array is first sorted by
   cuOff; then a commoning pass followed by a substitution pass is
   run repeatedly for as long as either pass reports more than
   'nThresh' changes.  Afterwards every Te_INDIR entry is looked up
   through 'ents_cache' and its target is asserted not to be a
   Te_INDIR itself.  Progress and overall before/after counts are
   emitted with TRACE_D3.
   NOTE(review): the statement that initialises 'nThresh', and the
   updates of 'm', are not visible in this listing. */
4498 void dedup_types ( Bool td3
,
4499 /*MOD*/XArray
* /* of TyEnt */ ents
,
4500 TyEntIndexCache
* ents_cache
)
4502 Word m
, n
, i
, nDel
, nSubst
, nThresh
;
4505 n
= VG_(sizeXA
)( ents
);
4507 /* If a commoning pass and a substitution pass both make fewer than
4508 this many changes, just stop. It's pointless to burn up CPU
4509 time trying to compress the last 1% or so out of the array. */
4512 /* First we must sort .ents by its .cuOff fields, so we
4513 can index into it. */
4514 VG_(setCmpFnXA
)( ents
, (XACmpFn_t
) ML_(TyEnt__cmp_by_cuOff_only
) );
4515 VG_(sortXA
)( ents
);
4517 /* Now repeatedly do commoning and substitution passes over
4518 the array, until there are no more changes. */
4520 nDel
= dedup_types_commoning_pass ( ents
);
4521 nSubst
= dedup_types_substitution_pass ( ents
, ents_cache
);
4522 vg_assert(nDel
>= 0 && nSubst
>= 0);
4523 TRACE_D3(" %ld deletions, %ld substitutions\n", nDel
, nSubst
);
4524 } while (nDel
> nThresh
|| nSubst
> nThresh
);
4526 /* Sanity check: all INDIR nodes should point at a non-INDIR thing.
4527 In fact this should be true at the end of every loop iteration
4528 above (a commoning pass followed by a substitution pass), but
4529 checking it on every iteration is excessively expensive. Note,
4530 this loop also computes 'm' for the stats printing below it. */
4532 n
= VG_(sizeXA
)( ents
);
4533 for (i
= 0; i
< n
; i
++) {
4535 ent
= VG_(indexXA
)( ents
, i
);
4536 if (ent
->tag
!= Te_INDIR
) continue;
4538 ind
= ML_(TyEnts__index_by_cuOff
)( ents
, ents_cache
,
4539 ent
->Te
.INDIR
.indR
);
4541 vg_assert(ind
->tag
!= Te_INDIR
);
4544 TRACE_D3("Overall: %ld before, %ld after\n", n
, n
-m
);
4548 /*------------------------------------------------------------*/
4550 /*--- Resolution of references to type DIEs ---*/
4552 /*------------------------------------------------------------*/
4554 /* Make a pass through the (temporary) variables array. Examine the
4555 type of each variable, check is it found, and chase any Te_INDIRs.
4556 Postcondition is: each variable has a typeR field that refers to a
4557 valid type in tyents, or a Te_UNKNOWN, and is certainly guaranteed
4558 not to refer to a Te_INDIR. (This is so that we can throw all the
4559 Te_INDIRs away later). */
4561 __attribute__((noinline
))
4562 static void resolve_variable_types (
4563 void (*barf
)( const HChar
* ) __attribute__((noreturn
)),
4564 /*R-O*/XArray
* /* of TyEnt */ ents
,
4565 /*MOD*/TyEntIndexCache
* ents_cache
,
4566 /*MOD*/XArray
* /* of TempVar* */ vars
4570 n
= VG_(sizeXA
)( vars
);
4571 for (i
= 0; i
< n
; i
++) {
4572 TempVar
* var
= *(TempVar
**)VG_(indexXA
)( vars
, i
);
4573 /* This is the stated type of the variable. But it might be
4574 an indirection, so be careful. */
4575 TyEnt
* ent
= ML_(TyEnts__index_by_cuOff
)( ents
, ents_cache
,
4577 if (ent
&& ent
->tag
== Te_INDIR
) {
4578 ent
= ML_(TyEnts__index_by_cuOff
)( ents
, ents_cache
,
4579 ent
->Te
.INDIR
.indR
);
4581 vg_assert(ent
->tag
!= Te_INDIR
);
4584 /* Deal first with "normal" cases */
4585 if (ent
&& ML_(TyEnt__is_type
)(ent
)) {
4586 var
->typeR
= ent
->cuOff
;
4590 /* If there's no ent, it probably we did not manage to read a
4591 type at the cuOffset which is stated as being this variable's
4592 type. Maybe a deficiency in parse_type_DIE. Complain. */
4594 VG_(printf
)("\n: Invalid cuOff = 0x%05lx\n", var
->typeR
);
4595 barf("resolve_variable_types: "
4596 "cuOff does not refer to a known type");
4599 /* If ent has any other tag, something bad happened, along the
4600 lines of var->typeR not referring to a type at all. */
4601 vg_assert(ent
->tag
== Te_UNKNOWN
);
4602 /* Just accept it; the type will be useless, but at least keep
4604 var
->typeR
= ent
->cuOff
;
4609 /*------------------------------------------------------------*/
4611 /*--- Parsing of Compilation Units ---*/
4613 /*------------------------------------------------------------*/
4615 static Int
cmp_TempVar_by_dioff ( const void* v1
, const void* v2
) {
4616 const TempVar
* t1
= *(const TempVar
*const *)v1
;
4617 const TempVar
* t2
= *(const TempVar
*const *)v2
;
4618 if (t1
->dioff
< t2
->dioff
) return -1;
4619 if (t1
->dioff
> t2
->dioff
) return 1;
/* Read the DIE at cursor 'c' and then deal with its children.

   The DIE's position is cooked via cook_die, its abbreviation code is
   read as a ULEB128 and resolved with get_abbv.  cc->barf is called
   for a zero tag or for a has_children value that is neither
   DW_children_no nor DW_children_yes.

   When VG_(clo_read_var_info) is set the DIE is offered to
   parse_type_DIE and parse_var_DIE, and 'parse_children' is forced to
   True.  When VG_(clo_read_inline_info) is set it is offered to
   parse_inl_DIE, and 'sibling' is taken from inlparser->sibling.
   After each of the first two parsers the cursor is put back at
   'start_die_c_offset' if it has moved, with the end position
   remembered in 'after_die_c_offset'.  If no parser consumed the DIE,
   skip_DIE steps over it.

   If the DIE has children, they are read by recursion at level+1
   until a zero tag is peeked (when 'parse_children' is set or no
   sibling is known); otherwise the cursor jumps straight to
   'sibling'. */
4623 static void read_DIE (
4624 /*MOD*/WordFM
* /* of (XArray* of AddrRange, void) */ rangestree
,
4625 /*MOD*/XArray
* /* of TyEnt */ tyents
,
4626 /*MOD*/XArray
* /* of TempVar* */ tempvars
,
4627 /*MOD*/XArray
* /* of GExpr* */ gexprs
,
4628 /*MOD*/D3TypeParser
* typarser
,
4629 /*MOD*/D3VarParser
* varparser
,
4630 /*MOD*/D3InlParser
* inlparser
,
4631 Cursor
* c
, Bool td3
, CUConst
* cc
, Int level
4635 ULong atag
, abbv_code
;
4638 UWord start_die_c_offset
;
4639 UWord after_die_c_offset
;
4640 // If the DIE we will parse has a sibling and the parser(s) are
4641 // all indicating that parse_children is not necessary, then
4642 // we will skip the children by jumping to the sibling of this DIE
4643 // (if it has a sibling).
4645 Bool parse_children
= False
;
4647 /* --- Deal with this DIE --- */
4648 posn
= cook_die( cc
, get_position_of_Cursor( c
) );
4649 abbv_code
= get_ULEB128( c
);
4650 abbv
= get_abbv(cc
, abbv_code
);
4655 trace_DIE ((DW_TAG
)atag
, posn
, level
,
4656 get_position_of_Cursor( c
), abbv
, cc
);
4660 cc
->barf("read_DIE: invalid zero tag on DIE");
4662 has_children
= abbv
->has_children
;
4663 if (has_children
!= DW_children_no
&& has_children
!= DW_children_yes
)
4664 cc
->barf("read_DIE: invalid has_children value");
4666 /* We're set up to look at the fields of this DIE. Hand it off to
4667 any parser(s) that want to see it. Since they will in general
4668 advance the DIE cursor, remember the current settings so that we
4669 can then back up. . */
4670 start_die_c_offset
= get_position_of_Cursor( c
);
4671 after_die_c_offset
= 0; // set to c position if a parser has read the DIE.
4673 if (VG_(clo_read_var_info
)) {
4674 parse_type_DIE( tyents
,
4683 if (get_position_of_Cursor( c
) != start_die_c_offset
) {
4684 after_die_c_offset
= get_position_of_Cursor( c
);
4685 set_position_of_Cursor( c
, start_die_c_offset
);
4688 parse_var_DIE( rangestree
,
4699 if (get_position_of_Cursor( c
) != start_die_c_offset
) {
4700 after_die_c_offset
= get_position_of_Cursor( c
);
4701 set_position_of_Cursor( c
, start_die_c_offset
);
4704 parse_children
= True
;
4705 // type and var parsers do not have logic to skip childrens and establish
4706 // the value of sibling.
4709 if (VG_(clo_read_inline_info
)) {
4710 inlparser
->sibling
= 0;
4712 parse_inl_DIE( inlparser
,
4721 if (get_position_of_Cursor( c
) != start_die_c_offset
) {
4722 after_die_c_offset
= get_position_of_Cursor( c
);
4723 // Last parser, no need to reset the cursor to start_die_c_offset.
4726 sibling
= inlparser
->sibling
;
4727 vg_assert (inlparser
->sibling
== 0 || inlparser
->sibling
== sibling
);
4730 if (after_die_c_offset
> 0) {
4731 // DIE was read by a parser above, so we know where the DIE ends.
4732 set_position_of_Cursor( c
, after_die_c_offset
);
4734 /* No parser has parsed this DIE. So, we need to skip the DIE,
4735 in order to read the next DIE.
4736 At the same time, establish sibling value if the DIE has one. */
4737 TRACE_D3(" uninteresting DIE -> skipping ...\n");
4738 skip_DIE (&sibling
, c
, abbv
, cc
);
4741 /* --- Now recurse into its children, if any
4742 and the parsing of the children is requested by a parser --- */
4743 if (has_children
== DW_children_yes
) {
4744 if (parse_children
|| sibling
== 0) {
4745 if (0) TRACE_D3("BEGIN children of level %d\n", level
);
4747 atag
= peek_ULEB128( c
);
4748 if (atag
== 0) break;
4749 read_DIE( rangestree
, tyents
, tempvars
, gexprs
,
4750 typarser
, varparser
, inlparser
,
4751 c
, td3
, cc
, level
+1 );
4753 /* Now we need to eat the terminating zero */
4754 atag
= get_ULEB128( c
);
4755 vg_assert(atag
== 0);
4756 if (0) TRACE_D3("END children of level %d\n", level
);
4758 // We can skip the childrens, by jumping to the sibling
4759 TRACE_D3(" SKIPPING DIE's children,"
4760 "jumping to sibling <%d><%lx>\n",
4762 set_position_of_Cursor( c
, sibling
);
/* Trace-print the contents of the .debug_loc slice 'escn_debug_loc',
   if it is valid.  Tracing is controlled by di->trace_symtab (held
   in 'td3').  Entries are read as pairs of host words: a (0,0) pair
   is printed as an end-of-list marker; otherwise the pair is printed
   (offset by 'dl_base') and followed by a 16-bit length and
   expression bytes printed in hex.  'barf' is handed to init_Cursor
   together with an overrun message.  See the caveat in the body:
   the walk assumes entries are packed end to end. */
4768 static void trace_debug_loc (const DebugInfo
* di
,
4769 __attribute__((noreturn
)) void (*barf
)( const HChar
* ),
4770 DiSlice escn_debug_loc
)
4773 /* This doesn't work properly because it assumes all entries are
4774 packed end to end, with no holes. But that doesn't always
4775 appear to be the case, so it loses sync. And the D3 spec
4776 doesn't appear to require a no-hole situation either. */
4777 /* Display .debug_loc */
4780 Cursor loc
; /* for showing .debug_loc */
4781 Bool td3
= di
->trace_symtab
;
4784 TRACE_SYMTAB("\n------ The contents of .debug_loc ------\n");
4785 TRACE_SYMTAB(" Offset Begin End Expression\n");
4786 if (ML_(sli_is_valid
)(escn_debug_loc
)) {
4787 init_Cursor( &loc
, escn_debug_loc
, 0, barf
,
4788 "Overrun whilst reading .debug_loc section(1)" );
4794 if (is_at_end_Cursor( &loc
))
4797 /* Read a (host-)word pair. This is something of a hack since
4798 the word size to read is really dictated by the ELF file;
4799 however, we assume we're reading a file with the same
4800 word-sizeness as the host. Reasonably enough. */
4801 w1
= get_UWord( &loc
);
4802 w2
= get_UWord( &loc
);
4804 if (w1
== 0 && w2
== 0) {
4805 /* end of list. reset 'base' */
4806 TRACE_D3(" %08lx <End of list>\n", dl_offset
);
4808 dl_offset
= get_position_of_Cursor( &loc
);
4813 /* new value for 'base' */
4814 TRACE_D3(" %08lx %16lx %08lx (base address)\n",
4820 /* else a location expression follows */
4821 TRACE_D3(" %08lx %08lx %08lx ",
4822 dl_offset
, w1
+ dl_base
, w2
+ dl_base
);
4823 len
= (UWord
)get_UShort( &loc
);
4825 UChar byte
= get_UChar( &loc
);
4826 TRACE_D3("%02x", (UInt
)byte
);
/* Trace-print the contents of the .debug_ranges slice
   'escn_debug_ranges', if it is valid.  Tracing is controlled by
   di->trace_symtab (held in 'td3').  Entries are read as pairs of
   host words: a (0,0) pair is printed as an end-of-list marker;
   an ordinary pair is printed as the range [w1+dr_base, w2+dr_base).
   'barf' is handed to init_Cursor together with an overrun
   message. */
4835 static void trace_debug_ranges (const DebugInfo
* di
,
4836 __attribute__((noreturn
)) void (*barf
)( const HChar
* ),
4837 DiSlice escn_debug_ranges
)
4839 Cursor ranges
; /* for showing .debug_ranges */
4842 Bool td3
= di
->trace_symtab
;
4844 /* Display .debug_ranges */
4846 TRACE_SYMTAB("\n------ The contents of .debug_ranges ------\n");
4847 TRACE_SYMTAB(" Offset Begin End\n");
4848 if (ML_(sli_is_valid
)(escn_debug_ranges
)) {
4849 init_Cursor( &ranges
, escn_debug_ranges
, 0, barf
,
4850 "Overrun whilst reading .debug_ranges section(1)" );
4856 if (is_at_end_Cursor( &ranges
))
4859 /* Read a (host-)word pair. This is something of a hack since
4860 the word size to read is really dictated by the ELF file;
4861 however, we assume we're reading a file with the same
4862 word-sizeness as the host. Reasonably enough. */
4863 w1
= get_UWord( &ranges
);
4864 w2
= get_UWord( &ranges
);
4866 if (w1
== 0 && w2
== 0) {
4867 /* end of list. reset 'base' */
4868 TRACE_D3(" %08lx <End of list>\n", dr_offset
);
4870 dr_offset
= get_position_of_Cursor( &ranges
);
4875 /* new value for 'base' */
4876 TRACE_D3(" %08lx %16lx %08lx (base address)\n",
4882 /* else a range [w1+base, w2+base) is denoted */
4883 TRACE_D3(" %08lx %08lx %08lx\n",
4884 dr_offset
, w1
+ dr_base
, w2
+ dr_base
);
/* Trace-print the contents of the .debug_abbrev slice
   'escn_debug_abbv', if it is valid.  Tracing is controlled by
   di->trace_symtab (held in 'td3').  Each abbreviation is read as a
   ULEB128 code (zero ends the table), a ULEB128 tag and a one-byte
   has_children flag, followed by (name, form) ULEB128 pairs that end
   at a (0,0) pair.  For DW_FORM_implicit_const an extra SLEB128
   value is read and discarded.  'barf' is handed to init_Cursor
   together with an overrun message. */
4889 static void trace_debug_abbrev (const DebugInfo
* di
,
4890 __attribute__((noreturn
)) void (*barf
)( const HChar
* ),
4891 DiSlice escn_debug_abbv
)
4893 Cursor abbv
; /* for showing .debug_abbrev */
4894 Bool td3
= di
->trace_symtab
;
4896 /* Display .debug_abbrev */
4898 TRACE_SYMTAB("\n------ The contents of .debug_abbrev ------\n");
4899 if (ML_(sli_is_valid
)(escn_debug_abbv
)) {
4900 init_Cursor( &abbv
, escn_debug_abbv
, 0, barf
,
4901 "Overrun whilst reading .debug_abbrev section" );
4903 if (is_at_end_Cursor( &abbv
))
4905 /* Read one abbreviation table */
4906 TRACE_D3(" Number TAG\n");
4910 ULong acode
= get_ULEB128( &abbv
);
4911 if (acode
== 0) break; /* end of the table */
4912 atag
= get_ULEB128( &abbv
);
4913 has_children
= get_UChar( &abbv
);
4914 TRACE_D3(" %llu %s [%s]\n",
4915 acode
, ML_(pp_DW_TAG
)(atag
),
4916 ML_(pp_DW_children
)(has_children
));
4918 ULong at_name
= get_ULEB128( &abbv
);
4919 ULong at_form
= get_ULEB128( &abbv
);
4920 if (at_form
== DW_FORM_implicit_const
) {
4921 /* Long at_val = */ get_SLEB128 ( &abbv
);
4923 if (at_name
== 0 && at_form
== 0) break;
4924 TRACE_D3(" %-18s %s\n",
4925 ML_(pp_DW_AT
)(at_name
), ML_(pp_DW_FORM
)(at_form
));
4933 void new_dwarf3_reader_wrk (
4935 __attribute__((noreturn
)) void (*barf
)( const HChar
* ),
4936 DiSlice escn_debug_info
, DiSlice escn_debug_types
,
4937 DiSlice escn_debug_abbv
, DiSlice escn_debug_line
,
4938 DiSlice escn_debug_str
, DiSlice escn_debug_ranges
,
4939 DiSlice escn_debug_rnglists
, DiSlice escn_debug_loclists
,
4940 DiSlice escn_debug_loc
, DiSlice escn_debug_info_alt
,
4941 DiSlice escn_debug_abbv_alt
, DiSlice escn_debug_line_alt
,
4942 DiSlice escn_debug_str_alt
, DiSlice escn_debug_line_str
4945 XArray
* /* of TyEnt */ tyents
= NULL
;
4946 XArray
* /* of TyEnt */ tyents_to_keep
= NULL
;
4947 XArray
* /* of GExpr* */ gexprs
= NULL
;
4948 XArray
* /* of TempVar* */ tempvars
= NULL
;
4949 WordFM
* /* of (XArray* of AddrRange, void) */ rangestree
= NULL
;
4950 TyEntIndexCache
* tyents_cache
= NULL
;
4951 TyEntIndexCache
* tyents_to_keep_cache
= NULL
;
4952 TempVar
*varp
, *varp2
;
4954 Cursor info
; /* primary cursor for parsing .debug_info */
4955 D3TypeParser typarser
;
4956 D3VarParser varparser
;
4957 D3InlParser inlparser
;
4959 Bool td3
= di
->trace_symtab
;
4960 XArray
* /* of TempVar* */ dioff_lookup_tab
;
4962 VgHashTable
*signature_types
= NULL
;
4964 /* Display/trace various information, if requested. */
4966 trace_debug_loc (di
, barf
, escn_debug_loc
);
4967 trace_debug_ranges (di
, barf
, escn_debug_ranges
);
4968 trace_debug_abbrev (di
, barf
, escn_debug_abbv
);
4972 /* Zero out all parsers. Parsers will really be initialised
4973 according to VG_(clo_read_*_info). */
4974 VG_(memset
)( &inlparser
, 0, sizeof(inlparser
) );
4976 if (VG_(clo_read_var_info
)) {
4977 /* We'll park the harvested type information in here. Also create
4978 a fake "void" entry with offset D3_FAKEVOID_CUOFF, so we always
4979 have at least one type entry to refer to. D3_FAKEVOID_CUOFF is
4980 huge and presumably will not occur in any valid DWARF3 file --
4981 it would need to have a .debug_info section 4GB long for that to
4982 happen. These type entries end up in the DebugInfo. */
4983 tyents
= VG_(newXA
)( ML_(dinfo_zalloc
),
4984 "di.readdwarf3.ndrw.1 (TyEnt temp array)",
4985 ML_(dinfo_free
), sizeof(TyEnt
) );
4987 VG_(memset
)(&tyent
, 0, sizeof(tyent
));
4988 tyent
.tag
= Te_TyVoid
;
4989 tyent
.cuOff
= D3_FAKEVOID_CUOFF
;
4990 tyent
.Te
.TyVoid
.isFake
= True
;
4991 VG_(addToXA
)( tyents
, &tyent
);
4994 VG_(memset
)(&tyent
, 0, sizeof(tyent
));
4995 tyent
.tag
= Te_UNKNOWN
;
4996 tyent
.cuOff
= D3_INVALID_CUOFF
;
4997 VG_(addToXA
)( tyents
, &tyent
);
5000 /* This is a tree used to unique-ify the range lists that are
5001 manufactured by parse_var_DIE. References to the keys in the
5002 tree wind up in .rngMany fields in TempVars. We'll need to
5003 delete this tree, and the XArrays attached to it, at the end of
5005 rangestree
= VG_(newFM
)( ML_(dinfo_zalloc
),
5006 "di.readdwarf3.ndrw.2 (rangestree)",
5008 (Word(*)(UWord
,UWord
))cmp__XArrays_of_AddrRange
);
5010 /* List of variables we're accumulating. These don't end up in the
5011 DebugInfo; instead their contents are handed to ML_(addVar) and
5012 the list elements are then deleted. */
5013 tempvars
= VG_(newXA
)( ML_(dinfo_zalloc
),
5014 "di.readdwarf3.ndrw.3 (TempVar*s array)",
5018 /* List of GExprs we're accumulating. These wind up in the
5020 gexprs
= VG_(newXA
)( ML_(dinfo_zalloc
), "di.readdwarf3.ndrw.4",
5021 ML_(dinfo_free
), sizeof(GExpr
*) );
5023 /* We need a D3TypeParser to keep track of partially constructed
5024 types. It'll be discarded as soon as we've completed the CU,
5025 since the resulting information is tipped in to 'tyents' as it
5027 type_parser_init(&typarser
);
5029 var_parser_init(&varparser
);
5031 signature_types
= VG_(HT_construct
) ("signature_types");
5034 /* Do an initial pass to scan the .debug_types section, if any, and
5035 fill in the signatured types hash table. This lets us handle
5036 mapping from a type signature to a (cooked) DIE offset directly
5037 in get_Form_contents. */
5038 if (VG_(clo_read_var_info
) && ML_(sli_is_valid
)(escn_debug_types
)) {
5039 init_Cursor( &info
, escn_debug_types
, 0, barf
,
5040 "Overrun whilst reading .debug_types section" );
5041 TRACE_D3("\n------ Collecting signatures from "
5042 ".debug_types section ------\n");
5045 UWord cu_start_offset
, cu_offset_now
;
5048 cu_start_offset
= get_position_of_Cursor( &info
);
5050 TRACE_D3(" Compilation Unit @ offset 0x%lx:\n", cu_start_offset
);
5051 /* parse_CU_header initialises the CU's abbv hash table. */
5052 parse_CU_Header( &cc
, td3
, &info
, escn_debug_abbv
, True
, False
);
5054 /* Needed by cook_die. */
5055 cc
.types_cuOff_bias
= escn_debug_info
.szB
;
5057 record_signatured_type( signature_types
, cc
.type_signature
,
5058 cook_die( &cc
, cc
.type_offset
));
5060 /* Until proven otherwise we assume we don't need the icc9
5061 workaround in this case; see the DIE-reading loop below
5063 cu_offset_now
= (cu_start_offset
+ cc
.unit_length
5064 + (cc
.is_dw64
? 12 : 4));
5066 clear_CUConst ( &cc
);
5068 if (cu_offset_now
>= escn_debug_types
.szB
) {
5072 set_position_of_Cursor ( &info
, cu_offset_now
);
5076 /* Perform three DIE-reading passes. The first pass reads DIEs from
5077 alternate .debug_info (if any), the second pass reads DIEs from
5078 .debug_info, and the third pass reads DIEs from .debug_types.
5079 Moving the body of this loop into a separate function would
5080 require a large number of arguments to be passed in, so it is
5081 kept inline instead. */
5082 for (pass
= 0; pass
< 3; ++pass
) {
5086 if (!ML_(sli_is_valid
)(escn_debug_info_alt
))
5088 /* Now loop over the Compilation Units listed in the alternate
5089 .debug_info section (see D3SPEC sec 7.5) paras 1 and 2.
5090 Each compilation unit contains a Compilation Unit Header
5091 followed by precisely one DW_TAG_compile_unit or
5092 DW_TAG_partial_unit DIE. */
5093 init_Cursor( &info
, escn_debug_info_alt
, 0, barf
,
5094 "Overrun whilst reading alternate .debug_info section" );
5095 section_size
= escn_debug_info_alt
.szB
;
5097 TRACE_D3("\n------ Parsing alternate .debug_info section ------\n");
5098 } else if (pass
== 1) {
5099 /* Now loop over the Compilation Units listed in the .debug_info
5100 section (see D3SPEC sec 7.5) paras 1 and 2. Each compilation
5101 unit contains a Compilation Unit Header followed by precisely
5102 one DW_TAG_compile_unit or DW_TAG_partial_unit DIE. */
5103 init_Cursor( &info
, escn_debug_info
, 0, barf
,
5104 "Overrun whilst reading .debug_info section" );
5105 section_size
= escn_debug_info
.szB
;
5107 TRACE_D3("\n------ Parsing .debug_info section ------\n");
5109 if (!ML_(sli_is_valid
)(escn_debug_types
))
5111 if (!VG_(clo_read_var_info
))
5112 continue; // Types not needed when only reading inline info.
5113 init_Cursor( &info
, escn_debug_types
, 0, barf
,
5114 "Overrun whilst reading .debug_types section" );
5115 section_size
= escn_debug_types
.szB
;
5117 TRACE_D3("\n------ Parsing .debug_types section ------\n");
5121 ULong cu_start_offset
, cu_offset_now
;
5123 /* It may be that the stated size of this CU is larger than the
5124 amount of stuff actually in it. icc9 seems to generate CUs
5125 thusly. We use these variables to figure out if this is
5126 indeed the case, and if so how many bytes we need to skip to
5127 get to the start of the next CU. Not skipping those bytes
5128 causes us to misidentify the start of the next CU, and it all
5129 goes badly wrong after that (not surprisingly). */
5130 UWord cu_size_including_IniLen
, cu_amount_used
;
5132 /* It seems icc9 finishes the DIE info before debug_info_sz
5133 bytes have been used up. So be flexible, and declare the
5134 sequence complete if there is not enough remaining bytes to
5135 hold even the smallest conceivable CU header. (11 bytes I
5137 /* JRS 23Jan09: I suspect this is no longer necessary now that
5138 the code below contains a 'while (cu_amount_used <
5139 cu_size_including_IniLen ...' style loop, which skips over
5140 any leftover bytes at the end of a CU in the case where the
5141 CU's stated size is larger than its actual size (as
5142 determined by reading all its DIEs). However, for prudence,
5143 I'll leave the following test in place. I can't see that a
5144 CU header can be smaller than 11 bytes, so I don't think
5145 there's any harm possible through the test -- it just adds
5147 Word avail
= get_remaining_length_Cursor( &info
);
5150 TRACE_D3("new_dwarf3_reader_wrk: warning: "
5151 "%ld unused bytes after end of DIEs\n", avail
);
5155 if (VG_(clo_read_var_info
)) {
5156 /* Check the varparser's stack is in a sane state. */
5157 vg_assert(varparser
.sp
== -1);
5158 /* Check the typarser's stack is in a sane state. */
5159 vg_assert(typarser
.sp
== -1);
5162 cu_start_offset
= get_position_of_Cursor( &info
);
5164 TRACE_D3(" Compilation Unit @ offset 0x%llx:\n", cu_start_offset
);
5165 /* parse_CU_header initialises the CU's hashtable of abbvs ht_abbvs */
5167 parse_CU_Header( &cc
, td3
, &info
, escn_debug_abbv_alt
,
5170 parse_CU_Header( &cc
, td3
, &info
, escn_debug_abbv
,
5173 cc
.escn_debug_str
= pass
== 0 ? escn_debug_str_alt
5175 cc
.escn_debug_ranges
= escn_debug_ranges
;
5176 cc
.escn_debug_rnglists
= escn_debug_rnglists
;
5177 cc
.escn_debug_loclists
= escn_debug_loclists
;
5178 cc
.escn_debug_loc
= escn_debug_loc
;
5179 cc
.escn_debug_line
= pass
== 0 ? escn_debug_line_alt
5181 cc
.escn_debug_info
= pass
== 0 ? escn_debug_info_alt
5183 cc
.escn_debug_types
= escn_debug_types
;
5184 cc
.escn_debug_info_alt
= escn_debug_info_alt
;
5185 cc
.escn_debug_str_alt
= escn_debug_str_alt
;
5186 cc
.escn_debug_line_str
= escn_debug_line_str
;
5187 cc
.types_cuOff_bias
= escn_debug_info
.szB
;
5188 cc
.alt_cuOff_bias
= escn_debug_info
.szB
+ escn_debug_types
.szB
;
5189 cc
.cu_start_offset
= cu_start_offset
;
5191 /* The CU's svma can be deduced by looking at the AT_low_pc
5192 value in the top level TAG_compile_unit, which is the topmost
5193 DIE. We'll leave it for the 'varparser' to acquire that info
5194 and fill it in -- since it is the only party to want to know
5196 cc
.cu_svma_known
= False
;
5199 if (VG_(clo_read_var_info
)) {
5200 cc
.signature_types
= signature_types
;
5202 /* Create a fake outermost-level range covering the entire
5203 address range. So we always have *something* to catch all
5204 variable declarations. */
5205 varstack_push( &cc
, &varparser
, td3
,
5206 unitary_range_list(0UL, ~0UL),
5207 -1, False
/*isFunc*/, NULL
/*fbGX*/ );
5209 /* And set up the fndn_ix_Table. When we come across the top
5210 level DIE for this CU (which is what the next call to
5211 read_DIE should process) we will copy all the file names out
5212 of the .debug_line img area and use this table to look up the
5213 copies when we later see filename numbers in DW_TAG_variables
5215 vg_assert(!varparser
.fndn_ix_Table
);
5216 varparser
.fndn_ix_Table
5217 = VG_(newXA
)( ML_(dinfo_zalloc
), "di.readdwarf3.ndrw.5var",
5222 if (VG_(clo_read_inline_info
)) {
5223 /* fndn_ix_Table for the inlined call parser */
5224 vg_assert(!inlparser
.fndn_ix_Table
);
5225 inlparser
.fndn_ix_Table
5226 = VG_(newXA
)( ML_(dinfo_zalloc
), "di.readdwarf3.ndrw.5inl",
5231 /* Now read the one-and-only top-level DIE for this CU. */
5232 vg_assert(!VG_(clo_read_var_info
) || varparser
.sp
== 0);
5233 read_DIE( rangestree
,
5234 tyents
, tempvars
, gexprs
,
5235 &typarser
, &varparser
, &inlparser
,
5236 &info
, td3
, &cc
, 0 );
5238 cu_offset_now
= get_position_of_Cursor( &info
);
5240 if (0) VG_(printf
)("Travelled: %llu size %llu\n",
5241 cu_offset_now
- cc
.cu_start_offset
,
5242 cc
.unit_length
+ (cc
.is_dw64
? 12 : 4));
5244 /* How big the CU claims it is .. */
5245 cu_size_including_IniLen
= cc
.unit_length
+ (cc
.is_dw64
? 12 : 4);
5246 /* .. vs how big we have found it to be */
5247 cu_amount_used
= cu_offset_now
- cc
.cu_start_offset
;
5249 if (1) TRACE_D3("offset now %llu, d-i-size %llu\n",
5250 cu_offset_now
, section_size
);
5251 if (cu_offset_now
> section_size
)
5252 barf("toplevel DIEs beyond end of CU");
5254 /* If the CU is bigger than it claims to be, we've got a serious
5256 if (cu_amount_used
> cu_size_including_IniLen
)
5257 barf("CU's actual size appears to be larger than it claims it is");
5259 /* If the CU is smaller than it claims to be, we need to skip some
5260 bytes. Loop updates cu_offset_new and cu_amount_used. */
5261 while (cu_amount_used
< cu_size_including_IniLen
5262 && get_remaining_length_Cursor( &info
) > 0) {
5263 if (0) VG_(printf
)("SKIP\n");
5264 (void)get_UChar( &info
);
5265 cu_offset_now
= get_position_of_Cursor( &info
);
5266 cu_amount_used
= cu_offset_now
- cc
.cu_start_offset
;
5269 if (VG_(clo_read_var_info
)) {
5270 /* Preen to level -2. DIEs have level >= 0 so -2 cannot occur
5271 anywhere else at all. Our fake the-entire-address-space
5272 range is at level -1, so preening to -2 should completely
5273 empty the stack out. */
5275 varstack_preen( &varparser
, td3
, -2 );
5276 /* Similarly, empty the type stack out. */
5277 typestack_preen( &typarser
, td3
, -2 );
5280 if (VG_(clo_read_var_info
)) {
5281 vg_assert(varparser
.fndn_ix_Table
);
5282 VG_(deleteXA
)( varparser
.fndn_ix_Table
);
5283 varparser
.fndn_ix_Table
= NULL
;
5285 if (VG_(clo_read_inline_info
)) {
5286 vg_assert(inlparser
.fndn_ix_Table
);
5287 VG_(deleteXA
)( inlparser
.fndn_ix_Table
);
5288 inlparser
.fndn_ix_Table
= NULL
;
5292 if (cu_offset_now
== section_size
)
5294 /* else keep going */
5299 if (VG_(clo_read_var_info
)) {
5300 /* From here on we're post-processing the stuff we got
5301 out of the .debug_info section. */
5304 ML_(pp_TyEnts
)(tyents
, "Initial type entity (TyEnt) array");
5306 TRACE_D3("------ Compressing type entries ------\n");
5309 tyents_cache
= ML_(dinfo_zalloc
)( "di.readdwarf3.ndrw.6",
5310 sizeof(TyEntIndexCache
) );
5311 ML_(TyEntIndexCache__invalidate
)( tyents_cache
);
5312 dedup_types( td3
, tyents
, tyents_cache
);
5315 ML_(pp_TyEnts
)(tyents
, "After type entity (TyEnt) compression");
5319 TRACE_D3("------ Resolving the types of variables ------\n" );
5320 resolve_variable_types( barf
, tyents
, tyents_cache
, tempvars
);
5322 /* Copy all the non-INDIR tyents into a new table. For large
5323 .so's, about 90% of the tyents will by now have been resolved to
5324 INDIRs, and we no longer need them, and so don't need to store
5327 = VG_(newXA
)( ML_(dinfo_zalloc
),
5328 "di.readdwarf3.ndrw.7 (TyEnt to-keep array)",
5329 ML_(dinfo_free
), sizeof(TyEnt
) );
5330 n
= VG_(sizeXA
)( tyents
);
5331 for (i
= 0; i
< n
; i
++) {
5332 TyEnt
* ent
= VG_(indexXA
)( tyents
, i
);
5333 if (ent
->tag
!= Te_INDIR
)
5334 VG_(addToXA
)( tyents_to_keep
, ent
);
5337 VG_(deleteXA
)( tyents
);
5339 ML_(dinfo_free
)( tyents_cache
);
5340 tyents_cache
= NULL
;
5342 /* Sort tyents_to_keep so we can lookup in it. A complete (if
5343 minor) waste of time, since tyents itself is sorted, but
5344 necessary since VG_(lookupXA) refuses to cooperate if we
5346 VG_(setCmpFnXA
)( tyents_to_keep
, (XACmpFn_t
) ML_(TyEnt__cmp_by_cuOff_only
) );
5347 VG_(sortXA
)( tyents_to_keep
);
5349 /* Enable caching on tyents_to_keep */
5350 tyents_to_keep_cache
5351 = ML_(dinfo_zalloc
)( "di.readdwarf3.ndrw.8",
5352 sizeof(TyEntIndexCache
) );
5353 ML_(TyEntIndexCache__invalidate
)( tyents_to_keep_cache
);
5355 /* And record the tyents in the DebugInfo. We do this before
5356 starting to hand variables to ML_(addVar), since if ML_(addVar)
5357 wants to do debug printing (of the types of said vars) then it
5358 will need the tyents.*/
5359 vg_assert(!di
->admin_tyents
);
5360 di
->admin_tyents
= tyents_to_keep
;
5362 /* Bias all the location expressions. */
5364 TRACE_D3("------ Biasing the location expressions ------\n" );
5366 n
= VG_(sizeXA
)( gexprs
);
5367 for (i
= 0; i
< n
; i
++) {
5368 gexpr
= *(GExpr
**)VG_(indexXA
)( gexprs
, i
);
5369 bias_GX( gexpr
, di
);
5373 TRACE_D3("------ Acquired the following variables: ------\n\n");
5375 /* Park (pointers to) all the vars in an XArray, so we can look up
5376 abstract origins quickly. The array is ordered by (and hence looked
5377 up by) the .dioff fields. Since the .dioffs should be in strictly
5378 ascending order, there is no need to sort the array after
5379 construction. The ascending order is, however, asserted. */
5381 = VG_(newXA
)( ML_(dinfo_zalloc
), "di.readdwarf3.ndrw.9",
5385 n
= VG_(sizeXA
)( tempvars
);
5386 Word first_primary_var
= 0;
5387 for (first_primary_var
= 0;
5388 escn_debug_info_alt
.szB
/*really?*/ && first_primary_var
< n
;
5389 first_primary_var
++) {
5390 varp
= *(TempVar
**)VG_(indexXA
)( tempvars
, first_primary_var
);
5391 if (varp
->dioff
< escn_debug_info
.szB
+ escn_debug_types
.szB
)
5394 for (i
= 0; i
< n
; i
++) {
5395 varp
= *(TempVar
**)VG_(indexXA
)( tempvars
, (i
+ first_primary_var
) % n
);
5396 if (i
> first_primary_var
) {
5397 varp2
= *(TempVar
**)VG_(indexXA
)( tempvars
,
5398 (i
+ first_primary_var
- 1) % n
);
5399 /* why should this hold? Only, I think, because we've
5400 constructed the array by reading .debug_info sequentially,
5401 and so the array .dioff fields should reflect that, and be
5402 strictly ascending. */
5403 vg_assert(varp2
->dioff
< varp
->dioff
);
5405 VG_(addToXA
)( dioff_lookup_tab
, &varp
);
5407 VG_(setCmpFnXA
)( dioff_lookup_tab
, cmp_TempVar_by_dioff
);
5408 VG_(sortXA
)( dioff_lookup_tab
); /* POINTLESS; FIXME: rm */
5410 /* Now visit each var. Collect up as much info as possible for
5411 each var and hand it to ML_(addVar). */
5412 n
= VG_(sizeXA
)( tempvars
);
5413 for (j
= 0; j
< n
; j
++) {
5415 varp
= *(TempVar
**)VG_(indexXA
)( tempvars
, j
);
5417 /* Possibly show .. */
5419 VG_(printf
)("<%lx> addVar: level %d: %s :: ",
5422 varp
->name
? varp
->name
: "<anon_var>" );
5424 ML_(pp_TyEnt_C_ishly
)( tyents_to_keep
, varp
->typeR
);
5426 VG_(printf
)("NULL");
5428 VG_(printf
)("\n Loc=");
5430 ML_(pp_GX
)(varp
->gexpr
);
5432 VG_(printf
)("NULL");
5436 VG_(printf
)(" FrB=");
5437 ML_(pp_GX
)( varp
->fbGX
);
5440 VG_(printf
)(" FrB=none\n");
5442 VG_(printf
)(" declared at: %u %s:%d\n",
5444 ML_(fndn_ix2filename
) (di
, varp
->fndn_ix
),
5446 if (varp
->absOri
!= (UWord
)D3_INVALID_CUOFF
)
5447 VG_(printf
)(" abstract origin: <%lx>\n", varp
->absOri
);
5450 /* Skip variables which have no location. These must be
5451 abstract instances; they are useless as-is since with no
5452 location they have no specified memory location. They will
5453 presumably be referred to via the absOri fields of other
5456 TRACE_D3(" SKIP (no location)\n\n");
5460 /* So it has a location, at least. If it refers to some other
5461 entry through its absOri field, pull in further info through
5463 if (varp
->absOri
!= (UWord
)D3_INVALID_CUOFF
) {
5465 Word ixFirst
, ixLast
;
5467 TempVar
* keyp
= &key
;
5469 VG_(memset
)(&key
, 0, sizeof(key
)); /* not necessary */
5470 key
.dioff
= varp
->absOri
; /* this is what we want to find */
5471 found
= VG_(lookupXA
)( dioff_lookup_tab
, &keyp
,
5472 &ixFirst
, &ixLast
);
5474 /* barf("DW_AT_abstract_origin can't be resolved"); */
5475 TRACE_D3(" SKIP (DW_AT_abstract_origin can't be resolved)\n\n");
5478 /* If the following fails, there is more than one entry with
5479 the same dioff. Which can't happen. */
5480 vg_assert(ixFirst
== ixLast
);
5481 varAI
= *(TempVar
**)VG_(indexXA
)( dioff_lookup_tab
, ixFirst
);
5484 vg_assert(varAI
->dioff
== varp
->absOri
);
5486 /* Copy what useful info we can. */
5487 if (varAI
->typeR
&& !varp
->typeR
)
5488 varp
->typeR
= varAI
->typeR
;
5489 if (varAI
->name
&& !varp
->name
)
5490 varp
->name
= varAI
->name
;
5491 if (varAI
->fndn_ix
&& !varp
->fndn_ix
)
5492 varp
->fndn_ix
= varAI
->fndn_ix
;
5493 if (varAI
->fLine
> 0 && varp
->fLine
== 0)
5494 varp
->fLine
= varAI
->fLine
;
5497 /* Give it a name if it doesn't have one. */
5499 varp
->name
= ML_(addStr
)( di
, "<anon_var>", -1 );
5501 /* So now does it have enough info to be useful? */
5502 /* NOTE: re typeR: this is a hack. If typeR is Te_UNKNOWN then
5503 the type didn't get resolved. Really, in that case
5504 something's broken earlier on, and should be fixed, rather
5505 than just skipping the variable. */
5506 ent
= ML_(TyEnts__index_by_cuOff
)( tyents_to_keep
,
5507 tyents_to_keep_cache
,
5509 /* The next two assertions should be guaranteed by
5510 our previous call to resolve_variable_types. */
5512 vg_assert(ML_(TyEnt__is_type
)(ent
) || ent
->tag
== Te_UNKNOWN
);
5514 if (ent
->tag
== Te_UNKNOWN
) continue;
5516 vg_assert(varp
->gexpr
);
5517 vg_assert(varp
->name
);
5518 vg_assert(varp
->typeR
);
5519 vg_assert(varp
->level
>= 0);
5521 /* Ok. So we're going to keep it. Call ML_(addVar) once for
5522 each address range in which the variable exists. */
5523 TRACE_D3(" ACQUIRE for range(s) ");
5524 { AddrRange oneRange
;
5525 AddrRange
* varPcRanges
;
5527 /* Set up to iterate over address ranges, however
5529 if (varp
->nRanges
== 0 || varp
->nRanges
== 1) {
5530 vg_assert(!varp
->rngMany
);
5531 if (varp
->nRanges
== 0) {
5532 vg_assert(varp
->rngOneMin
== 0);
5533 vg_assert(varp
->rngOneMax
== 0);
5535 nVarPcRanges
= varp
->nRanges
;
5536 oneRange
.aMin
= varp
->rngOneMin
;
5537 oneRange
.aMax
= varp
->rngOneMax
;
5538 varPcRanges
= &oneRange
;
5540 vg_assert(varp
->rngMany
);
5541 vg_assert(varp
->rngOneMin
== 0);
5542 vg_assert(varp
->rngOneMax
== 0);
5543 nVarPcRanges
= VG_(sizeXA
)(varp
->rngMany
);
5544 vg_assert(nVarPcRanges
>= 2);
5545 vg_assert(nVarPcRanges
== (Word
)varp
->nRanges
);
5546 varPcRanges
= VG_(indexXA
)(varp
->rngMany
, 0);
5548 if (varp
->level
== 0)
5549 vg_assert( nVarPcRanges
== 1 );
5551 for (i
= 0; i
< nVarPcRanges
; i
++) {
5552 Addr pcMin
= varPcRanges
[i
].aMin
;
5553 Addr pcMax
= varPcRanges
[i
].aMax
;
5554 vg_assert(pcMin
<= pcMax
);
5555 /* Level 0 is the global address range. So at level 0 we
5556 don't want to bias pcMin/pcMax; but at all other levels
5557 we do since those are derived from svmas in the Dwarf
5558 we're reading. Be paranoid ... */
5559 if (varp
->level
== 0) {
5560 vg_assert(pcMin
== (Addr
)0);
5561 vg_assert(pcMax
== ~(Addr
)0);
5563 /* vg_assert(pcMin > (Addr)0);
5564 No .. we can legitimately expect to see ranges like
5565 0x0-0x11D (pre-biasing, of course). */
5566 vg_assert(pcMax
< ~(Addr
)0);
5569 /* Apply text biasing, for non-global variables. */
5570 if (varp
->level
> 0) {
5571 pcMin
+= di
->text_debug_bias
;
5572 pcMax
+= di
->text_debug_bias
;
5575 if (i
> 0 && (i
%2) == 0)
5577 TRACE_D3("[%#lx,%#lx] ", pcMin
, pcMax
);
5582 varp
->name
, varp
->typeR
,
5583 varp
->gexpr
, varp
->fbGX
,
5584 varp
->fndn_ix
, varp
->fLine
, td3
5590 /* and move on to the next var */
5593 /* Now free all the TempVars */
5594 n
= VG_(sizeXA
)( tempvars
);
5595 for (i
= 0; i
< n
; i
++) {
5596 varp
= *(TempVar
**)VG_(indexXA
)( tempvars
, i
);
5597 ML_(dinfo_free
)(varp
);
5599 VG_(deleteXA
)( tempvars
);
5602 /* and the temp lookup table */
5603 VG_(deleteXA
)( dioff_lookup_tab
);
5605 /* and the ranges tree. Note that we need to also free the XArrays
5606 which constitute the keys, hence pass VG_(deleteXA) as a
5608 VG_(deleteFM
)( rangestree
, (void(*)(UWord
))VG_(deleteXA
), NULL
);
5610 /* and the tyents_to_keep cache */
5611 ML_(dinfo_free
)( tyents_to_keep_cache
);
5612 tyents_to_keep_cache
= NULL
;
5614 vg_assert( varparser
.fndn_ix_Table
== NULL
);
5616 /* And the signatured type hash. */
5617 VG_(HT_destruct
) ( signature_types
, ML_(dinfo_free
) );
5619 /* record the GExprs in di so they can be freed later */
5620 vg_assert(!di
->admin_gexprs
);
5621 di
->admin_gexprs
= gexprs
;
5624 // Free up dynamically allocated memory
5625 if (VG_(clo_read_var_info
)) {
5626 type_parser_release(&typarser
);
5627 var_parser_release(&varparser
);
5632 /*------------------------------------------------------------*/
5634 /*--- The "new" DWARF3 reader -- top level control logic ---*/
5636 /*------------------------------------------------------------*/
/* State shared between barf() and ML_(new_dwarf3_reader) for the
   setjmp/longjmp-based error exit from the DWARF3 reader. */
/* True only while d3rd_jmpbuf has been armed by
   ML_(new_dwarf3_reader); barf() asserts this before jumping. */
5638 static Bool d3rd_jmpbuf_valid
= False
;
/* Failure message recorded by barf() just before it longjmps.  NULL
   whenever no failure is pending: ML_(new_dwarf3_reader) asserts
   that on entry and resets it before returning. */
5639 static const HChar
* d3rd_jmpbuf_reason
= NULL
;
/* Jump buffer written by VG_MINIMAL_SETJMP and consumed by
   VG_MINIMAL_LONGJMP. */
5640 static VG_MINIMAL_JMP_BUF(d3rd_jmpbuf
);
/* Failure exit for the DWARF3 reader.  Records 'reason' in
   d3rd_jmpbuf_reason and then longjmps through d3rd_jmpbuf.  It is
   declared noreturn.

   reason: message describing the failure; only the pointer is
           stored, the text is not copied.

   Asserts that d3rd_jmpbuf_valid is set, i.e. that a jump target has
   been armed before the call. */
5642 static __attribute__((noreturn
)) void barf ( const HChar
* reason
) {
5643 vg_assert(d3rd_jmpbuf_valid
);
/* Stash the reason first so the setjmp site can report it. */
5644 d3rd_jmpbuf_reason
= reason
;
5645 VG_MINIMAL_LONGJMP(d3rd_jmpbuf
);
/* Top-level driver for the DWARF3 reader.  Wraps
   new_dwarf3_reader_wrk so that a failure reported through barf()
   comes back to this function via longjmp.

   The DiSlice parameters (presumably one slice per DWARF section,
   going by their names) are forwarded unchanged to
   new_dwarf3_reader_wrk, together with 'di' and with barf as the
   failure callback.

   Sequence, as far as the visible statements show:
   - on entry the jump buffer must be unarmed and no reason pending
     (both asserted);
   - d3rd_jmpbuf_valid is set and the VG_MINIMAL_SETJMP result is
     stored in 'jumped';
   - success path: the worker returns, the buffer is disarmed and a
     success line is traced;
   - failure path: the buffer is disarmed, a non-NULL reason is
     asserted, a failure line is traced and the reason is handed to
     ML_(symerr);
   - finally d3rd_jmpbuf_valid and d3rd_jmpbuf_reason are both reset.
   NOTE(review): the branch selecting between the two paths is
   presumably keyed on 'jumped' -- confirm. */
5652 ML_(new_dwarf3_reader
) (
5654 DiSlice escn_debug_info
, DiSlice escn_debug_types
,
5655 DiSlice escn_debug_abbv
, DiSlice escn_debug_line
,
5656 DiSlice escn_debug_str
, DiSlice escn_debug_ranges
,
5657 DiSlice escn_debug_rnglists
, DiSlice escn_debug_loclists
,
5658 DiSlice escn_debug_loc
, DiSlice escn_debug_info_alt
,
5659 DiSlice escn_debug_abbv_alt
, DiSlice escn_debug_line_alt
,
5660 DiSlice escn_debug_str_alt
, DiSlice escn_debug_line_str
/* NOTE(review): presumably volatile so that these locals keep
   well-defined values after a longjmp back into this frame --
   confirm. */
5663 volatile Int jumped
;
5664 volatile Bool td3
= di
->trace_symtab
;
5666 /* Run the _wrk function to read the dwarf3. If it succeeds, it
5667 just returns normally. If there is any failure, it longjmp's
5668 back here, having first set d3rd_jmpbuf_reason to something
5670 vg_assert(d3rd_jmpbuf_valid
== False
);
5671 vg_assert(d3rd_jmpbuf_reason
== NULL
);
5673 d3rd_jmpbuf_valid
= True
;
5674 jumped
= VG_MINIMAL_SETJMP(d3rd_jmpbuf
);
5677 new_dwarf3_reader_wrk( di
, barf
,
5678 escn_debug_info
, escn_debug_types
,
5679 escn_debug_abbv
, escn_debug_line
,
5680 escn_debug_str
, escn_debug_ranges
,
5681 escn_debug_rnglists
, escn_debug_loclists
,
5682 escn_debug_loc
, escn_debug_info_alt
,
5683 escn_debug_abbv_alt
, escn_debug_line_alt
,
5684 escn_debug_str_alt
, escn_debug_line_str
);
5685 d3rd_jmpbuf_valid
= False
;
5686 TRACE_D3("\n------ .debug_info reading was successful ------\n");
5689 d3rd_jmpbuf_valid
= False
;
5690 /* Can't longjmp without giving some sort of reason. */
5691 vg_assert(d3rd_jmpbuf_reason
!= NULL
);
5693 TRACE_D3("\n------ .debug_info reading failed ------\n");
/* Report the failure reason that barf() recorded. */
5695 ML_(symerr
)(di
, True
, d3rd_jmpbuf_reason
);
/* Leave the shared state clean: the assertions at the top of this
   function require both to be reset before the next call. */
5698 d3rd_jmpbuf_valid
= False
;
5699 d3rd_jmpbuf_reason
= NULL
;
5704 /* --- Unused code fragments which might be useful one day. --- */
/* Fragment: walk the .debug_aranges section through a Cursor and
   trace each table's header fields followed by its (address, length)
   pairs.  As far as the visible statements show, the values read are
   only traced, not stored anywhere. */
5707 /* Read the arange tables */
5709 TRACE_SYMTAB("\n------ The contents of .debug_arange ------\n");
/* Cursor over the whole section, starting at offset 0; barf is the
   failure callback and the string is the message used on overrun. */
5710 init_Cursor( &aranges
, debug_aranges_img
,
5711 debug_aranges_sz
, 0, barf
,
5712 "Overrun whilst reading .debug_aranges section" );
5714 ULong len
, d_i_offset
;
5717 UChar asize
, segsize
;
5719 if (is_at_end_Cursor( &aranges
))
5721 /* Read one arange thingy */
5722 /* initial_length field */
/* get_Initial_Length also sets is64, which is then passed on when
   reading the .debug_info offset below. */
5723 len
= get_Initial_Length( &is64
, &aranges
,
5724 "in .debug_aranges: invalid initial-length field" );
5725 version
= get_UShort( &aranges
);
5726 d_i_offset
= get_Dwarfish_UWord( &aranges
, is64
);
/* asize is traced as "Pointer Size", segsize as "Segment Size". */
5727 asize
= get_UChar( &aranges
);
5728 segsize
= get_UChar( &aranges
);
5729 TRACE_D3(" Length: %llu\n", len
);
5730 TRACE_D3(" Version: %d\n", (Int
)version
);
5731 TRACE_D3(" Offset into .debug_info: %llx\n", d_i_offset
);
5732 TRACE_D3(" Pointer Size: %d\n", (Int
)asize
);
5733 TRACE_D3(" Segment Size: %d\n", (Int
)segsize
);
5735 TRACE_D3(" Address Length\n");
/* Discard bytes until the cursor position is a multiple of
   2 * asize, i.e. the size of one (address, length) pair. */
5737 while ((get_position_of_Cursor( &aranges
) % (2 * asize
)) > 0) {
5738 (void)get_UChar( & aranges
);
/* NOTE(review): the asize==8 flag presumably selects an 8-byte read
   and anything else a 4-byte read -- confirm against
   get_Dwarfish_UWord. */
5741 ULong address
= get_Dwarfish_UWord( &aranges
, asize
==8 );
5742 ULong length
= get_Dwarfish_UWord( &aranges
, asize
==8 );
5743 TRACE_D3(" 0x%016llx 0x%llx\n", address
, length
);
/* A pair with both fields zero ends the enclosing loop. */
5744 if (address
== 0 && length
== 0) break;
5750 #endif // defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_solaris)
5752 /*--------------------------------------------------------------------*/
5754 /*--------------------------------------------------------------------*/