1 /* -*- mode: C; c-basic-offset: 3; -*- */
3 /*--------------------------------------------------------------------*/
4 /*--- Read DWARF3/4 ".debug_info" sections (DIE trees). ---*/
5 /*--- readdwarf3.c ---*/
6 /*--------------------------------------------------------------------*/
9 This file is part of Valgrind, a dynamic binary instrumentation
12 Copyright (C) 2008-2017 OpenWorks LLP
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, see <http://www.gnu.org/licenses/>.
28 The GNU General Public License is contained in the file COPYING.
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
36 #if defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_solaris)
38 /* REFERENCE (without which this code will not make much sense):
40 DWARF Debugging Information Format, Version 3,
41 dated 20 December 2005 (the "D3 spec").
43 Available at http://www.dwarfstd.org/Dwarf3.pdf. There's also a
44 .doc (MS Word) version, but for some reason the section numbers
45 between the Word and PDF versions differ by 1 in the first digit.
46 All section references in this code are to the PDF version.
50 DW_TAG_{const,volatile}_type no DW_AT_type is allowed; it is
51 assumed to mean "const void" or "volatile void" respectively.
52 GDB appears to interpret them like this, anyway.
54 In many cases it is important to know the svma of a CU (the "base
55 address of the CU", as the D3 spec calls it). There are some
56 situations in which the spec implies this value is unknown, but the
57 Dwarf3 produced by gcc-4.1 seems to assume it is not unknown but
58 merely zero when not explicitly stated. So we too have to make
61 POTENTIAL BUG? Spotted 6 Sept 08. Why doesn't
62 unitary_range_list() bias the resulting range list in the same way
63 that its more general cousin, get_range_list(), does? I don't
68 get rid of cu_svma_known and document the assumed-zero svma hack.
70 ML_(sizeOfType): differentiate between zero sized types and types
71 for which the size is unknown. Is this important? I don't know.
73 DW_TAG_array_types: deal with explicit sizes (currently we compute
74 the size from the bounds and the element size, although that's
75 fragile, if the bounds are incompletely specified, or completely
78 Document reason for difference (by 1) of stack preening depth in
79 parse_var_DIE vs parse_type_DIE.
81 Don't hand to ML_(addVars), vars whose locations are entirely in
82 registers (DW_OP_reg*). This is merely a space-saving
83 optimisation, as ML_(evaluate_Dwarf3_Expr) should handle these
84 expressions correctly, by failing to evaluate them and hence
85 effectively ignoring the variable with which they are associated.
87 Deal with DW_TAG_array_types which have element size != stride
89 In some cases, the info for a variable is split between two
90 different DIEs (generally a declarer and a definer). We punt on
91 these. Could do better here.
93 The 'data_bias' argument passed to the expression evaluator
94 (ML_(evaluate_Dwarf3_Expr)) should really be changed to a
95 MaybeUWord, to make it clear when we do vs don't know what it is
96 for the evaluation of an expression. At the moment zero is passed
97 for this parameter in the don't know case. That's a bit fragile
98 and obscure; using a MaybeUWord would be clearer.
100 POTENTIAL PERFORMANCE IMPROVEMENTS:
102 Currently, duplicate removal and all other queries for the type
103 entities array are done using cuOffset-based pointing, which
104 involves a binary search (VG_(lookupXA)) for each access. This is
105 wildly inefficient, although simple. It would be better to
106 translate all the cuOffset-based references (iow, all the "R" and
107 "Rs" fields in the TyEnts in 'tyents') to direct index numbers in
108 'tyents' right at the start of dedup_types(), and use direct
109 indexing (VG_(indexXA)) wherever possible after that.
111 cmp__XArrays_of_AddrRange is also a performance bottleneck. Move
112 VG_(indexXA) into pub_tool_xarray.h so it can be inlined at all use
113 points, and possibly also make an _UNCHECKED version which skips
114 the range checks in performance-critical situations such as this.
116 Handle interaction between read_DIE and parse_{var,type}_DIE
117 better. Currently read_DIE reads the entire DIE just to find where
118 the end is (and for debug printing), so that it can later reliably
119 move the cursor to the end regardless of what parse_{var,type}_DIE
120 do. This means many DIEs (most, even?) are read twice. It would
121 be smarter to make parse_{var,type}_DIE return a Bool indicating
122 whether or not they advanced the DIE cursor, and only if they
123 didn't should read_DIE itself read through the DIE.
125 ML_(addVar) and add_var_to_arange: quite a lot of DiAddrRanges have
126 zero variables in their .vars XArray. Rather than have an XArray
127 with zero elements (which uses 2 malloc'd blocks), allow the .vars
128 pointer to be NULL in this case.
130 More generally, reduce the amount of memory allocated and freed
131 while reading Dwarf3 type/variable information. Even modest (20MB)
132 objects cause this module to allocate and free hundreds of
133 thousands of small blocks, and ML_(arena_malloc) and its various
134 groupies always show up at the top of performance profiles. */
136 #include "pub_core_basics.h"
137 #include "pub_core_debuginfo.h"
138 #include "pub_core_libcbase.h"
139 #include "pub_core_libcassert.h"
140 #include "pub_core_libcprint.h"
141 #include "pub_core_libcsetjmp.h" // setjmp facilities
142 #include "pub_core_hashtable.h"
143 #include "pub_core_options.h"
144 #include "pub_core_tooliface.h" /* VG_(needs) */
145 #include "pub_core_xarray.h"
146 #include "pub_core_wordfm.h"
147 #include "priv_misc.h" /* dinfo_zalloc/free */
148 #include "priv_image.h"
149 #include "priv_tytypes.h"
150 #include "priv_d3basics.h"
151 #include "priv_storage.h"
152 #include "priv_readdwarf3.h" /* self */
155 /*------------------------------------------------------------*/
157 /*--- Basic machinery for parsing DIEs. ---*/
159 /*------------------------------------------------------------*/
161 #define TRACE_D3(format, args...) \
162 if (UNLIKELY(td3)) { VG_(printf)(format, ## args); }
163 #define TD3 (UNLIKELY(td3))
165 #define D3_INVALID_CUOFF ((UWord)(-1UL))
166 #define D3_FAKEVOID_CUOFF ((UWord)(-2UL))
170 DiSlice sli
; // to which this cursor applies
171 DiOffT sli_next
; // offset in underlying DiImage; must be >= sli.ioff
172 void (*barf
)( const HChar
* ) __attribute__((noreturn
));
173 const HChar
* barfstr
;
177 static inline Bool
is_sane_Cursor ( const Cursor
* c
) {
178 if (!c
) return False
;
179 if (!c
->barf
) return False
;
180 if (!c
->barfstr
) return False
;
181 if (!ML_(sli_is_valid
)(c
->sli
)) return False
;
182 if (c
->sli
.ioff
== DiOffT_INVALID
) return False
;
183 if (c
->sli_next
< c
->sli
.ioff
) return False
;
187 // Initialise a cursor from a DiSlice (ELF section, really) so as to
188 // start reading at offset |sli_initial_offset| from the start of the
190 static void init_Cursor ( /*OUT*/Cursor
* c
,
192 ULong sli_initial_offset
,
193 __attribute__((noreturn
)) void (*barf
)(const HChar
*),
194 const HChar
* barfstr
)
197 VG_(bzero_inline
)(c
, sizeof(*c
));
199 c
->sli_next
= c
->sli
.ioff
+ sli_initial_offset
;
201 c
->barfstr
= barfstr
;
202 vg_assert(is_sane_Cursor(c
));
205 static Bool
is_at_end_Cursor ( const Cursor
* c
) {
206 vg_assert(is_sane_Cursor(c
));
207 return c
->sli_next
>= c
->sli
.ioff
+ c
->sli
.szB
;
210 static inline ULong
get_position_of_Cursor ( const Cursor
* c
) {
211 vg_assert(is_sane_Cursor(c
));
212 return c
->sli_next
- c
->sli
.ioff
;
214 static inline void set_position_of_Cursor ( Cursor
* c
, ULong pos
) {
215 c
->sli_next
= c
->sli
.ioff
+ pos
;
216 vg_assert(is_sane_Cursor(c
));
218 static inline void advance_position_of_Cursor ( Cursor
* c
, ULong delta
) {
219 c
->sli_next
+= delta
;
220 vg_assert(is_sane_Cursor(c
));
223 static /*signed*/Long
get_remaining_length_Cursor ( const Cursor
* c
) {
224 vg_assert(is_sane_Cursor(c
));
225 return c
->sli
.ioff
+ c
->sli
.szB
- c
->sli_next
;
228 //static void* get_address_of_Cursor ( Cursor* c ) {
229 // vg_assert(is_sane_Cursor(c));
230 // return &c->region_start_img[ c->region_next ];
233 static DiCursor
get_DiCursor_from_Cursor ( const Cursor
* c
) {
234 return mk_DiCursor(c
->sli
.img
, c
->sli_next
);
237 /* FIXME: document assumptions on endianness for
238 get_UShort/UInt/ULong. */
239 static inline UChar
get_UChar ( Cursor
* c
) {
241 vg_assert(is_sane_Cursor(c
));
242 if (c
->sli_next
+ sizeof(UChar
) > c
->sli
.ioff
+ c
->sli
.szB
) {
247 r
= ML_(img_get_UChar
)(c
->sli
.img
, c
->sli_next
);
248 c
->sli_next
+= sizeof(UChar
);
251 static UShort
get_UShort ( Cursor
* c
) {
253 vg_assert(is_sane_Cursor(c
));
254 if (c
->sli_next
+ sizeof(UShort
) > c
->sli
.ioff
+ c
->sli
.szB
) {
259 r
= ML_(img_get_UShort
)(c
->sli
.img
, c
->sli_next
);
260 c
->sli_next
+= sizeof(UShort
);
263 static UInt
get_UInt ( Cursor
* c
) {
265 vg_assert(is_sane_Cursor(c
));
266 if (c
->sli_next
+ sizeof(UInt
) > c
->sli
.ioff
+ c
->sli
.szB
) {
271 r
= ML_(img_get_UInt
)(c
->sli
.img
, c
->sli_next
);
272 c
->sli_next
+= sizeof(UInt
);
275 static ULong
get_ULong ( Cursor
* c
) {
277 vg_assert(is_sane_Cursor(c
));
278 if (c
->sli_next
+ sizeof(ULong
) > c
->sli
.ioff
+ c
->sli
.szB
) {
283 r
= ML_(img_get_ULong
)(c
->sli
.img
, c
->sli_next
);
284 c
->sli_next
+= sizeof(ULong
);
287 static ULong
get_ULEB128 ( Cursor
* c
) {
291 /* unroll first iteration */
292 byte
= get_UChar( c
);
293 result
= (ULong
)(byte
& 0x7f);
294 if (LIKELY(!(byte
& 0x80))) return result
;
296 /* end unroll first iteration */
298 byte
= get_UChar( c
);
299 result
|= ((ULong
)(byte
& 0x7f)) << shift
;
301 } while (byte
& 0x80);
304 static Long
get_SLEB128 ( Cursor
* c
) {
310 result
|= ((ULong
)(byte
& 0x7f)) << shift
;
312 } while (byte
& 0x80);
313 if (shift
< 64 && (byte
& 0x40))
314 result
|= -(1ULL << shift
);
318 /* Assume 'c' points to the start of a string. Return a DiCursor of
319 whatever it points at, and advance it past the terminating zero.
320 This makes it safe for the caller to then copy the string with
321 ML_(addStr), since (w.r.t. image overruns) the process of advancing
322 past the terminating zero will already have "vetted" the string. */
323 static DiCursor
get_AsciiZ ( Cursor
* c
) {
325 DiCursor res
= get_DiCursor_from_Cursor(c
);
326 do { uc
= get_UChar(c
); } while (uc
!= 0);
330 static ULong
peek_ULEB128 ( Cursor
* c
) {
331 DiOffT here
= c
->sli_next
;
332 ULong r
= get_ULEB128( c
);
336 static UChar
peek_UChar ( Cursor
* c
) {
337 DiOffT here
= c
->sli_next
;
338 UChar r
= get_UChar( c
);
343 static ULong
get_Dwarfish_UWord ( Cursor
* c
, Bool is_dw64
) {
344 return is_dw64
? get_ULong(c
) : (ULong
) get_UInt(c
);
347 static UWord
get_UWord ( Cursor
* c
) {
348 vg_assert(sizeof(UWord
) == sizeof(void*));
349 if (sizeof(UWord
) == 4) return get_UInt(c
);
350 if (sizeof(UWord
) == 8) return get_ULong(c
);
354 /* Read a DWARF3 'Initial Length' field */
355 static ULong
get_Initial_Length ( /*OUT*/Bool
* is64
,
357 const HChar
* barfMsg
)
363 if (w32
>= 0xFFFFFFF0 && w32
< 0xFFFFFFFF) {
366 else if (w32
== 0xFFFFFFFF) {
368 w64
= get_ULong( c
);
377 /*------------------------------------------------------------*/
379 /*--- "CUConst" structure ---*/
381 /*------------------------------------------------------------*/
385 ULong at_name
; // Dwarf Attribute name
386 ULong at_form
; // Dwarf Attribute form
387 UInt skip_szB
; // Nr of bytes skippable from here ...
388 UInt next_nf
; // ... to reach this attr/form index in the g_abbv.nf
390 /* skip_szB and next_nf are used to optimise the skipping of uninteresting DIEs.
391 Each name_form records the (fixed) number of bytes that can be skipped from
392 the beginning of this form till the next attr/form to look at.
393 The next form to look at can be:
394 an 'interesting' attr/form to read while skipping a DIE
395 (currently, this is only DW_AT_sibling)
397 a variable length form which must be read to be skipped.
398 For a variable length form, the skip_szB will be equal to VARSZ_FORM.
400 Note: this technique could also be used to speed up the parsing
401 of DIEs : for each parser kind, we could have the nr of bytes
402 to skip to directly reach the interesting form(s) for the parser. */
406 struct _g_abbv
*next
; // read/write by hash table.
407 UWord abbv_code
; // key, read by hash table
411 /* Variable-length array of name/form pairs, terminated
413 The skip_szB/next_nf fields allow efficient skipping of a DIE
414 described by this g_abbv; */
417 /* Holds information that is constant through the parsing of a
418 Compilation Unit. This is basically plumbed through to
422 /* Call here if anything goes wrong */
423 void (*barf
)( const HChar
* ) __attribute__((noreturn
));
424 /* Is this 64-bit DWARF ? */
426 /* Which DWARF version ? (2, 3 or 4) */
428 /* Length of this Compilation Unit, as stated in the
429 .unit_length :: InitialLength field of the CU Header.
430 However, this size (as specified by the D3 spec) does not
431 include the size of the .unit_length field itself, which is
432 either 4 or 12 bytes (32-bit or 64-bit Dwarf3). That value
433 can be obtained through the expression ".is_dw64 ? 12 : 4". */
435 /* Offset of start of this unit in .debug_info */
436 UWord cu_start_offset
;
437 /* SVMA for this CU. In the D3 spec, this is known as the "base
438 address of the compilation unit" (last para sec 3.1.1).
439 Needed for (amongst things) interpretation of location-list
444 /* The debug_abbreviations table to be used for this Unit */
446 /* Upper bound on size thereof (an overestimate, in general) */
447 //UWord debug_abbv_maxszB;
448 /* A bounded area of the image, to be used as the
449 debug_abbreviations table to be used for this Unit. */
452 /* Image information for various sections. */
453 DiSlice escn_debug_str
;
454 DiSlice escn_debug_ranges
;
455 DiSlice escn_debug_loc
;
456 DiSlice escn_debug_line
;
457 DiSlice escn_debug_info
;
458 DiSlice escn_debug_types
;
459 DiSlice escn_debug_info_alt
;
460 DiSlice escn_debug_str_alt
;
461 /* How much to add to .debug_types resp. alternate .debug_info offsets
463 UWord types_cuOff_bias
;
464 UWord alt_cuOff_bias
;
465 /* --- Needed so we can add stuff to the string table. --- */
466 struct _DebugInfo
* di
;
467 /* --- a hash table of g_abbv (i.e. parsed abbreviations) --- */
468 VgHashTable
*ht_abbvs
;
470 /* True if this came from .debug_types; otherwise it came from
473 /* For a unit coming from .debug_types, these hold the TU's type
474 signature and the uncooked DIE offset of the TU's signatured
475 type. For a unit coming from .debug_info, these are unused. */
476 ULong type_signature
;
479 /* Signatured type hash; computed once and then shared by all
481 VgHashTable
*signature_types
;
483 /* True if this came from alternate .debug_info; otherwise
484 it came from normal .debug_info or .debug_types. */
490 /* Return the cooked value of DIE depending on whether CC represents a
491 .debug_types unit. To cook a DIE, we pretend that the .debug_info,
492 .debug_types and optional alternate .debug_info sections form
493 a contiguous whole, so that DIEs coming from .debug_types are numbered
494 starting at the end of .debug_info and DIEs coming from alternate
495 .debug_info are numbered starting at the end of .debug_types. */
496 static UWord
cook_die( const CUConst
* cc
, UWord die
)
498 if (cc
->is_type_unit
)
499 die
+= cc
->types_cuOff_bias
;
500 else if (cc
->is_alt_info
)
501 die
+= cc
->alt_cuOff_bias
;
505 /* Like cook_die, but understand that DIEs coming from a
506 DW_FORM_ref_sig8 reference are already cooked. Also, handle
507 DW_FORM_GNU_ref_alt from within primary .debug_info or .debug_types
508 as reference to alternate .debug_info. */
509 static UWord
cook_die_using_form( const CUConst
*cc
, UWord die
, DW_FORM form
)
511 if (form
== DW_FORM_ref_sig8
)
513 if (form
== DW_FORM_GNU_ref_alt
)
514 return die
+ cc
->alt_cuOff_bias
;
515 return cook_die( cc
, die
);
518 /* Return the uncooked offset of DIE and set *TYPE_FLAG to true if the DIE
519 came from the .debug_types section and *ALT_FLAG to true if the DIE
520 came from alternate .debug_info section. */
521 static UWord
uncook_die( const CUConst
*cc
, UWord die
, /*OUT*/Bool
*type_flag
,
526 /* The use of escn_debug_{info,types}.szB seems safe to me even if
527 escn_debug_{info,types} are DiSlice_INVALID (meaning the
528 sections were not found), because DiSlice_INVALID.szB is always
529 zero. That said, it seems unlikely we'd ever get here if
530 .debug_info or .debug_types were missing. */
531 if (die
>= cc
->escn_debug_info
.szB
) {
532 if (die
>= cc
->escn_debug_info
.szB
+ cc
->escn_debug_types
.szB
) {
534 die
-= cc
->escn_debug_info
.szB
+ cc
->escn_debug_types
.szB
;
537 die
-= cc
->escn_debug_info
.szB
;
543 /*------------------------------------------------------------*/
545 /*--- Helper functions for Guarded Expressions ---*/
547 /*------------------------------------------------------------*/
549 /* Parse the location list starting at img-offset 'debug_loc_offset'
550 in .debug_loc. Results are biased with 'svma_of_referencing_CU'
551 and so I believe are correct SVMAs for the object as a whole. This
552 function allocates the UChar*, and the caller must deallocate it.
553 The resulting block is in so-called Guarded-Expression format.
555 Guarded-Expression format is similar but not identical to the DWARF3
556 location-list format. The format of each returned block is:
560 followed by zero or more of
562 (Addr aMin; Addr aMax; UShort nbytes; ..bytes..; UChar isEnd)
564 '..bytes..' is a standard DWARF3 location expression which is
565 valid when aMin <= pc <= aMax (possibly after suitable biasing).
567 The number of bytes in '..bytes..' is nbytes.
569 The end of the sequence is marked by an isEnd == 1 value. All
570 previous isEnd values must be zero.
572 biasMe is 1 if the aMin/aMax fields need this DebugInfo's
573 text_bias added before use, and 0 if this is not
574 necessary (the GX is ready to go).
576 Hence the block can be quickly parsed and is self-describing. Note
577 that aMax is 1 less than the corresponding value in a DWARF3
578 location list. Zero length ranges, with aMax == aMin-1, are not
581 /* 2008-sept-12: moved ML_(pp_GX) from here to d3basics.c, where
582 it more logically belongs. */
585 /* Apply a text bias to a GX. */
586 static void bias_GX ( /*MOD*/GExpr
* gx
, const DebugInfo
* di
)
589 UChar
* p
= &gx
->payload
[0];
592 uc
= *p
++; /*biasMe*/
596 p
[-1] = 0; /* mark it as done */
604 ML_(write_Addr
)(pA
, ML_(read_Addr
)(pA
) + di
->text_debug_bias
);
608 ML_(write_Addr
)(pA
, ML_(read_Addr
)(pA
) + di
->text_debug_bias
);
610 /* nbytes, and actual expression */
611 nbytes
= ML_(read_UShort
)(p
); p
+= sizeof(UShort
);
616 __attribute__((noinline
))
617 static GExpr
* make_singleton_GX ( DiCursor block
, ULong nbytes
)
623 vg_assert(sizeof(UWord
) == sizeof(Addr
));
624 vg_assert(nbytes
<= 0xFFFF); /* else we overflow the nbytes field */
626 = sizeof(UChar
) /*biasMe*/ + sizeof(UChar
) /*!isEnd*/
627 + sizeof(UWord
) /*aMin*/ + sizeof(UWord
) /*aMax*/
628 + sizeof(UShort
) /*nbytes*/ + (SizeT
)nbytes
629 + sizeof(UChar
); /*isEnd*/
631 gx
= ML_(dinfo_zalloc
)( "di.readdwarf3.msGX.1",
632 sizeof(GExpr
) + bytesReqd
);
634 p
= pstart
= &gx
->payload
[0];
636 p
= ML_(write_UChar
)(p
, 0); /*biasMe*/
637 p
= ML_(write_UChar
)(p
, 0); /*!isEnd*/
638 p
= ML_(write_Addr
)(p
, 0); /*aMin*/
639 p
= ML_(write_Addr
)(p
, ~0); /*aMax*/
640 p
= ML_(write_UShort
)(p
, nbytes
); /*nbytes*/
641 ML_(cur_read_get
)(p
, block
, nbytes
); p
+= nbytes
;
642 p
= ML_(write_UChar
)(p
, 1); /*isEnd*/
644 vg_assert( (SizeT
)(p
- pstart
) == bytesReqd
);
645 vg_assert( &gx
->payload
[bytesReqd
]
646 == ((UChar
*)gx
) + sizeof(GExpr
) + bytesReqd
);
651 __attribute__((noinline
))
652 static GExpr
* make_general_GX ( const CUConst
* cc
,
654 ULong debug_loc_offset
,
655 Addr svma_of_referencing_CU
)
659 XArray
* xa
; /* XArray of UChar */
663 vg_assert(sizeof(UWord
) == sizeof(Addr
));
664 if (!ML_(sli_is_valid
)(cc
->escn_debug_loc
) || cc
->escn_debug_loc
.szB
== 0)
665 cc
->barf("make_general_GX: .debug_loc is empty/missing");
667 init_Cursor( &loc
, cc
->escn_debug_loc
, 0, cc
->barf
,
668 "Overrun whilst reading .debug_loc section(2)" );
669 set_position_of_Cursor( &loc
, debug_loc_offset
);
671 TRACE_D3("make_general_GX (.debug_loc_offset = %llu, ioff = %llu) {\n",
672 debug_loc_offset
, get_DiCursor_from_Cursor(&loc
).ioff
);
674 /* Who frees this xa? It is freed before this fn exits. */
675 xa
= VG_(newXA
)( ML_(dinfo_zalloc
), "di.readdwarf3.mgGX.1",
679 { UChar c
= 1; /*biasMe*/ VG_(addBytesToXA
)( xa
, &c
, sizeof(c
) ); }
685 /* Read a (host-)word pair. This is something of a hack since
686 the word size to read is really dictated by the ELF file;
687 however, we assume we're reading a file with the same
688 word-sizeness as the host. Reasonably enough. */
689 UWord w1
= get_UWord( &loc
);
690 UWord w2
= get_UWord( &loc
);
692 TRACE_D3(" %08lx %08lx\n", w1
, w2
);
693 if (w1
== 0 && w2
== 0)
694 break; /* end of list */
697 /* new value for 'base' */
702 /* else a location expression follows */
703 /* else enumerate [w1+base, w2+base) */
704 /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
707 TRACE_D3("negative range is for .debug_loc expr at "
708 "file offset %llu\n",
710 cc
->barf( "negative range in .debug_loc section" );
713 /* ignore zero length ranges */
715 len
= (UWord
)get_UShort( &loc
);
722 VG_(addBytesToXA
)( xa
, &c
, sizeof(c
) );
723 w
= w1
+ base
+ svma_of_referencing_CU
;
724 VG_(addBytesToXA
)( xa
, &w
, sizeof(w
) );
725 w
= w2
-1 + base
+ svma_of_referencing_CU
;
726 VG_(addBytesToXA
)( xa
, &w
, sizeof(w
) );
728 VG_(addBytesToXA
)( xa
, &s
, sizeof(s
) );
732 UChar byte
= get_UChar( &loc
);
733 TRACE_D3("%02x", (UInt
)byte
);
735 VG_(addBytesToXA
)( xa
, &byte
, 1 );
741 { UChar c
= 1; /*isEnd*/ VG_(addBytesToXA
)( xa
, &c
, sizeof(c
) ); }
743 nbytes
= VG_(sizeXA
)( xa
);
744 vg_assert(nbytes
>= 1);
746 gx
= ML_(dinfo_zalloc
)( "di.readdwarf3.mgGX.2", sizeof(GExpr
) + nbytes
);
747 VG_(memcpy
)( &gx
->payload
[0], (UChar
*)VG_(indexXA
)(xa
,0), nbytes
);
748 vg_assert( &gx
->payload
[nbytes
]
749 == ((UChar
*)gx
) + sizeof(GExpr
) + nbytes
);
759 /*------------------------------------------------------------*/
761 /*--- Helper functions for range lists and CU headers ---*/
763 /*------------------------------------------------------------*/
765 /* Denotes an address range. Both aMin and aMax are included in the
766 range; hence a complete range is (0, ~0) and an empty range is any
767 (X, X-1) for X > 0.*/
769 struct { Addr aMin
; Addr aMax
; }
773 /* Generate an arbitrary structural total ordering on
774 XArray* of AddrRange. */
775 static Word
cmp__XArrays_of_AddrRange ( const XArray
* rngs1
,
776 const XArray
* rngs2
)
779 vg_assert(rngs1
&& rngs2
);
780 n1
= VG_(sizeXA
)( rngs1
);
781 n2
= VG_(sizeXA
)( rngs2
);
782 if (n1
< n2
) return -1;
783 if (n1
> n2
) return 1;
784 for (i
= 0; i
< n1
; i
++) {
785 AddrRange
* rng1
= (AddrRange
*)VG_(indexXA
)( rngs1
, i
);
786 AddrRange
* rng2
= (AddrRange
*)VG_(indexXA
)( rngs2
, i
);
787 if (rng1
->aMin
< rng2
->aMin
) return -1;
788 if (rng1
->aMin
> rng2
->aMin
) return 1;
789 if (rng1
->aMax
< rng2
->aMax
) return -1;
790 if (rng1
->aMax
> rng2
->aMax
) return 1;
796 __attribute__((noinline
))
797 static XArray
* /* of AddrRange */ empty_range_list ( void )
799 XArray
* xa
; /* XArray of AddrRange */
800 /* Who frees this xa? varstack_preen() does. */
801 xa
= VG_(newXA
)( ML_(dinfo_zalloc
), "di.readdwarf3.erl.1",
808 __attribute__((noinline
))
809 static XArray
* unitary_range_list ( Addr aMin
, Addr aMax
)
813 vg_assert(aMin
<= aMax
);
814 /* Who frees this xa? varstack_preen() does. */
815 xa
= VG_(newXA
)( ML_(dinfo_zalloc
), "di.readdwarf3.url.1",
820 VG_(addToXA
)( xa
, &pair
);
825 /* Enumerate the address ranges starting at img-offset
826 'debug_ranges_offset' in .debug_ranges. Results are biased with
827 'svma_of_referencing_CU' and so I believe are correct SVMAs for the
828 object as a whole. This function allocates the XArray, and the
829 caller must deallocate it. */
830 __attribute__((noinline
))
831 static XArray
* /* of AddrRange */
832 get_range_list ( const CUConst
* cc
,
834 UWord debug_ranges_offset
,
835 Addr svma_of_referencing_CU
)
839 XArray
* xa
; /* XArray of AddrRange */
842 if (!ML_(sli_is_valid
)(cc
->escn_debug_ranges
)
843 || cc
->escn_debug_ranges
.szB
== 0)
844 cc
->barf("get_range_list: .debug_ranges is empty/missing");
846 init_Cursor( &ranges
, cc
->escn_debug_ranges
, 0, cc
->barf
,
847 "Overrun whilst reading .debug_ranges section(2)" );
848 set_position_of_Cursor( &ranges
, debug_ranges_offset
);
850 /* Who frees this xa? varstack_preen() does. */
851 xa
= VG_(newXA
)( ML_(dinfo_zalloc
), "di.readdwarf3.grl.1", ML_(dinfo_free
),
855 /* Read a (host-)word pair. This is something of a hack since
856 the word size to read is really dictated by the ELF file;
857 however, we assume we're reading a file with the same
858 word-sizeness as the host. Reasonably enough. */
859 UWord w1
= get_UWord( &ranges
);
860 UWord w2
= get_UWord( &ranges
);
862 if (w1
== 0 && w2
== 0)
863 break; /* end of list. */
866 /* new value for 'base' */
871 /* else enumerate [w1+base, w2+base) */
872 /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
875 cc
->barf( "negative range in .debug_ranges section" );
877 pair
.aMin
= w1
+ base
+ svma_of_referencing_CU
;
878 pair
.aMax
= w2
- 1 + base
+ svma_of_referencing_CU
;
879 vg_assert(pair
.aMin
<= pair
.aMax
);
880 VG_(addToXA
)( xa
, &pair
);
886 #define VARSZ_FORM 0xffffffff
887 static UInt
get_Form_szB (const CUConst
* cc
, DW_FORM form
);
889 /* Initialises the hash table of abbreviations.
890 We do a single scan of the abbv slice to parse and
891 build all abbreviations, for the following reasons:
892 * all or most abbreviations will be needed in any case
893 (at least for var-info reading).
894 * re-reading an abbreviation each time it is needed causes a lot of calls
896 * a CU should not have many abbreviations. */
897 static void init_ht_abbvs (CUConst
* cc
,
901 g_abbv
*ta
; // temporary abbreviation, reallocated if needed.
902 UInt ta_nf_maxE
; // max nr of pairs in ta.nf[], doubled when reallocated.
903 UInt ta_nf_n
; // nr of pairs in ta->nf that are initialised.
904 g_abbv
*ht_ta
; // abbv to insert in hash table.
907 #define SZ_G_ABBV(_nf_szE) (sizeof(g_abbv) + _nf_szE * sizeof(name_form))
909 ta_nf_maxE
= 10; // starting with enough for 9 pairs+terminating pair.
910 ta
= ML_(dinfo_zalloc
) ("di.readdwarf3.ht_ta_nf", SZ_G_ABBV(ta_nf_maxE
));
911 cc
->ht_abbvs
= VG_(HT_construct
) ("di.readdwarf3.ht_abbvs");
913 init_Cursor( &c
, cc
->debug_abbv
, 0, cc
->barf
,
914 "Overrun whilst parsing .debug_abbrev section(2)" );
916 ta
->abbv_code
= get_ULEB128( &c
);
917 if (ta
->abbv_code
== 0) break; /* end of the table */
919 ta
->atag
= get_ULEB128( &c
);
920 ta
->has_children
= get_UChar( &c
);
923 if (ta_nf_n
>= ta_nf_maxE
) {
925 ta
= ML_(dinfo_zalloc
) ("di.readdwarf3.ht_ta_nf",
926 SZ_G_ABBV(2 * ta_nf_maxE
));
927 ta_nf_maxE
= 2 * ta_nf_maxE
;
928 VG_(memcpy
) (ta
, old_ta
, SZ_G_ABBV(ta_nf_n
));
929 ML_(dinfo_free
) (old_ta
);
931 ta
->nf
[ta_nf_n
].at_name
= get_ULEB128( &c
);
932 ta
->nf
[ta_nf_n
].at_form
= get_ULEB128( &c
);
933 if (ta
->nf
[ta_nf_n
].at_name
== 0 && ta
->nf
[ta_nf_n
].at_form
== 0) {
940 // Initialises the skip_szB/next_nf elements : an element at position
941 // i must contain the sum of its own size + the sizes of all elements
942 // following i till either the next variable size element, the next
943 // sibling element or the end of the DIE.
944 ta
->nf
[ta_nf_n
- 1].skip_szB
= 0;
945 ta
->nf
[ta_nf_n
- 1].next_nf
= 0;
946 for (i
= ta_nf_n
- 2; i
>= 0; i
--) {
947 const UInt form_szB
= get_Form_szB (cc
, (DW_FORM
)ta
->nf
[i
].at_form
);
949 if (ta
->nf
[i
+1].at_name
== DW_AT_sibling
950 || ta
->nf
[i
+1].skip_szB
== VARSZ_FORM
) {
951 ta
->nf
[i
].skip_szB
= form_szB
;
952 ta
->nf
[i
].next_nf
= i
+1;
953 } else if (form_szB
== VARSZ_FORM
) {
954 ta
->nf
[i
].skip_szB
= form_szB
;
955 ta
->nf
[i
].next_nf
= i
+1;
957 ta
->nf
[i
].skip_szB
= ta
->nf
[i
+1].skip_szB
+ form_szB
;
958 ta
->nf
[i
].next_nf
= ta
->nf
[i
+1].next_nf
;
962 ht_ta
= ML_(dinfo_zalloc
) ("di.readdwarf3.ht_ta", SZ_G_ABBV(ta_nf_n
));
963 VG_(memcpy
) (ht_ta
, ta
, SZ_G_ABBV(ta_nf_n
));
964 VG_(HT_add_node
) ( cc
->ht_abbvs
, ht_ta
);
966 TRACE_D3(" Adding abbv_code %lu TAG %s [%s] nf %u ",
967 ht_ta
->abbv_code
, ML_(pp_DW_TAG
)(ht_ta
->atag
),
968 ML_(pp_DW_children
)(ht_ta
->has_children
),
971 for (i
= 0; i
< ta_nf_n
; i
++)
972 TRACE_D3("[%u,%u] ", ta
->nf
[i
].skip_szB
, ta
->nf
[i
].next_nf
);
977 ML_(dinfo_free
) (ta
);
981 static g_abbv
* get_abbv (const CUConst
* cc
, ULong abbv_code
)
985 abbv
= VG_(HT_lookup
) (cc
->ht_abbvs
, abbv_code
);
987 cc
->barf ("abbv_code not found in ht_abbvs table");
991 /* Free the memory allocated in CUConst. */
992 static void clear_CUConst (CUConst
* cc
)
994 VG_(HT_destruct
) ( cc
->ht_abbvs
, ML_(dinfo_free
));
998 /* Parse the Compilation Unit header indicated at 'c' and
999 initialise 'cc' accordingly. */
1000 static __attribute__((noinline
))
1001 void parse_CU_Header ( /*OUT*/CUConst
* cc
,
1004 DiSlice escn_debug_abbv
,
1009 ULong debug_abbrev_offset
;
1011 VG_(memset
)(cc
, 0, sizeof(*cc
));
1012 vg_assert(c
&& c
->barf
);
1015 /* initial_length field */
1017 = get_Initial_Length( &cc
->is_dw64
, c
,
1018 "parse_CU_Header: invalid initial-length field" );
1020 TRACE_D3(" Length: %llu\n", cc
->unit_length
);
1023 cc
->version
= get_UShort( c
);
1024 if (cc
->version
!= 2 && cc
->version
!= 3 && cc
->version
!= 4)
1025 cc
->barf( "parse_CU_Header: is neither DWARF2 nor DWARF3 nor DWARF4" );
1026 TRACE_D3(" Version: %d\n", (Int
)cc
->version
);
1028 /* debug_abbrev_offset */
1029 debug_abbrev_offset
= get_Dwarfish_UWord( c
, cc
->is_dw64
);
1030 if (debug_abbrev_offset
>= escn_debug_abbv
.szB
)
1031 cc
->barf( "parse_CU_Header: invalid debug_abbrev_offset" );
1032 TRACE_D3(" Abbrev Offset: %llu\n", debug_abbrev_offset
);
1034 /* address size. If this isn't equal to the host word size, just
1035 give up. This makes it safe to assume elsewhere that
1036 DW_FORM_addr and DW_FORM_ref_addr can be treated as a host
1038 address_size
= get_UChar( c
);
1039 if (address_size
!= sizeof(void*))
1040 cc
->barf( "parse_CU_Header: invalid address_size" );
1041 TRACE_D3(" Pointer Size: %d\n", (Int
)address_size
);
1043 cc
->is_type_unit
= type_unit
;
1044 cc
->is_alt_info
= alt_info
;
1047 cc
->type_signature
= get_ULong( c
);
1048 cc
->type_offset
= get_Dwarfish_UWord( c
, cc
->is_dw64
);
1051 /* Set up cc->debug_abbv to point to the relevant table for this
1052 CU. Set its .szB so that at least we can't read off the end of
1053 the debug_abbrev section -- potentially (and quite likely) too
1054 big, if this isn't the last table in the section, but at least
1057 This amounts to taking escn_debug_abbv and moving the start
1058 position along by debug_abbrev_offset bytes, hence forming a
1059 smaller DiSlice which has the same end point. Since we checked
1060 just above that debug_abbrev_offset is less than the size of
1061 escn_debug_abbv, this should leave us with a nonempty slice. */
1062 vg_assert(debug_abbrev_offset
< escn_debug_abbv
.szB
);
1063 cc
->debug_abbv
= escn_debug_abbv
;
1064 cc
->debug_abbv
.ioff
+= debug_abbrev_offset
;
1065 cc
->debug_abbv
.szB
-= debug_abbrev_offset
;
1067 init_ht_abbvs(cc
, td3
);
1070 /* This represents a single signatured type. It maps a type signature
1071 (a ULong) to a cooked DIE offset. Objects of this type are stored
1072 in the type signature hash table. */
1074 struct D3SignatureType
{
1075 struct D3SignatureType
*next
;
1077 ULong type_signature
;
1082 /* Record a signatured type in the hash table. */
1083 static void record_signatured_type ( VgHashTable
*tab
,
1084 ULong type_signature
,
1087 D3SignatureType
*dstype
= ML_(dinfo_zalloc
) ( "di.readdwarf3.sigtype",
1088 sizeof(D3SignatureType
) );
1089 dstype
->data
= (UWord
) type_signature
;
1090 dstype
->type_signature
= type_signature
;
1092 VG_(HT_add_node
) ( tab
, dstype
);
1095 /* Given a type signature hash table and a type signature, return the
1096 cooked DIE offset of the type. If the type cannot be found, call
1098 static UWord
lookup_signatured_type ( const VgHashTable
*tab
,
1099 ULong type_signature
,
1100 void (*barf
)( const HChar
* ) __attribute__((noreturn
)) )
1102 D3SignatureType
*dstype
= VG_(HT_lookup
) ( tab
, (UWord
) type_signature
);
1103 /* This may be unwarranted chumminess with the hash table
1105 while ( dstype
!= NULL
&& dstype
->type_signature
!= type_signature
)
1106 dstype
= dstype
->next
;
1107 if (dstype
== NULL
) {
1108 barf("lookup_signatured_type: could not find signatured type");
1116 /* Represents Form data. If szB is 1/2/4/8 then the result is in the
1117 lowest 1/2/4/8 bytes of u.val. If szB is zero or negative then the
1118 result is an image section beginning at u.cur and with size -szB.
1119 No other szB values are allowed. */
1122 Long szB
; // 1, 2, 4, 8 or non-positive values only.
1123 union { ULong val
; DiCursor cur
; } u
;
1127 /* From 'c', get the Form data into 'cts'. Either it gets a 1/2/4/8
1128 byte scalar value, or (a reference to) zero or more bytes starting
1131 void get_Form_contents ( /*OUT*/FormContents
* cts
,
1132 const CUConst
* cc
, Cursor
* c
,
1133 Bool td3
, DW_FORM form
)
1135 VG_(bzero_inline
)(cts
, sizeof(*cts
));
1136 // !!! keep switch in sync with get_Form_szB. The nr of characters read below
1137 // must be computed similarly in get_Form_szB.
1138 // The consistency is verified in trace_DIE.
1141 cts
->u
.val
= (ULong
)(UChar
)get_UChar(c
);
1143 TRACE_D3("%u", (UInt
)cts
->u
.val
);
1146 cts
->u
.val
= (ULong
)(UShort
)get_UShort(c
);
1148 TRACE_D3("%u", (UInt
)cts
->u
.val
);
1151 cts
->u
.val
= (ULong
)(UInt
)get_UInt(c
);
1153 TRACE_D3("%u", (UInt
)cts
->u
.val
);
1156 cts
->u
.val
= get_ULong(c
);
1158 TRACE_D3("%llu", cts
->u
.val
);
1160 case DW_FORM_sec_offset
:
1161 cts
->u
.val
= (ULong
)get_Dwarfish_UWord( c
, cc
->is_dw64
);
1162 cts
->szB
= cc
->is_dw64
? 8 : 4;
1163 TRACE_D3("%llu", cts
->u
.val
);
1166 cts
->u
.val
= (ULong
)(Long
)get_SLEB128(c
);
1168 TRACE_D3("%llu", cts
->u
.val
);
1171 cts
->u
.val
= (ULong
)(Long
)get_ULEB128(c
);
1173 TRACE_D3("%llu", cts
->u
.val
);
1176 /* note, this is a hack. DW_FORM_addr is defined as getting
1177 a word the size of the target machine as defined by the
1178 address_size field in the CU Header. However,
1179 parse_CU_Header() rejects all inputs except those for
1180 which address_size == sizeof(Word), hence we can just
1181 treat it as a (host) Word. */
1182 cts
->u
.val
= (ULong
)(UWord
)get_UWord(c
);
1183 cts
->szB
= sizeof(UWord
);
1184 TRACE_D3("0x%lx", (UWord
)cts
->u
.val
);
1187 case DW_FORM_ref_addr
:
1188 /* We make the same word-size assumption as DW_FORM_addr. */
1189 /* What does this really mean? From D3 Sec 7.5.4,
1190 description of "reference", it would appear to reference
1191 some other DIE, by specifying the offset from the
1192 beginning of a .debug_info section. The D3 spec mentions
1193 that this might be in some other shared object and
1194 executable. But I don't see how the name of the other
1195 object/exe is specified.
1197 At least for the DW_FORM_ref_addrs created by icc11, the
1198 references seem to be within the same object/executable.
1199 So for the moment we merely range-check, to see that they
1200 actually do specify a plausible offset within this
1201 object's .debug_info, and return the value unchanged.
1203 In DWARF 2, DW_FORM_ref_addr is address-sized, but in
1204 DWARF 3 and later, it is offset-sized.
1206 if (cc
->version
== 2) {
1207 cts
->u
.val
= (ULong
)(UWord
)get_UWord(c
);
1208 cts
->szB
= sizeof(UWord
);
1210 cts
->u
.val
= get_Dwarfish_UWord(c
, cc
->is_dw64
);
1211 cts
->szB
= cc
->is_dw64
? sizeof(ULong
) : sizeof(UInt
);
1213 TRACE_D3("0x%lx", (UWord
)cts
->u
.val
);
1214 if (0) VG_(printf
)("DW_FORM_ref_addr 0x%lx\n", (UWord
)cts
->u
.val
);
1215 if (/* the following is surely impossible, but ... */
1216 !ML_(sli_is_valid
)(cc
->escn_debug_info
)
1217 || cts
->u
.val
>= (ULong
)cc
->escn_debug_info
.szB
) {
1218 /* Hmm. Offset is nonsensical for this object's .debug_info
1219 section. Be safe and reject it. */
1220 cc
->barf("get_Form_contents: DW_FORM_ref_addr points "
1221 "outside .debug_info");
1225 case DW_FORM_strp
: {
1226 /* this is an offset into .debug_str */
1227 UWord uw
= (UWord
)get_Dwarfish_UWord( c
, cc
->is_dw64
);
1228 if (!ML_(sli_is_valid
)(cc
->escn_debug_str
)
1229 || uw
>= cc
->escn_debug_str
.szB
)
1230 cc
->barf("get_Form_contents: DW_FORM_strp "
1231 "points outside .debug_str");
1232 /* FIXME: check the entire string lies inside debug_str,
1233 not just the first byte of it. */
1235 = ML_(cur_plus
)( ML_(cur_from_sli
)(cc
->escn_debug_str
), uw
);
1237 HChar
* tmp
= ML_(cur_read_strdup
)(str
, "di.getFC.1");
1238 TRACE_D3("(indirect string, offset: 0x%lx): %s", uw
, tmp
);
1239 ML_(dinfo_free
)(tmp
);
1242 cts
->szB
= - (Long
)(1 + (ULong
)ML_(cur_strlen
)(str
));
1245 case DW_FORM_string
: {
1246 DiCursor str
= get_AsciiZ(c
);
1248 HChar
* tmp
= ML_(cur_read_strdup
)(str
, "di.getFC.2");
1249 TRACE_D3("%s", tmp
);
1250 ML_(dinfo_free
)(tmp
);
1253 /* strlen is safe because get_AsciiZ already 'vetted' the
1255 cts
->szB
= - (Long
)(1 + (ULong
)ML_(cur_strlen
)(str
));
1258 case DW_FORM_ref1
: {
1259 UChar u8
= get_UChar(c
);
1260 UWord res
= cc
->cu_start_offset
+ (UWord
)u8
;
1261 cts
->u
.val
= (ULong
)res
;
1262 cts
->szB
= sizeof(UWord
);
1263 TRACE_D3("<%lx>", res
);
1266 case DW_FORM_ref2
: {
1267 UShort u16
= get_UShort(c
);
1268 UWord res
= cc
->cu_start_offset
+ (UWord
)u16
;
1269 cts
->u
.val
= (ULong
)res
;
1270 cts
->szB
= sizeof(UWord
);
1271 TRACE_D3("<%lx>", res
);
1274 case DW_FORM_ref4
: {
1275 UInt u32
= get_UInt(c
);
1276 UWord res
= cc
->cu_start_offset
+ (UWord
)u32
;
1277 cts
->u
.val
= (ULong
)res
;
1278 cts
->szB
= sizeof(UWord
);
1279 TRACE_D3("<%lx>", res
);
1282 case DW_FORM_ref8
: {
1283 ULong u64
= get_ULong(c
);
1284 UWord res
= cc
->cu_start_offset
+ (UWord
)u64
;
1285 cts
->u
.val
= (ULong
)res
;
1286 cts
->szB
= sizeof(UWord
);
1287 TRACE_D3("<%lx>", res
);
1290 case DW_FORM_ref_udata
: {
1291 ULong u64
= get_ULEB128(c
);
1292 UWord res
= cc
->cu_start_offset
+ (UWord
)u64
;
1293 cts
->u
.val
= (ULong
)res
;
1294 cts
->szB
= sizeof(UWord
);
1295 TRACE_D3("<%lx>", res
);
1298 case DW_FORM_flag
: {
1299 UChar u8
= get_UChar(c
);
1300 TRACE_D3("%u", (UInt
)u8
);
1301 cts
->u
.val
= (ULong
)u8
;
1305 case DW_FORM_flag_present
:
1310 case DW_FORM_block1
: {
1312 ULong u64
= (ULong
)get_UChar(c
);
1313 DiCursor block
= get_DiCursor_from_Cursor(c
);
1314 TRACE_D3("%llu byte block: ", u64
);
1315 for (u64b
= u64
; u64b
> 0; u64b
--) {
1316 UChar u8
= get_UChar(c
);
1317 TRACE_D3("%x ", (UInt
)u8
);
1320 cts
->szB
= - (Long
)u64
;
1323 case DW_FORM_block2
: {
1325 ULong u64
= (ULong
)get_UShort(c
);
1326 DiCursor block
= get_DiCursor_from_Cursor(c
);
1327 TRACE_D3("%llu byte block: ", u64
);
1328 for (u64b
= u64
; u64b
> 0; u64b
--) {
1329 UChar u8
= get_UChar(c
);
1330 TRACE_D3("%x ", (UInt
)u8
);
1333 cts
->szB
= - (Long
)u64
;
1336 case DW_FORM_block4
: {
1338 ULong u64
= (ULong
)get_UInt(c
);
1339 DiCursor block
= get_DiCursor_from_Cursor(c
);
1340 TRACE_D3("%llu byte block: ", u64
);
1341 for (u64b
= u64
; u64b
> 0; u64b
--) {
1342 UChar u8
= get_UChar(c
);
1343 TRACE_D3("%x ", (UInt
)u8
);
1346 cts
->szB
= - (Long
)u64
;
1349 case DW_FORM_exprloc
:
1350 case DW_FORM_block
: {
1352 ULong u64
= (ULong
)get_ULEB128(c
);
1353 DiCursor block
= get_DiCursor_from_Cursor(c
);
1354 TRACE_D3("%llu byte block: ", u64
);
1355 for (u64b
= u64
; u64b
> 0; u64b
--) {
1356 UChar u8
= get_UChar(c
);
1357 TRACE_D3("%x ", (UInt
)u8
);
1360 cts
->szB
= - (Long
)u64
;
1363 case DW_FORM_ref_sig8
: {
1365 ULong signature
= get_ULong (c
);
1366 ULong work
= signature
;
1367 TRACE_D3("8 byte signature: ");
1368 for (u64b
= 8; u64b
> 0; u64b
--) {
1369 UChar u8
= work
& 0xff;
1370 TRACE_D3("%x ", (UInt
)u8
);
1374 /* cc->signature_types is only built/initialised when
1375 VG_(clo_read_var_info) is set. In this case,
1376 the DW_FORM_ref_sig8 can be looked up.
1377 But we can also arrive here when only reading inline info
1378 and VG_(clo_trace_symtab) is set. In such a case,
1379 we cannot lookup the DW_FORM_ref_sig8, we rather assign
1380 a dummy value. This is a kludge, but otherwise,
1381 the 'dwarf inline info reader' tracing would have to
1382 do type processing/reading. It is better to avoid
1383 adding significant 'real' processing only due to tracing. */
1384 if (VG_(clo_read_var_info
)) {
1385 /* Due to the way that the hash table is constructed, the
1386 resulting DIE offset here is already "cooked". See
1387 cook_die_using_form. */
1388 cts
->u
.val
= lookup_signatured_type (cc
->signature_types
, signature
,
1392 vg_assert (VG_(clo_read_inline_info
));
1393 TRACE_D3("<not dereferencing signature type>");
1394 cts
->u
.val
= 0; /* Assign a dummy/rubbish value */
1396 cts
->szB
= sizeof(UWord
);
1399 case DW_FORM_indirect
:
1400 get_Form_contents (cts
, cc
, c
, td3
, (DW_FORM
)get_ULEB128(c
));
1403 case DW_FORM_GNU_ref_alt
:
1404 cts
->u
.val
= get_Dwarfish_UWord(c
, cc
->is_dw64
);
1405 cts
->szB
= cc
->is_dw64
? sizeof(ULong
) : sizeof(UInt
);
1406 TRACE_D3("0x%lx", (UWord
)cts
->u
.val
);
1407 if (0) VG_(printf
)("DW_FORM_GNU_ref_alt 0x%lx\n", (UWord
)cts
->u
.val
);
1408 if (/* the following is surely impossible, but ... */
1409 !ML_(sli_is_valid
)(cc
->escn_debug_info_alt
))
1410 cc
->barf("get_Form_contents: DW_FORM_GNU_ref_addr used, "
1411 "but no alternate .debug_info");
1412 else if (cts
->u
.val
>= (ULong
)cc
->escn_debug_info_alt
.szB
) {
1413 /* Hmm. Offset is nonsensical for this object's .debug_info
1414 section. Be safe and reject it. */
1415 cc
->barf("get_Form_contents: DW_FORM_GNU_ref_addr points "
1416 "outside alternate .debug_info");
1420 case DW_FORM_GNU_strp_alt
: {
1421 /* this is an offset into alternate .debug_str */
1422 SizeT uw
= (UWord
)get_Dwarfish_UWord( c
, cc
->is_dw64
);
1423 if (!ML_(sli_is_valid
)(cc
->escn_debug_str_alt
))
1424 cc
->barf("get_Form_contents: DW_FORM_GNU_strp_alt used, "
1425 "but no alternate .debug_str");
1426 else if (uw
>= cc
->escn_debug_str_alt
.szB
)
1427 cc
->barf("get_Form_contents: DW_FORM_GNU_strp_alt "
1428 "points outside alternate .debug_str");
1429 /* FIXME: check the entire string lies inside debug_str,
1430 not just the first byte of it. */
1432 = ML_(cur_plus
)( ML_(cur_from_sli
)(cc
->escn_debug_str_alt
), uw
);
1434 HChar
* tmp
= ML_(cur_read_strdup
)(str
, "di.getFC.3");
1435 TRACE_D3("(indirect alt string, offset: 0x%lx): %s", uw
, tmp
);
1436 ML_(dinfo_free
)(tmp
);
1439 cts
->szB
= - (Long
)(1 + (ULong
)ML_(cur_strlen
)(str
));
1445 "get_Form_contents: unhandled %u (%s) at <%llx>\n",
1446 form
, ML_(pp_DW_FORM
)(form
), get_position_of_Cursor(c
));
1447 c
->barf("get_Form_contents: unhandled DW_FORM");
1451 static inline UInt
sizeof_Dwarfish_UWord (Bool is_dw64
)
1454 return sizeof(ULong
);
1456 return sizeof(UInt
);
1459 #define VARSZ_FORM 0xffffffff
1460 /* If the form is a fixed length form, return the nr of bytes for this form.
1461 If the form is a variable length form, return VARSZ_FORM. */
1463 UInt
get_Form_szB (const CUConst
* cc
, DW_FORM form
)
1465 // !!! keep switch in sync with get_Form_contents : the nr of bytes
1466 // read from a cursor by get_Form_contents must be returned by
1467 // the below switch.
1468 // The consistency is verified in trace_DIE.
1470 case DW_FORM_data1
: return 1;
1471 case DW_FORM_data2
: return 2;
1472 case DW_FORM_data4
: return 4;
1473 case DW_FORM_data8
: return 8;
1474 case DW_FORM_sec_offset
:
1483 case DW_FORM_addr
: // See hack in get_Form_contents
1484 return sizeof(UWord
);
1485 case DW_FORM_ref_addr
: // See hack in get_Form_contents
1486 if (cc
->version
== 2)
1487 return sizeof(UWord
);
1489 return sizeof_Dwarfish_UWord (cc
->is_dw64
);
1491 return sizeof_Dwarfish_UWord (cc
->is_dw64
);
1492 case DW_FORM_string
:
1502 case DW_FORM_ref_udata
:
1506 case DW_FORM_flag_present
:
1507 return 0; // !!! special case, no data.
1508 case DW_FORM_block1
:
1510 case DW_FORM_block2
:
1512 case DW_FORM_block4
:
1514 case DW_FORM_exprloc
:
1517 case DW_FORM_ref_sig8
:
1519 case DW_FORM_indirect
:
1521 case DW_FORM_GNU_ref_alt
:
1522 return sizeof_Dwarfish_UWord(cc
->is_dw64
);
1523 case DW_FORM_GNU_strp_alt
:
1524 return sizeof_Dwarfish_UWord(cc
->is_dw64
);
1527 "get_Form_szB: unhandled %u (%s)\n",
1528 form
, ML_(pp_DW_FORM
)(form
));
1529 cc
->barf("get_Form_contents: unhandled DW_FORM");
1533 /* Skip a DIE as described by abbv.
1534 If the DIE has a sibling, *sibling is set to the skipped DIE sibling value. */
1536 void skip_DIE (UWord
*sibling
,
1545 if (abbv
->nf
[nf_i
].at_name
== DW_AT_sibling
) {
1546 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/,
1547 (DW_FORM
)abbv
->nf
[nf_i
].at_form
);
1549 *sibling
= cts
.u
.val
;
1551 } else if (abbv
->nf
[nf_i
].skip_szB
== VARSZ_FORM
) {
1552 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/,
1553 (DW_FORM
)abbv
->nf
[nf_i
].at_form
);
1556 advance_position_of_Cursor (c_die
, (ULong
)abbv
->nf
[nf_i
].skip_szB
);
1557 nf_i
= abbv
->nf
[nf_i
].next_nf
;
1565 /*------------------------------------------------------------*/
1567 /*--- Parsing of variable-related DIEs ---*/
1569 /*------------------------------------------------------------*/
1573 const HChar
* name
; /* in DebugInfo's .strpool */
1574 /* Represent ranges economically. nRanges is the number of
1576 0: .rngOneMin .rngOneMax .manyRanges are all zero
1577 1: .rngOneMin .rngOneMax hold the range; .rngMany is NULL
1578 2: .rngOneMin .rngOneMax are zero; .rngMany holds the ranges.
1579 This is merely an optimisation to avoid having to allocate
1580 and free the XArray in the common (98%) of cases where there
1581 is zero or one address ranges. */
1585 XArray
* rngMany
; /* of AddrRange. NON-UNIQUE PTR in AR_DINFO. */
1586 /* Do not free .rngMany, since many TempVars will have the same
1587 value. Instead the associated storage is to be freed by
1588 deleting 'rangetree', which stores a single copy of each
1592 UWord typeR
; /* a cuOff */
1593 GExpr
* gexpr
; /* for this variable */
1594 GExpr
* fbGX
; /* to find the frame base of the enclosing fn, if
1596 UInt fndn_ix
; /* declaring file/dirname index in fndnpool, or 0 */
1597 Int fLine
; /* declaring file line number, or zero */
1598 /* offset in .debug_info, so that abstract instances can be
1599 found to satisfy references from concrete instances. */
1601 UWord absOri
; /* so the absOri fields refer to dioff fields
1602 in some other, related TempVar. */
1608 /* Contains the range stack: a stack of address ranges, one
1609 stack entry for each nested scope.
1611 Some scope entries are created by function definitions
1612 (DW_AT_subprogram), and for those, we also note the GExpr
1613 derived from its DW_AT_frame_base attribute, if any.
1614 Consequently it should be possible to find, for any
1615 variable's DIE, the GExpr for the containing function's
1616 DW_AT_frame_base by scanning back through the stack to find
1617 the nearest entry associated with a function. This somewhat
1618 elaborate scheme is provided so as to make it possible to
1619 obtain the correct DW_AT_frame_base expression even in the
1620 presence of nested functions (or to be more precise, in the
1621 presence of nested DW_AT_subprogram DIEs).
1623 Int sp
; /* [sp] is innermost active entry; sp==-1 for empty
1626 XArray
**ranges
; /* XArray of AddrRange */
1627 Int
*level
; /* D3 DIE levels */
1628 Bool
*isFunc
; /* from DW_AT_subprogram? */
1629 GExpr
**fbGX
; /* if isFunc, contains the FB expr, else NULL */
1630 /* The fndn_ix file name/dirname table. Is a mapping from dwarf
1631 integer index to the index in di->fndnpool. */
1632 XArray
* /* of UInt* */ fndn_ix_Table
;
1636 /* Completely initialise a variable parser object */
1638 var_parser_init ( D3VarParser
*parser
)
1641 parser
->stack_size
= 0;
1642 parser
->ranges
= NULL
;
1643 parser
->level
= NULL
;
1644 parser
->isFunc
= NULL
;
1645 parser
->fbGX
= NULL
;
1646 parser
->fndn_ix_Table
= NULL
;
1649 /* Release any memory hanging off a variable parser object */
1651 var_parser_release ( D3VarParser
*parser
)
1653 ML_(dinfo_free
)( parser
->ranges
);
1654 ML_(dinfo_free
)( parser
->level
);
1655 ML_(dinfo_free
)( parser
->isFunc
);
1656 ML_(dinfo_free
)( parser
->fbGX
);
1659 static void varstack_show ( const D3VarParser
* parser
, const HChar
* str
)
1662 VG_(printf
)(" varstack (%s) {\n", str
);
1663 for (i
= 0; i
<= parser
->sp
; i
++) {
1664 XArray
* xa
= parser
->ranges
[i
];
1666 VG_(printf
)(" [%ld] (level %d)", i
, parser
->level
[i
]);
1667 if (parser
->isFunc
[i
]) {
1668 VG_(printf
)(" (fbGX=%p)", parser
->fbGX
[i
]);
1670 vg_assert(parser
->fbGX
[i
] == NULL
);
1673 if (VG_(sizeXA
)( xa
) == 0) {
1674 VG_(printf
)("** empty PC range array **");
1676 for (j
= 0; j
< VG_(sizeXA
)( xa
); j
++) {
1677 AddrRange
* range
= (AddrRange
*) VG_(indexXA
)( xa
, j
);
1679 VG_(printf
)("[%#lx,%#lx] ", range
->aMin
, range
->aMax
);
1684 VG_(printf
)(" }\n");
1687 /* Remove from the stack, all entries with .level > 'level' */
1689 void varstack_preen ( D3VarParser
* parser
, Bool td3
, Int level
)
1691 Bool changed
= False
;
1692 vg_assert(parser
->sp
< parser
->stack_size
);
1694 vg_assert(parser
->sp
>= -1);
1695 if (parser
->sp
== -1) break;
1696 if (parser
->level
[parser
->sp
] <= level
) break;
1698 TRACE_D3("BBBBAAAA varstack_pop [newsp=%d]\n", parser
->sp
-1);
1699 vg_assert(parser
->ranges
[parser
->sp
]);
1700 /* Who allocated this xa? get_range_list() or
1701 unitary_range_list(). */
1702 VG_(deleteXA
)( parser
->ranges
[parser
->sp
] );
1707 varstack_show( parser
, "after preen" );
1710 static void varstack_push ( const CUConst
* cc
,
1711 D3VarParser
* parser
,
1713 XArray
* ranges
, Int level
,
1714 Bool isFunc
, GExpr
* fbGX
) {
1716 TRACE_D3("BBBBAAAA varstack_push[newsp=%d]: %d %p\n",
1717 parser
->sp
+1, level
, ranges
);
1719 /* First we need to zap everything >= 'level', as we are about to
1720 replace any previous entry at 'level', so .. */
1721 varstack_preen(parser
, /*td3*/False
, level
-1);
1723 vg_assert(parser
->sp
>= -1);
1724 vg_assert(parser
->sp
< parser
->stack_size
);
1725 if (parser
->sp
== parser
->stack_size
- 1) {
1726 parser
->stack_size
+= 48;
1728 ML_(dinfo_realloc
)("di.readdwarf3.varpush.1", parser
->ranges
,
1729 parser
->stack_size
* sizeof parser
->ranges
[0]);
1731 ML_(dinfo_realloc
)("di.readdwarf3.varpush.2", parser
->level
,
1732 parser
->stack_size
* sizeof parser
->level
[0]);
1734 ML_(dinfo_realloc
)("di.readdwarf3.varpush.3", parser
->isFunc
,
1735 parser
->stack_size
* sizeof parser
->isFunc
[0]);
1737 ML_(dinfo_realloc
)("di.readdwarf3.varpush.4", parser
->fbGX
,
1738 parser
->stack_size
* sizeof parser
->fbGX
[0]);
1740 if (parser
->sp
>= 0)
1741 vg_assert(parser
->level
[parser
->sp
] < level
);
1743 vg_assert(ranges
!= NULL
);
1744 if (!isFunc
) vg_assert(fbGX
== NULL
);
1745 parser
->ranges
[parser
->sp
] = ranges
;
1746 parser
->level
[parser
->sp
] = level
;
1747 parser
->isFunc
[parser
->sp
] = isFunc
;
1748 parser
->fbGX
[parser
->sp
] = fbGX
;
1750 varstack_show( parser
, "after push" );
1754 /* cts is derived from a DW_AT_location and so refers either to a
1755 location expression or to a location list. Figure out which, and
1756 in both cases bundle the expression or location list into a
1757 so-called GExpr (guarded expression). */
1758 __attribute__((noinline
))
1759 static GExpr
* get_GX ( const CUConst
* cc
, Bool td3
, const FormContents
* cts
)
1761 GExpr
* gexpr
= NULL
;
1763 /* represents a non-empty in-line location expression, and
1764 cts->u.cur points at the image bytes */
1765 gexpr
= make_singleton_GX( cts
->u
.cur
, (ULong
)(- cts
->szB
) );
1769 /* represents a location list. cts->u.val is the offset of it
1771 if (!cc
->cu_svma_known
)
1772 cc
->barf("get_GX: location list, but CU svma is unknown");
1773 gexpr
= make_general_GX( cc
, td3
, cts
->u
.val
, cc
->cu_svma
);
1776 vg_assert(0); /* else caller is bogus */
1781 /* Returns an xarray* of directory names (indexed by the dwarf dirname
1783 If 'compdir' is NULL, entry [0] will be set to "."
1784 otherwise entry [0] is set to compdir.
1785 Entry [0] basically means "the current directory of the compilation",
1786 whatever that means, according to the DWARF3 spec.
1787 FIXME??? readdwarf3.c/readdwarf.c have a lot of duplicated code */
1789 XArray
* read_dirname_xa (DebugInfo
* di
, const HChar
*compdir
,
1793 XArray
* dirname_xa
; /* xarray of HChar* dirname */
1794 const HChar
* dirname
;
1797 dirname_xa
= VG_(newXA
) (ML_(dinfo_zalloc
), "di.rdxa.1", ML_(dinfo_free
),
1800 if (compdir
== NULL
) {
1805 compdir_len
= VG_(strlen
)(compdir
);
1807 VG_(addToXA
) (dirname_xa
, &dirname
);
1809 TRACE_D3(" The Directory Table%s\n",
1810 peek_UChar(c
) == 0 ? " is empty." : ":" );
1812 while (peek_UChar(c
) != 0) {
1814 DiCursor cur
= get_AsciiZ(c
);
1815 HChar
* data_str
= ML_(cur_read_strdup
)( cur
, "dirname_xa.1" );
1816 TRACE_D3(" %s\n", data_str
);
1818 /* If data_str[0] is '/', then 'data' is an absolute path and we
1819 don't mess with it. Otherwise, construct the
1820 path 'compdir' ++ "/" ++ 'data'. */
1822 if (data_str
[0] != '/'
1823 /* not an absolute path */
1825 /* actually got something sensible for compdir */
1828 SizeT len
= compdir_len
+ 1 + VG_(strlen
)(data_str
);
1829 HChar
*buf
= ML_(dinfo_zalloc
)("dirname_xa.2", len
+ 1);
1831 VG_(strcpy
)(buf
, compdir
);
1832 VG_(strcat
)(buf
, "/");
1833 VG_(strcat
)(buf
, data_str
);
1835 dirname
= ML_(addStr
)(di
, buf
, len
);
1836 VG_(addToXA
) (dirname_xa
, &dirname
);
1837 if (0) VG_(printf
)("rel path %s\n", buf
);
1838 ML_(dinfo_free
)(buf
);
1840 /* just use 'data'. */
1841 dirname
= ML_(addStr
)(di
,data_str
,-1);
1842 VG_(addToXA
) (dirname_xa
, &dirname
);
1843 if (0) VG_(printf
)("abs path %s\n", data_str
);
1846 ML_(dinfo_free
)(data_str
);
1851 if (get_UChar (c
) != 0) {
1852 ML_(symerr
)(NULL
, True
,
1853 "could not get NUL at end of DWARF directory table");
1854 VG_(deleteXA
)(dirname_xa
);
1862 void read_filename_table( /*MOD*/XArray
* /* of UInt* */ fndn_ix_Table
,
1863 const HChar
* compdir
,
1864 const CUConst
* cc
, ULong debug_line_offset
,
1873 XArray
* dirname_xa
; /* xarray of HChar* dirname */
1874 ULong dir_xa_ix
; /* Index in dirname_xa, as read from dwarf info. */
1875 const HChar
* dirname
;
1878 vg_assert(fndn_ix_Table
&& cc
&& cc
->barf
);
1879 if (!ML_(sli_is_valid
)(cc
->escn_debug_line
)
1880 || cc
->escn_debug_line
.szB
<= debug_line_offset
) {
1881 cc
->barf("read_filename_table: .debug_line is missing?");
1884 init_Cursor( &c
, cc
->escn_debug_line
, debug_line_offset
, cc
->barf
,
1885 "Overrun whilst reading .debug_line section(1)" );
1888 get_Initial_Length( &is_dw64
, &c
,
1889 "read_filename_table: invalid initial-length field" );
1890 version
= get_UShort( &c
);
1891 if (version
!= 2 && version
!= 3 && version
!= 4)
1892 cc
->barf("read_filename_table: Only DWARF version 2, 3 and 4 line info "
1893 "is currently supported.");
1894 /*header_length = (ULong)*/ get_Dwarfish_UWord( &c
, is_dw64
);
1895 /*minimum_instruction_length = */ get_UChar( &c
);
1897 /*maximum_operations_per_insn = */ get_UChar( &c
);
1898 /*default_is_stmt = */ get_UChar( &c
);
1899 /*line_base = (Char)*/ get_UChar( &c
);
1900 /*line_range = */ get_UChar( &c
);
1901 opcode_base
= get_UChar( &c
);
1902 /* skip over "standard_opcode_lengths" */
1903 for (i
= 1; i
< (Word
)opcode_base
; i
++)
1904 (void)get_UChar( &c
);
1906 dirname_xa
= read_dirname_xa(cc
->di
, compdir
, &c
, td3
);
1908 /* Read and record the file names table */
1909 vg_assert( VG_(sizeXA
)( fndn_ix_Table
) == 0 );
1910 /* Add a dummy index-zero entry. DWARF3 numbers its files
1911 from 1, for some reason. */
1912 fndn_ix
= ML_(addFnDn
) ( cc
->di
, "<unknown_file>", NULL
);
1913 VG_(addToXA
)( fndn_ix_Table
, &fndn_ix
);
1914 while (peek_UChar(&c
) != 0) {
1915 DiCursor cur
= get_AsciiZ(&c
);
1916 str
= ML_(addStrFromCursor
)( cc
->di
, cur
);
1917 dir_xa_ix
= get_ULEB128( &c
);
1918 if (dirname_xa
!= NULL
1919 && dir_xa_ix
>= 0 && dir_xa_ix
< VG_(sizeXA
) (dirname_xa
))
1920 dirname
= *(HChar
**)VG_(indexXA
) ( dirname_xa
, dir_xa_ix
);
1923 fndn_ix
= ML_(addFnDn
)( cc
->di
, str
, dirname
);
1924 TRACE_D3(" read_filename_table: %ld fndn_ix %u %s %s\n",
1925 VG_(sizeXA
)(fndn_ix_Table
), fndn_ix
,
1927 VG_(addToXA
)( fndn_ix_Table
, &fndn_ix
);
1928 (void)get_ULEB128( &c
); /* skip last mod time */
1929 (void)get_ULEB128( &c
); /* file size */
1931 /* We're done! The rest of it is not interesting. */
1932 if (dirname_xa
!= NULL
)
1933 VG_(deleteXA
)(dirname_xa
);
1936 /* setup_cu_svma to be called when a cu is found at level 0,
1937 to establish the cu_svma. */
1938 static void setup_cu_svma(CUConst
* cc
, Bool have_lo
, Addr ip_lo
, Bool td3
)
1941 /* We have potentially more than one type of parser parsing the
1942 dwarf information. At least currently, each parser establishes
1943 the cu_svma. So, in case cu_svma_known, we check that the same
1944 result is obtained by the 2nd parsing of the cu.
1946 Alternatively, we could reset cu_svma_known after each parsing
1947 and then check that we only see a single DW_TAG_compile_unit DIE
1948 at level 0, DWARF3 only allows exactly one top level DIE per
1954 /* Now, it may be that this DIE doesn't tell us the CU's
1955 SVMA, by way of not having a DW_AT_low_pc. That's OK --
1956 the CU doesn't *have* to have its SVMA specified.
1958 But as per last para D3 spec sec 3.1.1 ("Normal and
1959 Partial Compilation Unit Entries", "If the base address
1960 (viz, the SVMA) is undefined, then any DWARF entry of
1961 structure defined interms of the base address of that
1962 compilation unit is not valid.". So that means, if whilst
1963 processing the children of this top level DIE (or their
1964 children, etc) we see a DW_AT_range, and cu_svma_known is
1965 False, then the DIE that contains it is (per the spec)
1966 invalid, and we can legitimately stop and complain. */
1967 /* .. whereas The Reality is, simply assume the SVMA is zero
1968 if it isn't specified. */
1972 if (cc
->cu_svma_known
) {
1973 vg_assert (cu_svma
== cc
->cu_svma
);
1975 cc
->cu_svma_known
= True
;
1976 cc
->cu_svma
= cu_svma
;
1978 TRACE_D3("setup_cu_svma: acquire CU_SVMA of %p\n", (void*) cc
->cu_svma
);
1982 static void trace_DIE(
1986 UWord saved_die_c_offset
,
1994 Bool debug_types_flag
;
1997 UWord check_sibling
= 0;
1999 posn
= uncook_die( cc
, posn
, &debug_types_flag
, &alt_flag
);
2001 debug_types_flag
? cc
->escn_debug_types
:
2002 alt_flag
? cc
->escn_debug_info_alt
: cc
->escn_debug_info
,
2003 saved_die_c_offset
, cc
->barf
,
2004 "Overrun trace_DIE");
2006 VG_(printf
)(" <%d><%lx>: Abbrev Number: %llu (%s)%s%s\n",
2007 level
, posn
, (ULong
) abbv
->abbv_code
, ML_(pp_DW_TAG
)( dtag
),
2008 debug_types_flag
? " (in .debug_types)" : "",
2009 alt_flag
? " (in alternate .debug_info)" : "");
2012 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
2013 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
2015 if (attr
== 0 && form
== 0) break;
2016 VG_(printf
)(" %-18s: ", ML_(pp_DW_AT
)(attr
));
2017 /* Get the form contents, so as to print them */
2018 get_Form_contents( &cts
, cc
, &c
, True
, form
);
2019 if (attr
== DW_AT_sibling
&& cts
.szB
> 0) {
2020 sibling
= cts
.u
.val
;
2022 VG_(printf
)("\t\n");
2025 /* Verify that skipping a DIE gives the same displacement as
2026 tracing (i.e. reading) a DIE. If there is an inconsistency in
2027 the nr of bytes read by get_Form_contents and get_Form_szB, this
2028 should be detected by the below. Using --trace-symtab=yes
2029 --read-var-info=yes will ensure all DIEs are systematically
2031 skip_DIE (&check_sibling
, &check_skip
, abbv
, cc
);
2032 vg_assert (check_sibling
== sibling
);
2033 vg_assert (get_position_of_Cursor (&check_skip
)
2034 == get_position_of_Cursor (&c
));
2037 __attribute__((noreturn
))
2038 static void dump_bad_die_and_barf(
2039 const HChar
*whichparser
,
2044 UWord saved_die_c_offset
,
2048 trace_DIE (dtag
, posn
, level
, saved_die_c_offset
, abbv
, cc
);
2049 VG_(printf
)("%s:\n", whichparser
);
2050 cc
->barf("confused by the above DIE");
2053 __attribute__((noinline
))
2054 static void bad_DIE_confusion(int linenr
)
2056 VG_(printf
)("\nparse DIE(readdwarf3.c:%d): confused by:\n", linenr
);
2058 #define goto_bad_DIE do {bad_DIE_confusion(__LINE__); goto bad_DIE;} while (0)
2060 __attribute__((noinline
))
2061 static void parse_var_DIE (
2062 /*MOD*/WordFM
* /* of (XArray* of AddrRange, void) */ rangestree
,
2063 /*MOD*/XArray
* /* of TempVar* */ tempvars
,
2064 /*MOD*/XArray
* /* of GExpr* */ gexprs
,
2065 /*MOD*/D3VarParser
* parser
,
2078 UWord saved_die_c_offset
= get_position_of_Cursor( c_die
);
2080 varstack_preen( parser
, td3
, level
-1 );
2082 if (dtag
== DW_TAG_compile_unit
2083 || dtag
== DW_TAG_type_unit
2084 || dtag
== DW_TAG_partial_unit
) {
2085 Bool have_lo
= False
;
2086 Bool have_hi1
= False
;
2087 Bool hiIsRelative
= False
;
2088 Bool have_range
= False
;
2092 const HChar
*compdir
= NULL
;
2095 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
2096 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
2098 if (attr
== 0 && form
== 0) break;
2099 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, form
);
2100 if (attr
== DW_AT_low_pc
&& cts
.szB
> 0) {
2104 if (attr
== DW_AT_high_pc
&& cts
.szB
> 0) {
2107 if (form
!= DW_FORM_addr
)
2108 hiIsRelative
= True
;
2110 if (attr
== DW_AT_ranges
&& cts
.szB
> 0) {
2111 rangeoff
= cts
.u
.val
;
2114 if (attr
== DW_AT_comp_dir
) {
2116 cc
->barf("parse_var_DIE compdir: expecting indirect string");
2117 HChar
*str
= ML_(cur_read_strdup
)( cts
.u
.cur
,
2118 "parse_var_DIE.compdir" );
2119 compdir
= ML_(addStr
)(cc
->di
, str
, -1);
2120 ML_(dinfo_free
) (str
);
2122 if (attr
== DW_AT_stmt_list
&& cts
.szB
> 0) {
2123 read_filename_table( parser
->fndn_ix_Table
, compdir
,
2124 cc
, cts
.u
.val
, td3
);
2127 if (have_lo
&& have_hi1
&& hiIsRelative
)
2130 /* Now, does this give us an opportunity to find this
2133 setup_cu_svma(cc
, have_lo
, ip_lo
, td3
);
2135 /* Do we have something that looks sane? */
2136 if (have_lo
&& have_hi1
&& (!have_range
)) {
2138 varstack_push( cc
, parser
, td3
,
2139 unitary_range_list(ip_lo
, ip_hi1
- 1),
2141 False
/*isFunc*/, NULL
/*fbGX*/ );
2142 else if (ip_lo
== 0 && ip_hi1
== 0)
2143 /* CU has no code, presumably?
2144 Such situations have been encountered for code
2145 compiled with -ffunction-sections -fdata-sections
2146 and linked with --gc-sections. Completely
2147 eliminated CU gives such 0 lo/hi pc. Similarly
2148 to a CU which has no lo/hi/range pc, we push
2149 an empty range list. */
2150 varstack_push( cc
, parser
, td3
,
2153 False
/*isFunc*/, NULL
/*fbGX*/ );
2155 if ((!have_lo
) && (!have_hi1
) && have_range
) {
2156 varstack_push( cc
, parser
, td3
,
2157 get_range_list( cc
, td3
,
2158 rangeoff
, cc
->cu_svma
),
2160 False
/*isFunc*/, NULL
/*fbGX*/ );
2162 if ((!have_lo
) && (!have_hi1
) && (!have_range
)) {
2163 /* CU has no code, presumably? */
2164 varstack_push( cc
, parser
, td3
,
2167 False
/*isFunc*/, NULL
/*fbGX*/ );
2169 if (have_lo
&& (!have_hi1
) && have_range
&& ip_lo
== 0) {
2170 /* broken DIE created by gcc-4.3.X ? Ignore the
2171 apparently-redundant DW_AT_low_pc and use the DW_AT_ranges
2173 varstack_push( cc
, parser
, td3
,
2174 get_range_list( cc
, td3
,
2175 rangeoff
, cc
->cu_svma
),
2177 False
/*isFunc*/, NULL
/*fbGX*/ );
2179 if (0) VG_(printf
)("I got hlo %d hhi1 %d hrange %d\n",
2180 (Int
)have_lo
, (Int
)have_hi1
, (Int
)have_range
);
2185 if (dtag
== DW_TAG_lexical_block
|| dtag
== DW_TAG_subprogram
) {
2186 Bool have_lo
= False
;
2187 Bool have_hi1
= False
;
2188 Bool have_range
= False
;
2189 Bool hiIsRelative
= False
;
2193 Bool isFunc
= dtag
== DW_TAG_subprogram
;
2197 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
2198 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
2200 if (attr
== 0 && form
== 0) break;
2201 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, form
);
2202 if (attr
== DW_AT_low_pc
&& cts
.szB
> 0) {
2206 if (attr
== DW_AT_high_pc
&& cts
.szB
> 0) {
2209 if (form
!= DW_FORM_addr
)
2210 hiIsRelative
= True
;
2212 if (attr
== DW_AT_ranges
&& cts
.szB
> 0) {
2213 rangeoff
= cts
.u
.val
;
2217 && attr
== DW_AT_frame_base
2218 && cts
.szB
!= 0 /* either scalar or nonempty block */) {
2219 fbGX
= get_GX( cc
, False
/*td3*/, &cts
);
2221 VG_(addToXA
)(gexprs
, &fbGX
);
2224 if (have_lo
&& have_hi1
&& hiIsRelative
)
2226 /* Do we have something that looks sane? */
2227 if (dtag
== DW_TAG_subprogram
2228 && (!have_lo
) && (!have_hi1
) && (!have_range
)) {
2229 /* This is legit - ignore it. Sec 3.3.3: "A subroutine entry
2230 representing a subroutine declaration that is not also a
2231 definition does not have code address or range
2234 if (dtag
== DW_TAG_lexical_block
2235 && (!have_lo
) && (!have_hi1
) && (!have_range
)) {
2236 /* I believe this is legit, and means the lexical block
2237 contains no insns (whatever that might mean). Ignore. */
2239 if (have_lo
&& have_hi1
&& (!have_range
)) {
2240 /* This scope supplies just a single address range. */
2242 varstack_push( cc
, parser
, td3
,
2243 unitary_range_list(ip_lo
, ip_hi1
- 1),
2244 level
, isFunc
, fbGX
);
2246 if ((!have_lo
) && (!have_hi1
) && have_range
) {
2247 /* This scope supplies multiple address ranges via the use of
2249 varstack_push( cc
, parser
, td3
,
2250 get_range_list( cc
, td3
,
2251 rangeoff
, cc
->cu_svma
),
2252 level
, isFunc
, fbGX
);
2254 if (have_lo
&& (!have_hi1
) && (!have_range
)) {
2255 /* This scope is bogus. The D3 spec sec 3.4 (Lexical Block
2256 Entries) says fairly clearly that a scope must have either
2257 _range or (_low_pc and _high_pc). */
2258 /* The spec is a bit ambiguous though. Perhaps a single byte
2259 range is intended? See sec 2.17 (Code Addresses And Ranges) */
2260 /* This case is here because icc9 produced this:
2261 <2><13bd>: DW_TAG_lexical_block
2262 DW_AT_decl_line : 5229
2263 DW_AT_decl_column : 37
2265 DW_AT_low_pc : 0x401b03
2267 /* Ignore (seems safer than pushing a single byte range) */
2272 if (dtag
== DW_TAG_variable
|| dtag
== DW_TAG_formal_parameter
) {
2273 const HChar
* name
= NULL
;
2274 UWord typeR
= D3_INVALID_CUOFF
;
2275 Bool global
= False
;
2276 GExpr
* gexpr
= NULL
;
2278 UWord abs_ori
= (UWord
)D3_INVALID_CUOFF
;
2283 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
2284 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
2286 if (attr
== 0 && form
== 0) break;
2287 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, form
);
2289 if (attr
== DW_AT_name
&& cts
.szB
< 0) {
2290 name
= ML_(addStrFromCursor
)( cc
->di
, cts
.u
.cur
);
2292 if (attr
== DW_AT_location
2293 && cts
.szB
!= 0 /* either scalar or nonempty block */) {
2294 gexpr
= get_GX( cc
, False
/*td3*/, &cts
);
2296 VG_(addToXA
)(gexprs
, &gexpr
);
2298 if (attr
== DW_AT_type
&& cts
.szB
> 0) {
2299 typeR
= cook_die_using_form( cc
, cts
.u
.val
, form
);
2301 if (attr
== DW_AT_external
&& cts
.szB
> 0 && cts
.u
.val
> 0) {
2304 if (attr
== DW_AT_abstract_origin
&& cts
.szB
> 0) {
2305 abs_ori
= (UWord
)cts
.u
.val
;
2307 if (attr
== DW_AT_declaration
&& cts
.szB
> 0 && cts
.u
.val
> 0) {
2308 /*declaration = True;*/
2310 if (attr
== DW_AT_decl_line
&& cts
.szB
> 0) {
2311 lineNo
= (Int
)cts
.u
.val
;
2313 if (attr
== DW_AT_decl_file
&& cts
.szB
> 0) {
2314 Int ftabIx
= (Int
)cts
.u
.val
;
2316 && ftabIx
< VG_(sizeXA
)( parser
->fndn_ix_Table
)) {
2317 fndn_ix
= *(UInt
*)VG_(indexXA
)( parser
->fndn_ix_Table
, ftabIx
);
2319 if (0) VG_(printf
)("XXX filename fndn_ix = %u %s\n", fndn_ix
,
2320 ML_(fndn_ix2filename
) (cc
->di
, fndn_ix
));
2323 if (!global
&& dtag
== DW_TAG_variable
&& level
== 1) {
2324 /* Case of a static variable. It is better to declare
2325 it global as the variable is not really related to
2326 a PC range, as its address can be used by program
2327 counters outside of the ranges where it is visible . */
2331 /* We'll collect it under if one of the following three
2333 (1) has location and type -> completed
2334 (2) has type only -> is an abstract instance
2335 (3) has location and abs_ori -> is a concrete instance
2336 Name, fndn_ix and line number are all optional frills.
2338 if ( /* 1 */ (gexpr
&& typeR
!= D3_INVALID_CUOFF
)
2339 /* 2 */ || (typeR
!= D3_INVALID_CUOFF
)
2340 /* 3 */ || (gexpr
&& abs_ori
!= (UWord
)D3_INVALID_CUOFF
) ) {
2342 /* Add this variable to the list of interesting looking
2343 variables. Crucially, note along with it the address
2344 range(s) associated with the variable, which for locals
2345 will be the address ranges at the top of the varparser's
2349 const XArray
* /* of AddrRange */ xa
;
2351 /* Stack can't be empty; we put a dummy entry on it for the
2352 entire address range before starting with the DIEs for
2354 vg_assert(parser
->sp
>= 0);
2356 /* If this is a local variable (non-global), try to find
2357 the GExpr for the DW_AT_frame_base of the containing
2358 function. It should have been pushed on the stack at the
2359 time we encountered its DW_TAG_subprogram DIE, so the way
2360 to find it is to scan back down the stack looking for it.
2361 If there isn't an enclosing stack entry marked 'isFunc'
2362 then we must be seeing variable or formal param DIEs
2363 outside of a function, so we deem the Dwarf to be
2364 malformed if that happens. Note that the fbGX may be NULL
2365 if the containing DW_TAG_subprogram didn't supply a
2366 DW_AT_frame_base -- that's OK, but there must actually be
2367 a containing DW_TAG_subprogram. */
2370 for (i
= parser
->sp
; i
>= 0; i
--) {
2371 if (parser
->isFunc
[i
]) {
2372 fbGX
= parser
->fbGX
[i
];
2378 if (0 && VG_(clo_verbosity
) >= 0) {
2379 VG_(message
)(Vg_DebugMsg
,
2380 "warning: parse_var_DIE: non-global variable "
2381 "outside DW_TAG_subprogram\n");
2384 /* This seems to happen a lot. Just ignore it -- if,
2385 when we come to evaluation of the location (guarded)
2386 expression, it requires a frame base value, and
2387 there's no expression for that, then evaluation as a
2388 whole will fail. Harmless - a bit of a waste of
2389 cycles but nothing more. */
2393 /* re "global ? 0 : parser->sp" (twice), if the var is
2394 marked 'global' then we must put it at the global scope,
2395 as only the global scope (level 0) covers the entire PC
2396 address space. It is asserted elsewhere that level 0
2397 always covers the entire address space. */
2398 xa
= parser
->ranges
[global
? 0 : parser
->sp
];
2399 nRanges
= VG_(sizeXA
)(xa
);
2400 vg_assert(nRanges
>= 0);
2402 tv
= ML_(dinfo_zalloc
)( "di.readdwarf3.pvD.1", sizeof(TempVar
) );
2404 tv
->level
= global
? 0 : parser
->sp
;
2408 tv
->fndn_ix
= fndn_ix
;
2411 tv
->absOri
= abs_ori
;
2413 /* See explanation on definition of type TempVar for the
2414 reason for this elaboration. */
2415 tv
->nRanges
= nRanges
;
2420 AddrRange
* range
= VG_(indexXA
)(xa
, 0);
2421 tv
->rngOneMin
= range
->aMin
;
2422 tv
->rngOneMax
= range
->aMax
;
2424 else if (nRanges
> 1) {
2425 /* See if we already have a range list which is
2426 structurally identical. If so, use that; if not, clone
2427 this one, and add it to our collection. */
2429 if (VG_(lookupFM
)( rangestree
, &keyW
, &valW
, (UWord
)xa
)) {
2430 XArray
* old
= (XArray
*)keyW
;
2431 vg_assert(valW
== 0);
2432 vg_assert(old
!= xa
);
2435 XArray
* cloned
= VG_(cloneXA
)( "di.readdwarf3.pvD.2", xa
);
2436 tv
->rngMany
= cloned
;
2437 VG_(addToFM
)( rangestree
, (UWord
)cloned
, 0 );
2441 VG_(addToXA
)( tempvars
, &tv
);
2443 TRACE_D3(" Recording this variable, with %ld PC range(s)\n",
2445 /* collect stats on how effective the ->ranges special
2448 static Int ntot
=0, ngt
=0;
2450 if (tv
->rngMany
) ngt
++;
2451 if (0 == (ntot
% 100000))
2452 VG_(printf
)("XXXX %d tot, %d cloned\n", ntot
, ngt
);
2457 /* Here are some other weird cases seen in the wild:
2459 We have a variable with a name and a type, but no
2460 location. I guess that's a sign that it has been
2461 optimised away. Ignore it. Here's an example:
2463 static Int lc_compar(void* n1, void* n2) {
2464 MC_Chunk* mc1 = *(MC_Chunk**)n1;
2465 MC_Chunk* mc2 = *(MC_Chunk**)n2;
2466 return (mc1->data < mc2->data ? -1 : 1);
2469 Both mc1 and mc2 are like this
2470 <2><5bc>: Abbrev Number: 21 (DW_TAG_variable)
2473 DW_AT_decl_line : 216
2476 whereas n1 and n2 do have locations specified.
2478 ---------------------------------------------
2480 We see a DW_TAG_formal_parameter with a type, but
2481 no name and no location. It's probably part of a function type
2482 construction, thusly, hence ignore it:
2483 <1><2b4>: Abbrev Number: 12 (DW_TAG_subroutine_type)
2484 DW_AT_sibling : <2c9>
2485 DW_AT_prototyped : 1
2487 <2><2be>: Abbrev Number: 13 (DW_TAG_formal_parameter)
2489 <2><2c3>: Abbrev Number: 13 (DW_TAG_formal_parameter)
2492 ---------------------------------------------
2494 Is very minimal, like this:
2495 <4><81d>: Abbrev Number: 44 (DW_TAG_variable)
2496 DW_AT_abstract_origin: <7ba>
2497 What that signifies I have no idea. Ignore.
2499 ----------------------------------------------
2501 Is very minimal, like this:
2502 <200f>: DW_TAG_formal_parameter
2503 DW_AT_abstract_ori: <1f4c>
2504 DW_AT_location : 13440
2505 What that signifies I have no idea. Ignore.
2506 It might be significant, though: the variable at least
2507 has a location and so might exist somewhere.
2508 Maybe we should handle this.
2510 ---------------------------------------------
2512 <22407>: DW_TAG_variable
2513 DW_AT_name : (indirect string, offset: 0x6579):
2514 vgPlain_trampoline_stuff_start
2515 DW_AT_decl_file : 29
2516 DW_AT_decl_line : 56
2518 DW_AT_declaration : 1
2520 Nameless and typeless variable that has a location? Who
2522 <2><3d178>: Abbrev Number: 22 (DW_TAG_variable)
2523 DW_AT_location : 9 byte block: 3 c0 c7 13 38 0 0 0 0
2524 (DW_OP_addr: 3813c7c0)
2526 No, really. Check it out. gcc is quite simply borked.
2527 <3><168cc>: Abbrev Number: 141 (DW_TAG_variable)
2528 // followed by no attributes, and the next DIE is a sibling,
2535 dump_bad_die_and_barf("parse_var_DIE", dtag
, posn
, level
,
2536 c_die
, saved_die_c_offset
,
2544 /* The fndn_ix file name/dirname table. Is a mapping from dwarf
2545 integer index to the index in di->fndnpool. */
2546 XArray
* /* of UInt* */ fndn_ix_Table
;
2547 UWord sibling
; // sibling of the last read DIE (if it has a sibling).
2551 /* Return the function name corresponding to absori.
2553 absori is a 'cooked' reference to a DIE, i.e. absori can be either
2554 in cc->escn_debug_info or in cc->escn_debug_info_alt.
2555 get_inlFnName will uncook absori.
2557 The returned value is a (permanent) string in DebugInfo's .strchunks.
2559 LIMITATION: absori must point in the CU of cc. If absori points
2560 in another CU, returns "UnknownInlinedFun".
2562 Here are the problems to retrieve the fun name if absori is in
2563 another CU: the DIE reading code cannot properly extract data from
2564 another CU, as the abbv code retrieved in the other CU cannot be
2565 translated in an abbreviation. Reading data from the alternate debug
2566 info also gives problems as the string reference is also in the alternate
2567 file, but when reading the alt DIE, the string form is a 'local' string,
2568 but cannot be read in the current CU, but must be read in the alt CU.
2569 See bug 338803 comment#3 and attachment for a failed attempt to handle
2570 these problems (failed because with the patch, only one alt abbrev hash
2571 table is kept, while we must handle all abbreviations in all CUs
2572 referenced by an absori (being a reference to an alt CU, or a previous
2573 or following CU). */
2574 static const HChar
* get_inlFnName (Int absori
, const CUConst
* cc
, Bool td3
)
2578 ULong atag
, abbv_code
;
2581 Bool type_flag
, alt_flag
;
2582 const HChar
*ret
= NULL
;
2586 /* Some inlined subroutine call dwarf entries do not have the abstract
2587 origin attribute, resulting in absori being 0 (see callers of
2588 get_inlFnName). This is observed at least with gcc 6.3.0 when compiling
2589 valgrind with lto. So, in case we have a 0 absori, do not report an
2590 error, instead, rather return an unknown inlined function. */
2592 static Bool absori0_reported
= False
;
2593 if (!absori0_reported
&& VG_(clo_verbosity
) > 1) {
2594 VG_(message
)(Vg_DebugMsg
,
2595 "Warning: inlined fn name without absori\n"
2596 "is shown as UnknownInlinedFun\n");
2597 absori0_reported
= True
;
2599 TRACE_D3(" <get_inlFnName>: absori is not set");
2600 return ML_(addStr
)(cc
->di
, "UnknownInlinedFun", -1);
2603 posn
= uncook_die( cc
, absori
, &type_flag
, &alt_flag
);
2605 cc
->barf("get_inlFnName: uncooked absori in type debug info");
2607 /* LIMITATION: check we are in the same CU.
2608 If not, return unknown inlined function name. */
2609 /* if crossing between alt debug info<>normal info
2610 or posn not in the cu range,
2611 then it is in another CU. */
2612 if (alt_flag
!= cc
->is_alt_info
2613 || posn
< cc
->cu_start_offset
2614 || posn
>= cc
->cu_start_offset
+ cc
->unit_length
) {
2615 static Bool reported
= False
;
2616 if (!reported
&& VG_(clo_verbosity
) > 1) {
2617 VG_(message
)(Vg_DebugMsg
,
2618 "Warning: cross-CU LIMITATION: some inlined fn names\n"
2619 "might be shown as UnknownInlinedFun\n");
2622 TRACE_D3(" <get_inlFnName><%lx>: cross-CU LIMITATION", posn
);
2623 return ML_(addStr
)(cc
->di
, "UnknownInlinedFun", -1);
2626 init_Cursor (&c
, cc
->escn_debug_info
, posn
, cc
->barf
,
2627 "Overrun get_inlFnName absori");
2629 abbv_code
= get_ULEB128( &c
);
2630 abbv
= get_abbv ( cc
, abbv_code
);
2632 TRACE_D3(" <get_inlFnName><%lx>: Abbrev Number: %llu (%s)\n",
2633 posn
, abbv_code
, ML_(pp_DW_TAG
)( atag
) );
2636 cc
->barf("get_inlFnName: invalid zero tag on DIE");
2638 has_children
= abbv
->has_children
;
2639 if (has_children
!= DW_children_no
&& has_children
!= DW_children_yes
)
2640 cc
->barf("get_inlFnName: invalid has_children value");
2642 if (atag
!= DW_TAG_subprogram
)
2643 cc
->barf("get_inlFnName: absori not a subprogram");
2647 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
2648 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
2650 if (attr
== 0 && form
== 0) break;
2651 get_Form_contents( &cts
, cc
, &c
, False
/*td3*/, form
);
2652 if (attr
== DW_AT_name
) {
2655 cc
->barf("get_inlFnName: expecting indirect string");
2656 fnname
= ML_(cur_read_strdup
)( cts
.u
.cur
,
2657 "get_inlFnName.1" );
2658 ret
= ML_(addStr
)(cc
->di
, fnname
, -1);
2659 ML_(dinfo_free
) (fnname
);
2660 break; /* Name found, get out of the loop, as this has priority over
2661 DW_AT_specification. */
2663 if (attr
== DW_AT_specification
) {
2667 cc
->barf("get_inlFnName: AT specification missing");
2669 /* The recursive call to get_inlFnName will uncook its arg.
2670 So, we need to cook it here, so as to reference the
2671 correct section (e.g. the alt info). */
2672 cdie
= cook_die_using_form(cc
, (UWord
)cts
.u
.val
, form
);
2674 /* hoping that there is no loop */
2675 ret
= get_inlFnName (cdie
, cc
, td3
);
2676 /* Unclear if having both DW_AT_specification and DW_AT_name is
2677 possible but in any case, we do not break here.
2678 If we find later on a DW_AT_name, it will override the name found
2679 in the DW_AT_specification.*/
2686 TRACE_D3("AbsOriFnNameNotFound");
2687 return ML_(addStr
)(cc
->di
, "AbsOriFnNameNotFound", -1);
2691 /* Returns True if the (possibly) childrens of the current DIE are interesting
2692 to parse. Returns False otherwise.
2693 If the current DIE has a sibling, the non interesting children can
2694 maybe be skipped (if the DIE has a DW_AT_sibling). */
2695 __attribute__((noinline
))
2696 static Bool
parse_inl_DIE (
2697 /*MOD*/D3InlParser
* parser
,
2710 UWord saved_die_c_offset
= get_position_of_Cursor( c_die
);
2712 /* Get info about DW_TAG_compile_unit and DW_TAG_partial_unit 'which
2713 in theory could also contain inlined fn calls). */
2714 if (dtag
== DW_TAG_compile_unit
|| dtag
== DW_TAG_partial_unit
) {
2715 Bool have_lo
= False
;
2717 const HChar
*compdir
= NULL
;
2721 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
2722 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
2724 if (attr
== 0 && form
== 0) break;
2725 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, form
);
2726 if (attr
== DW_AT_low_pc
&& cts
.szB
> 0) {
2730 if (attr
== DW_AT_comp_dir
) {
2732 cc
->barf("parse_inl_DIE compdir: expecting indirect string");
2733 HChar
*str
= ML_(cur_read_strdup
)( cts
.u
.cur
,
2734 "parse_inl_DIE.compdir" );
2735 compdir
= ML_(addStr
)(cc
->di
, str
, -1);
2736 ML_(dinfo_free
) (str
);
2738 if (attr
== DW_AT_stmt_list
&& cts
.szB
> 0) {
2739 read_filename_table( parser
->fndn_ix_Table
, compdir
,
2740 cc
, cts
.u
.val
, td3
);
2742 if (attr
== DW_AT_sibling
&& cts
.szB
> 0) {
2743 parser
->sibling
= cts
.u
.val
;
2747 setup_cu_svma (cc
, have_lo
, ip_lo
, td3
);
2750 if (dtag
== DW_TAG_inlined_subroutine
) {
2751 Bool have_lo
= False
;
2752 Bool have_hi1
= False
;
2753 Bool have_range
= False
;
2754 Bool hiIsRelative
= False
;
2758 UInt caller_fndn_ix
= 0;
2759 Int caller_lineno
= 0;
2760 Int inlinedfn_abstract_origin
= 0;
2761 // 0 will be interpreted as no abstract origin by get_inlFnName
2765 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
2766 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
2768 if (attr
== 0 && form
== 0) break;
2769 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, form
);
2770 if (attr
== DW_AT_call_file
&& cts
.szB
> 0) {
2771 Int ftabIx
= (Int
)cts
.u
.val
;
2773 && ftabIx
< VG_(sizeXA
)( parser
->fndn_ix_Table
)) {
2774 caller_fndn_ix
= *(UInt
*)
2775 VG_(indexXA
)( parser
->fndn_ix_Table
, ftabIx
);
2777 if (0) VG_(printf
)("XXX caller_fndn_ix = %u %s\n", caller_fndn_ix
,
2778 ML_(fndn_ix2filename
) (cc
->di
, caller_fndn_ix
));
2780 if (attr
== DW_AT_call_line
&& cts
.szB
> 0) {
2781 caller_lineno
= cts
.u
.val
;
2784 if (attr
== DW_AT_abstract_origin
&& cts
.szB
> 0) {
2785 inlinedfn_abstract_origin
2786 = cook_die_using_form (cc
, (UWord
)cts
.u
.val
, form
);
2789 if (attr
== DW_AT_low_pc
&& cts
.szB
> 0) {
2793 if (attr
== DW_AT_high_pc
&& cts
.szB
> 0) {
2796 if (form
!= DW_FORM_addr
)
2797 hiIsRelative
= True
;
2799 if (attr
== DW_AT_ranges
&& cts
.szB
> 0) {
2800 rangeoff
= cts
.u
.val
;
2803 if (attr
== DW_AT_sibling
&& cts
.szB
> 0) {
2804 parser
->sibling
= cts
.u
.val
;
2807 if (have_lo
&& have_hi1
&& hiIsRelative
)
2809 /* Do we have something that looks sane? */
2810 if (dtag
== DW_TAG_inlined_subroutine
2811 && (!have_lo
) && (!have_hi1
) && (!have_range
)) {
2812 /* Seems strange. How can an inlined subroutine have
2816 if (have_lo
&& have_hi1
&& (!have_range
)) {
2817 /* This inlined call is just a single address range. */
2818 if (ip_lo
< ip_hi1
) {
2819 /* Apply text debug biasing */
2820 ip_lo
+= cc
->di
->text_debug_bias
;
2821 ip_hi1
+= cc
->di
->text_debug_bias
;
2822 ML_(addInlInfo
) (cc
->di
,
2824 get_inlFnName (inlinedfn_abstract_origin
, cc
, td3
),
2826 caller_lineno
, level
);
2828 } else if (have_range
) {
2829 /* This inlined call is several address ranges. */
2832 const HChar
*inlfnname
=
2833 get_inlFnName (inlinedfn_abstract_origin
, cc
, td3
);
2835 /* Ranges are biased for the inline info using the same logic
2836 as what is used for biasing ranges for the var info, for which
2837 ranges are read using cc->cu_svma (see parse_var_DIE).
2838 Then text_debug_bias is added when a (non global) var
2839 is recorded (see just before the call to ML_(addVar)) */
2840 ranges
= get_range_list( cc
, td3
,
2841 rangeoff
, cc
->cu_svma
);
2842 for (j
= 0; j
< VG_(sizeXA
)( ranges
); j
++) {
2843 AddrRange
* range
= (AddrRange
*) VG_(indexXA
)( ranges
, j
);
2844 ML_(addInlInfo
) (cc
->di
,
2845 range
->aMin
+ cc
->di
->text_debug_bias
,
2846 range
->aMax
+1 + cc
->di
->text_debug_bias
,
2847 // aMax+1 as range has its last bound included
2848 // while ML_(addInlInfo) expects last bound not
2852 caller_lineno
, level
);
2854 VG_(deleteXA
)( ranges
);
2859 // Only recursively parse the (possible) children for the DIE which
2860 // might maybe contain a DW_TAG_inlined_subroutine:
2861 return dtag
== DW_TAG_lexical_block
|| dtag
== DW_TAG_subprogram
2862 || dtag
== DW_TAG_inlined_subroutine
2863 || dtag
== DW_TAG_compile_unit
|| dtag
== DW_TAG_partial_unit
;
2866 dump_bad_die_and_barf("parse_inl_DIE", dtag
, posn
, level
,
2867 c_die
, saved_die_c_offset
,
2874 /*------------------------------------------------------------*/
2876 /*--- Parsing of type-related DIEs ---*/
2878 /*------------------------------------------------------------*/
2882 /* What source language? 'A'=Ada83/95,
2886 Established once per compilation unit. */
2888 /* A stack of types which are currently under construction */
2889 Int sp
; /* [sp] is innermost active entry; sp==-1 for empty
2892 /* Note that the TyEnts in qparentE are temporary copies of the
2893 ones accumulating in the main tyent array. So it is not safe
2894 to free up anything on them when popping them off the stack
2895 (iow, it isn't safe to use TyEnt__make_EMPTY on them). Just
2896 memset them to zero when done. */
2897 TyEnt
*qparentE
; /* parent TyEnts */
2902 /* Completely initialise a type parser object */
2904 type_parser_init ( D3TypeParser
*parser
)
2907 parser
->language
= '?';
2908 parser
->stack_size
= 0;
2909 parser
->qparentE
= NULL
;
2910 parser
->qlevel
= NULL
;
2913 /* Release any memory hanging off a type parser object */
2915 type_parser_release ( D3TypeParser
*parser
)
2917 ML_(dinfo_free
)( parser
->qparentE
);
2918 ML_(dinfo_free
)( parser
->qlevel
);
2921 static void typestack_show ( const D3TypeParser
* parser
, const HChar
* str
)
2924 VG_(printf
)(" typestack (%s) {\n", str
);
2925 for (i
= 0; i
<= parser
->sp
; i
++) {
2926 VG_(printf
)(" [%ld] (level %d): ", i
, parser
->qlevel
[i
]);
2927 ML_(pp_TyEnt
)( &parser
->qparentE
[i
] );
2930 VG_(printf
)(" }\n");
2933 /* Remove from the stack, all entries with .level > 'level' */
2935 void typestack_preen ( D3TypeParser
* parser
, Bool td3
, Int level
)
2937 Bool changed
= False
;
2938 vg_assert(parser
->sp
< parser
->stack_size
);
2940 vg_assert(parser
->sp
>= -1);
2941 if (parser
->sp
== -1) break;
2942 if (parser
->qlevel
[parser
->sp
] <= level
) break;
2944 TRACE_D3("BBBBAAAA typestack_pop [newsp=%d]\n", parser
->sp
-1);
2945 vg_assert(ML_(TyEnt__is_type
)(&parser
->qparentE
[parser
->sp
]));
2950 typestack_show( parser
, "after preen" );
2953 static Bool
typestack_is_empty ( const D3TypeParser
* parser
)
2955 vg_assert(parser
->sp
>= -1 && parser
->sp
< parser
->stack_size
);
2956 return parser
->sp
== -1;
2959 static void typestack_push ( const CUConst
* cc
,
2960 D3TypeParser
* parser
,
2962 const TyEnt
* parentE
, Int level
)
2965 TRACE_D3("BBBBAAAA typestack_push[newsp=%d]: %d %05lx\n",
2966 parser
->sp
+1, level
, parentE
->cuOff
);
2968 /* First we need to zap everything >= 'level', as we are about to
2969 replace any previous entry at 'level', so .. */
2970 typestack_preen(parser
, /*td3*/False
, level
-1);
2972 vg_assert(parser
->sp
>= -1);
2973 vg_assert(parser
->sp
< parser
->stack_size
);
2974 if (parser
->sp
== parser
->stack_size
- 1) {
2975 parser
->stack_size
+= 16;
2977 ML_(dinfo_realloc
)("di.readdwarf3.typush.1", parser
->qparentE
,
2978 parser
->stack_size
* sizeof parser
->qparentE
[0]);
2980 ML_(dinfo_realloc
)("di.readdwarf3.typush.2", parser
->qlevel
,
2981 parser
->stack_size
* sizeof parser
->qlevel
[0]);
2983 if (parser
->sp
>= 0)
2984 vg_assert(parser
->qlevel
[parser
->sp
] < level
);
2987 vg_assert(ML_(TyEnt__is_type
)(parentE
));
2988 vg_assert(parentE
->cuOff
!= D3_INVALID_CUOFF
);
2989 parser
->qparentE
[parser
->sp
] = *parentE
;
2990 parser
->qlevel
[parser
->sp
] = level
;
2992 typestack_show( parser
, "after push" );
2995 /* True if the subrange type being parsed gives the bounds of an array. */
2996 static Bool
subrange_type_denotes_array_bounds ( const D3TypeParser
* parser
,
2998 vg_assert(dtag
== DW_TAG_subrange_type
);
2999 /* For most languages, a subrange_type dtag always gives the
3001 For Ada, there are additional conditions as a subrange_type
3002 is also used for other purposes. */
3003 if (parser
->language
!= 'A')
3004 /* not Ada, so it definitely denotes an array bound. */
3007 /* Extra constraints for Ada: it only denotes an array bound if .. */
3008 return (! typestack_is_empty(parser
)
3009 && parser
->qparentE
[parser
->sp
].tag
== Te_TyArray
);
3012 /* True if the form is one of the forms supported to give an array bound.
3013 For some arrays (scope local arrays with variable size),
3014 a DW_FORM_ref4 was used, and was wrongly used as the bound value.
3015 So, refuse the forms that are known to give a problem. */
3016 static Bool
form_expected_for_bound ( DW_FORM form
) {
3017 if (form
== DW_FORM_ref1
3018 || form
== DW_FORM_ref2
3019 || form
== DW_FORM_ref4
3020 || form
== DW_FORM_ref8
)
3026 /* Parse a type-related DIE. 'parser' holds the current parser state.
3027 'admin' is where the completed types are dumped. 'dtag' is the tag
3028 for this DIE. 'c_die' points to the start of the data fields (FORM
3029 stuff) for the DIE. abbv is the parsed abbreviation which describe
3032 We may find the DIE uninteresting, in which case we should ignore
3035 What happens: the DIE is examined. If uninteresting, it is ignored.
3036 Otherwise, the DIE gives rise to two things:
3038 (1) the offset of this DIE in the CU -- the cuOffset, a UWord
3039 (2) a TyAdmin structure, which holds the type, or related stuff
3041 (2) is added at the end of 'tyadmins', at some index, say 'i'.
3043 A pair (cuOffset, i) is added to 'tydict'.
3045 Hence 'tyadmins' holds the actual type entities, and 'tydict' holds
3046 a mapping from cuOffset to the index of the corresponding entry in
3049 When resolving a cuOffset to a TyAdmin, first look up the cuOffset
3050 in the tydict (by binary search). This gives an index into
3051 tyadmins, and the required entity lives in tyadmins at that index.
3053 __attribute__((noinline
))
3054 static void parse_type_DIE ( /*MOD*/XArray
* /* of TyEnt */ tyents
,
3055 /*MOD*/D3TypeParser
* parser
,
3071 UWord saved_die_c_offset
= get_position_of_Cursor( c_die
);
3073 VG_(memset
)( &typeE
, 0xAA, sizeof(typeE
) );
3074 VG_(memset
)( &atomE
, 0xAA, sizeof(atomE
) );
3075 VG_(memset
)( &fieldE
, 0xAA, sizeof(fieldE
) );
3076 VG_(memset
)( &boundE
, 0xAA, sizeof(boundE
) );
3078 /* If we've returned to a level at or above any previously noted
3079 parent, un-note it, so we don't believe we're still collecting
3081 typestack_preen( parser
, td3
, level
-1 );
3083 if (dtag
== DW_TAG_compile_unit
3084 || dtag
== DW_TAG_type_unit
3085 || dtag
== DW_TAG_partial_unit
) {
3086 /* See if we can find DW_AT_language, since it is important for
3087 establishing array bounds (see DW_TAG_subrange_type below in
3091 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
3092 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
3094 if (attr
== 0 && form
== 0) break;
3095 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, form
);
3096 if (attr
!= DW_AT_language
)
3100 switch (cts
.u
.val
) {
3101 case DW_LANG_C89
: case DW_LANG_C
:
3102 case DW_LANG_C_plus_plus
: case DW_LANG_ObjC
:
3103 case DW_LANG_ObjC_plus_plus
: case DW_LANG_UPC
:
3104 case DW_LANG_Upc
: case DW_LANG_C99
: case DW_LANG_C11
:
3105 case DW_LANG_C_plus_plus_11
: case DW_LANG_C_plus_plus_14
:
3106 parser
->language
= 'C'; break;
3107 case DW_LANG_Fortran77
: case DW_LANG_Fortran90
:
3108 case DW_LANG_Fortran95
: case DW_LANG_Fortran03
:
3109 case DW_LANG_Fortran08
:
3110 parser
->language
= 'F'; break;
3111 case DW_LANG_Ada83
: case DW_LANG_Ada95
:
3112 parser
->language
= 'A'; break;
3113 case DW_LANG_Cobol74
:
3114 case DW_LANG_Cobol85
: case DW_LANG_Pascal83
:
3115 case DW_LANG_Modula2
: case DW_LANG_Java
:
3117 case DW_LANG_D
: case DW_LANG_Python
: case DW_LANG_Go
:
3118 case DW_LANG_Mips_Assembler
:
3119 parser
->language
= '?'; break;
3126 if (dtag
== DW_TAG_base_type
) {
3127 /* We can pick up a new base type any time. */
3128 VG_(memset
)(&typeE
, 0, sizeof(typeE
));
3129 typeE
.cuOff
= D3_INVALID_CUOFF
;
3130 typeE
.tag
= Te_TyBase
;
3133 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
3134 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
3136 if (attr
== 0 && form
== 0) break;
3137 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, form
);
3138 if (attr
== DW_AT_name
&& cts
.szB
< 0) {
3139 typeE
.Te
.TyBase
.name
3140 = ML_(cur_read_strdup
)( cts
.u
.cur
,
3141 "di.readdwarf3.ptD.base_type.1" );
3143 if (attr
== DW_AT_byte_size
&& cts
.szB
> 0) {
3144 typeE
.Te
.TyBase
.szB
= cts
.u
.val
;
3146 if (attr
== DW_AT_encoding
&& cts
.szB
> 0) {
3147 switch (cts
.u
.val
) {
3148 case DW_ATE_unsigned
: case DW_ATE_unsigned_char
:
3149 case DW_ATE_UTF
: /* since DWARF4, e.g. char16_t from C++ */
3150 case DW_ATE_boolean
:/* FIXME - is this correct? */
3151 case DW_ATE_unsigned_fixed
:
3152 typeE
.Te
.TyBase
.enc
= 'U'; break;
3153 case DW_ATE_signed
: case DW_ATE_signed_char
:
3154 case DW_ATE_signed_fixed
:
3155 typeE
.Te
.TyBase
.enc
= 'S'; break;
3157 typeE
.Te
.TyBase
.enc
= 'F'; break;
3158 case DW_ATE_complex_float
:
3159 typeE
.Te
.TyBase
.enc
= 'C'; break;
3166 /* Invent a name if it doesn't have one. gcc-4.3
3167 -ftree-vectorize is observed to emit nameless base types. */
3168 if (!typeE
.Te
.TyBase
.name
)
3169 typeE
.Te
.TyBase
.name
3170 = ML_(dinfo_strdup
)( "di.readdwarf3.ptD.base_type.2",
3171 "<anon_base_type>" );
3173 /* Do we have something that looks sane? */
3174 if (/* must have a name */
3175 typeE
.Te
.TyBase
.name
== NULL
3176 /* and a plausible size. Yes, really 32: "complex long
3177 double" apparently has size=32 */
3178 || typeE
.Te
.TyBase
.szB
< 0 || typeE
.Te
.TyBase
.szB
> 32
3179 /* and a plausible encoding */
3180 || (typeE
.Te
.TyBase
.enc
!= 'U'
3181 && typeE
.Te
.TyBase
.enc
!= 'S'
3182 && typeE
.Te
.TyBase
.enc
!= 'F'
3183 && typeE
.Te
.TyBase
.enc
!= 'C'))
3185 /* Last minute hack: if we see this
3186 <1><515>: DW_TAG_base_type
3190 convert it into a real Void type. */
3191 if (typeE
.Te
.TyBase
.szB
== 0
3192 && 0 == VG_(strcmp
)("void", typeE
.Te
.TyBase
.name
)) {
3193 ML_(TyEnt__make_EMPTY
)(&typeE
);
3194 typeE
.tag
= Te_TyVoid
;
3195 typeE
.Te
.TyVoid
.isFake
= False
; /* it's a real one! */
3202 * An example of DW_TAG_rvalue_reference_type:
3204 * $ readelf --debug-dump /usr/lib/debug/usr/lib/libstdc++.so.6.0.16.debug
3205 * <1><1014>: Abbrev Number: 55 (DW_TAG_rvalue_reference_type)
3206 * <1015> DW_AT_byte_size : 4
3207 * <1016> DW_AT_type : <0xe52>
3209 if (dtag
== DW_TAG_pointer_type
|| dtag
== DW_TAG_reference_type
3210 || dtag
== DW_TAG_ptr_to_member_type
3211 || dtag
== DW_TAG_rvalue_reference_type
) {
3212 /* This seems legit for _pointer_type and _reference_type. I
3213 don't know if rolling _ptr_to_member_type in here really is
3214 legit, but it's better than not handling it at all. */
3215 VG_(memset
)(&typeE
, 0, sizeof(typeE
));
3216 typeE
.cuOff
= D3_INVALID_CUOFF
;
3218 case DW_TAG_pointer_type
:
3219 typeE
.tag
= Te_TyPtr
;
3221 case DW_TAG_reference_type
:
3222 typeE
.tag
= Te_TyRef
;
3224 case DW_TAG_ptr_to_member_type
:
3225 typeE
.tag
= Te_TyPtrMbr
;
3227 case DW_TAG_rvalue_reference_type
:
3228 typeE
.tag
= Te_TyRvalRef
;
3233 /* target type defaults to void */
3234 typeE
.Te
.TyPorR
.typeR
= D3_FAKEVOID_CUOFF
;
3235 /* These four type kinds don't *have* to specify their size, in
3236 which case we assume it's a machine word. But if they do
3237 specify it, it must be a machine word :-) This probably
3238 assumes that the word size of the Dwarf3 we're reading is the
3239 same size as that on the machine. gcc appears to give a size
3240 whereas icc9 doesn't. */
3241 typeE
.Te
.TyPorR
.szB
= sizeof(UWord
);
3244 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
3245 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
3247 if (attr
== 0 && form
== 0) break;
3248 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, form
);
3249 if (attr
== DW_AT_byte_size
&& cts
.szB
> 0) {
3250 typeE
.Te
.TyPorR
.szB
= cts
.u
.val
;
3252 if (attr
== DW_AT_type
&& cts
.szB
> 0) {
3253 typeE
.Te
.TyPorR
.typeR
3254 = cook_die_using_form( cc
, (UWord
)cts
.u
.val
, form
);
3257 /* Do we have something that looks sane? */
3258 if (typeE
.Te
.TyPorR
.szB
!= sizeof(UWord
))
3264 if (dtag
== DW_TAG_enumeration_type
) {
3265 /* Create a new Type to hold the results. */
3266 VG_(memset
)(&typeE
, 0, sizeof(typeE
));
3268 typeE
.tag
= Te_TyEnum
;
3269 Bool is_decl
= False
;
3270 typeE
.Te
.TyEnum
.atomRs
3271 = VG_(newXA
)( ML_(dinfo_zalloc
), "di.readdwarf3.ptD.enum_type.1",
3276 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
3277 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
3279 if (attr
== 0 && form
== 0) break;
3280 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, form
);
3281 if (attr
== DW_AT_name
&& cts
.szB
< 0) {
3282 typeE
.Te
.TyEnum
.name
3283 = ML_(cur_read_strdup
)( cts
.u
.cur
,
3284 "di.readdwarf3.pTD.enum_type.2" );
3286 if (attr
== DW_AT_byte_size
&& cts
.szB
> 0) {
3287 typeE
.Te
.TyEnum
.szB
= cts
.u
.val
;
3289 if (attr
== DW_AT_declaration
) {
3294 if (!typeE
.Te
.TyEnum
.name
)
3295 typeE
.Te
.TyEnum
.name
3296 = ML_(dinfo_strdup
)( "di.readdwarf3.pTD.enum_type.3",
3297 "<anon_enum_type>" );
3299 /* Do we have something that looks sane? */
3300 if (typeE
.Te
.TyEnum
.szB
== 0
3301 /* we must know the size */
3302 /* but not for Ada, which uses such dummy
3303 enumerations as helper for gdb ada mode.
3304 Also GCC allows incomplete enums as GNU extension.
3305 http://gcc.gnu.org/onlinedocs/gcc/Incomplete-Enums.html
3306 These are marked as DW_AT_declaration and won't have
3307 a size. They can only be used in declaration or as
3308 pointer types. You can't allocate variables or storage
3309 using such an enum type. (Also GCC seems to have a bug
3310 that will put such an enumeration_type into a .debug_types
3311 unit which should only contain complete types.) */
3312 && (parser
->language
!= 'A' && !is_decl
)) {
3317 typestack_push( cc
, parser
, td3
, &typeE
, level
);
3321 /* gcc (GCC) 4.4.0 20081017 (experimental) occasionally produces
3322 DW_TAG_enumerator with only a DW_AT_name but no
3323 DW_AT_const_value. This is in violation of the Dwarf3 standard,
3324 and appears to be a new "feature" of gcc - versions 4.3.x and
3325 earlier do not appear to do this. So accept DW_TAG_enumerator
3326 which only have a name but no value. An example:
3328 <1><180>: Abbrev Number: 6 (DW_TAG_enumeration_type)
3329 <181> DW_AT_name : (indirect string, offset: 0xda70):
3331 <185> DW_AT_byte_size : 4
3332 <186> DW_AT_decl_file : 14
3333 <187> DW_AT_decl_line : 1480
3334 <189> DW_AT_sibling : <0x1a7>
3335 <2><18d>: Abbrev Number: 7 (DW_TAG_enumerator)
3336 <18e> DW_AT_name : (indirect string, offset: 0x9e18):
3338 <2><192>: Abbrev Number: 7 (DW_TAG_enumerator)
3339 <193> DW_AT_name : (indirect string, offset: 0x1505f):
3341 <2><197>: Abbrev Number: 7 (DW_TAG_enumerator)
3342 <198> DW_AT_name : (indirect string, offset: 0x16f4a):
3344 <2><19c>: Abbrev Number: 7 (DW_TAG_enumerator)
3345 <19d> DW_AT_name : (indirect string, offset: 0x156dd):
3347 <2><1a1>: Abbrev Number: 7 (DW_TAG_enumerator)
3348 <1a2> DW_AT_name : (indirect string, offset: 0x13660):
3351 if (dtag
== DW_TAG_enumerator
) {
3352 VG_(memset
)( &atomE
, 0, sizeof(atomE
) );
3354 atomE
.tag
= Te_Atom
;
3357 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
3358 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
3360 if (attr
== 0 && form
== 0) break;
3361 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, form
);
3362 if (attr
== DW_AT_name
&& cts
.szB
< 0) {
3364 = ML_(cur_read_strdup
)( cts
.u
.cur
,
3365 "di.readdwarf3.pTD.enumerator.1" );
3367 if (attr
== DW_AT_const_value
&& cts
.szB
> 0) {
3368 atomE
.Te
.Atom
.value
= cts
.u
.val
;
3369 atomE
.Te
.Atom
.valueKnown
= True
;
3372 /* Do we have something that looks sane? */
3373 if (atomE
.Te
.Atom
.name
== NULL
)
3375 /* Do we have a plausible parent? */
3376 if (typestack_is_empty(parser
)) goto_bad_DIE
;
3377 vg_assert(ML_(TyEnt__is_type
)(&parser
->qparentE
[parser
->sp
]));
3378 vg_assert(parser
->qparentE
[parser
->sp
].cuOff
!= D3_INVALID_CUOFF
);
3379 if (level
!= parser
->qlevel
[parser
->sp
]+1) goto_bad_DIE
;
3380 if (parser
->qparentE
[parser
->sp
].tag
!= Te_TyEnum
) goto_bad_DIE
;
3381 /* Record this child in the parent */
3382 vg_assert(parser
->qparentE
[parser
->sp
].Te
.TyEnum
.atomRs
);
3383 VG_(addToXA
)( parser
->qparentE
[parser
->sp
].Te
.TyEnum
.atomRs
,
3385 /* And record the child itself */
3389 /* Treat DW_TAG_class_type as if it was a DW_TAG_structure_type. I
3390 don't know if this is correct, but it at least makes this reader
3391 usable for gcc-4.3 produced Dwarf3. */
3392 if (dtag
== DW_TAG_structure_type
|| dtag
== DW_TAG_class_type
3393 || dtag
== DW_TAG_union_type
) {
3394 Bool have_szB
= False
;
3395 Bool is_decl
= False
;
3396 Bool is_spec
= False
;
3397 /* Create a new Type to hold the results. */
3398 VG_(memset
)(&typeE
, 0, sizeof(typeE
));
3400 typeE
.tag
= Te_TyStOrUn
;
3401 typeE
.Te
.TyStOrUn
.name
= NULL
;
3402 typeE
.Te
.TyStOrUn
.typeR
= D3_INVALID_CUOFF
;
3403 typeE
.Te
.TyStOrUn
.fieldRs
3404 = VG_(newXA
)( ML_(dinfo_zalloc
), "di.readdwarf3.pTD.struct_type.1",
3407 typeE
.Te
.TyStOrUn
.complete
= True
;
3408 typeE
.Te
.TyStOrUn
.isStruct
= dtag
== DW_TAG_structure_type
3409 || dtag
== DW_TAG_class_type
;
3412 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
3413 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
3415 if (attr
== 0 && form
== 0) break;
3416 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, form
);
3417 if (attr
== DW_AT_name
&& cts
.szB
< 0) {
3418 typeE
.Te
.TyStOrUn
.name
3419 = ML_(cur_read_strdup
)( cts
.u
.cur
,
3420 "di.readdwarf3.ptD.struct_type.2" );
3422 if (attr
== DW_AT_byte_size
&& cts
.szB
>= 0) {
3423 typeE
.Te
.TyStOrUn
.szB
= cts
.u
.val
;
3426 if (attr
== DW_AT_declaration
&& cts
.szB
> 0 && cts
.u
.val
> 0) {
3429 if (attr
== DW_AT_specification
&& cts
.szB
> 0 && cts
.u
.val
> 0) {
3432 if (attr
== DW_AT_signature
&& form
== DW_FORM_ref_sig8
3435 typeE
.Te
.TyStOrUn
.szB
= 8;
3436 typeE
.Te
.TyStOrUn
.typeR
3437 = cook_die_using_form( cc
, (UWord
)cts
.u
.val
, form
);
3440 /* Do we have something that looks sane? */
3441 if (is_decl
&& (!is_spec
)) {
3442 /* It's a DW_AT_declaration. We require the name but
3444 /* JRS 2012-06-28: following discussion w/ tromey, if the
3445 type doesn't have name, just make one up, and accept it.
3446 It might be referred to by other DIEs, so ignoring it
3447 doesn't seem like a safe option. */
3448 if (typeE
.Te
.TyStOrUn
.name
== NULL
)
3449 typeE
.Te
.TyStOrUn
.name
3450 = ML_(dinfo_strdup
)( "di.readdwarf3.ptD.struct_type.3",
3451 "<anon_struct_type>" );
3452 typeE
.Te
.TyStOrUn
.complete
= False
;
3453 /* JRS 2009 Aug 10: <possible kludge>? */
3454 /* Push this tyent on the stack, even though it's incomplete.
3455 It appears that gcc-4.4 on Fedora 11 will sometimes create
3456 DW_TAG_member entries for it, and so we need to have a
3457 plausible parent present in order for that to work. See
3458 #200029 comments 8 and 9. */
3459 typestack_push( cc
, parser
, td3
, &typeE
, level
);
3460 /* </possible kludge> */
3463 if ((!is_decl
) /* && (!is_spec) */) {
3464 /* this is the common, ordinary case */
3465 /* The name can be present, or not */
3467 /* We must know the size.
3468 But in Ada, record with discriminants might have no size.
3469 But in C, VLA in the middle of a struct (gcc extension)
3471 Instead, some GNAT dwarf extensions and/or dwarf entries
3472 allow to calculate the struct size at runtime.
3473 We cannot do that (yet?) so, the temporary kludge is to use
3475 typeE
.Te
.TyStOrUn
.szB
= 1;
3478 typestack_push( cc
, parser
, td3
, &typeE
, level
);
3482 /* don't know how to handle any other variants just now */
3487 if (dtag
== DW_TAG_member
) {
3488 /* Acquire member entries for both DW_TAG_structure_type and
3489 DW_TAG_union_type. They differ minorly, in that struct
3490 members must have a DW_AT_data_member_location expression
3491 whereas union members must not. */
3492 Bool parent_is_struct
;
3493 VG_(memset
)( &fieldE
, 0, sizeof(fieldE
) );
3494 fieldE
.cuOff
= posn
;
3495 fieldE
.tag
= Te_Field
;
3496 fieldE
.Te
.Field
.typeR
= D3_INVALID_CUOFF
;
3499 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
3500 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
3502 if (attr
== 0 && form
== 0) break;
3503 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, form
);
3504 if (attr
== DW_AT_name
&& cts
.szB
< 0) {
3505 fieldE
.Te
.Field
.name
3506 = ML_(cur_read_strdup
)( cts
.u
.cur
,
3507 "di.readdwarf3.ptD.member.1" );
3509 if (attr
== DW_AT_type
&& cts
.szB
> 0) {
3510 fieldE
.Te
.Field
.typeR
3511 = cook_die_using_form( cc
, (UWord
)cts
.u
.val
, form
);
3513 /* There are 2 different cases for DW_AT_data_member_location.
3514 If it is a constant class attribute, it contains byte offset
3515 from the beginning of the containing entity.
3516 Otherwise it is a location expression. */
3517 if (attr
== DW_AT_data_member_location
&& cts
.szB
> 0) {
3518 fieldE
.Te
.Field
.nLoc
= -1;
3519 fieldE
.Te
.Field
.pos
.offset
= cts
.u
.val
;
3521 if (attr
== DW_AT_data_member_location
&& cts
.szB
<= 0) {
3522 fieldE
.Te
.Field
.nLoc
= (UWord
)(-cts
.szB
);
3523 fieldE
.Te
.Field
.pos
.loc
3524 = ML_(cur_read_memdup
)( cts
.u
.cur
,
3525 (SizeT
)fieldE
.Te
.Field
.nLoc
,
3526 "di.readdwarf3.ptD.member.2" );
3529 /* Do we have a plausible parent? */
3530 if (typestack_is_empty(parser
)) goto_bad_DIE
;
3531 vg_assert(ML_(TyEnt__is_type
)(&parser
->qparentE
[parser
->sp
]));
3532 vg_assert(parser
->qparentE
[parser
->sp
].cuOff
!= D3_INVALID_CUOFF
);
3533 if (level
!= parser
->qlevel
[parser
->sp
]+1) goto_bad_DIE
;
3534 if (parser
->qparentE
[parser
->sp
].tag
!= Te_TyStOrUn
) goto_bad_DIE
;
3535 /* Do we have something that looks sane? If this a member of a
3536 struct, we must have a location expression; but if a member
3537 of a union that is irrelevant (D3 spec sec 5.6.6). We ought
3538 to reject in the latter case, but some compilers have been
3539 observed to emit constant-zero expressions. So just ignore
3542 = parser
->qparentE
[parser
->sp
].Te
.TyStOrUn
.isStruct
;
3543 if (!fieldE
.Te
.Field
.name
)
3544 fieldE
.Te
.Field
.name
3545 = ML_(dinfo_strdup
)( "di.readdwarf3.ptD.member.3",
3547 if (fieldE
.Te
.Field
.typeR
== D3_INVALID_CUOFF
)
3549 if (fieldE
.Te
.Field
.nLoc
) {
3550 if (!parent_is_struct
) {
3551 /* If this is a union type, pretend we haven't seen the data
3552 member location expression, as it is by definition
3553 redundant (it must be zero). */
3554 if (fieldE
.Te
.Field
.nLoc
> 0)
3555 ML_(dinfo_free
)(fieldE
.Te
.Field
.pos
.loc
);
3556 fieldE
.Te
.Field
.pos
.loc
= NULL
;
3557 fieldE
.Te
.Field
.nLoc
= 0;
3559 /* Record this child in the parent */
3560 fieldE
.Te
.Field
.isStruct
= parent_is_struct
;
3561 vg_assert(parser
->qparentE
[parser
->sp
].Te
.TyStOrUn
.fieldRs
);
3562 VG_(addToXA
)( parser
->qparentE
[parser
->sp
].Te
.TyStOrUn
.fieldRs
,
3564 /* And record the child itself */
3567 /* Member with no location - this can happen with static
3568 const members in C++ code which are compile time constants
3569 that do not exist in the class. They're not of any interest
3570 to us so we ignore them. */
3571 ML_(TyEnt__make_EMPTY
)(&fieldE
);
3575 if (dtag
== DW_TAG_array_type
) {
3576 VG_(memset
)(&typeE
, 0, sizeof(typeE
));
3578 typeE
.tag
= Te_TyArray
;
3579 typeE
.Te
.TyArray
.typeR
= D3_INVALID_CUOFF
;
3580 typeE
.Te
.TyArray
.boundRs
3581 = VG_(newXA
)( ML_(dinfo_zalloc
), "di.readdwarf3.ptD.array_type.1",
3586 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
3587 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
3589 if (attr
== 0 && form
== 0) break;
3590 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, form
);
3591 if (attr
== DW_AT_type
&& cts
.szB
> 0) {
3592 typeE
.Te
.TyArray
.typeR
3593 = cook_die_using_form( cc
, (UWord
)cts
.u
.val
, form
);
3596 if (typeE
.Te
.TyArray
.typeR
== D3_INVALID_CUOFF
)
3599 typestack_push( cc
, parser
, td3
, &typeE
, level
);
3603 /* this is a subrange type defining the bounds of an array. */
3604 if (dtag
== DW_TAG_subrange_type
3605 && subrange_type_denotes_array_bounds(parser
, dtag
)) {
3606 Bool have_lower
= False
;
3607 Bool have_upper
= False
;
3608 Bool have_count
= False
;
3613 switch (parser
->language
) {
3614 case 'C': have_lower
= True
; lower
= 0; break;
3615 case 'F': have_lower
= True
; lower
= 1; break;
3616 case '?': have_lower
= False
; break;
3617 case 'A': have_lower
= False
; break;
3618 default: vg_assert(0); /* assured us by handling of
3619 DW_TAG_compile_unit in this fn */
3622 VG_(memset
)( &boundE
, 0, sizeof(boundE
) );
3623 boundE
.cuOff
= D3_INVALID_CUOFF
;
3624 boundE
.tag
= Te_Bound
;
3627 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
3628 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
3630 if (attr
== 0 && form
== 0) break;
3631 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, form
);
3632 if (attr
== DW_AT_lower_bound
&& cts
.szB
> 0
3633 && form_expected_for_bound (form
)) {
3634 lower
= (Long
)cts
.u
.val
;
3637 if (attr
== DW_AT_upper_bound
&& cts
.szB
> 0
3638 && form_expected_for_bound (form
)) {
3639 upper
= (Long
)cts
.u
.val
;
3642 if (attr
== DW_AT_count
&& cts
.szB
> 0) {
3643 count
= (Long
)cts
.u
.val
;
3647 /* FIXME: potentially skip the rest if no parent present, since
3648 it could be the case that this subrange type is free-standing
3649 (not being used to describe the bounds of a containing array
3651 /* Do we have a plausible parent? */
3652 if (typestack_is_empty(parser
)) goto_bad_DIE
;
3653 vg_assert(ML_(TyEnt__is_type
)(&parser
->qparentE
[parser
->sp
]));
3654 vg_assert(parser
->qparentE
[parser
->sp
].cuOff
!= D3_INVALID_CUOFF
);
3655 if (level
!= parser
->qlevel
[parser
->sp
]+1) goto_bad_DIE
;
3656 if (parser
->qparentE
[parser
->sp
].tag
!= Te_TyArray
) goto_bad_DIE
;
3658 /* Figure out if we have a definite range or not */
3659 if (have_lower
&& have_upper
&& (!have_count
)) {
3660 boundE
.Te
.Bound
.knownL
= True
;
3661 boundE
.Te
.Bound
.knownU
= True
;
3662 boundE
.Te
.Bound
.boundL
= lower
;
3663 boundE
.Te
.Bound
.boundU
= upper
;
3665 else if (have_lower
&& (!have_upper
) && (!have_count
)) {
3666 boundE
.Te
.Bound
.knownL
= True
;
3667 boundE
.Te
.Bound
.knownU
= False
;
3668 boundE
.Te
.Bound
.boundL
= lower
;
3669 boundE
.Te
.Bound
.boundU
= 0;
3671 else if ((!have_lower
) && have_upper
&& (!have_count
)) {
3672 boundE
.Te
.Bound
.knownL
= False
;
3673 boundE
.Te
.Bound
.knownU
= True
;
3674 boundE
.Te
.Bound
.boundL
= 0;
3675 boundE
.Te
.Bound
.boundU
= upper
;
3677 else if ((!have_lower
) && (!have_upper
) && (!have_count
)) {
3678 boundE
.Te
.Bound
.knownL
= False
;
3679 boundE
.Te
.Bound
.knownU
= False
;
3680 boundE
.Te
.Bound
.boundL
= 0;
3681 boundE
.Te
.Bound
.boundU
= 0;
3682 } else if (have_lower
&& (!have_upper
) && (have_count
)) {
3683 boundE
.Te
.Bound
.knownL
= True
;
3684 boundE
.Te
.Bound
.knownU
= True
;
3685 boundE
.Te
.Bound
.boundL
= lower
;
3686 boundE
.Te
.Bound
.boundU
= lower
+ count
;
3688 /* FIXME: handle more cases */
3692 /* Record this bound in the parent */
3693 boundE
.cuOff
= posn
;
3694 vg_assert(parser
->qparentE
[parser
->sp
].Te
.TyArray
.boundRs
);
3695 VG_(addToXA
)( parser
->qparentE
[parser
->sp
].Te
.TyArray
.boundRs
,
3697 /* And record the child itself */
3701 /* typedef or subrange_type other than array bounds. */
3702 if (dtag
== DW_TAG_typedef
3703 || (dtag
== DW_TAG_subrange_type
3704 && !subrange_type_denotes_array_bounds(parser
, dtag
))) {
3705 /* subrange_type other than array bound is only for Ada. */
3706 vg_assert (dtag
== DW_TAG_typedef
|| parser
->language
== 'A');
3707 /* We can pick up a new typedef/subrange_type any time. */
3708 VG_(memset
)(&typeE
, 0, sizeof(typeE
));
3709 typeE
.cuOff
= D3_INVALID_CUOFF
;
3710 typeE
.tag
= Te_TyTyDef
;
3711 typeE
.Te
.TyTyDef
.name
= NULL
;
3712 typeE
.Te
.TyTyDef
.typeR
= D3_INVALID_CUOFF
;
3715 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
3716 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
3718 if (attr
== 0 && form
== 0) break;
3719 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, form
);
3720 if (attr
== DW_AT_name
&& cts
.szB
< 0) {
3721 typeE
.Te
.TyTyDef
.name
3722 = ML_(cur_read_strdup
)( cts
.u
.cur
,
3723 "di.readdwarf3.ptD.typedef.1" );
3725 if (attr
== DW_AT_type
&& cts
.szB
> 0) {
3726 typeE
.Te
.TyTyDef
.typeR
3727 = cook_die_using_form( cc
, (UWord
)cts
.u
.val
, form
);
3730 /* Do we have something that looks sane?
3731 gcc gnat Ada generates minimal typedef
3733 <6><91cc>: DW_TAG_typedef
3734 DW_AT_abstract_ori: <9066>
3735 g++ for OMP can generate artificial functions that have
3736 parameters that refer to pointers to unnamed typedefs.
3737 See https://bugs.kde.org/show_bug.cgi?id=273475
3738 So we cannot require a name for a DW_TAG_typedef.
3743 if (dtag
== DW_TAG_subroutine_type
) {
3744 /* function type? just record that one fact and ask no
3745 further questions. */
3746 VG_(memset
)(&typeE
, 0, sizeof(typeE
));
3747 typeE
.cuOff
= D3_INVALID_CUOFF
;
3748 typeE
.tag
= Te_TyFn
;
3752 if (dtag
== DW_TAG_volatile_type
|| dtag
== DW_TAG_const_type
3753 || dtag
== DW_TAG_restrict_type
) {
3755 VG_(memset
)(&typeE
, 0, sizeof(typeE
));
3756 typeE
.cuOff
= D3_INVALID_CUOFF
;
3757 typeE
.tag
= Te_TyQual
;
3758 typeE
.Te
.TyQual
.qual
3759 = (dtag
== DW_TAG_volatile_type
? 'V'
3760 : (dtag
== DW_TAG_const_type
? 'C' : 'R'));
3761 /* target type defaults to 'void' */
3762 typeE
.Te
.TyQual
.typeR
= D3_FAKEVOID_CUOFF
;
3765 DW_AT attr
= (DW_AT
) abbv
->nf
[nf_i
].at_name
;
3766 DW_FORM form
= (DW_FORM
)abbv
->nf
[nf_i
].at_form
;
3768 if (attr
== 0 && form
== 0) break;
3769 get_Form_contents( &cts
, cc
, c_die
, False
/*td3*/, form
);
3770 if (attr
== DW_AT_type
&& cts
.szB
> 0) {
3771 typeE
.Te
.TyQual
.typeR
3772 = cook_die_using_form( cc
, (UWord
)cts
.u
.val
, form
);
3776 /* gcc sometimes generates DW_TAG_const/volatile_type without
3777 DW_AT_type and GDB appears to interpret the type as 'const
3778 void' (resp. 'volatile void'). So just allow it .. */
3779 if (have_ty
== 1 || have_ty
== 0)
3786 * Treat DW_TAG_unspecified_type as type void. An example of DW_TAG_unspecified_type:
3788 * $ readelf --debug-dump /usr/lib/debug/usr/lib/libstdc++.so.6.0.16.debug
3789 * <1><10d4>: Abbrev Number: 53 (DW_TAG_unspecified_type)
3790 * <10d5> DW_AT_name : (indirect string, offset: 0xdb7): decltype(nullptr)
3792 if (dtag
== DW_TAG_unspecified_type
) {
3793 VG_(memset
)(&typeE
, 0, sizeof(typeE
));
3794 typeE
.cuOff
= D3_INVALID_CUOFF
;
3795 typeE
.tag
= Te_TyQual
;
3796 typeE
.Te
.TyQual
.typeR
= D3_FAKEVOID_CUOFF
;
3800 /* else ignore this DIE */
3805 if (0) VG_(printf
)("YYYY Acquire Type\n");
3806 vg_assert(ML_(TyEnt__is_type
)( &typeE
));
3807 vg_assert(typeE
.cuOff
== D3_INVALID_CUOFF
|| typeE
.cuOff
== posn
);
3809 VG_(addToXA
)( tyents
, &typeE
);
3814 if (0) VG_(printf
)("YYYY Acquire Atom\n");
3815 vg_assert(atomE
.tag
== Te_Atom
);
3816 vg_assert(atomE
.cuOff
== D3_INVALID_CUOFF
|| atomE
.cuOff
== posn
);
3818 VG_(addToXA
)( tyents
, &atomE
);
3823 /* For union members, Expr should be absent */
3824 if (0) VG_(printf
)("YYYY Acquire Field\n");
3825 vg_assert(fieldE
.tag
== Te_Field
);
3826 vg_assert(fieldE
.Te
.Field
.nLoc
<= 0 || fieldE
.Te
.Field
.pos
.loc
!= NULL
);
3827 vg_assert(fieldE
.Te
.Field
.nLoc
!= 0 || fieldE
.Te
.Field
.pos
.loc
== NULL
);
3828 if (fieldE
.Te
.Field
.isStruct
) {
3829 vg_assert(fieldE
.Te
.Field
.nLoc
!= 0);
3831 vg_assert(fieldE
.Te
.Field
.nLoc
== 0);
3833 vg_assert(fieldE
.cuOff
== D3_INVALID_CUOFF
|| fieldE
.cuOff
== posn
);
3834 fieldE
.cuOff
= posn
;
3835 VG_(addToXA
)( tyents
, &fieldE
);
3840 if (0) VG_(printf
)("YYYY Acquire Bound\n");
3841 vg_assert(boundE
.tag
== Te_Bound
);
3842 vg_assert(boundE
.cuOff
== D3_INVALID_CUOFF
|| boundE
.cuOff
== posn
);
3843 boundE
.cuOff
= posn
;
3844 VG_(addToXA
)( tyents
, &boundE
);
3849 dump_bad_die_and_barf("parse_type_DIE", dtag
, posn
, level
,
3850 c_die
, saved_die_c_offset
,
3857 /*------------------------------------------------------------*/
3859 /*--- Compression of type DIE information ---*/
3861 /*------------------------------------------------------------*/
/* Resolve 'cuOff' through at most one Te_INDIR hop: the tyent stored
   at 'cuOff' is looked up in 'ents' (using 'ents_cache'), and when
   that tyent is an indirection node the offset it redirects to
   (Te.INDIR.indR) is returned.
   NOTE(review): the final parameter, the declaration of 'ent', the
   writes to '*changed' and the not-found / non-INDIR return paths sit
   on lines that are missing from this extract -- confirm them against
   the full file before relying on this description. */
3863 static UWord
chase_cuOff ( Bool
* changed
,
3864 const XArray
* /* of TyEnt */ ents
,
3865 TyEntIndexCache
* ents_cache
,
3869 ent
= ML_(TyEnts__index_by_cuOff
)( ents
, ents_cache
, cuOff
);
/* Diagnostic for a cuOff that has no tyent (the guarding condition
   is not visible in this extract). */
3872 VG_(printf
)("chase_cuOff: no entry for 0x%05lx\n", cuOff
);
/* Empty tyents are never expected to be found here. */
3877 vg_assert(ent
->tag
!= Te_EMPTY
);
3878 if (ent
->tag
!= Te_INDIR
) {
/* An indirection must always redirect to a strictly lower cuOff. */
3882 vg_assert(ent
->Te
.INDIR
.indR
< cuOff
);
3884 return ent
->Te
.INDIR
.indR
;
/* Run chase_cuOff over every element of 'cuOffs' (an XArray of UWord),
   overwriting each element in place with the value chase_cuOff
   returns for it.
   NOTE(review): the declaration of 'b' and the way the per-element
   flag is folded into '*changed' are on lines missing from this
   extract -- confirm against the full file. */
3889 void chase_cuOffs_in_XArray ( Bool
* changed
,
3890 const XArray
* /* of TyEnt */ ents
,
3891 TyEntIndexCache
* ents_cache
,
3892 /*MOD*/XArray
* /* of UWord */ cuOffs
)
3895 Word i
, n
= VG_(sizeXA
)( cuOffs
);
3896 for (i
= 0; i
< n
; i
++) {
/* 'p' addresses element i directly, so the store below updates the
   array itself. */
3898 UWord
* p
= VG_(indexXA
)( cuOffs
, i
);
3899 *p
= chase_cuOff( &b
, ents
, ents_cache
, *p
);
/* Pass the reference fields of the tyent 'te' through the chase
   helpers: single references ('R' fields such as typeR / indR) go
   through chase_cuOff, reference arrays ('Rs' fields such as fieldRs,
   atomRs, boundRs) go through chase_cuOffs_in_XArray.  After each call
   the helper's flag 'b' is merged into 'changed', which starts out
   False.
   NOTE(review): the 'te' parameter, the dispatch on te->tag (case
   labels), several assignment left-hand sides and the return
   statement are on lines missing from this extract; presumably
   'changed' is the return value -- confirm against the full file. */
3906 static Bool
TyEnt__subst_R_fields ( const XArray
* /* of TyEnt */ ents
,
3907 TyEntIndexCache
* ents_cache
,
3910 Bool b
, changed
= False
;
/* Indirection node: chase its target. */
3916 = chase_cuOff( &b
, ents
, ents_cache
, te
->Te
.INDIR
.indR
);
3917 if (b
) changed
= True
;
/* Field: chase the field's type reference. */
3925 = chase_cuOff( &b
, ents
, ents_cache
, te
->Te
.Field
.typeR
);
3926 if (b
) changed
= True
;
/* TyPorR: chase the target type reference. */
3937 = chase_cuOff( &b
, ents
, ents_cache
, te
->Te
.TyPorR
.typeR
);
3938 if (b
) changed
= True
;
/* Typedef: chase the aliased type reference. */
3941 te
->Te
.TyTyDef
.typeR
3942 = chase_cuOff( &b
, ents
, ents_cache
, te
->Te
.TyTyDef
.typeR
);
3943 if (b
) changed
= True
;
/* Struct/union: chase every field reference. */
3946 chase_cuOffs_in_XArray( &b
, ents
, ents_cache
, te
->Te
.TyStOrUn
.fieldRs
);
3947 if (b
) changed
= True
;
/* Enum: chase every atom reference. */
3950 chase_cuOffs_in_XArray( &b
, ents
, ents_cache
, te
->Te
.TyEnum
.atomRs
);
3951 if (b
) changed
= True
;
/* Array: chase the element type, then every bound reference. */
3954 te
->Te
.TyArray
.typeR
3955 = chase_cuOff( &b
, ents
, ents_cache
, te
->Te
.TyArray
.typeR
);
3956 if (b
) changed
= True
;
3957 chase_cuOffs_in_XArray( &b
, ents
, ents_cache
, te
->Te
.TyArray
.boundRs
);
3958 if (b
) changed
= True
;
/* Qualified type: chase the underlying type reference. */
3964 = chase_cuOff( &b
, ents
, ents_cache
, te
->Te
.TyQual
.typeR
);
3965 if (b
) changed
= True
;
3976 /* Make a pass over 'ents'. For each tyent, inspect the target of any
3977 'R' or 'Rs' fields (those which refer to other tyents), and replace
3978 any which point to INDIR nodes with the target of the indirection
3979 (which should not itself be an indirection). In summary, this
3980 routine shorts out all references to indirection nodes. */
/* Parameters: 'ents' is the tyent array being rewritten in place;
   'ents_cache' is handed through to TyEnt__subst_R_fields.
   NOTE(review): the declaration of 'b', the update of 'nChanged' and
   the return statement are on lines missing from this extract;
   presumably the function returns the number of tyents for which
   TyEnt__subst_R_fields reported a change -- confirm against the
   full file. */
3982 Word
dedup_types_substitution_pass ( /*MOD*/XArray
* /* of TyEnt */ ents
,
3983 TyEntIndexCache
* ents_cache
)
3985 Word i
, n
, nChanged
= 0;
3987 n
= VG_(sizeXA
)( ents
);
3988 for (i
= 0; i
< n
; i
++) {
3989 TyEnt
* ent
= VG_(indexXA
)( ents
, i
);
3990 vg_assert(ent
->tag
!= Te_EMPTY
);
3991 /* We have to substitute everything, even indirections, so as to
3992 ensure that chains of indirections don't build up. */
3993 b
= TyEnt__subst_R_fields( ents
, ents_cache
, ent
);
4002 /* Make a pass over 'ents', building a dictionary of TyEnts as we go.
4003 Look up each new tyent in the dictionary in turn. If it is already
4004 in the dictionary, replace this tyent with an indirection to the
4005 existing one, and delete any malloc'd stuff hanging off this one.
4006 In summary, this routine commons up all tyents that are identical
4007 as defined by TyEnt__cmp_by_all_except_cuOff. */
/* The dictionary is keyed on TyEnt pointers into 'ents' itself and
   carries no payload (values are asserted to be 0 on lookup).
   NOTE(review): the declarations of 'ent', 'keyW' and 'valW', the call
   that creates 'dict', the initialisation/update of 'nDeleted', the
   'continue'/'else' structure and the return statement are on lines
   missing from this extract; presumably the function returns
   'nDeleted' -- confirm against the full file. */
4009 Word
dedup_types_commoning_pass ( /*MOD*/XArray
* /* of TyEnt */ ents
)
4011 Word n
, i
, nDeleted
;
4012 WordFM
* dict
; /* TyEnt* -> void */
4017 ML_(dinfo_zalloc
), "di.readdwarf3.dtcp.1",
4019 (Word(*)(UWord
,UWord
)) ML_(TyEnt__cmp_by_all_except_cuOff
)
4023 n
= VG_(sizeXA
)( ents
);
4024 for (i
= 0; i
< n
; i
++) {
4025 ent
= VG_(indexXA
)( ents
, i
);
4026 vg_assert(ent
->tag
!= Te_EMPTY
);
4028 /* Ignore indirections, although check that they are
4029 not forming a cycle. */
4030 if (ent
->tag
== Te_INDIR
) {
4031 vg_assert(ent
->Te
.INDIR
.indR
< ent
->cuOff
);
4036 if (VG_(lookupFM
)( dict
, &keyW
, &valW
, (UWord
)ent
)) {
4037 /* it's already in the dictionary. */
4038 TyEnt
* old
= (TyEnt
*)keyW
;
4039 vg_assert(valW
== 0);
4040 vg_assert(old
!= ent
);
4041 vg_assert(old
->tag
!= Te_INDIR
);
4042 /* since we are traversing the array in increasing order of
4044 vg_assert(old
->cuOff
< ent
->cuOff
);
4045 /* So anyway, dump this entry and replace it with an
4046 indirection to the one in the dictionary. Note that the
4047 assertion above guarantees that we cannot create cycles of
4048 indirections, since we are always creating an indirection
4049 to a tyent with a cuOff lower than this one. */
4050 ML_(TyEnt__make_EMPTY
)( ent
);
4051 ent
->tag
= Te_INDIR
;
4052 ent
->Te
.INDIR
.indR
= old
->cuOff
;
4055 /* not in dictionary; add it and keep going. */
4056 VG_(addToFM
)( dict
, (UWord
)ent
, 0 );
/* The dictionary is only needed for the duration of this pass. */
4060 VG_(deleteFM
)( dict
, NULL
, NULL
);
/* Compress the tyent array 'ents': sort it by cuOff, then alternate
   dedup_types_commoning_pass and dedup_types_substitution_pass until
   neither pass reports more than 'nThresh' changes, and finally check
   that every remaining Te_INDIR tyent points at a non-INDIR tyent.
   'ents_cache' is passed to the substitution pass and to the final
   lookup.
   NOTE(review): the initialisation of 'nThresh' and 'm', the opening
   of the do-loop, the declarations of 'ent' and 'ind', and any use of
   'td3' beyond what the TRACE_D3 macro may do are on lines missing
   from this extract -- confirm against the full file. */
4067 void dedup_types ( Bool td3
,
4068 /*MOD*/XArray
* /* of TyEnt */ ents
,
4069 TyEntIndexCache
* ents_cache
)
4071 Word m
, n
, i
, nDel
, nSubst
, nThresh
;
4074 n
= VG_(sizeXA
)( ents
);
4076 /* If a commoning pass and a substitution pass both make fewer than
4077 this many changes, just stop. It's pointless to burn up CPU
4078 time trying to compress the last 1% or so out of the array. */
4081 /* First we must sort .ents by its .cuOff fields, so we
4082 can index into it. */
4083 VG_(setCmpFnXA
)( ents
, (XACmpFn_t
) ML_(TyEnt__cmp_by_cuOff_only
) );
4084 VG_(sortXA
)( ents
);
4086 /* Now repeatedly do commoning and substitution passes over
4087 the array, until there are no more changes. */
4089 nDel
= dedup_types_commoning_pass ( ents
);
4090 nSubst
= dedup_types_substitution_pass ( ents
, ents_cache
);
4091 vg_assert(nDel
>= 0 && nSubst
>= 0);
4092 TRACE_D3(" %ld deletions, %ld substitutions\n", nDel
, nSubst
);
/* Keep iterating while either pass is still above the threshold. */
4093 } while (nDel
> nThresh
|| nSubst
> nThresh
);
4095 /* Sanity check: all INDIR nodes should point at a non-INDIR thing.
4096 In fact this should be true at the end of every loop iteration
4097 above (a commoning pass followed by a substitution pass), but
4098 checking it on every iteration is excessively expensive. Note,
4099 this loop also computes 'm' for the stats printing below it. */
4101 n
= VG_(sizeXA
)( ents
);
4102 for (i
= 0; i
< n
; i
++) {
4104 ent
= VG_(indexXA
)( ents
, i
);
/* Only indirection nodes need checking. */
4105 if (ent
->tag
!= Te_INDIR
) continue;
4107 ind
= ML_(TyEnts__index_by_cuOff
)( ents
, ents_cache
,
4108 ent
->Te
.INDIR
.indR
);
4110 vg_assert(ind
->tag
!= Te_INDIR
);
4113 TRACE_D3("Overall: %ld before, %ld after\n", n
, n
-m
);
4117 /*------------------------------------------------------------*/
4119 /*--- Resolution of references to type DIEs ---*/
4121 /*------------------------------------------------------------*/
4123 /* Make a pass through the (temporary) variables array. Examine the
4124 type of each variable, check that it is found, and chase any Te_INDIRs.
4125 Postcondition is: each variable has a typeR field that refers to a
4126 valid type in tyents, or a Te_UNKNOWN, and is certainly guaranteed
4127 not to refer to a Te_INDIR. (This is so that we can throw all the
4128 Te_INDIRs away later). */
/* Parameters: 'barf' is a noreturn error reporter invoked when a
   variable's type offset cannot be resolved; 'ents' / 'ents_cache'
   are used for cuOff lookups; 'vars' holds TempVar* elements whose
   typeR fields are rewritten in place.
   NOTE(review): the declarations of 'i' and 'n', the second lookup
   argument on the first TyEnts__index_by_cuOff call, and the
   conditions / 'continue' statements separating the three outcomes
   below are on lines missing from this extract -- confirm against
   the full file. */
4130 __attribute__((noinline
))
4131 static void resolve_variable_types (
4132 void (*barf
)( const HChar
* ) __attribute__((noreturn
)),
4133 /*R-O*/XArray
* /* of TyEnt */ ents
,
4134 /*MOD*/TyEntIndexCache
* ents_cache
,
4135 /*MOD*/XArray
* /* of TempVar* */ vars
4139 n
= VG_(sizeXA
)( vars
);
4140 for (i
= 0; i
< n
; i
++) {
4141 TempVar
* var
= *(TempVar
**)VG_(indexXA
)( vars
, i
);
4142 /* This is the stated type of the variable. But it might be
4143 an indirection, so be careful. */
4144 TyEnt
* ent
= ML_(TyEnts__index_by_cuOff
)( ents
, ents_cache
,
/* One hop is followed; the assertion below checks that the target
   is not itself an indirection. */
4146 if (ent
&& ent
->tag
== Te_INDIR
) {
4147 ent
= ML_(TyEnts__index_by_cuOff
)( ents
, ents_cache
,
4148 ent
->Te
.INDIR
.indR
);
4150 vg_assert(ent
->tag
!= Te_INDIR
);
4153 /* Deal first with "normal" cases */
4154 if (ent
&& ML_(TyEnt__is_type
)(ent
)) {
4155 var
->typeR
= ent
->cuOff
;
4159 /* If there's no ent, we probably did not manage to read a
4160 type at the cuOffset which is stated as being this variable's
4161 type. Maybe a deficiency in parse_type_DIE. Complain. */
4163 VG_(printf
)("\n: Invalid cuOff = 0x%05lx\n", var
->typeR
);
4164 barf("resolve_variable_types: "
4165 "cuOff does not refer to a known type");
4168 /* If ent has any other tag, something bad happened, along the
4169 lines of var->typeR not referring to a type at all. */
4170 vg_assert(ent
->tag
== Te_UNKNOWN
);
4171 /* Just accept it; the type will be useless, but at least keep
4173 var
->typeR
= ent
->cuOff
;
4178 /*------------------------------------------------------------*/
4180 /*--- Parsing of Compilation Units ---*/
4182 /*------------------------------------------------------------*/
/* Comparison callback ordering TempVar* elements by ascending 'dioff'.
   'v1' and 'v2' each point at an array element that is itself a
   TempVar*, hence the extra dereference.  Returns -1 when the first
   element has the smaller dioff and 1 when it has the larger one.
   NOTE(review): the equal-dioff return and the closing brace are on
   lines missing from this extract; presumably 0 is returned -- confirm
   against the full file. */
4184 static Int
cmp_TempVar_by_dioff ( const void* v1
, const void* v2
) {
4185 const TempVar
* t1
= *(const TempVar
*const *)v1
;
4186 const TempVar
* t2
= *(const TempVar
*const *)v2
;
4187 if (t1
->dioff
< t2
->dioff
) return -1;
4188 if (t1
->dioff
> t2
->dioff
) return 1;
4192 static void read_DIE (
4193 /*MOD*/WordFM
* /* of (XArray* of AddrRange, void) */ rangestree
,
4194 /*MOD*/XArray
* /* of TyEnt */ tyents
,
4195 /*MOD*/XArray
* /* of TempVar* */ tempvars
,
4196 /*MOD*/XArray
* /* of GExpr* */ gexprs
,
4197 /*MOD*/D3TypeParser
* typarser
,
4198 /*MOD*/D3VarParser
* varparser
,
4199 /*MOD*/D3InlParser
* inlparser
,
4200 Cursor
* c
, Bool td3
, CUConst
* cc
, Int level
4204 ULong atag
, abbv_code
;
4207 UWord start_die_c_offset
;
4208 UWord after_die_c_offset
;
4209 // If the DIE we will parse has a sibling and the parser(s) are
4210 // all indicating that parse_children is not necessary, then
4211 // we will skip the children by jumping to the sibling of this DIE
4212 // (if it has a sibling).
4214 Bool parse_children
= False
;
4216 /* --- Deal with this DIE --- */
4217 posn
= cook_die( cc
, get_position_of_Cursor( c
) );
4218 abbv_code
= get_ULEB128( c
);
4219 abbv
= get_abbv(cc
, abbv_code
);
4224 trace_DIE ((DW_TAG
)atag
, posn
, level
,
4225 get_position_of_Cursor( c
), abbv
, cc
);
4229 cc
->barf("read_DIE: invalid zero tag on DIE");
4231 has_children
= abbv
->has_children
;
4232 if (has_children
!= DW_children_no
&& has_children
!= DW_children_yes
)
4233 cc
->barf("read_DIE: invalid has_children value");
4235 /* We're set up to look at the fields of this DIE. Hand it off to
4236 any parser(s) that want to see it. Since they will in general
4237 advance the DIE cursor, remember the current settings so that we
4238 can then back up. */
4239 start_die_c_offset
= get_position_of_Cursor( c
);
4240 after_die_c_offset
= 0; // set to c position if a parser has read the DIE.
4242 if (VG_(clo_read_var_info
)) {
4243 parse_type_DIE( tyents
,
4252 if (get_position_of_Cursor( c
) != start_die_c_offset
) {
4253 after_die_c_offset
= get_position_of_Cursor( c
);
4254 set_position_of_Cursor( c
, start_die_c_offset
);
4257 parse_var_DIE( rangestree
,
4268 if (get_position_of_Cursor( c
) != start_die_c_offset
) {
4269 after_die_c_offset
= get_position_of_Cursor( c
);
4270 set_position_of_Cursor( c
, start_die_c_offset
);
4273 parse_children
= True
;
4274 // type and var parsers do not have logic to skip childrens and establish
4275 // the value of sibling.
4278 if (VG_(clo_read_inline_info
)) {
4279 inlparser
->sibling
= 0;
4281 parse_inl_DIE( inlparser
,
4290 if (get_position_of_Cursor( c
) != start_die_c_offset
) {
4291 after_die_c_offset
= get_position_of_Cursor( c
);
4292 // Last parser, no need to reset the cursor to start_die_c_offset.
4295 sibling
= inlparser
->sibling
;
4296 vg_assert (inlparser
->sibling
== 0 || inlparser
->sibling
== sibling
);
4299 if (after_die_c_offset
> 0) {
4300 // DIE was read by a parser above, so we know where the DIE ends.
4301 set_position_of_Cursor( c
, after_die_c_offset
);
4303 /* No parser has parsed this DIE. So, we need to skip the DIE,
4304 in order to read the next DIE.
4305 At the same time, establish sibling value if the DIE has one. */
4306 TRACE_D3(" uninteresting DIE -> skipping ...\n");
4307 skip_DIE (&sibling
, c
, abbv
, cc
);
4310 /* --- Now recurse into its children, if any
4311 and the parsing of the children is requested by a parser --- */
4312 if (has_children
== DW_children_yes
) {
4313 if (parse_children
|| sibling
== 0) {
4314 if (0) TRACE_D3("BEGIN children of level %d\n", level
);
4316 atag
= peek_ULEB128( c
);
4317 if (atag
== 0) break;
4318 read_DIE( rangestree
, tyents
, tempvars
, gexprs
,
4319 typarser
, varparser
, inlparser
,
4320 c
, td3
, cc
, level
+1 );
4322 /* Now we need to eat the terminating zero */
4323 atag
= get_ULEB128( c
);
4324 vg_assert(atag
== 0);
4325 if (0) TRACE_D3("END children of level %d\n", level
);
4327 // We can skip the childrens, by jumping to the sibling
4328 TRACE_D3(" SKIPPING DIE's children,"
4329 "jumping to sibling <%d><%lx>\n",
4331 set_position_of_Cursor( c
, sibling
);
/* Tracing helper: print the contents of the .debug_loc slice
   'escn_debug_loc' using TRACE_SYMTAB / TRACE_D3.  Tracing is
   controlled by di->trace_symtab.  'barf' is handed to init_Cursor
   together with the overrun message.  Entries are read as pairs of
   host words; a (0,0) pair ends a list, and otherwise a length-prefixed
   run of expression bytes is dumped in hex. */
4337 static void trace_debug_loc (const DebugInfo
* di
,
4338 __attribute__((noreturn
)) void (*barf
)( const HChar
* ),
4339 DiSlice escn_debug_loc
)
4342 /* This doesn't work properly because it assumes all entries are
4343 packed end to end, with no holes. But that doesn't always
4344 appear to be the case, so it loses sync. And the D3 spec
4345 doesn't appear to require a no-hole situation either. */
4346 /* Display .debug_loc */
4349 Cursor loc
; /* for showing .debug_loc */
4350 Bool td3
= di
->trace_symtab
;
4353 TRACE_SYMTAB("\n------ The contents of .debug_loc ------\n");
4354 TRACE_SYMTAB(" Offset Begin End Expression\n");
4355 if (ML_(sli_is_valid
)(escn_debug_loc
)) {
4356 init_Cursor( &loc
, escn_debug_loc
, 0, barf
,
4357 "Overrun whilst reading .debug_loc section(1)" );
4363 if (is_at_end_Cursor( &loc
))
4366 /* Read a (host-)word pair. This is something of a hack since
4367 the word size to read is really dictated by the ELF file;
4368 however, we assume we're reading a file with the same
4369 word-sizeness as the host. Reasonably enough. */
4370 w1
= get_UWord( &loc
);
4371 w2
= get_UWord( &loc
);
4373 if (w1
== 0 && w2
== 0) {
4374 /* end of list. reset 'base' */
4375 TRACE_D3(" %08lx <End of list>\n", dl_offset
);
4377 dl_offset
= get_position_of_Cursor( &loc
);
4382 /* new value for 'base' */
4383 TRACE_D3(" %08lx %16lx %08lx (base address)\n",
4389 /* else a location expression follows */
4390 TRACE_D3(" %08lx %08lx %08lx ",
4391 dl_offset
, w1
+ dl_base
, w2
+ dl_base
);
4392 len
= (UWord
)get_UShort( &loc
);
4394 UChar byte
= get_UChar( &loc
);
4395 TRACE_D3("%02x", (UInt
)byte
);
/* Tracing helper: print the contents of the .debug_ranges slice
   'escn_debug_ranges' using TRACE_SYMTAB / TRACE_D3.  Tracing is
   controlled by di->trace_symtab.  'barf' is handed to init_Cursor
   together with the overrun message.  Entries are read as pairs of
   host words; a (0,0) pair ends a list, and an ordinary pair is
   printed as [w1+dr_base, w2+dr_base). */
4404 static void trace_debug_ranges (const DebugInfo
* di
,
4405 __attribute__((noreturn
)) void (*barf
)( const HChar
* ),
4406 DiSlice escn_debug_ranges
)
4408 Cursor ranges
; /* for showing .debug_ranges */
4411 Bool td3
= di
->trace_symtab
;
4413 /* Display .debug_ranges */
4415 TRACE_SYMTAB("\n------ The contents of .debug_ranges ------\n");
4416 TRACE_SYMTAB(" Offset Begin End\n");
4417 if (ML_(sli_is_valid
)(escn_debug_ranges
)) {
4418 init_Cursor( &ranges
, escn_debug_ranges
, 0, barf
,
4419 "Overrun whilst reading .debug_ranges section(1)" );
4425 if (is_at_end_Cursor( &ranges
))
4428 /* Read a (host-)word pair. This is something of a hack since
4429 the word size to read is really dictated by the ELF file;
4430 however, we assume we're reading a file with the same
4431 word-sizeness as the host. Reasonably enough. */
4432 w1
= get_UWord( &ranges
);
4433 w2
= get_UWord( &ranges
);
4435 if (w1
== 0 && w2
== 0) {
4436 /* end of list. reset 'base' */
4437 TRACE_D3(" %08lx <End of list>\n", dr_offset
);
4439 dr_offset
= get_position_of_Cursor( &ranges
);
4444 /* new value for 'base' */
4445 TRACE_D3(" %08lx %16lx %08lx (base address)\n",
4451 /* else a range [w1+base, w2+base) is denoted */
4452 TRACE_D3(" %08lx %08lx %08lx\n",
4453 dr_offset
, w1
+ dr_base
, w2
+ dr_base
);
/* Tracing helper: print the contents of the .debug_abbrev slice
   'escn_debug_abbv' using TRACE_SYMTAB / TRACE_D3.  Tracing is
   controlled by di->trace_symtab.  'barf' is handed to init_Cursor
   together with the overrun message.  For each abbreviation the code,
   tag and has_children byte are printed, followed by its
   (attribute name, form) pairs up to the terminating (0,0) pair.  An
   abbreviation code of zero ends a table. */
4458 static void trace_debug_abbrev (const DebugInfo
* di
,
4459 __attribute__((noreturn
)) void (*barf
)( const HChar
* ),
4460 DiSlice escn_debug_abbv
)
4462 Cursor abbv
; /* for showing .debug_abbrev */
4463 Bool td3
= di
->trace_symtab
;
4465 /* Display .debug_abbrev */
4467 TRACE_SYMTAB("\n------ The contents of .debug_abbrev ------\n");
4468 if (ML_(sli_is_valid
)(escn_debug_abbv
)) {
4469 init_Cursor( &abbv
, escn_debug_abbv
, 0, barf
,
4470 "Overrun whilst reading .debug_abbrev section" );
4472 if (is_at_end_Cursor( &abbv
))
4474 /* Read one abbreviation table */
4475 TRACE_D3(" Number TAG\n");
4479 ULong acode
= get_ULEB128( &abbv
);
4480 if (acode
== 0) break; /* end of the table */
4481 atag
= get_ULEB128( &abbv
);
4482 has_children
= get_UChar( &abbv
);
4483 TRACE_D3(" %llu %s [%s]\n",
4484 acode
, ML_(pp_DW_TAG
)(atag
),
4485 ML_(pp_DW_children
)(has_children
));
4487 ULong at_name
= get_ULEB128( &abbv
);
4488 ULong at_form
= get_ULEB128( &abbv
);
4489 if (at_name
== 0 && at_form
== 0) break;
4490 TRACE_D3(" %-18s %s\n",
4491 ML_(pp_DW_AT
)(at_name
), ML_(pp_DW_FORM
)(at_form
));
/* Worker for the DWARF3 reader.  Visible phases, in order:
   - trace .debug_loc / .debug_ranges / .debug_abbrev;
   - zero the inline parser; when VG_(clo_read_var_info) is set,
     create the temporary tables (tyents seeded with a fake void and
     an UNKNOWN entry, rangestree, tempvars, gexprs), initialise the
     type and variable parsers and the signatured-type hash table,
     then pre-scan .debug_types to record type signatures;
   - run three DIE-reading passes (alternate .debug_info, .debug_info,
     .debug_types), reading one top-level DIE per CU with read_DIE and
     skipping any bytes a CU claims but does not use;
   - when VG_(clo_read_var_info) is set, post-process: dedup types,
     resolve variable types, keep the non-INDIR type entries in
     di->admin_tyents, bias the location expressions, resolve abstract
     origins through a dioff-sorted lookup table, hand usable variables
     on (to ML_(addVar), per the comments below) once per address
     range, then free the temporaries and park gexprs in
     di->admin_gexprs.
   Failures are reported by calling 'barf', which is declared
   noreturn. */
4499 void new_dwarf3_reader_wrk (
4501 __attribute__((noreturn
)) void (*barf
)( const HChar
* ),
4502 DiSlice escn_debug_info
, DiSlice escn_debug_types
,
4503 DiSlice escn_debug_abbv
, DiSlice escn_debug_line
,
4504 DiSlice escn_debug_str
, DiSlice escn_debug_ranges
,
4505 DiSlice escn_debug_loc
, DiSlice escn_debug_info_alt
,
4506 DiSlice escn_debug_abbv_alt
, DiSlice escn_debug_line_alt
,
4507 DiSlice escn_debug_str_alt
4510 XArray
* /* of TyEnt */ tyents
= NULL
;
4511 XArray
* /* of TyEnt */ tyents_to_keep
= NULL
;
4512 XArray
* /* of GExpr* */ gexprs
= NULL
;
4513 XArray
* /* of TempVar* */ tempvars
= NULL
;
4514 WordFM
* /* of (XArray* of AddrRange, void) */ rangestree
= NULL
;
4515 TyEntIndexCache
* tyents_cache
= NULL
;
4516 TyEntIndexCache
* tyents_to_keep_cache
= NULL
;
4517 TempVar
*varp
, *varp2
;
4519 Cursor info
; /* primary cursor for parsing .debug_info */
4520 D3TypeParser typarser
;
4521 D3VarParser varparser
;
4522 D3InlParser inlparser
;
4524 Bool td3
= di
->trace_symtab
;
4525 XArray
* /* of TempVar* */ dioff_lookup_tab
;
4527 VgHashTable
*signature_types
= NULL
;
4529 /* Display/trace various information, if requested. */
4531 trace_debug_loc (di
, barf
, escn_debug_loc
);
4532 trace_debug_ranges (di
, barf
, escn_debug_ranges
);
4533 trace_debug_abbrev (di
, barf
, escn_debug_abbv
);
4537 /* Zero out all parsers. Parsers will really be initialised
4538 according to VG_(clo_read_*_info). */
4539 VG_(memset
)( &inlparser
, 0, sizeof(inlparser
) );
4541 if (VG_(clo_read_var_info
)) {
4542 /* We'll park the harvested type information in here. Also create
4543 a fake "void" entry with offset D3_FAKEVOID_CUOFF, so we always
4544 have at least one type entry to refer to. D3_FAKEVOID_CUOFF is
4545 huge and presumably will not occur in any valid DWARF3 file --
4546 it would need to have a .debug_info section 4GB long for that to
4547 happen. These type entries end up in the DebugInfo. */
4548 tyents
= VG_(newXA
)( ML_(dinfo_zalloc
),
4549 "di.readdwarf3.ndrw.1 (TyEnt temp array)",
4550 ML_(dinfo_free
), sizeof(TyEnt
) );
4552 VG_(memset
)(&tyent
, 0, sizeof(tyent
));
4553 tyent
.tag
= Te_TyVoid
;
4554 tyent
.cuOff
= D3_FAKEVOID_CUOFF
;
4555 tyent
.Te
.TyVoid
.isFake
= True
;
4556 VG_(addToXA
)( tyents
, &tyent
);
4559 VG_(memset
)(&tyent
, 0, sizeof(tyent
));
4560 tyent
.tag
= Te_UNKNOWN
;
4561 tyent
.cuOff
= D3_INVALID_CUOFF
;
4562 VG_(addToXA
)( tyents
, &tyent
);
4565 /* This is a tree used to unique-ify the range lists that are
4566 manufactured by parse_var_DIE. References to the keys in the
4567 tree wind up in .rngMany fields in TempVars. We'll need to
4568 delete this tree, and the XArrays attached to it, at the end of
4570 rangestree
= VG_(newFM
)( ML_(dinfo_zalloc
),
4571 "di.readdwarf3.ndrw.2 (rangestree)",
4573 (Word(*)(UWord
,UWord
))cmp__XArrays_of_AddrRange
);
4575 /* List of variables we're accumulating. These don't end up in the
4576 DebugInfo; instead their contents are handed to ML_(addVar) and
4577 the list elements are then deleted. */
4578 tempvars
= VG_(newXA
)( ML_(dinfo_zalloc
),
4579 "di.readdwarf3.ndrw.3 (TempVar*s array)",
4583 /* List of GExprs we're accumulating. These wind up in the
4585 gexprs
= VG_(newXA
)( ML_(dinfo_zalloc
), "di.readdwarf3.ndrw.4",
4586 ML_(dinfo_free
), sizeof(GExpr
*) );
4588 /* We need a D3TypeParser to keep track of partially constructed
4589 types. It'll be discarded as soon as we've completed the CU,
4590 since the resulting information is tipped in to 'tyents' as it
4592 type_parser_init(&typarser
);
4594 var_parser_init(&varparser
);
4596 signature_types
= VG_(HT_construct
) ("signature_types");
4599 /* Do an initial pass to scan the .debug_types section, if any, and
4600 fill in the signatured types hash table. This lets us handle
4601 mapping from a type signature to a (cooked) DIE offset directly
4602 in get_Form_contents. */
4603 if (VG_(clo_read_var_info
) && ML_(sli_is_valid
)(escn_debug_types
)) {
4604 init_Cursor( &info
, escn_debug_types
, 0, barf
,
4605 "Overrun whilst reading .debug_types section" );
4606 TRACE_D3("\n------ Collecting signatures from "
4607 ".debug_types section ------\n");
4610 UWord cu_start_offset
, cu_offset_now
;
4613 cu_start_offset
= get_position_of_Cursor( &info
);
4615 TRACE_D3(" Compilation Unit @ offset 0x%lx:\n", cu_start_offset
);
4616 /* parse_CU_Header initialises the CU's abbv hash table. */
4617 parse_CU_Header( &cc
, td3
, &info
, escn_debug_abbv
, True
, False
);
4619 /* Needed by cook_die. */
4620 cc
.types_cuOff_bias
= escn_debug_info
.szB
;
4622 record_signatured_type( signature_types
, cc
.type_signature
,
4623 cook_die( &cc
, cc
.type_offset
));
4625 /* Until proven otherwise we assume we don't need the icc9
4626 workaround in this case; see the DIE-reading loop below
4628 cu_offset_now
= (cu_start_offset
+ cc
.unit_length
4629 + (cc
.is_dw64
? 12 : 4));
4631 clear_CUConst ( &cc
);
4633 if (cu_offset_now
>= escn_debug_types
.szB
) {
4637 set_position_of_Cursor ( &info
, cu_offset_now
);
4641 /* Perform three DIE-reading passes. The first pass reads DIEs from
4642 alternate .debug_info (if any), the second pass reads DIEs from
4643 .debug_info, and the third pass reads DIEs from .debug_types.
4644 Moving the body of this loop into a separate function would
4645 require a large number of arguments to be passed in, so it is
4646 kept inline instead. */
4647 for (pass
= 0; pass
< 3; ++pass
) {
4651 if (!ML_(sli_is_valid
)(escn_debug_info_alt
))
4653 /* Now loop over the Compilation Units listed in the alternate
4654 .debug_info section (see D3SPEC sec 7.5) paras 1 and 2.
4655 Each compilation unit contains a Compilation Unit Header
4656 followed by precisely one DW_TAG_compile_unit or
4657 DW_TAG_partial_unit DIE. */
4658 init_Cursor( &info
, escn_debug_info_alt
, 0, barf
,
4659 "Overrun whilst reading alternate .debug_info section" );
4660 section_size
= escn_debug_info_alt
.szB
;
4662 TRACE_D3("\n------ Parsing alternate .debug_info section ------\n");
4663 } else if (pass
== 1) {
4664 /* Now loop over the Compilation Units listed in the .debug_info
4665 section (see D3SPEC sec 7.5) paras 1 and 2. Each compilation
4666 unit contains a Compilation Unit Header followed by precisely
4667 one DW_TAG_compile_unit or DW_TAG_partial_unit DIE. */
4668 init_Cursor( &info
, escn_debug_info
, 0, barf
,
4669 "Overrun whilst reading .debug_info section" );
4670 section_size
= escn_debug_info
.szB
;
4672 TRACE_D3("\n------ Parsing .debug_info section ------\n");
4674 if (!ML_(sli_is_valid
)(escn_debug_types
))
4676 if (!VG_(clo_read_var_info
))
4677 continue; // Types not needed when only reading inline info.
4678 init_Cursor( &info
, escn_debug_types
, 0, barf
,
4679 "Overrun whilst reading .debug_types section" );
4680 section_size
= escn_debug_types
.szB
;
4682 TRACE_D3("\n------ Parsing .debug_types section ------\n");
4686 ULong cu_start_offset
, cu_offset_now
;
4688 /* It may be that the stated size of this CU is larger than the
4689 amount of stuff actually in it. icc9 seems to generate CUs
4690 thusly. We use these variables to figure out if this is
4691 indeed the case, and if so how many bytes we need to skip to
4692 get to the start of the next CU. Not skipping those bytes
4693 causes us to misidentify the start of the next CU, and it all
4694 goes badly wrong after that (not surprisingly). */
4695 UWord cu_size_including_IniLen
, cu_amount_used
;
4697 /* It seems icc9 finishes the DIE info before debug_info_sz
4698 bytes have been used up. So be flexible, and declare the
4699 sequence complete if there is not enough remaining bytes to
4700 hold even the smallest conceivable CU header. (11 bytes I
4702 /* JRS 23Jan09: I suspect this is no longer necessary now that
4703 the code below contains a 'while (cu_amount_used <
4704 cu_size_including_IniLen ...' style loop, which skips over
4705 any leftover bytes at the end of a CU in the case where the
4706 CU's stated size is larger than its actual size (as
4707 determined by reading all its DIEs). However, for prudence,
4708 I'll leave the following test in place. I can't see that a
4709 CU header can be smaller than 11 bytes, so I don't think
4710 there's any harm possible through the test -- it just adds
4712 Word avail
= get_remaining_length_Cursor( &info
);
4715 TRACE_D3("new_dwarf3_reader_wrk: warning: "
4716 "%ld unused bytes after end of DIEs\n", avail
);
4720 if (VG_(clo_read_var_info
)) {
4721 /* Check the varparser's stack is in a sane state. */
4722 vg_assert(varparser
.sp
== -1);
4723 /* Check the typarser's stack is in a sane state. */
4724 vg_assert(typarser
.sp
== -1);
4727 cu_start_offset
= get_position_of_Cursor( &info
);
4729 TRACE_D3(" Compilation Unit @ offset 0x%llx:\n", cu_start_offset
);
4730 /* parse_CU_Header initialises the CU's hashtable of abbvs ht_abbvs */
4732 parse_CU_Header( &cc
, td3
, &info
, escn_debug_abbv_alt
,
4735 parse_CU_Header( &cc
, td3
, &info
, escn_debug_abbv
,
4738 cc
.escn_debug_str
= pass
== 0 ? escn_debug_str_alt
4740 cc
.escn_debug_ranges
= escn_debug_ranges
;
4741 cc
.escn_debug_loc
= escn_debug_loc
;
4742 cc
.escn_debug_line
= pass
== 0 ? escn_debug_line_alt
4744 cc
.escn_debug_info
= pass
== 0 ? escn_debug_info_alt
4746 cc
.escn_debug_types
= escn_debug_types
;
4747 cc
.escn_debug_info_alt
= escn_debug_info_alt
;
4748 cc
.escn_debug_str_alt
= escn_debug_str_alt
;
4749 cc
.types_cuOff_bias
= escn_debug_info
.szB
;
4750 cc
.alt_cuOff_bias
= escn_debug_info
.szB
+ escn_debug_types
.szB
;
4751 cc
.cu_start_offset
= cu_start_offset
;
4753 /* The CU's svma can be deduced by looking at the AT_low_pc
4754 value in the top level TAG_compile_unit, which is the topmost
4755 DIE. We'll leave it for the 'varparser' to acquire that info
4756 and fill it in -- since it is the only party to want to know
4758 cc
.cu_svma_known
= False
;
4761 if (VG_(clo_read_var_info
)) {
4762 cc
.signature_types
= signature_types
;
4764 /* Create a fake outermost-level range covering the entire
4765 address range. So we always have *something* to catch all
4766 variable declarations. */
4767 varstack_push( &cc
, &varparser
, td3
,
4768 unitary_range_list(0UL, ~0UL),
4769 -1, False
/*isFunc*/, NULL
/*fbGX*/ );
4771 /* And set up the fndn_ix_Table. When we come across the top
4772 level DIE for this CU (which is what the next call to
4773 read_DIE should process) we will copy all the file names out
4774 of the .debug_line img area and use this table to look up the
4775 copies when we later see filename numbers in DW_TAG_variables
4777 vg_assert(!varparser
.fndn_ix_Table
);
4778 varparser
.fndn_ix_Table
4779 = VG_(newXA
)( ML_(dinfo_zalloc
), "di.readdwarf3.ndrw.5var",
4784 if (VG_(clo_read_inline_info
)) {
4785 /* fndn_ix_Table for the inlined call parser */
4786 vg_assert(!inlparser
.fndn_ix_Table
);
4787 inlparser
.fndn_ix_Table
4788 = VG_(newXA
)( ML_(dinfo_zalloc
), "di.readdwarf3.ndrw.5inl",
4793 /* Now read the one-and-only top-level DIE for this CU. */
4794 vg_assert(!VG_(clo_read_var_info
) || varparser
.sp
== 0);
4795 read_DIE( rangestree
,
4796 tyents
, tempvars
, gexprs
,
4797 &typarser
, &varparser
, &inlparser
,
4798 &info
, td3
, &cc
, 0 );
4800 cu_offset_now
= get_position_of_Cursor( &info
);
4802 if (0) VG_(printf
)("Travelled: %llu size %llu\n",
4803 cu_offset_now
- cc
.cu_start_offset
,
4804 cc
.unit_length
+ (cc
.is_dw64
? 12 : 4));
4806 /* How big the CU claims it is .. */
4807 cu_size_including_IniLen
= cc
.unit_length
+ (cc
.is_dw64
? 12 : 4);
4808 /* .. vs how big we have found it to be */
4809 cu_amount_used
= cu_offset_now
- cc
.cu_start_offset
;
4811 if (1) TRACE_D3("offset now %llu, d-i-size %llu\n",
4812 cu_offset_now
, section_size
);
4813 if (cu_offset_now
> section_size
)
4814 barf("toplevel DIEs beyond end of CU");
4816 /* If the CU is bigger than it claims to be, we've got a serious
4818 if (cu_amount_used
> cu_size_including_IniLen
)
4819 barf("CU's actual size appears to be larger than it claims it is");
4821 /* If the CU is smaller than it claims to be, we need to skip some
4822 bytes. Loop updates cu_offset_now and cu_amount_used. */
4823 while (cu_amount_used
< cu_size_including_IniLen
4824 && get_remaining_length_Cursor( &info
) > 0) {
4825 if (0) VG_(printf
)("SKIP\n");
4826 (void)get_UChar( &info
);
4827 cu_offset_now
= get_position_of_Cursor( &info
);
4828 cu_amount_used
= cu_offset_now
- cc
.cu_start_offset
;
4831 if (VG_(clo_read_var_info
)) {
4832 /* Preen to level -2. DIEs have level >= 0 so -2 cannot occur
4833 anywhere else at all. Our fake the-entire-address-space
4834 range is at level -1, so preening to -2 should completely
4835 empty the stack out. */
4837 varstack_preen( &varparser
, td3
, -2 );
4838 /* Similarly, empty the type stack out. */
4839 typestack_preen( &typarser
, td3
, -2 );
4842 if (VG_(clo_read_var_info
)) {
4843 vg_assert(varparser
.fndn_ix_Table
);
4844 VG_(deleteXA
)( varparser
.fndn_ix_Table
);
4845 varparser
.fndn_ix_Table
= NULL
;
4847 if (VG_(clo_read_inline_info
)) {
4848 vg_assert(inlparser
.fndn_ix_Table
);
4849 VG_(deleteXA
)( inlparser
.fndn_ix_Table
);
4850 inlparser
.fndn_ix_Table
= NULL
;
4854 if (cu_offset_now
== section_size
)
4856 /* else keep going */
4861 if (VG_(clo_read_var_info
)) {
4862 /* From here on we're post-processing the stuff we got
4863 out of the .debug_info section. */
4866 ML_(pp_TyEnts
)(tyents
, "Initial type entity (TyEnt) array");
4868 TRACE_D3("------ Compressing type entries ------\n");
4871 tyents_cache
= ML_(dinfo_zalloc
)( "di.readdwarf3.ndrw.6",
4872 sizeof(TyEntIndexCache
) );
4873 ML_(TyEntIndexCache__invalidate
)( tyents_cache
);
4874 dedup_types( td3
, tyents
, tyents_cache
);
4877 ML_(pp_TyEnts
)(tyents
, "After type entity (TyEnt) compression");
4881 TRACE_D3("------ Resolving the types of variables ------\n" );
4882 resolve_variable_types( barf
, tyents
, tyents_cache
, tempvars
);
4884 /* Copy all the non-INDIR tyents into a new table. For large
4885 .so's, about 90% of the tyents will by now have been resolved to
4886 INDIRs, and we no longer need them, and so don't need to store
4889 = VG_(newXA
)( ML_(dinfo_zalloc
),
4890 "di.readdwarf3.ndrw.7 (TyEnt to-keep array)",
4891 ML_(dinfo_free
), sizeof(TyEnt
) );
4892 n
= VG_(sizeXA
)( tyents
);
4893 for (i
= 0; i
< n
; i
++) {
4894 TyEnt
* ent
= VG_(indexXA
)( tyents
, i
);
4895 if (ent
->tag
!= Te_INDIR
)
4896 VG_(addToXA
)( tyents_to_keep
, ent
);
4899 VG_(deleteXA
)( tyents
);
4901 ML_(dinfo_free
)( tyents_cache
);
4902 tyents_cache
= NULL
;
4904 /* Sort tyents_to_keep so we can lookup in it. A complete (if
4905 minor) waste of time, since tyents itself is sorted, but
4906 necessary since VG_(lookupXA) refuses to cooperate if we
4908 VG_(setCmpFnXA
)( tyents_to_keep
, (XACmpFn_t
) ML_(TyEnt__cmp_by_cuOff_only
) );
4909 VG_(sortXA
)( tyents_to_keep
);
4911 /* Enable caching on tyents_to_keep */
4912 tyents_to_keep_cache
4913 = ML_(dinfo_zalloc
)( "di.readdwarf3.ndrw.8",
4914 sizeof(TyEntIndexCache
) );
4915 ML_(TyEntIndexCache__invalidate
)( tyents_to_keep_cache
);
4917 /* And record the tyents in the DebugInfo. We do this before
4918 starting to hand variables to ML_(addVar), since if ML_(addVar)
4919 wants to do debug printing (of the types of said vars) then it
4920 will need the tyents.*/
4921 vg_assert(!di
->admin_tyents
);
4922 di
->admin_tyents
= tyents_to_keep
;
4924 /* Bias all the location expressions. */
4926 TRACE_D3("------ Biasing the location expressions ------\n" );
4928 n
= VG_(sizeXA
)( gexprs
);
4929 for (i
= 0; i
< n
; i
++) {
4930 gexpr
= *(GExpr
**)VG_(indexXA
)( gexprs
, i
);
4931 bias_GX( gexpr
, di
);
4935 TRACE_D3("------ Acquired the following variables: ------\n\n");
4937 /* Park (pointers to) all the vars in an XArray, so we can look up
4938 abstract origins quickly. The array is sorted (hence, looked-up
4939 by) the .dioff fields. Since the .dioffs should be in strictly
4940 ascending order, there is no need to sort the array after
4941 construction. The ascendingness is however asserted for. */
4943 = VG_(newXA
)( ML_(dinfo_zalloc
), "di.readdwarf3.ndrw.9",
4947 n
= VG_(sizeXA
)( tempvars
);
4948 Word first_primary_var
= 0;
4949 for (first_primary_var
= 0;
4950 escn_debug_info_alt
.szB
/*really?*/ && first_primary_var
< n
;
4951 first_primary_var
++) {
4952 varp
= *(TempVar
**)VG_(indexXA
)( tempvars
, first_primary_var
);
4953 if (varp
->dioff
< escn_debug_info
.szB
+ escn_debug_types
.szB
)
4956 for (i
= 0; i
< n
; i
++) {
4957 varp
= *(TempVar
**)VG_(indexXA
)( tempvars
, (i
+ first_primary_var
) % n
);
/* NOTE(review): the ordering check below only runs once i exceeds
   first_primary_var, so the first few rotated entries are not
   compared with their predecessor; "i > 0" may have been intended
   -- confirm. */
4958 if (i
> first_primary_var
) {
4959 varp2
= *(TempVar
**)VG_(indexXA
)( tempvars
,
4960 (i
+ first_primary_var
- 1) % n
);
4961 /* why should this hold? Only, I think, because we've
4962 constructed the array by reading .debug_info sequentially,
4963 and so the array .dioff fields should reflect that, and be
4964 strictly ascending. */
4965 vg_assert(varp2
->dioff
< varp
->dioff
);
4967 VG_(addToXA
)( dioff_lookup_tab
, &varp
);
4969 VG_(setCmpFnXA
)( dioff_lookup_tab
, cmp_TempVar_by_dioff
);
4970 VG_(sortXA
)( dioff_lookup_tab
); /* POINTLESS; FIXME: rm */
4972 /* Now visit each var. Collect up as much info as possible for
4973 each var and hand it to ML_(addVar). */
4974 n
= VG_(sizeXA
)( tempvars
);
4975 for (j
= 0; j
< n
; j
++) {
4977 varp
= *(TempVar
**)VG_(indexXA
)( tempvars
, j
);
4979 /* Possibly show .. */
4981 VG_(printf
)("<%lx> addVar: level %d: %s :: ",
4984 varp
->name
? varp
->name
: "<anon_var>" );
4986 ML_(pp_TyEnt_C_ishly
)( tyents_to_keep
, varp
->typeR
);
4988 VG_(printf
)("NULL");
4990 VG_(printf
)("\n Loc=");
4992 ML_(pp_GX
)(varp
->gexpr
);
4994 VG_(printf
)("NULL");
4998 VG_(printf
)(" FrB=");
4999 ML_(pp_GX
)( varp
->fbGX
);
5002 VG_(printf
)(" FrB=none\n");
5004 VG_(printf
)(" declared at: %u %s:%d\n",
5006 ML_(fndn_ix2filename
) (di
, varp
->fndn_ix
),
5008 if (varp
->absOri
!= (UWord
)D3_INVALID_CUOFF
)
5009 VG_(printf
)(" abstract origin: <%lx>\n", varp
->absOri
);
5012 /* Skip variables which have no location. These must be
5013 abstract instances; they are useless as-is since with no
5014 location they have no specified memory location. They will
5015 presumably be referred to via the absOri fields of other
5018 TRACE_D3(" SKIP (no location)\n\n");
5022 /* So it has a location, at least. If it refers to some other
5023 entry through its absOri field, pull in further info through
5025 if (varp
->absOri
!= (UWord
)D3_INVALID_CUOFF
) {
5027 Word ixFirst
, ixLast
;
5029 TempVar
* keyp
= &key
;
5031 VG_(memset
)(&key
, 0, sizeof(key
)); /* not necessary */
5032 key
.dioff
= varp
->absOri
; /* this is what we want to find */
5033 found
= VG_(lookupXA
)( dioff_lookup_tab
, &keyp
,
5034 &ixFirst
, &ixLast
);
5036 /* barf("DW_AT_abstract_origin can't be resolved"); */
5037 TRACE_D3(" SKIP (DW_AT_abstract_origin can't be resolved)\n\n");
5040 /* If the following fails, there is more than one entry with
5041 the same dioff. Which can't happen. */
5042 vg_assert(ixFirst
== ixLast
);
5043 varAI
= *(TempVar
**)VG_(indexXA
)( dioff_lookup_tab
, ixFirst
);
5046 vg_assert(varAI
->dioff
== varp
->absOri
);
5048 /* Copy what useful info we can. */
5049 if (varAI
->typeR
&& !varp
->typeR
)
5050 varp
->typeR
= varAI
->typeR
;
5051 if (varAI
->name
&& !varp
->name
)
5052 varp
->name
= varAI
->name
;
5053 if (varAI
->fndn_ix
&& !varp
->fndn_ix
)
5054 varp
->fndn_ix
= varAI
->fndn_ix
;
5055 if (varAI
->fLine
> 0 && varp
->fLine
== 0)
5056 varp
->fLine
= varAI
->fLine
;
5059 /* Give it a name if it doesn't have one. */
5061 varp
->name
= ML_(addStr
)( di
, "<anon_var>", -1 );
5063 /* So now does it have enough info to be useful? */
5064 /* NOTE: re typeR: this is a hack. If typeR is Te_UNKNOWN then
5065 the type didn't get resolved. Really, in that case
5066 something's broken earlier on, and should be fixed, rather
5067 than just skipping the variable. */
5068 ent
= ML_(TyEnts__index_by_cuOff
)( tyents_to_keep
,
5069 tyents_to_keep_cache
,
5071 /* The next two assertions should be guaranteed by
5072 our previous call to resolve_variable_types. */
5074 vg_assert(ML_(TyEnt__is_type
)(ent
) || ent
->tag
== Te_UNKNOWN
);
5076 if (ent
->tag
== Te_UNKNOWN
) continue;
5078 vg_assert(varp
->gexpr
);
5079 vg_assert(varp
->name
);
5080 vg_assert(varp
->typeR
);
5081 vg_assert(varp
->level
>= 0);
5083 /* Ok. So we're going to keep it. Call ML_(addVar) once for
5084 each address range in which the variable exists. */
5085 TRACE_D3(" ACQUIRE for range(s) ");
5086 { AddrRange oneRange
;
5087 AddrRange
* varPcRanges
;
5089 /* Set up to iterate over address ranges, however
5091 if (varp
->nRanges
== 0 || varp
->nRanges
== 1) {
5092 vg_assert(!varp
->rngMany
);
5093 if (varp
->nRanges
== 0) {
5094 vg_assert(varp
->rngOneMin
== 0);
5095 vg_assert(varp
->rngOneMax
== 0);
5097 nVarPcRanges
= varp
->nRanges
;
5098 oneRange
.aMin
= varp
->rngOneMin
;
5099 oneRange
.aMax
= varp
->rngOneMax
;
5100 varPcRanges
= &oneRange
;
5102 vg_assert(varp
->rngMany
);
5103 vg_assert(varp
->rngOneMin
== 0);
5104 vg_assert(varp
->rngOneMax
== 0);
5105 nVarPcRanges
= VG_(sizeXA
)(varp
->rngMany
);
5106 vg_assert(nVarPcRanges
>= 2);
5107 vg_assert(nVarPcRanges
== (Word
)varp
->nRanges
);
5108 varPcRanges
= VG_(indexXA
)(varp
->rngMany
, 0);
5110 if (varp
->level
== 0)
5111 vg_assert( nVarPcRanges
== 1 );
5113 for (i
= 0; i
< nVarPcRanges
; i
++) {
5114 Addr pcMin
= varPcRanges
[i
].aMin
;
5115 Addr pcMax
= varPcRanges
[i
].aMax
;
5116 vg_assert(pcMin
<= pcMax
);
5117 /* Level 0 is the global address range. So at level 0 we
5118 don't want to bias pcMin/pcMax; but at all other levels
5119 we do since those are derived from svmas in the Dwarf
5120 we're reading. Be paranoid ... */
5121 if (varp
->level
== 0) {
5122 vg_assert(pcMin
== (Addr
)0);
5123 vg_assert(pcMax
== ~(Addr
)0);
5125 /* vg_assert(pcMin > (Addr)0);
5126 No .. we can legitimately expect to see ranges like
5127 0x0-0x11D (pre-biasing, of course). */
5128 vg_assert(pcMax
< ~(Addr
)0);
5131 /* Apply text biasing, for non-global variables. */
5132 if (varp
->level
> 0) {
5133 pcMin
+= di
->text_debug_bias
;
5134 pcMax
+= di
->text_debug_bias
;
5137 if (i
> 0 && (i
%2) == 0)
5139 TRACE_D3("[%#lx,%#lx] ", pcMin
, pcMax
);
5144 varp
->name
, varp
->typeR
,
5145 varp
->gexpr
, varp
->fbGX
,
5146 varp
->fndn_ix
, varp
->fLine
, td3
5152 /* and move on to the next var */
5155 /* Now free all the TempVars */
5156 n
= VG_(sizeXA
)( tempvars
);
5157 for (i
= 0; i
< n
; i
++) {
5158 varp
= *(TempVar
**)VG_(indexXA
)( tempvars
, i
);
5159 ML_(dinfo_free
)(varp
);
5161 VG_(deleteXA
)( tempvars
);
5164 /* and the temp lookup table */
5165 VG_(deleteXA
)( dioff_lookup_tab
);
5167 /* and the ranges tree. Note that we need to also free the XArrays
5168 which constitute the keys, hence pass VG_(deleteXA) as a
5170 VG_(deleteFM
)( rangestree
, (void(*)(UWord
))VG_(deleteXA
), NULL
);
5172 /* and the tyents_to_keep cache */
5173 ML_(dinfo_free
)( tyents_to_keep_cache
);
5174 tyents_to_keep_cache
= NULL
;
5176 vg_assert( varparser
.fndn_ix_Table
== NULL
);
5178 /* And the signatured type hash. */
5179 VG_(HT_destruct
) ( signature_types
, ML_(dinfo_free
) );
5181 /* record the GExprs in di so they can be freed later */
5182 vg_assert(!di
->admin_gexprs
);
5183 di
->admin_gexprs
= gexprs
;
5186 // Free up dynamically allocated memory
5187 if (VG_(clo_read_var_info
)) {
5188 type_parser_release(&typarser
);
5189 var_parser_release(&varparser
);
5194 /*------------------------------------------------------------*/
5196 /*--- The "new" DWARF3 reader -- top level control logic ---*/
5198 /*------------------------------------------------------------*/
/* State shared by barf() and ML_(new_dwarf3_reader):
   d3rd_jmpbuf_valid  -- True only while d3rd_jmpbuf is armed;
   d3rd_jmpbuf_reason -- failure message recorded by barf();
   d3rd_jmpbuf        -- the jump buffer barf() longjmps to. */
5200 static Bool d3rd_jmpbuf_valid
= False
;
5201 static const HChar
* d3rd_jmpbuf_reason
= NULL
;
5202 static VG_MINIMAL_JMP_BUF(d3rd_jmpbuf
);
/* Failure exit for the reader.  Asserts that the jump buffer is armed,
   stores 'reason' in d3rd_jmpbuf_reason and longjmps to d3rd_jmpbuf.
   Declared noreturn. */
5204 static __attribute__((noreturn
)) void barf ( const HChar
* reason
) {
5205 vg_assert(d3rd_jmpbuf_valid
);
5206 d3rd_jmpbuf_reason
= reason
;
5207 VG_MINIMAL_LONGJMP(d3rd_jmpbuf
);
/* Top-level entry point.  Arms d3rd_jmpbuf, then runs
   new_dwarf3_reader_wrk with barf() as its failure callback and the
   section slices passed through unchanged.  On the failure path the
   recorded d3rd_jmpbuf_reason is reported through ML_(symerr).  In
   both cases d3rd_jmpbuf_valid is cleared, and finally
   d3rd_jmpbuf_reason is reset to NULL. */
5214 ML_(new_dwarf3_reader
) (
5216 DiSlice escn_debug_info
, DiSlice escn_debug_types
,
5217 DiSlice escn_debug_abbv
, DiSlice escn_debug_line
,
5218 DiSlice escn_debug_str
, DiSlice escn_debug_ranges
,
5219 DiSlice escn_debug_loc
, DiSlice escn_debug_info_alt
,
5220 DiSlice escn_debug_abbv_alt
, DiSlice escn_debug_line_alt
,
5221 DiSlice escn_debug_str_alt
5224 volatile Int jumped
;
5225 volatile Bool td3
= di
->trace_symtab
;
5227 /* Run the _wrk function to read the dwarf3. If it succeeds, it
5228 just returns normally. If there is any failure, it longjmp's
5229 back here, having first set d3rd_jmpbuf_reason to something
5231 vg_assert(d3rd_jmpbuf_valid
== False
);
5232 vg_assert(d3rd_jmpbuf_reason
== NULL
);
5234 d3rd_jmpbuf_valid
= True
;
5235 jumped
= VG_MINIMAL_SETJMP(d3rd_jmpbuf
);
5238 new_dwarf3_reader_wrk( di
, barf
,
5239 escn_debug_info
, escn_debug_types
,
5240 escn_debug_abbv
, escn_debug_line
,
5241 escn_debug_str
, escn_debug_ranges
,
5242 escn_debug_loc
, escn_debug_info_alt
,
5243 escn_debug_abbv_alt
, escn_debug_line_alt
,
5244 escn_debug_str_alt
);
5245 d3rd_jmpbuf_valid
= False
;
5246 TRACE_D3("\n------ .debug_info reading was successful ------\n");
5249 d3rd_jmpbuf_valid
= False
;
5250 /* Can't longjump without giving some sort of reason. */
5251 vg_assert(d3rd_jmpbuf_reason
!= NULL
);
5253 TRACE_D3("\n------ .debug_info reading failed ------\n");
5255 ML_(symerr
)(di
, True
, d3rd_jmpbuf_reason
);
5258 d3rd_jmpbuf_valid
= False
;
5259 d3rd_jmpbuf_reason
= NULL
;
5264 /* --- Unused code fragments which might be useful one day. --- */
/* Fragment that dumps .debug_aranges: for each table it prints the
   header fields (length, version, offset into .debug_info, pointer
   and segment sizes), skips bytes until the cursor position is a
   multiple of 2*asize, then prints (address, length) pairs up to the
   terminating (0,0) pair.
   NOTE(review): presumably compiled out; the enclosing guard is not
   visible here -- confirm. */
5267 /* Read the arange tables */
5269 TRACE_SYMTAB("\n------ The contents of .debug_arange ------\n");
5270 init_Cursor( &aranges
, debug_aranges_img
,
5271 debug_aranges_sz
, 0, barf
,
5272 "Overrun whilst reading .debug_aranges section" );
5274 ULong len
, d_i_offset
;
5277 UChar asize
, segsize
;
5279 if (is_at_end_Cursor( &aranges
))
5281 /* Read one arange thingy */
5282 /* initial_length field */
5283 len
= get_Initial_Length( &is64
, &aranges
,
5284 "in .debug_aranges: invalid initial-length field" );
5285 version
= get_UShort( &aranges
);
5286 d_i_offset
= get_Dwarfish_UWord( &aranges
, is64
);
5287 asize
= get_UChar( &aranges
);
5288 segsize
= get_UChar( &aranges
);
5289 TRACE_D3(" Length: %llu\n", len
);
5290 TRACE_D3(" Version: %d\n", (Int
)version
);
5291 TRACE_D3(" Offset into .debug_info: %llx\n", d_i_offset
);
5292 TRACE_D3(" Pointer Size: %d\n", (Int
)asize
);
5293 TRACE_D3(" Segment Size: %d\n", (Int
)segsize
);
5295 TRACE_D3(" Address Length\n");
5297 while ((get_position_of_Cursor( &aranges
) % (2 * asize
)) > 0) {
5298 (void)get_UChar( & aranges
);
5301 ULong address
= get_Dwarfish_UWord( &aranges
, asize
==8 );
5302 ULong length
= get_Dwarfish_UWord( &aranges
, asize
==8 );
5303 TRACE_D3(" 0x%016llx 0x%llx\n", address
, length
);
5304 if (address
== 0 && length
== 0) break;
5310 #endif // defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_solaris)
5312 /*--------------------------------------------------------------------*/
5314 /*--------------------------------------------------------------------*/