coregrind/m_debuginfo/readdwarf3.c

   1 /* -*- mode: C; c-basic-offset: 3; -*- */
   2
   3 /*--------------------------------------------------------------------*/
   4 /*--- Read DWARF3/4 ".debug_info" sections (DIE trees).            ---*/
   5 /*---                                                 readdwarf3.c ---*/
   6 /*--------------------------------------------------------------------*/
   7
   8 /*
   9    This file is part of Valgrind, a dynamic binary instrumentation
  10    framework.
  11
  12    Copyright (C) 2008-2017 OpenWorks LLP
  13       info@open-works.co.uk
  14
  15    This program is free software; you can redistribute it and/or
  16    modify it under the terms of the GNU General Public License as
  17    published by the Free Software Foundation; either version 2 of the
  18    License, or (at your option) any later version.
  19
  20    This program is distributed in the hope that it will be useful, but
  21    WITHOUT ANY WARRANTY; without even the implied warranty of
  22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  23    General Public License for more details.
  24
  25    You should have received a copy of the GNU General Public License
  26    along with this program; if not, see <http://www.gnu.org/licenses/>.
  27
  28    The GNU General Public License is contained in the file COPYING.
  29
  30    Neither the names of the U.S. Department of Energy nor the
  31    University of California nor the names of its contributors may be
  32    used to endorse or promote products derived from this software
  33    without prior written permission.
  34 */
  35
  36 #if defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_solaris)
  37
  38 /* REFERENCE (without which this code will not make much sense):
  39
  40    DWARF Debugging Information Format, Version 3,
  41    dated 20 December 2005 (the "D3 spec").
  42
  43    Available at http://www.dwarfstd.org/Dwarf3.pdf.  There's also a
  44    .doc (MS Word) version, but for some reason the section numbers
  45    between the Word and PDF versions differ by 1 in the first digit.
  46    All section references in this code are to the PDF version.
  47
  48    CURRENT HACKS:
  49
  50    DW_TAG_{const,volatile}_type no DW_AT_type is allowed; it is
  51       assumed to mean "const void" or "volatile void" respectively.
  52       GDB appears to interpret them like this, anyway.
  53
  54    In many cases it is important to know the svma of a CU (the "base
  55    address of the CU", as the D3 spec calls it).  There are some
  56    situations in which the spec implies this value is unknown, but the
   57    Dwarf3 produced by gcc-4.1 seems to assume it is not unknown but
  58    merely zero when not explicitly stated.  So we too have to make
  59    that assumption.
  60
  61    POTENTIAL BUG?  Spotted 6 Sept 08.  Why doesn't
  62    unitary_range_list() bias the resulting range list in the same way
  63    that its more general cousin, get_range_list(), does?  I don't
  64    know.
  65
  66    TODO, 2008 Feb 17:
  67
  68    get rid of cu_svma_known and document the assumed-zero svma hack.
  69
  70    ML_(sizeOfType): differentiate between zero sized types and types
  71    for which the size is unknown.  Is this important?  I don't know.
  72
  73    DW_TAG_array_types: deal with explicit sizes (currently we compute
  74    the size from the bounds and the element size, although that's
   75    fragile, if the bounds are incompletely specified, or completely
  76    absent)
  77
  78    Document reason for difference (by 1) of stack preening depth in
  79    parse_var_DIE vs parse_type_DIE.
  80
  81    Don't hand to ML_(addVars), vars whose locations are entirely in
  82    registers (DW_OP_reg*).  This is merely a space-saving
  83    optimisation, as ML_(evaluate_Dwarf3_Expr) should handle these
  84    expressions correctly, by failing to evaluate them and hence
  85    effectively ignoring the variable with which they are associated.
  86
  87    Deal with DW_TAG_array_types which have element size != stride
  88
  89    In some cases, the info for a variable is split between two
  90    different DIEs (generally a declarer and a definer).  We punt on
  91    these.  Could do better here.
  92
  93    The 'data_bias' argument passed to the expression evaluator
  94    (ML_(evaluate_Dwarf3_Expr)) should really be changed to a
  95    MaybeUWord, to make it clear when we do vs don't know what it is
  96    for the evaluation of an expression.  At the moment zero is passed
  97    for this parameter in the don't know case.  That's a bit fragile
  98    and obscure; using a MaybeUWord would be clearer.
  99
 100    POTENTIAL PERFORMANCE IMPROVEMENTS:
 101
 102    Currently, duplicate removal and all other queries for the type
 103    entities array is done using cuOffset-based pointing, which
 104    involves a binary search (VG_(lookupXA)) for each access.  This is
 105    wildly inefficient, although simple.  It would be better to
 106    translate all the cuOffset-based references (iow, all the "R" and
 107    "Rs" fields in the TyEnts in 'tyents') to direct index numbers in
 108    'tyents' right at the start of dedup_types(), and use direct
 109    indexing (VG_(indexXA)) wherever possible after that.
 110
 111    cmp__XArrays_of_AddrRange is also a performance bottleneck.  Move
 112    VG_(indexXA) into pub_tool_xarray.h so it can be inlined at all use
 113    points, and possibly also make an _UNCHECKED version which skips
 114    the range checks in performance-critical situations such as this.
 115
 116    Handle interaction between read_DIE and parse_{var,type}_DIE
 117    better.  Currently read_DIE reads the entire DIE just to find where
 118    the end is (and for debug printing), so that it can later reliably
 119    move the cursor to the end regardless of what parse_{var,type}_DIE
 120    do.  This means many DIEs (most, even?) are read twice.  It would
 121    be smarter to make parse_{var,type}_DIE return a Bool indicating
 122    whether or not they advanced the DIE cursor, and only if they
 123    didn't should read_DIE itself read through the DIE.
 124
 125    ML_(addVar) and add_var_to_arange: quite a lot of DiAddrRanges have
 126    zero variables in their .vars XArray.  Rather than have an XArray
 127    with zero elements (which uses 2 malloc'd blocks), allow the .vars
 128    pointer to be NULL in this case.
 129
 130    More generally, reduce the amount of memory allocated and freed
 131    while reading Dwarf3 type/variable information.  Even modest (20MB)
 132    objects cause this module to allocate and free hundreds of
 133    thousands of small blocks, and ML_(arena_malloc) and its various
 134    groupies always show up at the top of performance profiles. */
 135
 136 #include "pub_core_basics.h"
 137 #include "pub_core_debuginfo.h"
 138 #include "pub_core_libcbase.h"
 139 #include "pub_core_libcassert.h"
 140 #include "pub_core_libcprint.h"
 141 #include "pub_core_libcsetjmp.h"   // setjmp facilities
 142 #include "pub_core_hashtable.h"
 143 #include "pub_core_options.h"
 144 #include "pub_core_tooliface.h"    /* VG_(needs) */
 145 #include "pub_core_xarray.h"
 146 #include "pub_core_wordfm.h"
 147 #include "priv_misc.h"             /* dinfo_zalloc/free */
 148 #include "priv_image.h"
 149 #include "priv_tytypes.h"
 150 #include "priv_d3basics.h"
 151 #include "priv_storage.h"
 152 #include "priv_readdwarf3.h"       /* self */
 153
 154
 155 /*------------------------------------------------------------*/
 156 /*---                                                      ---*/
 157 /*--- Basic machinery for parsing DIEs.                    ---*/
 158 /*---                                                      ---*/
 159 /*------------------------------------------------------------*/
 160
      /* Debug-tracing helpers.  Both macros refer to a variable named
         'td3', which must be in scope wherever they are used. */
      /* TRACE_D3: call VG_(printf) with the given format and arguments,
         but only when 'td3' is true.  NOTE(review): this expands to a
         bare 'if' statement, so it should not be used as the unbraced
         body of an if/else. */
  161 #define TRACE_D3(format, args...) \
  162    if (UNLIKELY(td3)) { VG_(printf)(format, ## args); }
      /* TD3: the same test in expression form. */
  163 #define TD3 (UNLIKELY(td3))
  164
      /* Reserved UWord values: all-ones and all-ones-minus-one.
         Presumably sentinels for "no valid cuOffset" and for a
         synthetic 'void' type entry -- confirm against their uses,
         which are not visible in this part of the file. */
  165 #define D3_INVALID_CUOFF  ((UWord)(-1UL))
  166 #define D3_FAKEVOID_CUOFF ((UWord)(-2UL))
 167
      /* A read position within a DiSlice.  The get_* functions that
         follow fetch data at 'sli_next' and advance it; if a read
         would run past the end of the slice they call 'barf' with
         'barfstr' instead. */
  168 typedef
  169    struct {
  170       DiSlice sli;      // to which this cursor applies
  171       DiOffT  sli_next; // offset in underlying DiImage; must be >= sli.ioff
  172       void (*barf)( const HChar* ) __attribute__((noreturn)); // overrun handler; does not return
  173       const HChar* barfstr; // message passed to barf on overrun
  174    }
  175    Cursor;
 176
 177 static inline Bool is_sane_Cursor ( const Cursor* c ) {
 178    if (!c)                return False;
 179    if (!c->barf)          return False;
 180    if (!c->barfstr)       return False;
 181    if (!ML_(sli_is_valid)(c->sli))    return False;
 182    if (c->sli.ioff == DiOffT_INVALID) return False;
 183    if (c->sli_next < c->sli.ioff)     return False;
 184    return True;
 185 }
 186
 187 // Initialise a cursor from a DiSlice (ELF section, really) so as to
 188 // start reading at offset |sli_initial_offset| from the start of the
 189 // slice.
 190 static void init_Cursor ( /*OUT*/Cursor* c,
 191                           DiSlice sli,
 192                           ULong   sli_initial_offset,
 193                           __attribute__((noreturn)) void (*barf)(const HChar*),
 194                           const HChar* barfstr )
 195 {
 196    vg_assert(c);
 197    VG_(bzero_inline)(c, sizeof(*c));
 198    c->sli              = sli;
 199    c->sli_next         = c->sli.ioff + sli_initial_offset;
 200    c->barf             = barf;
 201    c->barfstr          = barfstr;
 202    vg_assert(is_sane_Cursor(c));
 203 }
 204
 205 static Bool is_at_end_Cursor ( const Cursor* c ) {
 206    vg_assert(is_sane_Cursor(c));
 207    return c->sli_next >= c->sli.ioff + c->sli.szB;
 208 }
 209
 210 static inline ULong get_position_of_Cursor ( const Cursor* c ) {
 211    vg_assert(is_sane_Cursor(c));
 212    return c->sli_next - c->sli.ioff;
 213 }
 214 static inline void set_position_of_Cursor ( Cursor* c, ULong pos ) {
 215    c->sli_next = c->sli.ioff + pos;
 216    vg_assert(is_sane_Cursor(c));
 217 }
 218 static inline void advance_position_of_Cursor ( Cursor* c, ULong delta ) {
 219    c->sli_next += delta;
 220    vg_assert(is_sane_Cursor(c));
 221 }
 222
 223 static /*signed*/Long get_remaining_length_Cursor ( const Cursor* c ) {
 224    vg_assert(is_sane_Cursor(c));
 225    return c->sli.ioff + c->sli.szB - c->sli_next;
 226 }
 227
 228 //static void* get_address_of_Cursor ( Cursor* c ) {
 229 //   vg_assert(is_sane_Cursor(c));
 230 //   return &c->region_start_img[ c->region_next ];
 231 //}
 232
 233 static DiCursor get_DiCursor_from_Cursor ( const Cursor* c ) {
 234    return mk_DiCursor(c->sli.img, c->sli_next);
 235 }
 236
 237 /* FIXME: document assumptions on endianness for
 238    get_UShort/UInt/ULong. */
 239 static inline UChar get_UChar ( Cursor* c ) {
 240    UChar r;
 241    vg_assert(is_sane_Cursor(c));
 242    if (c->sli_next + sizeof(UChar) > c->sli.ioff + c->sli.szB) {
 243       c->barf(c->barfstr);
 244       /*NOTREACHED*/
 245       vg_assert(0);
 246    }
 247    r = ML_(img_get_UChar)(c->sli.img, c->sli_next);
 248    c->sli_next += sizeof(UChar);
 249    return r;
 250 }
 251 static UShort get_UShort ( Cursor* c ) {
 252    UShort r;
 253    vg_assert(is_sane_Cursor(c));
 254    if (c->sli_next + sizeof(UShort) > c->sli.ioff + c->sli.szB) {
 255       c->barf(c->barfstr);
 256       /*NOTREACHED*/
 257       vg_assert(0);
 258    }
 259    r = ML_(img_get_UShort)(c->sli.img, c->sli_next);
 260    c->sli_next += sizeof(UShort);
 261    return r;
 262 }
 263 static UInt get_UInt ( Cursor* c ) {
 264    UInt r;
 265    vg_assert(is_sane_Cursor(c));
 266    if (c->sli_next + sizeof(UInt) > c->sli.ioff + c->sli.szB) {
 267       c->barf(c->barfstr);
 268       /*NOTREACHED*/
 269       vg_assert(0);
 270    }
 271    r = ML_(img_get_UInt)(c->sli.img, c->sli_next);
 272    c->sli_next += sizeof(UInt);
 273    return r;
 274 }
 275 static ULong get_ULong ( Cursor* c ) {
 276    ULong r;
 277    vg_assert(is_sane_Cursor(c));
 278    if (c->sli_next + sizeof(ULong) > c->sli.ioff + c->sli.szB) {
 279       c->barf(c->barfstr);
 280       /*NOTREACHED*/
 281       vg_assert(0);
 282    }
 283    r = ML_(img_get_ULong)(c->sli.img, c->sli_next);
 284    c->sli_next += sizeof(ULong);
 285    return r;
 286 }
 287 static ULong get_ULEB128 ( Cursor* c ) {
 288    ULong result;
 289    Int   shift;
 290    UChar byte;
 291    /* unroll first iteration */
 292    byte = get_UChar( c );
 293    result = (ULong)(byte & 0x7f);
 294    if (LIKELY(!(byte & 0x80))) return result;
 295    shift = 7;
 296    /* end unroll first iteration */
 297    do {
 298       byte = get_UChar( c );
 299       result |= ((ULong)(byte & 0x7f)) << shift;
 300       shift += 7;
 301    } while (byte & 0x80);
 302    return result;
 303 }
 304 static Long get_SLEB128 ( Cursor* c ) {
 305    ULong  result = 0;
 306    Int    shift = 0;
 307    UChar  byte;
 308    do {
 309       byte = get_UChar(c);
 310       result |= ((ULong)(byte & 0x7f)) << shift;
 311       shift += 7;
 312    } while (byte & 0x80);
 313    if (shift < 64 && (byte & 0x40))
 314       result |= -(1ULL << shift);
 315    return result;
 316 }
 317
 318 /* Assume 'c' points to the start of a string.  Return a DiCursor of
 319    whatever it points at, and advance it past the terminating zero.
 320    This makes it safe for the caller to then copy the string with
 321    ML_(addStr), since (w.r.t. image overruns) the process of advancing
 322    past the terminating zero will already have "vetted" the string. */
 323 static DiCursor get_AsciiZ ( Cursor* c ) {
 324    UChar uc;
 325    DiCursor res = get_DiCursor_from_Cursor(c);
 326    do { uc = get_UChar(c); } while (uc != 0);
 327    return res;
 328 }
 329
 330 static ULong peek_ULEB128 ( Cursor* c ) {
 331    DiOffT here = c->sli_next;
 332    ULong  r    = get_ULEB128( c );
 333    c->sli_next = here;
 334    return r;
 335 }
 336 static UChar peek_UChar ( Cursor* c ) {
 337    DiOffT here = c->sli_next;
 338    UChar  r    = get_UChar( c );
 339    c->sli_next = here;
 340    return r;
 341 }
 342
 343 static ULong get_Dwarfish_UWord ( Cursor* c, Bool is_dw64 ) {
 344    return is_dw64 ? get_ULong(c) : (ULong) get_UInt(c);
 345 }
 346
 347 static UWord get_UWord ( Cursor* c ) {
 348    vg_assert(sizeof(UWord) == sizeof(void*));
 349    if (sizeof(UWord) == 4) return get_UInt(c);
 350    if (sizeof(UWord) == 8) return get_ULong(c);
 351    vg_assert(0);
 352 }
 353
 354 /* Read a DWARF3 'Initial Length' field */
 355 static ULong get_Initial_Length ( /*OUT*/Bool* is64,
 356                                   Cursor* c,
 357                                   const HChar* barfMsg )
 358 {
 359    ULong w64;
 360    UInt  w32;
 361    *is64 = False;
 362    w32 = get_UInt( c );
 363    if (w32 >= 0xFFFFFFF0 && w32 < 0xFFFFFFFF) {
 364       c->barf( barfMsg );
 365    }
 366    else if (w32 == 0xFFFFFFFF) {
 367       *is64 = True;
 368       w64   = get_ULong( c );
 369    } else {
 370       *is64 = False;
 371       w64 = (ULong)w32;
 372    }
 373    return w64;
 374 }
 375
 376
 377 /*------------------------------------------------------------*/
 378 /*---                                                      ---*/
 379 /*--- "CUConst" structure                                  ---*/
 380 /*---                                                      ---*/
 381 /*------------------------------------------------------------*/
 382
      /* One attribute name/form pair as stored in a g_abbv's 'nf'
         array, together with the precomputed skipping hints
         described in the comment that follows the struct. */
  383 typedef
  384    struct _name_form {
  385       ULong at_name;  // Dwarf Attribute name
  386       ULong at_form;  // Dwarf Attribute form
  387       Long  at_val;   // Dwarf Attribute value (for implicit_const)
  388       UInt  skip_szB; // Nr of bytes skippable from here ...
  389       UInt  next_nf;  // ... to reach this attr/form index in the g_abbv.nf
  390    } name_form;
  391 /* skip_szB and next_nf are used to optimise the skipping of uninteresting DIEs.
  392    Each name_form records how many bytes (a fixed number) can be skipped from
  393    the beginning of this form to reach the next attr/form to look at.
  394    The next form to look at can be:
  395        an 'interesting' attr/form to read while skipping a DIE
  396           (currently, this is only DW_AT_sibling)
  397    or
  398        a variable length form which must be read to be skipped.
  399    For a variable length form, the skip_szB will be equal to VARSZ_FORM.
  400
  401    Note: this technique could also be used to speed up the parsing
  402    of DIEs : for each parser kind, we could have the nr of bytes
  403    to skip to directly reach the interesting form(s) for the parser. */
 404
      /* A parsed abbreviation, kept in a hash table keyed on
         'abbv_code' (see CUConst.ht_abbvs).  The fixed-size header is
         followed in memory by the 'nf' array. */
  405 typedef
  406    struct _g_abbv {
  407       struct _g_abbv *next; // read/write by hash table.
  408       UWord  abbv_code;     // key, read by hash table
  409       ULong  atag;          // presumably the abbreviation's DW_TAG_ value -- confirm
  410       ULong  has_children;  // presumably its DW_CHILDREN_ flag -- confirm
  411       name_form nf[0];
  412       /* Variable-length array of name/form pairs, terminated
  413          by a 0/0 pair.
  414          The skip_szB/next_nf fields make it possible to skip
  415          efficiently over a DIE described by this g_abbv. */
  416     } g_abbv;
 417
  418 /* Holds information that is constant through the parsing of a
  419    Compilation Unit.  This is basically plumbed through to
  420    everywhere. */
  421 typedef
  422    struct {
  423       /* Call here if anything goes wrong.  Does not return. */
  424       void (*barf)( const HChar* ) __attribute__((noreturn));
  425       /* Is this 64-bit DWARF ? */
  426       Bool   is_dw64;
  427       /* Which DWARF version ?  (2, 3, 4 or 5) */
  428       UShort version;
  429       /* Length of this Compilation Unit, as stated in the
  430          .unit_length :: InitialLength field of the CU Header.
  431          However, this size (as specified by the D3 spec) does not
  432          include the size of the .unit_length field itself, which is
  433          either 4 or 12 bytes (32-bit or 64-bit Dwarf3).  That value
  434          can be obtained through the expression ".is_dw64 ? 12 : 4". */
  435       ULong  unit_length;
  436       /* Offset of start of this unit in .debug_info */
  437       UWord  cu_start_offset;
  438       /* SVMA for this CU.  In the D3 spec, is known as the "base
  439          address of the compilation unit" (last para sec 3.1.1).
  440          Needed for (amongst things) interpretation of location-list
  441          values. */
  442       Addr   cu_svma;
  443       Bool   cu_svma_known; // presumably True once cu_svma has been established -- confirm
  444
  445       /* The debug_abbreviations table to be used for this Unit */
  446       //UChar* debug_abbv;
  447       /* Upper bound on size thereof (an overestimate, in general) */
  448       //UWord  debug_abbv_maxszB;
  449       /* A bounded area of the image, to be used as the
  450          debug_abbreviations table for this Unit. */
  451       DiSlice debug_abbv;
  452
  453       /* Image information for various sections. */
  454       DiSlice escn_debug_str;
  455       DiSlice escn_debug_ranges;
  456       DiSlice escn_debug_rnglists;
  457       DiSlice escn_debug_loclists;
  458       DiSlice escn_debug_loc;
  459       DiSlice escn_debug_line;
  460       DiSlice escn_debug_info;
  461       DiSlice escn_debug_types;
  462       DiSlice escn_debug_info_alt;
  463       DiSlice escn_debug_str_alt;
  464       DiSlice escn_debug_line_str;
  465       /* How much to add to .debug_types resp. alternate .debug_info offsets
  466          in cook_die*.  */
  467       UWord  types_cuOff_bias;
  468       UWord  alt_cuOff_bias;
  469       /* --- Needed so we can add stuff to the string table. --- */
  470       struct _DebugInfo* di;
  471       /* --- a hash table of g_abbv (i.e. parsed abbreviations) --- */
  472       VgHashTable *ht_abbvs;
  473
  474       /* True if this came from .debug_types; otherwise it came from
  475          .debug_info.  */
  476       Bool is_type_unit;
  477       /* For a unit coming from .debug_types, these hold the TU's type
  478          signature and the uncooked DIE offset of the TU's signatured
  479          type.  For a unit coming from .debug_info, these are unused.  */
  480       ULong type_signature;
  481       ULong type_offset;
  482
  483       /* Signatured type hash; computed once and then shared by all
  484          CUs.  */
  485       VgHashTable *signature_types;
  486
  487       /* True if this came from alternate .debug_info; otherwise
  488          it came from normal .debug_info or .debug_types.  */
  489       Bool is_alt_info;
  490    }
  491    CUConst;
 492
 493
 494 /* Return the cooked value of DIE depending on whether CC represents a
 495    .debug_types unit.  To cook a DIE, we pretend that the .debug_info,
 496    .debug_types and optional alternate .debug_info sections form
 497    a contiguous whole, so that DIEs coming from .debug_types are numbered
 498    starting at the end of .debug_info and DIEs coming from alternate
 499    .debug_info are numbered starting at the end of .debug_types.  */
 500 static UWord cook_die( const CUConst* cc, UWord die )
 501 {
 502    if (cc->is_type_unit)
 503       die += cc->types_cuOff_bias;
 504    else if (cc->is_alt_info)
 505       die += cc->alt_cuOff_bias;
 506    return die;
 507 }
 508
 509 /* Like cook_die, but understand that DIEs coming from a
 510    DW_FORM_ref_sig8 reference are already cooked.  Also, handle
 511    DW_FORM_GNU_ref_alt from within primary .debug_info or .debug_types
 512    as reference to alternate .debug_info.  */
 513 static UWord cook_die_using_form( const CUConst *cc, UWord die, DW_FORM form)
 514 {
 515    if (form == DW_FORM_ref_sig8)
 516       return die;
 517    if (form == DW_FORM_GNU_ref_alt)
 518       return die + cc->alt_cuOff_bias;
 519    return cook_die( cc, die );
 520 }
 521
 522 /* Return the uncooked offset of DIE and set *TYPE_FLAG to true if the DIE
 523    came from the .debug_types section and *ALT_FLAG to true if the DIE
 524    came from alternate .debug_info section.  */
 525 static UWord uncook_die( const CUConst *cc, UWord die, /*OUT*/Bool *type_flag,
 526                          Bool *alt_flag )
 527 {
 528    *alt_flag = False;
 529    *type_flag = False;
 530    /* The use of escn_debug_{info,types}.szB seems safe to me even if
 531       escn_debug_{info,types} are DiSlice_INVALID (meaning the
 532       sections were not found), because DiSlice_INVALID.szB is always
 533       zero.  That said, it seems unlikely we'd ever get here if
 534       .debug_info or .debug_types were missing. */
 535    if (die >= cc->escn_debug_info.szB) {
 536       if (die >= cc->escn_debug_info.szB + cc->escn_debug_types.szB) {
 537          *alt_flag = True;
 538          die -= cc->escn_debug_info.szB + cc->escn_debug_types.szB;
 539       } else {
 540          *type_flag = True;
 541          die -= cc->escn_debug_info.szB;
 542       }
 543    }
 544    return die;
 545 }
 546
 547 /*------------------------------------------------------------*/
 548 /*---                                                      ---*/
 549 /*--- Helper functions for Guarded Expressions             ---*/
 550 /*---                                                      ---*/
 551 /*------------------------------------------------------------*/
 552
 553 /* Parse the location list starting at img-offset 'debug_loc_offset'
 554    in .debug_loc.  Results are biased with 'svma_of_referencing_CU'
 555    and so I believe are correct SVMAs for the object as a whole.  This
 556    function allocates the UChar*, and the caller must deallocate it.
 557    The resulting block is in so-called Guarded-Expression format.
 558
 559    Guarded-Expression format is similar but not identical to the DWARF3
 560    location-list format.  The format of each returned block is:
 561
 562       UChar biasMe;
 563       UChar isEnd;
 564       followed by zero or more of
 565
 566       (Addr aMin;  Addr aMax;  UShort nbytes;  ..bytes..;  UChar isEnd)
 567
  568    '..bytes..' is a standard DWARF3 location expression which is
 569    valid when aMin <= pc <= aMax (possibly after suitable biasing).
 570
 571    The number of bytes in '..bytes..' is nbytes.
 572
 573    The end of the sequence is marked by an isEnd == 1 value.  All
 574    previous isEnd values must be zero.
 575
  576    biasMe is 1 if the aMin/aMax fields need this DebugInfo's
  577    text_bias added before use, and 0 if this is not necessary
  578    (the GX is ready to go).
 579
 580    Hence the block can be quickly parsed and is self-describing.  Note
 581    that aMax is 1 less than the corresponding value in a DWARF3
 582    location list.  Zero length ranges, with aMax == aMin-1, are not
 583    allowed.
 584 */
 585 /* 2008-sept-12: moved ML_(pp_GX) from here to d3basics.c, where
 586    it more logically belongs. */
 587
 588
 589 /* Apply a text bias to a GX. */
 590 static void bias_GX ( /*MOD*/GExpr* gx, const DebugInfo* di )
 591 {
 592    UShort nbytes;
 593    UChar* p = &gx->payload[0];
 594    UChar* pA;
 595    UChar  uc;
 596    uc = *p++; /*biasMe*/
 597    if (uc == 0)
 598       return;
 599    vg_assert(uc == 1);
 600    p[-1] = 0; /* mark it as done */
 601    while (True) {
 602       uc = *p++;
 603       if (uc == 1)
 604          break; /*isEnd*/
 605       vg_assert(uc == 0);
 606       /* t-bias aMin */
 607       pA = (UChar*)p;
 608       ML_(write_Addr)(pA, ML_(read_Addr)(pA) + di->text_debug_bias);
 609       p += sizeof(Addr);
 610       /* t-bias aMax */
 611       pA = (UChar*)p;
 612       ML_(write_Addr)(pA, ML_(read_Addr)(pA) + di->text_debug_bias);
 613       p += sizeof(Addr);
 614       /* nbytes, and actual expression */
 615       nbytes = ML_(read_UShort)(p); p += sizeof(UShort);
 616       p += nbytes;
 617    }
 618 }
 619
 620 __attribute__((noinline))
 621 static GExpr* make_singleton_GX ( DiCursor block, ULong nbytes )
 622 {
 623    SizeT  bytesReqd;
 624    GExpr* gx;
 625    UChar *p, *pstart;
 626
 627    vg_assert(sizeof(UWord) == sizeof(Addr));
 628    vg_assert(nbytes <= 0xFFFF); /* else we overflow the nbytes field */
 629    bytesReqd
 630       =   sizeof(UChar)  /*biasMe*/    + sizeof(UChar) /*!isEnd*/
 631         + sizeof(UWord)  /*aMin*/      + sizeof(UWord) /*aMax*/
 632         + sizeof(UShort) /*nbytes*/    + (SizeT)nbytes
 633         + sizeof(UChar); /*isEnd*/
 634
 635    gx = ML_(dinfo_zalloc)( "di.readdwarf3.msGX.1",
 636                            sizeof(GExpr) + bytesReqd );
 637
 638    p = pstart = &gx->payload[0];
 639
 640    p = ML_(write_UChar)(p, 0);        /*biasMe*/
 641    p = ML_(write_UChar)(p, 0);        /*!isEnd*/
 642    p = ML_(write_Addr)(p, 0);         /*aMin*/
 643    p = ML_(write_Addr)(p, ~0);        /*aMax*/
 644    p = ML_(write_UShort)(p, nbytes);  /*nbytes*/
 645    ML_(cur_read_get)(p, block, nbytes); p += nbytes;
 646    p = ML_(write_UChar)(p, 1);        /*isEnd*/
 647
 648    vg_assert( (SizeT)(p - pstart) == bytesReqd);
 649    vg_assert( &gx->payload[bytesReqd]
 650               == ((UChar*)gx) + sizeof(GExpr) + bytesReqd );
 651
 652    return gx;
 653 }
 654
 655 __attribute__((noinline))
 656 static GExpr* make_general_GX ( const CUConst* cc,
 657                                 Bool     td3,
 658                                 ULong    offset,
 659                                 Addr     svma_of_referencing_CU )
 660 {
 661    Bool      done;
 662    Addr      base;
 663    Cursor    loc;
 664    XArray*   xa; /* XArray of UChar */
 665    GExpr*    gx;
 666    Word      nbytes;
 667    Bool      addBase = cc->version < 5;
 668
 669    vg_assert(sizeof(UWord) == sizeof(Addr));
 670    if (cc->version < 5 && (!ML_(sli_is_valid)(cc->escn_debug_loc)
 671                            || cc->escn_debug_loc.szB == 0))
 672       cc->barf("make_general_GX: .debug_loc is empty/missing");
 673    if (cc->version >= 5 && (!ML_(sli_is_valid)(cc->escn_debug_loclists)
 674                            || cc->escn_debug_loclists.szB == 0))
 675       cc->barf("make_general_GX: .debug_loclists is empty/missing");
 676
 677    if (cc->version < 5)
 678       init_Cursor( &loc, cc->escn_debug_loc, 0, cc->barf,
 679                    "Overrun whilst reading .debug_loc section(2)" );
 680    else
 681       init_Cursor( &loc, cc->escn_debug_loclists, 0, cc->barf,
 682                    "Overrun whilst reading .debug_loclists section(2)" );
 683    set_position_of_Cursor( &loc, offset );
 684
 685    TRACE_D3("make_general_GX (offset = %llu, ioff = %llu) {\n",
 686             offset, get_DiCursor_from_Cursor(&loc).ioff );
 687
 688    /* Who frees this xa?  It is freed before this fn exits. */
 689    xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.mgGX.1",
 690                     ML_(dinfo_free),
 691                     sizeof(UChar) );
 692
 693    { UChar c = 1; /*biasMe*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
 694
 695    base = 0;
 696    done = False;
 697    while (!done) {
 698       Bool  acquire;
 699       UWord len;
 700       UWord w1;
 701       UWord w2;
 702       if (cc->version < 5) {
 703          /* Read a (host-)word pair.  This is something of a hack since
 704             the word size to read is really dictated by the ELF file;
 705             however, we assume we're reading a file with the same
 706             word-sizeness as the host.  Reasonably enough. */
 707          w1 = get_UWord( &loc );
 708          w2 = get_UWord( &loc );
 709
 710          TRACE_D3("   %08lx %08lx\n", w1, w2);
 711          if (w1 == 0 && w2 == 0) {
 712             done = True;
 713             break; /* end of list */
 714          }
 715
 716          if (w1 == -1UL) {
 717             /* new value for 'base' */
 718             base = w2;
 719             continue;
 720          }
 721          /* else a location expression follows */
 722          len = (UWord)get_UShort( &loc );
 723       } else {
 724          w1 = 0;
 725          w2 = 0;
 726          len = 0;
 727          DW_LLE r = get_UChar( &loc );
 728          switch (r) {
 729          case DW_LLE_end_of_list:
 730             done = True;
 731             break;
 732          case DW_LLE_base_address:
 733             base = get_UWord( &loc );
 734             break;
 735          case DW_LLE_start_length:
 736             w1 = get_UWord( &loc );
 737             w2 = w1 + get_ULEB128( &loc );
 738             len = get_ULEB128( &loc );
 739             break;
 740          case DW_LLE_offset_pair:
 741             w1 = base + get_ULEB128( &loc );
 742             w2 = base + get_ULEB128( &loc );
 743             len = get_ULEB128( &loc );
 744             break;
 745          case DW_LLE_start_end:
 746             w1 = get_UWord ( &loc );
 747             w2 = get_UWord ( &loc );
 748             len = get_ULEB128( &loc );
 749             break;
 750          case DW_LLE_GNU_view_pair:
 751             get_ULEB128( &loc );
 752             get_ULEB128( &loc );
 753             break;
 754          case DW_LLE_base_addressx:
 755          case DW_LLE_startx_endx:
 756          case DW_LLE_startx_length:
 757          case DW_LLE_default_location:
 758          default:
 759             cc->barf( "Unhandled or unknown loclists entry" );
 760             done = True;
 761          }
 762       }
 763
 764       /* else enumerate [w1+base, w2+base) */
 765       /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
 766          (sec 2.17.2) */
 767       if (w1 > w2) {
 768          TRACE_D3("negative range is for .debug_loc expr at "
 769                   "file offset %llu\n",
 770                   offset);
 771          cc->barf( "negative range in .debug_loc section" );
 772       }
 773
 774       /* ignore zero length ranges */
 775       acquire = w1 < w2;
 776
 777       if (acquire) {
 778          UWord  w;
 779          UShort s;
 780          UChar  c;
 781          c = 0; /* !isEnd*/
 782          VG_(addBytesToXA)( xa, &c, sizeof(c) );
 783          w = w1    + (addBase ? base : 0) + svma_of_referencing_CU;
 784          VG_(addBytesToXA)( xa, &w, sizeof(w) );
 785          w = w2 -1 + (addBase ? base : 0) + svma_of_referencing_CU;
 786          VG_(addBytesToXA)( xa, &w, sizeof(w) );
 787          s = (UShort)len;
 788          VG_(addBytesToXA)( xa, &s, sizeof(s) );
 789       }
 790
 791       while (len > 0) {
 792          UChar byte = get_UChar( &loc );
 793          TRACE_D3("%02x", (UInt)byte);
 794          if (acquire)
 795             VG_(addBytesToXA)( xa, &byte, 1 );
 796          len--;
 797       }
 798       TRACE_D3("\n");
 799    }
 800
 801    { UChar c = 1; /*isEnd*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
 802
 803    nbytes = VG_(sizeXA)( xa );
 804    vg_assert(nbytes >= 1);
 805
 806    gx = ML_(dinfo_zalloc)( "di.readdwarf3.mgGX.2", sizeof(GExpr) + nbytes );
 807    VG_(memcpy)( &gx->payload[0], (UChar*)VG_(indexXA)(xa,0), nbytes );
 808    vg_assert( &gx->payload[nbytes]
 809               == ((UChar*)gx) + sizeof(GExpr) + nbytes );
 810
 811    VG_(deleteXA)( xa );
 812
 813    TRACE_D3("}\n");
 814
 815    return gx;
 816 }
 817
 818
 819 /*------------------------------------------------------------*/
 820 /*---                                                      ---*/
 821 /*--- Helper functions for range lists and CU headers      ---*/
 822 /*---                                                      ---*/
 823 /*------------------------------------------------------------*/
 824
 825 /* Denotes an address range.  Both aMin and aMax are included in the
 826    range; hence a complete range is (0, ~0) and an empty range is any
 827    (X, X-1) for X > 0.*/
 828 typedef
 829    struct { Addr aMin; Addr aMax; }
 830    AddrRange;
 831
 832
 833 /* Generate an arbitrary structural total ordering on
 834    XArray* of AddrRange. */
 835 static Word cmp__XArrays_of_AddrRange ( const XArray* rngs1,
 836                                         const XArray* rngs2 )
 837 {
 838    Word n1, n2, i;
 839    vg_assert(rngs1 && rngs2);
 840    n1 = VG_(sizeXA)( rngs1 );
 841    n2 = VG_(sizeXA)( rngs2 );
 842    if (n1 < n2) return -1;
 843    if (n1 > n2) return 1;
 844    for (i = 0; i < n1; i++) {
 845       AddrRange* rng1 = (AddrRange*)VG_(indexXA)( rngs1, i );
 846       AddrRange* rng2 = (AddrRange*)VG_(indexXA)( rngs2, i );
 847       if (rng1->aMin < rng2->aMin) return -1;
 848       if (rng1->aMin > rng2->aMin) return 1;
 849       if (rng1->aMax < rng2->aMax) return -1;
 850       if (rng1->aMax > rng2->aMax) return 1;
 851    }
 852    return 0;
 853 }
 854
 855
 856 __attribute__((noinline))
 857 static XArray* /* of AddrRange */ empty_range_list ( void )
 858 {
 859    XArray* xa; /* XArray of AddrRange */
 860    /* Who frees this xa?  varstack_preen() does. */
 861    xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.erl.1",
 862                     ML_(dinfo_free),
 863                     sizeof(AddrRange) );
 864    return xa;
 865 }
 866
 867
 868 __attribute__((noinline))
 869 static XArray* unitary_range_list ( Addr aMin, Addr aMax )
 870 {
 871    XArray*   xa;
 872    AddrRange pair;
 873    vg_assert(aMin <= aMax);
 874    /* Who frees this xa?  varstack_preen() does. */
 875    xa = VG_(newXA)( ML_(dinfo_zalloc),  "di.readdwarf3.url.1",
 876                     ML_(dinfo_free),
 877                     sizeof(AddrRange) );
 878    pair.aMin = aMin;
 879    pair.aMax = aMax;
 880    VG_(addToXA)( xa, &pair );
 881    return xa;
 882 }
 883
 884
 885 /* Enumerate the address ranges starting at img-offset
 886    'debug_ranges_offset' in .debug_ranges.  Results are biased with
 887    'svma_of_referencing_CU' and so I believe are correct SVMAs for the
 888    object as a whole.  This function allocates the XArray, and the
 889    caller must deallocate it. */
 890 __attribute__((noinline))
 891 static XArray* /* of AddrRange */
 892 get_range_list ( const CUConst* cc,
 893                  Bool     td3,
 894                  UWord    debug_ranges_offset,
 895                  Addr     svma_of_referencing_CU )
 896 {
 897    Addr      base;
 898    Cursor    ranges;
 899    XArray*   xa; /* XArray of AddrRange */
 900    AddrRange pair;
 901
 902    if (cc->version < 5 && (!ML_(sli_is_valid)(cc->escn_debug_ranges)
 903                            || cc->escn_debug_ranges.szB == 0))
 904       cc->barf("get_range_list: .debug_ranges is empty/missing");
 905    if (cc->version >= 5 && (!ML_(sli_is_valid)(cc->escn_debug_rnglists)
 906                             || cc->escn_debug_rnglists.szB == 0))
 907       cc->barf("get_range_list: .debug_rnglists is empty/missing");
 908
 909    if (cc->version < 5)
 910       init_Cursor( &ranges, cc->escn_debug_ranges, 0, cc->barf,
 911                    "Overrun whilst reading .debug_ranges section(2)" );
 912    else
 913       init_Cursor( &ranges, cc->escn_debug_rnglists, 0, cc->barf,
 914                    "Overrun whilst reading .debug_rnglists section(2)" );
 915
 916    set_position_of_Cursor( &ranges, debug_ranges_offset );
 917
 918    /* Who frees this xa?  varstack_preen() does. */
 919    xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.grl.1", ML_(dinfo_free),
 920                     sizeof(AddrRange) );
 921    base = 0;
 922    if (cc->version < 5) {
 923       while (True) {
 924          /* Read a (host-)word pair.  This is something of a hack since
 925             the word size to read is really dictated by the ELF file;
 926             however, we assume we're reading a file with the same
 927             word-sizeness as the host.  Reasonably enough. */
 928          UWord w1 = get_UWord( &ranges );
 929          UWord w2 = get_UWord( &ranges );
 930
 931          if (w1 == 0 && w2 == 0)
 932             break; /* end of list. */
 933
 934          if (w1 == -1UL) {
 935             /* new value for 'base' */
 936             base = w2;
 937             continue;
 938          }
 939
 940          /* else enumerate [w1+base, w2+base) */
 941          /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
 942             (sec 2.17.2) */
 943          if (w1 > w2)
 944             cc->barf( "negative range in .debug_ranges section" );
 945          if (w1 < w2) {
 946             pair.aMin = w1     + base + svma_of_referencing_CU;
 947             pair.aMax = w2 - 1 + base + svma_of_referencing_CU;
 948             vg_assert(pair.aMin <= pair.aMax);
 949             VG_(addToXA)( xa, &pair );
 950          }
 951       }
 952    } else {
 953       Bool done = False;
 954       while (!done) {
 955          UWord w1 = 0;
 956          UWord w2 = 0;
 957          DW_RLE r = get_UChar( &ranges );
 958          switch (r) {
 959          case DW_RLE_end_of_list:
 960             done = True;
 961             break;
 962          case DW_RLE_base_address:
 963             base = get_UWord( &ranges );
 964             break;
 965          case DW_RLE_start_length:
 966             w1 = get_UWord( &ranges );
 967             w2 = w1 + get_ULEB128( &ranges );
 968             break;
 969          case DW_RLE_offset_pair:
 970             w1 = base + get_ULEB128( &ranges );
 971             w2 = base + get_ULEB128( &ranges );
 972             break;
 973          case DW_RLE_start_end:
 974             w1 = get_UWord ( &ranges );
 975             w2 = get_UWord ( &ranges );
 976             break;
 977          case DW_RLE_base_addressx:
 978          case DW_RLE_startx_endx:
 979          case DW_RLE_startx_length:
 980          default:
 981             cc->barf( "Unhandled or unknown range list entry" );
 982             done = True;
 983          }
 984          if (w1 > w2)
 985             cc->barf( "negative range in .debug_rnglists section" );
 986          if (w1 < w2) {
 987             pair.aMin = w1     + svma_of_referencing_CU;
 988             pair.aMax = w2 - 1 + svma_of_referencing_CU;
 989             vg_assert(pair.aMin <= pair.aMax);
 990             VG_(addToXA)( xa, &pair );
 991          }
 992       }
 993    }
 994    return xa;
 995 }
 996
/* Sentinel byte count.  init_ht_abbvs compares both the result of
   get_Form_szB and stored skip_szB values against it; presumably it
   marks a form whose encoded size is not fixed -- confirm against
   get_Form_szB. */
#define VARSZ_FORM 0xffffffff
/* Forward declaration, so that init_ht_abbvs can call get_Form_szB. */
static UInt get_Form_szB (const CUConst* cc, DW_FORM form );
 999
1000 /* Initialises the hash table of abbreviations.
1001    We do a single scan of the abbv slice to parse and
1002    build all abbreviations, for the following reasons:
1003      * all or most abbreviations will be needed in any case
1004        (at least for var-info reading).
1005      * re-reading each time an abbreviation causes a lot of calls
1006        to get_ULEB128.
1007      * a CU should not have many abbreviations. */
1008 static void init_ht_abbvs (CUConst* cc,
1009                            Bool td3)
1010 {
1011    Cursor c;
1012    g_abbv *ta; // temporary abbreviation, reallocated if needed.
1013    UInt ta_nf_maxE; // max nr of pairs in ta.nf[], doubled when reallocated.
1014    UInt ta_nf_n;    // nr of pairs in ta->nf that are initialised.
1015    g_abbv *ht_ta; // abbv to insert in hash table.
1016    Int i;
1017
1018    #define SZ_G_ABBV(_nf_szE) (sizeof(g_abbv) + _nf_szE * sizeof(name_form))
1019
1020    ta_nf_maxE = 10; // starting with enough for 9 pairs+terminating pair.
1021    ta = ML_(dinfo_zalloc) ("di.readdwarf3.ht_ta_nf", SZ_G_ABBV(ta_nf_maxE));
1022    cc->ht_abbvs = VG_(HT_construct) ("di.readdwarf3.ht_abbvs");
1023
1024    init_Cursor( &c, cc->debug_abbv, 0, cc->barf,
1025                "Overrun whilst parsing .debug_abbrev section(2)" );
1026    while (True) {
1027       ta->abbv_code = get_ULEB128( &c );
1028       if (ta->abbv_code == 0) break; /* end of the table */
1029
1030       ta->atag = get_ULEB128( &c );
1031       ta->has_children = get_UChar( &c );
1032       ta_nf_n = 0;
1033       while (True) {
1034          if (ta_nf_n >= ta_nf_maxE) {
1035             g_abbv *old_ta = ta;
1036             ta = ML_(dinfo_zalloc) ("di.readdwarf3.ht_ta_nf",
1037                                     SZ_G_ABBV(2 * ta_nf_maxE));
1038             ta_nf_maxE = 2 * ta_nf_maxE;
1039             VG_(memcpy) (ta, old_ta, SZ_G_ABBV(ta_nf_n));
1040             ML_(dinfo_free) (old_ta);
1041          }
1042          ta->nf[ta_nf_n].at_name = get_ULEB128( &c );
1043          ta->nf[ta_nf_n].at_form = get_ULEB128( &c );
1044          if (ta->nf[ta_nf_n].at_form == DW_FORM_implicit_const)
1045             ta->nf[ta_nf_n].at_val = get_SLEB128( &c );
1046          if (ta->nf[ta_nf_n].at_name == 0 && ta->nf[ta_nf_n].at_form == 0) {
1047             ta_nf_n++;
1048             break;
1049          }
1050         ta_nf_n++;
1051       }
1052
1053       // Initialises the skip_szB/next_nf elements : an element at position
1054       // i must contain the sum of its own size + the sizes of all elements
1055       // following i till either the next variable size element, the next
1056       // sibling element or the end of the DIE.
1057       ta->nf[ta_nf_n - 1].skip_szB = 0;
1058       ta->nf[ta_nf_n - 1].next_nf = 0;
1059       for (i = ta_nf_n - 2; i >= 0; i--) {
1060          const UInt form_szB = get_Form_szB (cc, (DW_FORM)ta->nf[i].at_form);
1061
1062          if (ta->nf[i+1].at_name == DW_AT_sibling
1063              || ta->nf[i+1].skip_szB == VARSZ_FORM) {
1064             ta->nf[i].skip_szB = form_szB;
1065             ta->nf[i].next_nf  = i+1;
1066          } else if (form_szB == VARSZ_FORM) {
1067             ta->nf[i].skip_szB = form_szB;
1068             ta->nf[i].next_nf  = i+1;
1069          } else {
1070             ta->nf[i].skip_szB = ta->nf[i+1].skip_szB + form_szB;
1071             ta->nf[i].next_nf  = ta->nf[i+1].next_nf;
1072          }
1073       }
1074
1075       ht_ta = ML_(dinfo_zalloc) ("di.readdwarf3.ht_ta", SZ_G_ABBV(ta_nf_n));
1076       VG_(memcpy) (ht_ta, ta, SZ_G_ABBV(ta_nf_n));
1077       VG_(HT_add_node) ( cc->ht_abbvs, ht_ta );
1078       if (TD3) {
1079          TRACE_D3("  Adding abbv_code %lu TAG  %s [%s] nf %u ",
1080                   ht_ta->abbv_code, ML_(pp_DW_TAG)(ht_ta->atag),
1081                   ML_(pp_DW_children)(ht_ta->has_children),
1082                   ta_nf_n);
1083          TRACE_D3("  ");
1084          for (i = 0; i < ta_nf_n; i++)
1085             TRACE_D3("[%u,%u] ", ta->nf[i].skip_szB, ta->nf[i].next_nf);
1086          TRACE_D3("\n");
1087       }
1088    }
1089
1090    ML_(dinfo_free) (ta);
1091    #undef SZ_G_ABBV
1092 }
1093
1094 static g_abbv* get_abbv (const CUConst* cc, ULong abbv_code)
1095 {
1096    g_abbv *abbv;
1097
1098    abbv = VG_(HT_lookup) (cc->ht_abbvs, abbv_code);
1099    if (!abbv)
1100       cc->barf ("abbv_code not found in ht_abbvs table");
1101    return abbv;
1102 }
1103
1104 /* Free the memory allocated in CUConst. */
1105 static void clear_CUConst (CUConst* cc)
1106 {
1107    VG_(HT_destruct) ( cc->ht_abbvs, ML_(dinfo_free));
1108    cc->ht_abbvs = NULL;
1109 }
1110
1111 /* Parse the Compilation Unit header indicated at 'c' and
1112    initialise 'cc' accordingly. */
1113 static __attribute__((noinline))
1114 void parse_CU_Header ( /*OUT*/CUConst* cc,
1115                        Bool td3,
1116                        Cursor* c,
1117                        DiSlice escn_debug_abbv,
1118                        Bool type_unit,
1119                        Bool alt_info )
1120 {
1121    UChar  address_size, unit_type;
1122    ULong  debug_abbrev_offset;
1123
1124    VG_(memset)(cc, 0, sizeof(*cc));
1125    vg_assert(c && c->barf);
1126    cc->barf = c->barf;
1127
1128    /* initial_length field */
1129    cc->unit_length
1130       = get_Initial_Length( &cc->is_dw64, c,
1131            "parse_CU_Header: invalid initial-length field" );
1132
1133    TRACE_D3("   Length:        %llu\n", cc->unit_length );
1134
1135    /* version */
1136    cc->version = get_UShort( c );
1137    if (cc->version != 2 && cc->version != 3 && cc->version != 4
1138        && cc->version != 5)
1139       cc->barf( "parse_CU_Header: "
1140                 "is neither DWARF2 nor DWARF3 nor DWARF4 nor DWARF5" );
1141    TRACE_D3("   Version:       %d\n", (Int)cc->version );
1142
1143    /* unit type */
1144    if (cc->version >= 5) {
1145       unit_type = get_UChar( c );
1146       address_size = get_UChar( c );
1147    } else {
1148       unit_type = type_unit ? DW_UT_type : DW_UT_compile;
1149       address_size = 0; /* Will be read later. */
1150    }
1151
1152    /* debug_abbrev_offset */
1153    debug_abbrev_offset = get_Dwarfish_UWord( c, cc->is_dw64 );
1154    if (debug_abbrev_offset >= escn_debug_abbv.szB)
1155       cc->barf( "parse_CU_Header: invalid debug_abbrev_offset" );
1156    TRACE_D3("   Abbrev Offset: %llu\n", debug_abbrev_offset );
1157
1158    /* address size.  If this isn't equal to the host word size, just
1159       give up.  This makes it safe to assume elsewhere that
1160       DW_FORM_addr and DW_FORM_ref_addr can be treated as a host
1161       word. */
1162    if (cc->version < 5)
1163       address_size = get_UChar( c );
1164
1165    if (address_size != sizeof(void*))
1166       cc->barf( "parse_CU_Header: invalid address_size" );
1167    TRACE_D3("   Pointer Size:  %d\n", (Int)address_size );
1168
1169    cc->is_type_unit = type_unit;
1170    cc->is_alt_info = alt_info;
1171
1172    if (type_unit || (cc->version >= 5 && unit_type == DW_UT_type)) {
1173       cc->type_signature = get_ULong( c );
1174       cc->type_offset = get_Dwarfish_UWord( c, cc->is_dw64 );
1175    }
1176
1177    /* Set up cc->debug_abbv to point to the relevant table for this
1178       CU.  Set its .szB so that at least we can't read off the end of
1179       the debug_abbrev section -- potentially (and quite likely) too
1180       big, if this isn't the last table in the section, but at least
1181       it's safe.
1182
1183       This amounts to taking debug_abbv_escn and moving the start
1184       position along by debug_abbrev_offset bytes, hence forming a
1185       smaller DiSlice which has the same end point.  Since we checked
1186       just above that debug_abbrev_offset is less than the size of
1187       debug_abbv_escn, this should leave us with a nonempty slice. */
1188    vg_assert(debug_abbrev_offset < escn_debug_abbv.szB);
1189    cc->debug_abbv      = escn_debug_abbv;
1190    cc->debug_abbv.ioff += debug_abbrev_offset;
1191    cc->debug_abbv.szB  -= debug_abbrev_offset;
1192
1193    init_ht_abbvs(cc, td3);
1194 }
1195
1196 /* This represents a single signatured type.  It maps a type signature
1197    (a ULong) to a cooked DIE offset.  Objects of this type are stored
1198    in the type signature hash table.  */
1199 typedef
1200    struct D3SignatureType {
1201       struct D3SignatureType *next;
1202       UWord data;
1203       ULong type_signature;
1204       UWord die;
1205    }
1206    D3SignatureType;
1207
1208 /* Record a signatured type in the hash table.  */
1209 static void record_signatured_type ( VgHashTable *tab,
1210                                      ULong type_signature,
1211                                      UWord die )
1212 {
1213    D3SignatureType *dstype = ML_(dinfo_zalloc) ( "di.readdwarf3.sigtype",
1214                                                  sizeof(D3SignatureType) );
1215    dstype->data = (UWord) type_signature;
1216    dstype->type_signature = type_signature;
1217    dstype->die = die;
1218    VG_(HT_add_node) ( tab, dstype );
1219 }
1220
1221 /* Given a type signature hash table and a type signature, return the
1222    cooked DIE offset of the type.  If the type cannot be found, call
1223    BARF.  */
1224 static UWord lookup_signatured_type ( const VgHashTable *tab,
1225                                       ULong type_signature,
1226                                       void (*barf)( const HChar* ) __attribute__((noreturn)) )
1227 {
1228    D3SignatureType *dstype = VG_(HT_lookup) ( tab, (UWord) type_signature );
1229    /* This may be unwarranted chumminess with the hash table
1230       implementation.  */
1231    while ( dstype != NULL && dstype->type_signature != type_signature)
1232       dstype = dstype->next;
1233    if (dstype == NULL) {
1234       barf("lookup_signatured_type: could not find signatured type");
1235       /*NOTREACHED*/
1236       vg_assert(0);
1237    }
1238    return dstype->die;
1239 }
1240
1241
1242 /* Represents Form data.  If szB is 1/2/4/8 then the result is in the
1243    lowest 1/2/4/8 bytes of u.val.  If szB is zero or negative then the
1244    result is an image section beginning at u.cur and with size -szB.
1245    No other szB values are allowed. */
1246 typedef
1247    struct {
1248       Long szB; // 1, 2, 4, 8 or non-positive values only.
1249       union { ULong val; DiCursor cur; } u;
1250    }
1251    FormContents;
1252
1253 /* From 'c', get the Form data into 'cts'.  Either it gets a 1/2/4/8
1254    byte scalar value, or (a reference to) zero or more bytes starting
1255    at a DiCursor.*/
1256 static
1257 void get_Form_contents ( /*OUT*/FormContents* cts,
1258                          const CUConst* cc, Cursor* c,
1259                          Bool td3, const name_form *abbv )
1260 {
1261    DW_FORM form = abbv->at_form;
1262    VG_(bzero_inline)(cts, sizeof(*cts));
1263    // !!! keep switch in sync with get_Form_szB. The nr of characters read below
1264    // must be computed similarly in get_Form_szB.
1265    // The consistency is verified in trace_DIE.
1266    switch (form) {
1267       case DW_FORM_data1:
1268          cts->u.val = (ULong)(UChar)get_UChar(c);
1269          cts->szB   = 1;
1270          TRACE_D3("%u", (UInt)cts->u.val);
1271          break;
1272       case DW_FORM_data2:
1273          cts->u.val = (ULong)(UShort)get_UShort(c);
1274          cts->szB   = 2;
1275          TRACE_D3("%u", (UInt)cts->u.val);
1276          break;
1277       case DW_FORM_data4:
1278          cts->u.val = (ULong)(UInt)get_UInt(c);
1279          cts->szB   = 4;
1280          TRACE_D3("%u", (UInt)cts->u.val);
1281          break;
1282       case DW_FORM_data8:
1283          cts->u.val = get_ULong(c);
1284          cts->szB   = 8;
1285          TRACE_D3("%llu", cts->u.val);
1286          break;
1287      case DW_FORM_data16: {
1288          /* This is more like a block than an integral value.  */
1289          ULong    u64b;
1290          DiCursor data16 = get_DiCursor_from_Cursor(c);
1291          TRACE_D3("data16: ");
1292          for (u64b = 16; u64b > 0; u64b--) {
1293             UChar u8 = get_UChar(c);
1294             TRACE_D3("%x ", (UInt)u8);
1295          }
1296          cts->u.cur = data16;
1297          cts->szB   = - (Long)16;
1298          break;
1299          }
1300       case DW_FORM_sec_offset:
1301          cts->u.val = (ULong)get_Dwarfish_UWord( c, cc->is_dw64 );
1302          cts->szB   = cc->is_dw64 ? 8 : 4;
1303          TRACE_D3("%llu", cts->u.val);
1304          break;
1305       case DW_FORM_sdata:
1306          cts->u.val = (ULong)(Long)get_SLEB128(c);
1307          cts->szB   = 8;
1308          TRACE_D3("%llu", cts->u.val);
1309          break;
1310       case DW_FORM_udata:
1311          cts->u.val = (ULong)(Long)get_ULEB128(c);
1312          cts->szB   = 8;
1313          TRACE_D3("%llu", cts->u.val);
1314          break;
1315       case DW_FORM_addr:
1316          /* note, this is a hack.  DW_FORM_addr is defined as getting
1317             a word the size of the target machine as defined by the
1318             address_size field in the CU Header.  However,
1319             parse_CU_Header() rejects all inputs except those for
1320             which address_size == sizeof(Word), hence we can just
1321             treat it as a (host) Word.  */
1322          cts->u.val = (ULong)(UWord)get_UWord(c);
1323          cts->szB   = sizeof(UWord);
1324          TRACE_D3("0x%lx", (UWord)cts->u.val);
1325          break;
1326
1327       case DW_FORM_ref_addr:
1328          /* We make the same word-size assumption as DW_FORM_addr. */
1329          /* What does this really mean?  From D3 Sec 7.5.4,
1330             description of "reference", it would appear to reference
1331             some other DIE, by specifying the offset from the
1332             beginning of a .debug_info section.  The D3 spec mentions
1333             that this might be in some other shared object and
1334             executable.  But I don't see how the name of the other
1335             object/exe is specified.
1336
1337             At least for the DW_FORM_ref_addrs created by icc11, the
1338             references seem to be within the same object/executable.
1339             So for the moment we merely range-check, to see that they
1340             actually do specify a plausible offset within this
1341             object's .debug_info, and return the value unchanged.
1342
1343             In DWARF 2, DW_FORM_ref_addr is address-sized, but in
1344             DWARF 3 and later, it is offset-sized.
1345          */
1346          if (cc->version == 2) {
1347             cts->u.val = (ULong)(UWord)get_UWord(c);
1348             cts->szB   = sizeof(UWord);
1349          } else {
1350             cts->u.val = get_Dwarfish_UWord(c, cc->is_dw64);
1351             cts->szB   = cc->is_dw64 ? sizeof(ULong) : sizeof(UInt);
1352          }
1353          TRACE_D3("0x%lx", (UWord)cts->u.val);
1354          if (0) VG_(printf)("DW_FORM_ref_addr 0x%lx\n", (UWord)cts->u.val);
1355          if (/* the following is surely impossible, but ... */
1356              !ML_(sli_is_valid)(cc->escn_debug_info)
1357              || cts->u.val >= (ULong)cc->escn_debug_info.szB) {
1358             /* Hmm.  Offset is nonsensical for this object's .debug_info
1359                section.  Be safe and reject it. */
1360             cc->barf("get_Form_contents: DW_FORM_ref_addr points "
1361                      "outside .debug_info");
1362          }
1363          break;
1364
1365       case DW_FORM_strp: {
1366          /* this is an offset into .debug_str */
1367          UWord uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
1368          if (!ML_(sli_is_valid)(cc->escn_debug_str)
1369              || uw >= cc->escn_debug_str.szB)
1370             cc->barf("get_Form_contents: DW_FORM_strp "
1371                      "points outside .debug_str");
1372          /* FIXME: check the entire string lies inside debug_str,
1373             not just the first byte of it. */
1374          DiCursor str
1375             = ML_(cur_plus)( ML_(cur_from_sli)(cc->escn_debug_str), uw );
1376          if (TD3) {
1377             HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.1");
1378             TRACE_D3("(indirect string, offset: 0x%lx): %s", uw, tmp);
1379             ML_(dinfo_free)(tmp);
1380          }
1381          cts->u.cur = str;
1382          cts->szB   = - (Long)(1 + (ULong)ML_(cur_strlen)(str));
1383          break;
1384       }
1385       case DW_FORM_line_strp: {
1386          /* this is an offset into .debug_line_str */
1387          UWord uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
1388          if (!ML_(sli_is_valid)(cc->escn_debug_line_str)
1389              || uw >= cc->escn_debug_line_str.szB)
1390             cc->barf("get_Form_contents: DW_FORM_line_strp "
1391                      "points outside .debug_line_str");
1392          /* FIXME: check the entire string lies inside debug_line_str,
1393             not just the first byte of it. */
1394          DiCursor line_str
1395             = ML_(cur_plus)( ML_(cur_from_sli)(cc->escn_debug_line_str), uw );
1396          if (TD3) {
1397             HChar* tmp = ML_(cur_read_strdup)(line_str, "di.getFC.1.5");
1398             TRACE_D3("(indirect line string, offset: 0x%lx): %s", uw, tmp);
1399             ML_(dinfo_free)(tmp);
1400          }
1401          cts->u.cur = line_str;
1402          cts->szB   = - (Long)(1 + (ULong)ML_(cur_strlen)(line_str));
1403          break;
1404       }
1405       case DW_FORM_string: {
1406          DiCursor str = get_AsciiZ(c);
1407          if (TD3) {
1408             HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.2");
1409             TRACE_D3("%s", tmp);
1410             ML_(dinfo_free)(tmp);
1411          }
1412          cts->u.cur = str;
1413          /* strlen is safe because get_AsciiZ already 'vetted' the
1414             entire string */
1415          cts->szB   = - (Long)(1 + (ULong)ML_(cur_strlen)(str));
1416          break;
1417       }
1418       case DW_FORM_ref1: {
1419          UChar u8   = get_UChar(c);
1420          UWord res  = cc->cu_start_offset + (UWord)u8;
1421          cts->u.val = (ULong)res;
1422          cts->szB   = sizeof(UWord);
1423          TRACE_D3("<%lx>", res);
1424          break;
1425       }
1426       case DW_FORM_ref2: {
1427          UShort u16 = get_UShort(c);
1428          UWord  res = cc->cu_start_offset + (UWord)u16;
1429          cts->u.val = (ULong)res;
1430          cts->szB   = sizeof(UWord);
1431          TRACE_D3("<%lx>", res);
1432          break;
1433       }
1434       case DW_FORM_ref4: {
1435          UInt  u32  = get_UInt(c);
1436          UWord res  = cc->cu_start_offset + (UWord)u32;
1437          cts->u.val = (ULong)res;
1438          cts->szB   = sizeof(UWord);
1439          TRACE_D3("<%lx>", res);
1440          break;
1441       }
1442       case DW_FORM_ref8: {
1443          ULong u64  = get_ULong(c);
1444          UWord res  = cc->cu_start_offset + (UWord)u64;
1445          cts->u.val = (ULong)res;
1446          cts->szB   = sizeof(UWord);
1447          TRACE_D3("<%lx>", res);
1448          break;
1449       }
1450       case DW_FORM_ref_udata: {
1451          ULong u64  = get_ULEB128(c);
1452          UWord res  = cc->cu_start_offset + (UWord)u64;
1453          cts->u.val = (ULong)res;
1454          cts->szB   = sizeof(UWord);
1455          TRACE_D3("<%lx>", res);
1456          break;
1457       }
1458       case DW_FORM_flag: {
1459          UChar u8 = get_UChar(c);
1460          TRACE_D3("%u", (UInt)u8);
1461          cts->u.val = (ULong)u8;
1462          cts->szB   = 1;
1463          break;
1464       }
1465       case DW_FORM_flag_present:
1466          TRACE_D3("1");
1467          cts->u.val = 1;
1468          cts->szB   = 1;
1469          break;
1470       case DW_FORM_implicit_const:
1471          cts->u.val = (ULong)abbv->at_val;
1472          cts->szB   = 8;
1473          TRACE_D3("%llu", cts->u.val);
1474          break;
1475       case DW_FORM_block1: {
1476          ULong    u64b;
1477          ULong    u64   = (ULong)get_UChar(c);
1478          DiCursor block = get_DiCursor_from_Cursor(c);
1479          TRACE_D3("%llu byte block: ", u64);
1480          for (u64b = u64; u64b > 0; u64b--) {
1481             UChar u8 = get_UChar(c);
1482             TRACE_D3("%x ", (UInt)u8);
1483          }
1484          cts->u.cur = block;
1485          cts->szB   = - (Long)u64;
1486          break;
1487       }
1488       case DW_FORM_block2: {
1489          ULong    u64b;
1490          ULong    u64   = (ULong)get_UShort(c);
1491          DiCursor block = get_DiCursor_from_Cursor(c);
1492          TRACE_D3("%llu byte block: ", u64);
1493          for (u64b = u64; u64b > 0; u64b--) {
1494             UChar u8 = get_UChar(c);
1495             TRACE_D3("%x ", (UInt)u8);
1496          }
1497          cts->u.cur = block;
1498          cts->szB   = - (Long)u64;
1499          break;
1500       }
1501       case DW_FORM_block4: {
1502          ULong    u64b;
1503          ULong    u64   = (ULong)get_UInt(c);
1504          DiCursor block = get_DiCursor_from_Cursor(c);
1505          TRACE_D3("%llu byte block: ", u64);
1506          for (u64b = u64; u64b > 0; u64b--) {
1507             UChar u8 = get_UChar(c);
1508             TRACE_D3("%x ", (UInt)u8);
1509          }
1510          cts->u.cur = block;
1511          cts->szB   = - (Long)u64;
1512          break;
1513       }
1514       case DW_FORM_exprloc:
1515       case DW_FORM_block: {
1516          ULong    u64b;
1517          ULong    u64   = (ULong)get_ULEB128(c);
1518          DiCursor block = get_DiCursor_from_Cursor(c);
1519          TRACE_D3("%llu byte block: ", u64);
1520          for (u64b = u64; u64b > 0; u64b--) {
1521             UChar u8 = get_UChar(c);
1522             TRACE_D3("%x ", (UInt)u8);
1523          }
1524          cts->u.cur = block;
1525          cts->szB   = - (Long)u64;
1526          break;
1527       }
1528       case DW_FORM_ref_sig8: {
1529          ULong  u64b;
1530          ULong  signature = get_ULong (c);
1531          ULong  work = signature;
1532          TRACE_D3("8 byte signature: ");
1533          for (u64b = 8; u64b > 0; u64b--) {
1534             UChar u8 = work & 0xff;
1535             TRACE_D3("%x ", (UInt)u8);
1536             work >>= 8;
1537          }
1538
1539          /* cc->signature_types is only built/initialised when
1540             VG_(clo_read_var_info) is set. In this case,
1541             the DW_FORM_ref_sig8 can be looked up.
1542             But we can also arrive here when only reading inline info
1543             and VG_(clo_trace_symtab) is set. In such a case,
1544             we cannot lookup the DW_FORM_ref_sig8, we rather assign
1545             a dummy value. This is a kludge, but otherwise,
1546             the 'dwarf inline info reader' tracing would have to
1547             do type processing/reading. It is better to avoid
1548             adding significant 'real' processing only due to tracing. */
1549          if (VG_(clo_read_var_info)) {
1550             /* Due to the way that the hash table is constructed, the
1551                resulting DIE offset here is already "cooked".  See
1552                cook_die_using_form.  */
1553             cts->u.val = lookup_signatured_type (cc->signature_types, signature,
1554                                                  c->barf);
1555          } else {
1556             vg_assert (td3);
1557             vg_assert (VG_(clo_read_inline_info));
1558             TRACE_D3("<not dereferencing signature type>");
1559             cts->u.val = 0; /* Assign a dummy/rubbish value */
1560          }
1561          cts->szB   = sizeof(UWord);
1562          break;
1563       }
1564       case DW_FORM_indirect: {
1565          /* Urgh, this is ugly and somewhat unclear how it works
1566             with DW_FORM_implicit_const. HACK.  */
1567          name_form nfi = *abbv;
1568          nfi.at_form = (DW_FORM)get_ULEB128(c);
1569          get_Form_contents (cts, cc, c, td3, &nfi);
1570          return;
1571       }
1572
1573       case DW_FORM_GNU_ref_alt:
1574          cts->u.val = get_Dwarfish_UWord(c, cc->is_dw64);
1575          cts->szB   = cc->is_dw64 ? sizeof(ULong) : sizeof(UInt);
1576          TRACE_D3("0x%lx", (UWord)cts->u.val);
1577          if (0) VG_(printf)("DW_FORM_GNU_ref_alt 0x%lx\n", (UWord)cts->u.val);
1578          if (/* the following is surely impossible, but ... */
1579              !ML_(sli_is_valid)(cc->escn_debug_info_alt))
1580             cc->barf("get_Form_contents: DW_FORM_GNU_ref_addr used, "
1581                      "but no alternate .debug_info");
1582          else if (cts->u.val >= (ULong)cc->escn_debug_info_alt.szB) {
1583             /* Hmm.  Offset is nonsensical for this object's .debug_info
1584                section.  Be safe and reject it. */
1585             cc->barf("get_Form_contents: DW_FORM_GNU_ref_addr points "
1586                      "outside alternate .debug_info");
1587          }
1588          break;
1589
1590       case DW_FORM_GNU_strp_alt: {
1591          /* this is an offset into alternate .debug_str */
1592          SizeT uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
1593          if (!ML_(sli_is_valid)(cc->escn_debug_str_alt))
1594             cc->barf("get_Form_contents: DW_FORM_GNU_strp_alt used, "
1595                      "but no alternate .debug_str");
1596          else if (uw >= cc->escn_debug_str_alt.szB)
1597             cc->barf("get_Form_contents: DW_FORM_GNU_strp_alt "
1598                      "points outside alternate .debug_str");
1599          /* FIXME: check the entire string lies inside debug_str,
1600             not just the first byte of it. */
1601          DiCursor str
1602             = ML_(cur_plus)( ML_(cur_from_sli)(cc->escn_debug_str_alt), uw);
1603          if (TD3) {
1604             HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.3");
1605             TRACE_D3("(indirect alt string, offset: 0x%lx): %s", uw, tmp);
1606             ML_(dinfo_free)(tmp);
1607          }
1608          cts->u.cur = str;
1609          cts->szB   = - (Long)(1 + (ULong)ML_(cur_strlen)(str));
1610          break;
1611       }
1612
1613       default:
1614          VG_(printf)(
1615             "get_Form_contents: unhandled %u (%s) at <%llx>\n",
1616             form, ML_(pp_DW_FORM)(form), get_position_of_Cursor(c));
1617          c->barf("get_Form_contents: unhandled DW_FORM");
1618    }
1619 }
1620
1621 static inline UInt sizeof_Dwarfish_UWord (Bool is_dw64)
1622 {
1623    if (is_dw64)
1624       return sizeof(ULong);
1625    else
1626       return sizeof(UInt);
1627 }
1628
1629 #define VARSZ_FORM 0xffffffff
1630 /* If the form is a fixed length form, return the nr of bytes for this form.
1631    If the form is a variable length form, return VARSZ_FORM. */
1632 static
1633 UInt get_Form_szB (const CUConst* cc, DW_FORM form )
1634 {
1635    // !!! keep switch in sync with get_Form_contents : the nr of bytes
1636    // read from a cursor by get_Form_contents must be returned by
1637    // the below switch.
1638    // The consistency is verified in trace_DIE.
1639    switch (form) {
1640       case DW_FORM_data1: return 1;
1641       case DW_FORM_data2: return 2;
1642       case DW_FORM_data4: return 4;
1643       case DW_FORM_data8: return 8;
1644       case DW_FORM_data16: return 16;
1645       case DW_FORM_sec_offset:
1646          if (cc->is_dw64)
1647             return 8;
1648          else
1649             return 4;
1650       case DW_FORM_sdata:
1651          return VARSZ_FORM;
1652       case DW_FORM_udata:
1653          return VARSZ_FORM;
1654       case DW_FORM_addr: // See hack in get_Form_contents
1655          return sizeof(UWord);
1656       case DW_FORM_ref_addr: // See hack in get_Form_contents
1657          if (cc->version == 2)
1658             return sizeof(UWord);
1659          else
1660             return sizeof_Dwarfish_UWord (cc->is_dw64);
1661       case DW_FORM_strp:
1662       case DW_FORM_line_strp:
1663          return sizeof_Dwarfish_UWord (cc->is_dw64);
1664       case DW_FORM_string:
1665          return VARSZ_FORM;
1666       case DW_FORM_ref1:
1667          return 1;
1668       case DW_FORM_ref2:
1669          return 2;
1670       case DW_FORM_ref4:
1671          return 4;
1672       case DW_FORM_ref8:
1673          return 8;
1674       case DW_FORM_ref_udata:
1675          return VARSZ_FORM;
1676       case DW_FORM_flag:
1677          return 1;
1678       case DW_FORM_flag_present:
1679          return 0; // !!! special case, no data.
1680       case DW_FORM_block1:
1681          return VARSZ_FORM;
1682       case DW_FORM_block2:
1683          return VARSZ_FORM;
1684       case DW_FORM_block4:
1685          return VARSZ_FORM;
1686       case DW_FORM_exprloc:
1687       case DW_FORM_block:
1688          return VARSZ_FORM;
1689       case DW_FORM_ref_sig8:
1690          return 8;
1691       case DW_FORM_indirect:
1692          return VARSZ_FORM;
1693       case DW_FORM_GNU_ref_alt:
1694          return sizeof_Dwarfish_UWord(cc->is_dw64);
1695       case DW_FORM_GNU_strp_alt:
1696          return sizeof_Dwarfish_UWord(cc->is_dw64);
1697       case DW_FORM_implicit_const:
1698          return 0; /* Value inside abbrev. */
1699       default:
1700          VG_(printf)(
1701             "get_Form_szB: unhandled %u (%s)\n",
1702             form, ML_(pp_DW_FORM)(form));
1703          cc->barf("get_Form_contents: unhandled DW_FORM");
1704    }
1705 }
1706
1707 /* Skip a DIE as described by abbv.
1708    If the DIE has a sibling, *sibling is set to the skipped DIE sibling value. */
1709 static
1710 void skip_DIE (UWord  *sibling,
1711                Cursor* c_die,
1712                const g_abbv *abbv,
1713                const CUConst* cc)
1714 {
1715    UInt nf_i;
1716    FormContents cts;
1717    nf_i = 0;
1718    while (True) {
1719       if (abbv->nf[nf_i].at_name == DW_AT_sibling) {
1720          get_Form_contents( &cts, cc, c_die, False /*td3*/,
1721                             &abbv->nf[nf_i] );
1722          if ( cts.szB > 0 )
1723             *sibling = cts.u.val;
1724          nf_i++;
1725       } else if (abbv->nf[nf_i].skip_szB == VARSZ_FORM) {
1726          get_Form_contents( &cts, cc, c_die, False /*td3*/,
1727                             &abbv->nf[nf_i] );
1728          nf_i++;
1729       } else {
1730          advance_position_of_Cursor (c_die, (ULong)abbv->nf[nf_i].skip_szB);
1731          nf_i = abbv->nf[nf_i].next_nf;
1732       }
1733       if (nf_i == 0)
1734          break;
1735    }
1736 }
1737
1738
1739 /*------------------------------------------------------------*/
1740 /*---                                                      ---*/
1741 /*--- Parsing of variable-related DIEs                     ---*/
1742 /*---                                                      ---*/
1743 /*------------------------------------------------------------*/
1744
/* Per-variable record used by the variable-related DIE parsing code.
   The meaning of each field is given inline. */
typedef
   struct _TempVar {
      const HChar*  name; /* in DebugInfo's .strpool */
      /* Represent ranges economically.  nRanges is the number of
         ranges.  Cases:
         0: .rngOneMin .rngOneMax .rngMany are all zero
         1: .rngOneMin .rngOneMax hold the range; .rngMany is NULL
         2: .rngOneMin .rngOneMax are zero; .rngMany holds the ranges.
            (NOTE(review): presumably this case means "two or more"
            ranges -- confirm.)
         This is merely an optimisation to avoid having to allocate
         and free the XArray in the common (98%) of cases where there
         is zero or one address ranges. */
      UWord   nRanges;
      Addr    rngOneMin;
      Addr    rngOneMax;
      XArray* rngMany; /* of AddrRange.  NON-UNIQUE PTR in AR_DINFO. */
      /* Do not free .rngMany, since many TempVars will have the same
         value.  Instead the associated storage is to be freed by
         deleting 'rangetree', which stores a single copy of each
         range. */
      /* --- */
      Int     level;
      UWord   typeR; /* a cuOff */
      GExpr*  gexpr; /* for this variable */
      GExpr*  fbGX;  /* to find the frame base of the enclosing fn, if
                        any */
      UInt    fndn_ix; /* declaring file/dirname index in fndnpool, or 0 */
      Int     fLine; /* declaring file line number, or zero */
      /* offset in .debug_info, so that abstract instances can be
         found to satisfy references from concrete instances. */
      UWord   dioff;
      UWord   absOri; /* so the absOri fields refer to dioff fields
                         in some other, related TempVar. */
   }
   TempVar;
1779
/* State of the variable parser: a stack of scopes plus the file-name
   index table.  The stack is held as four parallel arrays (ranges,
   level, isFunc, fbGX), each with room for stack_size entries;
   entries [0 .. sp] are live. */
typedef
   struct {
      /* Contains the range stack: a stack of address ranges, one
         stack entry for each nested scope.

         Some scope entries are created by function definitions
         (DW_TAG_subprogram), and for those, we also note the GExpr
         derived from its DW_AT_frame_base attribute, if any.
         Consequently it should be possible to find, for any
         variable's DIE, the GExpr for the containing function's
         DW_AT_frame_base by scanning back through the stack to find
         the nearest entry associated with a function.  This somewhat
         elaborate scheme is provided so as to make it possible to
         obtain the correct DW_AT_frame_base expression even in the
         presence of nested functions (or to be more precise, in the
         presence of nested DW_TAG_subprogram DIEs).
      */
      Int     sp; /* [sp] is innermost active entry; sp==-1 for empty
                     stack */
      Int     stack_size; /* nr of entries allocated in each array below */
      XArray **ranges; /* XArray of AddrRange */
      Int     *level;  /* D3 DIE levels */
      Bool    *isFunc; /* from DW_TAG_subprogram? */
      GExpr  **fbGX;   /* if isFunc, contains the FB expr, else NULL */
      /* The fndn_ix file name/dirname table.  Is a mapping from dwarf
         integer index to the index in di->fndnpool.
         read_filename_table appends UInt values to it. */
      XArray* /* of UInt* */ fndn_ix_Table;
   }
   D3VarParser;
1809
1810 /* Completely initialise a variable parser object */
1811 static void
1812 var_parser_init ( D3VarParser *parser )
1813 {
1814    parser->sp = -1;
1815    parser->stack_size = 0;
1816    parser->ranges = NULL;
1817    parser->level  = NULL;
1818    parser->isFunc = NULL;
1819    parser->fbGX = NULL;
1820    parser->fndn_ix_Table = NULL;
1821 }
1822
1823 /* Release any memory hanging off a variable parser object */
1824 static void
1825 var_parser_release ( D3VarParser *parser )
1826 {
1827    ML_(dinfo_free)( parser->ranges );
1828    ML_(dinfo_free)( parser->level );
1829    ML_(dinfo_free)( parser->isFunc );
1830    ML_(dinfo_free)( parser->fbGX );
1831 }
1832
1833 static void varstack_show ( const D3VarParser* parser, const HChar* str )
1834 {
1835    Word i, j;
1836    VG_(printf)("  varstack (%s) {\n", str);
1837    for (i = 0; i <= parser->sp; i++) {
1838       XArray* xa = parser->ranges[i];
1839       vg_assert(xa);
1840       VG_(printf)("    [%ld] (level %d)", i, parser->level[i]);
1841       if (parser->isFunc[i]) {
1842          VG_(printf)(" (fbGX=%p)", parser->fbGX[i]);
1843       } else {
1844          vg_assert(parser->fbGX[i] == NULL);
1845       }
1846       VG_(printf)(": ");
1847       if (VG_(sizeXA)( xa ) == 0) {
1848          VG_(printf)("** empty PC range array **");
1849       } else {
1850          for (j = 0; j < VG_(sizeXA)( xa ); j++) {
1851             AddrRange* range = (AddrRange*) VG_(indexXA)( xa, j );
1852             vg_assert(range);
1853             VG_(printf)("[%#lx,%#lx] ", range->aMin, range->aMax);
1854          }
1855       }
1856       VG_(printf)("\n");
1857    }
1858    VG_(printf)("  }\n");
1859 }
1860
1861 /* Remove from the stack, all entries with .level > 'level' */
1862 static
1863 void varstack_preen ( D3VarParser* parser, Bool td3, Int level )
1864 {
1865    Bool changed = False;
1866    vg_assert(parser->sp < parser->stack_size);
1867    while (True) {
1868       vg_assert(parser->sp >= -1);
1869       if (parser->sp == -1) break;
1870       if (parser->level[parser->sp] <= level) break;
1871       if (0)
1872          TRACE_D3("BBBBAAAA varstack_pop [newsp=%d]\n", parser->sp-1);
1873       vg_assert(parser->ranges[parser->sp]);
1874       /* Who allocated this xa?  get_range_list() or
1875          unitary_range_list(). */
1876       VG_(deleteXA)( parser->ranges[parser->sp] );
1877       parser->sp--;
1878       changed = True;
1879    }
1880    if (changed && td3)
1881       varstack_show( parser, "after preen" );
1882 }
1883
1884 static void varstack_push ( const CUConst* cc,
1885                             D3VarParser* parser,
1886                             Bool td3,
1887                             XArray* ranges, Int level,
1888                             Bool    isFunc, GExpr* fbGX ) {
1889    if (0)
1890    TRACE_D3("BBBBAAAA varstack_push[newsp=%d]: %d  %p\n",
1891             parser->sp+1, level, ranges);
1892
1893    /* First we need to zap everything >= 'level', as we are about to
1894       replace any previous entry at 'level', so .. */
1895    varstack_preen(parser, /*td3*/False, level-1);
1896
1897    vg_assert(parser->sp >= -1);
1898    vg_assert(parser->sp < parser->stack_size);
1899    if (parser->sp == parser->stack_size - 1) {
1900       parser->stack_size += 48;
1901       parser->ranges =
1902          ML_(dinfo_realloc)("di.readdwarf3.varpush.1", parser->ranges,
1903                             parser->stack_size * sizeof parser->ranges[0]);
1904       parser->level =
1905          ML_(dinfo_realloc)("di.readdwarf3.varpush.2", parser->level,
1906                             parser->stack_size * sizeof parser->level[0]);
1907       parser->isFunc =
1908          ML_(dinfo_realloc)("di.readdwarf3.varpush.3", parser->isFunc,
1909                             parser->stack_size * sizeof parser->isFunc[0]);
1910       parser->fbGX =
1911          ML_(dinfo_realloc)("di.readdwarf3.varpush.4", parser->fbGX,
1912                             parser->stack_size * sizeof parser->fbGX[0]);
1913    }
1914    if (parser->sp >= 0)
1915       vg_assert(parser->level[parser->sp] < level);
1916    parser->sp++;
1917    vg_assert(ranges != NULL);
1918    if (!isFunc) vg_assert(fbGX == NULL);
1919    parser->ranges[parser->sp] = ranges;
1920    parser->level[parser->sp]  = level;
1921    parser->isFunc[parser->sp] = isFunc;
1922    parser->fbGX[parser->sp]   = fbGX;
1923    if (TD3)
1924       varstack_show( parser, "after push" );
1925 }
1926
1927
1928 /* cts is derived from a DW_AT_location and so refers either to a
1929    location expression or to a location list.  Figure out which, and
1930    in both cases bundle the expression or location list into a
1931    so-called GExpr (guarded expression). */
1932 __attribute__((noinline))
1933 static GExpr* get_GX ( const CUConst* cc, Bool td3, const FormContents* cts )
1934 {
1935    GExpr* gexpr = NULL;
1936    if (cts->szB < 0) {
1937       /* represents a non-empty in-line location expression, and
1938          cts->u.cur points at the image bytes */
1939       gexpr = make_singleton_GX( cts->u.cur, (ULong)(- cts->szB) );
1940    }
1941    else
1942    if (cts->szB > 0) {
1943       /* represents a location list.  cts->u.val is the offset of it
1944          in .debug_loc. */
1945       if (!cc->cu_svma_known)
1946          cc->barf("get_GX: location list, but CU svma is unknown");
1947       gexpr = make_general_GX( cc, td3, cts->u.val, cc->cu_svma );
1948    }
1949    else {
1950       vg_assert(0); /* else caller is bogus */
1951    }
1952    return gexpr;
1953 }
1954
1955 static
1956 HChar * get_line_str (struct _DebugInfo* di, Bool is_dw64,
1957                       Cursor *data, const UInt form,
1958                       DiSlice debugstr_img, DiSlice debuglinestr_img)
1959 {
1960    HChar *str = NULL;
1961    switch (form) {
1962    case DW_FORM_string: {
1963       DiCursor distr = get_AsciiZ(data);
1964       str = ML_(cur_step_strdup)(&distr, "di.gls.string");
1965       break;
1966    }
1967    case DW_FORM_strp: {
1968       UWord uw = (UWord)get_Dwarfish_UWord( data, is_dw64 );
1969       DiCursor distr
1970          = ML_(cur_plus)( ML_(cur_from_sli)(debugstr_img), uw );
1971       str = ML_(cur_read_strdup)(distr, "di.gls.strp");
1972       break;
1973    }
1974    case DW_FORM_line_strp: {
1975       UWord uw = (UWord)get_Dwarfish_UWord( data, is_dw64 );
1976       DiCursor distr
1977          = ML_(cur_plus)( ML_(cur_from_sli)(debuglinestr_img), uw );
1978       str = ML_(cur_read_strdup)(distr, "di.gls.line_strp");
1979       break;
1980    }
1981    default:
1982       ML_(symerr)(di, True,
1983                   "Unknown path string FORM in .debug_line");
1984       break;
1985    }
1986    return str;
1987 }
1988
1989 static
1990 Int get_line_ndx (struct _DebugInfo* di,
1991                   Cursor *data, const UInt form)
1992 {
1993    Int res = 0;
1994    switch (form) {
1995    case DW_FORM_data1:
1996       res = get_UChar(data);
1997       break;
1998    case DW_FORM_data2:
1999       res = get_UShort(data);
2000       break;
2001    case DW_FORM_udata:
2002       res = get_ULEB128(data);
2003       break;
2004    default:
2005       ML_(symerr)(di, True,
2006                   "Unknown directory_index value FORM in .debug_line");
2007       break;
2008    }
2009    return res;
2010 }
2011
2012 static
2013 void skip_line_form (struct _DebugInfo* di, Bool is_dw64,
2014                          Cursor *d, const UInt form)
2015 {
2016    switch (form) {
2017    case DW_FORM_block: {
2018       ULong len = get_ULEB128(d);
2019       advance_position_of_Cursor (d, len);
2020       break;
2021    }
2022    case DW_FORM_block1: {
2023       UChar len = get_UChar(d);
2024       advance_position_of_Cursor (d, len);
2025       break;
2026    }
2027    case DW_FORM_block2: {
2028       UShort len = get_UShort(d);
2029       advance_position_of_Cursor (d, len);
2030       break;
2031    }
2032    case DW_FORM_block4: {
2033       UInt len = get_UInt(d);
2034       advance_position_of_Cursor (d, len);
2035       break;
2036    }
2037    case DW_FORM_flag:
2038    case DW_FORM_data1:
2039       advance_position_of_Cursor (d, 1);
2040       break;
2041    case DW_FORM_data2:
2042       advance_position_of_Cursor (d, 2);
2043       break;
2044    case DW_FORM_data4:
2045       advance_position_of_Cursor (d, 4);
2046       break;
2047    case DW_FORM_data8:
2048       advance_position_of_Cursor (d, 8);
2049       break;
2050    case DW_FORM_data16:
2051       advance_position_of_Cursor (d, 16);
2052       break;
2053    case DW_FORM_string:
2054       (void)get_AsciiZ (d);
2055       break;
2056    case DW_FORM_strp:
2057    case DW_FORM_line_strp:
2058    case DW_FORM_sec_offset:
2059       advance_position_of_Cursor (d, is_dw64 ? 8 : 4);
2060       break;
2061    case DW_FORM_udata:
2062       (void)get_ULEB128(d);
2063       break;
2064    case DW_FORM_sdata:
2065       (void)get_SLEB128(d);
2066       break;
2067    default:
2068       ML_(symerr)(di, True, "Unknown FORM in .debug_line");
2069       break;
2070    }
2071 }
2072
2073 /* Returns an xarray* of directory names (indexed by the dwarf dirname
2074    integer).
2075    If 'compdir' is NULL, entry [0] will be set to "."
2076    otherwise entry [0] is set to compdir.
2077    Entry [0] basically means "the current directory of the compilation",
2078    whatever that means, according to the DWARF3 spec.
2079    FIXME??? readdwarf3.c/readdwarf.c have a lot of duplicated code */
2080 static
2081 XArray* read_dirname_xa (DebugInfo* di, UShort version, const HChar *compdir,
2082                          Cursor *c, const CUConst *cc,
2083                          Bool td3 )
2084 {
2085    XArray*        dirname_xa;   /* xarray of HChar* dirname */
2086    const HChar*   dirname;
2087    UInt           compdir_len;
2088
2089    dirname_xa = VG_(newXA) (ML_(dinfo_zalloc), "di.rdxa.1", ML_(dinfo_free),
2090                             sizeof(HChar*) );
2091
2092    if (compdir == NULL) {
2093       dirname = ".";
2094       compdir_len = 1;
2095    } else {
2096       dirname = compdir;
2097       compdir_len = VG_(strlen)(compdir);
2098    }
2099
2100    /* For version 5, the compdir is the first (zero) entry. */
2101    if (version < 5)
2102       VG_(addToXA) (dirname_xa, &dirname);
2103
2104    if (version < 5) {
2105       TRACE_D3("The Directory Table%s\n",
2106                peek_UChar(c) == 0 ? " is empty." : ":" );
2107
2108       while (peek_UChar(c) != 0) {
2109
2110          DiCursor cur = get_AsciiZ(c);
2111          HChar* data_str = ML_(cur_read_strdup)( cur, "dirname_xa.1" );
2112          TRACE_D3("  %s\n", data_str);
2113
2114          /* If data_str[0] is '/', then 'data' is an absolute path and we
2115             don't mess with it.  Otherwise, construct the
2116             path 'compdir' ++ "/" ++ 'data'. */
2117
2118          if (data_str[0] != '/'
2119              /* not an absolute path */
2120              && compdir
2121              /* actually got something sensible for compdir */
2122              && compdir_len)
2123          {
2124             SizeT  len = compdir_len + 1 + VG_(strlen)(data_str);
2125             HChar *buf = ML_(dinfo_zalloc)("dirname_xa.2", len + 1);
2126
2127             VG_(strcpy)(buf, compdir);
2128             VG_(strcat)(buf, "/");
2129             VG_(strcat)(buf, data_str);
2130
2131             dirname = ML_(addStr)(di, buf, len);
2132             VG_(addToXA) (dirname_xa, &dirname);
2133             if (0) VG_(printf)("rel path  %s\n", buf);
2134             ML_(dinfo_free)(buf);
2135          } else {
2136             /* just use 'data'. */
2137             dirname = ML_(addStr)(di,data_str,-1);
2138             VG_(addToXA) (dirname_xa, &dirname);
2139             if (0) VG_(printf)("abs path  %s\n", data_str);
2140          }
2141
2142          ML_(dinfo_free)(data_str);
2143       }
2144    } else {
2145       UChar forms[256];
2146       UChar p_ndx = 0;
2147       UInt directories_count;
2148       UChar directory_entry_format_count;
2149       UInt n;
2150       DiSlice debugstr_img = cc->escn_debug_str;
2151       DiSlice debuglinestr_img = cc->escn_debug_line_str;
2152
2153       directory_entry_format_count = get_UChar(c);
2154       for (n = 0; n < directory_entry_format_count; n++) {
2155          UInt lnct = get_ULEB128(c);
2156          UInt form = get_ULEB128(c);
2157          if (lnct == DW_LNCT_path)
2158             p_ndx = n;
2159          forms[n] = form;
2160       }
2161       directories_count = get_ULEB128(c);
2162       TRACE_D3("The Directory Table%s\n",
2163                directories_count == 0 ? " is empty." : ":" );
2164
2165       for (n = 0; n < directories_count; n++) {
2166          UInt f;
2167          for (f = 0; f < directory_entry_format_count; f++) {
2168             UInt form = forms[f];
2169             if (f == p_ndx) {
2170                HChar *data_str = get_line_str (di, cc->is_dw64, c, form,
2171                                                debugstr_img,
2172                                                debuglinestr_img);
2173                TRACE_D3("  %s\n", data_str);
2174
2175                /* If data_str[0] is '/', then 'data' is an absolute path and we
2176                   don't mess with it.  Otherwise, construct the
2177                   path 'compdir' ++ "/" ++ 'data'. */
2178
2179                if (data_str[0] != '/'
2180                    /* not an absolute path */
2181                    && compdir
2182                    /* actually got something sensible for compdir */
2183                    && compdir_len)
2184                {
2185                   SizeT  len = compdir_len + 1 + VG_(strlen)(data_str);
2186                   HChar *buf = ML_(dinfo_zalloc)("dirname_xa.2", len + 1);
2187
2188                   VG_(strcpy)(buf, compdir);
2189                   VG_(strcat)(buf, "/");
2190                   VG_(strcat)(buf, data_str);
2191
2192                   dirname = ML_(addStr)(di, buf, len);
2193                   VG_(addToXA) (dirname_xa, &dirname);
2194                   if (0) VG_(printf)("rel path  %s\n", buf);
2195                   ML_(dinfo_free)(buf);
2196                } else {
2197                   /* just use 'data'. */
2198                   dirname = ML_(addStr)(di,data_str,-1);
2199                   VG_(addToXA) (dirname_xa, &dirname);
2200                   if (0) VG_(printf)("abs path  %s\n", data_str);
2201                }
2202
2203                ML_(dinfo_free)(data_str);
2204             } else {
2205                skip_line_form (di, cc->is_dw64, c, form);
2206             }
2207          }
2208       }
2209    }
2210
2211    TRACE_D3 ("\n");
2212
2213    if (version < 5 && get_UChar (c) != 0) {
2214       ML_(symerr)(NULL, True,
2215                   "could not get NUL at end of DWARF directory table");
2216       VG_(deleteXA)(dirname_xa);
2217       return NULL;
2218    }
2219
2220    return dirname_xa;
2221 }
2222
2223 static
2224 void read_filename_table( /*MOD*/XArray* /* of UInt* */ fndn_ix_Table,
2225                           const HChar* compdir,
2226                           const CUConst* cc, ULong debug_line_offset,
2227                           Bool td3 )
2228 {
2229    Bool   is_dw64;
2230    Cursor c;
2231    Word   i;
2232    UShort version;
2233    UChar  opcode_base;
2234    const HChar* str;
2235    XArray* dirname_xa;   /* xarray of HChar* dirname */
2236    ULong  dir_xa_ix;     /* Index in dirname_xa, as read from dwarf info. */
2237    const HChar* dirname;
2238    UInt   fndn_ix;
2239
2240    vg_assert(fndn_ix_Table && cc && cc->barf);
2241    if (!ML_(sli_is_valid)(cc->escn_debug_line)
2242        || cc->escn_debug_line.szB <= debug_line_offset) {
2243       cc->barf("read_filename_table: .debug_line is missing?");
2244    }
2245
2246    init_Cursor( &c, cc->escn_debug_line, debug_line_offset, cc->barf,
2247                 "Overrun whilst reading .debug_line section(1)" );
2248
2249    /* unit_length = */
2250    get_Initial_Length( &is_dw64, &c,
2251                        "read_filename_table: invalid initial-length field" );
2252    version = get_UShort( &c );
2253    if (version != 2 && version != 3 && version != 4 && version != 5)
2254      cc->barf("read_filename_table: Only DWARF version 2, 3, 4 and 5 "
2255               "line info is currently supported.");
2256    if (version >= 5) {
2257       /* addrs_size = */ get_UChar( &c );
2258       /* seg_size =   */ get_UChar( &c );
2259    }
2260    /*header_length              = (ULong)*/ get_Dwarfish_UWord( &c, is_dw64 );
2261    /*minimum_instruction_length = */ get_UChar( &c );
2262    if (version >= 4)
2263       /*maximum_operations_per_insn = */ get_UChar( &c );
2264    /*default_is_stmt            = */ get_UChar( &c );
2265    /*line_base                  = (Char)*/ get_UChar( &c );
2266    /*line_range                 = */ get_UChar( &c );
2267    opcode_base                = get_UChar( &c );
2268    /* skip over "standard_opcode_lengths" */
2269    for (i = 1; i < (Word)opcode_base; i++)
2270      (void)get_UChar( &c );
2271
2272    dirname_xa = read_dirname_xa(cc->di, version, compdir, &c, cc, td3);
2273
2274    /* Read and record the file names table */
2275    vg_assert( VG_(sizeXA)( fndn_ix_Table ) == 0 );
2276    if (version < 5) {
2277       /* Add a dummy index-zero entry.  DWARF3 numbers its files
2278          from 1, for some reason. */
2279       fndn_ix = ML_(addFnDn) ( cc->di, "<unknown_file>", NULL );
2280       VG_(addToXA)( fndn_ix_Table, &fndn_ix );
2281       while (peek_UChar(&c) != 0) {
2282          DiCursor cur = get_AsciiZ(&c);
2283          str = ML_(addStrFromCursor)( cc->di, cur );
2284          dir_xa_ix = get_ULEB128( &c );
2285          if (dirname_xa != NULL
2286              && dir_xa_ix >= 0 && dir_xa_ix < VG_(sizeXA) (dirname_xa))
2287             dirname = *(HChar**)VG_(indexXA) ( dirname_xa, dir_xa_ix );
2288          else
2289             dirname = NULL;
2290          fndn_ix = ML_(addFnDn)( cc->di, str, dirname);
2291          TRACE_D3("  read_filename_table: %ld fndn_ix %u %s %s\n",
2292                   VG_(sizeXA)(fndn_ix_Table), fndn_ix,
2293                   dirname, str);
2294          VG_(addToXA)( fndn_ix_Table, &fndn_ix );
2295          (void)get_ULEB128( &c ); /* skip last mod time */
2296          (void)get_ULEB128( &c ); /* file size */
2297       }
2298    } else {
2299       UChar forms[256];
2300       UChar p_ndx = 0, d_ndx = 0;
2301       UInt file_names_count;
2302       UChar file_names_entry_format_count;
2303       UInt n;
2304       DiSlice debugstr_img = cc->escn_debug_str;
2305       DiSlice debuglinestr_img = cc->escn_debug_line_str;
2306       file_names_entry_format_count = get_UChar( &c );
2307       for (n = 0; n < file_names_entry_format_count; n++) {
2308          UInt lnct = get_ULEB128( &c );
2309          UInt form = get_ULEB128( &c );
2310          if (lnct == DW_LNCT_path)
2311             p_ndx = n;
2312          if (lnct == DW_LNCT_directory_index)
2313             d_ndx = n;
2314          forms[n] = form;
2315       }
2316       file_names_count = get_ULEB128( &c );
2317       for (n = 0; n < file_names_count; n++) {
2318          UInt f;
2319          dir_xa_ix  = 0;
2320          str = NULL;
2321          for (f = 0; f < file_names_entry_format_count; f++) {
2322             UInt form = forms[f];
2323             if (f == p_ndx)
2324                str = get_line_str (cc->di, cc->is_dw64, &c, form,
2325                                    debugstr_img, debuglinestr_img);
2326             else if (f == d_ndx)
2327                dir_xa_ix = get_line_ndx (cc->di, &c, form);
2328             else
2329                skip_line_form (cc->di, cc->is_dw64, &c, form);
2330          }
2331
2332          if (dirname_xa != NULL
2333              && dir_xa_ix >= 0 && dir_xa_ix < VG_(sizeXA) (dirname_xa))
2334             dirname = *(HChar**)VG_(indexXA) ( dirname_xa, dir_xa_ix );
2335          else
2336             dirname = NULL;
2337          fndn_ix = ML_(addFnDn)( cc->di, str, dirname);
2338          TRACE_D3("  read_filename_table: %ld fndn_ix %u %s %s\n",
2339                   VG_(sizeXA)(fndn_ix_Table), fndn_ix,
2340                   dirname, str);
2341          VG_(addToXA)( fndn_ix_Table, &fndn_ix );
2342       }
2343    }
2344    /* We're done!  The rest of it is not interesting. */
2345    if (dirname_xa != NULL)
2346       VG_(deleteXA)(dirname_xa);
2347 }
2348
2349 /* setup_cu_svma to be called when a cu is found at level 0,
2350    to establish the cu_svma. */
2351 static void setup_cu_svma(CUConst* cc, Bool have_lo, Addr ip_lo, Bool td3)
2352 {
2353    Addr cu_svma;
2354    /* We have potentially more than one type of parser parsing the
2355       dwarf information. At least currently, each parser establishes
2356       the cu_svma. So, in case cu_svma_known, we check that the same
2357       result is obtained by the 2nd parsing of the cu.
2358
2359       Alternatively, we could reset cu_svma_known after each parsing
2360       and then check that we only see a single DW_TAG_compile_unit DIE
2361       at level 0, DWARF3 only allows exactly one top level DIE per
2362       CU. */
2363
2364    if (have_lo)
2365       cu_svma = ip_lo;
2366    else {
2367       /* Now, it may be that this DIE doesn't tell us the CU's
2368          SVMA, by way of not having a DW_AT_low_pc.  That's OK --
2369          the CU doesn't *have* to have its SVMA specified.
2370
2371          But as per last para D3 spec sec 3.1.1 ("Normal and
2372          Partial Compilation Unit Entries", "If the base address
2373          (viz, the SVMA) is undefined, then any DWARF entry of
2374          structure defined interms of the base address of that
2375          compilation unit is not valid.".  So that means, if whilst
2376          processing the children of this top level DIE (or their
2377          children, etc) we see a DW_AT_range, and cu_svma_known is
2378          False, then the DIE that contains it is (per the spec)
2379          invalid, and we can legitimately stop and complain. */
2380       /* .. whereas The Reality is, simply assume the SVMA is zero
2381          if it isn't specified. */
2382       cu_svma = 0;
2383    }
2384
2385    if (cc->cu_svma_known) {
2386       vg_assert (cu_svma == cc->cu_svma);
2387    } else {
2388       cc->cu_svma_known = True;
2389       cc->cu_svma = cu_svma;
2390       if (0)
2391          TRACE_D3("setup_cu_svma: acquire CU_SVMA of %p\n", (void*) cc->cu_svma);
2392    }
2393 }
2394
2395 static void trace_DIE(
2396    DW_TAG dtag,
2397    UWord posn,
2398    Int level,
2399    UWord saved_die_c_offset,
2400    const g_abbv *abbv,
2401    const CUConst* cc)
2402 {
2403    Cursor c;
2404    FormContents cts;
2405    UWord sibling = 0;
2406    UInt nf_i;
2407    Bool  debug_types_flag;
2408    Bool  alt_flag;
2409    Cursor check_skip;
2410    UWord check_sibling = 0;
2411
2412    posn = uncook_die( cc, posn, &debug_types_flag, &alt_flag );
2413    init_Cursor (&c,
2414                 debug_types_flag ? cc->escn_debug_types :
2415                 alt_flag ? cc->escn_debug_info_alt : cc->escn_debug_info,
2416                 saved_die_c_offset, cc->barf,
2417                 "Overrun trace_DIE");
2418    check_skip = c;
2419    VG_(printf)(" <%d><%lx>: Abbrev Number: %llu (%s)%s%s\n",
2420                level, posn, (ULong) abbv->abbv_code, ML_(pp_DW_TAG)( dtag ),
2421                debug_types_flag ? " (in .debug_types)" : "",
2422                alt_flag ? " (in alternate .debug_info)" : "");
2423    nf_i = 0;
2424    while (True) {
2425       DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
2426       DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
2427       const name_form *nf = &abbv->nf[nf_i];
2428       nf_i++;
2429       if (attr == 0 && form == 0) break;
2430       VG_(printf)("     %-18s: ", ML_(pp_DW_AT)(attr));
2431       /* Get the form contents, so as to print them */
2432       get_Form_contents( &cts, cc, &c, True, nf );
2433       if (attr == DW_AT_sibling && cts.szB > 0) {
2434          sibling = cts.u.val;
2435       }
2436       VG_(printf)("\t\n");
2437    }
2438
2439    /* Verify that skipping a DIE gives the same displacement as
2440       tracing (i.e. reading) a DIE. If there is an inconsistency in
2441       the nr of bytes read by get_Form_contents and get_Form_szB, this
2442       should be detected by the below. Using --trace-symtab=yes
2443       --read-var-info=yes will ensure all DIEs are systematically
2444       verified. */
2445    skip_DIE (&check_sibling, &check_skip, abbv, cc);
2446    vg_assert (check_sibling == sibling);
2447    vg_assert (get_position_of_Cursor (&check_skip)
2448               == get_position_of_Cursor (&c));
2449 }
2450
2451 __attribute__((noreturn))
2452 static void dump_bad_die_and_barf(
2453    const HChar *whichparser,
2454    DW_TAG dtag,
2455    UWord posn,
2456    Int level,
2457    Cursor* c_die,
2458    UWord saved_die_c_offset,
2459    const g_abbv *abbv,
2460    const CUConst* cc)
2461 {
2462    trace_DIE (dtag, posn, level, saved_die_c_offset, abbv, cc);
2463    VG_(printf)("%s:\n", whichparser);
2464    cc->barf("confused by the above DIE");
2465 }
2466
2467 __attribute__((noinline))
2468 static void bad_DIE_confusion(int linenr)
2469 {
2470    VG_(printf)("\nparse DIE(readdwarf3.c:%d): confused by:\n", linenr);
2471 }
2472 #define goto_bad_DIE do {bad_DIE_confusion(__LINE__); goto bad_DIE;} while (0)
2473
2474 __attribute__((noinline))
2475 static void parse_var_DIE (
2476    /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
2477    /*MOD*/XArray* /* of TempVar* */ tempvars,
2478    /*MOD*/XArray* /* of GExpr* */ gexprs,
2479    /*MOD*/D3VarParser* parser,
2480    DW_TAG dtag,
2481    UWord posn,
2482    Int level,
2483    Cursor* c_die,
2484    const g_abbv *abbv,
2485    CUConst* cc,
2486    Bool td3
2487 )
2488 {
2489    FormContents cts;
2490    UInt nf_i;
2491
2492    UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
2493
2494    varstack_preen( parser, td3, level-1 );
2495
2496    if (dtag == DW_TAG_compile_unit
2497        || dtag == DW_TAG_type_unit
2498        || dtag == DW_TAG_partial_unit) {
2499       Bool have_lo    = False;
2500       Bool have_hi1   = False;
2501       Bool hiIsRelative = False;
2502       Bool have_range = False;
2503       Addr ip_lo    = 0;
2504       Addr ip_hi1   = 0;
2505       Addr rangeoff = 0;
2506       const HChar *compdir = NULL;
2507       nf_i = 0;
2508       while (True) {
2509          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
2510          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
2511          const name_form *nf = &abbv->nf[nf_i];
2512          nf_i++;
2513          if (attr == 0 && form == 0) break;
2514          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
2515          if (attr == DW_AT_low_pc && cts.szB > 0) {
2516             ip_lo   = cts.u.val;
2517             have_lo = True;
2518          }
2519          if (attr == DW_AT_high_pc && cts.szB > 0) {
2520             ip_hi1   = cts.u.val;
2521             have_hi1 = True;
2522             if (form != DW_FORM_addr)
2523                hiIsRelative = True;
2524          }
2525          if (attr == DW_AT_ranges && cts.szB > 0) {
2526             rangeoff   = cts.u.val;
2527             have_range = True;
2528          }
2529          if (attr == DW_AT_comp_dir) {
2530             if (cts.szB >= 0)
2531                cc->barf("parse_var_DIE compdir: expecting indirect string");
2532             HChar *str = ML_(cur_read_strdup)( cts.u.cur,
2533                                                "parse_var_DIE.compdir" );
2534             compdir = ML_(addStr)(cc->di, str, -1);
2535             ML_(dinfo_free) (str);
2536          }
2537          if (attr == DW_AT_stmt_list && cts.szB > 0) {
2538             read_filename_table( parser->fndn_ix_Table, compdir,
2539                                  cc, cts.u.val, td3 );
2540          }
2541       }
2542       if (have_lo && have_hi1 && hiIsRelative)
2543          ip_hi1 += ip_lo;
2544
2545       /* Now, does this give us an opportunity to find this
2546          CU's svma? */
2547       if (level == 0)
2548          setup_cu_svma(cc, have_lo, ip_lo, td3);
2549
2550       /* Do we have something that looks sane? */
2551       if (have_lo && have_hi1 && (!have_range)) {
2552          if (ip_lo < ip_hi1)
2553             varstack_push( cc, parser, td3,
2554                            unitary_range_list(ip_lo, ip_hi1 - 1),
2555                            level,
2556                            False/*isFunc*/, NULL/*fbGX*/ );
2557          else if (ip_lo == 0 && ip_hi1 == 0)
2558             /* CU has no code, presumably?
2559                Such situations have been encountered for code
2560                compiled with -ffunction-sections -fdata-sections
2561                and linked with --gc-sections. Completely
2562                eliminated CU gives such 0 lo/hi pc. Similarly
2563                to a CU which has no lo/hi/range pc, we push
2564                an empty range list. */
2565             varstack_push( cc, parser, td3,
2566                            empty_range_list(),
2567                            level,
2568                            False/*isFunc*/, NULL/*fbGX*/ );
2569       } else
2570       if ((!have_lo) && (!have_hi1) && have_range) {
2571          varstack_push( cc, parser, td3,
2572                         get_range_list( cc, td3,
2573                                         rangeoff, cc->cu_svma ),
2574                         level,
2575                         False/*isFunc*/, NULL/*fbGX*/ );
2576       } else
2577       if ((!have_lo) && (!have_hi1) && (!have_range)) {
2578          /* CU has no code, presumably? */
2579          varstack_push( cc, parser, td3,
2580                         empty_range_list(),
2581                         level,
2582                         False/*isFunc*/, NULL/*fbGX*/ );
2583       } else
2584       if (have_lo && (!have_hi1) && have_range && ip_lo == 0) {
2585          /* broken DIE created by gcc-4.3.X ?  Ignore the
2586             apparently-redundant DW_AT_low_pc and use the DW_AT_ranges
2587             instead. */
2588          varstack_push( cc, parser, td3,
2589                         get_range_list( cc, td3,
2590                                         rangeoff, cc->cu_svma ),
2591                         level,
2592                         False/*isFunc*/, NULL/*fbGX*/ );
2593       } else {
2594          if (0) VG_(printf)("I got hlo %d hhi1 %d hrange %d\n",
2595                             (Int)have_lo, (Int)have_hi1, (Int)have_range);
2596          goto_bad_DIE;
2597       }
2598    }
2599
2600    if (dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram) {
2601       Bool   have_lo    = False;
2602       Bool   have_hi1   = False;
2603       Bool   have_range = False;
2604       Bool   hiIsRelative = False;
2605       Addr   ip_lo      = 0;
2606       Addr   ip_hi1     = 0;
2607       Addr   rangeoff   = 0;
2608       Bool   isFunc     = dtag == DW_TAG_subprogram;
2609       GExpr* fbGX       = NULL;
2610       nf_i = 0;
2611       while (True) {
2612          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
2613          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
2614          const name_form *nf = &abbv->nf[nf_i];
2615          nf_i++;
2616          if (attr == 0 && form == 0) break;
2617          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
2618          if (attr == DW_AT_low_pc && cts.szB > 0) {
2619             ip_lo   = cts.u.val;
2620             have_lo = True;
2621          }
2622          if (attr == DW_AT_high_pc && cts.szB > 0) {
2623             ip_hi1   = cts.u.val;
2624             have_hi1 = True;
2625             if (form != DW_FORM_addr)
2626                hiIsRelative = True;
2627          }
2628          if (attr == DW_AT_ranges && cts.szB > 0) {
2629             rangeoff   = cts.u.val;
2630             have_range = True;
2631          }
2632          if (isFunc
2633              && attr == DW_AT_frame_base
2634              && cts.szB != 0 /* either scalar or nonempty block */) {
2635             fbGX = get_GX( cc, False/*td3*/, &cts );
2636             vg_assert(fbGX);
2637             VG_(addToXA)(gexprs, &fbGX);
2638          }
2639       }
2640       if (have_lo && have_hi1 && hiIsRelative)
2641          ip_hi1 += ip_lo;
2642       /* Do we have something that looks sane? */
2643       if (dtag == DW_TAG_subprogram
2644           && (!have_lo) && (!have_hi1) && (!have_range)) {
2645          /* This is legit - ignore it. Sec 3.3.3: "A subroutine entry
2646             representing a subroutine declaration that is not also a
2647             definition does not have code address or range
2648             attributes." */
2649       } else
2650       if (dtag == DW_TAG_lexical_block
2651           && (!have_lo) && (!have_hi1) && (!have_range)) {
2652          /* I believe this is legit, and means the lexical block
2653             contains no insns (whatever that might mean).  Ignore. */
2654       } else
2655       if (have_lo && have_hi1 && (!have_range)) {
2656          /* This scope supplies just a single address range. */
2657          if (ip_lo < ip_hi1)
2658             varstack_push( cc, parser, td3,
2659                            unitary_range_list(ip_lo, ip_hi1 - 1),
2660                            level, isFunc, fbGX );
2661       } else
2662       if ((!have_lo) && (!have_hi1) && have_range) {
2663          /* This scope supplies multiple address ranges via the use of
2664             a range list. */
2665          varstack_push( cc, parser, td3,
2666                         get_range_list( cc, td3,
2667                                         rangeoff, cc->cu_svma ),
2668                         level, isFunc, fbGX );
2669       } else
2670       if (have_lo && (!have_hi1) && (!have_range)) {
2671          /* This scope is bogus.  The D3 spec sec 3.4 (Lexical Block
2672             Entries) says fairly clearly that a scope must have either
2673             _range or (_low_pc and _high_pc). */
2674          /* The spec is a bit ambiguous though.  Perhaps a single byte
2675             range is intended?  See sec 2.17 (Code Addresses And Ranges) */
2676          /* This case is here because icc9 produced this:
2677          <2><13bd>: DW_TAG_lexical_block
2678             DW_AT_decl_line   : 5229
2679             DW_AT_decl_column : 37
2680             DW_AT_decl_file   : 1
2681             DW_AT_low_pc      : 0x401b03
2682          */
2683          /* Ignore (seems safe than pushing a single byte range) */
2684       } else
2685          goto_bad_DIE;
2686    }
2687
2688    if (dtag == DW_TAG_variable || dtag == DW_TAG_formal_parameter) {
2689       const  HChar* name = NULL;
2690       UWord  typeR       = D3_INVALID_CUOFF;
2691       Bool   global      = False;
2692       GExpr* gexpr       = NULL;
2693       Int    n_attrs     = 0;
2694       UWord  abs_ori     = (UWord)D3_INVALID_CUOFF;
2695       Int    lineNo      = 0;
2696       UInt   fndn_ix     = 0;
2697       nf_i = 0;
2698       while (True) {
2699          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
2700          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
2701          const name_form *nf = &abbv->nf[nf_i];
2702          nf_i++;
2703          if (attr == 0 && form == 0) break;
2704          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
2705          n_attrs++;
2706          if (attr == DW_AT_name && cts.szB < 0) {
2707             name = ML_(addStrFromCursor)( cc->di, cts.u.cur );
2708          }
2709          if (attr == DW_AT_location
2710              && cts.szB != 0 /* either scalar or nonempty block */) {
2711             gexpr = get_GX( cc, False/*td3*/, &cts );
2712             vg_assert(gexpr);
2713             VG_(addToXA)(gexprs, &gexpr);
2714          }
2715          if (attr == DW_AT_type && cts.szB > 0) {
2716             typeR = cook_die_using_form( cc, cts.u.val, form );
2717          }
2718          if (attr == DW_AT_external && cts.szB > 0 && cts.u.val > 0) {
2719             global = True;
2720          }
2721          if (attr == DW_AT_abstract_origin && cts.szB > 0) {
2722             abs_ori = (UWord)cts.u.val;
2723          }
2724          if (attr == DW_AT_declaration && cts.szB > 0 && cts.u.val > 0) {
2725             /*declaration = True;*/
2726          }
2727          if (attr == DW_AT_decl_line && cts.szB > 0) {
2728             lineNo = (Int)cts.u.val;
2729          }
2730          if (attr == DW_AT_decl_file && cts.szB > 0) {
2731             Int ftabIx = (Int)cts.u.val;
2732             if (ftabIx >= 1
2733                 && ftabIx < VG_(sizeXA)( parser->fndn_ix_Table )) {
2734                fndn_ix = *(UInt*)VG_(indexXA)( parser->fndn_ix_Table, ftabIx );
2735             }
2736             if (0) VG_(printf)("XXX filename fndn_ix = %u %s\n", fndn_ix,
2737                                ML_(fndn_ix2filename) (cc->di, fndn_ix));
2738          }
2739       }
2740       if (!global && dtag == DW_TAG_variable && level == 1) {
2741          /* Case of a static variable. It is better to declare
2742             it global as the variable is not really related to
2743             a PC range, as its address can be used by program
2744             counters outside of the ranges where it is visible . */
2745          global = True;
2746       }
2747
2748       /* We'll collect it under if one of the following three
2749          conditions holds:
2750          (1) has location and type    -> completed
2751          (2) has type only            -> is an abstract instance
2752          (3) has location and abs_ori -> is a concrete instance
2753          Name, fndn_ix and line number are all optional frills.
2754       */
2755       if ( /* 1 */ (gexpr && typeR != D3_INVALID_CUOFF)
2756            /* 2 */ || (typeR != D3_INVALID_CUOFF)
2757            /* 3 */ || (gexpr && abs_ori != (UWord)D3_INVALID_CUOFF) ) {
2758
2759          /* Add this variable to the list of interesting looking
2760             variables.  Crucially, note along with it the address
2761             range(s) associated with the variable, which for locals
2762             will be the address ranges at the top of the varparser's
2763             stack. */
2764          GExpr*   fbGX = NULL;
2765          Word     i, nRanges;
2766          const XArray*  /* of AddrRange */ xa;
2767          TempVar* tv;
2768          /* Stack can't be empty; we put a dummy entry on it for the
2769             entire address range before starting with the DIEs for
2770             this CU. */
2771          vg_assert(parser->sp >= 0);
2772
2773          /* If this is a local variable (non-global), try to find
2774             the GExpr for the DW_AT_frame_base of the containing
2775             function.  It should have been pushed on the stack at the
2776             time we encountered its DW_TAG_subprogram DIE, so the way
2777             to find it is to scan back down the stack looking for it.
2778             If there isn't an enclosing stack entry marked 'isFunc'
2779             then we must be seeing variable or formal param DIEs
2780             outside of a function, so we deem the Dwarf to be
2781             malformed if that happens.  Note that the fbGX may be NULL
2782             if the containing DT_TAG_subprogram didn't supply a
2783             DW_AT_frame_base -- that's OK, but there must actually be
2784             a containing DW_TAG_subprogram. */
2785          if (!global) {
2786             Bool found = False;
2787             for (i = parser->sp; i >= 0; i--) {
2788                if (parser->isFunc[i]) {
2789                   fbGX = parser->fbGX[i];
2790                   found = True;
2791                   break;
2792                }
2793             }
2794             if (!found) {
2795                if (0 && VG_(clo_verbosity) >= 0) {
2796                   VG_(message)(Vg_DebugMsg,
2797                      "warning: parse_var_DIE: non-global variable "
2798                      "outside DW_TAG_subprogram\n");
2799                }
2800                /* goto_bad_DIE; */
2801                /* This seems to happen a lot.  Just ignore it -- if,
2802                   when we come to evaluation of the location (guarded)
2803                   expression, it requires a frame base value, and
2804                   there's no expression for that, then evaluation as a
2805                   whole will fail.  Harmless - a bit of a waste of
2806                   cycles but nothing more. */
2807             }
2808          }
2809
2810          /* re "global ? 0 : parser->sp" (twice), if the var is
2811             marked 'global' then we must put it at the global scope,
2812             as only the global scope (level 0) covers the entire PC
2813             address space.  It is asserted elsewhere that level 0
2814             always covers the entire address space. */
2815          xa = parser->ranges[global ? 0 : parser->sp];
2816          nRanges = VG_(sizeXA)(xa);
2817          vg_assert(nRanges >= 0);
2818
2819          tv = ML_(dinfo_zalloc)( "di.readdwarf3.pvD.1", sizeof(TempVar) );
2820          tv->name   = name;
2821          tv->level  = global ? 0 : parser->sp;
2822          tv->typeR  = typeR;
2823          tv->gexpr  = gexpr;
2824          tv->fbGX   = fbGX;
2825          tv->fndn_ix= fndn_ix;
2826          tv->fLine  = lineNo;
2827          tv->dioff  = posn;
2828          tv->absOri = abs_ori;
2829
2830          /* See explanation on definition of type TempVar for the
2831             reason for this elaboration. */
2832          tv->nRanges = nRanges;
2833          tv->rngOneMin = 0;
2834          tv->rngOneMax = 0;
2835          tv->rngMany = NULL;
2836          if (nRanges == 1) {
2837             AddrRange* range = VG_(indexXA)(xa, 0);
2838             tv->rngOneMin = range->aMin;
2839             tv->rngOneMax = range->aMax;
2840          }
2841          else if (nRanges > 1) {
2842             /* See if we already have a range list which is
2843                structurally identical.  If so, use that; if not, clone
2844                this one, and add it to our collection. */
2845             UWord keyW, valW;
2846             if (VG_(lookupFM)( rangestree, &keyW, &valW, (UWord)xa )) {
2847                XArray* old = (XArray*)keyW;
2848                vg_assert(valW == 0);
2849                vg_assert(old != xa);
2850                tv->rngMany = old;
2851             } else {
2852                XArray* cloned = VG_(cloneXA)( "di.readdwarf3.pvD.2", xa );
2853                tv->rngMany = cloned;
2854                VG_(addToFM)( rangestree, (UWord)cloned, 0 );
2855             }
2856          }
2857
2858          VG_(addToXA)( tempvars, &tv );
2859
2860          TRACE_D3("  Recording this variable, with %ld PC range(s)\n",
2861                   VG_(sizeXA)(xa) );
2862          /* collect stats on how effective the ->ranges special
2863             casing is */
2864          if (0) {
2865             static Int ntot=0, ngt=0;
2866             ntot++;
2867             if (tv->rngMany) ngt++;
2868             if (0 == (ntot % 100000))
2869                VG_(printf)("XXXX %d tot, %d cloned\n", ntot, ngt);
2870          }
2871
2872       }
2873
2874       /* Here are some other weird cases seen in the wild:
2875
2876             We have a variable with a name and a type, but no
2877             location.  I guess that's a sign that it has been
2878             optimised away.  Ignore it.  Here's an example:
2879
2880             static Int lc_compar(void* n1, void* n2) {
2881                MC_Chunk* mc1 = *(MC_Chunk**)n1;
2882                MC_Chunk* mc2 = *(MC_Chunk**)n2;
2883                return (mc1->data < mc2->data ? -1 : 1);
2884             }
2885
2886             Both mc1 and mc2 are like this
2887             <2><5bc>: Abbrev Number: 21 (DW_TAG_variable)
2888                 DW_AT_name        : mc1
2889                 DW_AT_decl_file   : 1
2890                 DW_AT_decl_line   : 216
2891                 DW_AT_type        : <5d3>
2892
2893             whereas n1 and n2 do have locations specified.
2894
2895             ---------------------------------------------
2896
2897             We see a DW_TAG_formal_parameter with a type, but
2898             no name and no location.  It's probably part of a function type
2899             construction, thusly, hence ignore it:
2900          <1><2b4>: Abbrev Number: 12 (DW_TAG_subroutine_type)
2901              DW_AT_sibling     : <2c9>
2902              DW_AT_prototyped  : 1
2903              DW_AT_type        : <114>
2904          <2><2be>: Abbrev Number: 13 (DW_TAG_formal_parameter)
2905              DW_AT_type        : <13e>
2906          <2><2c3>: Abbrev Number: 13 (DW_TAG_formal_parameter)
2907              DW_AT_type        : <133>
2908
2909             ---------------------------------------------
2910
2911             Is very minimal, like this:
2912             <4><81d>: Abbrev Number: 44 (DW_TAG_variable)
2913                 DW_AT_abstract_origin: <7ba>
2914             What that signifies I have no idea.  Ignore.
2915
2916             ----------------------------------------------
2917
2918             Is very minimal, like this:
2919             <200f>: DW_TAG_formal_parameter
2920                 DW_AT_abstract_ori: <1f4c>
2921                 DW_AT_location    : 13440
2922             What that signifies I have no idea.  Ignore.
2923             It might be significant, though: the variable at least
2924             has a location and so might exist somewhere.
2925             Maybe we should handle this.
2926
2927             ---------------------------------------------
2928
2929             <22407>: DW_TAG_variable
2930               DW_AT_name        : (indirect string, offset: 0x6579):
2931                                   vgPlain_trampoline_stuff_start
2932               DW_AT_decl_file   : 29
2933               DW_AT_decl_line   : 56
2934               DW_AT_external    : 1
2935               DW_AT_declaration : 1
2936
2937             Nameless and typeless variable that has a location?  Who
2938             knows.  Not me.
2939             <2><3d178>: Abbrev Number: 22 (DW_TAG_variable)
2940                  DW_AT_location    : 9 byte block: 3 c0 c7 13 38 0 0 0 0
2941                                      (DW_OP_addr: 3813c7c0)
2942
2943             No, really.  Check it out.  gcc is quite simply borked.
2944             <3><168cc>: Abbrev Number: 141 (DW_TAG_variable)
2945             // followed by no attributes, and the next DIE is a sibling,
2946             // not a child
2947             */
2948    }
2949    return;
2950
2951   bad_DIE:
2952    dump_bad_die_and_barf("parse_var_DIE", dtag, posn, level,
2953                          c_die, saved_die_c_offset,
2954                          abbv,
2955                          cc);
2956    /*NOTREACHED*/
2957 }
2958
2959 typedef
2960    struct {
2961       /* The fndn_ix file name/dirname table.  Is a mapping from dwarf
2962          integer index to the index in di->fndnpool. */
2963       XArray* /* of UInt* */ fndn_ix_Table;
2964       UWord sibling; // sibling of the last read DIE (if it has a sibling).
2965    }
2966    D3InlParser;
2967
2968 /* Return the function name corresponding to absori.
2969
2970    absori is a 'cooked' reference to a DIE, i.e. absori can be either
2971    in cc->escn_debug_info or in cc->escn_debug_info_alt.
2972    get_inlFnName will uncook absori.
2973
2974    The returned value is a (permanent) string in DebugInfo's .strchunks.
2975
2976    LIMITATION: absori must point in the CU of cc. If absori points
2977    in another CU, returns "UnknownInlinedFun".
2978
2979    Here are the problems to retrieve the fun name if absori is in
2980    another CU:  the DIE reading code cannot properly extract data from
2981    another CU, as the abbv code retrieved in the other CU cannot be
2982    translated in an abbreviation. Reading data from the alternate debug
2983    info also gives problems as the string reference is also in the alternate
2984    file, but when reading the alt DIE, the string form is a 'local' string,
2985    but cannot be read in the current CU, but must be read in the alt CU.
2986    See bug 338803 comment#3 and attachment for a failed attempt to handle
2987    these problems (failed because with the patch, only one alt abbrev hash
2988    table is kept, while we must handle all abbreviations in all CUs
2989    referenced by an absori (being a reference to an alt CU, or a previous
2990    or following CU). */
2991 static const HChar* get_inlFnName (Int absori, const CUConst* cc, Bool td3)
2992 {
2993    Cursor c;
2994    const g_abbv *abbv;
2995    ULong  atag, abbv_code;
2996    UInt   has_children;
2997    UWord  posn;
2998    Bool type_flag, alt_flag;
2999    const HChar *ret = NULL;
3000    FormContents cts;
3001    UInt nf_i;
3002
3003    /* Some inlined subroutine call dwarf entries do not have the abstract
3004       origin attribute, resulting in absori being 0 (see callers of
3005       get_inlFnName). This is observed at least with gcc 6.3.0 when compiling
3006       valgrind with lto. So, in case we have a 0 absori, do not report an
3007       error, instead, rather return an unknown inlined function. */
3008    if (absori == 0) {
3009       static Bool absori0_reported = False;
3010       if (!absori0_reported && VG_(clo_verbosity) > 1) {
3011          VG_(message)(Vg_DebugMsg,
3012                       "Warning: inlined fn name without absori\n"
3013                       "is shown as UnknownInlinedFun\n");
3014          absori0_reported = True;
3015       }
3016       TRACE_D3(" <get_inlFnName>: absori is not set");
3017       return ML_(addStr)(cc->di, "UnknownInlinedFun", -1);
3018    }
3019
3020    posn = uncook_die( cc, absori, &type_flag, &alt_flag);
3021    if (type_flag)
3022       cc->barf("get_inlFnName: uncooked absori in type debug info");
3023
3024    /* LIMITATION: check we are in the same CU.
3025       If not, return unknown inlined function name. */
3026    /* if crossing between alt debug info<>normal info
3027           or posn not in the cu range,
3028       then it is in another CU. */
3029    if (alt_flag != cc->is_alt_info
3030        || posn < cc->cu_start_offset
3031        || posn >= cc->cu_start_offset + cc->unit_length) {
3032       static Bool reported = False;
3033       if (!reported && VG_(clo_verbosity) > 1) {
3034          VG_(message)(Vg_DebugMsg,
3035                       "Warning: cross-CU LIMITATION: some inlined fn names\n"
3036                       "might be shown as UnknownInlinedFun\n");
3037          reported = True;
3038       }
3039       TRACE_D3(" <get_inlFnName><%lx>: cross-CU LIMITATION", posn);
3040       return ML_(addStr)(cc->di, "UnknownInlinedFun", -1);
3041    }
3042
3043    init_Cursor (&c, cc->escn_debug_info, posn, cc->barf,
3044                 "Overrun get_inlFnName absori");
3045
3046    abbv_code = get_ULEB128( &c );
3047    abbv      = get_abbv ( cc, abbv_code);
3048    atag      = abbv->atag;
3049    TRACE_D3(" <get_inlFnName><%lx>: Abbrev Number: %llu (%s)\n",
3050             posn, abbv_code, ML_(pp_DW_TAG)( atag ) );
3051
3052    if (atag == 0)
3053       cc->barf("get_inlFnName: invalid zero tag on DIE");
3054
3055    has_children = abbv->has_children;
3056    if (has_children != DW_children_no && has_children != DW_children_yes)
3057       cc->barf("get_inlFnName: invalid has_children value");
3058
3059    if (atag != DW_TAG_subprogram)
3060       cc->barf("get_inlFnName: absori not a subprogram");
3061
3062    nf_i = 0;
3063    while (True) {
3064       DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3065       DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3066       const name_form *nf = &abbv->nf[nf_i];
3067       nf_i++;
3068       if (attr == 0 && form == 0) break;
3069       get_Form_contents( &cts, cc, &c, False/*td3*/, nf );
3070       if (attr == DW_AT_name) {
3071          HChar *fnname;
3072          if (cts.szB >= 0)
3073             cc->barf("get_inlFnName: expecting indirect string");
3074          fnname = ML_(cur_read_strdup)( cts.u.cur,
3075                                         "get_inlFnName.1" );
3076          ret = ML_(addStr)(cc->di, fnname, -1);
3077          ML_(dinfo_free) (fnname);
3078          break; /* Name found, get out of the loop, as this has priority over
3079                  DW_AT_specification. */
3080       }
3081       if (attr == DW_AT_specification) {
3082          UWord cdie;
3083
3084          if (cts.szB == 0)
3085             cc->barf("get_inlFnName: AT specification missing");
3086
3087          /* The recursive call to get_inlFnName will uncook its arg.
3088             So, we need to cook it here, so as to reference the
3089             correct section (e.g. the alt info). */
3090          cdie = cook_die_using_form(cc, (UWord)cts.u.val, form);
3091
3092          /* hoping that there is no loop */
3093          ret = get_inlFnName (cdie, cc, td3);
3094          /* Unclear if having both DW_AT_specification and DW_AT_name is
3095             possible but in any case, we do not break here.
3096             If we find later on a DW_AT_name, it will override the name found
3097             in the DW_AT_specification.*/
3098       }
3099    }
3100
3101    if (ret)
3102       return ret;
3103    else {
3104       TRACE_D3("AbsOriFnNameNotFound");
3105       return ML_(addStr)(cc->di, "AbsOriFnNameNotFound", -1);
3106    }
3107 }
3108
3109 /* Returns True if the (possibly) childrens of the current DIE are interesting
3110    to parse. Returns False otherwise.
3111    If the current DIE has a sibling, the non interesting children can
3112    maybe be skipped (if the DIE has a DW_AT_sibling).  */
3113 __attribute__((noinline))
3114 static Bool parse_inl_DIE (
3115    /*MOD*/D3InlParser* parser,
3116    DW_TAG dtag,
3117    UWord posn,
3118    Int level,
3119    Cursor* c_die,
3120    const g_abbv *abbv,
3121    CUConst* cc,
3122    Bool td3
3123 )
3124 {
3125    FormContents cts;
3126    UInt nf_i;
3127
3128    UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
3129
3130    /* Get info about DW_TAG_compile_unit and DW_TAG_partial_unit 'which
3131       in theory could also contain inlined fn calls).  */
3132    if (dtag == DW_TAG_compile_unit || dtag == DW_TAG_partial_unit) {
3133       Bool have_lo    = False;
3134       Addr ip_lo    = 0;
3135       const HChar *compdir = NULL;
3136
3137       nf_i = 0;
3138       while (True) {
3139          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3140          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3141          const name_form *nf = &abbv->nf[nf_i];
3142          nf_i++;
3143          if (attr == 0 && form == 0) break;
3144          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
3145          if (attr == DW_AT_low_pc && cts.szB > 0) {
3146             ip_lo   = cts.u.val;
3147             have_lo = True;
3148          }
3149          if (attr == DW_AT_comp_dir) {
3150             if (cts.szB >= 0)
3151                cc->barf("parse_inl_DIE compdir: expecting indirect string");
3152             HChar *str = ML_(cur_read_strdup)( cts.u.cur,
3153                                                "parse_inl_DIE.compdir" );
3154             compdir = ML_(addStr)(cc->di, str, -1);
3155             ML_(dinfo_free) (str);
3156          }
3157          if (attr == DW_AT_stmt_list && cts.szB > 0) {
3158             read_filename_table( parser->fndn_ix_Table, compdir,
3159                                  cc, cts.u.val, td3 );
3160          }
3161          if (attr == DW_AT_sibling && cts.szB > 0) {
3162             parser->sibling = cts.u.val;
3163          }
3164       }
3165       if (level == 0)
3166          setup_cu_svma (cc, have_lo, ip_lo, td3);
3167    }
3168
3169    if (dtag == DW_TAG_inlined_subroutine) {
3170       Bool   have_lo    = False;
3171       Bool   have_hi1   = False;
3172       Bool   have_range = False;
3173       Bool   hiIsRelative = False;
3174       Addr   ip_lo      = 0;
3175       Addr   ip_hi1     = 0;
3176       Addr   rangeoff   = 0;
3177       UInt   caller_fndn_ix = 0;
3178       Int caller_lineno = 0;
3179       Int inlinedfn_abstract_origin = 0;
3180       // 0 will be interpreted as no abstract origin by get_inlFnName
3181
3182       nf_i = 0;
3183       while (True) {
3184          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3185          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3186          const name_form *nf = &abbv->nf[nf_i];
3187          nf_i++;
3188          if (attr == 0 && form == 0) break;
3189          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
3190          if (attr == DW_AT_call_file && cts.szB > 0) {
3191             Int ftabIx = (Int)cts.u.val;
3192             if (ftabIx >= 1
3193                 && ftabIx < VG_(sizeXA)( parser->fndn_ix_Table )) {
3194                caller_fndn_ix = *(UInt*)
3195                           VG_(indexXA)( parser->fndn_ix_Table, ftabIx );
3196             }
3197             if (0) VG_(printf)("XXX caller_fndn_ix = %u %s\n", caller_fndn_ix,
3198                                ML_(fndn_ix2filename) (cc->di, caller_fndn_ix));
3199          }
3200          if (attr == DW_AT_call_line && cts.szB > 0) {
3201             caller_lineno = cts.u.val;
3202          }
3203
3204          if (attr == DW_AT_abstract_origin  && cts.szB > 0) {
3205             inlinedfn_abstract_origin
3206                = cook_die_using_form (cc, (UWord)cts.u.val, form);
3207          }
3208
3209          if (attr == DW_AT_low_pc && cts.szB > 0) {
3210             ip_lo   = cts.u.val;
3211             have_lo = True;
3212          }
3213          if (attr == DW_AT_high_pc && cts.szB > 0) {
3214             ip_hi1   = cts.u.val;
3215             have_hi1 = True;
3216             if (form != DW_FORM_addr)
3217                hiIsRelative = True;
3218          }
3219          if (attr == DW_AT_ranges && cts.szB > 0) {
3220             rangeoff   = cts.u.val;
3221             have_range = True;
3222          }
3223          if (attr == DW_AT_sibling && cts.szB > 0) {
3224             parser->sibling = cts.u.val;
3225          }
3226       }
3227       if (have_lo && have_hi1 && hiIsRelative)
3228          ip_hi1 += ip_lo;
3229       /* Do we have something that looks sane? */
3230       if (dtag == DW_TAG_inlined_subroutine
3231           && (!have_lo) && (!have_hi1) && (!have_range)) {
3232          /* Seems strange. How can an inlined subroutine have
3233             no code ? */
3234          goto_bad_DIE;
3235       } else
3236       if (have_lo && have_hi1 && (!have_range)) {
3237          /* This inlined call is just a single address range. */
3238          if (ip_lo < ip_hi1) {
3239             /* Apply text debug biasing */
3240             ip_lo += cc->di->text_debug_bias;
3241             ip_hi1 += cc->di->text_debug_bias;
3242             ML_(addInlInfo) (cc->di,
3243                              ip_lo, ip_hi1,
3244                              get_inlFnName (inlinedfn_abstract_origin, cc, td3),
3245                              caller_fndn_ix,
3246                              caller_lineno, level);
3247          }
3248       } else if (have_range) {
3249          /* This inlined call is several address ranges. */
3250          XArray *ranges;
3251          Word j;
3252          const HChar *inlfnname =
3253             get_inlFnName (inlinedfn_abstract_origin, cc, td3);
3254
3255          /* Ranges are biased for the inline info using the same logic
3256             as what is used for biasing ranges for the var info, for which
3257             ranges are read using cc->cu_svma (see parse_var_DIE).
3258             Then text_debug_bias is added when a (non global) var
3259             is recorded (see just before the call to ML_(addVar)) */
3260          ranges = get_range_list( cc, td3,
3261                                   rangeoff, cc->cu_svma );
3262          for (j = 0; j < VG_(sizeXA)( ranges ); j++) {
3263             AddrRange* range = (AddrRange*) VG_(indexXA)( ranges, j );
3264             ML_(addInlInfo) (cc->di,
3265                              range->aMin   + cc->di->text_debug_bias,
3266                              range->aMax+1 + cc->di->text_debug_bias,
3267                              // aMax+1 as range has its last bound included
3268                              // while ML_(addInlInfo) expects last bound not
3269                              // included.
3270                              inlfnname,
3271                              caller_fndn_ix,
3272                              caller_lineno, level);
3273          }
3274          VG_(deleteXA)( ranges );
3275       } else
3276          goto_bad_DIE;
3277    }
3278
3279    // Only recursively parse the (possible) children for the DIE which
3280    // might maybe contain a DW_TAG_inlined_subroutine:
3281    return dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram
3282       || dtag == DW_TAG_inlined_subroutine
3283       || dtag == DW_TAG_compile_unit || dtag == DW_TAG_partial_unit;
3284
3285   bad_DIE:
3286    dump_bad_die_and_barf("parse_inl_DIE", dtag, posn, level,
3287                          c_die, saved_die_c_offset,
3288                          abbv,
3289                          cc);
3290    /*NOTREACHED*/
3291 }
3292
3293
3294 /*------------------------------------------------------------*/
3295 /*---                                                      ---*/
3296 /*--- Parsing of type-related DIEs                         ---*/
3297 /*---                                                      ---*/
3298 /*------------------------------------------------------------*/
3299
3300 typedef
3301    struct {
3302       /* What source language?  'A'=Ada83/95,
3303                                 'C'=C/C++,
3304                                 'F'=Fortran,
3305                                 '?'=other
3306          Established once per compilation unit. */
3307       UChar language;
3308       /* A stack of types which are currently under construction */
3309       Int   sp; /* [sp] is innermost active entry; sp==-1 for empty
3310                    stack */
3311       Int   stack_size;
3312       /* Note that the TyEnts in qparentE are temporary copies of the
3313          ones accumulating in the main tyent array.  So it is not safe
3314          to free up anything on them when popping them off the stack
3315          (iow, it isn't safe to use TyEnt__make_EMPTY on them).  Just
3316          memset them to zero when done. */
3317       TyEnt *qparentE; /* parent TyEnts */
3318       Int   *qlevel;
3319    }
3320    D3TypeParser;
3321
3322 /* Completely initialise a type parser object */
3323 static void
3324 type_parser_init ( D3TypeParser *parser )
3325 {
3326    parser->sp = -1;
3327    parser->language = '?';
3328    parser->stack_size = 0;
3329    parser->qparentE = NULL;
3330    parser->qlevel   = NULL;
3331 }
3332
3333 /* Release any memory hanging off a type parser object */
3334 static void
3335 type_parser_release ( D3TypeParser *parser )
3336 {
3337    ML_(dinfo_free)( parser->qparentE );
3338    ML_(dinfo_free)( parser->qlevel );
3339 }
3340
3341 static void typestack_show ( const D3TypeParser* parser, const HChar* str )
3342 {
3343    Word i;
3344    VG_(printf)("  typestack (%s) {\n", str);
3345    for (i = 0; i <= parser->sp; i++) {
3346       VG_(printf)("    [%ld] (level %d): ", i, parser->qlevel[i]);
3347       ML_(pp_TyEnt)( &parser->qparentE[i] );
3348       VG_(printf)("\n");
3349    }
3350    VG_(printf)("  }\n");
3351 }
3352
3353 /* Remove from the stack, all entries with .level > 'level' */
3354 static
3355 void typestack_preen ( D3TypeParser* parser, Bool td3, Int level )
3356 {
3357    Bool changed = False;
3358    vg_assert(parser->sp < parser->stack_size);
3359    while (True) {
3360       vg_assert(parser->sp >= -1);
3361       if (parser->sp == -1) break;
3362       if (parser->qlevel[parser->sp] <= level) break;
3363       if (0)
3364          TRACE_D3("BBBBAAAA typestack_pop [newsp=%d]\n", parser->sp-1);
3365       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
3366       parser->sp--;
3367       changed = True;
3368    }
3369    if (changed && td3)
3370       typestack_show( parser, "after preen" );
3371 }
3372
3373 static Bool typestack_is_empty ( const D3TypeParser* parser )
3374 {
3375    vg_assert(parser->sp >= -1 && parser->sp < parser->stack_size);
3376    return parser->sp == -1;
3377 }
3378
3379 static void typestack_push ( const CUConst* cc,
3380                              D3TypeParser* parser,
3381                              Bool td3,
3382                              const TyEnt* parentE, Int level )
3383 {
3384    if (0)
3385    TRACE_D3("BBBBAAAA typestack_push[newsp=%d]: %d  %05lx\n",
3386             parser->sp+1, level, parentE->cuOff);
3387
3388    /* First we need to zap everything >= 'level', as we are about to
3389       replace any previous entry at 'level', so .. */
3390    typestack_preen(parser, /*td3*/False, level-1);
3391
3392    vg_assert(parser->sp >= -1);
3393    vg_assert(parser->sp < parser->stack_size);
3394    if (parser->sp == parser->stack_size - 1) {
3395       parser->stack_size += 16;
3396       parser->qparentE =
3397          ML_(dinfo_realloc)("di.readdwarf3.typush.1", parser->qparentE,
3398                             parser->stack_size * sizeof parser->qparentE[0]);
3399       parser->qlevel =
3400          ML_(dinfo_realloc)("di.readdwarf3.typush.2", parser->qlevel,
3401                             parser->stack_size * sizeof parser->qlevel[0]);
3402    }
3403    if (parser->sp >= 0)
3404       vg_assert(parser->qlevel[parser->sp] < level);
3405    parser->sp++;
3406    vg_assert(parentE);
3407    vg_assert(ML_(TyEnt__is_type)(parentE));
3408    vg_assert(parentE->cuOff != D3_INVALID_CUOFF);
3409    parser->qparentE[parser->sp] = *parentE;
3410    parser->qlevel[parser->sp]  = level;
3411    if (TD3)
3412       typestack_show( parser, "after push" );
3413 }
3414
3415 /* True if the subrange type being parsed gives the bounds of an array. */
3416 static Bool subrange_type_denotes_array_bounds ( const D3TypeParser* parser,
3417                                                  DW_TAG dtag ) {
3418    vg_assert(dtag == DW_TAG_subrange_type);
3419    /* For most languages, a subrange_type dtag always gives the
3420       bounds of an array.
3421       For Ada, there are additional conditions as a subrange_type
3422       is also used for other purposes. */
3423    if (parser->language != 'A')
3424       /* not Ada, so it definitely denotes an array bound. */
3425       return True;
3426    else
3427       /* Extra constraints for Ada: it only denotes an array bound if .. */
3428       return (! typestack_is_empty(parser)
3429               && parser->qparentE[parser->sp].tag == Te_TyArray);
3430 }
3431
3432 /* True if the form is one of the forms supported to give an array bound.
3433    For some arrays (scope local arrays with variable size),
3434    a DW_FORM_ref4 was used, and was wrongly used as the bound value.
3435    So, refuse the forms that are known to give a problem. */
3436 static Bool form_expected_for_bound ( DW_FORM form ) {
3437    if (form == DW_FORM_ref1
3438        || form == DW_FORM_ref2
3439        || form == DW_FORM_ref4
3440        || form == DW_FORM_ref8)
3441       return False;
3442
3443    return True;
3444 }
3445
3446 /* Parse a type-related DIE.  'parser' holds the current parser state.
3447    'admin' is where the completed types are dumped.  'dtag' is the tag
3448    for this DIE.  'c_die' points to the start of the data fields (FORM
   stuff) for the DIE.  'abbv' is the parsed abbreviation which describes
   the DIE.
3451
3452    We may find the DIE uninteresting, in which case we should ignore
3453    it.
3454
3455    What happens: the DIE is examined.  If uninteresting, it is ignored.
3456    Otherwise, the DIE gives rise to two things:
3457
3458    (1) the offset of this DIE in the CU -- the cuOffset, a UWord
3459    (2) a TyAdmin structure, which holds the type, or related stuff
3460
3461    (2) is added at the end of 'tyadmins', at some index, say 'i'.
3462
3463    A pair (cuOffset, i) is added to 'tydict'.
3464
3465    Hence 'tyadmins' holds the actual type entities, and 'tydict' holds
3466    a mapping from cuOffset to the index of the corresponding entry in
   'tyadmins'.
3468
3469    When resolving a cuOffset to a TyAdmin, first look up the cuOffset
3470    in the tydict (by binary search).  This gives an index into
3471    tyadmins, and the required entity lives in tyadmins at that index.
3472 */
3473 __attribute__((noinline))
3474 static void parse_type_DIE ( /*MOD*/XArray* /* of TyEnt */ tyents,
3475                              /*MOD*/D3TypeParser* parser,
3476                              DW_TAG dtag,
3477                              UWord posn,
3478                              Int level,
3479                              Cursor* c_die,
3480                              const g_abbv *abbv,
3481                              const CUConst* cc,
3482                              Bool td3 )
3483 {
3484    FormContents cts;
3485    UInt nf_i;
3486    TyEnt typeE;
3487    TyEnt atomE;
3488    TyEnt fieldE;
3489    TyEnt boundE;
3490
3491    UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
3492
3493    VG_(memset)( &typeE,  0xAA, sizeof(typeE) );
3494    VG_(memset)( &atomE,  0xAA, sizeof(atomE) );
3495    VG_(memset)( &fieldE, 0xAA, sizeof(fieldE) );
3496    VG_(memset)( &boundE, 0xAA, sizeof(boundE) );
3497
3498    /* If we've returned to a level at or above any previously noted
3499       parent, un-note it, so we don't believe we're still collecting
3500       its children. */
3501    typestack_preen( parser, td3, level-1 );
3502
3503    if (dtag == DW_TAG_compile_unit
3504        || dtag == DW_TAG_type_unit
3505        || dtag == DW_TAG_partial_unit) {
3506       /* See if we can find DW_AT_language, since it is important for
3507          establishing array bounds (see DW_TAG_subrange_type below in
3508          this fn) */
3509       nf_i = 0;
3510       while (True) {
3511          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3512          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3513          const name_form *nf = &abbv->nf[nf_i];
3514          nf_i++;
3515          if (attr == 0 && form == 0) break;
3516          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
3517          if (attr != DW_AT_language)
3518             continue;
3519          if (cts.szB <= 0)
3520            goto_bad_DIE;
3521          switch (cts.u.val) {
3522             case DW_LANG_C89: case DW_LANG_C:
3523             case DW_LANG_C_plus_plus: case DW_LANG_ObjC:
3524             case DW_LANG_ObjC_plus_plus: case DW_LANG_UPC:
3525             case DW_LANG_Upc: case DW_LANG_C99: case DW_LANG_C11:
3526             case DW_LANG_C_plus_plus_11: case DW_LANG_C_plus_plus_14:
3527                parser->language = 'C'; break;
3528             case DW_LANG_Fortran77: case DW_LANG_Fortran90:
3529             case DW_LANG_Fortran95: case DW_LANG_Fortran03:
3530             case DW_LANG_Fortran08:
3531                parser->language = 'F'; break;
3532             case DW_LANG_Ada83: case DW_LANG_Ada95:
3533                parser->language = 'A'; break;
3534             case DW_LANG_Cobol74:
3535             case DW_LANG_Cobol85: case DW_LANG_Pascal83:
3536             case DW_LANG_Modula2: case DW_LANG_Java:
3537             case DW_LANG_PLI:
3538             case DW_LANG_D: case DW_LANG_Python: case DW_LANG_Go:
3539             case DW_LANG_Mips_Assembler:
3540                parser->language = '?'; break;
3541             default:
3542                goto_bad_DIE;
3543          }
3544       }
3545    }
3546
3547    if (dtag == DW_TAG_base_type) {
3548       /* We can pick up a new base type any time. */
3549       VG_(memset)(&typeE, 0, sizeof(typeE));
3550       typeE.cuOff = D3_INVALID_CUOFF;
3551       typeE.tag   = Te_TyBase;
3552       nf_i = 0;
3553       while (True) {
3554          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3555          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3556          const name_form *nf = &abbv->nf[nf_i];
3557          nf_i++;
3558          if (attr == 0 && form == 0) break;
3559          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
3560          if (attr == DW_AT_name && cts.szB < 0) {
3561             typeE.Te.TyBase.name
3562                = ML_(cur_read_strdup)( cts.u.cur,
3563                                        "di.readdwarf3.ptD.base_type.1" );
3564          }
3565          if (attr == DW_AT_byte_size && cts.szB > 0) {
3566             typeE.Te.TyBase.szB = cts.u.val;
3567          }
3568          if (attr == DW_AT_encoding && cts.szB > 0) {
3569             switch (cts.u.val) {
3570                case DW_ATE_unsigned: case DW_ATE_unsigned_char:
3571                case DW_ATE_UTF: /* since DWARF4, e.g. char16_t from C++ */
3572                case DW_ATE_boolean:/* FIXME - is this correct? */
3573                case DW_ATE_unsigned_fixed:
3574                   typeE.Te.TyBase.enc = 'U'; break;
3575                case DW_ATE_signed: case DW_ATE_signed_char:
3576                case DW_ATE_signed_fixed:
3577                   typeE.Te.TyBase.enc = 'S'; break;
3578                case DW_ATE_float:
3579                   typeE.Te.TyBase.enc = 'F'; break;
3580                case DW_ATE_complex_float:
3581                   typeE.Te.TyBase.enc = 'C'; break;
3582                default:
3583                   goto_bad_DIE;
3584             }
3585          }
3586       }
3587
3588       /* Invent a name if it doesn't have one.  gcc-4.3
3589          -ftree-vectorize is observed to emit nameless base types. */
3590       if (!typeE.Te.TyBase.name)
3591          typeE.Te.TyBase.name
3592             = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.2",
3593                                  "<anon_base_type>" );
3594
3595       /* Do we have something that looks sane? */
3596       if (/* must have a name */
3597           typeE.Te.TyBase.name == NULL
3598           /* and a plausible size.  Yes, really 32: "complex long
3599              double" apparently has size=32 */
3600           || typeE.Te.TyBase.szB < 0 || typeE.Te.TyBase.szB > 32
3601           /* and a plausible encoding */
3602           || (typeE.Te.TyBase.enc != 'U'
3603               && typeE.Te.TyBase.enc != 'S'
3604               && typeE.Te.TyBase.enc != 'F'
3605               && typeE.Te.TyBase.enc != 'C'))
3606          goto_bad_DIE;
3607       /* Last minute hack: if we see this
3608          <1><515>: DW_TAG_base_type
3609              DW_AT_byte_size   : 0
3610              DW_AT_encoding    : 5
3611              DW_AT_name        : void
3612          convert it into a real Void type. */
3613       if (typeE.Te.TyBase.szB == 0
3614           && 0 == VG_(strcmp)("void", typeE.Te.TyBase.name)) {
3615          ML_(TyEnt__make_EMPTY)(&typeE);
3616          typeE.tag = Te_TyVoid;
3617          typeE.Te.TyVoid.isFake = False; /* it's a real one! */
3618       }
3619
3620       goto acquire_Type;
3621    }
3622
3623    /*
3624     * An example of DW_TAG_rvalue_reference_type:
3625     *
3626     * $ readelf --debug-dump /usr/lib/debug/usr/lib/libstdc++.so.6.0.16.debug
3627     *  <1><1014>: Abbrev Number: 55 (DW_TAG_rvalue_reference_type)
3628     *     <1015>   DW_AT_byte_size   : 4
3629     *     <1016>   DW_AT_type        : <0xe52>
3630     */
3631    if (dtag == DW_TAG_pointer_type || dtag == DW_TAG_reference_type
3632        || dtag == DW_TAG_ptr_to_member_type
3633        || dtag == DW_TAG_rvalue_reference_type) {
3634       /* This seems legit for _pointer_type and _reference_type.  I
3635          don't know if rolling _ptr_to_member_type in here really is
3636          legit, but it's better than not handling it at all. */
3637       VG_(memset)(&typeE, 0, sizeof(typeE));
3638       typeE.cuOff = D3_INVALID_CUOFF;
3639       switch (dtag) {
3640       case DW_TAG_pointer_type:
3641          typeE.tag = Te_TyPtr;
3642          break;
3643       case DW_TAG_reference_type:
3644          typeE.tag = Te_TyRef;
3645          break;
3646       case DW_TAG_ptr_to_member_type:
3647          typeE.tag = Te_TyPtrMbr;
3648          break;
3649       case DW_TAG_rvalue_reference_type:
3650          typeE.tag = Te_TyRvalRef;
3651          break;
3652       default:
3653          vg_assert(False);
3654       }
3655       /* target type defaults to void */
3656       typeE.Te.TyPorR.typeR = D3_FAKEVOID_CUOFF;
3657       /* These four type kinds don't *have* to specify their size, in
3658          which case we assume it's a machine word.  But if they do
3659          specify it, it must be a machine word :-)  This probably
3660          assumes that the word size of the Dwarf3 we're reading is the
3661          same size as that on the machine.  gcc appears to give a size
3662          whereas icc9 doesn't. */
3663       typeE.Te.TyPorR.szB = sizeof(UWord);
3664       nf_i = 0;
3665       while (True) {
3666          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3667          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3668          const name_form *nf = &abbv->nf[nf_i];
3669          nf_i++;
3670          if (attr == 0 && form == 0) break;
3671          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
3672          if (attr == DW_AT_byte_size && cts.szB > 0) {
3673             typeE.Te.TyPorR.szB = cts.u.val;
3674          }
3675          if (attr == DW_AT_type && cts.szB > 0) {
3676             typeE.Te.TyPorR.typeR
3677                = cook_die_using_form( cc, (UWord)cts.u.val, form );
3678          }
3679       }
3680       /* Do we have something that looks sane? */
3681       if (typeE.Te.TyPorR.szB != sizeof(UWord))
3682          goto_bad_DIE;
3683       else
3684          goto acquire_Type;
3685    }
3686
3687    if (dtag == DW_TAG_enumeration_type) {
3688       /* Create a new Type to hold the results. */
3689       VG_(memset)(&typeE, 0, sizeof(typeE));
3690       typeE.cuOff = posn;
3691       typeE.tag   = Te_TyEnum;
3692       Bool is_decl = False;
3693       typeE.Te.TyEnum.atomRs
3694          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.enum_type.1",
3695                        ML_(dinfo_free),
3696                        sizeof(UWord) );
3697       nf_i=0;
3698       while (True) {
3699          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3700          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3701          const name_form *nf = &abbv->nf[nf_i];
3702          nf_i++;
3703          if (attr == 0 && form == 0) break;
3704          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
3705          if (attr == DW_AT_name && cts.szB < 0) {
3706             typeE.Te.TyEnum.name
3707                = ML_(cur_read_strdup)( cts.u.cur,
3708                                        "di.readdwarf3.pTD.enum_type.2" );
3709          }
3710          if (attr == DW_AT_byte_size && cts.szB > 0) {
3711             typeE.Te.TyEnum.szB = cts.u.val;
3712          }
3713          if (attr == DW_AT_declaration) {
3714             is_decl = True;
3715          }
3716       }
3717
3718       if (!typeE.Te.TyEnum.name)
3719          typeE.Te.TyEnum.name
3720             = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enum_type.3",
3721                                  "<anon_enum_type>" );
3722
3723       /* Do we have something that looks sane? */
3724       if (typeE.Te.TyEnum.szB == 0
3725           /* we must know the size */
3726           /* but not for Ada, which uses such dummy
3727              enumerations as helper for gdb ada mode.
3728              Also GCC allows incomplete enums as GNU extension.
3729              http://gcc.gnu.org/onlinedocs/gcc/Incomplete-Enums.html
3730              These are marked as DW_AT_declaration and won't have
3731              a size. They can only be used in declaration or as
3732              pointer types.  You can't allocate variables or storage
3733              using such an enum type. (Also GCC seems to have a bug
3734              that will put such an enumeration_type into a .debug_types
3735              unit which should only contain complete types.) */
3736           && (parser->language != 'A' && !is_decl)) {
3737          goto_bad_DIE;
3738       }
3739
3740       /* On't stack! */
3741       typestack_push( cc, parser, td3, &typeE, level );
3742       goto acquire_Type;
3743    }
3744
3745    /* gcc (GCC) 4.4.0 20081017 (experimental) occasionally produces
3746       DW_TAG_enumerator with only a DW_AT_name but no
3747       DW_AT_const_value.  This is in violation of the Dwarf3 standard,
3748       and appears to be a new "feature" of gcc - versions 4.3.x and
3749       earlier do not appear to do this.  So accept DW_TAG_enumerator
3750       which only have a name but no value.  An example:
3751
3752       <1><180>: Abbrev Number: 6 (DW_TAG_enumeration_type)
3753          <181>   DW_AT_name        : (indirect string, offset: 0xda70):
3754                                      QtMsgType
3755          <185>   DW_AT_byte_size   : 4
3756          <186>   DW_AT_decl_file   : 14
3757          <187>   DW_AT_decl_line   : 1480
3758          <189>   DW_AT_sibling     : <0x1a7>
3759       <2><18d>: Abbrev Number: 7 (DW_TAG_enumerator)
3760          <18e>   DW_AT_name        : (indirect string, offset: 0x9e18):
3761                                      QtDebugMsg
3762       <2><192>: Abbrev Number: 7 (DW_TAG_enumerator)
3763          <193>   DW_AT_name        : (indirect string, offset: 0x1505f):
3764                                      QtWarningMsg
3765       <2><197>: Abbrev Number: 7 (DW_TAG_enumerator)
3766          <198>   DW_AT_name        : (indirect string, offset: 0x16f4a):
3767                                      QtCriticalMsg
3768       <2><19c>: Abbrev Number: 7 (DW_TAG_enumerator)
3769          <19d>   DW_AT_name        : (indirect string, offset: 0x156dd):
3770                                      QtFatalMsg
3771       <2><1a1>: Abbrev Number: 7 (DW_TAG_enumerator)
3772          <1a2>   DW_AT_name        : (indirect string, offset: 0x13660):
3773                                      QtSystemMsg
3774    */
3775    if (dtag == DW_TAG_enumerator) {
3776       VG_(memset)( &atomE, 0, sizeof(atomE) );
3777       atomE.cuOff = posn;
3778       atomE.tag   = Te_Atom;
3779       nf_i = 0;
3780       while (True) {
3781          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3782          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3783          const name_form *nf = &abbv->nf[nf_i];
3784          nf_i++;
3785          if (attr == 0 && form == 0) break;
3786          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
3787          if (attr == DW_AT_name && cts.szB < 0) {
3788             atomE.Te.Atom.name
3789               = ML_(cur_read_strdup)( cts.u.cur,
3790                                       "di.readdwarf3.pTD.enumerator.1" );
3791          }
3792          if (attr == DW_AT_const_value && cts.szB > 0) {
3793             atomE.Te.Atom.value      = cts.u.val;
3794             atomE.Te.Atom.valueKnown = True;
3795          }
3796       }
3797       /* Do we have something that looks sane? */
3798       if (atomE.Te.Atom.name == NULL)
3799          goto_bad_DIE;
3800       /* Do we have a plausible parent? */
3801       if (typestack_is_empty(parser)) goto_bad_DIE;
3802       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
3803       vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
3804       if (level != parser->qlevel[parser->sp]+1) goto_bad_DIE;
3805       if (parser->qparentE[parser->sp].tag != Te_TyEnum) goto_bad_DIE;
3806       /* Record this child in the parent */
3807       vg_assert(parser->qparentE[parser->sp].Te.TyEnum.atomRs);
3808       VG_(addToXA)( parser->qparentE[parser->sp].Te.TyEnum.atomRs,
3809                     &atomE );
3810       /* And record the child itself */
3811       goto acquire_Atom;
3812    }
3813
3814    /* Treat DW_TAG_class_type as if it was a DW_TAG_structure_type.  I
3815       don't know if this is correct, but it at least makes this reader
3816       usable for gcc-4.3 produced Dwarf3. */
3817    if (dtag == DW_TAG_structure_type || dtag == DW_TAG_class_type
3818        || dtag == DW_TAG_union_type) {
3819       Bool have_szB = False;
3820       Bool is_decl  = False;
3821       Bool is_spec  = False;
3822       /* Create a new Type to hold the results. */
3823       VG_(memset)(&typeE, 0, sizeof(typeE));
3824       typeE.cuOff = posn;
3825       typeE.tag   = Te_TyStOrUn;
3826       typeE.Te.TyStOrUn.name = NULL;
3827       typeE.Te.TyStOrUn.typeR = D3_INVALID_CUOFF;
3828       typeE.Te.TyStOrUn.fieldRs
3829          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.pTD.struct_type.1",
3830                        ML_(dinfo_free),
3831                        sizeof(UWord) );
3832       typeE.Te.TyStOrUn.complete = True;
3833       typeE.Te.TyStOrUn.isStruct = dtag == DW_TAG_structure_type
3834                                    || dtag == DW_TAG_class_type;
3835       nf_i = 0;
3836       while (True) {
3837          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3838          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3839          const name_form *nf = &abbv->nf[nf_i];
3840          nf_i++;
3841          if (attr == 0 && form == 0) break;
3842          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
3843          if (attr == DW_AT_name && cts.szB < 0) {
3844             typeE.Te.TyStOrUn.name
3845                = ML_(cur_read_strdup)( cts.u.cur,
3846                                        "di.readdwarf3.ptD.struct_type.2" );
3847          }
3848          if (attr == DW_AT_byte_size && cts.szB >= 0) {
3849             typeE.Te.TyStOrUn.szB = cts.u.val;
3850             have_szB = True;
3851          }
3852          if (attr == DW_AT_declaration && cts.szB > 0 && cts.u.val > 0) {
3853             is_decl = True;
3854          }
3855          if (attr == DW_AT_specification && cts.szB > 0 && cts.u.val > 0) {
3856             is_spec = True;
3857          }
3858          if (attr == DW_AT_signature && form == DW_FORM_ref_sig8
3859              && cts.szB > 0) {
3860             have_szB = True;
3861             typeE.Te.TyStOrUn.szB = 8;
3862             typeE.Te.TyStOrUn.typeR
3863                = cook_die_using_form( cc, (UWord)cts.u.val, form );
3864          }
3865       }
3866       /* Do we have something that looks sane? */
3867       if (is_decl && (!is_spec)) {
3868          /* It's a DW_AT_declaration.  We require the name but
3869             nothing else. */
3870          /* JRS 2012-06-28: following discussion w/ tromey, if the
3871             type doesn't have name, just make one up, and accept it.
3872             It might be referred to by other DIEs, so ignoring it
3873             doesn't seem like a safe option. */
3874          if (typeE.Te.TyStOrUn.name == NULL)
3875             typeE.Te.TyStOrUn.name
3876                = ML_(dinfo_strdup)( "di.readdwarf3.ptD.struct_type.3",
3877                                     "<anon_struct_type>" );
3878          typeE.Te.TyStOrUn.complete = False;
3879          /* JRS 2009 Aug 10: <possible kludge>? */
3880          /* Push this tyent on the stack, even though it's incomplete.
3881             It appears that gcc-4.4 on Fedora 11 will sometimes create
3882             DW_TAG_member entries for it, and so we need to have a
3883             plausible parent present in order for that to work.  See
3884             #200029 comments 8 and 9. */
3885          typestack_push( cc, parser, td3, &typeE, level );
3886          /* </possible kludge> */
3887          goto acquire_Type;
3888       }
3889       if ((!is_decl) /* && (!is_spec) */) {
3890          /* this is the common, ordinary case */
3891          /* The name can be present, or not */
3892          if (!have_szB) {
3893             /* We must know the size.
3894                But in Ada, record with discriminants might have no size.
3895                But in C, VLA in the middle of a struct (gcc extension)
3896                might have no size.
3897                Instead, some GNAT dwarf extensions and/or dwarf entries
3898                allow to calculate the struct size at runtime.
3899                We cannot do that (yet?) so, the temporary kludge is to use
3900                a small size. */
3901             typeE.Te.TyStOrUn.szB = 1;
3902          }
3903          /* On't stack! */
3904          typestack_push( cc, parser, td3, &typeE, level );
3905          goto acquire_Type;
3906       }
3907       else {
3908          /* don't know how to handle any other variants just now */
3909          goto_bad_DIE;
3910       }
3911    }
3912
3913    if (dtag == DW_TAG_member) {
3914       /* Acquire member entries for both DW_TAG_structure_type and
3915          DW_TAG_union_type.  They differ minorly, in that struct
3916          members must have a DW_AT_data_member_location expression
3917          whereas union members must not. */
3918       Bool parent_is_struct;
3919       VG_(memset)( &fieldE, 0, sizeof(fieldE) );
3920       fieldE.cuOff = posn;
3921       fieldE.tag   = Te_Field;
3922       fieldE.Te.Field.typeR = D3_INVALID_CUOFF;
3923       nf_i = 0;
3924       while (True) {
3925          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3926          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3927          const name_form *nf = &abbv->nf[nf_i];
3928          nf_i++;
3929          if (attr == 0 && form == 0) break;
3930          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
3931          if (attr == DW_AT_name && cts.szB < 0) {
3932             fieldE.Te.Field.name
3933                = ML_(cur_read_strdup)( cts.u.cur,
3934                                        "di.readdwarf3.ptD.member.1" );
3935          }
3936          if (attr == DW_AT_type && cts.szB > 0) {
3937             fieldE.Te.Field.typeR
3938                = cook_die_using_form( cc, (UWord)cts.u.val, form );
3939          }
3940          /* There are 2 different cases for DW_AT_data_member_location.
3941             If it is a constant class attribute, it contains byte offset
3942             from the beginning of the containing entity.
3943             Otherwise it is a location expression.  */
3944          if (attr == DW_AT_data_member_location && cts.szB > 0) {
3945             fieldE.Te.Field.nLoc = -1;
3946             fieldE.Te.Field.pos.offset = cts.u.val;
3947          }
3948          if (attr == DW_AT_data_member_location && cts.szB <= 0) {
3949             fieldE.Te.Field.nLoc = (UWord)(-cts.szB);
3950             fieldE.Te.Field.pos.loc
3951                = ML_(cur_read_memdup)( cts.u.cur,
3952                                        (SizeT)fieldE.Te.Field.nLoc,
3953                                        "di.readdwarf3.ptD.member.2" );
3954          }
3955       }
3956       /* Do we have a plausible parent? */
3957       if (typestack_is_empty(parser)) goto_bad_DIE;
3958       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
3959       vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
3960       if (level != parser->qlevel[parser->sp]+1) goto_bad_DIE;
3961       if (parser->qparentE[parser->sp].tag != Te_TyStOrUn) goto_bad_DIE;
3962       /* Do we have something that looks sane?  If this a member of a
3963          struct, we must have a location expression; but if a member
3964          of a union that is irrelevant (D3 spec sec 5.6.6).  We ought
3965          to reject in the latter case, but some compilers have been
3966          observed to emit constant-zero expressions.  So just ignore
3967          them. */
3968       parent_is_struct
3969          = parser->qparentE[parser->sp].Te.TyStOrUn.isStruct;
3970       if (!fieldE.Te.Field.name)
3971          fieldE.Te.Field.name
3972             = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.3",
3973                                  "<anon_field>" );
3974       if (fieldE.Te.Field.typeR == D3_INVALID_CUOFF)
3975          goto_bad_DIE;
3976       if (fieldE.Te.Field.nLoc) {
3977          if (!parent_is_struct) {
3978             /* If this is a union type, pretend we haven't seen the data
3979                member location expression, as it is by definition
3980                redundant (it must be zero). */
3981             if (fieldE.Te.Field.nLoc > 0)
3982                ML_(dinfo_free)(fieldE.Te.Field.pos.loc);
3983             fieldE.Te.Field.pos.loc = NULL;
3984             fieldE.Te.Field.nLoc = 0;
3985          }
3986          /* Record this child in the parent */
3987          fieldE.Te.Field.isStruct = parent_is_struct;
3988          vg_assert(parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs);
3989          VG_(addToXA)( parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs,
3990                        &posn );
3991          /* And record the child itself */
3992          goto acquire_Field;
3993       } else {
3994          /* Member with no location - this can happen with static
3995             const members in C++ code which are compile time constants
3996             that do no exist in the class. They're not of any interest
3997             to us so we ignore them. */
3998          ML_(TyEnt__make_EMPTY)(&fieldE);
3999       }
4000    }
4001
4002    if (dtag == DW_TAG_array_type) {
4003       VG_(memset)(&typeE, 0, sizeof(typeE));
4004       typeE.cuOff = posn;
4005       typeE.tag   = Te_TyArray;
4006       typeE.Te.TyArray.typeR = D3_INVALID_CUOFF;
4007       typeE.Te.TyArray.boundRs
4008          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.array_type.1",
4009                        ML_(dinfo_free),
4010                        sizeof(UWord) );
4011       nf_i = 0;
4012       while (True) {
4013          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
4014          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
4015          const name_form *nf = &abbv->nf[nf_i];
4016          nf_i++;
4017          if (attr == 0 && form == 0) break;
4018          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
4019          if (attr == DW_AT_type && cts.szB > 0) {
4020             typeE.Te.TyArray.typeR
4021                = cook_die_using_form( cc, (UWord)cts.u.val, form );
4022          }
4023       }
4024       if (typeE.Te.TyArray.typeR == D3_INVALID_CUOFF)
4025          goto_bad_DIE;
4026       /* On't stack! */
4027       typestack_push( cc, parser, td3, &typeE, level );
4028       goto acquire_Type;
4029    }
4030
4031    /* this is a subrange type defining the bounds of an array. */
4032    if (dtag == DW_TAG_subrange_type
4033        && subrange_type_denotes_array_bounds(parser, dtag)) {
4034       Bool have_lower = False;
4035       Bool have_upper = False;
4036       Bool have_count = False;
4037       Long lower = 0;
4038       Long upper = 0;
4039       Long count = 0;
4040
4041       switch (parser->language) {
4042          case 'C': have_lower = True;  lower = 0; break;
4043          case 'F': have_lower = True;  lower = 1; break;
4044          case '?': have_lower = False; break;
4045          case 'A': have_lower = False; break;
4046          default:  vg_assert(0); /* assured us by handling of
4047                                     DW_TAG_compile_unit in this fn */
4048       }
4049
4050       VG_(memset)( &boundE, 0, sizeof(boundE) );
4051       boundE.cuOff = D3_INVALID_CUOFF;
4052       boundE.tag   = Te_Bound;
4053       nf_i = 0;
4054       while (True) {
4055          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
4056          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
4057          const name_form *nf = &abbv->nf[nf_i];
4058          nf_i++;
4059          if (attr == 0 && form == 0) break;
4060          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
4061          if (attr == DW_AT_lower_bound && cts.szB > 0
4062              && form_expected_for_bound (form)) {
4063             lower      = (Long)cts.u.val;
4064             have_lower = True;
4065          }
4066          if (attr == DW_AT_upper_bound && cts.szB > 0
4067              && form_expected_for_bound (form)) {
4068             upper      = (Long)cts.u.val;
4069             have_upper = True;
4070          }
4071          if (attr == DW_AT_count && cts.szB > 0) {
4072             count    = (Long)cts.u.val;
4073             have_count = True;
4074          }
4075       }
4076       /* FIXME: potentially skip the rest if no parent present, since
4077          it could be the case that this subrange type is free-standing
4078          (not being used to describe the bounds of a containing array
4079          type) */
4080       /* Do we have a plausible parent? */
4081       if (typestack_is_empty(parser)) goto_bad_DIE;
4082       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
4083       vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
4084       if (level != parser->qlevel[parser->sp]+1) goto_bad_DIE;
4085       if (parser->qparentE[parser->sp].tag != Te_TyArray) goto_bad_DIE;
4086
4087       /* Figure out if we have a definite range or not */
4088       if (have_lower && have_upper && (!have_count)) {
4089          boundE.Te.Bound.knownL = True;
4090          boundE.Te.Bound.knownU = True;
4091          boundE.Te.Bound.boundL = lower;
4092          boundE.Te.Bound.boundU = upper;
4093       }
4094       else if (have_lower && (!have_upper) && (!have_count)) {
4095          boundE.Te.Bound.knownL = True;
4096          boundE.Te.Bound.knownU = False;
4097          boundE.Te.Bound.boundL = lower;
4098          boundE.Te.Bound.boundU = 0;
4099       }
4100       else if ((!have_lower) && have_upper && (!have_count)) {
4101          boundE.Te.Bound.knownL = False;
4102          boundE.Te.Bound.knownU = True;
4103          boundE.Te.Bound.boundL = 0;
4104          boundE.Te.Bound.boundU = upper;
4105       }
4106       else if ((!have_lower) && (!have_upper) && (!have_count)) {
4107          boundE.Te.Bound.knownL = False;
4108          boundE.Te.Bound.knownU = False;
4109          boundE.Te.Bound.boundL = 0;
4110          boundE.Te.Bound.boundU = 0;
4111       } else if (have_lower && (!have_upper) && (have_count)) {
4112          boundE.Te.Bound.knownL = True;
4113          boundE.Te.Bound.knownU = True;
4114          boundE.Te.Bound.boundL = lower;
4115          boundE.Te.Bound.boundU = lower + count;
4116       } else {
4117          /* FIXME: handle more cases */
4118          goto_bad_DIE;
4119       }
4120
4121       /* Record this bound in the parent */
4122       boundE.cuOff = posn;
4123       vg_assert(parser->qparentE[parser->sp].Te.TyArray.boundRs);
4124       VG_(addToXA)( parser->qparentE[parser->sp].Te.TyArray.boundRs,
4125                     &boundE.cuOff );
4126       /* And record the child itself */
4127       goto acquire_Bound;
4128    }
4129
4130    /* typedef or subrange_type other than array bounds. */
4131    if (dtag == DW_TAG_typedef
4132        || (dtag == DW_TAG_subrange_type
4133            && !subrange_type_denotes_array_bounds(parser, dtag))) {
4134       /* subrange_type other than array bound is only for Ada. */
4135       vg_assert (dtag == DW_TAG_typedef || parser->language == 'A');
4136       /* We can pick up a new typedef/subrange_type any time. */
4137       VG_(memset)(&typeE, 0, sizeof(typeE));
4138       typeE.cuOff = D3_INVALID_CUOFF;
4139       typeE.tag   = Te_TyTyDef;
4140       typeE.Te.TyTyDef.name = NULL;
4141       typeE.Te.TyTyDef.typeR = D3_INVALID_CUOFF;
4142       nf_i = 0;
4143       while (True) {
4144          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
4145          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
4146          const name_form *nf = &abbv->nf[nf_i];
4147          nf_i++;
4148          if (attr == 0 && form == 0) break;
4149          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
4150          if (attr == DW_AT_name && cts.szB < 0) {
4151             typeE.Te.TyTyDef.name
4152                = ML_(cur_read_strdup)( cts.u.cur,
4153                                        "di.readdwarf3.ptD.typedef.1" );
4154          }
4155          if (attr == DW_AT_type && cts.szB > 0) {
4156             typeE.Te.TyTyDef.typeR
4157                = cook_die_using_form( cc, (UWord)cts.u.val, form );
4158          }
4159       }
4160       /* Do we have something that looks sane?
4161          gcc gnat Ada generates minimal typedef
4162          such as the below
4163          <6><91cc>: DW_TAG_typedef
4164             DW_AT_abstract_ori: <9066>
4165          g++ for OMP can generate artificial functions that have
4166          parameters that refer to pointers to unnamed typedefs.
4167          See https://bugs.kde.org/show_bug.cgi?id=273475
4168          So we cannot require a name for a DW_TAG_typedef.
4169       */
4170       goto acquire_Type;
4171    }
4172
4173    if (dtag == DW_TAG_subroutine_type) {
4174       /* function type? just record that one fact and ask no
4175          further questions. */
4176       VG_(memset)(&typeE, 0, sizeof(typeE));
4177       typeE.cuOff = D3_INVALID_CUOFF;
4178       typeE.tag   = Te_TyFn;
4179       goto acquire_Type;
4180    }
4181
4182    if (dtag == DW_TAG_volatile_type || dtag == DW_TAG_const_type
4183        || dtag == DW_TAG_restrict_type) {
4184       Int have_ty = 0;
4185       VG_(memset)(&typeE, 0, sizeof(typeE));
4186       typeE.cuOff = D3_INVALID_CUOFF;
4187       typeE.tag   = Te_TyQual;
4188       typeE.Te.TyQual.qual
4189          = (dtag == DW_TAG_volatile_type ? 'V'
4190             : (dtag == DW_TAG_const_type ? 'C' : 'R'));
4191       /* target type defaults to 'void' */
4192       typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF;
4193       nf_i = 0;
4194       while (True) {
4195          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
4196          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
4197          const name_form *nf = &abbv->nf[nf_i];
4198          nf_i++;
4199          if (attr == 0 && form == 0) break;
4200          get_Form_contents( &cts, cc, c_die, False/*td3*/, nf );
4201          if (attr == DW_AT_type && cts.szB > 0) {
4202             typeE.Te.TyQual.typeR
4203                = cook_die_using_form( cc, (UWord)cts.u.val, form );
4204             have_ty++;
4205          }
4206       }
4207       /* gcc sometimes generates DW_TAG_const/volatile_type without
4208          DW_AT_type and GDB appears to interpret the type as 'const
4209          void' (resp. 'volatile void').  So just allow it .. */
4210       if (have_ty == 1 || have_ty == 0)
4211          goto acquire_Type;
4212       else
4213          goto_bad_DIE;
4214    }
4215
4216    /*
4217     * Treat DW_TAG_unspecified_type as type void. An example of DW_TAG_unspecified_type:
4218     *
4219     * $ readelf --debug-dump /usr/lib/debug/usr/lib/libstdc++.so.6.0.16.debug
4220     *  <1><10d4>: Abbrev Number: 53 (DW_TAG_unspecified_type)
4221     *     <10d5>   DW_AT_name        : (indirect string, offset: 0xdb7): decltype(nullptr)
4222     */
4223    if (dtag == DW_TAG_unspecified_type) {
4224       VG_(memset)(&typeE, 0, sizeof(typeE));
4225       typeE.cuOff           = D3_INVALID_CUOFF;
4226       typeE.tag             = Te_TyQual;
4227       typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF;
4228       goto acquire_Type;
4229    }
4230
4231    /* else ignore this DIE */
4232    return;
4233    /*NOTREACHED*/
4234
4235   acquire_Type:
4236    if (0) VG_(printf)("YYYY Acquire Type\n");
4237    vg_assert(ML_(TyEnt__is_type)( &typeE ));
4238    vg_assert(typeE.cuOff == D3_INVALID_CUOFF || typeE.cuOff == posn);
4239    typeE.cuOff = posn;
4240    VG_(addToXA)( tyents, &typeE );
4241    return;
4242    /*NOTREACHED*/
4243
4244   acquire_Atom:
4245    if (0) VG_(printf)("YYYY Acquire Atom\n");
4246    vg_assert(atomE.tag == Te_Atom);
4247    vg_assert(atomE.cuOff == D3_INVALID_CUOFF || atomE.cuOff == posn);
4248    atomE.cuOff = posn;
4249    VG_(addToXA)( tyents, &atomE );
4250    return;
4251    /*NOTREACHED*/
4252
4253   acquire_Field:
4254    /* For union members, Expr should be absent */
4255    if (0) VG_(printf)("YYYY Acquire Field\n");
4256    vg_assert(fieldE.tag == Te_Field);
4257    vg_assert(fieldE.Te.Field.nLoc <= 0 || fieldE.Te.Field.pos.loc != NULL);
4258    vg_assert(fieldE.Te.Field.nLoc != 0 || fieldE.Te.Field.pos.loc == NULL);
4259    if (fieldE.Te.Field.isStruct) {
4260       vg_assert(fieldE.Te.Field.nLoc != 0);
4261    } else {
4262       vg_assert(fieldE.Te.Field.nLoc == 0);
4263    }
4264    vg_assert(fieldE.cuOff == D3_INVALID_CUOFF || fieldE.cuOff == posn);
4265    fieldE.cuOff = posn;
4266    VG_(addToXA)( tyents, &fieldE );
4267    return;
4268    /*NOTREACHED*/
4269
4270   acquire_Bound:
4271    if (0) VG_(printf)("YYYY Acquire Bound\n");
4272    vg_assert(boundE.tag == Te_Bound);
4273    vg_assert(boundE.cuOff == D3_INVALID_CUOFF || boundE.cuOff == posn);
4274    boundE.cuOff = posn;
4275    VG_(addToXA)( tyents, &boundE );
4276    return;
4277    /*NOTREACHED*/
4278
4279   bad_DIE:
4280    dump_bad_die_and_barf("parse_type_DIE", dtag, posn, level,
4281                          c_die, saved_die_c_offset,
4282                          abbv,
4283                          cc);
4284    /*NOTREACHED*/
4285 }
4286
4287
4288 /*------------------------------------------------------------*/
4289 /*---                                                      ---*/
4290 /*--- Compression of type DIE information                  ---*/
4291 /*---                                                      ---*/
4292 /*------------------------------------------------------------*/
4293
4294 static UWord chase_cuOff ( Bool* changed,
4295                            const XArray* /* of TyEnt */ ents,
4296                            TyEntIndexCache* ents_cache,
4297                            UWord cuOff )
4298 {
4299    TyEnt* ent;
4300    ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, cuOff );
4301
4302    if (!ent) {
4303       VG_(printf)("chase_cuOff: no entry for 0x%05lx\n", cuOff);
4304       *changed = False;
4305       return cuOff;
4306    }
4307
4308    vg_assert(ent->tag != Te_EMPTY);
4309    if (ent->tag != Te_INDIR) {
4310       *changed = False;
4311       return cuOff;
4312    } else {
4313       vg_assert(ent->Te.INDIR.indR < cuOff);
4314       *changed = True;
4315       return ent->Te.INDIR.indR;
4316    }
4317 }
4318
4319 static
4320 void chase_cuOffs_in_XArray ( Bool* changed,
4321                               const XArray* /* of TyEnt */ ents,
4322                               TyEntIndexCache* ents_cache,
4323                               /*MOD*/XArray* /* of UWord */ cuOffs )
4324 {
4325    Bool b2 = False;
4326    Word i, n = VG_(sizeXA)( cuOffs );
4327    for (i = 0; i < n; i++) {
4328       Bool   b = False;
4329       UWord* p = VG_(indexXA)( cuOffs, i );
4330       *p = chase_cuOff( &b, ents, ents_cache, *p );
4331       if (b)
4332          b2 = True;
4333    }
4334    *changed = b2;
4335 }
4336
4337 static Bool TyEnt__subst_R_fields ( const XArray* /* of TyEnt */ ents,
4338                                     TyEntIndexCache* ents_cache,
4339                                     /*MOD*/TyEnt* te )
4340 {
4341    Bool b, changed = False;
4342    switch (te->tag) {
4343       case Te_EMPTY:
4344          break;
4345       case Te_INDIR:
4346          te->Te.INDIR.indR
4347             = chase_cuOff( &b, ents, ents_cache, te->Te.INDIR.indR );
4348          if (b) changed = True;
4349          break;
4350       case Te_UNKNOWN:
4351          break;
4352       case Te_Atom:
4353          break;
4354       case Te_Field:
4355          te->Te.Field.typeR
4356             = chase_cuOff( &b, ents, ents_cache, te->Te.Field.typeR );
4357          if (b) changed = True;
4358          break;
4359       case Te_Bound:
4360          break;
4361       case Te_TyBase:
4362          break;
4363       case Te_TyPtr:
4364       case Te_TyRef:
4365       case Te_TyPtrMbr:
4366       case Te_TyRvalRef:
4367          te->Te.TyPorR.typeR
4368             = chase_cuOff( &b, ents, ents_cache, te->Te.TyPorR.typeR );
4369          if (b) changed = True;
4370          break;
4371       case Te_TyTyDef:
4372          te->Te.TyTyDef.typeR
4373             = chase_cuOff( &b, ents, ents_cache, te->Te.TyTyDef.typeR );
4374          if (b) changed = True;
4375          break;
4376       case Te_TyStOrUn:
4377          chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyStOrUn.fieldRs );
4378          if (b) changed = True;
4379          break;
4380       case Te_TyEnum:
4381          chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyEnum.atomRs );
4382          if (b) changed = True;
4383          break;
4384       case Te_TyArray:
4385          te->Te.TyArray.typeR
4386             = chase_cuOff( &b, ents, ents_cache, te->Te.TyArray.typeR );
4387          if (b) changed = True;
4388          chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyArray.boundRs );
4389          if (b) changed = True;
4390          break;
4391       case Te_TyFn:
4392          break;
4393       case Te_TyQual:
4394          te->Te.TyQual.typeR
4395             = chase_cuOff( &b, ents, ents_cache, te->Te.TyQual.typeR );
4396          if (b) changed = True;
4397          break;
4398       case Te_TyVoid:
4399          break;
4400       default:
4401          ML_(pp_TyEnt)(te);
4402          vg_assert(0);
4403    }
4404    return changed;
4405 }
4406
4407 /* Make a pass over 'ents'.  For each tyent, inspect the target of any
4408    'R' or 'Rs' fields (those which refer to other tyents), and replace
4409    any which point to INDIR nodes with the target of the indirection
4410    (which should not itself be an indirection).  In summary, this
4411    routine shorts out all references to indirection nodes. */
4412 static
4413 Word dedup_types_substitution_pass ( /*MOD*/XArray* /* of TyEnt */ ents,
4414                                      TyEntIndexCache* ents_cache )
4415 {
4416    Word i, n, nChanged = 0;
4417    Bool b;
4418    n = VG_(sizeXA)( ents );
4419    for (i = 0; i < n; i++) {
4420       TyEnt* ent = VG_(indexXA)( ents, i );
4421       vg_assert(ent->tag != Te_EMPTY);
4422       /* We have to substitute everything, even indirections, so as to
4423          ensure that chains of indirections don't build up. */
4424       b = TyEnt__subst_R_fields( ents, ents_cache, ent );
4425       if (b)
4426          nChanged++;
4427    }
4428
4429    return nChanged;
4430 }
4431
4432
4433 /* Make a pass over 'ents', building a dictionary of TyEnts as we go.
4434    Look up each new tyent in the dictionary in turn.  If it is already
4435    in the dictionary, replace this tyent with an indirection to the
4436    existing one, and delete any malloc'd stuff hanging off this one.
4437    In summary, this routine commons up all tyents that are identical
4438    as defined by TyEnt__cmp_by_all_except_cuOff. */
4439 static
4440 Word dedup_types_commoning_pass ( /*MOD*/XArray* /* of TyEnt */ ents )
4441 {
4442    Word    n, i, nDeleted;
4443    WordFM* dict; /* TyEnt* -> void */
4444    TyEnt*  ent;
4445    UWord   keyW, valW;
4446
4447    dict = VG_(newFM)(
4448              ML_(dinfo_zalloc), "di.readdwarf3.dtcp.1",
4449              ML_(dinfo_free),
4450              (Word(*)(UWord,UWord)) ML_(TyEnt__cmp_by_all_except_cuOff)
4451           );
4452
4453    nDeleted = 0;
4454    n = VG_(sizeXA)( ents );
4455    for (i = 0; i < n; i++) {
4456       ent = VG_(indexXA)( ents, i );
4457       vg_assert(ent->tag != Te_EMPTY);
4458
4459       /* Ignore indirections, although check that they are
4460          not forming a cycle. */
4461       if (ent->tag == Te_INDIR) {
4462          vg_assert(ent->Te.INDIR.indR < ent->cuOff);
4463          continue;
4464       }
4465
4466       keyW = valW = 0;
4467       if (VG_(lookupFM)( dict, &keyW, &valW, (UWord)ent )) {
4468          /* it's already in the dictionary. */
4469          TyEnt* old = (TyEnt*)keyW;
4470          vg_assert(valW == 0);
4471          vg_assert(old != ent);
4472          vg_assert(old->tag != Te_INDIR);
4473          /* since we are traversing the array in increasing order of
4474             cuOff: */
4475          vg_assert(old->cuOff < ent->cuOff);
4476          /* So anyway, dump this entry and replace it with an
4477             indirection to the one in the dictionary.  Note that the
4478             assertion above guarantees that we cannot create cycles of
4479             indirections, since we are always creating an indirection
4480             to a tyent with a cuOff lower than this one. */
4481          ML_(TyEnt__make_EMPTY)( ent );
4482          ent->tag = Te_INDIR;
4483          ent->Te.INDIR.indR = old->cuOff;
4484          nDeleted++;
4485       } else {
4486          /* not in dictionary; add it and keep going. */
4487          VG_(addToFM)( dict, (UWord)ent, 0 );
4488       }
4489    }
4490
4491    VG_(deleteFM)( dict, NULL, NULL );
4492
4493    return nDeleted;
4494 }
4495
4496
4497 static
4498 void dedup_types ( Bool td3,
4499                    /*MOD*/XArray* /* of TyEnt */ ents,
4500                    TyEntIndexCache* ents_cache )
4501 {
4502    Word m, n, i, nDel, nSubst, nThresh;
4503    if (0) td3 = True;
4504
4505    n = VG_(sizeXA)( ents );
4506
4507    /* If a commoning pass and a substitution pass both make fewer than
4508       this many changes, just stop.  It's pointless to burn up CPU
4509       time trying to compress the last 1% or so out of the array. */
4510    nThresh = n / 200;
4511
4512    /* First we must sort .ents by its .cuOff fields, so we
4513       can index into it. */
4514    VG_(setCmpFnXA)( ents, (XACmpFn_t) ML_(TyEnt__cmp_by_cuOff_only) );
4515    VG_(sortXA)( ents );
4516
4517    /* Now repeatedly do commoning and substitution passes over
4518       the array, until there are no more changes. */
4519    do {
4520       nDel   = dedup_types_commoning_pass ( ents );
4521       nSubst = dedup_types_substitution_pass ( ents, ents_cache );
4522       vg_assert(nDel >= 0 && nSubst >= 0);
4523       TRACE_D3("   %ld deletions, %ld substitutions\n", nDel, nSubst);
4524    } while (nDel > nThresh || nSubst > nThresh);
4525
4526    /* Sanity check: all INDIR nodes should point at a non-INDIR thing.
4527       In fact this should be true at the end of every loop iteration
4528       above (a commoning pass followed by a substitution pass), but
4529       checking it on every iteration is excessively expensive.  Note,
4530       this loop also computes 'm' for the stats printing below it. */
4531    m = 0;
4532    n = VG_(sizeXA)( ents );
4533    for (i = 0; i < n; i++) {
4534       TyEnt *ent, *ind;
4535       ent = VG_(indexXA)( ents, i );
4536       if (ent->tag != Te_INDIR) continue;
4537       m++;
4538       ind = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
4539                                          ent->Te.INDIR.indR );
4540       vg_assert(ind);
4541       vg_assert(ind->tag != Te_INDIR);
4542    }
4543
4544    TRACE_D3("Overall: %ld before, %ld after\n", n, n-m);
4545 }
4546
4547
4548 /*------------------------------------------------------------*/
4549 /*---                                                      ---*/
4550 /*--- Resolution of references to type DIEs                ---*/
4551 /*---                                                      ---*/
4552 /*------------------------------------------------------------*/
4553
4554 /* Make a pass through the (temporary) variables array.  Examine the
4555    type of each variable, check is it found, and chase any Te_INDIRs.
4556    Postcondition is: each variable has a typeR field that refers to a
4557    valid type in tyents, or a Te_UNKNOWN, and is certainly guaranteed
4558    not to refer to a Te_INDIR.  (This is so that we can throw all the
4559    Te_INDIRs away later). */
4560
4561 __attribute__((noinline))
4562 static void resolve_variable_types (
4563                void (*barf)( const HChar* ) __attribute__((noreturn)),
4564                /*R-O*/XArray* /* of TyEnt */ ents,
4565                /*MOD*/TyEntIndexCache* ents_cache,
4566                /*MOD*/XArray* /* of TempVar* */ vars
4567             )
4568 {
4569    Word i, n;
4570    n = VG_(sizeXA)( vars );
4571    for (i = 0; i < n; i++) {
4572       TempVar* var = *(TempVar**)VG_(indexXA)( vars, i );
4573       /* This is the stated type of the variable.  But it might be
4574          an indirection, so be careful. */
4575       TyEnt* ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
4576                                                 var->typeR );
4577       if (ent && ent->tag == Te_INDIR) {
4578          ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
4579                                             ent->Te.INDIR.indR );
4580          vg_assert(ent);
4581          vg_assert(ent->tag != Te_INDIR);
4582       }
4583
4584       /* Deal first with "normal" cases */
4585       if (ent && ML_(TyEnt__is_type)(ent)) {
4586          var->typeR = ent->cuOff;
4587          continue;
4588       }
4589
4590       /* If there's no ent, it probably we did not manage to read a
4591          type at the cuOffset which is stated as being this variable's
4592          type.  Maybe a deficiency in parse_type_DIE.  Complain. */
4593       if (ent == NULL) {
4594          VG_(printf)("\n: Invalid cuOff = 0x%05lx\n", var->typeR );
4595          barf("resolve_variable_types: "
4596               "cuOff does not refer to a known type");
4597       }
4598       vg_assert(ent);
4599       /* If ent has any other tag, something bad happened, along the
4600          lines of var->typeR not referring to a type at all. */
4601       vg_assert(ent->tag == Te_UNKNOWN);
4602       /* Just accept it; the type will be useless, but at least keep
4603          going. */
4604       var->typeR = ent->cuOff;
4605    }
4606 }
4607
4608
4609 /*------------------------------------------------------------*/
4610 /*---                                                      ---*/
4611 /*--- Parsing of Compilation Units                         ---*/
4612 /*---                                                      ---*/
4613 /*------------------------------------------------------------*/
4614
4615 static Int cmp_TempVar_by_dioff ( const void* v1, const void* v2 ) {
4616    const TempVar* t1 = *(const TempVar *const *)v1;
4617    const TempVar* t2 = *(const TempVar *const *)v2;
4618    if (t1->dioff < t2->dioff) return -1;
4619    if (t1->dioff > t2->dioff) return 1;
4620    return 0;
4621 }
4622
4623 static void read_DIE (
4624    /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
4625    /*MOD*/XArray* /* of TyEnt */ tyents,
4626    /*MOD*/XArray* /* of TempVar* */ tempvars,
4627    /*MOD*/XArray* /* of GExpr* */ gexprs,
4628    /*MOD*/D3TypeParser* typarser,
4629    /*MOD*/D3VarParser* varparser,
4630    /*MOD*/D3InlParser* inlparser,
4631    Cursor* c, Bool td3, CUConst* cc, Int level
4632 )
4633 {
4634    const g_abbv *abbv;
4635    ULong  atag, abbv_code;
4636    UWord  posn;
4637    UInt   has_children;
4638    UWord  start_die_c_offset;
4639    UWord  after_die_c_offset;
4640    // If the DIE we will parse has a sibling and the parser(s) are
4641    // all indicating that parse_children is not necessary, then
4642    // we will skip the children by jumping to the sibling of this DIE
4643    // (if it has a sibling).
4644    UWord  sibling = 0;
4645    Bool   parse_children = False;
4646
4647    /* --- Deal with this DIE --- */
4648    posn      = cook_die( cc, get_position_of_Cursor( c ) );
4649    abbv_code = get_ULEB128( c );
4650    abbv = get_abbv(cc, abbv_code);
4651    atag      = abbv->atag;
4652
4653    if (TD3) {
4654       TRACE_D3("\n");
4655       trace_DIE ((DW_TAG)atag, posn, level,
4656                  get_position_of_Cursor( c ), abbv, cc);
4657    }
4658
4659    if (atag == 0)
4660       cc->barf("read_DIE: invalid zero tag on DIE");
4661
4662    has_children = abbv->has_children;
4663    if (has_children != DW_children_no && has_children != DW_children_yes)
4664       cc->barf("read_DIE: invalid has_children value");
4665
4666    /* We're set up to look at the fields of this DIE.  Hand it off to
4667       any parser(s) that want to see it.  Since they will in general
4668       advance the DIE cursor, remember the current settings so that we
4669       can then back up. . */
4670    start_die_c_offset  = get_position_of_Cursor( c );
4671    after_die_c_offset  = 0; // set to c position if a parser has read the DIE.
4672
4673    if (VG_(clo_read_var_info)) {
4674       parse_type_DIE( tyents,
4675                       typarser,
4676                       (DW_TAG)atag,
4677                       posn,
4678                       level,
4679                       c,     /* DIE cursor */
4680                       abbv,  /* abbrev */
4681                       cc,
4682                       td3 );
4683       if (get_position_of_Cursor( c ) != start_die_c_offset) {
4684          after_die_c_offset = get_position_of_Cursor( c );
4685          set_position_of_Cursor( c, start_die_c_offset );
4686       }
4687
4688       parse_var_DIE( rangestree,
4689                      tempvars,
4690                      gexprs,
4691                      varparser,
4692                      (DW_TAG)atag,
4693                      posn,
4694                      level,
4695                      c,     /* DIE cursor */
4696                      abbv,  /* abbrev */
4697                      cc,
4698                      td3 );
4699       if (get_position_of_Cursor( c ) != start_die_c_offset) {
4700          after_die_c_offset = get_position_of_Cursor( c );
4701          set_position_of_Cursor( c, start_die_c_offset );
4702       }
4703
4704       parse_children = True;
4705       // type and var parsers do not have logic to skip childrens and establish
4706       // the value of sibling.
4707    }
4708
4709    if (VG_(clo_read_inline_info)) {
4710       inlparser->sibling = 0;
4711       parse_children =
4712          parse_inl_DIE( inlparser,
4713                         (DW_TAG)atag,
4714                         posn,
4715                         level,
4716                         c,     /* DIE cursor */
4717                         abbv, /* abbrev */
4718                         cc,
4719                         td3 )
4720          || parse_children;
4721       if (get_position_of_Cursor( c ) != start_die_c_offset) {
4722          after_die_c_offset = get_position_of_Cursor( c );
4723          // Last parser, no need to reset the cursor to start_die_c_offset.
4724       }
4725       if (sibling == 0)
4726          sibling = inlparser->sibling;
4727       vg_assert (inlparser->sibling == 0 || inlparser->sibling == sibling);
4728    }
4729
4730    if (after_die_c_offset > 0) {
4731       // DIE was read by a parser above, so we know where the DIE ends.
4732       set_position_of_Cursor( c, after_die_c_offset );
4733    } else {
4734       /* No parser has parsed this DIE. So, we need to skip the DIE,
4735          in order to read the next DIE.
4736          At the same time, establish sibling value if the DIE has one. */
4737       TRACE_D3("    uninteresting DIE -> skipping ...\n");
4738       skip_DIE (&sibling, c, abbv, cc);
4739    }
4740
4741    /* --- Now recurse into its children, if any
4742       and the parsing of the children is requested by a parser --- */
4743    if (has_children == DW_children_yes) {
4744       if (parse_children || sibling == 0) {
4745          if (0) TRACE_D3("BEGIN children of level %d\n", level);
4746          while (True) {
4747             atag = peek_ULEB128( c );
4748             if (atag == 0) break;
4749             read_DIE( rangestree, tyents, tempvars, gexprs,
4750                       typarser, varparser, inlparser,
4751                       c, td3, cc, level+1 );
4752          }
4753          /* Now we need to eat the terminating zero */
4754          atag = get_ULEB128( c );
4755          vg_assert(atag == 0);
4756          if (0) TRACE_D3("END children of level %d\n", level);
4757       } else {
4758          // We can skip the childrens, by jumping to the sibling
4759          TRACE_D3("    SKIPPING DIE's children,"
4760                   "jumping to sibling <%d><%lx>\n",
4761                   level, sibling);
4762          set_position_of_Cursor( c, sibling );
4763       }
4764    }
4765
4766 }
4767
4768 static void trace_debug_loc (const DebugInfo* di,
4769                              __attribute__((noreturn)) void (*barf)( const HChar* ),
4770                              DiSlice escn_debug_loc)
4771 {
4772 #if 0
4773    /* This doesn't work properly because it assumes all entries are
4774       packed end to end, with no holes.  But that doesn't always
4775       appear to be the case, so it loses sync.  And the D3 spec
4776       doesn't appear to require a no-hole situation either. */
4777    /* Display .debug_loc */
4778    Addr  dl_base;
4779    UWord dl_offset;
4780    Cursor loc; /* for showing .debug_loc */
4781    Bool td3 = di->trace_symtab;
4782
4783    TRACE_SYMTAB("\n");
4784    TRACE_SYMTAB("\n------ The contents of .debug_loc ------\n");
4785    TRACE_SYMTAB("    Offset   Begin    End      Expression\n");
4786    if (ML_(sli_is_valid)(escn_debug_loc)) {
4787       init_Cursor( &loc, escn_debug_loc, 0, barf,
4788                    "Overrun whilst reading .debug_loc section(1)" );
4789       dl_base = 0;
4790       dl_offset = 0;
4791       while (True) {
4792          UWord  w1, w2;
4793          UWord  len;
4794          if (is_at_end_Cursor( &loc ))
4795             break;
4796
4797          /* Read a (host-)word pair.  This is something of a hack since
4798             the word size to read is really dictated by the ELF file;
4799             however, we assume we're reading a file with the same
4800             word-sizeness as the host.  Reasonably enough. */
4801          w1 = get_UWord( &loc );
4802          w2 = get_UWord( &loc );
4803
4804          if (w1 == 0 && w2 == 0) {
4805             /* end of list.  reset 'base' */
4806             TRACE_D3("    %08lx <End of list>\n", dl_offset);
4807             dl_base = 0;
4808             dl_offset = get_position_of_Cursor( &loc );
4809             continue;
4810          }
4811
4812          if (w1 == -1UL) {
4813             /* new value for 'base' */
4814             TRACE_D3("    %08lx %16lx %08lx (base address)\n",
4815                      dl_offset, w1, w2);
4816             dl_base = w2;
4817             continue;
4818          }
4819
4820          /* else a location expression follows */
4821          TRACE_D3("    %08lx %08lx %08lx ",
4822                   dl_offset, w1 + dl_base, w2 + dl_base);
4823          len = (UWord)get_UShort( &loc );
4824          while (len > 0) {
4825             UChar byte = get_UChar( &loc );
4826             TRACE_D3("%02x", (UInt)byte);
4827             len--;
4828          }
4829          TRACE_SYMTAB("\n");
4830       }
4831    }
4832 #endif
4833 }
4834
4835 static void trace_debug_ranges (const DebugInfo* di,
4836                                 __attribute__((noreturn)) void (*barf)( const HChar* ),
4837                                 DiSlice escn_debug_ranges)
4838 {
4839    Cursor ranges; /* for showing .debug_ranges */
4840    Addr  dr_base;
4841    UWord dr_offset;
4842    Bool td3 = di->trace_symtab;
4843
4844    /* Display .debug_ranges */
4845    TRACE_SYMTAB("\n");
4846    TRACE_SYMTAB("\n------ The contents of .debug_ranges ------\n");
4847    TRACE_SYMTAB("    Offset   Begin    End\n");
4848    if (ML_(sli_is_valid)(escn_debug_ranges)) {
4849       init_Cursor( &ranges, escn_debug_ranges, 0, barf,
4850                    "Overrun whilst reading .debug_ranges section(1)" );
4851       dr_base = 0;
4852       dr_offset = 0;
4853       while (True) {
4854          UWord  w1, w2;
4855
4856          if (is_at_end_Cursor( &ranges ))
4857             break;
4858
4859          /* Read a (host-)word pair.  This is something of a hack since
4860             the word size to read is really dictated by the ELF file;
4861             however, we assume we're reading a file with the same
4862             word-sizeness as the host.  Reasonably enough. */
4863          w1 = get_UWord( &ranges );
4864          w2 = get_UWord( &ranges );
4865
4866          if (w1 == 0 && w2 == 0) {
4867             /* end of list.  reset 'base' */
4868             TRACE_D3("    %08lx <End of list>\n", dr_offset);
4869             dr_base = 0;
4870             dr_offset = get_position_of_Cursor( &ranges );
4871             continue;
4872          }
4873
4874          if (w1 == -1UL) {
4875             /* new value for 'base' */
4876             TRACE_D3("    %08lx %16lx %08lx (base address)\n",
4877                      dr_offset, w1, w2);
4878             dr_base = w2;
4879             continue;
4880          }
4881
4882          /* else a range [w1+base, w2+base) is denoted */
4883          TRACE_D3("    %08lx %08lx %08lx\n",
4884                   dr_offset, w1 + dr_base, w2 + dr_base);
4885       }
4886    }
4887 }
4888
4889 static void trace_debug_abbrev (const DebugInfo* di,
4890                                 __attribute__((noreturn)) void (*barf)( const HChar* ),
4891                                 DiSlice escn_debug_abbv)
4892 {
4893    Cursor abbv; /* for showing .debug_abbrev */
4894    Bool td3 = di->trace_symtab;
4895
4896    /* Display .debug_abbrev */
4897    TRACE_SYMTAB("\n");
4898    TRACE_SYMTAB("\n------ The contents of .debug_abbrev ------\n");
4899    if (ML_(sli_is_valid)(escn_debug_abbv)) {
4900       init_Cursor( &abbv, escn_debug_abbv, 0, barf,
4901                    "Overrun whilst reading .debug_abbrev section" );
4902       while (True) {
4903          if (is_at_end_Cursor( &abbv ))
4904             break;
4905          /* Read one abbreviation table */
4906          TRACE_D3("  Number TAG\n");
4907          while (True) {
4908             ULong atag;
4909             UInt  has_children;
4910             ULong acode = get_ULEB128( &abbv );
4911             if (acode == 0) break; /* end of the table */
4912             atag = get_ULEB128( &abbv );
4913             has_children = get_UChar( &abbv );
4914             TRACE_D3("   %llu      %s    [%s]\n",
4915                      acode, ML_(pp_DW_TAG)(atag),
4916                             ML_(pp_DW_children)(has_children));
4917             while (True) {
4918                ULong at_name = get_ULEB128( &abbv );
4919                ULong at_form = get_ULEB128( &abbv );
4920                if (at_form == DW_FORM_implicit_const) {
4921                   /* Long at_val = */ get_SLEB128 ( &abbv );
4922                }
4923                if (at_name == 0 && at_form == 0) break;
4924                TRACE_D3("    %-18s %s\n",
4925                         ML_(pp_DW_AT)(at_name), ML_(pp_DW_FORM)(at_form));
4926             }
4927          }
4928       }
4929    }
4930 }
4931
4932 static
4933 void new_dwarf3_reader_wrk (
4934    DebugInfo* di,
4935    __attribute__((noreturn)) void (*barf)( const HChar* ),
4936    DiSlice escn_debug_info,      DiSlice escn_debug_types,
4937    DiSlice escn_debug_abbv,      DiSlice escn_debug_line,
4938    DiSlice escn_debug_str,       DiSlice escn_debug_ranges,
4939    DiSlice escn_debug_rnglists,  DiSlice escn_debug_loclists,
4940    DiSlice escn_debug_loc,       DiSlice escn_debug_info_alt,
4941    DiSlice escn_debug_abbv_alt,  DiSlice escn_debug_line_alt,
4942    DiSlice escn_debug_str_alt,   DiSlice escn_debug_line_str
4943 )
4944 {
4945    XArray* /* of TyEnt */     tyents = NULL;
4946    XArray* /* of TyEnt */     tyents_to_keep = NULL;
4947    XArray* /* of GExpr* */    gexprs = NULL;
4948    XArray* /* of TempVar* */  tempvars = NULL;
4949    WordFM* /* of (XArray* of AddrRange, void) */ rangestree = NULL;
4950    TyEntIndexCache* tyents_cache = NULL;
4951    TyEntIndexCache* tyents_to_keep_cache = NULL;
4952    TempVar *varp, *varp2;
4953    GExpr* gexpr;
4954    Cursor info; /* primary cursor for parsing .debug_info */
4955    D3TypeParser typarser;
4956    D3VarParser varparser;
4957    D3InlParser inlparser;
4958    Word  i, j, n;
4959    Bool td3 = di->trace_symtab;
4960    XArray* /* of TempVar* */ dioff_lookup_tab;
4961    Int pass;
4962    VgHashTable *signature_types = NULL;
4963
4964    /* Display/trace various information, if requested. */
4965    if (TD3) {
4966       trace_debug_loc    (di, barf, escn_debug_loc);
4967       trace_debug_ranges (di, barf, escn_debug_ranges);
4968       trace_debug_abbrev (di, barf, escn_debug_abbv);
4969       TRACE_SYMTAB("\n");
4970    }
4971
4972    /* Zero out all parsers. Parsers will really be initialised
4973       according to VG_(clo_read_*_info). */
4974    VG_(memset)( &inlparser, 0, sizeof(inlparser) );
4975
4976    if (VG_(clo_read_var_info)) {
4977       /* We'll park the harvested type information in here.  Also create
4978          a fake "void" entry with offset D3_FAKEVOID_CUOFF, so we always
4979          have at least one type entry to refer to.  D3_FAKEVOID_CUOFF is
4980          huge and presumably will not occur in any valid DWARF3 file --
4981          it would need to have a .debug_info section 4GB long for that to
4982          happen.  These type entries end up in the DebugInfo. */
4983       tyents = VG_(newXA)( ML_(dinfo_zalloc),
4984                            "di.readdwarf3.ndrw.1 (TyEnt temp array)",
4985                            ML_(dinfo_free), sizeof(TyEnt) );
4986       { TyEnt tyent;
4987         VG_(memset)(&tyent, 0, sizeof(tyent));
4988         tyent.tag   = Te_TyVoid;
4989         tyent.cuOff = D3_FAKEVOID_CUOFF;
4990         tyent.Te.TyVoid.isFake = True;
4991         VG_(addToXA)( tyents, &tyent );
4992       }
4993       { TyEnt tyent;
4994         VG_(memset)(&tyent, 0, sizeof(tyent));
4995         tyent.tag   = Te_UNKNOWN;
4996         tyent.cuOff = D3_INVALID_CUOFF;
4997         VG_(addToXA)( tyents, &tyent );
4998       }
4999
5000       /* This is a tree used to unique-ify the range lists that are
5001          manufactured by parse_var_DIE.  References to the keys in the
5002          tree wind up in .rngMany fields in TempVars.  We'll need to
5003          delete this tree, and the XArrays attached to it, at the end of
5004          this function. */
5005       rangestree = VG_(newFM)( ML_(dinfo_zalloc),
5006                                "di.readdwarf3.ndrw.2 (rangestree)",
5007                                ML_(dinfo_free),
5008                                (Word(*)(UWord,UWord))cmp__XArrays_of_AddrRange );
5009
5010       /* List of variables we're accumulating.  These don't end up in the
5011          DebugInfo; instead their contents are handed to ML_(addVar) and
5012          the list elements are then deleted. */
5013       tempvars = VG_(newXA)( ML_(dinfo_zalloc),
5014                              "di.readdwarf3.ndrw.3 (TempVar*s array)",
5015                              ML_(dinfo_free),
5016                              sizeof(TempVar*) );
5017
5018       /* List of GExprs we're accumulating.  These wind up in the
5019          DebugInfo. */
5020       gexprs = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.4",
5021                            ML_(dinfo_free), sizeof(GExpr*) );
5022
5023       /* We need a D3TypeParser to keep track of partially constructed
5024          types.  It'll be discarded as soon as we've completed the CU,
5025          since the resulting information is tipped in to 'tyents' as it
5026          is generated. */
5027       type_parser_init(&typarser);
5028
5029       var_parser_init(&varparser);
5030
5031       signature_types = VG_(HT_construct) ("signature_types");
5032    }
5033
5034    /* Do an initial pass to scan the .debug_types section, if any, and
5035       fill in the signatured types hash table.  This lets us handle
5036       mapping from a type signature to a (cooked) DIE offset directly
5037       in get_Form_contents.  */
5038    if (VG_(clo_read_var_info) && ML_(sli_is_valid)(escn_debug_types)) {
5039       init_Cursor( &info, escn_debug_types, 0, barf,
5040                    "Overrun whilst reading .debug_types section" );
5041       TRACE_D3("\n------ Collecting signatures from "
5042                ".debug_types section ------\n");
5043
5044       while (True) {
5045          UWord   cu_start_offset, cu_offset_now;
5046          CUConst cc;
5047
5048          cu_start_offset = get_position_of_Cursor( &info );
5049          TRACE_D3("\n");
5050          TRACE_D3("  Compilation Unit @ offset 0x%lx:\n", cu_start_offset);
5051          /* parse_CU_header initialises the CU's abbv hash table.  */
5052          parse_CU_Header( &cc, td3, &info, escn_debug_abbv, True, False );
5053
5054          /* Needed by cook_die.  */
5055          cc.types_cuOff_bias = escn_debug_info.szB;
5056
5057          record_signatured_type( signature_types, cc.type_signature,
5058                                  cook_die( &cc, cc.type_offset ));
5059
5060          /* Until proven otherwise we assume we don't need the icc9
5061             workaround in this case; see the DIE-reading loop below
5062             for details.  */
5063          cu_offset_now = (cu_start_offset + cc.unit_length
5064                           + (cc.is_dw64 ? 12 : 4));
5065
5066          clear_CUConst ( &cc);
5067
5068          if (cu_offset_now >= escn_debug_types.szB) {
5069             break;
5070          }
5071
5072          set_position_of_Cursor ( &info, cu_offset_now );
5073       }
5074    }
5075
5076    /* Perform three DIE-reading passes.  The first pass reads DIEs from
5077       alternate .debug_info (if any), the second pass reads DIEs from
5078       .debug_info, and the third pass reads DIEs from .debug_types.
5079       Moving the body of this loop into a separate function would
5080       require a large number of arguments to be passed in, so it is
5081       kept inline instead.  */
5082    for (pass = 0; pass < 3; ++pass) {
5083       ULong section_size;
5084
5085       if (pass == 0) {
5086          if (!ML_(sli_is_valid)(escn_debug_info_alt))
5087             continue;
5088          /* Now loop over the Compilation Units listed in the alternate
5089             .debug_info section (see D3SPEC sec 7.5) paras 1 and 2.
5090             Each compilation unit contains a Compilation Unit Header
5091             followed by precisely one DW_TAG_compile_unit or
5092             DW_TAG_partial_unit DIE. */
5093          init_Cursor( &info, escn_debug_info_alt, 0, barf,
5094                       "Overrun whilst reading alternate .debug_info section" );
5095          section_size = escn_debug_info_alt.szB;
5096
5097          TRACE_D3("\n------ Parsing alternate .debug_info section ------\n");
5098       } else if (pass == 1) {
5099          /* Now loop over the Compilation Units listed in the .debug_info
5100             section (see D3SPEC sec 7.5) paras 1 and 2.  Each compilation
5101             unit contains a Compilation Unit Header followed by precisely
5102             one DW_TAG_compile_unit or DW_TAG_partial_unit DIE. */
5103          init_Cursor( &info, escn_debug_info, 0, barf,
5104                       "Overrun whilst reading .debug_info section" );
5105          section_size = escn_debug_info.szB;
5106
5107          TRACE_D3("\n------ Parsing .debug_info section ------\n");
5108       } else {
5109          if (!ML_(sli_is_valid)(escn_debug_types))
5110             continue;
5111          if (!VG_(clo_read_var_info))
5112             continue; // Types not needed when only reading inline info.
5113          init_Cursor( &info, escn_debug_types, 0, barf,
5114                       "Overrun whilst reading .debug_types section" );
5115          section_size = escn_debug_types.szB;
5116
5117          TRACE_D3("\n------ Parsing .debug_types section ------\n");
5118       }
5119
5120       while (True) {
5121          ULong   cu_start_offset, cu_offset_now;
5122          CUConst cc;
5123          /* It may be that the stated size of this CU is larger than the
5124             amount of stuff actually in it.  icc9 seems to generate CUs
5125             thusly.  We use these variables to figure out if this is
5126             indeed the case, and if so how many bytes we need to skip to
5127             get to the start of the next CU.  Not skipping those bytes
5128             causes us to misidentify the start of the next CU, and it all
5129             goes badly wrong after that (not surprisingly). */
5130          UWord cu_size_including_IniLen, cu_amount_used;
5131
5132          /* It seems icc9 finishes the DIE info before debug_info_sz
5133             bytes have been used up.  So be flexible, and declare the
5134             sequence complete if there is not enough remaining bytes to
5135             hold even the smallest conceivable CU header.  (11 bytes I
5136             reckon). */
5137          /* JRS 23Jan09: I suspect this is no longer necessary now that
5138             the code below contains a 'while (cu_amount_used <
5139             cu_size_including_IniLen ...'  style loop, which skips over
5140             any leftover bytes at the end of a CU in the case where the
5141             CU's stated size is larger than its actual size (as
5142             determined by reading all its DIEs).  However, for prudence,
5143             I'll leave the following test in place.  I can't see that a
5144             CU header can be smaller than 11 bytes, so I don't think
5145             there's any harm possible through the test -- it just adds
5146             robustness. */
5147          Word avail = get_remaining_length_Cursor( &info );
5148          if (avail < 11) {
5149             if (avail > 0)
5150                TRACE_D3("new_dwarf3_reader_wrk: warning: "
5151                         "%ld unused bytes after end of DIEs\n", avail);
5152             break;
5153          }
5154
5155          if (VG_(clo_read_var_info)) {
5156             /* Check the varparser's stack is in a sane state. */
5157             vg_assert(varparser.sp == -1);
5158             /* Check the typarser's stack is in a sane state. */
5159             vg_assert(typarser.sp == -1);
5160          }
5161
5162          cu_start_offset = get_position_of_Cursor( &info );
5163          TRACE_D3("\n");
5164          TRACE_D3("  Compilation Unit @ offset 0x%llx:\n", cu_start_offset);
5165          /* parse_CU_header initialises the CU's hashtable of abbvs ht_abbvs */
5166          if (pass == 0) {
5167             parse_CU_Header( &cc, td3, &info, escn_debug_abbv_alt,
5168                              False, True );
5169          } else {
5170             parse_CU_Header( &cc, td3, &info, escn_debug_abbv,
5171                              pass == 2, False );
5172          }
5173          cc.escn_debug_str      = pass == 0 ? escn_debug_str_alt
5174                                             : escn_debug_str;
5175          cc.escn_debug_ranges   = escn_debug_ranges;
5176          cc.escn_debug_rnglists = escn_debug_rnglists;
5177          cc.escn_debug_loclists = escn_debug_loclists;
5178          cc.escn_debug_loc      = escn_debug_loc;
5179          cc.escn_debug_line     = pass == 0 ? escn_debug_line_alt
5180                                             : escn_debug_line;
5181          cc.escn_debug_info     = pass == 0 ? escn_debug_info_alt
5182                                             : escn_debug_info;
5183          cc.escn_debug_types    = escn_debug_types;
5184          cc.escn_debug_info_alt = escn_debug_info_alt;
5185          cc.escn_debug_str_alt  = escn_debug_str_alt;
5186          cc.escn_debug_line_str = escn_debug_line_str;
5187          cc.types_cuOff_bias    = escn_debug_info.szB;
5188          cc.alt_cuOff_bias      = escn_debug_info.szB + escn_debug_types.szB;
5189          cc.cu_start_offset     = cu_start_offset;
5190          cc.di = di;
5191          /* The CU's svma can be deduced by looking at the AT_low_pc
5192             value in the top level TAG_compile_unit, which is the topmost
5193             DIE.  We'll leave it for the 'varparser' to acquire that info
5194             and fill it in -- since it is the only party to want to know
5195             it. */
5196          cc.cu_svma_known = False;
5197          cc.cu_svma       = 0;
5198
5199          if (VG_(clo_read_var_info)) {
5200             cc.signature_types = signature_types;
5201
5202             /* Create a fake outermost-level range covering the entire
5203                address range.  So we always have *something* to catch all
5204                variable declarations. */
5205             varstack_push( &cc, &varparser, td3,
5206                            unitary_range_list(0UL, ~0UL),
5207                            -1, False/*isFunc*/, NULL/*fbGX*/ );
5208
5209             /* And set up the fndn_ix_Table.  When we come across the top
5210                level DIE for this CU (which is what the next call to
5211                read_DIE should process) we will copy all the file names out
5212                of the .debug_line img area and use this table to look up the
5213                copies when we later see filename numbers in DW_TAG_variables
5214                etc. */
5215             vg_assert(!varparser.fndn_ix_Table );
5216             varparser.fndn_ix_Table
5217                = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.5var",
5218                              ML_(dinfo_free),
5219                              sizeof(UInt) );
5220          }
5221
5222          if (VG_(clo_read_inline_info)) {
5223             /* fndn_ix_Table for the inlined call parser */
5224             vg_assert(!inlparser.fndn_ix_Table );
5225             inlparser.fndn_ix_Table
5226                = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.5inl",
5227                              ML_(dinfo_free),
5228                              sizeof(UInt) );
5229          }
5230
5231          /* Now read the one-and-only top-level DIE for this CU. */
5232          vg_assert(!VG_(clo_read_var_info) || varparser.sp == 0);
5233          read_DIE( rangestree,
5234                    tyents, tempvars, gexprs,
5235                    &typarser, &varparser, &inlparser,
5236                    &info, td3, &cc, 0 );
5237
5238          cu_offset_now = get_position_of_Cursor( &info );
5239
5240          if (0) VG_(printf)("Travelled: %llu  size %llu\n",
5241                             cu_offset_now - cc.cu_start_offset,
5242                             cc.unit_length + (cc.is_dw64 ? 12 : 4));
5243
5244          /* How big the CU claims it is .. */
5245          cu_size_including_IniLen = cc.unit_length + (cc.is_dw64 ? 12 : 4);
5246          /* .. vs how big we have found it to be */
5247          cu_amount_used = cu_offset_now - cc.cu_start_offset;
5248
5249          if (1) TRACE_D3("offset now %llu, d-i-size %llu\n",
5250                          cu_offset_now, section_size);
5251          if (cu_offset_now > section_size)
5252             barf("toplevel DIEs beyond end of CU");
5253
5254          /* If the CU is bigger than it claims to be, we've got a serious
5255             problem. */
5256          if (cu_amount_used > cu_size_including_IniLen)
5257             barf("CU's actual size appears to be larger than it claims it is");
5258
5259          /* If the CU is smaller than it claims to be, we need to skip some
5260             bytes.  Loop updates cu_offset_new and cu_amount_used. */
5261          while (cu_amount_used < cu_size_including_IniLen
5262                 && get_remaining_length_Cursor( &info ) > 0) {
5263             if (0) VG_(printf)("SKIP\n");
5264             (void)get_UChar( &info );
5265             cu_offset_now = get_position_of_Cursor( &info );
5266             cu_amount_used = cu_offset_now - cc.cu_start_offset;
5267          }
5268
5269          if (VG_(clo_read_var_info)) {
5270             /* Preen to level -2.  DIEs have level >= 0 so -2 cannot occur
5271                anywhere else at all.  Our fake the-entire-address-space
5272                range is at level -1, so preening to -2 should completely
5273                empty the stack out. */
5274             TRACE_D3("\n");
5275             varstack_preen( &varparser, td3, -2 );
5276             /* Similarly, empty the type stack out. */
5277             typestack_preen( &typarser, td3, -2 );
5278          }
5279
5280          if (VG_(clo_read_var_info)) {
5281             vg_assert(varparser.fndn_ix_Table );
5282             VG_(deleteXA)( varparser.fndn_ix_Table );
5283             varparser.fndn_ix_Table = NULL;
5284          }
5285          if (VG_(clo_read_inline_info)) {
5286             vg_assert(inlparser.fndn_ix_Table );
5287             VG_(deleteXA)( inlparser.fndn_ix_Table );
5288             inlparser.fndn_ix_Table = NULL;
5289          }
5290          clear_CUConst(&cc);
5291
5292          if (cu_offset_now == section_size)
5293             break;
5294          /* else keep going */
5295       }
5296    }
5297
5298
5299    if (VG_(clo_read_var_info)) {
5300       /* From here on we're post-processing the stuff we got
5301          out of the .debug_info section. */
5302       if (TD3) {
5303          TRACE_D3("\n");
5304          ML_(pp_TyEnts)(tyents, "Initial type entity (TyEnt) array");
5305          TRACE_D3("\n");
5306          TRACE_D3("------ Compressing type entries ------\n");
5307       }
5308
5309       tyents_cache = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.6",
5310                                         sizeof(TyEntIndexCache) );
5311       ML_(TyEntIndexCache__invalidate)( tyents_cache );
5312       dedup_types( td3, tyents, tyents_cache );
5313       if (TD3) {
5314          TRACE_D3("\n");
5315          ML_(pp_TyEnts)(tyents, "After type entity (TyEnt) compression");
5316       }
5317
5318       TRACE_D3("\n");
5319       TRACE_D3("------ Resolving the types of variables ------\n" );
5320       resolve_variable_types( barf, tyents, tyents_cache, tempvars );
5321
5322       /* Copy all the non-INDIR tyents into a new table.  For large
5323          .so's, about 90% of the tyents will by now have been resolved to
5324          INDIRs, and we no longer need them, and so don't need to store
5325          them. */
5326       tyents_to_keep
5327          = VG_(newXA)( ML_(dinfo_zalloc),
5328                        "di.readdwarf3.ndrw.7 (TyEnt to-keep array)",
5329                        ML_(dinfo_free), sizeof(TyEnt) );
5330       n = VG_(sizeXA)( tyents );
5331       for (i = 0; i < n; i++) {
5332          TyEnt* ent = VG_(indexXA)( tyents, i );
5333          if (ent->tag != Te_INDIR)
5334             VG_(addToXA)( tyents_to_keep, ent );
5335       }
5336
5337       VG_(deleteXA)( tyents );
5338       tyents = NULL;
5339       ML_(dinfo_free)( tyents_cache );
5340       tyents_cache = NULL;
5341
5342       /* Sort tyents_to_keep so we can lookup in it.  A complete (if
5343          minor) waste of time, since tyents itself is sorted, but
5344          necessary since VG_(lookupXA) refuses to cooperate if we
5345          don't. */
5346       VG_(setCmpFnXA)( tyents_to_keep, (XACmpFn_t) ML_(TyEnt__cmp_by_cuOff_only) );
5347       VG_(sortXA)( tyents_to_keep );
5348
5349       /* Enable cacheing on tyents_to_keep */
5350       tyents_to_keep_cache
5351          = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.8",
5352                               sizeof(TyEntIndexCache) );
5353       ML_(TyEntIndexCache__invalidate)( tyents_to_keep_cache );
5354
5355       /* And record the tyents in the DebugInfo.  We do this before
5356          starting to hand variables to ML_(addVar), since if ML_(addVar)
5357          wants to do debug printing (of the types of said vars) then it
5358          will need the tyents.*/
5359       vg_assert(!di->admin_tyents);
5360       di->admin_tyents = tyents_to_keep;
5361
5362       /* Bias all the location expressions. */
5363       TRACE_D3("\n");
5364       TRACE_D3("------ Biasing the location expressions ------\n" );
5365
5366       n = VG_(sizeXA)( gexprs );
5367       for (i = 0; i < n; i++) {
5368          gexpr = *(GExpr**)VG_(indexXA)( gexprs, i );
5369          bias_GX( gexpr, di );
5370       }
5371
5372       TRACE_D3("\n");
5373       TRACE_D3("------ Acquired the following variables: ------\n\n");
5374
5375       /* Park (pointers to) all the vars in an XArray, so we can look up
5376          abstract origins quickly.  The array is sorted by (and hence
5377          looked up by) the .dioff fields.  Since the .dioffs should be in
5378          strictly ascending order, there is no need to sort the array
5379          after construction.  The ascending order is, however, asserted. */
5380       dioff_lookup_tab
5381          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.9",
5382                        ML_(dinfo_free),
5383                        sizeof(TempVar*) );
5384
5385       n = VG_(sizeXA)( tempvars );
5386       Word first_primary_var = 0;
5387       for (first_primary_var = 0;
5388            escn_debug_info_alt.szB/*really?*/ && first_primary_var < n;
5389            first_primary_var++) {
5390          varp = *(TempVar**)VG_(indexXA)( tempvars, first_primary_var );
5391          if (varp->dioff < escn_debug_info.szB + escn_debug_types.szB)
5392             break;
5393       }
5394       for (i = 0; i < n; i++) {
5395          varp = *(TempVar**)VG_(indexXA)( tempvars, (i + first_primary_var) % n );
5396          if (i > first_primary_var) {
5397             varp2 = *(TempVar**)VG_(indexXA)( tempvars,
5398                                               (i + first_primary_var - 1) % n );
5399             /* why should this hold?  Only, I think, because we've
5400                constructed the array by reading .debug_info sequentially,
5401                and so the array .dioff fields should reflect that, and be
5402                strictly ascending. */
5403             vg_assert(varp2->dioff < varp->dioff);
5404          }
5405          VG_(addToXA)( dioff_lookup_tab, &varp );
5406       }
5407       VG_(setCmpFnXA)( dioff_lookup_tab, cmp_TempVar_by_dioff );
5408       VG_(sortXA)( dioff_lookup_tab ); /* POINTLESS; FIXME: rm */
5409
5410       /* Now visit each var.  Collect up as much info as possible for
5411          each var and hand it to ML_(addVar). */
5412       n = VG_(sizeXA)( tempvars );
5413       for (j = 0; j < n; j++) {
5414          TyEnt* ent;
5415          varp = *(TempVar**)VG_(indexXA)( tempvars, j );
5416
5417          /* Possibly show .. */
5418          if (TD3) {
5419             VG_(printf)("<%lx> addVar: level %d: %s :: ",
5420                         varp->dioff,
5421                         varp->level,
5422                         varp->name ? varp->name : "<anon_var>" );
5423             if (varp->typeR) {
5424                ML_(pp_TyEnt_C_ishly)( tyents_to_keep, varp->typeR );
5425             } else {
5426                VG_(printf)("NULL");
5427             }
5428             VG_(printf)("\n  Loc=");
5429             if (varp->gexpr) {
5430                ML_(pp_GX)(varp->gexpr);
5431             } else {
5432                VG_(printf)("NULL");
5433             }
5434             VG_(printf)("\n");
5435             if (varp->fbGX) {
5436                VG_(printf)("  FrB=");
5437                ML_(pp_GX)( varp->fbGX );
5438                VG_(printf)("\n");
5439             } else {
5440                VG_(printf)("  FrB=none\n");
5441             }
5442             VG_(printf)("  declared at: %u %s:%d\n",
5443                         varp->fndn_ix,
5444                         ML_(fndn_ix2filename) (di, varp->fndn_ix),
5445                         varp->fLine );
5446             if (varp->absOri != (UWord)D3_INVALID_CUOFF)
5447                VG_(printf)("  abstract origin: <%lx>\n", varp->absOri);
5448          }
5449
5450          /* Skip variables which have no location.  These must be
5451             abstract instances; they are useless as-is since with no
5452             location they have no specified memory location.  They will
5453             presumably be referred to via the absOri fields of other
5454             variables. */
5455          if (!varp->gexpr) {
5456             TRACE_D3("  SKIP (no location)\n\n");
5457             continue;
5458          }
5459
5460          /* So it has a location, at least.  If it refers to some other
5461             entry through its absOri field, pull in further info through
5462             that. */
5463          if (varp->absOri != (UWord)D3_INVALID_CUOFF) {
5464             Bool found;
5465             Word ixFirst, ixLast;
5466             TempVar key;
5467             TempVar* keyp = &key;
5468             TempVar *varAI;
5469             VG_(memset)(&key, 0, sizeof(key)); /* not necessary */
5470             key.dioff = varp->absOri; /* this is what we want to find */
5471             found = VG_(lookupXA)( dioff_lookup_tab, &keyp,
5472                                    &ixFirst, &ixLast );
5473             if (!found) {
5474                /* barf("DW_AT_abstract_origin can't be resolved"); */
5475                TRACE_D3("  SKIP (DW_AT_abstract_origin can't be resolved)\n\n");
5476                continue;
5477             }
5478             /* If the following fails, there is more than one entry with
5479                the same dioff.  Which can't happen. */
5480             vg_assert(ixFirst == ixLast);
5481             varAI = *(TempVar**)VG_(indexXA)( dioff_lookup_tab, ixFirst );
5482             /* stay sane */
5483             vg_assert(varAI);
5484             vg_assert(varAI->dioff == varp->absOri);
5485
5486             /* Copy what useful info we can. */
5487             if (varAI->typeR && !varp->typeR)
5488                varp->typeR = varAI->typeR;
5489             if (varAI->name && !varp->name)
5490                varp->name = varAI->name;
5491             if (varAI->fndn_ix && !varp->fndn_ix)
5492                varp->fndn_ix = varAI->fndn_ix;
5493             if (varAI->fLine > 0 && varp->fLine == 0)
5494                varp->fLine = varAI->fLine;
5495          }
5496
5497          /* Give it a name if it doesn't have one. */
5498          if (!varp->name)
5499             varp->name = ML_(addStr)( di, "<anon_var>", -1 );
5500
5501          /* So now does it have enough info to be useful? */
5502          /* NOTE: re typeR: this is a hack.  If typeR is Te_UNKNOWN then
5503             the type didn't get resolved.  Really, in that case
5504             something's broken earlier on, and should be fixed, rather
5505             than just skipping the variable. */
5506          ent = ML_(TyEnts__index_by_cuOff)( tyents_to_keep,
5507                                             tyents_to_keep_cache,
5508                                             varp->typeR );
5509          /* The next two assertions should be guaranteed by
5510             our previous call to resolve_variable_types. */
5511          vg_assert(ent);
5512          vg_assert(ML_(TyEnt__is_type)(ent) || ent->tag == Te_UNKNOWN);
5513
5514          if (ent->tag == Te_UNKNOWN) continue;
5515
5516          vg_assert(varp->gexpr);
5517          vg_assert(varp->name);
5518          vg_assert(varp->typeR);
5519          vg_assert(varp->level >= 0);
5520
5521          /* Ok.  So we're going to keep it.  Call ML_(addVar) once for
5522             each address range in which the variable exists. */
5523          TRACE_D3("  ACQUIRE for range(s) ");
5524          { AddrRange  oneRange;
5525            AddrRange* varPcRanges;
5526            Word       nVarPcRanges;
5527            /* Set up to iterate over address ranges, however
5528               represented. */
5529            if (varp->nRanges == 0 || varp->nRanges == 1) {
5530               vg_assert(!varp->rngMany);
5531               if (varp->nRanges == 0) {
5532                  vg_assert(varp->rngOneMin == 0);
5533                  vg_assert(varp->rngOneMax == 0);
5534               }
5535               nVarPcRanges = varp->nRanges;
5536               oneRange.aMin = varp->rngOneMin;
5537               oneRange.aMax = varp->rngOneMax;
5538               varPcRanges = &oneRange;
5539            } else {
5540               vg_assert(varp->rngMany);
5541               vg_assert(varp->rngOneMin == 0);
5542               vg_assert(varp->rngOneMax == 0);
5543               nVarPcRanges = VG_(sizeXA)(varp->rngMany);
5544               vg_assert(nVarPcRanges >= 2);
5545               vg_assert(nVarPcRanges == (Word)varp->nRanges);
5546               varPcRanges = VG_(indexXA)(varp->rngMany, 0);
5547            }
5548            if (varp->level == 0)
5549               vg_assert( nVarPcRanges == 1 );
5550            /* and iterate */
5551            for (i = 0; i < nVarPcRanges; i++) {
5552               Addr pcMin = varPcRanges[i].aMin;
5553               Addr pcMax = varPcRanges[i].aMax;
5554               vg_assert(pcMin <= pcMax);
5555               /* Level 0 is the global address range.  So at level 0 we
5556                  don't want to bias pcMin/pcMax; but at all other levels
5557                  we do since those are derived from svmas in the Dwarf
5558                  we're reading.  Be paranoid ... */
5559               if (varp->level == 0) {
5560                  vg_assert(pcMin == (Addr)0);
5561                  vg_assert(pcMax == ~(Addr)0);
5562               } else {
5563                  /* vg_assert(pcMin > (Addr)0);
5564                     No .. we can legitimately expect to see ranges like
5565                     0x0-0x11D (pre-biasing, of course). */
5566                  vg_assert(pcMax < ~(Addr)0);
5567               }
5568
5569               /* Apply text biasing, for non-global variables. */
5570               if (varp->level > 0) {
5571                  pcMin += di->text_debug_bias;
5572                  pcMax += di->text_debug_bias;
5573               }
5574
5575               if (i > 0 && (i%2) == 0)
5576                  TRACE_D3("\n                       ");
5577               TRACE_D3("[%#lx,%#lx] ", pcMin, pcMax );
5578
5579               ML_(addVar)(
5580                  di, varp->level,
5581                      pcMin, pcMax,
5582                      varp->name,  varp->typeR,
5583                      varp->gexpr, varp->fbGX,
5584                      varp->fndn_ix, varp->fLine, td3
5585               );
5586            }
5587          }
5588
5589          TRACE_D3("\n\n");
5590          /* and move on to the next var */
5591       }
5592
5593       /* Now free all the TempVars */
5594       n = VG_(sizeXA)( tempvars );
5595       for (i = 0; i < n; i++) {
5596          varp = *(TempVar**)VG_(indexXA)( tempvars, i );
5597          ML_(dinfo_free)(varp);
5598       }
5599       VG_(deleteXA)( tempvars );
5600       tempvars = NULL;
5601
5602       /* and the temp lookup table */
5603       VG_(deleteXA)( dioff_lookup_tab );
5604
5605       /* and the ranges tree.  Note that we need to also free the XArrays
5606          which constitute the keys, hence pass VG_(deleteXA) as a
5607          key-finalizer. */
5608       VG_(deleteFM)( rangestree, (void(*)(UWord))VG_(deleteXA), NULL );
5609
5610       /* and the tyents_to_keep cache */
5611       ML_(dinfo_free)( tyents_to_keep_cache );
5612       tyents_to_keep_cache = NULL;
5613
5614       vg_assert( varparser.fndn_ix_Table == NULL );
5615
5616       /* And the signatured type hash.  */
5617       VG_(HT_destruct) ( signature_types, ML_(dinfo_free) );
5618
5619       /* record the GExprs in di so they can be freed later */
5620       vg_assert(!di->admin_gexprs);
5621       di->admin_gexprs = gexprs;
5622    }
5623
5624    // Free up dynamically allocated memory
5625    if (VG_(clo_read_var_info)) {
5626       type_parser_release(&typarser);
5627       var_parser_release(&varparser);
5628    }
5629 }
5630
5631
5632 /*------------------------------------------------------------*/
5633 /*---                                                      ---*/
5634 /*--- The "new" DWARF3 reader -- top level control logic   ---*/
5635 /*---                                                      ---*/
5636 /*------------------------------------------------------------*/
5637
5638 static Bool               d3rd_jmpbuf_valid  = False;
5639 static const HChar*       d3rd_jmpbuf_reason = NULL;
5640 static VG_MINIMAL_JMP_BUF(d3rd_jmpbuf);
5641
5642 static __attribute__((noreturn)) void barf ( const HChar* reason ) {
5643    vg_assert(d3rd_jmpbuf_valid);
5644    d3rd_jmpbuf_reason = reason;
5645    VG_MINIMAL_LONGJMP(d3rd_jmpbuf);
5646    /*NOTREACHED*/
5647    vg_assert(0);
5648 }
5649
5650
5651 void
5652 ML_(new_dwarf3_reader) (
5653    DebugInfo* di,
5654    DiSlice escn_debug_info,      DiSlice escn_debug_types,
5655    DiSlice escn_debug_abbv,      DiSlice escn_debug_line,
5656    DiSlice escn_debug_str,       DiSlice escn_debug_ranges,
5657    DiSlice escn_debug_rnglists,  DiSlice escn_debug_loclists,
5658    DiSlice escn_debug_loc,       DiSlice escn_debug_info_alt,
5659    DiSlice escn_debug_abbv_alt,  DiSlice escn_debug_line_alt,
5660    DiSlice escn_debug_str_alt,   DiSlice escn_debug_line_str
5661 )
5662 {
5663    volatile Int  jumped;
5664    volatile Bool td3 = di->trace_symtab;
5665
5666    /* Run the _wrk function to read the dwarf3.  If it succeeds, it
5667       just returns normally.  If there is any failure, it longjmp's
5668       back here, having first set d3rd_jmpbuf_reason to something
5669       useful. */
5670    vg_assert(d3rd_jmpbuf_valid  == False);
5671    vg_assert(d3rd_jmpbuf_reason == NULL);
5672
5673    d3rd_jmpbuf_valid = True;
5674    jumped = VG_MINIMAL_SETJMP(d3rd_jmpbuf);
5675    if (jumped == 0) {
5676       /* try this ... */
5677       new_dwarf3_reader_wrk( di, barf,
5678                              escn_debug_info,     escn_debug_types,
5679                              escn_debug_abbv,     escn_debug_line,
5680                              escn_debug_str,      escn_debug_ranges,
5681                              escn_debug_rnglists, escn_debug_loclists,
5682                              escn_debug_loc,      escn_debug_info_alt,
5683                              escn_debug_abbv_alt, escn_debug_line_alt,
5684                              escn_debug_str_alt,  escn_debug_line_str );
5685       d3rd_jmpbuf_valid = False;
5686       TRACE_D3("\n------ .debug_info reading was successful ------\n");
5687    } else {
5688       /* It longjmp'd. */
5689       d3rd_jmpbuf_valid = False;
5690       /* Can't longjump without giving some sort of reason. */
5691       vg_assert(d3rd_jmpbuf_reason != NULL);
5692
5693       TRACE_D3("\n------ .debug_info reading failed ------\n");
5694
5695       ML_(symerr)(di, True, d3rd_jmpbuf_reason);
5696    }
5697
5698    d3rd_jmpbuf_valid  = False;
5699    d3rd_jmpbuf_reason = NULL;
5700 }
5701
5702
5703
5704 /* --- Unused code fragments which might be useful one day. --- */
5705
5706 #if 0
5707    /* Read the arange tables */
5708    TRACE_SYMTAB("\n");
5709    TRACE_SYMTAB("\n------ The contents of .debug_arange ------\n");
5710    init_Cursor( &aranges, debug_aranges_img,
5711                 debug_aranges_sz, 0, barf,
5712                 "Overrun whilst reading .debug_aranges section" );
5713    while (True) {
5714       ULong  len, d_i_offset;
5715       Bool   is64;
5716       UShort version;
5717       UChar  asize, segsize;
5718
5719       if (is_at_end_Cursor( &aranges ))
5720          break;
5721       /* Read one arange thingy */
5722       /* initial_length field */
5723       len = get_Initial_Length( &is64, &aranges,
5724                "in .debug_aranges: invalid initial-length field" );
5725       version    = get_UShort( &aranges );
5726       d_i_offset = get_Dwarfish_UWord( &aranges, is64 );
5727       asize      = get_UChar( &aranges );
5728       segsize    = get_UChar( &aranges );
5729       TRACE_D3("  Length:                   %llu\n", len);
5730       TRACE_D3("  Version:                  %d\n", (Int)version);
5731       TRACE_D3("  Offset into .debug_info:  %llx\n", d_i_offset);
5732       TRACE_D3("  Pointer Size:             %d\n", (Int)asize);
5733       TRACE_D3("  Segment Size:             %d\n", (Int)segsize);
5734       TRACE_D3("\n");
5735       TRACE_D3("    Address            Length\n");
5736
5737       while ((get_position_of_Cursor( &aranges ) % (2 * asize)) > 0) {
5738          (void)get_UChar( & aranges );
5739       }
5740       while (True) {
5741          ULong address = get_Dwarfish_UWord( &aranges, asize==8 );
5742          ULong length = get_Dwarfish_UWord( &aranges, asize==8 );
5743          TRACE_D3("    0x%016llx 0x%llx\n", address, length);
5744          if (address == 0 && length == 0) break;
5745       }
5746    }
5747    TRACE_SYMTAB("\n");
5748 #endif
5749
5750 #endif // defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_solaris)
5751
5752 /*--------------------------------------------------------------------*/
5753 /*--- end                                                          ---*/
5754 /*--------------------------------------------------------------------*/