coregrind/m_debuginfo/readdwarf3.c

   1 /* -*- mode: C; c-basic-offset: 3; -*- */
   2
   3 /*--------------------------------------------------------------------*/
   4 /*--- Read DWARF3/4 ".debug_info" sections (DIE trees).            ---*/
   5 /*---                                                 readdwarf3.c ---*/
   6 /*--------------------------------------------------------------------*/
   7
   8 /*
   9    This file is part of Valgrind, a dynamic binary instrumentation
  10    framework.
  11
  12    Copyright (C) 2008-2017 OpenWorks LLP
  13       info@open-works.co.uk
  14
  15    This program is free software; you can redistribute it and/or
  16    modify it under the terms of the GNU General Public License as
  17    published by the Free Software Foundation; either version 2 of the
  18    License, or (at your option) any later version.
  19
  20    This program is distributed in the hope that it will be useful, but
  21    WITHOUT ANY WARRANTY; without even the implied warranty of
  22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  23    General Public License for more details.
  24
  25    You should have received a copy of the GNU General Public License
  26    along with this program; if not, see <http://www.gnu.org/licenses/>.
  27
  28    The GNU General Public License is contained in the file COPYING.
  29
  30    Neither the names of the U.S. Department of Energy nor the
  31    University of California nor the names of its contributors may be
  32    used to endorse or promote products derived from this software
  33    without prior written permission.
  34 */
  35
  36 #if defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_solaris)
  37
  38 /* REFERENCE (without which this code will not make much sense):
  39
  40    DWARF Debugging Information Format, Version 3,
  41    dated 20 December 2005 (the "D3 spec").
  42
  43    Available at http://www.dwarfstd.org/Dwarf3.pdf.  There's also a
  44    .doc (MS Word) version, but for some reason the section numbers
  45    between the Word and PDF versions differ by 1 in the first digit.
  46    All section references in this code are to the PDF version.
  47
  48    CURRENT HACKS:
  49
  50    DW_TAG_{const,volatile}_type with no DW_AT_type is allowed; it is
  51       assumed to mean "const void" or "volatile void" respectively.
  52       GDB appears to interpret them like this, anyway.
  53
  54    In many cases it is important to know the svma of a CU (the "base
  55    address of the CU", as the D3 spec calls it).  There are some
  56    situations in which the spec implies this value is unknown, but the
  57    Dwarf3 produced by gcc-4.1 seems to assume it is not unknown but
  58    merely zero when not explicitly stated.  So we too have to make
  59    that assumption.
  60
  61    POTENTIAL BUG?  Spotted 6 Sept 08.  Why doesn't
  62    unitary_range_list() bias the resulting range list in the same way
  63    that its more general cousin, get_range_list(), does?  I don't
  64    know.
  65
  66    TODO, 2008 Feb 17:
  67
  68    get rid of cu_svma_known and document the assumed-zero svma hack.
  69
  70    ML_(sizeOfType): differentiate between zero sized types and types
  71    for which the size is unknown.  Is this important?  I don't know.
  72
  73    DW_TAG_array_types: deal with explicit sizes (currently we compute
  74    the size from the bounds and the element size, although that's
  75    fragile, if the bounds are incompletely specified, or completely
  76    absent)
  77
  78    Document reason for difference (by 1) of stack preening depth in
  79    parse_var_DIE vs parse_type_DIE.
  80
  81    Don't hand to ML_(addVars), vars whose locations are entirely in
  82    registers (DW_OP_reg*).  This is merely a space-saving
  83    optimisation, as ML_(evaluate_Dwarf3_Expr) should handle these
  84    expressions correctly, by failing to evaluate them and hence
  85    effectively ignoring the variable with which they are associated.
  86
  87    Deal with DW_TAG_array_types which have element size != stride
  88
  89    In some cases, the info for a variable is split between two
  90    different DIEs (generally a declarer and a definer).  We punt on
  91    these.  Could do better here.
  92
  93    The 'data_bias' argument passed to the expression evaluator
  94    (ML_(evaluate_Dwarf3_Expr)) should really be changed to a
  95    MaybeUWord, to make it clear when we do vs don't know what it is
  96    for the evaluation of an expression.  At the moment zero is passed
  97    for this parameter in the don't know case.  That's a bit fragile
  98    and obscure; using a MaybeUWord would be clearer.
  99
 100    POTENTIAL PERFORMANCE IMPROVEMENTS:
 101
 102    Currently, duplicate removal and all other queries for the type
 103    entities array is done using cuOffset-based pointing, which
 104    involves a binary search (VG_(lookupXA)) for each access.  This is
 105    wildly inefficient, although simple.  It would be better to
 106    translate all the cuOffset-based references (iow, all the "R" and
 107    "Rs" fields in the TyEnts in 'tyents') to direct index numbers in
 108    'tyents' right at the start of dedup_types(), and use direct
 109    indexing (VG_(indexXA)) wherever possible after that.
 110
 111    cmp__XArrays_of_AddrRange is also a performance bottleneck.  Move
 112    VG_(indexXA) into pub_tool_xarray.h so it can be inlined at all use
 113    points, and possibly also make an _UNCHECKED version which skips
 114    the range checks in performance-critical situations such as this.
 115
 116    Handle interaction between read_DIE and parse_{var,type}_DIE
 117    better.  Currently read_DIE reads the entire DIE just to find where
 118    the end is (and for debug printing), so that it can later reliably
 119    move the cursor to the end regardless of what parse_{var,type}_DIE
 120    do.  This means many DIEs (most, even?) are read twice.  It would
 121    be smarter to make parse_{var,type}_DIE return a Bool indicating
 122    whether or not they advanced the DIE cursor, and only if they
 123    didn't should read_DIE itself read through the DIE.
 124
 125    ML_(addVar) and add_var_to_arange: quite a lot of DiAddrRanges have
 126    zero variables in their .vars XArray.  Rather than have an XArray
 127    with zero elements (which uses 2 malloc'd blocks), allow the .vars
 128    pointer to be NULL in this case.
 129
 130    More generally, reduce the amount of memory allocated and freed
 131    while reading Dwarf3 type/variable information.  Even modest (20MB)
 132    objects cause this module to allocate and free hundreds of
 133    thousands of small blocks, and ML_(arena_malloc) and its various
 134    groupies always show up at the top of performance profiles. */
 135
 136 #include "pub_core_basics.h"
 137 #include "pub_core_debuginfo.h"
 138 #include "pub_core_libcbase.h"
 139 #include "pub_core_libcassert.h"
 140 #include "pub_core_libcprint.h"
 141 #include "pub_core_libcsetjmp.h"   // setjmp facilities
 142 #include "pub_core_hashtable.h"
 143 #include "pub_core_options.h"
 144 #include "pub_core_tooliface.h"    /* VG_(needs) */
 145 #include "pub_core_xarray.h"
 146 #include "pub_core_wordfm.h"
 147 #include "priv_misc.h"             /* dinfo_zalloc/free */
 148 #include "priv_image.h"
 149 #include "priv_tytypes.h"
 150 #include "priv_d3basics.h"
 151 #include "priv_storage.h"
 152 #include "priv_readdwarf3.h"       /* self */
 153
 154
 155 /*------------------------------------------------------------*/
 156 /*---                                                      ---*/
 157 /*--- Basic machinery for parsing DIEs.                    ---*/
 158 /*---                                                      ---*/
 159 /*------------------------------------------------------------*/
 160
 161 #define TRACE_D3(format, args...) \
 162    if (UNLIKELY(td3)) { VG_(printf)(format, ## args); }
 163 #define TD3 (UNLIKELY(td3))
 164
 165 #define D3_INVALID_CUOFF  ((UWord)(-1UL))
 166 #define D3_FAKEVOID_CUOFF ((UWord)(-2UL))
 167
 168 typedef
 169    struct {
 170       DiSlice sli;      // to which this cursor applies
 171       DiOffT  sli_next; // offset in underlying DiImage; must be >= sli.ioff
 172       void (*barf)( const HChar* ) __attribute__((noreturn));
 173       const HChar* barfstr;
 174    }
 175    Cursor;
 176
 177 static inline Bool is_sane_Cursor ( const Cursor* c ) {
 178    if (!c)                return False;
 179    if (!c->barf)          return False;
 180    if (!c->barfstr)       return False;
 181    if (!ML_(sli_is_valid)(c->sli))    return False;
 182    if (c->sli.ioff == DiOffT_INVALID) return False;
 183    if (c->sli_next < c->sli.ioff)     return False;
 184    return True;
 185 }
 186
 187 // Initialise a cursor from a DiSlice (ELF section, really) so as to
 188 // start reading at offset |sli_initial_offset| from the start of the
 189 // slice.
 190 static void init_Cursor ( /*OUT*/Cursor* c,
 191                           DiSlice sli,
 192                           ULong   sli_initial_offset,
 193                           __attribute__((noreturn)) void (*barf)(const HChar*),
 194                           const HChar* barfstr )
 195 {
 196    vg_assert(c);
 197    VG_(bzero_inline)(c, sizeof(*c));
 198    c->sli              = sli;
 199    c->sli_next         = c->sli.ioff + sli_initial_offset;
 200    c->barf             = barf;
 201    c->barfstr          = barfstr;
 202    vg_assert(is_sane_Cursor(c));
 203 }
 204
 205 static Bool is_at_end_Cursor ( const Cursor* c ) {
 206    vg_assert(is_sane_Cursor(c));
 207    return c->sli_next >= c->sli.ioff + c->sli.szB;
 208 }
 209
 210 static inline ULong get_position_of_Cursor ( const Cursor* c ) {
 211    vg_assert(is_sane_Cursor(c));
 212    return c->sli_next - c->sli.ioff;
 213 }
 214 static inline void set_position_of_Cursor ( Cursor* c, ULong pos ) {
 215    c->sli_next = c->sli.ioff + pos;
 216    vg_assert(is_sane_Cursor(c));
 217 }
 218 static inline void advance_position_of_Cursor ( Cursor* c, ULong delta ) {
 219    c->sli_next += delta;
 220    vg_assert(is_sane_Cursor(c));
 221 }
 222
 223 static /*signed*/Long get_remaining_length_Cursor ( const Cursor* c ) {
 224    vg_assert(is_sane_Cursor(c));
 225    return c->sli.ioff + c->sli.szB - c->sli_next;
 226 }
 227
 228 //static void* get_address_of_Cursor ( Cursor* c ) {
 229 //   vg_assert(is_sane_Cursor(c));
 230 //   return &c->region_start_img[ c->region_next ];
 231 //}
 232
 233 static DiCursor get_DiCursor_from_Cursor ( const Cursor* c ) {
 234    return mk_DiCursor(c->sli.img, c->sli_next);
 235 }
 236
 237 /* FIXME: document assumptions on endianness for
 238    get_UShort/UInt/ULong. */
 239 static inline UChar get_UChar ( Cursor* c ) {
 240    UChar r;
 241    vg_assert(is_sane_Cursor(c));
 242    if (c->sli_next + sizeof(UChar) > c->sli.ioff + c->sli.szB) {
 243       c->barf(c->barfstr);
 244       /*NOTREACHED*/
 245       vg_assert(0);
 246    }
 247    r = ML_(img_get_UChar)(c->sli.img, c->sli_next);
 248    c->sli_next += sizeof(UChar);
 249    return r;
 250 }
 251 static UShort get_UShort ( Cursor* c ) {
 252    UShort r;
 253    vg_assert(is_sane_Cursor(c));
 254    if (c->sli_next + sizeof(UShort) > c->sli.ioff + c->sli.szB) {
 255       c->barf(c->barfstr);
 256       /*NOTREACHED*/
 257       vg_assert(0);
 258    }
 259    r = ML_(img_get_UShort)(c->sli.img, c->sli_next);
 260    c->sli_next += sizeof(UShort);
 261    return r;
 262 }
 263 static UInt get_UInt ( Cursor* c ) {
 264    UInt r;
 265    vg_assert(is_sane_Cursor(c));
 266    if (c->sli_next + sizeof(UInt) > c->sli.ioff + c->sli.szB) {
 267       c->barf(c->barfstr);
 268       /*NOTREACHED*/
 269       vg_assert(0);
 270    }
 271    r = ML_(img_get_UInt)(c->sli.img, c->sli_next);
 272    c->sli_next += sizeof(UInt);
 273    return r;
 274 }
 275 static ULong get_ULong ( Cursor* c ) {
 276    ULong r;
 277    vg_assert(is_sane_Cursor(c));
 278    if (c->sli_next + sizeof(ULong) > c->sli.ioff + c->sli.szB) {
 279       c->barf(c->barfstr);
 280       /*NOTREACHED*/
 281       vg_assert(0);
 282    }
 283    r = ML_(img_get_ULong)(c->sli.img, c->sli_next);
 284    c->sli_next += sizeof(ULong);
 285    return r;
 286 }
 287 static ULong get_ULEB128 ( Cursor* c ) {
 288    ULong result;
 289    Int   shift;
 290    UChar byte;
 291    /* unroll first iteration */
 292    byte = get_UChar( c );
 293    result = (ULong)(byte & 0x7f);
 294    if (LIKELY(!(byte & 0x80))) return result;
 295    shift = 7;
 296    /* end unroll first iteration */
 297    do {
 298       byte = get_UChar( c );
 299       result |= ((ULong)(byte & 0x7f)) << shift;
 300       shift += 7;
 301    } while (byte & 0x80);
 302    return result;
 303 }
 304 static Long get_SLEB128 ( Cursor* c ) {
 305    ULong  result = 0;
 306    Int    shift = 0;
 307    UChar  byte;
 308    do {
 309       byte = get_UChar(c);
 310       result |= ((ULong)(byte & 0x7f)) << shift;
 311       shift += 7;
 312    } while (byte & 0x80);
 313    if (shift < 64 && (byte & 0x40))
 314       result |= -(1ULL << shift);
 315    return result;
 316 }
 317
 318 /* Assume 'c' points to the start of a string.  Return a DiCursor of
 319    whatever it points at, and advance it past the terminating zero.
 320    This makes it safe for the caller to then copy the string with
 321    ML_(addStr), since (w.r.t. image overruns) the process of advancing
 322    past the terminating zero will already have "vetted" the string. */
 323 static DiCursor get_AsciiZ ( Cursor* c ) {
 324    UChar uc;
 325    DiCursor res = get_DiCursor_from_Cursor(c);
 326    do { uc = get_UChar(c); } while (uc != 0);
 327    return res;
 328 }
 329
 330 static ULong peek_ULEB128 ( Cursor* c ) {
 331    DiOffT here = c->sli_next;
 332    ULong  r    = get_ULEB128( c );
 333    c->sli_next = here;
 334    return r;
 335 }
 336 static UChar peek_UChar ( Cursor* c ) {
 337    DiOffT here = c->sli_next;
 338    UChar  r    = get_UChar( c );
 339    c->sli_next = here;
 340    return r;
 341 }
 342
 343 static ULong get_Dwarfish_UWord ( Cursor* c, Bool is_dw64 ) {
 344    return is_dw64 ? get_ULong(c) : (ULong) get_UInt(c);
 345 }
 346
 347 static UWord get_UWord ( Cursor* c ) {
 348    vg_assert(sizeof(UWord) == sizeof(void*));
 349    if (sizeof(UWord) == 4) return get_UInt(c);
 350    if (sizeof(UWord) == 8) return get_ULong(c);
 351    vg_assert(0);
 352 }
 353
 354 /* Read a DWARF3 'Initial Length' field */
 355 static ULong get_Initial_Length ( /*OUT*/Bool* is64,
 356                                   Cursor* c,
 357                                   const HChar* barfMsg )
 358 {
 359    ULong w64;
 360    UInt  w32;
 361    *is64 = False;
 362    w32 = get_UInt( c );
 363    if (w32 >= 0xFFFFFFF0 && w32 < 0xFFFFFFFF) {
 364       c->barf( barfMsg );
 365    }
 366    else if (w32 == 0xFFFFFFFF) {
 367       *is64 = True;
 368       w64   = get_ULong( c );
 369    } else {
 370       *is64 = False;
 371       w64 = (ULong)w32;
 372    }
 373    return w64;
 374 }
 375
 376
 377 /*------------------------------------------------------------*/
 378 /*---                                                      ---*/
 379 /*--- "CUConst" structure                                  ---*/
 380 /*---                                                      ---*/
 381 /*------------------------------------------------------------*/
 382
 383 typedef
 384    struct _name_form {
 385       ULong at_name;  // Dwarf Attribute name
 386       ULong at_form;  // Dwarf Attribute form
 387       UInt  skip_szB; // Nr of bytes skippable from here ...
 388       UInt  next_nf;  // ... to reach this attr/form index in the g_abbv.nf
 389    } name_form;
 390 /* skip_szB and next_nf are used to optimise the skipping of uninteresting DIEs.
 391    Each name_form maintains how many (fixed) nr of bytes can be skipped from
 392    the beginning of this form till the next attr/form to look at.
 393    The next form to look can be:
 394        an 'interesting' attr/form to read while skipping a DIE
 395           (currently, this is only DW_AT_sibling)
 396    or
 397        a variable length form which must be read to be skipped.
 398    For a variable length form, the skip_szB will be equal to VARSZ_FORM.
 399
 400    Note: this technique could also be used to speed up the parsing
 401    of DIEs : for each parser kind, we could have the nr of bytes
 402    to skip to directly reach the interesting form(s) for the parser. */
 403
 404 typedef
 405    struct _g_abbv {
 406       struct _g_abbv *next; // read/write by hash table.
 407       UWord  abbv_code;     // key, read by hash table
 408       ULong  atag;
 409       ULong  has_children;
 410       name_form nf[0];
 411       /* Variable-length array of name/form pairs, terminated
 412          by a 0/0 pair.
 413          The skip_szB/next_nf allows to skip efficiently a DIE
 414          described by this g_abbv; */
 415     } g_abbv;
 416
 417 /* Holds information that is constant through the parsing of a
 418    Compilation Unit.  This is basically plumbed through to
 419    everywhere. */
 420 typedef
 421    struct {
 422       /* Call here if anything goes wrong */
 423       void (*barf)( const HChar* ) __attribute__((noreturn));
 424       /* Is this 64-bit DWARF ? */
 425       Bool   is_dw64;
 426       /* Which DWARF version ?  (2, 3 or 4) */
 427       UShort version;
 428       /* Length of this Compilation Unit, as stated in the
 429          .unit_length :: InitialLength field of the CU Header.
 430          However, this size (as specified by the D3 spec) does not
 431          include the size of the .unit_length field itself, which is
 432          either 4 or 12 bytes (32-bit or 64-bit Dwarf3).  That value
 433          can be obtained through the expression ".is_dw64 ? 12 : 4". */
 434       ULong  unit_length;
 435       /* Offset of start of this unit in .debug_info */
 436       UWord  cu_start_offset;
 437       /* SVMA for this CU.  In the D3 spec, is known as the "base
 438          address of the compilation unit (last para sec 3.1.1).
 439          Needed for (amongst things) interpretation of location-list
 440          values. */
 441       Addr   cu_svma;
 442       Bool   cu_svma_known;
 443
 444       /* The debug_abbreviations table to be used for this Unit */
 445       //UChar* debug_abbv;
 446       /* Upper bound on size thereof (an overestimate, in general) */
 447       //UWord  debug_abbv_maxszB;
 448       /* A bounded area of the image, to be used as the
 449          debug_abbreviations table tobe used for this Unit. */
 450       DiSlice debug_abbv;
 451
 452       /* Image information for various sections. */
 453       DiSlice escn_debug_str;
 454       DiSlice escn_debug_ranges;
 455       DiSlice escn_debug_loc;
 456       DiSlice escn_debug_line;
 457       DiSlice escn_debug_info;
 458       DiSlice escn_debug_types;
 459       DiSlice escn_debug_info_alt;
 460       DiSlice escn_debug_str_alt;
 461       /* How much to add to .debug_types resp. alternate .debug_info offsets
 462          in cook_die*.  */
 463       UWord  types_cuOff_bias;
 464       UWord  alt_cuOff_bias;
 465       /* --- Needed so we can add stuff to the string table. --- */
 466       struct _DebugInfo* di;
 467       /* --- a hash table of g_abbv (i.e. parsed abbreviations) --- */
 468       VgHashTable *ht_abbvs;
 469
 470       /* True if this came from .debug_types; otherwise it came from
 471          .debug_info.  */
 472       Bool is_type_unit;
 473       /* For a unit coming from .debug_types, these hold the TU's type
 474          signature and the uncooked DIE offset of the TU's signatured
 475          type.  For a unit coming from .debug_info, these are unused.  */
 476       ULong type_signature;
 477       ULong type_offset;
 478
 479       /* Signatured type hash; computed once and then shared by all
 480          CUs.  */
 481       VgHashTable *signature_types;
 482
 483       /* True if this came from alternate .debug_info; otherwise
 484          it came from normal .debug_info or .debug_types.  */
 485       Bool is_alt_info;
 486    }
 487    CUConst;
 488
 489
 490 /* Return the cooked value of DIE depending on whether CC represents a
 491    .debug_types unit.  To cook a DIE, we pretend that the .debug_info,
 492    .debug_types and optional alternate .debug_info sections form
 493    a contiguous whole, so that DIEs coming from .debug_types are numbered
 494    starting at the end of .debug_info and DIEs coming from alternate
 495    .debug_info are numbered starting at the end of .debug_types.  */
 496 static UWord cook_die( const CUConst* cc, UWord die )
 497 {
 498    if (cc->is_type_unit)
 499       die += cc->types_cuOff_bias;
 500    else if (cc->is_alt_info)
 501       die += cc->alt_cuOff_bias;
 502    return die;
 503 }
 504
 505 /* Like cook_die, but understand that DIEs coming from a
 506    DW_FORM_ref_sig8 reference are already cooked.  Also, handle
 507    DW_FORM_GNU_ref_alt from within primary .debug_info or .debug_types
 508    as reference to alternate .debug_info.  */
 509 static UWord cook_die_using_form( const CUConst *cc, UWord die, DW_FORM form)
 510 {
 511    if (form == DW_FORM_ref_sig8)
 512       return die;
 513    if (form == DW_FORM_GNU_ref_alt)
 514       return die + cc->alt_cuOff_bias;
 515    return cook_die( cc, die );
 516 }
 517
 518 /* Return the uncooked offset of DIE and set *TYPE_FLAG to true if the DIE
 519    came from the .debug_types section and *ALT_FLAG to true if the DIE
 520    came from alternate .debug_info section.  */
 521 static UWord uncook_die( const CUConst *cc, UWord die, /*OUT*/Bool *type_flag,
 522                          Bool *alt_flag )
 523 {
 524    *alt_flag = False;
 525    *type_flag = False;
 526    /* The use of escn_debug_{info,types}.szB seems safe to me even if
 527       escn_debug_{info,types} are DiSlice_INVALID (meaning the
 528       sections were not found), because DiSlice_INVALID.szB is always
 529       zero.  That said, it seems unlikely we'd ever get here if
 530       .debug_info or .debug_types were missing. */
 531    if (die >= cc->escn_debug_info.szB) {
 532       if (die >= cc->escn_debug_info.szB + cc->escn_debug_types.szB) {
 533          *alt_flag = True;
 534          die -= cc->escn_debug_info.szB + cc->escn_debug_types.szB;
 535       } else {
 536          *type_flag = True;
 537          die -= cc->escn_debug_info.szB;
 538       }
 539    }
 540    return die;
 541 }
 542
 543 /*------------------------------------------------------------*/
 544 /*---                                                      ---*/
 545 /*--- Helper functions for Guarded Expressions             ---*/
 546 /*---                                                      ---*/
 547 /*------------------------------------------------------------*/
 548
 549 /* Parse the location list starting at img-offset 'debug_loc_offset'
 550    in .debug_loc.  Results are biased with 'svma_of_referencing_CU'
 551    and so I believe are correct SVMAs for the object as a whole.  This
 552    function allocates the UChar*, and the caller must deallocate it.
 553    The resulting block is in so-called Guarded-Expression format.
 554
 555    Guarded-Expression format is similar but not identical to the DWARF3
 556    location-list format.  The format of each returned block is:
 557
 558       UChar biasMe;
 559       UChar isEnd;
 560       followed by zero or more of
 561
 562       (Addr aMin;  Addr aMax;  UShort nbytes;  ..bytes..;  UChar isEnd)
 563
 564    '..bytes..' is a standard DWARF3 location expression which is
 565    valid when aMin <= pc <= aMax (possibly after suitable biasing).
 566
 567    The number of bytes in '..bytes..' is nbytes.
 568
 569    The end of the sequence is marked by an isEnd == 1 value.  All
 570    previous isEnd values must be zero.
 571
 572    biasMe is 1 if the aMin/aMax fields need this DebugInfo's
 573    text_bias added before use, and 0 if this is not necessary
 574    (the GX is ready to go).
 575
 576    Hence the block can be quickly parsed and is self-describing.  Note
 577    that aMax is 1 less than the corresponding value in a DWARF3
 578    location list.  Zero length ranges, with aMax == aMin-1, are not
 579    allowed.
 580 */
 581 /* 2008-sept-12: moved ML_(pp_GX) from here to d3basics.c, where
 582    it more logically belongs. */
 583
 584
 585 /* Apply a text bias to a GX. */
 586 static void bias_GX ( /*MOD*/GExpr* gx, const DebugInfo* di )
 587 {
 588    UShort nbytes;
 589    UChar* p = &gx->payload[0];
 590    UChar* pA;
 591    UChar  uc;
 592    uc = *p++; /*biasMe*/
 593    if (uc == 0)
 594       return;
 595    vg_assert(uc == 1);
 596    p[-1] = 0; /* mark it as done */
 597    while (True) {
 598       uc = *p++;
 599       if (uc == 1)
 600          break; /*isEnd*/
 601       vg_assert(uc == 0);
 602       /* t-bias aMin */
 603       pA = (UChar*)p;
 604       ML_(write_Addr)(pA, ML_(read_Addr)(pA) + di->text_debug_bias);
 605       p += sizeof(Addr);
 606       /* t-bias aMax */
 607       pA = (UChar*)p;
 608       ML_(write_Addr)(pA, ML_(read_Addr)(pA) + di->text_debug_bias);
 609       p += sizeof(Addr);
 610       /* nbytes, and actual expression */
 611       nbytes = ML_(read_UShort)(p); p += sizeof(UShort);
 612       p += nbytes;
 613    }
 614 }
 615
 616 __attribute__((noinline))
 617 static GExpr* make_singleton_GX ( DiCursor block, ULong nbytes )
 618 {
 619    SizeT  bytesReqd;
 620    GExpr* gx;
 621    UChar *p, *pstart;
 622
 623    vg_assert(sizeof(UWord) == sizeof(Addr));
 624    vg_assert(nbytes <= 0xFFFF); /* else we overflow the nbytes field */
 625    bytesReqd
 626       =   sizeof(UChar)  /*biasMe*/    + sizeof(UChar) /*!isEnd*/
 627         + sizeof(UWord)  /*aMin*/      + sizeof(UWord) /*aMax*/
 628         + sizeof(UShort) /*nbytes*/    + (SizeT)nbytes
 629         + sizeof(UChar); /*isEnd*/
 630
 631    gx = ML_(dinfo_zalloc)( "di.readdwarf3.msGX.1",
 632                            sizeof(GExpr) + bytesReqd );
 633
 634    p = pstart = &gx->payload[0];
 635
 636    p = ML_(write_UChar)(p, 0);        /*biasMe*/
 637    p = ML_(write_UChar)(p, 0);        /*!isEnd*/
 638    p = ML_(write_Addr)(p, 0);         /*aMin*/
 639    p = ML_(write_Addr)(p, ~0);        /*aMax*/
 640    p = ML_(write_UShort)(p, nbytes);  /*nbytes*/
 641    ML_(cur_read_get)(p, block, nbytes); p += nbytes;
 642    p = ML_(write_UChar)(p, 1);        /*isEnd*/
 643
 644    vg_assert( (SizeT)(p - pstart) == bytesReqd);
 645    vg_assert( &gx->payload[bytesReqd]
 646               == ((UChar*)gx) + sizeof(GExpr) + bytesReqd );
 647
 648    return gx;
 649 }
 650
 651 __attribute__((noinline))
 652 static GExpr* make_general_GX ( const CUConst* cc,
 653                                 Bool     td3,
 654                                 ULong    debug_loc_offset,
 655                                 Addr     svma_of_referencing_CU )
 656 {
 657    Addr      base;
 658    Cursor    loc;
 659    XArray*   xa; /* XArray of UChar */
 660    GExpr*    gx;
 661    Word      nbytes;
 662
 663    vg_assert(sizeof(UWord) == sizeof(Addr));
 664    if (!ML_(sli_is_valid)(cc->escn_debug_loc) || cc->escn_debug_loc.szB == 0)
 665       cc->barf("make_general_GX: .debug_loc is empty/missing");
 666
 667    init_Cursor( &loc, cc->escn_debug_loc, 0, cc->barf,
 668                 "Overrun whilst reading .debug_loc section(2)" );
 669    set_position_of_Cursor( &loc, debug_loc_offset );
 670
 671    TRACE_D3("make_general_GX (.debug_loc_offset = %llu, ioff = %llu) {\n",
 672             debug_loc_offset, get_DiCursor_from_Cursor(&loc).ioff );
 673
 674    /* Who frees this xa?  It is freed before this fn exits. */
 675    xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.mgGX.1",
 676                     ML_(dinfo_free),
 677                     sizeof(UChar) );
 678
 679    { UChar c = 1; /*biasMe*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
 680
 681    base = 0;
 682    while (True) {
 683       Bool  acquire;
 684       UWord len;
 685       /* Read a (host-)word pair.  This is something of a hack since
 686          the word size to read is really dictated by the ELF file;
 687          however, we assume we're reading a file with the same
 688          word-sizeness as the host.  Reasonably enough. */
 689       UWord w1 = get_UWord( &loc );
 690       UWord w2 = get_UWord( &loc );
 691
 692       TRACE_D3("   %08lx %08lx\n", w1, w2);
 693       if (w1 == 0 && w2 == 0)
 694          break; /* end of list */
 695
 696       if (w1 == -1UL) {
 697          /* new value for 'base' */
 698          base = w2;
 699          continue;
 700       }
 701
 702       /* else a location expression follows */
 703       /* else enumerate [w1+base, w2+base) */
 704       /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
 705          (sec 2.17.2) */
 706       if (w1 > w2) {
 707          TRACE_D3("negative range is for .debug_loc expr at "
 708                   "file offset %llu\n",
 709                   debug_loc_offset);
 710          cc->barf( "negative range in .debug_loc section" );
 711       }
 712
 713       /* ignore zero length ranges */
 714       acquire = w1 < w2;
 715       len     = (UWord)get_UShort( &loc );
 716
 717       if (acquire) {
 718          UWord  w;
 719          UShort s;
 720          UChar  c;
 721          c = 0; /* !isEnd*/
 722          VG_(addBytesToXA)( xa, &c, sizeof(c) );
 723          w = w1    + base + svma_of_referencing_CU;
 724          VG_(addBytesToXA)( xa, &w, sizeof(w) );
 725          w = w2 -1 + base + svma_of_referencing_CU;
 726          VG_(addBytesToXA)( xa, &w, sizeof(w) );
 727          s = (UShort)len;
 728          VG_(addBytesToXA)( xa, &s, sizeof(s) );
 729       }
 730
 731       while (len > 0) {
 732          UChar byte = get_UChar( &loc );
 733          TRACE_D3("%02x", (UInt)byte);
 734          if (acquire)
 735             VG_(addBytesToXA)( xa, &byte, 1 );
 736          len--;
 737       }
 738       TRACE_D3("\n");
 739    }
 740
 741    { UChar c = 1; /*isEnd*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
 742
 743    nbytes = VG_(sizeXA)( xa );
 744    vg_assert(nbytes >= 1);
 745
 746    gx = ML_(dinfo_zalloc)( "di.readdwarf3.mgGX.2", sizeof(GExpr) + nbytes );
 747    VG_(memcpy)( &gx->payload[0], (UChar*)VG_(indexXA)(xa,0), nbytes );
 748    vg_assert( &gx->payload[nbytes]
 749               == ((UChar*)gx) + sizeof(GExpr) + nbytes );
 750
 751    VG_(deleteXA)( xa );
 752
 753    TRACE_D3("}\n");
 754
 755    return gx;
 756 }
 757
 758
 759 /*------------------------------------------------------------*/
 760 /*---                                                      ---*/
 761 /*--- Helper functions for range lists and CU headers      ---*/
 762 /*---                                                      ---*/
 763 /*------------------------------------------------------------*/
 764
 765 /* Denotes an address range.  Both aMin and aMax are included in the
 766    range; hence a complete range is (0, ~0) and an empty range is any
 767    (X, X-1) for X > 0.*/
 768 typedef
 769    struct { Addr aMin; Addr aMax; }
 770    AddrRange;
 771
 772
 773 /* Impose an arbitrary but consistent total ordering on XArrays of
 774    AddrRange.  An array with fewer elements sorts first; arrays of
     equal length are compared element by element, aMin before aMax.
     Returns -1, 0 or 1. */
 775 static Word cmp__XArrays_of_AddrRange ( const XArray* rngs1,
 776                                         const XArray* rngs2 )
 777 {
 778    Word len1, len2, ix;
 779    vg_assert(rngs1 && rngs2);
 780    len1 = VG_(sizeXA)( rngs1 );
 781    len2 = VG_(sizeXA)( rngs2 );
 782    if (len1 != len2)
 783       return len1 < len2 ? -1 : 1;
 784    for (ix = 0; ix < len1; ix++) {
 785       const AddrRange* a = (AddrRange*)VG_(indexXA)( rngs1, ix );
 786       const AddrRange* b = (AddrRange*)VG_(indexXA)( rngs2, ix );
 787       if (a->aMin != b->aMin)
 788          return a->aMin < b->aMin ? -1 : 1;
 789       if (a->aMax != b->aMax)
 790          return a->aMax < b->aMax ? -1 : 1;
 791    }
 792    return 0;
 793 }
 794
 795
 796 __attribute__((noinline))
 797 static XArray* /* of AddrRange */ empty_range_list ( void )
 798 {
 799    /* A new XArray of AddrRange with no elements in it.  It is not
 800       freed here.  Who frees it?  varstack_preen() does. */
 801    XArray* fresh;
 802    fresh = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.erl.1",
 803                        ML_(dinfo_free), sizeof(AddrRange) );
 804    return fresh;
 805 }
 806
 807
 808 __attribute__((noinline))
 809 static XArray* unitary_range_list ( Addr aMin, Addr aMax )
 810 {
 811    /* Build a one-element XArray of AddrRange holding [aMin, aMax].
 812       The array is not freed here; varstack_preen() does that. */
 813    AddrRange only;
 814    XArray*   singleton;
 815    vg_assert(aMin <= aMax);
 816    only.aMin = aMin;
 817    only.aMax = aMax;
 818    singleton = VG_(newXA)( ML_(dinfo_zalloc),  "di.readdwarf3.url.1",
 819                            ML_(dinfo_free), sizeof(AddrRange) );
 820    VG_(addToXA)( singleton, &only );
 821    return singleton;
 822 }
 823
 824
 825 /* Collect the address ranges listed in .debug_ranges, starting at
 826    img-offset 'debug_ranges_offset'.  Every range is biased by the
 827    current 'base' and by 'svma_of_referencing_CU' and is stored with
 828    inclusive bounds.  The XArray is allocated here and returned; it
 829    is not freed by this function. */
 830 __attribute__((noinline))
 831 static XArray* /* of AddrRange */
 832 get_range_list ( const CUConst* cc,
 833                  Bool     td3,
 834                  UWord    debug_ranges_offset,
 835                  Addr     svma_of_referencing_CU )
 836 {
 837    Cursor    cur;
 838    XArray*   result; /* XArray of AddrRange */
 839    AddrRange rng;
 840    Addr      base = 0; /* replaced whenever a (-1, X) entry is seen */
 841
 842    if (!ML_(sli_is_valid)(cc->escn_debug_ranges)
 843        || cc->escn_debug_ranges.szB == 0)
 844       cc->barf("get_range_list: .debug_ranges is empty/missing");
 845
 846    init_Cursor( &cur, cc->escn_debug_ranges, 0, cc->barf,
 847                 "Overrun whilst reading .debug_ranges section(2)" );
 848    set_position_of_Cursor( &cur, debug_ranges_offset );
 849
 850    /* Who frees 'result'?  varstack_preen() does. */
 851    result = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.grl.1",
 852                         ML_(dinfo_free), sizeof(AddrRange) );
 853
 854    for (;;) {
 855       /* Entries are pairs of host words.  Strictly speaking the ELF
 856          file dictates the word size, but the file is assumed to have
 857          the same word size as the host. */
 858       const UWord lo = get_UWord( &cur );
 859       const UWord hi = get_UWord( &cur );
 860
 861       if (lo == 0 && hi == 0)
 862          break; /* end of list. */
 863
 864       if (lo == -1UL) {
 865          /* new value for 'base' */
 866          base = hi;
 867          continue;
 868       }
 869
 870       /* Otherwise the pair denotes [lo+base, hi+base): 'hi' is 1
 871          past the end of the range, as per the D3 defn for
 872          "DW_AT_high_pc" (sec 2.17.2). */
 873       if (lo > hi)
 874          cc->barf( "negative range in .debug_ranges section" );
 875       if (lo < hi) {
 876          const Addr bias = base + svma_of_referencing_CU;
 877          rng.aMin = lo     + bias;
 878          rng.aMax = hi - 1 + bias;
 879          vg_assert(rng.aMin <= rng.aMax);
 880          VG_(addToXA)( result, &rng );
 881       }
 882    }
 883    return result;
 884 }
 885
 886 #define VARSZ_FORM 0xffffffff /* size marker meaning "variable length form"; see get_Form_szB */
 887 static UInt get_Form_szB (const CUConst* cc, DW_FORM form ); /* forward declaration: init_ht_abbvs calls it before its definition */
 888
 889 /* Initialises cc->ht_abbvs, the hash table of abbreviations.
 890    We do a single scan of the abbv slice (cc->debug_abbv) to parse
 891    and build all abbreviations, for the following reasons:
 892      * all or most abbreviations will be needed in any case
 893        (at least for var-info reading).
 894      * re-reading each time an abbreviation causes a lot of calls
 895        to get_ULEB128.
 896      * a CU should not have many abbreviations.
     'cc' supplies the slice and the barf callback installed on the
     cursor, and receives the new table.  'td3' is not named in the
     body; presumably the TD3/TRACE_D3 macros read it (confirm against
     their definitions). */
 897 static void init_ht_abbvs (CUConst* cc,
 898                            Bool td3)
 899 {
 900    Cursor c;
 901    g_abbv *ta; // temporary abbreviation, reallocated if needed.
 902    UInt ta_nf_maxE; // max nr of pairs in ta.nf[], doubled when reallocated.
 903    UInt ta_nf_n;    // nr of pairs in ta->nf that are initialised.
 904    g_abbv *ht_ta; // abbv to insert in hash table.
 905    Int i;
 906
     // Bytes needed for a g_abbv followed by _nf_szE name_form pairs.
     // The parameter is parenthesised so that any expression is safe.
 907    #define SZ_G_ABBV(_nf_szE) (sizeof(g_abbv) + (_nf_szE) * sizeof(name_form))
 908
 909    ta_nf_maxE = 10; // starting with enough for 9 pairs+terminating pair.
 910    ta = ML_(dinfo_zalloc) ("di.readdwarf3.ht_ta_nf", SZ_G_ABBV(ta_nf_maxE));
 911    cc->ht_abbvs = VG_(HT_construct) ("di.readdwarf3.ht_abbvs");
 912
 913    init_Cursor( &c, cc->debug_abbv, 0, cc->barf,
 914                "Overrun whilst parsing .debug_abbrev section(2)" );
 915    while (True) {
 916       ta->abbv_code = get_ULEB128( &c );
 917       if (ta->abbv_code == 0) break; /* end of the table */
 918
 919       ta->atag = get_ULEB128( &c );
 920       ta->has_children = get_UChar( &c );
 921       ta_nf_n = 0;
 922       while (True) {
 923          if (ta_nf_n >= ta_nf_maxE) {
             // ta->nf[] is full: move to a buffer twice as large.
 924             g_abbv *old_ta = ta;
 925             ta = ML_(dinfo_zalloc) ("di.readdwarf3.ht_ta_nf",
 926                                     SZ_G_ABBV(2 * ta_nf_maxE));
 927             ta_nf_maxE = 2 * ta_nf_maxE;
 928             VG_(memcpy) (ta, old_ta, SZ_G_ABBV(ta_nf_n));
 929             ML_(dinfo_free) (old_ta);
 930          }
 931          ta->nf[ta_nf_n].at_name = get_ULEB128( &c );
 932          ta->nf[ta_nf_n].at_form = get_ULEB128( &c );
 933          if (ta->nf[ta_nf_n].at_name == 0 && ta->nf[ta_nf_n].at_form == 0) {
 934             ta_nf_n++; // the terminating (0,0) pair is stored as well.
 935             break;
 936          }
 937          ta_nf_n++;
 938       }
 939
 940       // Initialises the skip_szB/next_nf elements : an element at position
 941       // i must contain the sum of its own size + the sizes of all elements
 942       // following i till either the next variable size element, the next
 943       // sibling element or the end of the DIE.
 944       ta->nf[ta_nf_n - 1].skip_szB = 0;
 945       ta->nf[ta_nf_n - 1].next_nf = 0;
 946       for (i = (Int)ta_nf_n - 2; i >= 0; i--) {
 947          const UInt form_szB = get_Form_szB (cc, (DW_FORM)ta->nf[i].at_form);
 948
 949          if (ta->nf[i+1].at_name == DW_AT_sibling
 950              || ta->nf[i+1].skip_szB == VARSZ_FORM
 951              || form_szB == VARSZ_FORM) {
 952             // A fixed-size run cannot extend past i: either the next
 953             // element has to be read individually, or i itself does.
 954             ta->nf[i].skip_szB = form_szB;
 955             ta->nf[i].next_nf  = i+1;
 956          } else {
 957             ta->nf[i].skip_szB = ta->nf[i+1].skip_szB + form_szB;
 958             ta->nf[i].next_nf  = ta->nf[i+1].next_nf;
 959          }
 960       }
 961
 962       ht_ta = ML_(dinfo_zalloc) ("di.readdwarf3.ht_ta", SZ_G_ABBV(ta_nf_n));
 963       VG_(memcpy) (ht_ta, ta, SZ_G_ABBV(ta_nf_n));
 964       VG_(HT_add_node) ( cc->ht_abbvs, ht_ta );
 965       if (TD3) {
 966          TRACE_D3("  Adding abbv_code %lu TAG  %s [%s] nf %u ",
 967                   ht_ta->abbv_code, ML_(pp_DW_TAG)(ht_ta->atag),
 968                   ML_(pp_DW_children)(ht_ta->has_children),
 969                   ta_nf_n);
 970          TRACE_D3("  ");
 971          for (i = 0; i < (Int)ta_nf_n; i++)
 972             TRACE_D3("[%u,%u] ", ta->nf[i].skip_szB, ta->nf[i].next_nf);
 973          TRACE_D3("\n");
 974       }
 975    }
 976
 977    ML_(dinfo_free) (ta);
 978    #undef SZ_G_ABBV
 979 }
 980
 981 static g_abbv* get_abbv (const CUConst* cc, ULong abbv_code)
 982 {
 983    /* Fetch the abbreviation stored in cc->ht_abbvs under
 984       'abbv_code'; an unknown code is reported through cc->barf. */
 985    g_abbv *found = VG_(HT_lookup) (cc->ht_abbvs, abbv_code);
 986    if (found == NULL)
 987       cc->barf ("abbv_code not found in ht_abbvs table");
 988    return found;
 989 }
 990
 991 /* Free the memory allocated in CUConst, i.e. the cc->ht_abbvs hash table. */
 992 static void clear_CUConst (CUConst* cc)
 993 {
 994    VG_(HT_destruct) ( cc->ht_abbvs, ML_(dinfo_free)); /* ML_(dinfo_free) is handed over as the free function */
 995    cc->ht_abbvs = NULL; /* do not leave a pointer to the destroyed table behind */
 996 }
 997
 998 /* Read the Compilation Unit header found at cursor 'c' and fill in
 999    'cc' from it, including the CU's abbreviation hash table. */
1000 static __attribute__((noinline))
1001 void parse_CU_Header ( /*OUT*/CUConst* cc,
1002                        Bool td3,
1003                        Cursor* c,
1004                        DiSlice escn_debug_abbv,
1005                        Bool type_unit,
1006                        Bool alt_info )
1007 {
1008    ULong  abbv_off;   /* debug_abbrev_offset field of the header */
1009    UChar  addr_szB;   /* address_size field of the header */
1010
1011    VG_(memset)(cc, 0, sizeof(*cc));
1012    vg_assert(c != NULL && c->barf != NULL);
1013    cc->barf = c->barf;
1014
1015    /* initial_length field; reading it also sets cc->is_dw64 */
1016    cc->unit_length = get_Initial_Length(
1017                         &cc->is_dw64, c,
1018                         "parse_CU_Header: invalid initial-length field" );
1019
1020    TRACE_D3("   Length:        %llu\n", cc->unit_length );
1021
1022    /* version: only 2, 3 and 4 are accepted */
1023    cc->version = get_UShort( c );
1024    if (!(cc->version == 2 || cc->version == 3 || cc->version == 4))
1025       cc->barf( "parse_CU_Header: is neither DWARF2 nor DWARF3 nor DWARF4" );
1026    TRACE_D3("   Version:       %d\n", (Int)cc->version );
1027
1028    /* debug_abbrev_offset: must fall inside escn_debug_abbv */
1029    abbv_off = get_Dwarfish_UWord( c, cc->is_dw64 );
1030    if (!(abbv_off < escn_debug_abbv.szB))
1031       cc->barf( "parse_CU_Header: invalid debug_abbrev_offset" );
1032    TRACE_D3("   Abbrev Offset: %llu\n", abbv_off );
1033
1034    /* address size.  Anything other than the host word size is
1035       rejected, which makes it safe to assume elsewhere that
1036       DW_FORM_addr and DW_FORM_ref_addr can be treated as a host
1037       word. */
1038    addr_szB = get_UChar( c );
1039    if (addr_szB != sizeof(void*))
1040       cc->barf( "parse_CU_Header: invalid address_size" );
1041    TRACE_D3("   Pointer Size:  %d\n", (Int)addr_szB );
1042
1043    cc->is_type_unit = type_unit;
1044    cc->is_alt_info = alt_info;
1045
1046    if (type_unit) {
1047       cc->type_signature = get_ULong( c );
1048       cc->type_offset = get_Dwarfish_UWord( c, cc->is_dw64 );
1049    }
1050
1051    /* Point cc->debug_abbv at the abbreviation table for this CU:
1052       start from the whole of escn_debug_abbv, then move the start
1053       forward by abbv_off bytes and shrink the size by the same
1054       amount, so the slice keeps its original end point.
1055
1056       That end point is the end of the .debug_abbrev section, so the
1057       slice is potentially (and quite likely) bigger than the table
1058       itself if this isn't the last table in the section; but at
1059       least reads can never run off the end of the section.  Since
1060       abbv_off was checked above to be less than the section size,
1061       the resulting slice should be nonempty. */
1062    vg_assert(abbv_off < escn_debug_abbv.szB);
1063    cc->debug_abbv      = escn_debug_abbv;
1064    cc->debug_abbv.szB  -= abbv_off;
1065    cc->debug_abbv.ioff += abbv_off;
1066
1067    init_ht_abbvs(cc, td3);
1068 }
1069
1070 /* This represents a single signatured type.  It maps a type signature
1071    (a ULong) to a cooked DIE offset.  Objects of this type are stored
1072    in the type signature hash table.  */
1073 typedef
1074    struct D3SignatureType {
1075       struct D3SignatureType *next; /* followed by lookup_signatured_type when searching */
1076       UWord data;                   /* set to (UWord)type_signature by record_signatured_type */
1077       ULong type_signature;         /* the full signature, compared exactly on lookup */
1078       UWord die;                    /* the cooked DIE offset that lookup returns */
1079    }
1080    D3SignatureType;
1081
1082 /* Add a (type_signature -> die) mapping to the hash table 'tab'.  */
1083 static void record_signatured_type ( VgHashTable *tab,
1084                                      ULong type_signature,
1085                                      UWord die )
1086 {
1087    D3SignatureType *node;
1088    node = ML_(dinfo_zalloc) ( "di.readdwarf3.sigtype", sizeof(*node) );
1089    node->die            = die;
1090    node->type_signature = type_signature;
1091    node->data           = (UWord) type_signature; /* value used for lookup */
1092    VG_(HT_add_node) ( tab, node );
1093 }
1094
1095 /* Look 'type_signature' up in the type signature hash table 'tab'
1096    and return the cooked DIE offset recorded for it.  If no entry
1097    has that signature, BARF is called.  */
1098 static UWord lookup_signatured_type ( const VgHashTable *tab,
1099                                       ULong type_signature,
1100                                       void (*barf)( const HChar* ) __attribute__((noreturn)) )
1101 {
1102    D3SignatureType *node;
1103    /* Walking the 'next' links by hand may be unwarranted chumminess
1104       with the hash table implementation.  */
1105    for (node = VG_(HT_lookup) ( tab, (UWord) type_signature );
1106         node != NULL; node = node->next) {
1107       if (node->type_signature == type_signature)
1108          return node->die;
1109    }
1110    barf("lookup_signatured_type: could not find signatured type");
1111    /*NOTREACHED*/
1112    vg_assert(0);
1113 }
1114
1115
1116 /* Represents Form data.  If szB is 1/2/4/8 then the result is in the
1117    lowest 1/2/4/8 bytes of u.val.  If szB is zero or negative then the
1118    result is an image section beginning at u.cur and with size -szB.
1119    No other szB values are allowed. */
1120 typedef
1121    struct {
1122       Long szB; // 1, 2, 4, 8 or non-positive values only.
1123       union { ULong val; DiCursor cur; } u; // val: scalar result (szB > 0); cur: start of the section (szB <= 0).
1124    }
1125    FormContents;
1126
1127 /* From 'c', get the Form data into 'cts'.  Either it gets a 1/2/4/8
1128    byte scalar value, or (a reference to) zero or more bytes starting
1129    at a DiCursor.
     cts  - OUT; zeroed first, then filled in as described for
            FormContents.
     cc   - the CU being read; supplies is_dw64, version,
            cu_start_offset, the section slices used for range checks
            and the barf callback.
     c    - cursor positioned at the form data; it is advanced past
            the data that is read.
     td3  - asserted to be set on the DW_FORM_ref_sig8 path taken when
            VG_(clo_read_var_info) is off; presumably also what the
            TD3/TRACE_D3 macros consult.
     form - the DW_FORM to decode.  Unhandled forms, and offsets that
            point outside the section they refer to, are reported
            through barf. */
1130 static
1131 void get_Form_contents ( /*OUT*/FormContents* cts,
1132                          const CUConst* cc, Cursor* c,
1133                          Bool td3, DW_FORM form )
1134 {
1135    VG_(bzero_inline)(cts, sizeof(*cts));
1136    // !!! keep switch in sync with get_Form_szB. The nr of characters read below
1137    // must be computed similarly in get_Form_szB.
1138    // The consistency is verified in trace_DIE.
1139    switch (form) {
1140       case DW_FORM_data1:
1141          cts->u.val = (ULong)(UChar)get_UChar(c);
1142          cts->szB   = 1;
1143          TRACE_D3("%u", (UInt)cts->u.val);
1144          break;
1145       case DW_FORM_data2:
1146          cts->u.val = (ULong)(UShort)get_UShort(c);
1147          cts->szB   = 2;
1148          TRACE_D3("%u", (UInt)cts->u.val);
1149          break;
1150       case DW_FORM_data4:
1151          cts->u.val = (ULong)(UInt)get_UInt(c);
1152          cts->szB   = 4;
1153          TRACE_D3("%u", (UInt)cts->u.val);
1154          break;
1155       case DW_FORM_data8:
1156          cts->u.val = get_ULong(c);
1157          cts->szB   = 8;
1158          TRACE_D3("%llu", cts->u.val);
1159          break;
1160       case DW_FORM_sec_offset:
1161          cts->u.val = (ULong)get_Dwarfish_UWord( c, cc->is_dw64 );
1162          cts->szB   = cc->is_dw64 ? 8 : 4;
1163          TRACE_D3("%llu", cts->u.val);
1164          break;
1165       case DW_FORM_sdata:
1166          cts->u.val = (ULong)(Long)get_SLEB128(c);
1167          cts->szB   = 8;
             /* the value is signed, so trace it as signed */
1168          TRACE_D3("%lld", (Long)cts->u.val);
1169          break;
1170       case DW_FORM_udata:
1171          cts->u.val = (ULong)(Long)get_ULEB128(c);
1172          cts->szB   = 8;
1173          TRACE_D3("%llu", cts->u.val);
1174          break;
1175       case DW_FORM_addr:
1176          /* note, this is a hack.  DW_FORM_addr is defined as getting
1177             a word the size of the target machine as defined by the
1178             address_size field in the CU Header.  However,
1179             parse_CU_Header() rejects all inputs except those for
1180             which address_size == sizeof(Word), hence we can just
1181             treat it as a (host) Word.  */
1182          cts->u.val = (ULong)(UWord)get_UWord(c);
1183          cts->szB   = sizeof(UWord);
1184          TRACE_D3("0x%lx", (UWord)cts->u.val);
1185          break;
1186
1187       case DW_FORM_ref_addr:
1188          /* We make the same word-size assumption as DW_FORM_addr. */
1189          /* What does this really mean?  From D3 Sec 7.5.4,
1190             description of "reference", it would appear to reference
1191             some other DIE, by specifying the offset from the
1192             beginning of a .debug_info section.  The D3 spec mentions
1193             that this might be in some other shared object and
1194             executable.  But I don't see how the name of the other
1195             object/exe is specified.
1196
1197             At least for the DW_FORM_ref_addrs created by icc11, the
1198             references seem to be within the same object/executable.
1199             So for the moment we merely range-check, to see that they
1200             actually do specify a plausible offset within this
1201             object's .debug_info, and return the value unchanged.
1202
1203             In DWARF 2, DW_FORM_ref_addr is address-sized, but in
1204             DWARF 3 and later, it is offset-sized.
1205          */
1206          if (cc->version == 2) {
1207             cts->u.val = (ULong)(UWord)get_UWord(c);
1208             cts->szB   = sizeof(UWord);
1209          } else {
1210             cts->u.val = get_Dwarfish_UWord(c, cc->is_dw64);
1211             cts->szB   = cc->is_dw64 ? sizeof(ULong) : sizeof(UInt);
1212          }
1213          TRACE_D3("0x%lx", (UWord)cts->u.val);
1214          if (0) VG_(printf)("DW_FORM_ref_addr 0x%lx\n", (UWord)cts->u.val);
1215          if (/* the following is surely impossible, but ... */
1216              !ML_(sli_is_valid)(cc->escn_debug_info)
1217              || cts->u.val >= (ULong)cc->escn_debug_info.szB) {
1218             /* Hmm.  Offset is nonsensical for this object's .debug_info
1219                section.  Be safe and reject it. */
1220             cc->barf("get_Form_contents: DW_FORM_ref_addr points "
1221                      "outside .debug_info");
1222          }
1223          break;
1224
1225       case DW_FORM_strp: {
1226          /* this is an offset into .debug_str */
1227          UWord uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
1228          if (!ML_(sli_is_valid)(cc->escn_debug_str)
1229              || uw >= cc->escn_debug_str.szB)
1230             cc->barf("get_Form_contents: DW_FORM_strp "
1231                      "points outside .debug_str");
1232          /* FIXME: check the entire string lies inside debug_str,
1233             not just the first byte of it. */
1234          DiCursor str
1235             = ML_(cur_plus)( ML_(cur_from_sli)(cc->escn_debug_str), uw );
1236          if (TD3) {
1237             HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.1");
1238             TRACE_D3("(indirect string, offset: 0x%lx): %s", uw, tmp);
1239             ML_(dinfo_free)(tmp);
1240          }
1241          cts->u.cur = str;
1242          cts->szB   = - (Long)(1 + (ULong)ML_(cur_strlen)(str));
1243          break;
1244       }
1245       case DW_FORM_string: {
1246          DiCursor str = get_AsciiZ(c);
1247          if (TD3) {
1248             HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.2");
1249             TRACE_D3("%s", tmp);
1250             ML_(dinfo_free)(tmp);
1251          }
1252          cts->u.cur = str;
1253          /* strlen is safe because get_AsciiZ already 'vetted' the
1254             entire string */
1255          cts->szB   = - (Long)(1 + (ULong)ML_(cur_strlen)(str));
1256          break;
1257       }
1258       case DW_FORM_ref1: {
1259          UChar u8   = get_UChar(c);
1260          UWord res  = cc->cu_start_offset + (UWord)u8;
1261          cts->u.val = (ULong)res;
1262          cts->szB   = sizeof(UWord);
1263          TRACE_D3("<%lx>", res);
1264          break;
1265       }
1266       case DW_FORM_ref2: {
1267          UShort u16 = get_UShort(c);
1268          UWord  res = cc->cu_start_offset + (UWord)u16;
1269          cts->u.val = (ULong)res;
1270          cts->szB   = sizeof(UWord);
1271          TRACE_D3("<%lx>", res);
1272          break;
1273       }
1274       case DW_FORM_ref4: {
1275          UInt  u32  = get_UInt(c);
1276          UWord res  = cc->cu_start_offset + (UWord)u32;
1277          cts->u.val = (ULong)res;
1278          cts->szB   = sizeof(UWord);
1279          TRACE_D3("<%lx>", res);
1280          break;
1281       }
1282       case DW_FORM_ref8: {
1283          ULong u64  = get_ULong(c);
1284          UWord res  = cc->cu_start_offset + (UWord)u64;
1285          cts->u.val = (ULong)res;
1286          cts->szB   = sizeof(UWord);
1287          TRACE_D3("<%lx>", res);
1288          break;
1289       }
1290       case DW_FORM_ref_udata: {
1291          ULong u64  = get_ULEB128(c);
1292          UWord res  = cc->cu_start_offset + (UWord)u64;
1293          cts->u.val = (ULong)res;
1294          cts->szB   = sizeof(UWord);
1295          TRACE_D3("<%lx>", res);
1296          break;
1297       }
1298       case DW_FORM_flag: {
1299          UChar u8 = get_UChar(c);
1300          TRACE_D3("%u", (UInt)u8);
1301          cts->u.val = (ULong)u8;
1302          cts->szB   = 1;
1303          break;
1304       }
1305       case DW_FORM_flag_present:
             /* no bytes are read: the flag's presence is its value */
1306          TRACE_D3("1");
1307          cts->u.val = 1;
1308          cts->szB   = 1;
1309          break;
1310       case DW_FORM_block1: {
1311          ULong    u64b;
1312          ULong    u64   = (ULong)get_UChar(c);
1313          DiCursor block = get_DiCursor_from_Cursor(c);
1314          TRACE_D3("%llu byte block: ", u64);
             /* reading each byte also moves 'c' past the block */
1315          for (u64b = u64; u64b > 0; u64b--) {
1316             UChar u8 = get_UChar(c);
1317             TRACE_D3("%x ", (UInt)u8);
1318          }
1319          cts->u.cur = block;
1320          cts->szB   = - (Long)u64;
1321          break;
1322       }
1323       case DW_FORM_block2: {
1324          ULong    u64b;
1325          ULong    u64   = (ULong)get_UShort(c);
1326          DiCursor block = get_DiCursor_from_Cursor(c);
1327          TRACE_D3("%llu byte block: ", u64);
1328          for (u64b = u64; u64b > 0; u64b--) {
1329             UChar u8 = get_UChar(c);
1330             TRACE_D3("%x ", (UInt)u8);
1331          }
1332          cts->u.cur = block;
1333          cts->szB   = - (Long)u64;
1334          break;
1335       }
1336       case DW_FORM_block4: {
1337          ULong    u64b;
1338          ULong    u64   = (ULong)get_UInt(c);
1339          DiCursor block = get_DiCursor_from_Cursor(c);
1340          TRACE_D3("%llu byte block: ", u64);
1341          for (u64b = u64; u64b > 0; u64b--) {
1342             UChar u8 = get_UChar(c);
1343             TRACE_D3("%x ", (UInt)u8);
1344          }
1345          cts->u.cur = block;
1346          cts->szB   = - (Long)u64;
1347          break;
1348       }
1349       case DW_FORM_exprloc:
1350       case DW_FORM_block: {
1351          ULong    u64b;
1352          ULong    u64   = (ULong)get_ULEB128(c);
1353          DiCursor block = get_DiCursor_from_Cursor(c);
1354          TRACE_D3("%llu byte block: ", u64);
1355          for (u64b = u64; u64b > 0; u64b--) {
1356             UChar u8 = get_UChar(c);
1357             TRACE_D3("%x ", (UInt)u8);
1358          }
1359          cts->u.cur = block;
1360          cts->szB   = - (Long)u64;
1361          break;
1362       }
1363       case DW_FORM_ref_sig8: {
1364          ULong  u64b;
1365          ULong  signature = get_ULong (c);
1366          ULong  work = signature;
1367          TRACE_D3("8 byte signature: ");
1368          for (u64b = 8; u64b > 0; u64b--) {
1369             UChar u8 = work & 0xff;
1370             TRACE_D3("%x ", (UInt)u8);
1371             work >>= 8;
1372          }
1373
1374          /* cc->signature_types is only built/initialised when
1375             VG_(clo_read_var_info) is set. In this case,
1376             the DW_FORM_ref_sig8 can be looked up.
1377             But we can also arrive here when only reading inline info
1378             and VG_(clo_trace_symtab) is set. In such a case,
1379             we cannot lookup the DW_FORM_ref_sig8, we rather assign
1380             a dummy value. This is a kludge, but otherwise,
1381             the 'dwarf inline info reader' tracing would have to
1382             do type processing/reading. It is better to avoid
1383             adding significant 'real' processing only due to tracing. */
1384          if (VG_(clo_read_var_info)) {
1385             /* Due to the way that the hash table is constructed, the
1386                resulting DIE offset here is already "cooked".  See
1387                cook_die_using_form.  */
1388             cts->u.val = lookup_signatured_type (cc->signature_types, signature,
1389                                                  c->barf);
1390          } else {
1391             vg_assert (td3);
1392             vg_assert (VG_(clo_read_inline_info));
1393             TRACE_D3("<not dereferencing signature type>");
1394             cts->u.val = 0; /* Assign a dummy/rubbish value */
1395          }
1396          cts->szB   = sizeof(UWord);
1397          break;
1398       }
1399       case DW_FORM_indirect:
             /* the real form is stored inline, as a ULEB128, ahead of the data */
1400          get_Form_contents (cts, cc, c, td3, (DW_FORM)get_ULEB128(c));
1401          return;
1402
1403       case DW_FORM_GNU_ref_alt:
1404          cts->u.val = get_Dwarfish_UWord(c, cc->is_dw64);
1405          cts->szB   = cc->is_dw64 ? sizeof(ULong) : sizeof(UInt);
1406          TRACE_D3("0x%lx", (UWord)cts->u.val);
1407          if (0) VG_(printf)("DW_FORM_GNU_ref_alt 0x%lx\n", (UWord)cts->u.val);
1408          if (/* the following is surely impossible, but ... */
1409              !ML_(sli_is_valid)(cc->escn_debug_info_alt))
1410             cc->barf("get_Form_contents: DW_FORM_GNU_ref_alt used, "
1411                      "but no alternate .debug_info");
1412          else if (cts->u.val >= (ULong)cc->escn_debug_info_alt.szB) {
1413             /* Hmm.  Offset is nonsensical for the alternate .debug_info
1414                section.  Be safe and reject it. */
1415             cc->barf("get_Form_contents: DW_FORM_GNU_ref_alt points "
1416                      "outside alternate .debug_info");
1417          }
1418          break;
1419
1420       case DW_FORM_GNU_strp_alt: {
1421          /* this is an offset into alternate .debug_str */
1422          SizeT uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
1423          if (!ML_(sli_is_valid)(cc->escn_debug_str_alt))
1424             cc->barf("get_Form_contents: DW_FORM_GNU_strp_alt used, "
1425                      "but no alternate .debug_str");
1426          else if (uw >= cc->escn_debug_str_alt.szB)
1427             cc->barf("get_Form_contents: DW_FORM_GNU_strp_alt "
1428                      "points outside alternate .debug_str");
1429          /* FIXME: check the entire string lies inside debug_str,
1430             not just the first byte of it. */
1431          DiCursor str
1432             = ML_(cur_plus)( ML_(cur_from_sli)(cc->escn_debug_str_alt), uw);
1433          if (TD3) {
1434             HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.3");
1435             TRACE_D3("(indirect alt string, offset: 0x%lx): %s", uw, tmp);
1436             ML_(dinfo_free)(tmp);
1437          }
1438          cts->u.cur = str;
1439          cts->szB   = - (Long)(1 + (ULong)ML_(cur_strlen)(str));
1440          break;
1441       }
1442
1443       default:
1444          VG_(printf)(
1445             "get_Form_contents: unhandled %u (%s) at <%llx>\n",
1446             form, ML_(pp_DW_FORM)(form), get_position_of_Cursor(c));
1447          c->barf("get_Form_contents: unhandled DW_FORM");
1448    }
1449 }
1450
1451 static inline UInt sizeof_Dwarfish_UWord (Bool is_dw64)
1452 {
1453    /* Size in bytes of a "Dwarfish UWord": sizeof(ULong) when
1454       is_dw64 is set, sizeof(UInt) otherwise.  Callers in this file
1455       pass cc->is_dw64. */
1456    return is_dw64 ? sizeof(ULong) : sizeof(UInt);
1457 }
1458
1459 #define VARSZ_FORM 0xffffffff
1460 /* If the form is a fixed length form, return the nr of bytes for this form.
1461    If the form is a variable length form, return VARSZ_FORM.
     'cc' is consulted for is_dw64 and version, which decide the size
     of the offset-sized forms, and for the barf callback used when
     the form is not handled. */
1462 static
1463 UInt get_Form_szB (const CUConst* cc, DW_FORM form )
1464 {
1465    // !!! keep switch in sync with get_Form_contents : the nr of bytes
1466    // read from a cursor by get_Form_contents must be returned by
1467    // the below switch.
1468    // The consistency is verified in trace_DIE.
1469    switch (form) {
1470       case DW_FORM_data1: return 1;
1471       case DW_FORM_data2: return 2;
1472       case DW_FORM_data4: return 4;
1473       case DW_FORM_data8: return 8;
1474       case DW_FORM_sec_offset:
1475          if (cc->is_dw64)
1476             return 8;
1477          else
1478             return 4;
1479       case DW_FORM_sdata:
1480          return VARSZ_FORM;
1481       case DW_FORM_udata:
1482          return VARSZ_FORM;
1483       case DW_FORM_addr: // See hack in get_Form_contents
1484          return sizeof(UWord);
1485       case DW_FORM_ref_addr: // See hack in get_Form_contents
1486          if (cc->version == 2)
1487             return sizeof(UWord);
1488          else
1489             return sizeof_Dwarfish_UWord (cc->is_dw64);
1490       case DW_FORM_strp:
1491          return sizeof_Dwarfish_UWord (cc->is_dw64);
1492       case DW_FORM_string:
1493          return VARSZ_FORM;
1494       case DW_FORM_ref1:
1495          return 1;
1496       case DW_FORM_ref2:
1497          return 2;
1498       case DW_FORM_ref4:
1499          return 4;
1500       case DW_FORM_ref8:
1501          return 8;
1502       case DW_FORM_ref_udata:
1503          return VARSZ_FORM;
1504       case DW_FORM_flag:
1505          return 1;
1506       case DW_FORM_flag_present:
1507          return 0; // !!! special case, no data.
1508       case DW_FORM_block1:
1509          return VARSZ_FORM;
1510       case DW_FORM_block2:
1511          return VARSZ_FORM;
1512       case DW_FORM_block4:
1513          return VARSZ_FORM;
1514       case DW_FORM_exprloc:
1515       case DW_FORM_block:
1516          return VARSZ_FORM;
1517       case DW_FORM_ref_sig8:
1518          return 8;
1519       case DW_FORM_indirect:
1520          return VARSZ_FORM;
1521       case DW_FORM_GNU_ref_alt:
1522          return sizeof_Dwarfish_UWord(cc->is_dw64);
1523       case DW_FORM_GNU_strp_alt:
1524          return sizeof_Dwarfish_UWord(cc->is_dw64);
1525       default:
1526          VG_(printf)(
1527             "get_Form_szB: unhandled %u (%s)\n",
1528             form, ML_(pp_DW_FORM)(form));
             // Name this function, not get_Form_contents, in the report.
1529          cc->barf("get_Form_szB: unhandled DW_FORM");
1530    }
1531 }
1532
1533 /* Advance 'c_die' past one DIE whose layout is described by 'abbv'.
1534    If the DIE has a sibling, *sibling is set to the skipped DIE sibling value. */
1535 static
1536 void skip_DIE (UWord  *sibling,
1537                Cursor* c_die,
1538                const g_abbv *abbv,
1539                const CUConst* cc)
1540 {
1541    FormContents contents;
1542    UInt         ix = 0;
1543
1544    do {
1545       const Bool is_sibling = abbv->nf[ix].at_name == DW_AT_sibling;
1546
1547       if (is_sibling || abbv->nf[ix].skip_szB == VARSZ_FORM) {
1548          /* This attribute has to be decoded: either its value is
1549             wanted, or decoding is what moves the cursor past it. */
1550          get_Form_contents( &contents, cc, c_die, False /*td3*/,
1551                             (DW_FORM)abbv->nf[ix].at_form );
1552          if (is_sibling && contents.szB > 0)
1553             *sibling = contents.u.val;
1554          ix++;
1555       } else {
1556          /* Fixed-size data: step over skip_szB bytes in one go and
1557             resume at the entry recorded in next_nf. */
1558          advance_position_of_Cursor (c_die, (ULong)abbv->nf[ix].skip_szB);
1559          ix = abbv->nf[ix].next_nf;
1560       }
1561    } while (ix != 0);
1562 }
1563
1564
1565 /*------------------------------------------------------------*/
1566 /*---                                                      ---*/
1567 /*--- Parsing of variable-related DIEs                     ---*/
1568 /*---                                                      ---*/
1569 /*------------------------------------------------------------*/
1570
1571 typedef
1572    struct _TempVar {
1573       const HChar*  name; /* in DebugInfo's .strpool */
1574       /* Represent ranges economically.  nRanges is the number of
1575          ranges.  Cases:
1576          0: .rngOneMin .rngOneMax .manyRanges are all zero
1577          1: .rngOneMin .rngOneMax hold the range; .rngMany is NULL
1578          2: .rngOneMin .rngOneMax are zero; .rngMany holds the ranges.
1579          This is merely an optimisation to avoid having to allocate
1580          and free the XArray in the common (98%) of cases where there
1581          is zero or one address ranges. */
1582       UWord   nRanges;
1583       Addr    rngOneMin;
1584       Addr    rngOneMax;
1585       XArray* rngMany; /* of AddrRange.  NON-UNIQUE PTR in AR_DINFO. */
1586       /* Do not free .rngMany, since many TempVars will have the same
1587          value.  Instead the associated storage is to be freed by
1588          deleting 'rangetree', which stores a single copy of each
1589          range. */
1590       /* --- */
1591       Int     level;
1592       UWord   typeR; /* a cuOff */
1593       GExpr*  gexpr; /* for this variable */
1594       GExpr*  fbGX;  /* to find the frame base of the enclosing fn, if
1595                         any */
1596       UInt    fndn_ix; /* declaring file/dirname index in fndnpool, or 0 */
1597       Int     fLine; /* declaring file line number, or zero */
1598       /* offset in .debug_info, so that abstract instances can be
1599          found to satisfy references from concrete instances. */
1600       UWord   dioff;
1601       UWord   absOri; /* so the absOri fields refer to dioff fields
1602                          in some other, related TempVar. */
1603    }
1604    TempVar;
1605
1606 typedef
1607    struct {
1608       /* Contains the range stack: a stack of address ranges, one
1609          stack entry for each nested scope.
1610
1611          Some scope entries are created by function definitions
1612          (DW_AT_subprogram), and for those, we also note the GExpr
1613          derived from its DW_AT_frame_base attribute, if any.
1614          Consequently it should be possible to find, for any
1615          variable's DIE, the GExpr for the containing function's
1616          DW_AT_frame_base by scanning back through the stack to find
1617          the nearest entry associated with a function.  This somewhat
1618          elaborate scheme is provided so as to make it possible to
1619          obtain the correct DW_AT_frame_base expression even in the
1620          presence of nested functions (or to be more precise, in the
1621          presence of nested DW_AT_subprogram DIEs).
1622       */
1623       Int     sp; /* [sp] is innermost active entry; sp==-1 for empty
1624                      stack */
1625       Int     stack_size;
1626       XArray **ranges; /* XArray of AddrRange */
1627       Int     *level;  /* D3 DIE levels */
1628       Bool    *isFunc; /* from DW_AT_subprogram? */
1629       GExpr  **fbGX;   /* if isFunc, contains the FB expr, else NULL */
1630       /* The fndn_ix file name/dirname table.  Is a mapping from dwarf
1631          integer index to the index in di->fndnpool. */
1632       XArray* /* of UInt* */ fndn_ix_Table;
1633    }
1634    D3VarParser;
1635
1636 /* Completely initialise a variable parser object */
1637 static void
1638 var_parser_init ( D3VarParser *parser )
1639 {
1640    parser->sp = -1;
1641    parser->stack_size = 0;
1642    parser->ranges = NULL;
1643    parser->level  = NULL;
1644    parser->isFunc = NULL;
1645    parser->fbGX = NULL;
1646    parser->fndn_ix_Table = NULL;
1647 }
1648
1649 /* Release any memory hanging off a variable parser object */
1650 static void
1651 var_parser_release ( D3VarParser *parser )
1652 {
1653    ML_(dinfo_free)( parser->ranges );
1654    ML_(dinfo_free)( parser->level );
1655    ML_(dinfo_free)( parser->isFunc );
1656    ML_(dinfo_free)( parser->fbGX );
1657 }
1658
1659 static void varstack_show ( const D3VarParser* parser, const HChar* str )
1660 {
1661    Word i, j;
1662    VG_(printf)("  varstack (%s) {\n", str);
1663    for (i = 0; i <= parser->sp; i++) {
1664       XArray* xa = parser->ranges[i];
1665       vg_assert(xa);
1666       VG_(printf)("    [%ld] (level %d)", i, parser->level[i]);
1667       if (parser->isFunc[i]) {
1668          VG_(printf)(" (fbGX=%p)", parser->fbGX[i]);
1669       } else {
1670          vg_assert(parser->fbGX[i] == NULL);
1671       }
1672       VG_(printf)(": ");
1673       if (VG_(sizeXA)( xa ) == 0) {
1674          VG_(printf)("** empty PC range array **");
1675       } else {
1676          for (j = 0; j < VG_(sizeXA)( xa ); j++) {
1677             AddrRange* range = (AddrRange*) VG_(indexXA)( xa, j );
1678             vg_assert(range);
1679             VG_(printf)("[%#lx,%#lx] ", range->aMin, range->aMax);
1680          }
1681       }
1682       VG_(printf)("\n");
1683    }
1684    VG_(printf)("  }\n");
1685 }
1686
1687 /* Remove from the stack, all entries with .level > 'level' */
1688 static
1689 void varstack_preen ( D3VarParser* parser, Bool td3, Int level )
1690 {
1691    Bool changed = False;
1692    vg_assert(parser->sp < parser->stack_size);
1693    while (True) {
1694       vg_assert(parser->sp >= -1);
1695       if (parser->sp == -1) break;
1696       if (parser->level[parser->sp] <= level) break;
1697       if (0)
1698          TRACE_D3("BBBBAAAA varstack_pop [newsp=%d]\n", parser->sp-1);
1699       vg_assert(parser->ranges[parser->sp]);
1700       /* Who allocated this xa?  get_range_list() or
1701          unitary_range_list(). */
1702       VG_(deleteXA)( parser->ranges[parser->sp] );
1703       parser->sp--;
1704       changed = True;
1705    }
1706    if (changed && td3)
1707       varstack_show( parser, "after preen" );
1708 }
1709
1710 static void varstack_push ( const CUConst* cc,
1711                             D3VarParser* parser,
1712                             Bool td3,
1713                             XArray* ranges, Int level,
1714                             Bool    isFunc, GExpr* fbGX ) {
1715    if (0)
1716    TRACE_D3("BBBBAAAA varstack_push[newsp=%d]: %d  %p\n",
1717             parser->sp+1, level, ranges);
1718
1719    /* First we need to zap everything >= 'level', as we are about to
1720       replace any previous entry at 'level', so .. */
1721    varstack_preen(parser, /*td3*/False, level-1);
1722
1723    vg_assert(parser->sp >= -1);
1724    vg_assert(parser->sp < parser->stack_size);
1725    if (parser->sp == parser->stack_size - 1) {
1726       parser->stack_size += 48;
1727       parser->ranges =
1728          ML_(dinfo_realloc)("di.readdwarf3.varpush.1", parser->ranges,
1729                             parser->stack_size * sizeof parser->ranges[0]);
1730       parser->level =
1731          ML_(dinfo_realloc)("di.readdwarf3.varpush.2", parser->level,
1732                             parser->stack_size * sizeof parser->level[0]);
1733       parser->isFunc =
1734          ML_(dinfo_realloc)("di.readdwarf3.varpush.3", parser->isFunc,
1735                             parser->stack_size * sizeof parser->isFunc[0]);
1736       parser->fbGX =
1737          ML_(dinfo_realloc)("di.readdwarf3.varpush.4", parser->fbGX,
1738                             parser->stack_size * sizeof parser->fbGX[0]);
1739    }
1740    if (parser->sp >= 0)
1741       vg_assert(parser->level[parser->sp] < level);
1742    parser->sp++;
1743    vg_assert(ranges != NULL);
1744    if (!isFunc) vg_assert(fbGX == NULL);
1745    parser->ranges[parser->sp] = ranges;
1746    parser->level[parser->sp]  = level;
1747    parser->isFunc[parser->sp] = isFunc;
1748    parser->fbGX[parser->sp]   = fbGX;
1749    if (TD3)
1750       varstack_show( parser, "after push" );
1751 }
1752
1753
1754 /* cts is derived from a DW_AT_location and so refers either to a
1755    location expression or to a location list.  Figure out which, and
1756    in both cases bundle the expression or location list into a
1757    so-called GExpr (guarded expression). */
1758 __attribute__((noinline))
1759 static GExpr* get_GX ( const CUConst* cc, Bool td3, const FormContents* cts )
1760 {
1761    GExpr* gexpr = NULL;
1762    if (cts->szB < 0) {
1763       /* represents a non-empty in-line location expression, and
1764          cts->u.cur points at the image bytes */
1765       gexpr = make_singleton_GX( cts->u.cur, (ULong)(- cts->szB) );
1766    }
1767    else
1768    if (cts->szB > 0) {
1769       /* represents a location list.  cts->u.val is the offset of it
1770          in .debug_loc. */
1771       if (!cc->cu_svma_known)
1772          cc->barf("get_GX: location list, but CU svma is unknown");
1773       gexpr = make_general_GX( cc, td3, cts->u.val, cc->cu_svma );
1774    }
1775    else {
1776       vg_assert(0); /* else caller is bogus */
1777    }
1778    return gexpr;
1779 }
1780
1781 /* Returns an xarray* of directory names (indexed by the dwarf dirname
1782    integer).
1783    If 'compdir' is NULL, entry [0] will be set to "."
1784    otherwise entry [0] is set to compdir.
1785    Entry [0] basically means "the current directory of the compilation",
1786    whatever that means, according to the DWARF3 spec.
1787    FIXME??? readdwarf3.c/readdwarf.c have a lot of duplicated code */
1788 static
1789 XArray* read_dirname_xa (DebugInfo* di, const HChar *compdir,
1790                          Cursor *c,
1791                          Bool td3 )
1792 {
1793    XArray*        dirname_xa;   /* xarray of HChar* dirname */
1794    const HChar*   dirname;
1795    UInt           compdir_len;
1796
1797    dirname_xa = VG_(newXA) (ML_(dinfo_zalloc), "di.rdxa.1", ML_(dinfo_free),
1798                             sizeof(HChar*) );
1799
1800    if (compdir == NULL) {
1801       dirname = ".";
1802       compdir_len = 1;
1803    } else {
1804       dirname = compdir;
1805       compdir_len = VG_(strlen)(compdir);
1806    }
1807    VG_(addToXA) (dirname_xa, &dirname);
1808
1809    TRACE_D3(" The Directory Table%s\n",
1810             peek_UChar(c) == 0 ? " is empty." : ":" );
1811
1812    while (peek_UChar(c) != 0) {
1813
1814       DiCursor cur = get_AsciiZ(c);
1815       HChar* data_str = ML_(cur_read_strdup)( cur, "dirname_xa.1" );
1816       TRACE_D3("  %s\n", data_str);
1817
1818       /* If data_str[0] is '/', then 'data' is an absolute path and we
1819          don't mess with it.  Otherwise, construct the
1820          path 'compdir' ++ "/" ++ 'data'. */
1821
1822       if (data_str[0] != '/'
1823           /* not an absolute path */
1824           && compdir
1825           /* actually got something sensible for compdir */
1826           && compdir_len)
1827       {
1828          SizeT  len = compdir_len + 1 + VG_(strlen)(data_str);
1829          HChar *buf = ML_(dinfo_zalloc)("dirname_xa.2", len + 1);
1830
1831          VG_(strcpy)(buf, compdir);
1832          VG_(strcat)(buf, "/");
1833          VG_(strcat)(buf, data_str);
1834
1835          dirname = ML_(addStr)(di, buf, len);
1836          VG_(addToXA) (dirname_xa, &dirname);
1837          if (0) VG_(printf)("rel path  %s\n", buf);
1838          ML_(dinfo_free)(buf);
1839       } else {
1840          /* just use 'data'. */
1841          dirname = ML_(addStr)(di,data_str,-1);
1842          VG_(addToXA) (dirname_xa, &dirname);
1843          if (0) VG_(printf)("abs path  %s\n", data_str);
1844       }
1845
1846       ML_(dinfo_free)(data_str);
1847    }
1848
1849    TRACE_D3 ("\n");
1850
1851    if (get_UChar (c) != 0) {
1852       ML_(symerr)(NULL, True,
1853                   "could not get NUL at end of DWARF directory table");
1854       VG_(deleteXA)(dirname_xa);
1855       return NULL;
1856    }
1857
1858    return dirname_xa;
1859 }
1860
1861 static
1862 void read_filename_table( /*MOD*/XArray* /* of UInt* */ fndn_ix_Table,
1863                           const HChar* compdir,
1864                           const CUConst* cc, ULong debug_line_offset,
1865                           Bool td3 )
1866 {
1867    Bool   is_dw64;
1868    Cursor c;
1869    Word   i;
1870    UShort version;
1871    UChar  opcode_base;
1872    const HChar* str;
1873    XArray* dirname_xa;   /* xarray of HChar* dirname */
1874    ULong  dir_xa_ix;     /* Index in dirname_xa, as read from dwarf info. */
1875    const HChar* dirname;
1876    UInt   fndn_ix;
1877
1878    vg_assert(fndn_ix_Table && cc && cc->barf);
1879    if (!ML_(sli_is_valid)(cc->escn_debug_line)
1880        || cc->escn_debug_line.szB <= debug_line_offset) {
1881       cc->barf("read_filename_table: .debug_line is missing?");
1882    }
1883
1884    init_Cursor( &c, cc->escn_debug_line, debug_line_offset, cc->barf,
1885                 "Overrun whilst reading .debug_line section(1)" );
1886
1887    /* unit_length = */
1888    get_Initial_Length( &is_dw64, &c,
1889                        "read_filename_table: invalid initial-length field" );
1890    version = get_UShort( &c );
1891    if (version != 2 && version != 3 && version != 4)
1892      cc->barf("read_filename_table: Only DWARF version 2, 3 and 4 line info "
1893               "is currently supported.");
1894    /*header_length              = (ULong)*/ get_Dwarfish_UWord( &c, is_dw64 );
1895    /*minimum_instruction_length = */ get_UChar( &c );
1896    if (version >= 4)
1897       /*maximum_operations_per_insn = */ get_UChar( &c );
1898    /*default_is_stmt            = */ get_UChar( &c );
1899    /*line_base                  = (Char)*/ get_UChar( &c );
1900    /*line_range                 = */ get_UChar( &c );
1901    opcode_base                = get_UChar( &c );
1902    /* skip over "standard_opcode_lengths" */
1903    for (i = 1; i < (Word)opcode_base; i++)
1904      (void)get_UChar( &c );
1905
1906    dirname_xa = read_dirname_xa(cc->di, compdir, &c, td3);
1907
1908    /* Read and record the file names table */
1909    vg_assert( VG_(sizeXA)( fndn_ix_Table ) == 0 );
1910    /* Add a dummy index-zero entry.  DWARF3 numbers its files
1911       from 1, for some reason. */
1912    fndn_ix = ML_(addFnDn) ( cc->di, "<unknown_file>", NULL );
1913    VG_(addToXA)( fndn_ix_Table, &fndn_ix );
1914    while (peek_UChar(&c) != 0) {
1915       DiCursor cur = get_AsciiZ(&c);
1916       str = ML_(addStrFromCursor)( cc->di, cur );
1917       dir_xa_ix = get_ULEB128( &c );
1918       if (dirname_xa != NULL
1919           && dir_xa_ix >= 0 && dir_xa_ix < VG_(sizeXA) (dirname_xa))
1920          dirname = *(HChar**)VG_(indexXA) ( dirname_xa, dir_xa_ix );
1921       else
1922          dirname = NULL;
1923       fndn_ix = ML_(addFnDn)( cc->di, str, dirname);
1924       TRACE_D3("  read_filename_table: %ld fndn_ix %u %s %s\n",
1925                VG_(sizeXA)(fndn_ix_Table), fndn_ix,
1926                dirname, str);
1927       VG_(addToXA)( fndn_ix_Table, &fndn_ix );
1928       (void)get_ULEB128( &c ); /* skip last mod time */
1929       (void)get_ULEB128( &c ); /* file size */
1930    }
1931    /* We're done!  The rest of it is not interesting. */
1932    if (dirname_xa != NULL)
1933       VG_(deleteXA)(dirname_xa);
1934 }
1935
1936 /* setup_cu_svma to be called when a cu is found at level 0,
1937    to establish the cu_svma. */
1938 static void setup_cu_svma(CUConst* cc, Bool have_lo, Addr ip_lo, Bool td3)
1939 {
1940    Addr cu_svma;
1941    /* We have potentially more than one type of parser parsing the
1942       dwarf information. At least currently, each parser establishes
1943       the cu_svma. So, in case cu_svma_known, we check that the same
1944       result is obtained by the 2nd parsing of the cu.
1945
1946       Alternatively, we could reset cu_svma_known after each parsing
1947       and then check that we only see a single DW_TAG_compile_unit DIE
1948       at level 0, DWARF3 only allows exactly one top level DIE per
1949       CU. */
1950
1951    if (have_lo)
1952       cu_svma = ip_lo;
1953    else {
1954       /* Now, it may be that this DIE doesn't tell us the CU's
1955          SVMA, by way of not having a DW_AT_low_pc.  That's OK --
1956          the CU doesn't *have* to have its SVMA specified.
1957
1958          But as per last para D3 spec sec 3.1.1 ("Normal and
1959          Partial Compilation Unit Entries", "If the base address
1960          (viz, the SVMA) is undefined, then any DWARF entry of
1961          structure defined interms of the base address of that
1962          compilation unit is not valid.".  So that means, if whilst
1963          processing the children of this top level DIE (or their
1964          children, etc) we see a DW_AT_range, and cu_svma_known is
1965          False, then the DIE that contains it is (per the spec)
1966          invalid, and we can legitimately stop and complain. */
1967       /* .. whereas The Reality is, simply assume the SVMA is zero
1968          if it isn't specified. */
1969       cu_svma = 0;
1970    }
1971
1972    if (cc->cu_svma_known) {
1973       vg_assert (cu_svma == cc->cu_svma);
1974    } else {
1975       cc->cu_svma_known = True;
1976       cc->cu_svma = cu_svma;
1977       if (0)
1978          TRACE_D3("setup_cu_svma: acquire CU_SVMA of %p\n", (void*) cc->cu_svma);
1979    }
1980 }
1981
1982 static void trace_DIE(
1983    DW_TAG dtag,
1984    UWord posn,
1985    Int level,
1986    UWord saved_die_c_offset,
1987    const g_abbv *abbv,
1988    const CUConst* cc)
1989 {
1990    Cursor c;
1991    FormContents cts;
1992    UWord sibling = 0;
1993    UInt nf_i;
1994    Bool  debug_types_flag;
1995    Bool  alt_flag;
1996    Cursor check_skip;
1997    UWord check_sibling = 0;
1998
1999    posn = uncook_die( cc, posn, &debug_types_flag, &alt_flag );
2000    init_Cursor (&c,
2001                 debug_types_flag ? cc->escn_debug_types :
2002                 alt_flag ? cc->escn_debug_info_alt : cc->escn_debug_info,
2003                 saved_die_c_offset, cc->barf,
2004                 "Overrun trace_DIE");
2005    check_skip = c;
2006    VG_(printf)(" <%d><%lx>: Abbrev Number: %llu (%s)%s%s\n",
2007                level, posn, (ULong) abbv->abbv_code, ML_(pp_DW_TAG)( dtag ),
2008                debug_types_flag ? " (in .debug_types)" : "",
2009                alt_flag ? " (in alternate .debug_info)" : "");
2010    nf_i = 0;
2011    while (True) {
2012       DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
2013       DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
2014       nf_i++;
2015       if (attr == 0 && form == 0) break;
2016       VG_(printf)("     %-18s: ", ML_(pp_DW_AT)(attr));
2017       /* Get the form contents, so as to print them */
2018       get_Form_contents( &cts, cc, &c, True, form );
2019       if (attr == DW_AT_sibling && cts.szB > 0) {
2020          sibling = cts.u.val;
2021       }
2022       VG_(printf)("\t\n");
2023    }
2024
2025    /* Verify that skipping a DIE gives the same displacement as
2026       tracing (i.e. reading) a DIE. If there is an inconsistency in
2027       the nr of bytes read by get_Form_contents and get_Form_szB, this
2028       should be detected by the below. Using --trace-symtab=yes
2029       --read-var-info=yes will ensure all DIEs are systematically
2030       verified. */
2031    skip_DIE (&check_sibling, &check_skip, abbv, cc);
2032    vg_assert (check_sibling == sibling);
2033    vg_assert (get_position_of_Cursor (&check_skip)
2034               == get_position_of_Cursor (&c));
2035 }
2036
2037 __attribute__((noreturn))
2038 static void dump_bad_die_and_barf(
2039    const HChar *whichparser,
2040    DW_TAG dtag,
2041    UWord posn,
2042    Int level,
2043    Cursor* c_die,
2044    UWord saved_die_c_offset,
2045    const g_abbv *abbv,
2046    const CUConst* cc)
2047 {
2048    trace_DIE (dtag, posn, level, saved_die_c_offset, abbv, cc);
2049    VG_(printf)("%s:\n", whichparser);
2050    cc->barf("confused by the above DIE");
2051 }
2052
2053 __attribute__((noinline))
2054 static void bad_DIE_confusion(int linenr)
2055 {
2056    VG_(printf)("\nparse DIE(readdwarf3.c:%d): confused by:\n", linenr);
2057 }
2058 #define goto_bad_DIE do {bad_DIE_confusion(__LINE__); goto bad_DIE;} while (0)
2059
2060 __attribute__((noinline))
2061 static void parse_var_DIE (
2062    /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
2063    /*MOD*/XArray* /* of TempVar* */ tempvars,
2064    /*MOD*/XArray* /* of GExpr* */ gexprs,
2065    /*MOD*/D3VarParser* parser,
2066    DW_TAG dtag,
2067    UWord posn,
2068    Int level,
2069    Cursor* c_die,
2070    const g_abbv *abbv,
2071    CUConst* cc,
2072    Bool td3
2073 )
2074 {
2075    FormContents cts;
2076    UInt nf_i;
2077
2078    UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
2079
2080    varstack_preen( parser, td3, level-1 );
2081
2082    if (dtag == DW_TAG_compile_unit
2083        || dtag == DW_TAG_type_unit
2084        || dtag == DW_TAG_partial_unit) {
2085       Bool have_lo    = False;
2086       Bool have_hi1   = False;
2087       Bool hiIsRelative = False;
2088       Bool have_range = False;
2089       Addr ip_lo    = 0;
2090       Addr ip_hi1   = 0;
2091       Addr rangeoff = 0;
2092       const HChar *compdir = NULL;
2093       nf_i = 0;
2094       while (True) {
2095          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
2096          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
2097          nf_i++;
2098          if (attr == 0 && form == 0) break;
2099          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2100          if (attr == DW_AT_low_pc && cts.szB > 0) {
2101             ip_lo   = cts.u.val;
2102             have_lo = True;
2103          }
2104          if (attr == DW_AT_high_pc && cts.szB > 0) {
2105             ip_hi1   = cts.u.val;
2106             have_hi1 = True;
2107             if (form != DW_FORM_addr)
2108                hiIsRelative = True;
2109          }
2110          if (attr == DW_AT_ranges && cts.szB > 0) {
2111             rangeoff   = cts.u.val;
2112             have_range = True;
2113          }
2114          if (attr == DW_AT_comp_dir) {
2115             if (cts.szB >= 0)
2116                cc->barf("parse_var_DIE compdir: expecting indirect string");
2117             HChar *str = ML_(cur_read_strdup)( cts.u.cur,
2118                                                "parse_var_DIE.compdir" );
2119             compdir = ML_(addStr)(cc->di, str, -1);
2120             ML_(dinfo_free) (str);
2121          }
2122          if (attr == DW_AT_stmt_list && cts.szB > 0) {
2123             read_filename_table( parser->fndn_ix_Table, compdir,
2124                                  cc, cts.u.val, td3 );
2125          }
2126       }
2127       if (have_lo && have_hi1 && hiIsRelative)
2128          ip_hi1 += ip_lo;
2129
2130       /* Now, does this give us an opportunity to find this
2131          CU's svma? */
2132       if (level == 0)
2133          setup_cu_svma(cc, have_lo, ip_lo, td3);
2134
2135       /* Do we have something that looks sane? */
2136       if (have_lo && have_hi1 && (!have_range)) {
2137          if (ip_lo < ip_hi1)
2138             varstack_push( cc, parser, td3,
2139                            unitary_range_list(ip_lo, ip_hi1 - 1),
2140                            level,
2141                            False/*isFunc*/, NULL/*fbGX*/ );
2142          else if (ip_lo == 0 && ip_hi1 == 0)
2143             /* CU has no code, presumably?
2144                Such situations have been encountered for code
2145                compiled with -ffunction-sections -fdata-sections
2146                and linked with --gc-sections. Completely
2147                eliminated CU gives such 0 lo/hi pc. Similarly
2148                to a CU which has no lo/hi/range pc, we push
2149                an empty range list. */
2150             varstack_push( cc, parser, td3,
2151                            empty_range_list(),
2152                            level,
2153                            False/*isFunc*/, NULL/*fbGX*/ );
2154       } else
2155       if ((!have_lo) && (!have_hi1) && have_range) {
2156          varstack_push( cc, parser, td3,
2157                         get_range_list( cc, td3,
2158                                         rangeoff, cc->cu_svma ),
2159                         level,
2160                         False/*isFunc*/, NULL/*fbGX*/ );
2161       } else
2162       if ((!have_lo) && (!have_hi1) && (!have_range)) {
2163          /* CU has no code, presumably? */
2164          varstack_push( cc, parser, td3,
2165                         empty_range_list(),
2166                         level,
2167                         False/*isFunc*/, NULL/*fbGX*/ );
2168       } else
2169       if (have_lo && (!have_hi1) && have_range && ip_lo == 0) {
2170          /* broken DIE created by gcc-4.3.X ?  Ignore the
2171             apparently-redundant DW_AT_low_pc and use the DW_AT_ranges
2172             instead. */
2173          varstack_push( cc, parser, td3,
2174                         get_range_list( cc, td3,
2175                                         rangeoff, cc->cu_svma ),
2176                         level,
2177                         False/*isFunc*/, NULL/*fbGX*/ );
2178       } else {
2179          if (0) VG_(printf)("I got hlo %d hhi1 %d hrange %d\n",
2180                             (Int)have_lo, (Int)have_hi1, (Int)have_range);
2181          goto_bad_DIE;
2182       }
2183    }
2184
2185    if (dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram) {
2186       Bool   have_lo    = False;
2187       Bool   have_hi1   = False;
2188       Bool   have_range = False;
2189       Bool   hiIsRelative = False;
2190       Addr   ip_lo      = 0;
2191       Addr   ip_hi1     = 0;
2192       Addr   rangeoff   = 0;
2193       Bool   isFunc     = dtag == DW_TAG_subprogram;
2194       GExpr* fbGX       = NULL;
2195       nf_i = 0;
2196       while (True) {
2197          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
2198          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
2199          nf_i++;
2200          if (attr == 0 && form == 0) break;
2201          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2202          if (attr == DW_AT_low_pc && cts.szB > 0) {
2203             ip_lo   = cts.u.val;
2204             have_lo = True;
2205          }
2206          if (attr == DW_AT_high_pc && cts.szB > 0) {
2207             ip_hi1   = cts.u.val;
2208             have_hi1 = True;
2209             if (form != DW_FORM_addr)
2210                hiIsRelative = True;
2211          }
2212          if (attr == DW_AT_ranges && cts.szB > 0) {
2213             rangeoff   = cts.u.val;
2214             have_range = True;
2215          }
2216          if (isFunc
2217              && attr == DW_AT_frame_base
2218              && cts.szB != 0 /* either scalar or nonempty block */) {
2219             fbGX = get_GX( cc, False/*td3*/, &cts );
2220             vg_assert(fbGX);
2221             VG_(addToXA)(gexprs, &fbGX);
2222          }
2223       }
2224       if (have_lo && have_hi1 && hiIsRelative)
2225          ip_hi1 += ip_lo;
2226       /* Do we have something that looks sane? */
2227       if (dtag == DW_TAG_subprogram
2228           && (!have_lo) && (!have_hi1) && (!have_range)) {
2229          /* This is legit - ignore it. Sec 3.3.3: "A subroutine entry
2230             representing a subroutine declaration that is not also a
2231             definition does not have code address or range
2232             attributes." */
2233       } else
2234       if (dtag == DW_TAG_lexical_block
2235           && (!have_lo) && (!have_hi1) && (!have_range)) {
2236          /* I believe this is legit, and means the lexical block
2237             contains no insns (whatever that might mean).  Ignore. */
2238       } else
2239       if (have_lo && have_hi1 && (!have_range)) {
2240          /* This scope supplies just a single address range. */
2241          if (ip_lo < ip_hi1)
2242             varstack_push( cc, parser, td3,
2243                            unitary_range_list(ip_lo, ip_hi1 - 1),
2244                            level, isFunc, fbGX );
2245       } else
2246       if ((!have_lo) && (!have_hi1) && have_range) {
2247          /* This scope supplies multiple address ranges via the use of
2248             a range list. */
2249          varstack_push( cc, parser, td3,
2250                         get_range_list( cc, td3,
2251                                         rangeoff, cc->cu_svma ),
2252                         level, isFunc, fbGX );
2253       } else
2254       if (have_lo && (!have_hi1) && (!have_range)) {
2255          /* This scope is bogus.  The D3 spec sec 3.4 (Lexical Block
2256             Entries) says fairly clearly that a scope must have either
2257             _range or (_low_pc and _high_pc). */
2258          /* The spec is a bit ambiguous though.  Perhaps a single byte
2259             range is intended?  See sec 2.17 (Code Addresses And Ranges) */
2260          /* This case is here because icc9 produced this:
2261          <2><13bd>: DW_TAG_lexical_block
2262             DW_AT_decl_line   : 5229
2263             DW_AT_decl_column : 37
2264             DW_AT_decl_file   : 1
2265             DW_AT_low_pc      : 0x401b03
2266          */
2267          /* Ignore (seems safe than pushing a single byte range) */
2268       } else
2269          goto_bad_DIE;
2270    }
2271
2272    if (dtag == DW_TAG_variable || dtag == DW_TAG_formal_parameter) {
2273       const  HChar* name = NULL;
2274       UWord  typeR       = D3_INVALID_CUOFF;
2275       Bool   global      = False;
2276       GExpr* gexpr       = NULL;
2277       Int    n_attrs     = 0;
2278       UWord  abs_ori     = (UWord)D3_INVALID_CUOFF;
2279       Int    lineNo      = 0;
2280       UInt   fndn_ix     = 0;
2281       nf_i = 0;
2282       while (True) {
2283          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
2284          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
2285          nf_i++;
2286          if (attr == 0 && form == 0) break;
2287          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2288          n_attrs++;
2289          if (attr == DW_AT_name && cts.szB < 0) {
2290             name = ML_(addStrFromCursor)( cc->di, cts.u.cur );
2291          }
2292          if (attr == DW_AT_location
2293              && cts.szB != 0 /* either scalar or nonempty block */) {
2294             gexpr = get_GX( cc, False/*td3*/, &cts );
2295             vg_assert(gexpr);
2296             VG_(addToXA)(gexprs, &gexpr);
2297          }
2298          if (attr == DW_AT_type && cts.szB > 0) {
2299             typeR = cook_die_using_form( cc, cts.u.val, form );
2300          }
2301          if (attr == DW_AT_external && cts.szB > 0 && cts.u.val > 0) {
2302             global = True;
2303          }
2304          if (attr == DW_AT_abstract_origin && cts.szB > 0) {
2305             abs_ori = (UWord)cts.u.val;
2306          }
2307          if (attr == DW_AT_declaration && cts.szB > 0 && cts.u.val > 0) {
2308             /*declaration = True;*/
2309          }
2310          if (attr == DW_AT_decl_line && cts.szB > 0) {
2311             lineNo = (Int)cts.u.val;
2312          }
2313          if (attr == DW_AT_decl_file && cts.szB > 0) {
2314             Int ftabIx = (Int)cts.u.val;
2315             if (ftabIx >= 1
2316                 && ftabIx < VG_(sizeXA)( parser->fndn_ix_Table )) {
2317                fndn_ix = *(UInt*)VG_(indexXA)( parser->fndn_ix_Table, ftabIx );
2318             }
2319             if (0) VG_(printf)("XXX filename fndn_ix = %u %s\n", fndn_ix,
2320                                ML_(fndn_ix2filename) (cc->di, fndn_ix));
2321          }
2322       }
2323       if (!global && dtag == DW_TAG_variable && level == 1) {
2324          /* Case of a static variable. It is better to declare
2325             it global as the variable is not really related to
2326             a PC range, as its address can be used by program
2327             counters outside of the ranges where it is visible . */
2328          global = True;
2329       }
2330
2331       /* We'll collect it under if one of the following three
2332          conditions holds:
2333          (1) has location and type    -> completed
2334          (2) has type only            -> is an abstract instance
2335          (3) has location and abs_ori -> is a concrete instance
2336          Name, fndn_ix and line number are all optional frills.
2337       */
2338       if ( /* 1 */ (gexpr && typeR != D3_INVALID_CUOFF)
2339            /* 2 */ || (typeR != D3_INVALID_CUOFF)
2340            /* 3 */ || (gexpr && abs_ori != (UWord)D3_INVALID_CUOFF) ) {
2341
2342          /* Add this variable to the list of interesting looking
2343             variables.  Crucially, note along with it the address
2344             range(s) associated with the variable, which for locals
2345             will be the address ranges at the top of the varparser's
2346             stack. */
2347          GExpr*   fbGX = NULL;
2348          Word     i, nRanges;
2349          const XArray*  /* of AddrRange */ xa;
2350          TempVar* tv;
2351          /* Stack can't be empty; we put a dummy entry on it for the
2352             entire address range before starting with the DIEs for
2353             this CU. */
2354          vg_assert(parser->sp >= 0);
2355
2356          /* If this is a local variable (non-global), try to find
2357             the GExpr for the DW_AT_frame_base of the containing
2358             function.  It should have been pushed on the stack at the
2359             time we encountered its DW_TAG_subprogram DIE, so the way
2360             to find it is to scan back down the stack looking for it.
2361             If there isn't an enclosing stack entry marked 'isFunc'
2362             then we must be seeing variable or formal param DIEs
2363             outside of a function, so we deem the Dwarf to be
2364             malformed if that happens.  Note that the fbGX may be NULL
2365             if the containing DW_TAG_subprogram didn't supply a
2366             DW_AT_frame_base -- that's OK, but there must actually be
2367             a containing DW_TAG_subprogram. */
2368          if (!global) {
2369             Bool found = False;
2370             for (i = parser->sp; i >= 0; i--) {
2371                if (parser->isFunc[i]) {
2372                   fbGX = parser->fbGX[i];
2373                   found = True;
2374                   break;
2375                }
2376             }
2377             if (!found) {
2378                if (0 && VG_(clo_verbosity) >= 0) {
2379                   VG_(message)(Vg_DebugMsg,
2380                      "warning: parse_var_DIE: non-global variable "
2381                      "outside DW_TAG_subprogram\n");
2382                }
2383                /* goto_bad_DIE; */
2384                /* This seems to happen a lot.  Just ignore it -- if,
2385                   when we come to evaluation of the location (guarded)
2386                   expression, it requires a frame base value, and
2387                   there's no expression for that, then evaluation as a
2388                   whole will fail.  Harmless - a bit of a waste of
2389                   cycles but nothing more. */
2390             }
2391          }
2392
2393          /* re "global ? 0 : parser->sp" (twice), if the var is
2394             marked 'global' then we must put it at the global scope,
2395             as only the global scope (level 0) covers the entire PC
2396             address space.  It is asserted elsewhere that level 0
2397             always covers the entire address space. */
2398          xa = parser->ranges[global ? 0 : parser->sp];
2399          nRanges = VG_(sizeXA)(xa);
2400          vg_assert(nRanges >= 0);
2401
2402          tv = ML_(dinfo_zalloc)( "di.readdwarf3.pvD.1", sizeof(TempVar) );
2403          tv->name   = name;
2404          tv->level  = global ? 0 : parser->sp;
2405          tv->typeR  = typeR;
2406          tv->gexpr  = gexpr;
2407          tv->fbGX   = fbGX;
2408          tv->fndn_ix= fndn_ix;
2409          tv->fLine  = lineNo;
2410          tv->dioff  = posn;
2411          tv->absOri = abs_ori;
2412
2413          /* See explanation on definition of type TempVar for the
2414             reason for this elaboration. */
2415          tv->nRanges = nRanges;
2416          tv->rngOneMin = 0;
2417          tv->rngOneMax = 0;
2418          tv->rngMany = NULL;
2419          if (nRanges == 1) {
2420             AddrRange* range = VG_(indexXA)(xa, 0);
2421             tv->rngOneMin = range->aMin;
2422             tv->rngOneMax = range->aMax;
2423          }
2424          else if (nRanges > 1) {
2425             /* See if we already have a range list which is
2426                structurally identical.  If so, use that; if not, clone
2427                this one, and add it to our collection. */
2428             UWord keyW, valW;
2429             if (VG_(lookupFM)( rangestree, &keyW, &valW, (UWord)xa )) {
2430                XArray* old = (XArray*)keyW;
2431                vg_assert(valW == 0);
2432                vg_assert(old != xa);
2433                tv->rngMany = old;
2434             } else {
2435                XArray* cloned = VG_(cloneXA)( "di.readdwarf3.pvD.2", xa );
2436                tv->rngMany = cloned;
2437                VG_(addToFM)( rangestree, (UWord)cloned, 0 );
2438             }
2439          }
2440
2441          VG_(addToXA)( tempvars, &tv );
2442
2443          TRACE_D3("  Recording this variable, with %ld PC range(s)\n",
2444                   VG_(sizeXA)(xa) );
2445          /* collect stats on how effective the ->ranges special
2446             casing is */
2447          if (0) {
2448             static Int ntot=0, ngt=0;
2449             ntot++;
2450             if (tv->rngMany) ngt++;
2451             if (0 == (ntot % 100000))
2452                VG_(printf)("XXXX %d tot, %d cloned\n", ntot, ngt);
2453          }
2454
2455       }
2456
2457       /* Here are some other weird cases seen in the wild:
2458
2459             We have a variable with a name and a type, but no
2460             location.  I guess that's a sign that it has been
2461             optimised away.  Ignore it.  Here's an example:
2462
2463             static Int lc_compar(void* n1, void* n2) {
2464                MC_Chunk* mc1 = *(MC_Chunk**)n1;
2465                MC_Chunk* mc2 = *(MC_Chunk**)n2;
2466                return (mc1->data < mc2->data ? -1 : 1);
2467             }
2468
2469             Both mc1 and mc2 are like this
2470             <2><5bc>: Abbrev Number: 21 (DW_TAG_variable)
2471                 DW_AT_name        : mc1
2472                 DW_AT_decl_file   : 1
2473                 DW_AT_decl_line   : 216
2474                 DW_AT_type        : <5d3>
2475
2476             whereas n1 and n2 do have locations specified.
2477
2478             ---------------------------------------------
2479
2480             We see a DW_TAG_formal_parameter with a type, but
2481             no name and no location.  It's probably part of a function type
2482             construction, thusly, hence ignore it:
2483          <1><2b4>: Abbrev Number: 12 (DW_TAG_subroutine_type)
2484              DW_AT_sibling     : <2c9>
2485              DW_AT_prototyped  : 1
2486              DW_AT_type        : <114>
2487          <2><2be>: Abbrev Number: 13 (DW_TAG_formal_parameter)
2488              DW_AT_type        : <13e>
2489          <2><2c3>: Abbrev Number: 13 (DW_TAG_formal_parameter)
2490              DW_AT_type        : <133>
2491
2492             ---------------------------------------------
2493
2494             Is very minimal, like this:
2495             <4><81d>: Abbrev Number: 44 (DW_TAG_variable)
2496                 DW_AT_abstract_origin: <7ba>
2497             What that signifies I have no idea.  Ignore.
2498
2499             ----------------------------------------------
2500
2501             Is very minimal, like this:
2502             <200f>: DW_TAG_formal_parameter
2503                 DW_AT_abstract_ori: <1f4c>
2504                 DW_AT_location    : 13440
2505             What that signifies I have no idea.  Ignore.
2506             It might be significant, though: the variable at least
2507             has a location and so might exist somewhere.
2508             Maybe we should handle this.
2509
2510             ---------------------------------------------
2511
2512             <22407>: DW_TAG_variable
2513               DW_AT_name        : (indirect string, offset: 0x6579):
2514                                   vgPlain_trampoline_stuff_start
2515               DW_AT_decl_file   : 29
2516               DW_AT_decl_line   : 56
2517               DW_AT_external    : 1
2518               DW_AT_declaration : 1
2519
2520             Nameless and typeless variable that has a location?  Who
2521             knows.  Not me.
2522             <2><3d178>: Abbrev Number: 22 (DW_TAG_variable)
2523                  DW_AT_location    : 9 byte block: 3 c0 c7 13 38 0 0 0 0
2524                                      (DW_OP_addr: 3813c7c0)
2525
2526             No, really.  Check it out.  gcc is quite simply borked.
2527             <3><168cc>: Abbrev Number: 141 (DW_TAG_variable)
2528             // followed by no attributes, and the next DIE is a sibling,
2529             // not a child
2530             */
2531    }
2532    return;
2533
2534   bad_DIE:
2535    dump_bad_die_and_barf("parse_var_DIE", dtag, posn, level,
2536                          c_die, saved_die_c_offset,
2537                          abbv,
2538                          cc);
2539    /*NOTREACHED*/
2540 }
2541
2542 typedef
2543    struct {
2544       /* The fndn_ix file name/dirname table.  Is a mapping from dwarf
2545          integer index to the index in di->fndnpool. */
2546       XArray* /* of UInt* */ fndn_ix_Table;
2547       UWord sibling; // sibling of the last read DIE (if it has a sibling).
2548    }
2549    D3InlParser;
2550
2551 /* Return the function name corresponding to absori.
2552
2553    absori is a 'cooked' reference to a DIE, i.e. absori can be either
2554    in cc->escn_debug_info or in cc->escn_debug_info_alt.
2555    get_inlFnName will uncook absori.
2556
2557    The returned value is a (permanent) string in DebugInfo's .strchunks.
2558
2559    LIMITATION: absori must point in the CU of cc. If absori points
2560    in another CU, returns "UnknownInlinedFun".
2561
2562    Here are the problems to retrieve the fun name if absori is in
2563    another CU:  the DIE reading code cannot properly extract data from
2564    another CU, as the abbv code retrieved in the other CU cannot be
2565    translated in an abbreviation. Reading data from the alternate debug
2566    info also gives problems as the string reference is also in the alternate
2567    file, but when reading the alt DIE, the string form is a 'local' string,
2568    but cannot be read in the current CU, but must be read in the alt CU.
2569    See bug 338803 comment#3 and attachment for a failed attempt to handle
2570    these problems (failed because with the patch, only one alt abbrev hash
2571    table is kept, while we must handle all abbreviations in all CUs
2572    referenced by an absori (being a reference to an alt CU, or a previous
2573    or following CU). */
2574 static const HChar* get_inlFnName (Int absori, const CUConst* cc, Bool td3)
2575 {
2576    Cursor c;
2577    const g_abbv *abbv;
2578    ULong  atag, abbv_code;
2579    UInt   has_children;
2580    UWord  posn;
2581    Bool type_flag, alt_flag;
2582    const HChar *ret = NULL;
2583    FormContents cts;
2584    UInt nf_i;
2585
2586    /* Some inlined subroutine call dwarf entries do not have the abstract
2587       origin attribute, resulting in absori being 0 (see callers of
2588       get_inlFnName). This is observed at least with gcc 6.3.0 when compiling
2589       valgrind with lto. So, in case we have a 0 absori, do not report an
2590       error, instead, rather return an unknown inlined function. */
2591    if (absori == 0) {
2592       static Bool absori0_reported = False;
2593       if (!absori0_reported && VG_(clo_verbosity) > 1) {
2594          VG_(message)(Vg_DebugMsg,
2595                       "Warning: inlined fn name without absori\n"
2596                       "is shown as UnknownInlinedFun\n");
2597          absori0_reported = True;
2598       }
2599       TRACE_D3(" <get_inlFnName>: absori is not set");
2600       return ML_(addStr)(cc->di, "UnknownInlinedFun", -1);
2601    }
2602
2603    posn = uncook_die( cc, absori, &type_flag, &alt_flag);
2604    if (type_flag)
2605       cc->barf("get_inlFnName: uncooked absori in type debug info");
2606
2607    /* LIMITATION: check we are in the same CU.
2608       If not, return unknown inlined function name. */
2609    /* if crossing between alt debug info<>normal info
2610           or posn not in the cu range,
2611       then it is in another CU. */
2612    if (alt_flag != cc->is_alt_info
2613        || posn < cc->cu_start_offset
2614        || posn >= cc->cu_start_offset + cc->unit_length) {
2615       static Bool reported = False;
2616       if (!reported && VG_(clo_verbosity) > 1) {
2617          VG_(message)(Vg_DebugMsg,
2618                       "Warning: cross-CU LIMITATION: some inlined fn names\n"
2619                       "might be shown as UnknownInlinedFun\n");
2620          reported = True;
2621       }
2622       TRACE_D3(" <get_inlFnName><%lx>: cross-CU LIMITATION", posn);
2623       return ML_(addStr)(cc->di, "UnknownInlinedFun", -1);
2624    }
2625
2626    init_Cursor (&c, cc->escn_debug_info, posn, cc->barf,
2627                 "Overrun get_inlFnName absori");
2628
2629    abbv_code = get_ULEB128( &c );
2630    abbv      = get_abbv ( cc, abbv_code);
2631    atag      = abbv->atag;
2632    TRACE_D3(" <get_inlFnName><%lx>: Abbrev Number: %llu (%s)\n",
2633             posn, abbv_code, ML_(pp_DW_TAG)( atag ) );
2634
2635    if (atag == 0)
2636       cc->barf("get_inlFnName: invalid zero tag on DIE");
2637
2638    has_children = abbv->has_children;
2639    if (has_children != DW_children_no && has_children != DW_children_yes)
2640       cc->barf("get_inlFnName: invalid has_children value");
2641
2642    if (atag != DW_TAG_subprogram)
2643       cc->barf("get_inlFnName: absori not a subprogram");
2644
2645    nf_i = 0;
2646    while (True) {
2647       DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
2648       DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
2649       nf_i++;
2650       if (attr == 0 && form == 0) break;
2651       get_Form_contents( &cts, cc, &c, False/*td3*/, form );
2652       if (attr == DW_AT_name) {
2653          HChar *fnname;
2654          if (cts.szB >= 0)
2655             cc->barf("get_inlFnName: expecting indirect string");
2656          fnname = ML_(cur_read_strdup)( cts.u.cur,
2657                                         "get_inlFnName.1" );
2658          ret = ML_(addStr)(cc->di, fnname, -1);
2659          ML_(dinfo_free) (fnname);
2660          break; /* Name found, get out of the loop, as this has priority over
2661                  DW_AT_specification. */
2662       }
2663       if (attr == DW_AT_specification) {
2664          UWord cdie;
2665
2666          if (cts.szB == 0)
2667             cc->barf("get_inlFnName: AT specification missing");
2668
2669          /* The recursive call to get_inlFnName will uncook its arg.
2670             So, we need to cook it here, so as to reference the
2671             correct section (e.g. the alt info). */
2672          cdie = cook_die_using_form(cc, (UWord)cts.u.val, form);
2673
2674          /* hoping that there is no loop */
2675          ret = get_inlFnName (cdie, cc, td3);
2676          /* Unclear if having both DW_AT_specification and DW_AT_name is
2677             possible but in any case, we do not break here.
2678             If we find later on a DW_AT_name, it will override the name found
2679             in the DW_AT_specification.*/
2680       }
2681    }
2682
2683    if (ret)
2684       return ret;
2685    else {
2686       TRACE_D3("AbsOriFnNameNotFound");
2687       return ML_(addStr)(cc->di, "AbsOriFnNameNotFound", -1);
2688    }
2689 }
2690
2691 /* Returns True if the (possibly) childrens of the current DIE are interesting
2692    to parse. Returns False otherwise.
2693    If the current DIE has a sibling, the non interesting children can
2694    maybe be skipped (if the DIE has a DW_AT_sibling).  */
2695 __attribute__((noinline))
2696 static Bool parse_inl_DIE (
2697    /*MOD*/D3InlParser* parser,
2698    DW_TAG dtag,
2699    UWord posn,
2700    Int level,
2701    Cursor* c_die,
2702    const g_abbv *abbv,
2703    CUConst* cc,
2704    Bool td3
2705 )
2706 {
2707    FormContents cts;
2708    UInt nf_i;
2709
2710    UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
2711
2712    /* Get info about DW_TAG_compile_unit and DW_TAG_partial_unit 'which
2713       in theory could also contain inlined fn calls).  */
2714    if (dtag == DW_TAG_compile_unit || dtag == DW_TAG_partial_unit) {
2715       Bool have_lo    = False;
2716       Addr ip_lo    = 0;
2717       const HChar *compdir = NULL;
2718
2719       nf_i = 0;
2720       while (True) {
2721          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
2722          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
2723          nf_i++;
2724          if (attr == 0 && form == 0) break;
2725          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2726          if (attr == DW_AT_low_pc && cts.szB > 0) {
2727             ip_lo   = cts.u.val;
2728             have_lo = True;
2729          }
2730          if (attr == DW_AT_comp_dir) {
2731             if (cts.szB >= 0)
2732                cc->barf("parse_inl_DIE compdir: expecting indirect string");
2733             HChar *str = ML_(cur_read_strdup)( cts.u.cur,
2734                                                "parse_inl_DIE.compdir" );
2735             compdir = ML_(addStr)(cc->di, str, -1);
2736             ML_(dinfo_free) (str);
2737          }
2738          if (attr == DW_AT_stmt_list && cts.szB > 0) {
2739             read_filename_table( parser->fndn_ix_Table, compdir,
2740                                  cc, cts.u.val, td3 );
2741          }
2742          if (attr == DW_AT_sibling && cts.szB > 0) {
2743             parser->sibling = cts.u.val;
2744          }
2745       }
2746       if (level == 0)
2747          setup_cu_svma (cc, have_lo, ip_lo, td3);
2748    }
2749
2750    if (dtag == DW_TAG_inlined_subroutine) {
2751       Bool   have_lo    = False;
2752       Bool   have_hi1   = False;
2753       Bool   have_range = False;
2754       Bool   hiIsRelative = False;
2755       Addr   ip_lo      = 0;
2756       Addr   ip_hi1     = 0;
2757       Addr   rangeoff   = 0;
2758       UInt   caller_fndn_ix = 0;
2759       Int caller_lineno = 0;
2760       Int inlinedfn_abstract_origin = 0;
2761       // 0 will be interpreted as no abstract origin by get_inlFnName
2762
2763       nf_i = 0;
2764       while (True) {
2765          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
2766          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
2767          nf_i++;
2768          if (attr == 0 && form == 0) break;
2769          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2770          if (attr == DW_AT_call_file && cts.szB > 0) {
2771             Int ftabIx = (Int)cts.u.val;
2772             if (ftabIx >= 1
2773                 && ftabIx < VG_(sizeXA)( parser->fndn_ix_Table )) {
2774                caller_fndn_ix = *(UInt*)
2775                           VG_(indexXA)( parser->fndn_ix_Table, ftabIx );
2776             }
2777             if (0) VG_(printf)("XXX caller_fndn_ix = %u %s\n", caller_fndn_ix,
2778                                ML_(fndn_ix2filename) (cc->di, caller_fndn_ix));
2779          }
2780          if (attr == DW_AT_call_line && cts.szB > 0) {
2781             caller_lineno = cts.u.val;
2782          }
2783
2784          if (attr == DW_AT_abstract_origin  && cts.szB > 0) {
2785             inlinedfn_abstract_origin
2786                = cook_die_using_form (cc, (UWord)cts.u.val, form);
2787          }
2788
2789          if (attr == DW_AT_low_pc && cts.szB > 0) {
2790             ip_lo   = cts.u.val;
2791             have_lo = True;
2792          }
2793          if (attr == DW_AT_high_pc && cts.szB > 0) {
2794             ip_hi1   = cts.u.val;
2795             have_hi1 = True;
2796             if (form != DW_FORM_addr)
2797                hiIsRelative = True;
2798          }
2799          if (attr == DW_AT_ranges && cts.szB > 0) {
2800             rangeoff   = cts.u.val;
2801             have_range = True;
2802          }
2803          if (attr == DW_AT_sibling && cts.szB > 0) {
2804             parser->sibling = cts.u.val;
2805          }
2806       }
2807       if (have_lo && have_hi1 && hiIsRelative)
2808          ip_hi1 += ip_lo;
2809       /* Do we have something that looks sane? */
2810       if (dtag == DW_TAG_inlined_subroutine
2811           && (!have_lo) && (!have_hi1) && (!have_range)) {
2812          /* Seems strange. How can an inlined subroutine have
2813             no code ? */
2814          goto_bad_DIE;
2815       } else
2816       if (have_lo && have_hi1 && (!have_range)) {
2817          /* This inlined call is just a single address range. */
2818          if (ip_lo < ip_hi1) {
2819             /* Apply text debug biasing */
2820             ip_lo += cc->di->text_debug_bias;
2821             ip_hi1 += cc->di->text_debug_bias;
2822             ML_(addInlInfo) (cc->di,
2823                              ip_lo, ip_hi1,
2824                              get_inlFnName (inlinedfn_abstract_origin, cc, td3),
2825                              caller_fndn_ix,
2826                              caller_lineno, level);
2827          }
2828       } else if (have_range) {
2829          /* This inlined call is several address ranges. */
2830          XArray *ranges;
2831          Word j;
2832          const HChar *inlfnname =
2833             get_inlFnName (inlinedfn_abstract_origin, cc, td3);
2834
2835          /* Ranges are biased for the inline info using the same logic
2836             as what is used for biasing ranges for the var info, for which
2837             ranges are read using cc->cu_svma (see parse_var_DIE).
2838             Then text_debug_bias is added when a (non global) var
2839             is recorded (see just before the call to ML_(addVar)) */
2840          ranges = get_range_list( cc, td3,
2841                                   rangeoff, cc->cu_svma );
2842          for (j = 0; j < VG_(sizeXA)( ranges ); j++) {
2843             AddrRange* range = (AddrRange*) VG_(indexXA)( ranges, j );
2844             ML_(addInlInfo) (cc->di,
2845                              range->aMin   + cc->di->text_debug_bias,
2846                              range->aMax+1 + cc->di->text_debug_bias,
2847                              // aMax+1 as range has its last bound included
2848                              // while ML_(addInlInfo) expects last bound not
2849                              // included.
2850                              inlfnname,
2851                              caller_fndn_ix,
2852                              caller_lineno, level);
2853          }
2854          VG_(deleteXA)( ranges );
2855       } else
2856          goto_bad_DIE;
2857    }
2858
2859    // Only recursively parse the (possible) children for the DIE which
2860    // might maybe contain a DW_TAG_inlined_subroutine:
2861    return dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram
2862       || dtag == DW_TAG_inlined_subroutine
2863       || dtag == DW_TAG_compile_unit || dtag == DW_TAG_partial_unit;
2864
2865   bad_DIE:
2866    dump_bad_die_and_barf("parse_inl_DIE", dtag, posn, level,
2867                          c_die, saved_die_c_offset,
2868                          abbv,
2869                          cc);
2870    /*NOTREACHED*/
2871 }
2872
2873
2874 /*------------------------------------------------------------*/
2875 /*---                                                      ---*/
2876 /*--- Parsing of type-related DIEs                         ---*/
2877 /*---                                                      ---*/
2878 /*------------------------------------------------------------*/
2879
2880 typedef
2881    struct {
2882       /* What source language?  'A'=Ada83/95,
2883                                 'C'=C/C++,
2884                                 'F'=Fortran,
2885                                 '?'=other
2886          Established once per compilation unit. */
2887       UChar language;
2888       /* A stack of types which are currently under construction */
2889       Int   sp; /* [sp] is innermost active entry; sp==-1 for empty
2890                    stack */
2891       Int   stack_size;
2892       /* Note that the TyEnts in qparentE are temporary copies of the
2893          ones accumulating in the main tyent array.  So it is not safe
2894          to free up anything on them when popping them off the stack
2895          (iow, it isn't safe to use TyEnt__make_EMPTY on them).  Just
2896          memset them to zero when done. */
2897       TyEnt *qparentE; /* parent TyEnts */
2898       Int   *qlevel;
2899    }
2900    D3TypeParser;
2901
2902 /* Completely initialise a type parser object */
2903 static void
2904 type_parser_init ( D3TypeParser *parser )
2905 {
2906    parser->sp = -1;
2907    parser->language = '?';
2908    parser->stack_size = 0;
2909    parser->qparentE = NULL;
2910    parser->qlevel   = NULL;
2911 }
2912
2913 /* Release any memory hanging off a type parser object */
2914 static void
2915 type_parser_release ( D3TypeParser *parser )
2916 {
2917    ML_(dinfo_free)( parser->qparentE );
2918    ML_(dinfo_free)( parser->qlevel );
2919 }
2920
2921 static void typestack_show ( const D3TypeParser* parser, const HChar* str )
2922 {
2923    Word i;
2924    VG_(printf)("  typestack (%s) {\n", str);
2925    for (i = 0; i <= parser->sp; i++) {
2926       VG_(printf)("    [%ld] (level %d): ", i, parser->qlevel[i]);
2927       ML_(pp_TyEnt)( &parser->qparentE[i] );
2928       VG_(printf)("\n");
2929    }
2930    VG_(printf)("  }\n");
2931 }
2932
2933 /* Remove from the stack, all entries with .level > 'level' */
2934 static
2935 void typestack_preen ( D3TypeParser* parser, Bool td3, Int level )
2936 {
2937    Bool changed = False;
2938    vg_assert(parser->sp < parser->stack_size);
2939    while (True) {
2940       vg_assert(parser->sp >= -1);
2941       if (parser->sp == -1) break;
2942       if (parser->qlevel[parser->sp] <= level) break;
2943       if (0)
2944          TRACE_D3("BBBBAAAA typestack_pop [newsp=%d]\n", parser->sp-1);
2945       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
2946       parser->sp--;
2947       changed = True;
2948    }
2949    if (changed && td3)
2950       typestack_show( parser, "after preen" );
2951 }
2952
2953 static Bool typestack_is_empty ( const D3TypeParser* parser )
2954 {
2955    vg_assert(parser->sp >= -1 && parser->sp < parser->stack_size);
2956    return parser->sp == -1;
2957 }
2958
2959 static void typestack_push ( const CUConst* cc,
2960                              D3TypeParser* parser,
2961                              Bool td3,
2962                              const TyEnt* parentE, Int level )
2963 {
2964    if (0)
2965    TRACE_D3("BBBBAAAA typestack_push[newsp=%d]: %d  %05lx\n",
2966             parser->sp+1, level, parentE->cuOff);
2967
2968    /* First we need to zap everything >= 'level', as we are about to
2969       replace any previous entry at 'level', so .. */
2970    typestack_preen(parser, /*td3*/False, level-1);
2971
2972    vg_assert(parser->sp >= -1);
2973    vg_assert(parser->sp < parser->stack_size);
2974    if (parser->sp == parser->stack_size - 1) {
2975       parser->stack_size += 16;
2976       parser->qparentE =
2977          ML_(dinfo_realloc)("di.readdwarf3.typush.1", parser->qparentE,
2978                             parser->stack_size * sizeof parser->qparentE[0]);
2979       parser->qlevel =
2980          ML_(dinfo_realloc)("di.readdwarf3.typush.2", parser->qlevel,
2981                             parser->stack_size * sizeof parser->qlevel[0]);
2982    }
2983    if (parser->sp >= 0)
2984       vg_assert(parser->qlevel[parser->sp] < level);
2985    parser->sp++;
2986    vg_assert(parentE);
2987    vg_assert(ML_(TyEnt__is_type)(parentE));
2988    vg_assert(parentE->cuOff != D3_INVALID_CUOFF);
2989    parser->qparentE[parser->sp] = *parentE;
2990    parser->qlevel[parser->sp]  = level;
2991    if (TD3)
2992       typestack_show( parser, "after push" );
2993 }
2994
2995 /* True if the subrange type being parsed gives the bounds of an array. */
2996 static Bool subrange_type_denotes_array_bounds ( const D3TypeParser* parser,
2997                                                  DW_TAG dtag ) {
2998    vg_assert(dtag == DW_TAG_subrange_type);
2999    /* For most languages, a subrange_type dtag always gives the
3000       bounds of an array.
3001       For Ada, there are additional conditions as a subrange_type
3002       is also used for other purposes. */
3003    if (parser->language != 'A')
3004       /* not Ada, so it definitely denotes an array bound. */
3005       return True;
3006    else
3007       /* Extra constraints for Ada: it only denotes an array bound if .. */
3008       return (! typestack_is_empty(parser)
3009               && parser->qparentE[parser->sp].tag == Te_TyArray);
3010 }
3011
3012 /* True if the form is one of the forms supported to give an array bound.
3013    For some arrays (scope local arrays with variable size),
3014    a DW_FORM_ref4 was used, and was wrongly used as the bound value.
3015    So, refuse the forms that are known to give a problem. */
3016 static Bool form_expected_for_bound ( DW_FORM form ) {
3017    if (form == DW_FORM_ref1
3018        || form == DW_FORM_ref2
3019        || form == DW_FORM_ref4
3020        || form == DW_FORM_ref8)
3021       return False;
3022
3023    return True;
3024 }
3025
3026 /* Parse a type-related DIE.  'parser' holds the current parser state.
3027    'admin' is where the completed types are dumped.  'dtag' is the tag
3028    for this DIE.  'c_die' points to the start of the data fields (FORM
3029    stuff) for the DIE.  abbv is the parsed abbreviation which describe
3030    the DIE.
3031
3032    We may find the DIE uninteresting, in which case we should ignore
3033    it.
3034
3035    What happens: the DIE is examined.  If uninteresting, it is ignored.
3036    Otherwise, the DIE gives rise to two things:
3037
3038    (1) the offset of this DIE in the CU -- the cuOffset, a UWord
3039    (2) a TyAdmin structure, which holds the type, or related stuff
3040
3041    (2) is added at the end of 'tyadmins', at some index, say 'i'.
3042
3043    A pair (cuOffset, i) is added to 'tydict'.
3044
3045    Hence 'tyadmins' holds the actual type entities, and 'tydict' holds
3046    a mapping from cuOffset to the index of the corresponding entry in
3047    'tyadmin'.
3048
3049    When resolving a cuOffset to a TyAdmin, first look up the cuOffset
3050    in the tydict (by binary search).  This gives an index into
3051    tyadmins, and the required entity lives in tyadmins at that index.
3052 */
3053 __attribute__((noinline))
3054 static void parse_type_DIE ( /*MOD*/XArray* /* of TyEnt */ tyents,
3055                              /*MOD*/D3TypeParser* parser,
3056                              DW_TAG dtag,
3057                              UWord posn,
3058                              Int level,
3059                              Cursor* c_die,
3060                              const g_abbv *abbv,
3061                              const CUConst* cc,
3062                              Bool td3 )
3063 {
3064    FormContents cts;
3065    UInt nf_i;
3066    TyEnt typeE;
3067    TyEnt atomE;
3068    TyEnt fieldE;
3069    TyEnt boundE;
3070
3071    UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
3072
3073    VG_(memset)( &typeE,  0xAA, sizeof(typeE) );
3074    VG_(memset)( &atomE,  0xAA, sizeof(atomE) );
3075    VG_(memset)( &fieldE, 0xAA, sizeof(fieldE) );
3076    VG_(memset)( &boundE, 0xAA, sizeof(boundE) );
3077
3078    /* If we've returned to a level at or above any previously noted
3079       parent, un-note it, so we don't believe we're still collecting
3080       its children. */
3081    typestack_preen( parser, td3, level-1 );
3082
3083    if (dtag == DW_TAG_compile_unit
3084        || dtag == DW_TAG_type_unit
3085        || dtag == DW_TAG_partial_unit) {
3086       /* See if we can find DW_AT_language, since it is important for
3087          establishing array bounds (see DW_TAG_subrange_type below in
3088          this fn) */
3089       nf_i = 0;
3090       while (True) {
3091          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3092          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3093          nf_i++;
3094          if (attr == 0 && form == 0) break;
3095          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3096          if (attr != DW_AT_language)
3097             continue;
3098          if (cts.szB <= 0)
3099            goto_bad_DIE;
3100          switch (cts.u.val) {
3101             case DW_LANG_C89: case DW_LANG_C:
3102             case DW_LANG_C_plus_plus: case DW_LANG_ObjC:
3103             case DW_LANG_ObjC_plus_plus: case DW_LANG_UPC:
3104             case DW_LANG_Upc: case DW_LANG_C99: case DW_LANG_C11:
3105             case DW_LANG_C_plus_plus_11: case DW_LANG_C_plus_plus_14:
3106                parser->language = 'C'; break;
3107             case DW_LANG_Fortran77: case DW_LANG_Fortran90:
3108             case DW_LANG_Fortran95: case DW_LANG_Fortran03:
3109             case DW_LANG_Fortran08:
3110                parser->language = 'F'; break;
3111             case DW_LANG_Ada83: case DW_LANG_Ada95:
3112                parser->language = 'A'; break;
3113             case DW_LANG_Cobol74:
3114             case DW_LANG_Cobol85: case DW_LANG_Pascal83:
3115             case DW_LANG_Modula2: case DW_LANG_Java:
3116             case DW_LANG_PLI:
3117             case DW_LANG_D: case DW_LANG_Python: case DW_LANG_Go:
3118             case DW_LANG_Mips_Assembler:
3119                parser->language = '?'; break;
3120             default:
3121                goto_bad_DIE;
3122          }
3123       }
3124    }
3125
3126    if (dtag == DW_TAG_base_type) {
3127       /* We can pick up a new base type any time. */
3128       VG_(memset)(&typeE, 0, sizeof(typeE));
3129       typeE.cuOff = D3_INVALID_CUOFF;
3130       typeE.tag   = Te_TyBase;
3131       nf_i = 0;
3132       while (True) {
3133          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3134          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3135          nf_i++;
3136          if (attr == 0 && form == 0) break;
3137          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3138          if (attr == DW_AT_name && cts.szB < 0) {
3139             typeE.Te.TyBase.name
3140                = ML_(cur_read_strdup)( cts.u.cur,
3141                                        "di.readdwarf3.ptD.base_type.1" );
3142          }
3143          if (attr == DW_AT_byte_size && cts.szB > 0) {
3144             typeE.Te.TyBase.szB = cts.u.val;
3145          }
3146          if (attr == DW_AT_encoding && cts.szB > 0) {
3147             switch (cts.u.val) {
3148                case DW_ATE_unsigned: case DW_ATE_unsigned_char:
3149                case DW_ATE_UTF: /* since DWARF4, e.g. char16_t from C++ */
3150                case DW_ATE_boolean:/* FIXME - is this correct? */
3151                case DW_ATE_unsigned_fixed:
3152                   typeE.Te.TyBase.enc = 'U'; break;
3153                case DW_ATE_signed: case DW_ATE_signed_char:
3154                case DW_ATE_signed_fixed:
3155                   typeE.Te.TyBase.enc = 'S'; break;
3156                case DW_ATE_float:
3157                   typeE.Te.TyBase.enc = 'F'; break;
3158                case DW_ATE_complex_float:
3159                   typeE.Te.TyBase.enc = 'C'; break;
3160                default:
3161                   goto_bad_DIE;
3162             }
3163          }
3164       }
3165
3166       /* Invent a name if it doesn't have one.  gcc-4.3
3167          -ftree-vectorize is observed to emit nameless base types. */
3168       if (!typeE.Te.TyBase.name)
3169          typeE.Te.TyBase.name
3170             = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.2",
3171                                  "<anon_base_type>" );
3172
3173       /* Do we have something that looks sane? */
3174       if (/* must have a name */
3175           typeE.Te.TyBase.name == NULL
3176           /* and a plausible size.  Yes, really 32: "complex long
3177              double" apparently has size=32 */
3178           || typeE.Te.TyBase.szB < 0 || typeE.Te.TyBase.szB > 32
3179           /* and a plausible encoding */
3180           || (typeE.Te.TyBase.enc != 'U'
3181               && typeE.Te.TyBase.enc != 'S'
3182               && typeE.Te.TyBase.enc != 'F'
3183               && typeE.Te.TyBase.enc != 'C'))
3184          goto_bad_DIE;
3185       /* Last minute hack: if we see this
3186          <1><515>: DW_TAG_base_type
3187              DW_AT_byte_size   : 0
3188              DW_AT_encoding    : 5
3189              DW_AT_name        : void
3190          convert it into a real Void type. */
3191       if (typeE.Te.TyBase.szB == 0
3192           && 0 == VG_(strcmp)("void", typeE.Te.TyBase.name)) {
3193          ML_(TyEnt__make_EMPTY)(&typeE);
3194          typeE.tag = Te_TyVoid;
3195          typeE.Te.TyVoid.isFake = False; /* it's a real one! */
3196       }
3197
3198       goto acquire_Type;
3199    }
3200
3201    /*
3202     * An example of DW_TAG_rvalue_reference_type:
3203     *
3204     * $ readelf --debug-dump /usr/lib/debug/usr/lib/libstdc++.so.6.0.16.debug
3205     *  <1><1014>: Abbrev Number: 55 (DW_TAG_rvalue_reference_type)
3206     *     <1015>   DW_AT_byte_size   : 4
3207     *     <1016>   DW_AT_type        : <0xe52>
3208     */
3209    if (dtag == DW_TAG_pointer_type || dtag == DW_TAG_reference_type
3210        || dtag == DW_TAG_ptr_to_member_type
3211        || dtag == DW_TAG_rvalue_reference_type) {
3212       /* This seems legit for _pointer_type and _reference_type.  I
3213          don't know if rolling _ptr_to_member_type in here really is
3214          legit, but it's better than not handling it at all. */
3215       VG_(memset)(&typeE, 0, sizeof(typeE));
3216       typeE.cuOff = D3_INVALID_CUOFF;
3217       switch (dtag) {
3218       case DW_TAG_pointer_type:
3219          typeE.tag = Te_TyPtr;
3220          break;
3221       case DW_TAG_reference_type:
3222          typeE.tag = Te_TyRef;
3223          break;
3224       case DW_TAG_ptr_to_member_type:
3225          typeE.tag = Te_TyPtrMbr;
3226          break;
3227       case DW_TAG_rvalue_reference_type:
3228          typeE.tag = Te_TyRvalRef;
3229          break;
3230       default:
3231          vg_assert(False);
3232       }
3233       /* target type defaults to void */
3234       typeE.Te.TyPorR.typeR = D3_FAKEVOID_CUOFF;
3235       /* These four type kinds don't *have* to specify their size, in
3236          which case we assume it's a machine word.  But if they do
3237          specify it, it must be a machine word :-)  This probably
3238          assumes that the word size of the Dwarf3 we're reading is the
3239          same size as that on the machine.  gcc appears to give a size
3240          whereas icc9 doesn't. */
3241       typeE.Te.TyPorR.szB = sizeof(UWord);
3242       nf_i = 0;
3243       while (True) {
3244          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3245          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3246          nf_i++;
3247          if (attr == 0 && form == 0) break;
3248          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3249          if (attr == DW_AT_byte_size && cts.szB > 0) {
3250             typeE.Te.TyPorR.szB = cts.u.val;
3251          }
3252          if (attr == DW_AT_type && cts.szB > 0) {
3253             typeE.Te.TyPorR.typeR
3254                = cook_die_using_form( cc, (UWord)cts.u.val, form );
3255          }
3256       }
3257       /* Do we have something that looks sane? */
3258       if (typeE.Te.TyPorR.szB != sizeof(UWord))
3259          goto_bad_DIE;
3260       else
3261          goto acquire_Type;
3262    }
3263
3264    if (dtag == DW_TAG_enumeration_type) {
3265       /* Create a new Type to hold the results. */
3266       VG_(memset)(&typeE, 0, sizeof(typeE));
3267       typeE.cuOff = posn;
3268       typeE.tag   = Te_TyEnum;
3269       Bool is_decl = False;
3270       typeE.Te.TyEnum.atomRs
3271          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.enum_type.1",
3272                        ML_(dinfo_free),
3273                        sizeof(UWord) );
3274       nf_i=0;
3275       while (True) {
3276          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3277          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3278          nf_i++;
3279          if (attr == 0 && form == 0) break;
3280          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3281          if (attr == DW_AT_name && cts.szB < 0) {
3282             typeE.Te.TyEnum.name
3283                = ML_(cur_read_strdup)( cts.u.cur,
3284                                        "di.readdwarf3.pTD.enum_type.2" );
3285          }
3286          if (attr == DW_AT_byte_size && cts.szB > 0) {
3287             typeE.Te.TyEnum.szB = cts.u.val;
3288          }
3289          if (attr == DW_AT_declaration) {
3290             is_decl = True;
3291          }
3292       }
3293
3294       if (!typeE.Te.TyEnum.name)
3295          typeE.Te.TyEnum.name
3296             = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enum_type.3",
3297                                  "<anon_enum_type>" );
3298
3299       /* Do we have something that looks sane? */
3300       if (typeE.Te.TyEnum.szB == 0
3301           /* we must know the size */
3302           /* but not for Ada, which uses such dummy
3303              enumerations as helper for gdb ada mode.
3304              Also GCC allows incomplete enums as GNU extension.
3305              http://gcc.gnu.org/onlinedocs/gcc/Incomplete-Enums.html
3306              These are marked as DW_AT_declaration and won't have
3307              a size. They can only be used in declaration or as
3308              pointer types.  You can't allocate variables or storage
3309              using such an enum type. (Also GCC seems to have a bug
3310              that will put such an enumeration_type into a .debug_types
3311              unit which should only contain complete types.) */
3312           && (parser->language != 'A' && !is_decl)) {
3313          goto_bad_DIE;
3314       }
3315
3316       /* On't stack! */
3317       typestack_push( cc, parser, td3, &typeE, level );
3318       goto acquire_Type;
3319    }
3320
3321    /* gcc (GCC) 4.4.0 20081017 (experimental) occasionally produces
3322       DW_TAG_enumerator with only a DW_AT_name but no
3323       DW_AT_const_value.  This is in violation of the Dwarf3 standard,
3324       and appears to be a new "feature" of gcc - versions 4.3.x and
3325       earlier do not appear to do this.  So accept DW_TAG_enumerator
3326       which only have a name but no value.  An example:
3327
3328       <1><180>: Abbrev Number: 6 (DW_TAG_enumeration_type)
3329          <181>   DW_AT_name        : (indirect string, offset: 0xda70):
3330                                      QtMsgType
3331          <185>   DW_AT_byte_size   : 4
3332          <186>   DW_AT_decl_file   : 14
3333          <187>   DW_AT_decl_line   : 1480
3334          <189>   DW_AT_sibling     : <0x1a7>
3335       <2><18d>: Abbrev Number: 7 (DW_TAG_enumerator)
3336          <18e>   DW_AT_name        : (indirect string, offset: 0x9e18):
3337                                      QtDebugMsg
3338       <2><192>: Abbrev Number: 7 (DW_TAG_enumerator)
3339          <193>   DW_AT_name        : (indirect string, offset: 0x1505f):
3340                                      QtWarningMsg
3341       <2><197>: Abbrev Number: 7 (DW_TAG_enumerator)
3342          <198>   DW_AT_name        : (indirect string, offset: 0x16f4a):
3343                                      QtCriticalMsg
3344       <2><19c>: Abbrev Number: 7 (DW_TAG_enumerator)
3345          <19d>   DW_AT_name        : (indirect string, offset: 0x156dd):
3346                                      QtFatalMsg
3347       <2><1a1>: Abbrev Number: 7 (DW_TAG_enumerator)
3348          <1a2>   DW_AT_name        : (indirect string, offset: 0x13660):
3349                                      QtSystemMsg
3350    */
3351    if (dtag == DW_TAG_enumerator) {
3352       VG_(memset)( &atomE, 0, sizeof(atomE) );
3353       atomE.cuOff = posn;
3354       atomE.tag   = Te_Atom;
3355       nf_i = 0;
3356       while (True) {
3357          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3358          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3359          nf_i++;
3360          if (attr == 0 && form == 0) break;
3361          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3362          if (attr == DW_AT_name && cts.szB < 0) {
3363             atomE.Te.Atom.name
3364               = ML_(cur_read_strdup)( cts.u.cur,
3365                                       "di.readdwarf3.pTD.enumerator.1" );
3366          }
3367          if (attr == DW_AT_const_value && cts.szB > 0) {
3368             atomE.Te.Atom.value      = cts.u.val;
3369             atomE.Te.Atom.valueKnown = True;
3370          }
3371       }
3372       /* Do we have something that looks sane? */
3373       if (atomE.Te.Atom.name == NULL)
3374          goto_bad_DIE;
3375       /* Do we have a plausible parent? */
3376       if (typestack_is_empty(parser)) goto_bad_DIE;
3377       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
3378       vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
3379       if (level != parser->qlevel[parser->sp]+1) goto_bad_DIE;
3380       if (parser->qparentE[parser->sp].tag != Te_TyEnum) goto_bad_DIE;
3381       /* Record this child in the parent */
3382       vg_assert(parser->qparentE[parser->sp].Te.TyEnum.atomRs);
3383       VG_(addToXA)( parser->qparentE[parser->sp].Te.TyEnum.atomRs,
3384                     &atomE );
3385       /* And record the child itself */
3386       goto acquire_Atom;
3387    }
3388
3389    /* Treat DW_TAG_class_type as if it was a DW_TAG_structure_type.  I
3390       don't know if this is correct, but it at least makes this reader
3391       usable for gcc-4.3 produced Dwarf3. */
3392    if (dtag == DW_TAG_structure_type || dtag == DW_TAG_class_type
3393        || dtag == DW_TAG_union_type) {
3394       Bool have_szB = False;
3395       Bool is_decl  = False;
3396       Bool is_spec  = False;
3397       /* Create a new Type to hold the results. */
3398       VG_(memset)(&typeE, 0, sizeof(typeE));
3399       typeE.cuOff = posn;
3400       typeE.tag   = Te_TyStOrUn;
3401       typeE.Te.TyStOrUn.name = NULL;
3402       typeE.Te.TyStOrUn.typeR = D3_INVALID_CUOFF;
3403       typeE.Te.TyStOrUn.fieldRs
3404          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.pTD.struct_type.1",
3405                        ML_(dinfo_free),
3406                        sizeof(UWord) );
3407       typeE.Te.TyStOrUn.complete = True;
3408       typeE.Te.TyStOrUn.isStruct = dtag == DW_TAG_structure_type
3409                                    || dtag == DW_TAG_class_type;
3410       nf_i = 0;
3411       while (True) {
3412          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3413          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3414          nf_i++;
3415          if (attr == 0 && form == 0) break;
3416          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3417          if (attr == DW_AT_name && cts.szB < 0) {
3418             typeE.Te.TyStOrUn.name
3419                = ML_(cur_read_strdup)( cts.u.cur,
3420                                        "di.readdwarf3.ptD.struct_type.2" );
3421          }
3422          if (attr == DW_AT_byte_size && cts.szB >= 0) {
3423             typeE.Te.TyStOrUn.szB = cts.u.val;
3424             have_szB = True;
3425          }
3426          if (attr == DW_AT_declaration && cts.szB > 0 && cts.u.val > 0) {
3427             is_decl = True;
3428          }
3429          if (attr == DW_AT_specification && cts.szB > 0 && cts.u.val > 0) {
3430             is_spec = True;
3431          }
3432          if (attr == DW_AT_signature && form == DW_FORM_ref_sig8
3433              && cts.szB > 0) {
3434             have_szB = True;
3435             typeE.Te.TyStOrUn.szB = 8;
3436             typeE.Te.TyStOrUn.typeR
3437                = cook_die_using_form( cc, (UWord)cts.u.val, form );
3438          }
3439       }
3440       /* Do we have something that looks sane? */
3441       if (is_decl && (!is_spec)) {
3442          /* It's a DW_AT_declaration.  We require the name but
3443             nothing else. */
3444          /* JRS 2012-06-28: following discussion w/ tromey, if the
3445             type doesn't have name, just make one up, and accept it.
3446             It might be referred to by other DIEs, so ignoring it
3447             doesn't seem like a safe option. */
3448          if (typeE.Te.TyStOrUn.name == NULL)
3449             typeE.Te.TyStOrUn.name
3450                = ML_(dinfo_strdup)( "di.readdwarf3.ptD.struct_type.3",
3451                                     "<anon_struct_type>" );
3452          typeE.Te.TyStOrUn.complete = False;
3453          /* JRS 2009 Aug 10: <possible kludge>? */
3454          /* Push this tyent on the stack, even though it's incomplete.
3455             It appears that gcc-4.4 on Fedora 11 will sometimes create
3456             DW_TAG_member entries for it, and so we need to have a
3457             plausible parent present in order for that to work.  See
3458             #200029 comments 8 and 9. */
3459          typestack_push( cc, parser, td3, &typeE, level );
3460          /* </possible kludge> */
3461          goto acquire_Type;
3462       }
3463       if ((!is_decl) /* && (!is_spec) */) {
3464          /* this is the common, ordinary case */
3465          /* The name can be present, or not */
3466          if (!have_szB) {
3467             /* We must know the size.
3468                But in Ada, record with discriminants might have no size.
3469                But in C, VLA in the middle of a struct (gcc extension)
3470                might have no size.
3471                Instead, some GNAT dwarf extensions and/or dwarf entries
3472                allow to calculate the struct size at runtime.
3473                We cannot do that (yet?) so, the temporary kludge is to use
3474                a small size. */
3475             typeE.Te.TyStOrUn.szB = 1;
3476          }
3477          /* On't stack! */
3478          typestack_push( cc, parser, td3, &typeE, level );
3479          goto acquire_Type;
3480       }
3481       else {
3482          /* don't know how to handle any other variants just now */
3483          goto_bad_DIE;
3484       }
3485    }
3486
3487    if (dtag == DW_TAG_member) {
3488       /* Acquire member entries for both DW_TAG_structure_type and
3489          DW_TAG_union_type.  They differ minorly, in that struct
3490          members must have a DW_AT_data_member_location expression
3491          whereas union members must not. */
3492       Bool parent_is_struct;
3493       VG_(memset)( &fieldE, 0, sizeof(fieldE) );
3494       fieldE.cuOff = posn;
3495       fieldE.tag   = Te_Field;
3496       fieldE.Te.Field.typeR = D3_INVALID_CUOFF;
3497       nf_i = 0;
3498       while (True) {
3499          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3500          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3501          nf_i++;
3502          if (attr == 0 && form == 0) break;
3503          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3504          if (attr == DW_AT_name && cts.szB < 0) {
3505             fieldE.Te.Field.name
3506                = ML_(cur_read_strdup)( cts.u.cur,
3507                                        "di.readdwarf3.ptD.member.1" );
3508          }
3509          if (attr == DW_AT_type && cts.szB > 0) {
3510             fieldE.Te.Field.typeR
3511                = cook_die_using_form( cc, (UWord)cts.u.val, form );
3512          }
3513          /* There are 2 different cases for DW_AT_data_member_location.
3514             If it is a constant class attribute, it contains byte offset
3515             from the beginning of the containing entity.
3516             Otherwise it is a location expression.  */
3517          if (attr == DW_AT_data_member_location && cts.szB > 0) {
3518             fieldE.Te.Field.nLoc = -1;
3519             fieldE.Te.Field.pos.offset = cts.u.val;
3520          }
3521          if (attr == DW_AT_data_member_location && cts.szB <= 0) {
3522             fieldE.Te.Field.nLoc = (UWord)(-cts.szB);
3523             fieldE.Te.Field.pos.loc
3524                = ML_(cur_read_memdup)( cts.u.cur,
3525                                        (SizeT)fieldE.Te.Field.nLoc,
3526                                        "di.readdwarf3.ptD.member.2" );
3527          }
3528       }
3529       /* Do we have a plausible parent? */
3530       if (typestack_is_empty(parser)) goto_bad_DIE;
3531       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
3532       vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
3533       if (level != parser->qlevel[parser->sp]+1) goto_bad_DIE;
3534       if (parser->qparentE[parser->sp].tag != Te_TyStOrUn) goto_bad_DIE;
3535       /* Do we have something that looks sane?  If this is a member of a
3536          struct, we must have a location expression; but if a member
3537          of a union that is irrelevant (D3 spec sec 5.6.6).  We ought
3538          to reject in the latter case, but some compilers have been
3539          observed to emit constant-zero expressions.  So just ignore
3540          them. */
3541       parent_is_struct
3542          = parser->qparentE[parser->sp].Te.TyStOrUn.isStruct;
3543       if (!fieldE.Te.Field.name)
3544          fieldE.Te.Field.name
3545             = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.3",
3546                                  "<anon_field>" );
3547       if (fieldE.Te.Field.typeR == D3_INVALID_CUOFF)
3548          goto_bad_DIE;
3549       if (fieldE.Te.Field.nLoc) {
3550          if (!parent_is_struct) {
3551             /* If this is a union type, pretend we haven't seen the data
3552                member location expression, as it is by definition
3553                redundant (it must be zero). */
3554             if (fieldE.Te.Field.nLoc > 0)
3555                ML_(dinfo_free)(fieldE.Te.Field.pos.loc);
3556             fieldE.Te.Field.pos.loc = NULL;
3557             fieldE.Te.Field.nLoc = 0;
3558          }
3559          /* Record this child in the parent */
3560          fieldE.Te.Field.isStruct = parent_is_struct;
3561          vg_assert(parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs);
3562          VG_(addToXA)( parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs,
3563                        &posn );
3564          /* And record the child itself */
3565          goto acquire_Field;
3566       } else {
3567          /* Member with no location - this can happen with static
3568             const members in C++ code which are compile time constants
3569             that do not exist in the class. They're not of any interest
3570             to us so we ignore them. */
3571          ML_(TyEnt__make_EMPTY)(&fieldE);
3572       }
3573    }
3574
3575    if (dtag == DW_TAG_array_type) {
3576       VG_(memset)(&typeE, 0, sizeof(typeE));
3577       typeE.cuOff = posn;
3578       typeE.tag   = Te_TyArray;
3579       typeE.Te.TyArray.typeR = D3_INVALID_CUOFF;
3580       typeE.Te.TyArray.boundRs
3581          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.array_type.1",
3582                        ML_(dinfo_free),
3583                        sizeof(UWord) );
3584       nf_i = 0;
3585       while (True) {
3586          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3587          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3588          nf_i++;
3589          if (attr == 0 && form == 0) break;
3590          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3591          if (attr == DW_AT_type && cts.szB > 0) {
3592             typeE.Te.TyArray.typeR
3593                = cook_die_using_form( cc, (UWord)cts.u.val, form );
3594          }
3595       }
3596       if (typeE.Te.TyArray.typeR == D3_INVALID_CUOFF)
3597          goto_bad_DIE;
3598       /* On't stack! */
3599       typestack_push( cc, parser, td3, &typeE, level );
3600       goto acquire_Type;
3601    }
3602
3603    /* this is a subrange type defining the bounds of an array. */
3604    if (dtag == DW_TAG_subrange_type
3605        && subrange_type_denotes_array_bounds(parser, dtag)) {
3606       Bool have_lower = False;
3607       Bool have_upper = False;
3608       Bool have_count = False;
3609       Long lower = 0;
3610       Long upper = 0;
3611       Long count = 0;
3612
3613       switch (parser->language) {
3614          case 'C': have_lower = True;  lower = 0; break;
3615          case 'F': have_lower = True;  lower = 1; break;
3616          case '?': have_lower = False; break;
3617          case 'A': have_lower = False; break;
3618          default:  vg_assert(0); /* assured us by handling of
3619                                     DW_TAG_compile_unit in this fn */
3620       }
3621
3622       VG_(memset)( &boundE, 0, sizeof(boundE) );
3623       boundE.cuOff = D3_INVALID_CUOFF;
3624       boundE.tag   = Te_Bound;
3625       nf_i = 0;
3626       while (True) {
3627          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3628          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3629          nf_i++;
3630          if (attr == 0 && form == 0) break;
3631          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3632          if (attr == DW_AT_lower_bound && cts.szB > 0
3633              && form_expected_for_bound (form)) {
3634             lower      = (Long)cts.u.val;
3635             have_lower = True;
3636          }
3637          if (attr == DW_AT_upper_bound && cts.szB > 0
3638              && form_expected_for_bound (form)) {
3639             upper      = (Long)cts.u.val;
3640             have_upper = True;
3641          }
3642          if (attr == DW_AT_count && cts.szB > 0) {
3643             count    = (Long)cts.u.val;
3644             have_count = True;
3645          }
3646       }
3647       /* FIXME: potentially skip the rest if no parent present, since
3648          it could be the case that this subrange type is free-standing
3649          (not being used to describe the bounds of a containing array
3650          type) */
3651       /* Do we have a plausible parent? */
3652       if (typestack_is_empty(parser)) goto_bad_DIE;
3653       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
3654       vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
3655       if (level != parser->qlevel[parser->sp]+1) goto_bad_DIE;
3656       if (parser->qparentE[parser->sp].tag != Te_TyArray) goto_bad_DIE;
3657
3658       /* Figure out if we have a definite range or not */
3659       if (have_lower && have_upper && (!have_count)) {
3660          boundE.Te.Bound.knownL = True;
3661          boundE.Te.Bound.knownU = True;
3662          boundE.Te.Bound.boundL = lower;
3663          boundE.Te.Bound.boundU = upper;
3664       }
3665       else if (have_lower && (!have_upper) && (!have_count)) {
3666          boundE.Te.Bound.knownL = True;
3667          boundE.Te.Bound.knownU = False;
3668          boundE.Te.Bound.boundL = lower;
3669          boundE.Te.Bound.boundU = 0;
3670       }
3671       else if ((!have_lower) && have_upper && (!have_count)) {
3672          boundE.Te.Bound.knownL = False;
3673          boundE.Te.Bound.knownU = True;
3674          boundE.Te.Bound.boundL = 0;
3675          boundE.Te.Bound.boundU = upper;
3676       }
3677       else if ((!have_lower) && (!have_upper) && (!have_count)) {
3678          boundE.Te.Bound.knownL = False;
3679          boundE.Te.Bound.knownU = False;
3680          boundE.Te.Bound.boundL = 0;
3681          boundE.Te.Bound.boundU = 0;
3682       } else if (have_lower && (!have_upper) && (have_count)) {
3683          boundE.Te.Bound.knownL = True;
3684          boundE.Te.Bound.knownU = True;
3685          boundE.Te.Bound.boundL = lower;
3686          boundE.Te.Bound.boundU = lower + count;
3687       } else {
3688          /* FIXME: handle more cases */
3689          goto_bad_DIE;
3690       }
3691
3692       /* Record this bound in the parent */
3693       boundE.cuOff = posn;
3694       vg_assert(parser->qparentE[parser->sp].Te.TyArray.boundRs);
3695       VG_(addToXA)( parser->qparentE[parser->sp].Te.TyArray.boundRs,
3696                     &boundE.cuOff );
3697       /* And record the child itself */
3698       goto acquire_Bound;
3699    }
3700
3701    /* typedef or subrange_type other than array bounds. */
3702    if (dtag == DW_TAG_typedef
3703        || (dtag == DW_TAG_subrange_type
3704            && !subrange_type_denotes_array_bounds(parser, dtag))) {
3705       /* subrange_type other than array bound is only for Ada. */
3706       vg_assert (dtag == DW_TAG_typedef || parser->language == 'A');
3707       /* We can pick up a new typedef/subrange_type any time. */
3708       VG_(memset)(&typeE, 0, sizeof(typeE));
3709       typeE.cuOff = D3_INVALID_CUOFF;
3710       typeE.tag   = Te_TyTyDef;
3711       typeE.Te.TyTyDef.name = NULL;
3712       typeE.Te.TyTyDef.typeR = D3_INVALID_CUOFF;
3713       nf_i = 0;
3714       while (True) {
3715          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3716          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3717          nf_i++;
3718          if (attr == 0 && form == 0) break;
3719          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3720          if (attr == DW_AT_name && cts.szB < 0) {
3721             typeE.Te.TyTyDef.name
3722                = ML_(cur_read_strdup)( cts.u.cur,
3723                                        "di.readdwarf3.ptD.typedef.1" );
3724          }
3725          if (attr == DW_AT_type && cts.szB > 0) {
3726             typeE.Te.TyTyDef.typeR
3727                = cook_die_using_form( cc, (UWord)cts.u.val, form );
3728          }
3729       }
3730       /* Do we have something that looks sane?
3731          gcc gnat Ada generates minimal typedef
3732          such as the below
3733          <6><91cc>: DW_TAG_typedef
3734             DW_AT_abstract_ori: <9066>
3735          g++ for OMP can generate artificial functions that have
3736          parameters that refer to pointers to unnamed typedefs.
3737          See https://bugs.kde.org/show_bug.cgi?id=273475
3738          So we cannot require a name for a DW_TAG_typedef.
3739       */
3740       goto acquire_Type;
3741    }
3742
3743    if (dtag == DW_TAG_subroutine_type) {
3744       /* function type? just record that one fact and ask no
3745          further questions. */
3746       VG_(memset)(&typeE, 0, sizeof(typeE));
3747       typeE.cuOff = D3_INVALID_CUOFF;
3748       typeE.tag   = Te_TyFn;
3749       goto acquire_Type;
3750    }
3751
3752    if (dtag == DW_TAG_volatile_type || dtag == DW_TAG_const_type
3753        || dtag == DW_TAG_restrict_type) {
3754       Int have_ty = 0;
3755       VG_(memset)(&typeE, 0, sizeof(typeE));
3756       typeE.cuOff = D3_INVALID_CUOFF;
3757       typeE.tag   = Te_TyQual;
3758       typeE.Te.TyQual.qual
3759          = (dtag == DW_TAG_volatile_type ? 'V'
3760             : (dtag == DW_TAG_const_type ? 'C' : 'R'));
3761       /* target type defaults to 'void' */
3762       typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF;
3763       nf_i = 0;
3764       while (True) {
3765          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3766          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3767          nf_i++;
3768          if (attr == 0 && form == 0) break;
3769          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3770          if (attr == DW_AT_type && cts.szB > 0) {
3771             typeE.Te.TyQual.typeR
3772                = cook_die_using_form( cc, (UWord)cts.u.val, form );
3773             have_ty++;
3774          }
3775       }
3776       /* gcc sometimes generates DW_TAG_const/volatile_type without
3777          DW_AT_type and GDB appears to interpret the type as 'const
3778          void' (resp. 'volatile void').  So just allow it .. */
3779       if (have_ty == 1 || have_ty == 0)
3780          goto acquire_Type;
3781       else
3782          goto_bad_DIE;
3783    }
3784
3785    /*
3786     * Treat DW_TAG_unspecified_type as type void. An example of DW_TAG_unspecified_type:
3787     *
3788     * $ readelf --debug-dump /usr/lib/debug/usr/lib/libstdc++.so.6.0.16.debug
3789     *  <1><10d4>: Abbrev Number: 53 (DW_TAG_unspecified_type)
3790     *     <10d5>   DW_AT_name        : (indirect string, offset: 0xdb7): decltype(nullptr)
3791     */
3792    if (dtag == DW_TAG_unspecified_type) {
3793       VG_(memset)(&typeE, 0, sizeof(typeE));
3794       typeE.cuOff           = D3_INVALID_CUOFF;
3795       typeE.tag             = Te_TyQual;
3796       typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF;
3797       goto acquire_Type;
3798    }
3799
3800    /* else ignore this DIE */
3801    return;
3802    /*NOTREACHED*/
3803
3804   acquire_Type:
3805    if (0) VG_(printf)("YYYY Acquire Type\n");
3806    vg_assert(ML_(TyEnt__is_type)( &typeE ));
3807    vg_assert(typeE.cuOff == D3_INVALID_CUOFF || typeE.cuOff == posn);
3808    typeE.cuOff = posn;
3809    VG_(addToXA)( tyents, &typeE );
3810    return;
3811    /*NOTREACHED*/
3812
3813   acquire_Atom:
3814    if (0) VG_(printf)("YYYY Acquire Atom\n");
3815    vg_assert(atomE.tag == Te_Atom);
3816    vg_assert(atomE.cuOff == D3_INVALID_CUOFF || atomE.cuOff == posn);
3817    atomE.cuOff = posn;
3818    VG_(addToXA)( tyents, &atomE );
3819    return;
3820    /*NOTREACHED*/
3821
3822   acquire_Field:
3823    /* For union members, Expr should be absent */
3824    if (0) VG_(printf)("YYYY Acquire Field\n");
3825    vg_assert(fieldE.tag == Te_Field);
3826    vg_assert(fieldE.Te.Field.nLoc <= 0 || fieldE.Te.Field.pos.loc != NULL);
3827    vg_assert(fieldE.Te.Field.nLoc != 0 || fieldE.Te.Field.pos.loc == NULL);
3828    if (fieldE.Te.Field.isStruct) {
3829       vg_assert(fieldE.Te.Field.nLoc != 0);
3830    } else {
3831       vg_assert(fieldE.Te.Field.nLoc == 0);
3832    }
3833    vg_assert(fieldE.cuOff == D3_INVALID_CUOFF || fieldE.cuOff == posn);
3834    fieldE.cuOff = posn;
3835    VG_(addToXA)( tyents, &fieldE );
3836    return;
3837    /*NOTREACHED*/
3838
3839   acquire_Bound:
3840    if (0) VG_(printf)("YYYY Acquire Bound\n");
3841    vg_assert(boundE.tag == Te_Bound);
3842    vg_assert(boundE.cuOff == D3_INVALID_CUOFF || boundE.cuOff == posn);
3843    boundE.cuOff = posn;
3844    VG_(addToXA)( tyents, &boundE );
3845    return;
3846    /*NOTREACHED*/
3847
3848   bad_DIE:
3849    dump_bad_die_and_barf("parse_type_DIE", dtag, posn, level,
3850                          c_die, saved_die_c_offset,
3851                          abbv,
3852                          cc);
3853    /*NOTREACHED*/
3854 }
3855
3856
3857 /*------------------------------------------------------------*/
3858 /*---                                                      ---*/
3859 /*--- Compression of type DIE information                  ---*/
3860 /*---                                                      ---*/
3861 /*------------------------------------------------------------*/
3862
3863 static UWord chase_cuOff ( Bool* changed,
3864                            const XArray* /* of TyEnt */ ents,
3865                            TyEntIndexCache* ents_cache,
3866                            UWord cuOff )
3867 {
3868    TyEnt* ent;
3869    ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, cuOff );
3870
3871    if (!ent) {
3872       VG_(printf)("chase_cuOff: no entry for 0x%05lx\n", cuOff);
3873       *changed = False;
3874       return cuOff;
3875    }
3876
3877    vg_assert(ent->tag != Te_EMPTY);
3878    if (ent->tag != Te_INDIR) {
3879       *changed = False;
3880       return cuOff;
3881    } else {
3882       vg_assert(ent->Te.INDIR.indR < cuOff);
3883       *changed = True;
3884       return ent->Te.INDIR.indR;
3885    }
3886 }
3887
3888 static
3889 void chase_cuOffs_in_XArray ( Bool* changed,
3890                               const XArray* /* of TyEnt */ ents,
3891                               TyEntIndexCache* ents_cache,
3892                               /*MOD*/XArray* /* of UWord */ cuOffs )
3893 {
3894    Bool b2 = False;
3895    Word i, n = VG_(sizeXA)( cuOffs );
3896    for (i = 0; i < n; i++) {
3897       Bool   b = False;
3898       UWord* p = VG_(indexXA)( cuOffs, i );
3899       *p = chase_cuOff( &b, ents, ents_cache, *p );
3900       if (b)
3901          b2 = True;
3902    }
3903    *changed = b2;
3904 }
3905
3906 static Bool TyEnt__subst_R_fields ( const XArray* /* of TyEnt */ ents,
3907                                     TyEntIndexCache* ents_cache,
3908                                     /*MOD*/TyEnt* te )
3909 {
3910    Bool b, changed = False;
3911    switch (te->tag) {
3912       case Te_EMPTY:
3913          break;
3914       case Te_INDIR:
3915          te->Te.INDIR.indR
3916             = chase_cuOff( &b, ents, ents_cache, te->Te.INDIR.indR );
3917          if (b) changed = True;
3918          break;
3919       case Te_UNKNOWN:
3920          break;
3921       case Te_Atom:
3922          break;
3923       case Te_Field:
3924          te->Te.Field.typeR
3925             = chase_cuOff( &b, ents, ents_cache, te->Te.Field.typeR );
3926          if (b) changed = True;
3927          break;
3928       case Te_Bound:
3929          break;
3930       case Te_TyBase:
3931          break;
3932       case Te_TyPtr:
3933       case Te_TyRef:
3934       case Te_TyPtrMbr:
3935       case Te_TyRvalRef:
3936          te->Te.TyPorR.typeR
3937             = chase_cuOff( &b, ents, ents_cache, te->Te.TyPorR.typeR );
3938          if (b) changed = True;
3939          break;
3940       case Te_TyTyDef:
3941          te->Te.TyTyDef.typeR
3942             = chase_cuOff( &b, ents, ents_cache, te->Te.TyTyDef.typeR );
3943          if (b) changed = True;
3944          break;
3945       case Te_TyStOrUn:
3946          chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyStOrUn.fieldRs );
3947          if (b) changed = True;
3948          break;
3949       case Te_TyEnum:
3950          chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyEnum.atomRs );
3951          if (b) changed = True;
3952          break;
3953       case Te_TyArray:
3954          te->Te.TyArray.typeR
3955             = chase_cuOff( &b, ents, ents_cache, te->Te.TyArray.typeR );
3956          if (b) changed = True;
3957          chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyArray.boundRs );
3958          if (b) changed = True;
3959          break;
3960       case Te_TyFn:
3961          break;
3962       case Te_TyQual:
3963          te->Te.TyQual.typeR
3964             = chase_cuOff( &b, ents, ents_cache, te->Te.TyQual.typeR );
3965          if (b) changed = True;
3966          break;
3967       case Te_TyVoid:
3968          break;
3969       default:
3970          ML_(pp_TyEnt)(te);
3971          vg_assert(0);
3972    }
3973    return changed;
3974 }
3975
3976 /* Make a pass over 'ents'.  For each tyent, inspect the target of any
3977    'R' or 'Rs' fields (those which refer to other tyents), and replace
3978    any which point to INDIR nodes with the target of the indirection
3979    (which should not itself be an indirection).  In summary, this
3980    routine shorts out all references to indirection nodes. */
3981 static
3982 Word dedup_types_substitution_pass ( /*MOD*/XArray* /* of TyEnt */ ents,
3983                                      TyEntIndexCache* ents_cache )
3984 {
3985    Word i, n, nChanged = 0;
3986    Bool b;
3987    n = VG_(sizeXA)( ents );
3988    for (i = 0; i < n; i++) {
3989       TyEnt* ent = VG_(indexXA)( ents, i );
3990       vg_assert(ent->tag != Te_EMPTY);
3991       /* We have to substitute everything, even indirections, so as to
3992          ensure that chains of indirections don't build up. */
3993       b = TyEnt__subst_R_fields( ents, ents_cache, ent );
3994       if (b)
3995          nChanged++;
3996    }
3997
3998    return nChanged;
3999 }
4000
4001
4002 /* Make a pass over 'ents', building a dictionary of TyEnts as we go.
4003    Look up each new tyent in the dictionary in turn.  If it is already
4004    in the dictionary, replace this tyent with an indirection to the
4005    existing one, and delete any malloc'd stuff hanging off this one.
4006    In summary, this routine commons up all tyents that are identical
4007    as defined by TyEnt__cmp_by_all_except_cuOff. */
4008 static
4009 Word dedup_types_commoning_pass ( /*MOD*/XArray* /* of TyEnt */ ents )
4010 {
4011    Word    n, i, nDeleted;
4012    WordFM* dict; /* TyEnt* -> void */
4013    TyEnt*  ent;
4014    UWord   keyW, valW;
4015
4016    dict = VG_(newFM)(
4017              ML_(dinfo_zalloc), "di.readdwarf3.dtcp.1",
4018              ML_(dinfo_free),
4019              (Word(*)(UWord,UWord)) ML_(TyEnt__cmp_by_all_except_cuOff)
4020           );
4021
4022    nDeleted = 0;
4023    n = VG_(sizeXA)( ents );
4024    for (i = 0; i < n; i++) {
4025       ent = VG_(indexXA)( ents, i );
4026       vg_assert(ent->tag != Te_EMPTY);
4027
4028       /* Ignore indirections, although check that they are
4029          not forming a cycle. */
4030       if (ent->tag == Te_INDIR) {
4031          vg_assert(ent->Te.INDIR.indR < ent->cuOff);
4032          continue;
4033       }
4034
4035       keyW = valW = 0;
4036       if (VG_(lookupFM)( dict, &keyW, &valW, (UWord)ent )) {
4037          /* it's already in the dictionary. */
4038          TyEnt* old = (TyEnt*)keyW;
4039          vg_assert(valW == 0);
4040          vg_assert(old != ent);
4041          vg_assert(old->tag != Te_INDIR);
4042          /* since we are traversing the array in increasing order of
4043             cuOff: */
4044          vg_assert(old->cuOff < ent->cuOff);
4045          /* So anyway, dump this entry and replace it with an
4046             indirection to the one in the dictionary.  Note that the
4047             assertion above guarantees that we cannot create cycles of
4048             indirections, since we are always creating an indirection
4049             to a tyent with a cuOff lower than this one. */
4050          ML_(TyEnt__make_EMPTY)( ent );
4051          ent->tag = Te_INDIR;
4052          ent->Te.INDIR.indR = old->cuOff;
4053          nDeleted++;
4054       } else {
4055          /* not in dictionary; add it and keep going. */
4056          VG_(addToFM)( dict, (UWord)ent, 0 );
4057       }
4058    }
4059
4060    VG_(deleteFM)( dict, NULL, NULL );
4061
4062    return nDeleted;
4063 }
4064
4065
4066 static
4067 void dedup_types ( Bool td3,
4068                    /*MOD*/XArray* /* of TyEnt */ ents,
4069                    TyEntIndexCache* ents_cache )
4070 {
4071    Word m, n, i, nDel, nSubst, nThresh;
4072    if (0) td3 = True;
4073
4074    n = VG_(sizeXA)( ents );
4075
4076    /* If a commoning pass and a substitution pass both make fewer than
4077       this many changes, just stop.  It's pointless to burn up CPU
4078       time trying to compress the last 1% or so out of the array. */
4079    nThresh = n / 200;
4080
4081    /* First we must sort .ents by its .cuOff fields, so we
4082       can index into it. */
4083    VG_(setCmpFnXA)( ents, (XACmpFn_t) ML_(TyEnt__cmp_by_cuOff_only) );
4084    VG_(sortXA)( ents );
4085
4086    /* Now repeatedly do commoning and substitution passes over
4087       the array, until there are no more changes. */
4088    do {
4089       nDel   = dedup_types_commoning_pass ( ents );
4090       nSubst = dedup_types_substitution_pass ( ents, ents_cache );
4091       vg_assert(nDel >= 0 && nSubst >= 0);
4092       TRACE_D3("   %ld deletions, %ld substitutions\n", nDel, nSubst);
4093    } while (nDel > nThresh || nSubst > nThresh);
4094
4095    /* Sanity check: all INDIR nodes should point at a non-INDIR thing.
4096       In fact this should be true at the end of every loop iteration
4097       above (a commoning pass followed by a substitution pass), but
4098       checking it on every iteration is excessively expensive.  Note,
4099       this loop also computes 'm' for the stats printing below it. */
4100    m = 0;
4101    n = VG_(sizeXA)( ents );
4102    for (i = 0; i < n; i++) {
4103       TyEnt *ent, *ind;
4104       ent = VG_(indexXA)( ents, i );
4105       if (ent->tag != Te_INDIR) continue;
4106       m++;
4107       ind = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
4108                                          ent->Te.INDIR.indR );
4109       vg_assert(ind);
4110       vg_assert(ind->tag != Te_INDIR);
4111    }
4112
4113    TRACE_D3("Overall: %ld before, %ld after\n", n, n-m);
4114 }
4115
4116
4117 /*------------------------------------------------------------*/
4118 /*---                                                      ---*/
4119 /*--- Resolution of references to type DIEs                ---*/
4120 /*---                                                      ---*/
4121 /*------------------------------------------------------------*/
4122
4123 /* Make a pass through the (temporary) variables array.  Examine the
4124    type of each variable, check is it found, and chase any Te_INDIRs.
4125    Postcondition is: each variable has a typeR field that refers to a
4126    valid type in tyents, or a Te_UNKNOWN, and is certainly guaranteed
4127    not to refer to a Te_INDIR.  (This is so that we can throw all the
4128    Te_INDIRs away later). */
4129
4130 __attribute__((noinline))
4131 static void resolve_variable_types (
4132                void (*barf)( const HChar* ) __attribute__((noreturn)),
4133                /*R-O*/XArray* /* of TyEnt */ ents,
4134                /*MOD*/TyEntIndexCache* ents_cache,
4135                /*MOD*/XArray* /* of TempVar* */ vars
4136             )
4137 {
4138    Word i, n;
4139    n = VG_(sizeXA)( vars );
4140    for (i = 0; i < n; i++) {
4141       TempVar* var = *(TempVar**)VG_(indexXA)( vars, i );
4142       /* This is the stated type of the variable.  But it might be
4143          an indirection, so be careful. */
4144       TyEnt* ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
4145                                                 var->typeR );
4146       if (ent && ent->tag == Te_INDIR) {
4147          ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
4148                                             ent->Te.INDIR.indR );
4149          vg_assert(ent);
4150          vg_assert(ent->tag != Te_INDIR);
4151       }
4152
4153       /* Deal first with "normal" cases */
4154       if (ent && ML_(TyEnt__is_type)(ent)) {
4155          var->typeR = ent->cuOff;
4156          continue;
4157       }
4158
4159       /* If there's no ent, it probably we did not manage to read a
4160          type at the cuOffset which is stated as being this variable's
4161          type.  Maybe a deficiency in parse_type_DIE.  Complain. */
4162       if (ent == NULL) {
4163          VG_(printf)("\n: Invalid cuOff = 0x%05lx\n", var->typeR );
4164          barf("resolve_variable_types: "
4165               "cuOff does not refer to a known type");
4166       }
4167       vg_assert(ent);
4168       /* If ent has any other tag, something bad happened, along the
4169          lines of var->typeR not referring to a type at all. */
4170       vg_assert(ent->tag == Te_UNKNOWN);
4171       /* Just accept it; the type will be useless, but at least keep
4172          going. */
4173       var->typeR = ent->cuOff;
4174    }
4175 }
4176
4177
4178 /*------------------------------------------------------------*/
4179 /*---                                                      ---*/
4180 /*--- Parsing of Compilation Units                         ---*/
4181 /*---                                                      ---*/
4182 /*------------------------------------------------------------*/
4183
4184 static Int cmp_TempVar_by_dioff ( const void* v1, const void* v2 ) {
4185    const TempVar* t1 = *(const TempVar *const *)v1;
4186    const TempVar* t2 = *(const TempVar *const *)v2;
4187    if (t1->dioff < t2->dioff) return -1;
4188    if (t1->dioff > t2->dioff) return 1;
4189    return 0;
4190 }
4191
4192 static void read_DIE (
4193    /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
4194    /*MOD*/XArray* /* of TyEnt */ tyents,
4195    /*MOD*/XArray* /* of TempVar* */ tempvars,
4196    /*MOD*/XArray* /* of GExpr* */ gexprs,
4197    /*MOD*/D3TypeParser* typarser,
4198    /*MOD*/D3VarParser* varparser,
4199    /*MOD*/D3InlParser* inlparser,
4200    Cursor* c, Bool td3, CUConst* cc, Int level
4201 )
4202 {
4203    const g_abbv *abbv;
4204    ULong  atag, abbv_code;
4205    UWord  posn;
4206    UInt   has_children;
4207    UWord  start_die_c_offset;
4208    UWord  after_die_c_offset;
4209    // If the DIE we will parse has a sibling and the parser(s) are
4210    // all indicating that parse_children is not necessary, then
4211    // we will skip the children by jumping to the sibling of this DIE
4212    // (if it has a sibling).
4213    UWord  sibling = 0;
4214    Bool   parse_children = False;
4215
4216    /* --- Deal with this DIE --- */
4217    posn      = cook_die( cc, get_position_of_Cursor( c ) );
4218    abbv_code = get_ULEB128( c );
4219    abbv = get_abbv(cc, abbv_code);
4220    atag      = abbv->atag;
4221
4222    if (TD3) {
4223       TRACE_D3("\n");
4224       trace_DIE ((DW_TAG)atag, posn, level,
4225                  get_position_of_Cursor( c ), abbv, cc);
4226    }
4227
4228    if (atag == 0)
4229       cc->barf("read_DIE: invalid zero tag on DIE");
4230
4231    has_children = abbv->has_children;
4232    if (has_children != DW_children_no && has_children != DW_children_yes)
4233       cc->barf("read_DIE: invalid has_children value");
4234
4235    /* We're set up to look at the fields of this DIE.  Hand it off to
4236       any parser(s) that want to see it.  Since they will in general
4237       advance the DIE cursor, remember the current settings so that we
4238       can then back up. . */
4239    start_die_c_offset  = get_position_of_Cursor( c );
4240    after_die_c_offset  = 0; // set to c position if a parser has read the DIE.
4241
4242    if (VG_(clo_read_var_info)) {
4243       parse_type_DIE( tyents,
4244                       typarser,
4245                       (DW_TAG)atag,
4246                       posn,
4247                       level,
4248                       c,     /* DIE cursor */
4249                       abbv,  /* abbrev */
4250                       cc,
4251                       td3 );
4252       if (get_position_of_Cursor( c ) != start_die_c_offset) {
4253          after_die_c_offset = get_position_of_Cursor( c );
4254          set_position_of_Cursor( c, start_die_c_offset );
4255       }
4256
4257       parse_var_DIE( rangestree,
4258                      tempvars,
4259                      gexprs,
4260                      varparser,
4261                      (DW_TAG)atag,
4262                      posn,
4263                      level,
4264                      c,     /* DIE cursor */
4265                      abbv,  /* abbrev */
4266                      cc,
4267                      td3 );
4268       if (get_position_of_Cursor( c ) != start_die_c_offset) {
4269          after_die_c_offset = get_position_of_Cursor( c );
4270          set_position_of_Cursor( c, start_die_c_offset );
4271       }
4272
4273       parse_children = True;
4274       // type and var parsers do not have logic to skip childrens and establish
4275       // the value of sibling.
4276    }
4277
4278    if (VG_(clo_read_inline_info)) {
4279       inlparser->sibling = 0;
4280       parse_children =
4281          parse_inl_DIE( inlparser,
4282                         (DW_TAG)atag,
4283                         posn,
4284                         level,
4285                         c,     /* DIE cursor */
4286                         abbv, /* abbrev */
4287                         cc,
4288                         td3 )
4289          || parse_children;
4290       if (get_position_of_Cursor( c ) != start_die_c_offset) {
4291          after_die_c_offset = get_position_of_Cursor( c );
4292          // Last parser, no need to reset the cursor to start_die_c_offset.
4293       }
4294       if (sibling == 0)
4295          sibling = inlparser->sibling;
4296       vg_assert (inlparser->sibling == 0 || inlparser->sibling == sibling);
4297    }
4298
4299    if (after_die_c_offset > 0) {
4300       // DIE was read by a parser above, so we know where the DIE ends.
4301       set_position_of_Cursor( c, after_die_c_offset );
4302    } else {
4303       /* No parser has parsed this DIE. So, we need to skip the DIE,
4304          in order to read the next DIE.
4305          At the same time, establish sibling value if the DIE has one. */
4306       TRACE_D3("    uninteresting DIE -> skipping ...\n");
4307       skip_DIE (&sibling, c, abbv, cc);
4308    }
4309
4310    /* --- Now recurse into its children, if any
4311       and the parsing of the children is requested by a parser --- */
4312    if (has_children == DW_children_yes) {
4313       if (parse_children || sibling == 0) {
4314          if (0) TRACE_D3("BEGIN children of level %d\n", level);
4315          while (True) {
4316             atag = peek_ULEB128( c );
4317             if (atag == 0) break;
4318             read_DIE( rangestree, tyents, tempvars, gexprs,
4319                       typarser, varparser, inlparser,
4320                       c, td3, cc, level+1 );
4321          }
4322          /* Now we need to eat the terminating zero */
4323          atag = get_ULEB128( c );
4324          vg_assert(atag == 0);
4325          if (0) TRACE_D3("END children of level %d\n", level);
4326       } else {
4327          // We can skip the childrens, by jumping to the sibling
4328          TRACE_D3("    SKIPPING DIE's children,"
4329                   "jumping to sibling <%d><%lx>\n",
4330                   level, sibling);
4331          set_position_of_Cursor( c, sibling );
4332       }
4333    }
4334
4335 }
4336
4337 static void trace_debug_loc (const DebugInfo* di,
4338                              __attribute__((noreturn)) void (*barf)( const HChar* ),
4339                              DiSlice escn_debug_loc)
4340 {
4341 #if 0
4342    /* This doesn't work properly because it assumes all entries are
4343       packed end to end, with no holes.  But that doesn't always
4344       appear to be the case, so it loses sync.  And the D3 spec
4345       doesn't appear to require a no-hole situation either. */
4346    /* Display .debug_loc */
4347    Addr  dl_base;
4348    UWord dl_offset;
4349    Cursor loc; /* for showing .debug_loc */
4350    Bool td3 = di->trace_symtab;
4351
4352    TRACE_SYMTAB("\n");
4353    TRACE_SYMTAB("\n------ The contents of .debug_loc ------\n");
4354    TRACE_SYMTAB("    Offset   Begin    End      Expression\n");
4355    if (ML_(sli_is_valid)(escn_debug_loc)) {
4356       init_Cursor( &loc, escn_debug_loc, 0, barf,
4357                    "Overrun whilst reading .debug_loc section(1)" );
4358       dl_base = 0;
4359       dl_offset = 0;
4360       while (True) {
4361          UWord  w1, w2;
4362          UWord  len;
4363          if (is_at_end_Cursor( &loc ))
4364             break;
4365
4366          /* Read a (host-)word pair.  This is something of a hack since
4367             the word size to read is really dictated by the ELF file;
4368             however, we assume we're reading a file with the same
4369             word-sizeness as the host.  Reasonably enough. */
4370          w1 = get_UWord( &loc );
4371          w2 = get_UWord( &loc );
4372
4373          if (w1 == 0 && w2 == 0) {
4374             /* end of list.  reset 'base' */
4375             TRACE_D3("    %08lx <End of list>\n", dl_offset);
4376             dl_base = 0;
4377             dl_offset = get_position_of_Cursor( &loc );
4378             continue;
4379          }
4380
4381          if (w1 == -1UL) {
4382             /* new value for 'base' */
4383             TRACE_D3("    %08lx %16lx %08lx (base address)\n",
4384                      dl_offset, w1, w2);
4385             dl_base = w2;
4386             continue;
4387          }
4388
4389          /* else a location expression follows */
4390          TRACE_D3("    %08lx %08lx %08lx ",
4391                   dl_offset, w1 + dl_base, w2 + dl_base);
4392          len = (UWord)get_UShort( &loc );
4393          while (len > 0) {
4394             UChar byte = get_UChar( &loc );
4395             TRACE_D3("%02x", (UInt)byte);
4396             len--;
4397          }
4398          TRACE_SYMTAB("\n");
4399       }
4400    }
4401 #endif
4402 }
4403
4404 static void trace_debug_ranges (const DebugInfo* di,
4405                                 __attribute__((noreturn)) void (*barf)( const HChar* ),
4406                                 DiSlice escn_debug_ranges)
4407 {
4408    Cursor ranges; /* for showing .debug_ranges */
4409    Addr  dr_base;
4410    UWord dr_offset;
4411    Bool td3 = di->trace_symtab;
4412
4413    /* Display .debug_ranges */
4414    TRACE_SYMTAB("\n");
4415    TRACE_SYMTAB("\n------ The contents of .debug_ranges ------\n");
4416    TRACE_SYMTAB("    Offset   Begin    End\n");
4417    if (ML_(sli_is_valid)(escn_debug_ranges)) {
4418       init_Cursor( &ranges, escn_debug_ranges, 0, barf,
4419                    "Overrun whilst reading .debug_ranges section(1)" );
4420       dr_base = 0;
4421       dr_offset = 0;
4422       while (True) {
4423          UWord  w1, w2;
4424
4425          if (is_at_end_Cursor( &ranges ))
4426             break;
4427
4428          /* Read a (host-)word pair.  This is something of a hack since
4429             the word size to read is really dictated by the ELF file;
4430             however, we assume we're reading a file with the same
4431             word-sizeness as the host.  Reasonably enough. */
4432          w1 = get_UWord( &ranges );
4433          w2 = get_UWord( &ranges );
4434
4435          if (w1 == 0 && w2 == 0) {
4436             /* end of list.  reset 'base' */
4437             TRACE_D3("    %08lx <End of list>\n", dr_offset);
4438             dr_base = 0;
4439             dr_offset = get_position_of_Cursor( &ranges );
4440             continue;
4441          }
4442
4443          if (w1 == -1UL) {
4444             /* new value for 'base' */
4445             TRACE_D3("    %08lx %16lx %08lx (base address)\n",
4446                      dr_offset, w1, w2);
4447             dr_base = w2;
4448             continue;
4449          }
4450
4451          /* else a range [w1+base, w2+base) is denoted */
4452          TRACE_D3("    %08lx %08lx %08lx\n",
4453                   dr_offset, w1 + dr_base, w2 + dr_base);
4454       }
4455    }
4456 }
4457
4458 static void trace_debug_abbrev (const DebugInfo* di,
4459                                 __attribute__((noreturn)) void (*barf)( const HChar* ),
4460                                 DiSlice escn_debug_abbv)
4461 {
4462    Cursor abbv; /* for showing .debug_abbrev */
4463    Bool td3 = di->trace_symtab;
4464
4465    /* Display .debug_abbrev */
4466    TRACE_SYMTAB("\n");
4467    TRACE_SYMTAB("\n------ The contents of .debug_abbrev ------\n");
4468    if (ML_(sli_is_valid)(escn_debug_abbv)) {
4469       init_Cursor( &abbv, escn_debug_abbv, 0, barf,
4470                    "Overrun whilst reading .debug_abbrev section" );
4471       while (True) {
4472          if (is_at_end_Cursor( &abbv ))
4473             break;
4474          /* Read one abbreviation table */
4475          TRACE_D3("  Number TAG\n");
4476          while (True) {
4477             ULong atag;
4478             UInt  has_children;
4479             ULong acode = get_ULEB128( &abbv );
4480             if (acode == 0) break; /* end of the table */
4481             atag = get_ULEB128( &abbv );
4482             has_children = get_UChar( &abbv );
4483             TRACE_D3("   %llu      %s    [%s]\n",
4484                      acode, ML_(pp_DW_TAG)(atag),
4485                             ML_(pp_DW_children)(has_children));
4486             while (True) {
4487                ULong at_name = get_ULEB128( &abbv );
4488                ULong at_form = get_ULEB128( &abbv );
4489                if (at_name == 0 && at_form == 0) break;
4490                TRACE_D3("    %-18s %s\n",
4491                         ML_(pp_DW_AT)(at_name), ML_(pp_DW_FORM)(at_form));
4492             }
4493          }
4494       }
4495    }
4496 }
4497
4498 static
4499 void new_dwarf3_reader_wrk (
4500    DebugInfo* di,
4501    __attribute__((noreturn)) void (*barf)( const HChar* ),
4502    DiSlice escn_debug_info,      DiSlice escn_debug_types,
4503    DiSlice escn_debug_abbv,      DiSlice escn_debug_line,
4504    DiSlice escn_debug_str,       DiSlice escn_debug_ranges,
4505    DiSlice escn_debug_loc,       DiSlice escn_debug_info_alt,
4506    DiSlice escn_debug_abbv_alt,  DiSlice escn_debug_line_alt,
4507    DiSlice escn_debug_str_alt
4508 )
4509 {
4510    XArray* /* of TyEnt */     tyents = NULL;
4511    XArray* /* of TyEnt */     tyents_to_keep = NULL;
4512    XArray* /* of GExpr* */    gexprs = NULL;
4513    XArray* /* of TempVar* */  tempvars = NULL;
4514    WordFM* /* of (XArray* of AddrRange, void) */ rangestree = NULL;
4515    TyEntIndexCache* tyents_cache = NULL;
4516    TyEntIndexCache* tyents_to_keep_cache = NULL;
4517    TempVar *varp, *varp2;
4518    GExpr* gexpr;
4519    Cursor info; /* primary cursor for parsing .debug_info */
4520    D3TypeParser typarser;
4521    D3VarParser varparser;
4522    D3InlParser inlparser;
4523    Word  i, j, n;
4524    Bool td3 = di->trace_symtab;
4525    XArray* /* of TempVar* */ dioff_lookup_tab;
4526    Int pass;
4527    VgHashTable *signature_types = NULL;
4528
4529    /* Display/trace various information, if requested. */
4530    if (TD3) {
4531       trace_debug_loc    (di, barf, escn_debug_loc);
4532       trace_debug_ranges (di, barf, escn_debug_ranges);
4533       trace_debug_abbrev (di, barf, escn_debug_abbv);
4534       TRACE_SYMTAB("\n");
4535    }
4536
4537    /* Zero out all parsers. Parsers will really be initialised
4538       according to VG_(clo_read_*_info). */
4539    VG_(memset)( &inlparser, 0, sizeof(inlparser) );
4540
4541    if (VG_(clo_read_var_info)) {
4542       /* We'll park the harvested type information in here.  Also create
4543          a fake "void" entry with offset D3_FAKEVOID_CUOFF, so we always
4544          have at least one type entry to refer to.  D3_FAKEVOID_CUOFF is
4545          huge and presumably will not occur in any valid DWARF3 file --
4546          it would need to have a .debug_info section 4GB long for that to
4547          happen.  These type entries end up in the DebugInfo. */
4548       tyents = VG_(newXA)( ML_(dinfo_zalloc),
4549                            "di.readdwarf3.ndrw.1 (TyEnt temp array)",
4550                            ML_(dinfo_free), sizeof(TyEnt) );
4551       { TyEnt tyent;
4552         VG_(memset)(&tyent, 0, sizeof(tyent));
4553         tyent.tag   = Te_TyVoid;
4554         tyent.cuOff = D3_FAKEVOID_CUOFF;
4555         tyent.Te.TyVoid.isFake = True;
4556         VG_(addToXA)( tyents, &tyent );
4557       }
4558       { TyEnt tyent;
4559         VG_(memset)(&tyent, 0, sizeof(tyent));
4560         tyent.tag   = Te_UNKNOWN;
4561         tyent.cuOff = D3_INVALID_CUOFF;
4562         VG_(addToXA)( tyents, &tyent );
4563       }
4564
4565       /* This is a tree used to unique-ify the range lists that are
4566          manufactured by parse_var_DIE.  References to the keys in the
4567          tree wind up in .rngMany fields in TempVars.  We'll need to
4568          delete this tree, and the XArrays attached to it, at the end of
4569          this function. */
4570       rangestree = VG_(newFM)( ML_(dinfo_zalloc),
4571                                "di.readdwarf3.ndrw.2 (rangestree)",
4572                                ML_(dinfo_free),
4573                                (Word(*)(UWord,UWord))cmp__XArrays_of_AddrRange );
4574
4575       /* List of variables we're accumulating.  These don't end up in the
4576          DebugInfo; instead their contents are handed to ML_(addVar) and
4577          the list elements are then deleted. */
4578       tempvars = VG_(newXA)( ML_(dinfo_zalloc),
4579                              "di.readdwarf3.ndrw.3 (TempVar*s array)",
4580                              ML_(dinfo_free),
4581                              sizeof(TempVar*) );
4582
4583       /* List of GExprs we're accumulating.  These wind up in the
4584          DebugInfo. */
4585       gexprs = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.4",
4586                            ML_(dinfo_free), sizeof(GExpr*) );
4587
4588       /* We need a D3TypeParser to keep track of partially constructed
4589          types.  It'll be discarded as soon as we've completed the CU,
4590          since the resulting information is tipped in to 'tyents' as it
4591          is generated. */
4592       type_parser_init(&typarser);
4593
4594       var_parser_init(&varparser);
4595
4596       signature_types = VG_(HT_construct) ("signature_types");
4597    }
4598
4599    /* Do an initial pass to scan the .debug_types section, if any, and
4600       fill in the signatured types hash table.  This lets us handle
4601       mapping from a type signature to a (cooked) DIE offset directly
4602       in get_Form_contents.  */
4603    if (VG_(clo_read_var_info) && ML_(sli_is_valid)(escn_debug_types)) {
4604       init_Cursor( &info, escn_debug_types, 0, barf,
4605                    "Overrun whilst reading .debug_types section" );
4606       TRACE_D3("\n------ Collecting signatures from "
4607                ".debug_types section ------\n");
4608
4609       while (True) {
4610          UWord   cu_start_offset, cu_offset_now;
4611          CUConst cc;
4612
4613          cu_start_offset = get_position_of_Cursor( &info );
4614          TRACE_D3("\n");
4615          TRACE_D3("  Compilation Unit @ offset 0x%lx:\n", cu_start_offset);
4616          /* parse_CU_header initialises the CU's abbv hash table.  */
4617          parse_CU_Header( &cc, td3, &info, escn_debug_abbv, True, False );
4618
4619          /* Needed by cook_die.  */
4620          cc.types_cuOff_bias = escn_debug_info.szB;
4621
4622          record_signatured_type( signature_types, cc.type_signature,
4623                                  cook_die( &cc, cc.type_offset ));
4624
4625          /* Until proven otherwise we assume we don't need the icc9
4626             workaround in this case; see the DIE-reading loop below
4627             for details.  */
4628          cu_offset_now = (cu_start_offset + cc.unit_length
4629                           + (cc.is_dw64 ? 12 : 4));
4630
4631          clear_CUConst ( &cc);
4632
4633          if (cu_offset_now >= escn_debug_types.szB) {
4634             break;
4635          }
4636
4637          set_position_of_Cursor ( &info, cu_offset_now );
4638       }
4639    }
4640
4641    /* Perform three DIE-reading passes.  The first pass reads DIEs from
4642       alternate .debug_info (if any), the second pass reads DIEs from
4643       .debug_info, and the third pass reads DIEs from .debug_types.
4644       Moving the body of this loop into a separate function would
4645       require a large number of arguments to be passed in, so it is
4646       kept inline instead.  */
4647    for (pass = 0; pass < 3; ++pass) {
4648       ULong section_size;
4649
4650       if (pass == 0) {
4651          if (!ML_(sli_is_valid)(escn_debug_info_alt))
4652             continue;
4653          /* Now loop over the Compilation Units listed in the alternate
4654             .debug_info section (see D3SPEC sec 7.5) paras 1 and 2.
4655             Each compilation unit contains a Compilation Unit Header
4656             followed by precisely one DW_TAG_compile_unit or
4657             DW_TAG_partial_unit DIE. */
4658          init_Cursor( &info, escn_debug_info_alt, 0, barf,
4659                       "Overrun whilst reading alternate .debug_info section" );
4660          section_size = escn_debug_info_alt.szB;
4661
4662          TRACE_D3("\n------ Parsing alternate .debug_info section ------\n");
4663       } else if (pass == 1) {
4664          /* Now loop over the Compilation Units listed in the .debug_info
4665             section (see D3SPEC sec 7.5) paras 1 and 2.  Each compilation
4666             unit contains a Compilation Unit Header followed by precisely
4667             one DW_TAG_compile_unit or DW_TAG_partial_unit DIE. */
4668          init_Cursor( &info, escn_debug_info, 0, barf,
4669                       "Overrun whilst reading .debug_info section" );
4670          section_size = escn_debug_info.szB;
4671
4672          TRACE_D3("\n------ Parsing .debug_info section ------\n");
4673       } else {
4674          if (!ML_(sli_is_valid)(escn_debug_types))
4675             continue;
4676          if (!VG_(clo_read_var_info))
4677             continue; // Types not needed when only reading inline info.
4678          init_Cursor( &info, escn_debug_types, 0, barf,
4679                       "Overrun whilst reading .debug_types section" );
4680          section_size = escn_debug_types.szB;
4681
4682          TRACE_D3("\n------ Parsing .debug_types section ------\n");
4683       }
4684
4685       while (True) {
4686          ULong   cu_start_offset, cu_offset_now;
4687          CUConst cc;
4688          /* It may be that the stated size of this CU is larger than the
4689             amount of stuff actually in it.  icc9 seems to generate CUs
4690             thusly.  We use these variables to figure out if this is
4691             indeed the case, and if so how many bytes we need to skip to
4692             get to the start of the next CU.  Not skipping those bytes
4693             causes us to misidentify the start of the next CU, and it all
4694             goes badly wrong after that (not surprisingly). */
4695          UWord cu_size_including_IniLen, cu_amount_used;
4696
4697          /* It seems icc9 finishes the DIE info before debug_info_sz
4698             bytes have been used up.  So be flexible, and declare the
4699             sequence complete if there is not enough remaining bytes to
4700             hold even the smallest conceivable CU header.  (11 bytes I
4701             reckon). */
4702          /* JRS 23Jan09: I suspect this is no longer necessary now that
4703             the code below contains a 'while (cu_amount_used <
4704             cu_size_including_IniLen ...'  style loop, which skips over
4705             any leftover bytes at the end of a CU in the case where the
4706             CU's stated size is larger than its actual size (as
4707             determined by reading all its DIEs).  However, for prudence,
4708             I'll leave the following test in place.  I can't see that a
4709             CU header can be smaller than 11 bytes, so I don't think
4710             there's any harm possible through the test -- it just adds
4711             robustness. */
4712          Word avail = get_remaining_length_Cursor( &info );
4713          if (avail < 11) {
4714             if (avail > 0)
4715                TRACE_D3("new_dwarf3_reader_wrk: warning: "
4716                         "%ld unused bytes after end of DIEs\n", avail);
4717             break;
4718          }
4719
4720          if (VG_(clo_read_var_info)) {
4721             /* Check the varparser's stack is in a sane state. */
4722             vg_assert(varparser.sp == -1);
4723             /* Check the typarser's stack is in a sane state. */
4724             vg_assert(typarser.sp == -1);
4725          }
4726
4727          cu_start_offset = get_position_of_Cursor( &info );
4728          TRACE_D3("\n");
4729          TRACE_D3("  Compilation Unit @ offset 0x%llx:\n", cu_start_offset);
4730          /* parse_CU_header initialises the CU's hashtable of abbvs ht_abbvs */
4731          if (pass == 0) {
4732             parse_CU_Header( &cc, td3, &info, escn_debug_abbv_alt,
4733                              False, True );
4734          } else {
4735             parse_CU_Header( &cc, td3, &info, escn_debug_abbv,
4736                              pass == 2, False );
4737          }
4738          cc.escn_debug_str      = pass == 0 ? escn_debug_str_alt
4739                                             : escn_debug_str;
4740          cc.escn_debug_ranges   = escn_debug_ranges;
4741          cc.escn_debug_loc      = escn_debug_loc;
4742          cc.escn_debug_line     = pass == 0 ? escn_debug_line_alt
4743                                             : escn_debug_line;
4744          cc.escn_debug_info     = pass == 0 ? escn_debug_info_alt
4745                                             : escn_debug_info;
4746          cc.escn_debug_types    = escn_debug_types;
4747          cc.escn_debug_info_alt = escn_debug_info_alt;
4748          cc.escn_debug_str_alt  = escn_debug_str_alt;
4749          cc.types_cuOff_bias    = escn_debug_info.szB;
4750          cc.alt_cuOff_bias      = escn_debug_info.szB + escn_debug_types.szB;
4751          cc.cu_start_offset     = cu_start_offset;
4752          cc.di = di;
4753          /* The CU's svma can be deduced by looking at the AT_low_pc
4754             value in the top level TAG_compile_unit, which is the topmost
4755             DIE.  We'll leave it for the 'varparser' to acquire that info
4756             and fill it in -- since it is the only party to want to know
4757             it. */
4758          cc.cu_svma_known = False;
4759          cc.cu_svma       = 0;
4760
4761          if (VG_(clo_read_var_info)) {
4762             cc.signature_types = signature_types;
4763
4764             /* Create a fake outermost-level range covering the entire
4765                address range.  So we always have *something* to catch all
4766                variable declarations. */
4767             varstack_push( &cc, &varparser, td3,
4768                            unitary_range_list(0UL, ~0UL),
4769                            -1, False/*isFunc*/, NULL/*fbGX*/ );
4770
4771             /* And set up the fndn_ix_Table.  When we come across the top
4772                level DIE for this CU (which is what the next call to
4773                read_DIE should process) we will copy all the file names out
4774                of the .debug_line img area and use this table to look up the
4775                copies when we later see filename numbers in DW_TAG_variables
4776                etc. */
4777             vg_assert(!varparser.fndn_ix_Table );
4778             varparser.fndn_ix_Table
4779                = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.5var",
4780                              ML_(dinfo_free),
4781                              sizeof(UInt) );
4782          }
4783
4784          if (VG_(clo_read_inline_info)) {
4785             /* fndn_ix_Table for the inlined call parser */
4786             vg_assert(!inlparser.fndn_ix_Table );
4787             inlparser.fndn_ix_Table
4788                = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.5inl",
4789                              ML_(dinfo_free),
4790                              sizeof(UInt) );
4791          }
4792
4793          /* Now read the one-and-only top-level DIE for this CU. */
4794          vg_assert(!VG_(clo_read_var_info) || varparser.sp == 0);
4795          read_DIE( rangestree,
4796                    tyents, tempvars, gexprs,
4797                    &typarser, &varparser, &inlparser,
4798                    &info, td3, &cc, 0 );
4799
4800          cu_offset_now = get_position_of_Cursor( &info );
4801
4802          if (0) VG_(printf)("Travelled: %llu  size %llu\n",
4803                             cu_offset_now - cc.cu_start_offset,
4804                             cc.unit_length + (cc.is_dw64 ? 12 : 4));
4805
4806          /* How big the CU claims it is .. */
4807          cu_size_including_IniLen = cc.unit_length + (cc.is_dw64 ? 12 : 4);
4808          /* .. vs how big we have found it to be */
4809          cu_amount_used = cu_offset_now - cc.cu_start_offset;
4810
4811          if (1) TRACE_D3("offset now %llu, d-i-size %llu\n",
4812                          cu_offset_now, section_size);
4813          if (cu_offset_now > section_size)
4814             barf("toplevel DIEs beyond end of CU");
4815
4816          /* If the CU is bigger than it claims to be, we've got a serious
4817             problem. */
4818          if (cu_amount_used > cu_size_including_IniLen)
4819             barf("CU's actual size appears to be larger than it claims it is");
4820
4821          /* If the CU is smaller than it claims to be, we need to skip some
4822             bytes.  Loop updates cu_offset_now and cu_amount_used. */
4823          while (cu_amount_used < cu_size_including_IniLen
4824                 && get_remaining_length_Cursor( &info ) > 0) {
4825             if (0) VG_(printf)("SKIP\n");
4826             (void)get_UChar( &info );
4827             cu_offset_now = get_position_of_Cursor( &info );
4828             cu_amount_used = cu_offset_now - cc.cu_start_offset;
4829          }
4830
4831          if (VG_(clo_read_var_info)) {
4832             /* Preen to level -2.  DIEs have level >= 0 so -2 cannot occur
4833                anywhere else at all.  Our fake the-entire-address-space
4834                range is at level -1, so preening to -2 should completely
4835                empty the stack out. */
4836             TRACE_D3("\n");
4837             varstack_preen( &varparser, td3, -2 );
4838             /* Similarly, empty the type stack out. */
4839             typestack_preen( &typarser, td3, -2 );
4840          }
4841
4842          if (VG_(clo_read_var_info)) {
4843             vg_assert(varparser.fndn_ix_Table );
4844             VG_(deleteXA)( varparser.fndn_ix_Table );
4845             varparser.fndn_ix_Table = NULL;
4846          }
4847          if (VG_(clo_read_inline_info)) {
4848             vg_assert(inlparser.fndn_ix_Table );
4849             VG_(deleteXA)( inlparser.fndn_ix_Table );
4850             inlparser.fndn_ix_Table = NULL;
4851          }
4852          clear_CUConst(&cc);
4853
4854          if (cu_offset_now == section_size)
4855             break;
4856          /* else keep going */
4857       }
4858    }
4859
4860
4861    if (VG_(clo_read_var_info)) {
4862       /* From here on we're post-processing the stuff we got
4863          out of the .debug_info section. */
4864       if (TD3) {
4865          TRACE_D3("\n");
4866          ML_(pp_TyEnts)(tyents, "Initial type entity (TyEnt) array");
4867          TRACE_D3("\n");
4868          TRACE_D3("------ Compressing type entries ------\n");
4869       }
4870
4871       tyents_cache = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.6",
4872                                         sizeof(TyEntIndexCache) );
4873       ML_(TyEntIndexCache__invalidate)( tyents_cache );
4874       dedup_types( td3, tyents, tyents_cache );
4875       if (TD3) {
4876          TRACE_D3("\n");
4877          ML_(pp_TyEnts)(tyents, "After type entity (TyEnt) compression");
4878       }
4879
4880       TRACE_D3("\n");
4881       TRACE_D3("------ Resolving the types of variables ------\n" );
4882       resolve_variable_types( barf, tyents, tyents_cache, tempvars );
4883
4884       /* Copy all the non-INDIR tyents into a new table.  For large
4885          .so's, about 90% of the tyents will by now have been resolved to
4886          INDIRs, and we no longer need them, and so don't need to store
4887          them. */
4888       tyents_to_keep
4889          = VG_(newXA)( ML_(dinfo_zalloc),
4890                        "di.readdwarf3.ndrw.7 (TyEnt to-keep array)",
4891                        ML_(dinfo_free), sizeof(TyEnt) );
4892       n = VG_(sizeXA)( tyents );
4893       for (i = 0; i < n; i++) {
4894          TyEnt* ent = VG_(indexXA)( tyents, i );
4895          if (ent->tag != Te_INDIR)
4896             VG_(addToXA)( tyents_to_keep, ent );
4897       }
4898
4899       VG_(deleteXA)( tyents );
4900       tyents = NULL;
4901       ML_(dinfo_free)( tyents_cache );
4902       tyents_cache = NULL;
4903
4904       /* Sort tyents_to_keep so we can lookup in it.  A complete (if
4905          minor) waste of time, since tyents itself is sorted, but
4906          necessary since VG_(lookupXA) refuses to cooperate if we
4907          don't. */
4908       VG_(setCmpFnXA)( tyents_to_keep, (XACmpFn_t) ML_(TyEnt__cmp_by_cuOff_only) );
4909       VG_(sortXA)( tyents_to_keep );
4910
4911       /* Enable cacheing on tyents_to_keep */
4912       tyents_to_keep_cache
4913          = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.8",
4914                               sizeof(TyEntIndexCache) );
4915       ML_(TyEntIndexCache__invalidate)( tyents_to_keep_cache );
4916
4917       /* And record the tyents in the DebugInfo.  We do this before
4918          starting to hand variables to ML_(addVar), since if ML_(addVar)
4919          wants to do debug printing (of the types of said vars) then it
4920          will need the tyents.*/
4921       vg_assert(!di->admin_tyents);
4922       di->admin_tyents = tyents_to_keep;
4923
4924       /* Bias all the location expressions. */
4925       TRACE_D3("\n");
4926       TRACE_D3("------ Biasing the location expressions ------\n" );
4927
4928       n = VG_(sizeXA)( gexprs );
4929       for (i = 0; i < n; i++) {
4930          gexpr = *(GExpr**)VG_(indexXA)( gexprs, i );
4931          bias_GX( gexpr, di );
4932       }
4933
4934       TRACE_D3("\n");
4935       TRACE_D3("------ Acquired the following variables: ------\n\n");
4936
4937       /* Park (pointers to) all the vars in an XArray, so we can look up
4938          abstract origins quickly.  The array is sorted (hence, looked-up
4939          by) the .dioff fields.  Since the .dioffs should be in strictly
4940          ascending order, there is no need to sort the array after
4941          construction.  The ascendingness is however asserted for. */
4942       dioff_lookup_tab
4943          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.9",
4944                        ML_(dinfo_free),
4945                        sizeof(TempVar*) );
4946
4947       n = VG_(sizeXA)( tempvars );
4948       Word first_primary_var = 0;
4949       for (first_primary_var = 0;
4950            escn_debug_info_alt.szB/*really?*/ && first_primary_var < n;
4951            first_primary_var++) {
4952          varp = *(TempVar**)VG_(indexXA)( tempvars, first_primary_var );
4953          if (varp->dioff < escn_debug_info.szB + escn_debug_types.szB)
4954             break;
4955       }
4956       for (i = 0; i < n; i++) {
4957          varp = *(TempVar**)VG_(indexXA)( tempvars, (i + first_primary_var) % n );
4958          if (i > first_primary_var) {
4959             varp2 = *(TempVar**)VG_(indexXA)( tempvars,
4960                                               (i + first_primary_var - 1) % n );
4961             /* why should this hold?  Only, I think, because we've
4962                constructed the array by reading .debug_info sequentially,
4963                and so the array .dioff fields should reflect that, and be
4964                strictly ascending. */
4965             vg_assert(varp2->dioff < varp->dioff);
4966          }
4967          VG_(addToXA)( dioff_lookup_tab, &varp );
4968       }
4969       VG_(setCmpFnXA)( dioff_lookup_tab, cmp_TempVar_by_dioff );
4970       VG_(sortXA)( dioff_lookup_tab ); /* POINTLESS; FIXME: rm */
4971
4972       /* Now visit each var.  Collect up as much info as possible for
4973          each var and hand it to ML_(addVar). */
4974       n = VG_(sizeXA)( tempvars );
4975       for (j = 0; j < n; j++) {
4976          TyEnt* ent;
4977          varp = *(TempVar**)VG_(indexXA)( tempvars, j );
4978
4979          /* Possibly show .. */
4980          if (TD3) {
4981             VG_(printf)("<%lx> addVar: level %d: %s :: ",
4982                         varp->dioff,
4983                         varp->level,
4984                         varp->name ? varp->name : "<anon_var>" );
4985             if (varp->typeR) {
4986                ML_(pp_TyEnt_C_ishly)( tyents_to_keep, varp->typeR );
4987             } else {
4988                VG_(printf)("NULL");
4989             }
4990             VG_(printf)("\n  Loc=");
4991             if (varp->gexpr) {
4992                ML_(pp_GX)(varp->gexpr);
4993             } else {
4994                VG_(printf)("NULL");
4995             }
4996             VG_(printf)("\n");
4997             if (varp->fbGX) {
4998                VG_(printf)("  FrB=");
4999                ML_(pp_GX)( varp->fbGX );
5000                VG_(printf)("\n");
5001             } else {
5002                VG_(printf)("  FrB=none\n");
5003             }
5004             VG_(printf)("  declared at: %u %s:%d\n",
5005                         varp->fndn_ix,
5006                         ML_(fndn_ix2filename) (di, varp->fndn_ix),
5007                         varp->fLine );
5008             if (varp->absOri != (UWord)D3_INVALID_CUOFF)
5009                VG_(printf)("  abstract origin: <%lx>\n", varp->absOri);
5010          }
5011
5012          /* Skip variables which have no location.  These must be
5013             abstract instances; they are useless as-is since with no
5014             location they have no specified memory location.  They will
5015             presumably be referred to via the absOri fields of other
5016             variables. */
5017          if (!varp->gexpr) {
5018             TRACE_D3("  SKIP (no location)\n\n");
5019             continue;
5020          }
5021
5022          /* So it has a location, at least.  If it refers to some other
5023             entry through its absOri field, pull in further info through
5024             that. */
5025          if (varp->absOri != (UWord)D3_INVALID_CUOFF) {
5026             Bool found;
5027             Word ixFirst, ixLast;
5028             TempVar key;
5029             TempVar* keyp = &key;
5030             TempVar *varAI;
5031             VG_(memset)(&key, 0, sizeof(key)); /* not necessary */
5032             key.dioff = varp->absOri; /* this is what we want to find */
5033             found = VG_(lookupXA)( dioff_lookup_tab, &keyp,
5034                                    &ixFirst, &ixLast );
5035             if (!found) {
5036                /* barf("DW_AT_abstract_origin can't be resolved"); */
5037                TRACE_D3("  SKIP (DW_AT_abstract_origin can't be resolved)\n\n");
5038                continue;
5039             }
5040             /* If the following fails, there is more than one entry with
5041                the same dioff.  Which can't happen. */
5042             vg_assert(ixFirst == ixLast);
5043             varAI = *(TempVar**)VG_(indexXA)( dioff_lookup_tab, ixFirst );
5044             /* stay sane */
5045             vg_assert(varAI);
5046             vg_assert(varAI->dioff == varp->absOri);
5047
5048             /* Copy what useful info we can. */
5049             if (varAI->typeR && !varp->typeR)
5050                varp->typeR = varAI->typeR;
5051             if (varAI->name && !varp->name)
5052                varp->name = varAI->name;
5053             if (varAI->fndn_ix && !varp->fndn_ix)
5054                varp->fndn_ix = varAI->fndn_ix;
5055             if (varAI->fLine > 0 && varp->fLine == 0)
5056                varp->fLine = varAI->fLine;
5057          }
5058
5059          /* Give it a name if it doesn't have one. */
5060          if (!varp->name)
5061             varp->name = ML_(addStr)( di, "<anon_var>", -1 );
5062
5063          /* So now does it have enough info to be useful? */
5064          /* NOTE: re typeR: this is a hack.  If typeR is Te_UNKNOWN then
5065             the type didn't get resolved.  Really, in that case
5066             something's broken earlier on, and should be fixed, rather
5067             than just skipping the variable. */
5068          ent = ML_(TyEnts__index_by_cuOff)( tyents_to_keep,
5069                                             tyents_to_keep_cache,
5070                                             varp->typeR );
5071          /* The next two assertions should be guaranteed by
5072             our previous call to resolve_variable_types. */
5073          vg_assert(ent);
5074          vg_assert(ML_(TyEnt__is_type)(ent) || ent->tag == Te_UNKNOWN);
5075
5076          if (ent->tag == Te_UNKNOWN) continue;
5077
5078          vg_assert(varp->gexpr);
5079          vg_assert(varp->name);
5080          vg_assert(varp->typeR);
5081          vg_assert(varp->level >= 0);
5082
5083          /* Ok.  So we're going to keep it.  Call ML_(addVar) once for
5084             each address range in which the variable exists. */
5085          TRACE_D3("  ACQUIRE for range(s) ");
5086          { AddrRange  oneRange;
5087            AddrRange* varPcRanges;
5088            Word       nVarPcRanges;
5089            /* Set up to iterate over address ranges, however
5090               represented. */
5091            if (varp->nRanges == 0 || varp->nRanges == 1) {
5092               vg_assert(!varp->rngMany);
5093               if (varp->nRanges == 0) {
5094                  vg_assert(varp->rngOneMin == 0);
5095                  vg_assert(varp->rngOneMax == 0);
5096               }
5097               nVarPcRanges = varp->nRanges;
5098               oneRange.aMin = varp->rngOneMin;
5099               oneRange.aMax = varp->rngOneMax;
5100               varPcRanges = &oneRange;
5101            } else {
5102               vg_assert(varp->rngMany);
5103               vg_assert(varp->rngOneMin == 0);
5104               vg_assert(varp->rngOneMax == 0);
5105               nVarPcRanges = VG_(sizeXA)(varp->rngMany);
5106               vg_assert(nVarPcRanges >= 2);
5107               vg_assert(nVarPcRanges == (Word)varp->nRanges);
5108               varPcRanges = VG_(indexXA)(varp->rngMany, 0);
5109            }
5110            if (varp->level == 0)
5111               vg_assert( nVarPcRanges == 1 );
5112            /* and iterate */
5113            for (i = 0; i < nVarPcRanges; i++) {
5114               Addr pcMin = varPcRanges[i].aMin;
5115               Addr pcMax = varPcRanges[i].aMax;
5116               vg_assert(pcMin <= pcMax);
5117               /* Level 0 is the global address range.  So at level 0 we
5118                  don't want to bias pcMin/pcMax; but at all other levels
5119                  we do since those are derived from svmas in the Dwarf
5120                  we're reading.  Be paranoid ... */
5121               if (varp->level == 0) {
5122                  vg_assert(pcMin == (Addr)0);
5123                  vg_assert(pcMax == ~(Addr)0);
5124               } else {
5125                  /* vg_assert(pcMin > (Addr)0);
5126                     No .. we can legitimately expect to see ranges like
5127                     0x0-0x11D (pre-biasing, of course). */
5128                  vg_assert(pcMax < ~(Addr)0);
5129               }
5130
5131               /* Apply text biasing, for non-global variables. */
5132               if (varp->level > 0) {
5133                  pcMin += di->text_debug_bias;
5134                  pcMax += di->text_debug_bias;
5135               }
5136
5137               if (i > 0 && (i%2) == 0)
5138                  TRACE_D3("\n                       ");
5139               TRACE_D3("[%#lx,%#lx] ", pcMin, pcMax );
5140
5141               ML_(addVar)(
5142                  di, varp->level,
5143                      pcMin, pcMax,
5144                      varp->name,  varp->typeR,
5145                      varp->gexpr, varp->fbGX,
5146                      varp->fndn_ix, varp->fLine, td3
5147               );
5148            }
5149          }
5150
5151          TRACE_D3("\n\n");
5152          /* and move on to the next var */
5153       }
5154
5155       /* Now free all the TempVars */
5156       n = VG_(sizeXA)( tempvars );
5157       for (i = 0; i < n; i++) {
5158          varp = *(TempVar**)VG_(indexXA)( tempvars, i );
5159          ML_(dinfo_free)(varp);
5160       }
5161       VG_(deleteXA)( tempvars );
5162       tempvars = NULL;
5163
5164       /* and the temp lookup table */
5165       VG_(deleteXA)( dioff_lookup_tab );
5166
5167       /* and the ranges tree.  Note that we need to also free the XArrays
5168          which constitute the keys, hence pass VG_(deleteXA) as a
5169          key-finalizer. */
5170       VG_(deleteFM)( rangestree, (void(*)(UWord))VG_(deleteXA), NULL );
5171
5172       /* and the tyents_to_keep cache */
5173       ML_(dinfo_free)( tyents_to_keep_cache );
5174       tyents_to_keep_cache = NULL;
5175
5176       vg_assert( varparser.fndn_ix_Table == NULL );
5177
5178       /* And the signatured type hash.  */
5179       VG_(HT_destruct) ( signature_types, ML_(dinfo_free) );
5180
5181       /* record the GExprs in di so they can be freed later */
5182       vg_assert(!di->admin_gexprs);
5183       di->admin_gexprs = gexprs;
5184    }
5185
5186    // Free up dynamically allocated memory
5187    if (VG_(clo_read_var_info)) {
5188       type_parser_release(&typarser);
5189       var_parser_release(&varparser);
5190    }
5191 }
5192
5193
5194 /*------------------------------------------------------------*/
5195 /*---                                                      ---*/
5196 /*--- The "new" DWARF3 reader -- top level control logic   ---*/
5197 /*---                                                      ---*/
5198 /*------------------------------------------------------------*/
5199
5200 static Bool               d3rd_jmpbuf_valid  = False; /* d3rd_jmpbuf armed? */
5201 static const HChar*       d3rd_jmpbuf_reason = NULL;  /* why barf jumped    */
5202 static VG_MINIMAL_JMP_BUF(d3rd_jmpbuf);               /* barf's jump target */
5203 /* Abandon the read: record 'reason', longjmp to d3rd_jmpbuf.  No return. */
5204 static __attribute__((noreturn)) void barf ( const HChar* reason ) {
5205    vg_assert(d3rd_jmpbuf_valid);    /* never jump to an unarmed buffer */
5206    d3rd_jmpbuf_reason = reason;     /* ML_(new_dwarf3_reader) reads this */
5207    VG_MINIMAL_LONGJMP(d3rd_jmpbuf);
5208    /*NOTREACHED*/
5209    vg_assert(0);
5210 }
5211
5212 /* Entry point: run the _wrk reader, turning any barf into a symerr. */
5213 void
5214 ML_(new_dwarf3_reader) (
5215    DebugInfo* di,   /* read for trace_symtab; given to the worker and symerr */
5216    DiSlice escn_debug_info,      DiSlice escn_debug_types,
5217    DiSlice escn_debug_abbv,      DiSlice escn_debug_line,
5218    DiSlice escn_debug_str,       DiSlice escn_debug_ranges,
5219    DiSlice escn_debug_loc,       DiSlice escn_debug_info_alt,
5220    DiSlice escn_debug_abbv_alt,  DiSlice escn_debug_line_alt,
5221    DiSlice escn_debug_str_alt
5222 )  /* the DiSlice arguments are forwarded to the worker untouched */
5223 {
5224    volatile Int  jumped;   /* 0 on the direct path, else we longjmp'd */
5225    volatile Bool td3 = di->trace_symtab; /* presumably for TRACE_D3; confirm */
5226
5227    /* Run the _wrk function to read the dwarf3.  If it succeeds, it
5228       just returns normally.  If there is any failure, it longjmp's
5229       back here, having first set d3rd_jmpbuf_reason to something
5230       useful. */
5231    vg_assert(d3rd_jmpbuf_valid  == False); /* buffer not armed on entry */
5232    vg_assert(d3rd_jmpbuf_reason == NULL);  /* and no stale reason left  */
5233    /* barf asserts the buffer is valid, so arm it before the setjmp. */
5234    d3rd_jmpbuf_valid = True;
5235    jumped = VG_MINIMAL_SETJMP(d3rd_jmpbuf);
5236    if (jumped == 0) {
5237       /* Direct path: do the read; the worker gets barf to bail out with. */
5238       new_dwarf3_reader_wrk( di, barf,
5239                              escn_debug_info,     escn_debug_types,
5240                              escn_debug_abbv,     escn_debug_line,
5241                              escn_debug_str,      escn_debug_ranges,
5242                              escn_debug_loc,      escn_debug_info_alt,
5243                              escn_debug_abbv_alt, escn_debug_line_alt,
5244                              escn_debug_str_alt );
5245       d3rd_jmpbuf_valid = False; /* disarm: the worker returned normally */
5246       TRACE_D3("\n------ .debug_info reading was successful ------\n");
5247    } else {
5248       /* It longjmp'd (barf does that), so the read was abandoned. */
5249       d3rd_jmpbuf_valid = False; /* disarm before calling anything else */
5250       /* Can't longjump without giving some sort of reason. */
5251       vg_assert(d3rd_jmpbuf_reason != NULL);
5252
5253       TRACE_D3("\n------ .debug_info reading failed ------\n");
5254
5255       ML_(symerr)(di, True, d3rd_jmpbuf_reason); /* pass on barf's reason */
5256    }
5257    /* Back to the idle state that the asserts at the top insist on. */
5258    d3rd_jmpbuf_valid  = False;
5259    d3rd_jmpbuf_reason = NULL;
5260 }
5261
5262
5263
5264 /* --- Unused code fragments which might be useful one day. --- */
5265
#if 0
   /* Read the arange tables and trace their contents.

      This is a disabled fragment, not a complete function: it
      expects 'aranges', 'debug_aranges_img', 'debug_aranges_sz' and
      'barf' to be supplied by whatever code it is pasted into. */
   TRACE_SYMTAB("\n");
   TRACE_SYMTAB("\n------ The contents of .debug_aranges ------\n");
   init_Cursor( &aranges, debug_aranges_img,
                debug_aranges_sz, 0, barf,
                "Overrun whilst reading .debug_aranges section" );
   while (True) {
      ULong  len, d_i_offset;
      Bool   is64;
      UShort version;
      UChar  asize, segsize;

      if (is_at_end_Cursor( &aranges ))
         break;
      /* Read one arange thingy */
      /* initial_length field */
      len = get_Initial_Length( &is64, &aranges,
               "in .debug_aranges: invalid initial-length field" );
      version    = get_UShort( &aranges );
      d_i_offset = get_Dwarfish_UWord( &aranges, is64 );
      asize      = get_UChar( &aranges );
      segsize    = get_UChar( &aranges );
      TRACE_D3("  Length:                   %llu\n", len);
      TRACE_D3("  Version:                  %d\n", (Int)version);
      TRACE_D3("  Offset into .debug_info:  %llx\n", d_i_offset);
      TRACE_D3("  Pointer Size:             %d\n", (Int)asize);
      TRACE_D3("  Segment Size:             %d\n", (Int)segsize);
      TRACE_D3("\n");
      TRACE_D3("    Address            Length\n");

      /* The padding loop below computes a remainder modulo
         (2 * asize).  An address size of zero would make that a
         division by zero, so refuse such a header outright. */
      if (asize == 0) {
         barf( "in .debug_aranges: zero address size" );
      }

      /* Skip padding bytes until the cursor position is a multiple
         of the size of one (address, length) pair. */
      while ((get_position_of_Cursor( &aranges ) % (2 * asize)) > 0) {
         (void)get_UChar( & aranges );
      }
      /* Dump the pairs.  The all-zero pair ends the list; it is
         traced too, before the loop is left. */
      while (True) {
         ULong address = get_Dwarfish_UWord( &aranges, asize==8 );
         ULong length = get_Dwarfish_UWord( &aranges, asize==8 );
         TRACE_D3("    0x%016llx 0x%llx\n", address, length);
         if (address == 0 && length == 0) break;
      }
   }
   TRACE_SYMTAB("\n");
#endif
5309
5310 #endif // defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_solaris)
5311
5312 /*--------------------------------------------------------------------*/
5313 /*--- end                                                          ---*/
5314 /*--------------------------------------------------------------------*/